All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
@ 2015-06-15 14:30 Jan Beulich
  2015-06-15 15:55 ` Andrew Cooper
  2015-06-16  6:44 ` Jan Beulich
  0 siblings, 2 replies; 14+ messages in thread
From: Jan Beulich @ 2015-06-15 14:30 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 6975 bytes --]

The number of slots per page being 511 (i.e. not a power of two) means
that the (32-bit) read and write indexes going beyond 2^32 will likely
disturb operation. Extend I/O req server creation so the caller can
indicate that it is using suitable atomic accesses where needed (not
all accesses to the two pointers really need to be atomic), allowing
the hypervisor to atomically canonicalize both pointers when both have
gone through at least one cycle.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: Do we need to be worried about non-libxc users of the changed
     (tools only) interface?
     Do we also need a way for default servers to flag atomicity?

--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
     hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
 
     arg->domid = domid;
-    arg->handle_bufioreq = !!handle_bufioreq;
+    arg->handle_bufioreq = handle_bufioreq;
 
     rc = do_xen_hypercall(xch, &hypercall);
 
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
 
 static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
                                  domid_t domid, bool_t is_default,
-                                 bool_t handle_bufioreq, ioservid_t id)
+                                 int bufioreq_handling, ioservid_t id)
 {
     struct vcpu *v;
     int rc;
@@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct 
     if ( rc )
         return rc;
 
-    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
+    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
+        s->bufioreq_atomic = 1;
+
+    rc = hvm_ioreq_server_setup_pages(
+             s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
     if ( rc )
         goto fail_map;
 
@@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
 }
 
 static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
-                                   bool_t is_default, bool_t handle_bufioreq,
+                                   bool_t is_default, int bufioreq_handling,
                                    ioservid_t *id)
 {
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
+        return -EINVAL;
+
     rc = -ENOMEM;
     s = xzalloc(struct hvm_ioreq_server);
     if ( !s )
@@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
     if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
         goto fail2;
 
-    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
+    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
                                next_ioservid(d));
     if ( rc )
         goto fail3;
@@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
 
     spin_lock(&s->bufioreq_lock);
 
-    if ( (pg->write_pointer - pg->read_pointer) >=
+    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
          (IOREQ_BUFFER_SLOT_NUM - qw) )
     {
         /* The queue is full: send the iopacket through the normal path. */
@@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
         return 0;
     }
 
-    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
+    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
 
     if ( qw )
     {
         bp.data = p->data >> 32;
-        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
+        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
     }
 
     /* Make the ioreq_t visible /before/ write_pointer. */
     wmb();
-    pg->write_pointer += qw ? 2 : 1;
+    pg->ptrs.write_pointer += qw ? 2 : 1;
+
+    /* Canonicalize read/write pointers to prevent their overflow. */
+    while ( s->bufioreq_atomic &&
+            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
+    {
+        union bufioreq_pointers old = pg->ptrs, new;
+        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
+
+        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
+        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
+        cmpxchg(&pg->ptrs.full, old.full, new.full);
+    }
 
     notify_via_xen_event_channel(d, s->bufioreq_evtchn);
     spin_unlock(&s->bufioreq_lock);
@@ -5446,7 +5465,7 @@ static int hvmop_create_ioreq_server(
         goto out;
 
     rc = hvm_create_ioreq_server(d, curr_d->domain_id, 0,
-                                 !!op.handle_bufioreq, &op.id);
+                                 op.handle_bufioreq, &op.id);
     if ( rc != 0 )
         goto out;
 
@@ -5928,7 +5947,8 @@ static int hvmop_get_param(
 
         /* May need to create server. */
         domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
-        rc = hvm_create_ioreq_server(d, domid, 1, 1, NULL);
+        rc = hvm_create_ioreq_server(d, domid, 1,
+                                     HVM_IOREQSRV_BUFIOREQ_LEGACY, NULL);
         if ( rc != 0 && rc != -EEXIST )
             goto out;
     }
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -70,6 +70,7 @@ struct hvm_ioreq_server {
     evtchn_port_t          bufioreq_evtchn;
     struct rangeset        *range[NR_IO_RANGE_TYPES];
     bool_t                 enabled;
+    bool_t                 bufioreq_atomic;
 };
 
 struct hvm_domain {
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -266,6 +266,13 @@ typedef uint16_t ioservid_t;
 #define HVMOP_create_ioreq_server 17
 struct xen_hvm_create_ioreq_server {
     domid_t domid;           /* IN - domain to be serviced */
+#define HVM_IOREQSRV_BUFIOREQ_OFF    0
+#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
+/*
+ * Use this when read_pointer gets updated atomically and
+ * the pointer pair gets read atomically:
+ */
+#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
     uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
     ioservid_t id;           /* OUT - server id */
 };
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -83,8 +83,17 @@ typedef struct buf_ioreq buf_ioreq_t;
 
 #define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
 struct buffered_iopage {
-    unsigned int read_pointer;
-    unsigned int write_pointer;
+#ifdef __XEN__
+    union bufioreq_pointers {
+        struct {
+#endif
+            uint32_t read_pointer;
+            uint32_t write_pointer;
+#ifdef __XEN__
+        };
+        uint64_t full;
+    } ptrs;
+#endif
     buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
 }; /* NB. Size of this structure must be no greater than one page. */
 typedef struct buffered_iopage buffered_iopage_t;



[-- Attachment #2: x86-HVM-bufioreq-atomic.patch --]
[-- Type: text/plain, Size: 7029 bytes --]

x86/HVM: avoid pointer wraparound in bufioreq handling

The number of slots per page being 511 (i.e. not a power of two) means
that the (32-bit) read and write indexes going beyond 2^32 will likely
disturb operation. Extend I/O req server creation so the caller can
indicate that it is using suitable atomic accesses where needed (not
all accesses to the two pointers really need to be atomic), allowing
the hypervisor to atomically canonicalize both pointers when both have
gone through at least one cycle.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: Do we need to be worried about non-libxc users of the changed
     (tools only) interface?
     Do we also need a way for default servers to flag atomicity?

--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
     hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
 
     arg->domid = domid;
-    arg->handle_bufioreq = !!handle_bufioreq;
+    arg->handle_bufioreq = handle_bufioreq;
 
     rc = do_xen_hypercall(xch, &hypercall);
 
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
 
 static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
                                  domid_t domid, bool_t is_default,
-                                 bool_t handle_bufioreq, ioservid_t id)
+                                 int bufioreq_handling, ioservid_t id)
 {
     struct vcpu *v;
     int rc;
@@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct 
     if ( rc )
         return rc;
 
-    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
+    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
+        s->bufioreq_atomic = 1;
+
+    rc = hvm_ioreq_server_setup_pages(
+             s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
     if ( rc )
         goto fail_map;
 
@@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
 }
 
 static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
-                                   bool_t is_default, bool_t handle_bufioreq,
+                                   bool_t is_default, int bufioreq_handling,
                                    ioservid_t *id)
 {
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
+        return -EINVAL;
+
     rc = -ENOMEM;
     s = xzalloc(struct hvm_ioreq_server);
     if ( !s )
@@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
     if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
         goto fail2;
 
-    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
+    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
                                next_ioservid(d));
     if ( rc )
         goto fail3;
@@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
 
     spin_lock(&s->bufioreq_lock);
 
-    if ( (pg->write_pointer - pg->read_pointer) >=
+    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
          (IOREQ_BUFFER_SLOT_NUM - qw) )
     {
         /* The queue is full: send the iopacket through the normal path. */
@@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
         return 0;
     }
 
-    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
+    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
 
     if ( qw )
     {
         bp.data = p->data >> 32;
-        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
+        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
     }
 
     /* Make the ioreq_t visible /before/ write_pointer. */
     wmb();
-    pg->write_pointer += qw ? 2 : 1;
+    pg->ptrs.write_pointer += qw ? 2 : 1;
+
+    /* Canonicalize read/write pointers to prevent their overflow. */
+    while ( s->bufioreq_atomic &&
+            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
+    {
+        union bufioreq_pointers old = pg->ptrs, new;
+        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
+
+        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
+        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
+        cmpxchg(&pg->ptrs.full, old.full, new.full);
+    }
 
     notify_via_xen_event_channel(d, s->bufioreq_evtchn);
     spin_unlock(&s->bufioreq_lock);
@@ -5446,7 +5465,7 @@ static int hvmop_create_ioreq_server(
         goto out;
 
     rc = hvm_create_ioreq_server(d, curr_d->domain_id, 0,
-                                 !!op.handle_bufioreq, &op.id);
+                                 op.handle_bufioreq, &op.id);
     if ( rc != 0 )
         goto out;
 
@@ -5928,7 +5947,8 @@ static int hvmop_get_param(
 
         /* May need to create server. */
         domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
-        rc = hvm_create_ioreq_server(d, domid, 1, 1, NULL);
+        rc = hvm_create_ioreq_server(d, domid, 1,
+                                     HVM_IOREQSRV_BUFIOREQ_LEGACY, NULL);
         if ( rc != 0 && rc != -EEXIST )
             goto out;
     }
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -70,6 +70,7 @@ struct hvm_ioreq_server {
     evtchn_port_t          bufioreq_evtchn;
     struct rangeset        *range[NR_IO_RANGE_TYPES];
     bool_t                 enabled;
+    bool_t                 bufioreq_atomic;
 };
 
 struct hvm_domain {
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -266,6 +266,13 @@ typedef uint16_t ioservid_t;
 #define HVMOP_create_ioreq_server 17
 struct xen_hvm_create_ioreq_server {
     domid_t domid;           /* IN - domain to be serviced */
+#define HVM_IOREQSRV_BUFIOREQ_OFF    0
+#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
+/*
+ * Use this when read_pointer gets updated atomically and
+ * the pointer pair gets read atomically:
+ */
+#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
     uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
     ioservid_t id;           /* OUT - server id */
 };
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -83,8 +83,17 @@ typedef struct buf_ioreq buf_ioreq_t;
 
 #define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
 struct buffered_iopage {
-    unsigned int read_pointer;
-    unsigned int write_pointer;
+#ifdef __XEN__
+    union bufioreq_pointers {
+        struct {
+#endif
+            uint32_t read_pointer;
+            uint32_t write_pointer;
+#ifdef __XEN__
+        };
+        uint64_t full;
+    } ptrs;
+#endif
     buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
 }; /* NB. Size of this structure must be no greater than one page. */
 typedef struct buffered_iopage buffered_iopage_t;

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-15 14:30 [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling Jan Beulich
@ 2015-06-15 15:55 ` Andrew Cooper
  2015-06-16  6:40   ` Jan Beulich
  2015-06-16  6:44 ` Jan Beulich
  1 sibling, 1 reply; 14+ messages in thread
From: Andrew Cooper @ 2015-06-15 15:55 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 15/06/15 15:30, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes going beyond 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Oh dear.  How did we end up with a circular buffer with non power-of-two
size.

> ---
> TBD: Do we need to be worried about non-libxc users of the changed
>      (tools only) interface?
>      Do we also need a way for default servers to flag atomicity?

It should only be qemu-trad using the default server these days, but
this issue probably does want fixing there as well.

> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
>          return 0;
>      }
>  
> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>  
>      if ( qw )
>      {
>          bp.data = p->data >> 32;
> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>      }
>  
>      /* Make the ioreq_t visible /before/ write_pointer. */
>      wmb();
> -    pg->write_pointer += qw ? 2 : 1;
> +    pg->ptrs.write_pointer += qw ? 2 : 1;
> +
> +    /* Canonicalize read/write pointers to prevent their overflow. */
> +    while ( s->bufioreq_atomic &&
> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> +    {
> +        union bufioreq_pointers old = pg->ptrs, new;
> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> +
> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        cmpxchg(&pg->ptrs.full, old.full, new.full);

This has the possibility for a misbehaving emulator to livelock Xen by
playing with the pointers.

I think you need to break and kill the ioreq server if the read pointer
is ever observed going backwards, or overtaking the write pointer.  It
is however legitimate to observe the read pointer stepping forwards one
entry at a time, as processing is occurring.

~Andrew

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-15 15:55 ` Andrew Cooper
@ 2015-06-16  6:40   ` Jan Beulich
  2015-06-16  9:32     ` Andrew Cooper
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2015-06-16  6:40 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 15.06.15 at 17:55, <andrew.cooper3@citrix.com> wrote:
> On 15/06/15 15:30, Jan Beulich wrote:
>> +    /* Canonicalize read/write pointers to prevent their overflow. */
>> +    while ( s->bufioreq_atomic &&
>> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
>> +    {
>> +        union bufioreq_pointers old = pg->ptrs, new;
>> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
>> +
>> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
>> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
>> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
> 
> This has the possibility for a misbehaving emulator to livelock Xen by
> playing with the pointers.
> 
> I think you need to break and kill the ioreq server if the read pointer
> is ever observed going backwards, or overtaking the write pointer.  It
> is however legitimate to observe the read pointer stepping forwards one
> entry at a time, as processing is occurring.

Watching for the pointer to step backwards isn't nice; what I
would do instead is to limit the loop count here to
IOREQ_BUFFER_SLOT_NUM (on the basis that we're not
creating new entries, and hence the reader can't legitimately
update the pointer more than that number of times); for
simplicity's sake I wouldn't try to limit the loop further (e.g. to
write_pointer - read_pointer iterations).

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-15 14:30 [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling Jan Beulich
  2015-06-15 15:55 ` Andrew Cooper
@ 2015-06-16  6:44 ` Jan Beulich
  2015-06-16  8:20   ` Ian Campbell
  1 sibling, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2015-06-16  6:44 UTC (permalink / raw)
  To: xen-devel
  Cc: Keir Fraser, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Ian Campbell, Wei Liu

>>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes going beyond 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

No matter that it's just a single line change, I realized that I
forgot to Cc the tools maintainers. While a v2 will be needed (see
the reply just sent to Andrew) I'd still appreciate input (if any) to
limit the number of revisions needed.

> ---
> TBD: Do we need to be worried about non-libxc users of the changed
>      (tools only) interface?
>      Do we also need a way for default servers to flag atomicity?
> 
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
>      hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
>  
>      arg->domid = domid;
> -    arg->handle_bufioreq = !!handle_bufioreq;
> +    arg->handle_bufioreq = handle_bufioreq;
>  
>      rc = do_xen_hypercall(xch, &hypercall);
>  
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>  
>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain 
> *d,
>                                   domid_t domid, bool_t is_default,
> -                                 bool_t handle_bufioreq, ioservid_t id)
> +                                 int bufioreq_handling, ioservid_t id)
>  {
>      struct vcpu *v;
>      int rc;
> @@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct 
>      if ( rc )
>          return rc;
>  
> -    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
> +    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        s->bufioreq_atomic = 1;
> +
> +    rc = hvm_ioreq_server_setup_pages(
> +             s, is_default, bufioreq_handling != 
> HVM_IOREQSRV_BUFIOREQ_OFF);
>      if ( rc )
>          goto fail_map;
>  
> @@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
>  }
>  
>  static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
> -                                   bool_t is_default, bool_t 
> handle_bufioreq,
> +                                   bool_t is_default, int 
> bufioreq_handling,
>                                     ioservid_t *id)
>  {
>      struct hvm_ioreq_server *s;
>      int rc;
>  
> +    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        return -EINVAL;
> +
>      rc = -ENOMEM;
>      s = xzalloc(struct hvm_ioreq_server);
>      if ( !s )
> @@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
>      if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
>          goto fail2;
>  
> -    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
> +    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
>                                 next_ioservid(d));
>      if ( rc )
>          goto fail3;
> @@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
>  
>      spin_lock(&s->bufioreq_lock);
>  
> -    if ( (pg->write_pointer - pg->read_pointer) >=
> +    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
>           (IOREQ_BUFFER_SLOT_NUM - qw) )
>      {
>          /* The queue is full: send the iopacket through the normal path. */
> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
>          return 0;
>      }
>  
> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>  
>      if ( qw )
>      {
>          bp.data = p->data >> 32;
> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>      }
>  
>      /* Make the ioreq_t visible /before/ write_pointer. */
>      wmb();
> -    pg->write_pointer += qw ? 2 : 1;
> +    pg->ptrs.write_pointer += qw ? 2 : 1;
> +
> +    /* Canonicalize read/write pointers to prevent their overflow. */
> +    while ( s->bufioreq_atomic &&
> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> +    {
> +        union bufioreq_pointers old = pg->ptrs, new;
> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> +
> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
> +    }
>  
>      notify_via_xen_event_channel(d, s->bufioreq_evtchn);
>      spin_unlock(&s->bufioreq_lock);
> @@ -5446,7 +5465,7 @@ static int hvmop_create_ioreq_server(
>          goto out;
>  
>      rc = hvm_create_ioreq_server(d, curr_d->domain_id, 0,
> -                                 !!op.handle_bufioreq, &op.id);
> +                                 op.handle_bufioreq, &op.id);
>      if ( rc != 0 )
>          goto out;
>  
> @@ -5928,7 +5947,8 @@ static int hvmop_get_param(
>  
>          /* May need to create server. */
>          domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
> -        rc = hvm_create_ioreq_server(d, domid, 1, 1, NULL);
> +        rc = hvm_create_ioreq_server(d, domid, 1,
> +                                     HVM_IOREQSRV_BUFIOREQ_LEGACY, NULL);
>          if ( rc != 0 && rc != -EEXIST )
>              goto out;
>      }
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -70,6 +70,7 @@ struct hvm_ioreq_server {
>      evtchn_port_t          bufioreq_evtchn;
>      struct rangeset        *range[NR_IO_RANGE_TYPES];
>      bool_t                 enabled;
> +    bool_t                 bufioreq_atomic;
>  };
>  
>  struct hvm_domain {
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -266,6 +266,13 @@ typedef uint16_t ioservid_t;
>  #define HVMOP_create_ioreq_server 17
>  struct xen_hvm_create_ioreq_server {
>      domid_t domid;           /* IN - domain to be serviced */
> +#define HVM_IOREQSRV_BUFIOREQ_OFF    0
> +#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
> +/*
> + * Use this when read_pointer gets updated atomically and
> + * the pointer pair gets read atomically:
> + */
> +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
>      uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
>      ioservid_t id;           /* OUT - server id */
>  };
> --- a/xen/include/public/hvm/ioreq.h
> +++ b/xen/include/public/hvm/ioreq.h
> @@ -83,8 +83,17 @@ typedef struct buf_ioreq buf_ioreq_t;
>  
>  #define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes 
> */
>  struct buffered_iopage {
> -    unsigned int read_pointer;
> -    unsigned int write_pointer;
> +#ifdef __XEN__
> +    union bufioreq_pointers {
> +        struct {
> +#endif
> +            uint32_t read_pointer;
> +            uint32_t write_pointer;
> +#ifdef __XEN__
> +        };
> +        uint64_t full;
> +    } ptrs;
> +#endif
>      buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
>  }; /* NB. Size of this structure must be no greater than one page. */
>  typedef struct buffered_iopage buffered_iopage_t;

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  6:44 ` Jan Beulich
@ 2015-06-16  8:20   ` Ian Campbell
  2015-06-16  8:37     ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Campbell @ 2015-06-16  8:20 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, Keir Fraser, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, xen-devel

On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
> > The number of slots per page being 511 (i.e. not a power of two) means
> > that the (32-bit) read and write indexes going beyond 2^32 will likely
> > disturb operation. Extend I/O req server creation so the caller can
> > indicate that it is using suitable atomic accesses where needed (not
> > all accesses to the two pointers really need to be atomic), allowing
> > the hypervisor to atomically canonicalize both pointers when both have
> > gone through at least one cycle.
> > 
> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> No matter that it's just a single line change, I realized that I
> forgot to Cc the tools maintainers. While a v2 will be needed (see
> the reply just sent to Andrew) I'd still appreciate input (if any) to
> limit the number of revisions needed.

For such a simple toolstack side change which just reflects the
underlying hcall interface I have no real opinion so far as the tools
side goes, but it would be good to update the comments in xenctrl.h too.
With that done for the tools change:
        Acked-by: Ian Campbell <ian.campbell@citrix.com>

For the hypercall interface level, I wonder if handle_bufioreq is still
an appropriate name given its no longer treated as a boolean flag?
bufioreq_type or something perhaps?

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  8:20   ` Ian Campbell
@ 2015-06-16  8:37     ` Jan Beulich
  2015-06-16  8:59       ` Ian Campbell
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2015-06-16  8:37 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Wei Liu, Keir Fraser, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, xen-devel

>>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
> On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
>> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
>> > The number of slots per page being 511 (i.e. not a power of two) means
>> > that the (32-bit) read and write indexes going beyond 2^32 will likely
>> > disturb operation. Extend I/O req server creation so the caller can
>> > indicate that it is using suitable atomic accesses where needed (not
>> > all accesses to the two pointers really need to be atomic), allowing
>> > the hypervisor to atomically canonicalize both pointers when both have
>> > gone through at least one cycle.
>> > 
>> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> 
>> No matter that it's just a single line change, I realized that I
>> forgot to Cc the tools maintainers. While a v2 will be needed (see
>> the reply just sent to Andrew) I'd still appreciate input (if any) to
>> limit the number of revisions needed.
> 
> For such a simple toolstack side change which just reflects the
> underlying hcall interface I have no real opinion so far as the tools
> side goes, but it would be good to update the comments in xenctrl.h too.
> With that done for the tools change:
>         Acked-by: Ian Campbell <ian.campbell@citrix.com>

Thanks. The request for feedback went beyond the request for
an ack though, namely

TBD: Do we need to be worried about non-libxc users of the changed
     (tools only) interface?
     Do we also need a way for default servers to flag atomicity?

> For the hypercall interface level, I wonder if handle_bufioreq is still
> an appropriate name given its no longer treated as a boolean flag?
> bufioreq_type or something perhaps?

I think the name is fine as is (it doesn't really imply boolean-ness
in my reading), but if others agree with you that it should be
renamed I also wouldn't object.

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  8:37     ` Jan Beulich
@ 2015-06-16  8:59       ` Ian Campbell
  2015-06-16  9:15         ` Paul Durrant
  2015-06-16  9:34         ` Jan Beulich
  0 siblings, 2 replies; 14+ messages in thread
From: Ian Campbell @ 2015-06-16  8:59 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, Keir Fraser, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Paul Durrant, xen-devel

On Tue, 2015-06-16 at 09:37 +0100, Jan Beulich wrote:
> >>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
> > On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
> >> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
> >> > The number of slots per page being 511 (i.e. not a power of two) means
> >> > that the (32-bit) read and write indexes going beyond 2^32 will likely
> >> > disturb operation. Extend I/O req server creation so the caller can
> >> > indicate that it is using suitable atomic accesses where needed (not
> >> > all accesses to the two pointers really need to be atomic), allowing
> >> > the hypervisor to atomically canonicalize both pointers when both have
> >> > gone through at least one cycle.
> >> > 
> >> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> 
> >> No matter that it's just a single line change, I realized that I
> >> forgot to Cc the tools maintainers. While a v2 will be needed (see
> >> the reply just sent to Andrew) I'd still appreciate input (if any) to
> >> limit the number of revisions needed.
> > 
> > For such a simple toolstack side change which just reflects the
> > underlying hcall interface I have no real opinion so far as the tools
> > side goes, but it would be good to update the comments in xenctrl.h too.
> > With that done for the tools change:
> >         Acked-by: Ian Campbell <ian.campbell@citrix.com>
> 
> Thanks. The request for feedback went beyond the request for
> an ack though, namely
> 
> TBD: Do we need to be worried about non-libxc users of the changed
>      (tools only) interface?

It's (currently at least) a declared non-stable API, so in principal no.
It would be polite to give a heads up to the expected potential users
though, which you've done by CCing the QEMU maintainers I think. Adding
Paul D for completeness though.

>      Do we also need a way for default servers to flag atomicity?

Not doing so leaves them open to the issue which you are fixing here, I
think?

Andy indicated that qemu-trad was the only default-server these days, so
perhaps we can just decree that it is so while fixing qemu-trad as
necessary?

In fact, are ioreq servers new enough and with few enough users that can
we decree that even they are always atomic, perhaps after having audited
the current users to ensure they behave correctly?

> > For the hypercall interface level, I wonder if handle_bufioreq is still
> > an appropriate name given its no longer treated as a boolean flag?
> > bufioreq_type or something perhaps?
> 
> I think the name is fine as is (it doesn't really imply boolean-ness
> in my reading), but if others agree with you that it should be
> renamed I also wouldn't object.

I read it as "should handle buf ioreqs", with should being a boolean
type predicate thing, but that might just be me, I suppose it could also
be read as "kind of buf ioreqs to handle".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  8:59       ` Ian Campbell
@ 2015-06-16  9:15         ` Paul Durrant
  2015-06-16  9:29           ` Jan Beulich
  2015-06-16  9:34         ` Jan Beulich
  1 sibling, 1 reply; 14+ messages in thread
From: Paul Durrant @ 2015-06-16  9:15 UTC (permalink / raw)
  To: Ian Campbell, Jan Beulich
  Cc: Keir (Xen.org), Andrew Cooper, Ian Jackson, Stefano Stabellini,
	xen-devel, Wei Liu

> -----Original Message-----
> From: Ian Campbell [mailto:ian.campbell@citrix.com]
> Sent: 16 June 2015 10:00
> To: Jan Beulich
> Cc: Andrew Cooper; Wei Liu; Ian Jackson; Stefano Stabellini; xen-devel; Keir
> (Xen.org); Paul Durrant
> Subject: Re: [Xen-devel] [PATCH] x86/HVM: avoid pointer wraparound in
> bufioreq handling
> 
> On Tue, 2015-06-16 at 09:37 +0100, Jan Beulich wrote:
> > >>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
> > > On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
> > >> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
> > >> > The number of slots per page being 511 (i.e. not a power of two)
> means
> > >> > that the (32-bit) read and write indexes going beyond 2^32 will likely
> > >> > disturb operation. Extend I/O req server creation so the caller can
> > >> > indicate that it is using suitable atomic accesses where needed (not
> > >> > all accesses to the two pointers really need to be atomic), allowing
> > >> > the hypervisor to atomically canonicalize both pointers when both
> have
> > >> > gone through at least one cycle.
> > >> >
> > >> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
> > >>
> > >> No matter that it's just a single line change, I realized that I
> > >> forgot to Cc the tools maintainers. While a v2 will be needed (see
> > >> the reply just sent to Andrew) I'd still appreciate input (if any) to
> > >> limit the number of revisions needed.
> > >
> > > For such a simple toolstack side change which just reflects the
> > > underlying hcall interface I have no real opinion so far as the tools
> > > side goes, but it would be good to update the comments in xenctrl.h too.
> > > With that done for the tools change:
> > >         Acked-by: Ian Campbell <ian.campbell@citrix.com>
> >
> > Thanks. The request for feedback went beyond the request for
> > an ack though, namely
> >
> > TBD: Do we need to be worried about non-libxc users of the changed
> >      (tools only) interface?
> 
> It's (currently at least) a declared non-stable API, so in principal no.
> It would be polite to give a heads up to the expected potential users
> though, which you've done by CCing the QEMU maintainers I think. Adding
> Paul D for completeness though.

>From my reading, both QEMU upstream and trad are safe. They use a loop of the form:

while (read_ptr != write_ptr)
{
   do stuff

  read_ptr += (handled a qword) ? 2 : 1;
}

So, since the only test is for equality I think overflow should be handled correctly. So, does anything actually need to be fixed?

  Paul

> 
> >      Do we also need a way for default servers to flag atomicity?
> 
> Not doing so leaves them open to the issue which you are fixing here, I
> think?
> 
> Andy indicated that qemu-trad was the only default-server these days, so
> perhaps we can just decree that it is so while fixing qemu-trad as
> necessary?
> 
> In fact, are ioreq servers new enough and with few enough users that can
> we decree that even they are always atomic, perhaps after having audited
> the current users to ensure they behave correctly?
> 
> > > For the hypercall interface level, I wonder if handle_bufioreq is still
> > > an appropriate name given its no longer treated as a boolean flag?
> > > bufioreq_type or something perhaps?
> >
> > I think the name is fine as is (it doesn't really imply boolean-ness
> > in my reading), but if others agree with you that it should be
> > renamed I also wouldn't object.
> 
> I read it as "should handle buf ioreqs", with should being a boolean
> type predicate thing, but that might just be me, I suppose it could also
> be read as "kind of buf ioreqs to handle".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  9:15         ` Paul Durrant
@ 2015-06-16  9:29           ` Jan Beulich
  2015-06-16  9:45             ` Paul Durrant
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2015-06-16  9:29 UTC (permalink / raw)
  To: Paul Durrant
  Cc: Wei Liu, Ian Campbell, Andrew Cooper, Stefano Stabellini,
	Ian Jackson, xen-devel, Keir (Xen.org)

>>> On 16.06.15 at 11:15, <Paul.Durrant@citrix.com> wrote:
>>  -----Original Message-----
>> From: Ian Campbell [mailto:ian.campbell@citrix.com]
>> Sent: 16 June 2015 10:00
>> To: Jan Beulich
>> Cc: Andrew Cooper; Wei Liu; Ian Jackson; Stefano Stabellini; xen-devel; Keir
>> (Xen.org); Paul Durrant
>> Subject: Re: [Xen-devel] [PATCH] x86/HVM: avoid pointer wraparound in
>> bufioreq handling
>> 
>> On Tue, 2015-06-16 at 09:37 +0100, Jan Beulich wrote:
>> > >>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
>> > > On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
>> > >> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
>> > >> > The number of slots per page being 511 (i.e. not a power of two)
>> means
>> > >> > that the (32-bit) read and write indexes going beyond 2^32 will likely
>> > >> > disturb operation. Extend I/O req server creation so the caller can
>> > >> > indicate that it is using suitable atomic accesses where needed (not
>> > >> > all accesses to the two pointers really need to be atomic), allowing
>> > >> > the hypervisor to atomically canonicalize both pointers when both
>> have
>> > >> > gone through at least one cycle.
>> > >> >
>> > >> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> > >>
>> > >> No matter that it's just a single line change, I realized that I
>> > >> forgot to Cc the tools maintainers. While a v2 will be needed (see
>> > >> the reply just sent to Andrew) I'd still appreciate input (if any) to
>> > >> limit the number of revisions needed.
>> > >
>> > > For such a simple toolstack side change which just reflects the
>> > > underlying hcall interface I have no real opinion so far as the tools
>> > > side goes, but it would be good to update the comments in xenctrl.h too.
>> > > With that done for the tools change:
>> > >         Acked-by: Ian Campbell <ian.campbell@citrix.com>
>> >
>> > Thanks. The request for feedback went beyond the request for
>> > an ack though, namely
>> >
>> > TBD: Do we need to be worried about non-libxc users of the changed
>> >      (tools only) interface?
>> 
>> It's (currently at least) a declared non-stable API, so in principal no.
>> It would be polite to give a heads up to the expected potential users
>> though, which you've done by CCing the QEMU maintainers I think. Adding
>> Paul D for completeness though.
> 
> From my reading, both QEMU upstream and trad are safe. They use a loop of 
> the form:
> 
> while (read_ptr != write_ptr)
> {
>    do stuff
> 
>   read_ptr += (handled a qword) ? 2 : 1;
> }
> 
> So, since the only test is for equality I think overflow should be handled 
> correctly. So, does anything actually need to be fixed?

Of course this needs to be fixed: When either pointer crosses the
2^32 boundary, the slot referenced goes from 0x1f to 0 (due to the
"modulo 511" operation determining the slot to be used), introducing
a discontinuity and potentially corrupting data in slots not consumed
yet.

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  6:40   ` Jan Beulich
@ 2015-06-16  9:32     ` Andrew Cooper
  0 siblings, 0 replies; 14+ messages in thread
From: Andrew Cooper @ 2015-06-16  9:32 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser

On 16/06/15 07:40, Jan Beulich wrote:
>>>> On 15.06.15 at 17:55, <andrew.cooper3@citrix.com> wrote:
>> On 15/06/15 15:30, Jan Beulich wrote:
>>> +    /* Canonicalize read/write pointers to prevent their overflow. */
>>> +    while ( s->bufioreq_atomic &&
>>> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
>>> +    {
>>> +        union bufioreq_pointers old = pg->ptrs, new;
>>> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
>>> +
>>> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
>>> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
>>> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
>> This has the possibility for a misbehaving emulator to livelock Xen by
>> playing with the pointers.
>>
>> I think you need to break and kill the ioreq server if the read pointer
>> is ever observed going backwards, or overtaking the write pointer.  It
>> is however legitimate to observe the read pointer stepping forwards one
>> entry at a time, as processing is occurring.
> Watching for the pointer to step backwards isn't nice; what I
> would do instead is to limit the loop count here to
> IOREQ_BUFFER_SLOT_NUM (on the basis that we're not
> creating new entries, and hence the reader can't legitimately
> update the pointer more than that number of times); for
> simplicity's sake I wouldn't try to limit the loop further (e.g. to
> write_pointer - read_pointer iterations).

That seems like a reasonable compromise.  511 cmpxchg()s isn't over the
top in terms of time.

~Andrew

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  8:59       ` Ian Campbell
  2015-06-16  9:15         ` Paul Durrant
@ 2015-06-16  9:34         ` Jan Beulich
  2015-06-16  9:41           ` Ian Campbell
  1 sibling, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2015-06-16  9:34 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Paul Durrant, xen-devel, Keir Fraser

>>> On 16.06.15 at 10:59, <ian.campbell@citrix.com> wrote:
> On Tue, 2015-06-16 at 09:37 +0100, Jan Beulich wrote:
>> >>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
>> > On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
>> >> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
>> >> > The number of slots per page being 511 (i.e. not a power of two) means
>> >> > that the (32-bit) read and write indexes going beyond 2^32 will likely
>> >> > disturb operation. Extend I/O req server creation so the caller can
>> >> > indicate that it is using suitable atomic accesses where needed (not
>> >> > all accesses to the two pointers really need to be atomic), allowing
>> >> > the hypervisor to atomically canonicalize both pointers when both have
>> >> > gone through at least one cycle.
>> >> > 
>> >> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> >> 
>> >> No matter that it's just a single line change, I realized that I
>> >> forgot to Cc the tools maintainers. While a v2 will be needed (see
>> >> the reply just sent to Andrew) I'd still appreciate input (if any) to
>> >> limit the number of revisions needed.
>> > 
>> > For such a simple toolstack side change which just reflects the
>> > underlying hcall interface I have no real opinion so far as the tools
>> > side goes, but it would be good to update the comments in xenctrl.h too.
>> > With that done for the tools change:
>> >         Acked-by: Ian Campbell <ian.campbell@citrix.com>
>> 
>> Thanks. The request for feedback went beyond the request for
>> an ack though, namely
>> 
>> TBD: Do we need to be worried about non-libxc users of the changed
>>      (tools only) interface?
> 
> It's (currently at least) a declared non-stable API, so in principal no.
> It would be polite to give a heads up to the expected potential users
> though, which you've done by CCing the QEMU maintainers I think. Adding
> Paul D for completeness though.

Right, but qemu specifically uses libxc, so is not a problem. Are we
aware of any users bypassing libxc at all?

>>      Do we also need a way for default servers to flag atomicity?
> 
> Not doing so leaves them open to the issue which you are fixing here, I
> think?
> 
> Andy indicated that qemu-trad was the only default-server these days, so
> perhaps we can just decree that it is so while fixing qemu-trad as
> necessary?
> 
> In fact, are ioreq servers new enough and with few enough users that can
> we decree that even they are always atomic, perhaps after having audited
> the current users to ensure they behave correctly?

I'm afraid not, as there is a qemu side fix for this too (i.e. beyond
just making it invoke the API with the new HVM_IOREQSRV_BUFIOREQ_ATOMIC).
And for qemu-trad the question then would be how to make it
announce that it's using atomic accesses (or whether the imply
default servers always do, in turn implying suitable qemu-trad
versions always being packaged with the hypervisor/tools pair).

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  9:34         ` Jan Beulich
@ 2015-06-16  9:41           ` Ian Campbell
  0 siblings, 0 replies; 14+ messages in thread
From: Ian Campbell @ 2015-06-16  9:41 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Paul Durrant, xen-devel, Keir Fraser

On Tue, 2015-06-16 at 10:34 +0100, Jan Beulich wrote:
> >>> On 16.06.15 at 10:59, <ian.campbell@citrix.com> wrote:
> > On Tue, 2015-06-16 at 09:37 +0100, Jan Beulich wrote:
> >> >>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
> >> > On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
> >> >> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
> >> >> > The number of slots per page being 511 (i.e. not a power of two) means
> >> >> > that the (32-bit) read and write indexes going beyond 2^32 will likely
> >> >> > disturb operation. Extend I/O req server creation so the caller can
> >> >> > indicate that it is using suitable atomic accesses where needed (not
> >> >> > all accesses to the two pointers really need to be atomic), allowing
> >> >> > the hypervisor to atomically canonicalize both pointers when both have
> >> >> > gone through at least one cycle.
> >> >> > 
> >> >> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> >> 
> >> >> No matter that it's just a single line change, I realized that I
> >> >> forgot to Cc the tools maintainers. While a v2 will be needed (see
> >> >> the reply just sent to Andrew) I'd still appreciate input (if any) to
> >> >> limit the number of revisions needed.
> >> > 
> >> > For such a simple toolstack side change which just reflects the
> >> > underlying hcall interface I have no real opinion so far as the tools
> >> > side goes, but it would be good to update the comments in xenctrl.h too.
> >> > With that done for the tools change:
> >> >         Acked-by: Ian Campbell <ian.campbell@citrix.com>
> >> 
> >> Thanks. The request for feedback went beyond the request for
> >> an ack though, namely
> >> 
> >> TBD: Do we need to be worried about non-libxc users of the changed
> >>      (tools only) interface?
> > 
> > It's (currently at least) a declared non-stable API, so in principal no.
> > It would be polite to give a heads up to the expected potential users
> > though, which you've done by CCing the QEMU maintainers I think. Adding
> > Paul D for completeness though.
> 
> Right, but qemu specifically uses libxc, so is not a problem. Are we
> aware of any users bypassing libxc at all?

I'm not, and TBH I don't think we should concern ourselves too much with
such users, they should use the library.

> >>      Do we also need a way for default servers to flag atomicity?
> > 
> > Not doing so leaves them open to the issue which you are fixing here, I
> > think?
> > 
> > Andy indicated that qemu-trad was the only default-server these days, so
> > perhaps we can just decree that it is so while fixing qemu-trad as
> > necessary?
> > 
> > In fact, are ioreq servers new enough and with few enough users that can
> > we decree that even they are always atomic, perhaps after having audited
> > the current users to ensure they behave correctly?
> 
> I'm afraid not, as there is a qemu side fix for this too (i.e. beyond
> just making it invoke the API with the new HVM_IOREQSRV_BUFIOREQ_ATOMIC).
> And for qemu-trad the question then would be how to make it
> announce that it's using atomic accesses (or whether the imply
> default servers always do, in turn implying suitable qemu-trad
> versions always being packaged with the hypervisor/tools pair).

I think we can (and historically have) assume/require that the qemu-trad
version is the one we shipped with a given release.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  9:29           ` Jan Beulich
@ 2015-06-16  9:45             ` Paul Durrant
  2015-06-16  9:54               ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: Paul Durrant @ 2015-06-16  9:45 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, Ian Campbell, Andrew Cooper, Stefano Stabellini,
	Ian Jackson, xen-devel, Keir (Xen.org)

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 16 June 2015 10:30
> To: Paul Durrant
> Cc: Andrew Cooper; Ian Campbell; Ian Jackson; Stefano Stabellini; Wei Liu;
> xen-devel; Keir (Xen.org)
> Subject: RE: [Xen-devel] [PATCH] x86/HVM: avoid pointer wraparound in
> bufioreq handling
> 
> >>> On 16.06.15 at 11:15, <Paul.Durrant@citrix.com> wrote:
> >>  -----Original Message-----
> >> From: Ian Campbell [mailto:ian.campbell@citrix.com]
> >> Sent: 16 June 2015 10:00
> >> To: Jan Beulich
> >> Cc: Andrew Cooper; Wei Liu; Ian Jackson; Stefano Stabellini; xen-devel;
> Keir
> >> (Xen.org); Paul Durrant
> >> Subject: Re: [Xen-devel] [PATCH] x86/HVM: avoid pointer wraparound in
> >> bufioreq handling
> >>
> >> On Tue, 2015-06-16 at 09:37 +0100, Jan Beulich wrote:
> >> > >>> On 16.06.15 at 10:20, <ian.campbell@citrix.com> wrote:
> >> > > On Tue, 2015-06-16 at 07:44 +0100, Jan Beulich wrote:
> >> > >> >>> On 15.06.15 at 16:30, <JBeulich@suse.com> wrote:
> >> > >> > The number of slots per page being 511 (i.e. not a power of two)
> >> means
> >> > >> > that the (32-bit) read and write indexes going beyond 2^32 will
> likely
> >> > >> > disturb operation. Extend I/O req server creation so the caller can
> >> > >> > indicate that it is using suitable atomic accesses where needed (not
> >> > >> > all accesses to the two pointers really need to be atomic), allowing
> >> > >> > the hypervisor to atomically canonicalize both pointers when both
> >> have
> >> > >> > gone through at least one cycle.
> >> > >> >
> >> > >> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> > >>
> >> > >> No matter that it's just a single line change, I realized that I
> >> > >> forgot to Cc the tools maintainers. While a v2 will be needed (see
> >> > >> the reply just sent to Andrew) I'd still appreciate input (if any) to
> >> > >> limit the number of revisions needed.
> >> > >
> >> > > For such a simple toolstack side change which just reflects the
> >> > > underlying hcall interface I have no real opinion so far as the tools
> >> > > side goes, but it would be good to update the comments in xenctrl.h
> too.
> >> > > With that done for the tools change:
> >> > >         Acked-by: Ian Campbell <ian.campbell@citrix.com>
> >> >
> >> > Thanks. The request for feedback went beyond the request for
> >> > an ack though, namely
> >> >
> >> > TBD: Do we need to be worried about non-libxc users of the changed
> >> >      (tools only) interface?
> >>
> >> It's (currently at least) a declared non-stable API, so in principal no.
> >> It would be polite to give a heads up to the expected potential users
> >> though, which you've done by CCing the QEMU maintainers I think.
> Adding
> >> Paul D for completeness though.
> >
> > From my reading, both QEMU upstream and trad are safe. They use a loop
> of
> > the form:
> >
> > while (read_ptr != write_ptr)
> > {
> >    do stuff
> >
> >   read_ptr += (handled a qword) ? 2 : 1;
> > }
> >
> > So, since the only test is for equality I think overflow should be handled
> > correctly. So, does anything actually need to be fixed?
> 
> Of course this needs to be fixed: When either pointer crosses the
> 2^32 boundary, the slot referenced goes from 0x1f to 0 (due to the
> "modulo 511" operation determining the slot to be used), introducing
> a discontinuity and potentially corrupting data in slots not consumed
> yet.
> 

Ah yes. I thought you were worried about inequality checks going wrong.

The way that QEMU processes buffered requests means that a synchronous ioreq is a barrier to buffered ring processing. So I guess it should be possible to send a synchronous request and then zero the buffered ring counters before they reach overflow.

  Paul
 
> Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-16  9:45             ` Paul Durrant
@ 2015-06-16  9:54               ` Jan Beulich
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2015-06-16  9:54 UTC (permalink / raw)
  To: Paul Durrant
  Cc: Wei Liu, Ian Campbell, Andrew Cooper, StefanoStabellini,
	Ian Jackson, xen-devel, Keir (Xen.org)

>>> On 16.06.15 at 11:45, <Paul.Durrant@citrix.com> wrote:
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 16 June 2015 10:30
>> >>> On 16.06.15 at 11:15, <Paul.Durrant@citrix.com> wrote:
>> > From my reading, both QEMU upstream and trad are safe. They use a loop
>> of
>> > the form:
>> >
>> > while (read_ptr != write_ptr)
>> > {
>> >    do stuff
>> >
>> >   read_ptr += (handled a qword) ? 2 : 1;
>> > }
>> >
>> > So, since the only test is for equality I think overflow should be handled
>> > correctly. So, does anything actually need to be fixed?
>> 
>> Of course this needs to be fixed: When either pointer crosses the
>> 2^32 boundary, the slot referenced goes from 0x1f to 0 (due to the
>> "modulo 511" operation determining the slot to be used), introducing
>> a discontinuity and potentially corrupting data in slots not consumed
>> yet.
>> 
> 
> Ah yes. I thought you were worried about inequality checks going wrong.
> 
> The way that QEMU processes buffered requests means that a synchronous ioreq 
> is a barrier to buffered ring processing. So I guess it should be possible to 
> send a synchronous request and then zero the buffered ring counters before 
> they reach overflow.

Or, following Ian's most recent reply, simply assume default servers
to work atomically here (once a qemu-trad fix went in of course).

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-06-16  9:55 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-06-15 14:30 [PATCH] x86/HVM: avoid pointer wraparound in bufioreq handling Jan Beulich
2015-06-15 15:55 ` Andrew Cooper
2015-06-16  6:40   ` Jan Beulich
2015-06-16  9:32     ` Andrew Cooper
2015-06-16  6:44 ` Jan Beulich
2015-06-16  8:20   ` Ian Campbell
2015-06-16  8:37     ` Jan Beulich
2015-06-16  8:59       ` Ian Campbell
2015-06-16  9:15         ` Paul Durrant
2015-06-16  9:29           ` Jan Beulich
2015-06-16  9:45             ` Paul Durrant
2015-06-16  9:54               ` Jan Beulich
2015-06-16  9:34         ` Jan Beulich
2015-06-16  9:41           ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.