[PATCH] x86/hvm: don't rely on shared ioreq state for completion handling

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
@ 2015-07-31 15:34 Paul Durrant
  2015-07-31 15:53 ` Andrew Cooper
  2015-08-11 15:19 ` Jan Beulich
  0 siblings, 2 replies; 9+ messages in thread
From: Paul Durrant @ 2015-07-31 15:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Keir Fraser, Jan Beulich

Both hvm_io_pending() and hvm_wait_for_io() use the shared (with emulator)
ioreq structure to determined whether there is a pending I/O. The latter will
misbehave if the shared state is driven to STATE_IOREQ_NONE by the emulator,
or when the shared ioreq page is cleared for re-insertion into the guest
P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0) because it
will terminate its wait without calling hvm_io_assist() to adjust Xen's
internal I/O emulation state. This may then lead to an io completion
handler finding incorrect internal emulation state and calling
domain_crash().

This patch fixes the problem by adding a pending flag to the ioreq server's
per-vcpu structure which cannot be directly manipulated by the emulator
and thus can be used to determine whether an I/O is actually pending for
that vcpu on that ioreq server. If an I/O is pending and the shared state
is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
completion of emulation (hence the data placed in the shared structure
is not used) and the internal state is adjusted as for a normal completion.
Thus, when a completion handler subsequently runs, the internal state is as
expected and domain_crash() will not be called.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Tested-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
 xen/arch/x86/hvm/hvm.c           |   46 +++++++++++++++++++++++++-------------
 xen/include/asm-x86/hvm/domain.h |    1 +
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ec1d797..4ab9498 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
                           &d->arch.hvm_domain.ioreq_server.list,
                           list_entry )
     {
-        ioreq_t *p = get_ioreq(s, v);
+        struct hvm_ioreq_vcpu *sv;
 
-        if ( p->state != STATE_IOREQ_NONE )
-            return 1;
+        list_for_each_entry ( sv,
+                              &s->ioreq_vcpu_list,
+                              list_entry )
+        {
+            if ( sv->vcpu == v && sv->pending )
+                return 1;
+        }
     }
 
     return 0;
 }
 
-static void hvm_io_assist(ioreq_t *p)
+static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
 {
-    struct vcpu *curr = current;
-    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
-
-    p->state = STATE_IOREQ_NONE;
+    struct vcpu *v = sv->vcpu;
+    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
 
     if ( hvm_vcpu_io_need_completion(vio) )
     {
         vio->io_req.state = STATE_IORESP_READY;
-        vio->io_req.data = p->data;
+        vio->io_req.data = data;
     }
     else
         vio->io_req.state = STATE_IOREQ_NONE;
 
-    msix_write_completion(curr);
-    vcpu_end_shutdown_deferral(curr);
+    msix_write_completion(v);
+    vcpu_end_shutdown_deferral(v);
+
+    sv->pending = 0;
 }
 
 static bool_t hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
 {
-    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
-    while ( p->state != STATE_IOREQ_NONE )
+    while ( sv->pending )
     {
         switch ( p->state )
         {
+        case STATE_IOREQ_NONE:
+            /*
+             * The only reason we should see this case is when an
+             * emulator is dying and it races with an I/O being
+             * requested.
+             */
+            hvm_io_assist(sv, ~0ul);
+            break;
         case STATE_IORESP_READY: /* IORESP_READY -> NONE */
             rmb(); /* see IORESP_READY /then/ read contents of ioreq */
-            hvm_io_assist(p);
+            p->state = STATE_IOREQ_NONE;
+            hvm_io_assist(sv, p->data);
             break;
         case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
         case STATE_IOREQ_INPROCESS:
@@ -459,6 +472,7 @@ static bool_t hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
             break;
         default:
             gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
+            sv->pending = 0;
             domain_crash(sv->vcpu->domain);
             return 0; /* bail */
         }
@@ -489,7 +503,7 @@ void hvm_do_resume(struct vcpu *v)
                               &s->ioreq_vcpu_list,
                               list_entry )
         {
-            if ( sv->vcpu == v )
+            if ( sv->vcpu == v && sv->pending )
             {
                 if ( !hvm_wait_for_io(sv, get_ioreq(s, v)) )
                     return;
@@ -2743,6 +2757,8 @@ int hvm_send_ioreq(struct hvm_ioreq_server *s, ioreq_t *proto_p,
              */
             p->state = STATE_IOREQ_READY;
             notify_via_xen_event_channel(d, port);
+
+            sv->pending = 1;
             return X86EMUL_RETRY;
         }
     }
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index b612755..12b5e0c 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -46,6 +46,7 @@ struct hvm_ioreq_vcpu {
     struct list_head list_entry;
     struct vcpu      *vcpu;
     evtchn_port_t    ioreq_evtchn;
+    bool_t           pending;
 };
 
 #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-07-31 15:34 [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling Paul Durrant
@ 2015-07-31 15:53 ` Andrew Cooper
  2015-08-03  9:48   ` Ian Campbell
  2015-08-11 15:19 ` Jan Beulich
  1 sibling, 1 reply; 9+ messages in thread
From: Andrew Cooper @ 2015-07-31 15:53 UTC (permalink / raw)
  To: Paul Durrant, xen-devel; +Cc: Keir Fraser, Jan Beulich

On 31/07/15 16:34, Paul Durrant wrote:
> Both hvm_io_pending() and hvm_wait_for_io() use the shared (with emulator)
> ioreq structure to determined whether there is a pending I/O. The latter will
> misbehave if the shared state is driven to STATE_IOREQ_NONE by the emulator,
> or when the shared ioreq page is cleared for re-insertion into the guest
> P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0) because it
> will terminate its wait without calling hvm_io_assist() to adjust Xen's
> internal I/O emulation state. This may then lead to an io completion
> handler finding incorrect internal emulation state and calling
> domain_crash().
>
> This patch fixes the problem by adding a pending flag to the ioreq server's
> per-vcpu structure which cannot be directly manipulated by the emulator
> and thus can be used to determine whether an I/O is actually pending for
> that vcpu on that ioreq server. If an I/O is pending and the shared state
> is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
> completion of emulation (hence the data placed in the shared structure
> is not used) and the internal state is adjusted as for a normal completion.
> Thus, when a completion handler subsequently runs, the internal state is as
> expected and domain_crash() will not be called.
>
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
> Tested-by: Roger Pau Monné <roger.pau@citrix.com>
> Cc: Keir Fraser <keir@xen.org>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-07-31 15:53 ` Andrew Cooper
@ 2015-08-03  9:48   ` Ian Campbell
  0 siblings, 0 replies; 9+ messages in thread
From: Ian Campbell @ 2015-08-03  9:48 UTC (permalink / raw)
  To: Andrew Cooper, Paul Durrant, xen-devel; +Cc: Keir Fraser, Jan Beulich

On Fri, 2015-07-31 at 16:53 +0100, Andrew Cooper wrote:
> On 31/07/15 16:34, Paul Durrant wrote:
> > Both hvm_io_pending() and hvm_wait_for_io() use the shared (with 
> > emulator)
> > ioreq structure to determined whether there is a pending I/O. The 
> > latter will
> > misbehave if the shared state is driven to STATE_IOREQ_NONE by the 
> > emulator,
> > or when the shared ioreq page is cleared for re-insertion into the 
> > guest
> > P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0) because 
> > it
> > will terminate its wait without calling hvm_io_assist() to adjust Xen's
> > internal I/O emulation state. This may then lead to an io completion
> > handler finding incorrect internal emulation state and calling
> > domain_crash().
> > 
> > This patch fixes the problem by adding a pending flag to the ioreq 
> > server's
> > per-vcpu structure which cannot be directly manipulated by the emulator
> > and thus can be used to determine whether an I/O is actually pending 
> > for
> > that vcpu on that ioreq server. If an I/O is pending and the shared 
> > state
> > is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
> > completion of emulation (hence the data placed in the shared structure
> > is not used) and the internal state is adjusted as for a normal 
> > completion.
> > Thus, when a completion handler subsequently runs, the internal state 
> > is as
> > expected and domain_crash() will not be called.
> > 
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
> > Tested-by: Roger Pau Monné <roger.pau@citrix.com>
> > Cc: Keir Fraser <keir@xen.org>
> > Cc: Jan Beulich <jbeulich@suse.com>
> > Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> 
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Applied, thanks.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-07-31 15:34 [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling Paul Durrant
  2015-07-31 15:53 ` Andrew Cooper
@ 2015-08-11 15:19 ` Jan Beulich
  2015-08-11 15:32   ` Paul Durrant
  1 sibling, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2015-08-11 15:19 UTC (permalink / raw)
  To: Paul Durrant; +Cc: Andrew Cooper, Keir Fraser, xen-devel

>>> On 31.07.15 at 17:34, <paul.durrant@citrix.com> wrote:
> Both hvm_io_pending() and hvm_wait_for_io() use the shared (with emulator)
> ioreq structure to determined whether there is a pending I/O. The latter 
> will
> misbehave if the shared state is driven to STATE_IOREQ_NONE by the emulator,
> or when the shared ioreq page is cleared for re-insertion into the guest
> P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0) because it
> will terminate its wait without calling hvm_io_assist() to adjust Xen's
> internal I/O emulation state. This may then lead to an io completion
> handler finding incorrect internal emulation state and calling
> domain_crash().
> 
> This patch fixes the problem by adding a pending flag to the ioreq server's
> per-vcpu structure which cannot be directly manipulated by the emulator
> and thus can be used to determine whether an I/O is actually pending for
> that vcpu on that ioreq server. If an I/O is pending and the shared state
> is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
> completion of emulation (hence the data placed in the shared structure
> is not used) and the internal state is adjusted as for a normal completion.
> Thus, when a completion handler subsequently runs, the internal state is as
> expected and domain_crash() will not be called.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
> Tested-by: Roger Pau Monné <roger.pau@citrix.com>

I realize this went in already, but ...

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
>                            &d->arch.hvm_domain.ioreq_server.list,
>                            list_entry )
>      {
> -        ioreq_t *p = get_ioreq(s, v);
> +        struct hvm_ioreq_vcpu *sv;
>  
> -        if ( p->state != STATE_IOREQ_NONE )
> -            return 1;
> +        list_for_each_entry ( sv,
> +                              &s->ioreq_vcpu_list,
> +                              list_entry )
> +        {
> +            if ( sv->vcpu == v && sv->pending )
> +                return 1;
> +        }

... while from the review of the original series I recall that doing the
outer loop without any lock is fine (due to using domain_pause()
when registering servers) I'm not convinced this extends to the
inner loop. Can you clarify please? (There are a couple more such
loops that I can't immediately see being protected by a lock.)

> -static void hvm_io_assist(ioreq_t *p)
> +static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
>  {
> -    struct vcpu *curr = current;
> -    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
> -
> -    p->state = STATE_IOREQ_NONE;
> +    struct vcpu *v = sv->vcpu;
> +    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
>  
>      if ( hvm_vcpu_io_need_completion(vio) )
>      {
>          vio->io_req.state = STATE_IORESP_READY;
> -        vio->io_req.data = p->data;
> +        vio->io_req.data = data;
>      }
>      else
>          vio->io_req.state = STATE_IOREQ_NONE;
>  
> -    msix_write_completion(curr);
> -    vcpu_end_shutdown_deferral(curr);
> +    msix_write_completion(v);
> +    vcpu_end_shutdown_deferral(v);
> +
> +    sv->pending = 0;
>  }

Also the renaming of "curr" here is less than optimal, not the least
because msix_write_completion() requires v == current. I.e. I'd
like to ask for a cleanup patch converting v back to curr, adding
ASSERT(curr == current) (and if that doesn't hold we've got a
problem).

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-08-11 15:19 ` Jan Beulich
@ 2015-08-11 15:32   ` Paul Durrant
  2015-08-11 15:46     ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Paul Durrant @ 2015-08-11 15:32 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Keir (Xen.org), xen-devel@lists.xenproject.org

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 11 August 2015 16:20
> To: Paul Durrant
> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> Subject: Re: [PATCH] x86/hvm: don't rely on shared ioreq state for
> completion handling
> 
> >>> On 31.07.15 at 17:34, <paul.durrant@citrix.com> wrote:
> > Both hvm_io_pending() and hvm_wait_for_io() use the shared (with
> emulator)
> > ioreq structure to determined whether there is a pending I/O. The latter
> > will
> > misbehave if the shared state is driven to STATE_IOREQ_NONE by the
> emulator,
> > or when the shared ioreq page is cleared for re-insertion into the guest
> > P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0) because
> it
> > will terminate its wait without calling hvm_io_assist() to adjust Xen's
> > internal I/O emulation state. This may then lead to an io completion
> > handler finding incorrect internal emulation state and calling
> > domain_crash().
> >
> > This patch fixes the problem by adding a pending flag to the ioreq server's
> > per-vcpu structure which cannot be directly manipulated by the emulator
> > and thus can be used to determine whether an I/O is actually pending for
> > that vcpu on that ioreq server. If an I/O is pending and the shared state
> > is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
> > completion of emulation (hence the data placed in the shared structure
> > is not used) and the internal state is adjusted as for a normal completion.
> > Thus, when a completion handler subsequently runs, the internal state is as
> > expected and domain_crash() will not be called.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
> > Tested-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> I realize this went in already, but ...
> 
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
> >                            &d->arch.hvm_domain.ioreq_server.list,
> >                            list_entry )
> >      {
> > -        ioreq_t *p = get_ioreq(s, v);
> > +        struct hvm_ioreq_vcpu *sv;
> >
> > -        if ( p->state != STATE_IOREQ_NONE )
> > -            return 1;
> > +        list_for_each_entry ( sv,
> > +                              &s->ioreq_vcpu_list,
> > +                              list_entry )
> > +        {
> > +            if ( sv->vcpu == v && sv->pending )
> > +                return 1;
> > +        }
> 
> ... while from the review of the original series I recall that doing the
> outer loop without any lock is fine (due to using domain_pause()
> when registering servers) I'm not convinced this extends to the
> inner loop. Can you clarify please? (There are a couple more such
> loops that I can't immediately see being protected by a lock.)

Yes, I think you are right. If a cpu were to disappear then the list walk would be compromised. It should either be locked or rcu in all places.

> 
> > -static void hvm_io_assist(ioreq_t *p)
> > +static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
> >  {
> > -    struct vcpu *curr = current;
> > -    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
> > -
> > -    p->state = STATE_IOREQ_NONE;
> > +    struct vcpu *v = sv->vcpu;
> > +    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
> >
> >      if ( hvm_vcpu_io_need_completion(vio) )
> >      {
> >          vio->io_req.state = STATE_IORESP_READY;
> > -        vio->io_req.data = p->data;
> > +        vio->io_req.data = data;
> >      }
> >      else
> >          vio->io_req.state = STATE_IOREQ_NONE;
> >
> > -    msix_write_completion(curr);
> > -    vcpu_end_shutdown_deferral(curr);
> > +    msix_write_completion(v);
> > +    vcpu_end_shutdown_deferral(v);
> > +
> > +    sv->pending = 0;
> >  }
> 
> Also the renaming of "curr" here is less than optimal, not the least
> because msix_write_completion() requires v == current. I.e. I'd
> like to ask for a cleanup patch converting v back to curr, adding
> ASSERT(curr == current) (and if that doesn't hold we've got a
> problem).

The naming was changed because I am now digging the vcpu pointer out of the hvm_ioreq_vcpu struct. IMO any renaming or ASSERT really belongs all the way out in hvm_do_resume(). I'll come up with a patch.

  Paul

> 
> Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-08-11 15:32   ` Paul Durrant
@ 2015-08-11 15:46     ` Jan Beulich
  2015-08-11 15:49       ` Paul Durrant
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2015-08-11 15:46 UTC (permalink / raw)
  To: Paul Durrant; +Cc: Andrew Cooper, Keir(Xen.org), xen-devel@lists.xenproject.org

>>> On 11.08.15 at 17:32, <Paul.Durrant@citrix.com> wrote:
>>  -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 11 August 2015 16:20
>> To: Paul Durrant
>> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
>> Subject: Re: [PATCH] x86/hvm: don't rely on shared ioreq state for
>> completion handling
>> 
>> >>> On 31.07.15 at 17:34, <paul.durrant@citrix.com> wrote:
>> > Both hvm_io_pending() and hvm_wait_for_io() use the shared (with
>> emulator)
>> > ioreq structure to determined whether there is a pending I/O. The latter
>> > will
>> > misbehave if the shared state is driven to STATE_IOREQ_NONE by the
>> emulator,
>> > or when the shared ioreq page is cleared for re-insertion into the guest
>> > P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0) because
>> it
>> > will terminate its wait without calling hvm_io_assist() to adjust Xen's
>> > internal I/O emulation state. This may then lead to an io completion
>> > handler finding incorrect internal emulation state and calling
>> > domain_crash().
>> >
>> > This patch fixes the problem by adding a pending flag to the ioreq server's
>> > per-vcpu structure which cannot be directly manipulated by the emulator
>> > and thus can be used to determine whether an I/O is actually pending for
>> > that vcpu on that ioreq server. If an I/O is pending and the shared state
>> > is seen to go to STATE_IOREQ_NONE then it can be treated as an abnormal
>> > completion of emulation (hence the data placed in the shared structure
>> > is not used) and the internal state is adjusted as for a normal completion.
>> > Thus, when a completion handler subsequently runs, the internal state is as
>> > expected and domain_crash() will not be called.
>> >
>> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
>> > Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
>> > Tested-by: Roger Pau Monné <roger.pau@citrix.com>
>> 
>> I realize this went in already, but ...
>> 
>> > --- a/xen/arch/x86/hvm/hvm.c
>> > +++ b/xen/arch/x86/hvm/hvm.c
>> > @@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
>> >                            &d->arch.hvm_domain.ioreq_server.list,
>> >                            list_entry )
>> >      {
>> > -        ioreq_t *p = get_ioreq(s, v);
>> > +        struct hvm_ioreq_vcpu *sv;
>> >
>> > -        if ( p->state != STATE_IOREQ_NONE )
>> > -            return 1;
>> > +        list_for_each_entry ( sv,
>> > +                              &s->ioreq_vcpu_list,
>> > +                              list_entry )
>> > +        {
>> > +            if ( sv->vcpu == v && sv->pending )
>> > +                return 1;
>> > +        }
>> 
>> ... while from the review of the original series I recall that doing the
>> outer loop without any lock is fine (due to using domain_pause()
>> when registering servers) I'm not convinced this extends to the
>> inner loop. Can you clarify please? (There are a couple more such
>> loops that I can't immediately see being protected by a lock.)
> 
> Yes, I think you are right. If a cpu were to disappear then the list walk 
> would be compromised. It should either be locked or rcu in all places.

I don't think we need to be concerned of vCPU-s disappearing,
since that doesn't happen during the lifetime of a VM. And the
hvm_do_resume() path is used only for domains still alive. Of
course, if any of the lockless loops sit on paths reachable after
a domain got marked dying, that would need fixing.

What I'm more concerned about are list manipulations behind
the back of a list traversing CPU. Or do those happen only upon
vCPU creation/destruction?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-08-11 15:46     ` Jan Beulich
@ 2015-08-11 15:49       ` Paul Durrant
  2015-08-11 16:03         ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Paul Durrant @ 2015-08-11 15:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Keir (Xen.org), xen-devel@lists.xenproject.org

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 11 August 2015 16:46
> To: Paul Durrant
> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> Subject: RE: [PATCH] x86/hvm: don't rely on shared ioreq state for
> completion handling
> 
> >>> On 11.08.15 at 17:32, <Paul.Durrant@citrix.com> wrote:
> >>  -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: 11 August 2015 16:20
> >> To: Paul Durrant
> >> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> >> Subject: Re: [PATCH] x86/hvm: don't rely on shared ioreq state for
> >> completion handling
> >>
> >> >>> On 31.07.15 at 17:34, <paul.durrant@citrix.com> wrote:
> >> > Both hvm_io_pending() and hvm_wait_for_io() use the shared (with
> >> emulator)
> >> > ioreq structure to determined whether there is a pending I/O. The latter
> >> > will
> >> > misbehave if the shared state is driven to STATE_IOREQ_NONE by the
> >> emulator,
> >> > or when the shared ioreq page is cleared for re-insertion into the guest
> >> > P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0)
> because
> >> it
> >> > will terminate its wait without calling hvm_io_assist() to adjust Xen's
> >> > internal I/O emulation state. This may then lead to an io completion
> >> > handler finding incorrect internal emulation state and calling
> >> > domain_crash().
> >> >
> >> > This patch fixes the problem by adding a pending flag to the ioreq
> server's
> >> > per-vcpu structure which cannot be directly manipulated by the
> emulator
> >> > and thus can be used to determine whether an I/O is actually pending
> for
> >> > that vcpu on that ioreq server. If an I/O is pending and the shared state
> >> > is seen to go to STATE_IOREQ_NONE then it can be treated as an
> abnormal
> >> > completion of emulation (hence the data placed in the shared structure
> >> > is not used) and the internal state is adjusted as for a normal
> completion.
> >> > Thus, when a completion handler subsequently runs, the internal state
> is as
> >> > expected and domain_crash() will not be called.
> >> >
> >> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> >> > Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
> >> > Tested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>
> >> I realize this went in already, but ...
> >>
> >> > --- a/xen/arch/x86/hvm/hvm.c
> >> > +++ b/xen/arch/x86/hvm/hvm.c
> >> > @@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
> >> >                            &d->arch.hvm_domain.ioreq_server.list,
> >> >                            list_entry )
> >> >      {
> >> > -        ioreq_t *p = get_ioreq(s, v);
> >> > +        struct hvm_ioreq_vcpu *sv;
> >> >
> >> > -        if ( p->state != STATE_IOREQ_NONE )
> >> > -            return 1;
> >> > +        list_for_each_entry ( sv,
> >> > +                              &s->ioreq_vcpu_list,
> >> > +                              list_entry )
> >> > +        {
> >> > +            if ( sv->vcpu == v && sv->pending )
> >> > +                return 1;
> >> > +        }
> >>
> >> ... while from the review of the original series I recall that doing the
> >> outer loop without any lock is fine (due to using domain_pause()
> >> when registering servers) I'm not convinced this extends to the
> >> inner loop. Can you clarify please? (There are a couple more such
> >> loops that I can't immediately see being protected by a lock.)
> >
> > Yes, I think you are right. If a cpu were to disappear then the list walk
> > would be compromised. It should either be locked or rcu in all places.
> 
> I don't think we need to be concerned of vCPU-s disappearing,
> since that doesn't happen during the lifetime of a VM. And the
> hvm_do_resume() path is used only for domains still alive. Of
> course, if any of the lockless loops sit on paths reachable after
> a domain got marked dying, that would need fixing.
> 
> What I'm more concerned about are list manipulations behind
> the back of a list traversing CPU. Or do those happen only upon
> vCPU creation/destruction?

Theoretically we could do vcpu hot remove, couldn't we? That's the case I was thinking of.

  Paul

> 
> Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-08-11 15:49       ` Paul Durrant
@ 2015-08-11 16:03         ` Jan Beulich
  2015-08-11 16:17           ` Paul Durrant
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2015-08-11 16:03 UTC (permalink / raw)
  To: Paul Durrant; +Cc: Andrew Cooper, Keir(Xen.org), xen-devel@lists.xenproject.org

>>> On 11.08.15 at 17:49, <Paul.Durrant@citrix.com> wrote:
>>  -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 11 August 2015 16:46
>> To: Paul Durrant
>> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
>> Subject: RE: [PATCH] x86/hvm: don't rely on shared ioreq state for
>> completion handling
>> 
>> >>> On 11.08.15 at 17:32, <Paul.Durrant@citrix.com> wrote:
>> >>  -----Original Message-----
>> >> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> Sent: 11 August 2015 16:20
>> >> To: Paul Durrant
>> >> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
>> >> Subject: Re: [PATCH] x86/hvm: don't rely on shared ioreq state for
>> >> completion handling
>> >>
>> >> >>> On 31.07.15 at 17:34, <paul.durrant@citrix.com> wrote:
>> >> > Both hvm_io_pending() and hvm_wait_for_io() use the shared (with
>> >> emulator)
>> >> > ioreq structure to determined whether there is a pending I/O. The latter
>> >> > will
>> >> > misbehave if the shared state is driven to STATE_IOREQ_NONE by the
>> >> emulator,
>> >> > or when the shared ioreq page is cleared for re-insertion into the guest
>> >> > P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0)
>> because
>> >> it
>> >> > will terminate its wait without calling hvm_io_assist() to adjust Xen's
>> >> > internal I/O emulation state. This may then lead to an io completion
>> >> > handler finding incorrect internal emulation state and calling
>> >> > domain_crash().
>> >> >
>> >> > This patch fixes the problem by adding a pending flag to the ioreq
>> server's
>> >> > per-vcpu structure which cannot be directly manipulated by the
>> emulator
>> >> > and thus can be used to determine whether an I/O is actually pending
>> for
>> >> > that vcpu on that ioreq server. If an I/O is pending and the shared state
>> >> > is seen to go to STATE_IOREQ_NONE then it can be treated as an
>> abnormal
>> >> > completion of emulation (hence the data placed in the shared structure
>> >> > is not used) and the internal state is adjusted as for a normal
>> completion.
>> >> > Thus, when a completion handler subsequently runs, the internal state
>> is as
>> >> > expected and domain_crash() will not be called.
>> >> >
>> >> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
>> >> > Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
>> >> > Tested-by: Roger Pau Monné <roger.pau@citrix.com>
>> >>
>> >> I realize this went in already, but ...
>> >>
>> >> > --- a/xen/arch/x86/hvm/hvm.c
>> >> > +++ b/xen/arch/x86/hvm/hvm.c
>> >> > @@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
>> >> >                            &d->arch.hvm_domain.ioreq_server.list,
>> >> >                            list_entry )
>> >> >      {
>> >> > -        ioreq_t *p = get_ioreq(s, v);
>> >> > +        struct hvm_ioreq_vcpu *sv;
>> >> >
>> >> > -        if ( p->state != STATE_IOREQ_NONE )
>> >> > -            return 1;
>> >> > +        list_for_each_entry ( sv,
>> >> > +                              &s->ioreq_vcpu_list,
>> >> > +                              list_entry )
>> >> > +        {
>> >> > +            if ( sv->vcpu == v && sv->pending )
>> >> > +                return 1;
>> >> > +        }
>> >>
>> >> ... while from the review of the original series I recall that doing the
>> >> outer loop without any lock is fine (due to using domain_pause()
>> >> when registering servers) I'm not convinced this extends to the
>> >> inner loop. Can you clarify please? (There are a couple more such
>> >> loops that I can't immediately see being protected by a lock.)
>> >
>> > Yes, I think you are right. If a cpu were to disappear then the list walk
>> > would be compromised. It should either be locked or rcu in all places.
>> 
>> I don't think we need to be concerned of vCPU-s disappearing,
>> since that doesn't happen during the lifetime of a VM. And the
>> hvm_do_resume() path is used only for domains still alive. Of
>> course, if any of the lockless loops sit on paths reachable after
>> a domain got marked dying, that would need fixing.
>> 
>> What I'm more concerned about are list manipulations behind
>> the back of a list traversing CPU. Or do those happen only upon
>> vCPU creation/destruction?
> 
> Theoretically we could do vcpu hot remove, couldn't we? That's the case I 
> was thinking of.

But that only removes the vCPU from what the guest sees. The
hypervisor never de-allocates a vCPU prior to domain death.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling
  2015-08-11 16:03         ` Jan Beulich
@ 2015-08-11 16:17           ` Paul Durrant
  0 siblings, 0 replies; 9+ messages in thread
From: Paul Durrant @ 2015-08-11 16:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Keir (Xen.org), xen-devel@lists.xenproject.org

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 11 August 2015 17:03
> To: Paul Durrant
> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> Subject: RE: [PATCH] x86/hvm: don't rely on shared ioreq state for
> completion handling
> 
> >>> On 11.08.15 at 17:49, <Paul.Durrant@citrix.com> wrote:
> >>  -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: 11 August 2015 16:46
> >> To: Paul Durrant
> >> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> >> Subject: RE: [PATCH] x86/hvm: don't rely on shared ioreq state for
> >> completion handling
> >>
> >> >>> On 11.08.15 at 17:32, <Paul.Durrant@citrix.com> wrote:
> >> >>  -----Original Message-----
> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> Sent: 11 August 2015 16:20
> >> >> To: Paul Durrant
> >> >> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> >> >> Subject: Re: [PATCH] x86/hvm: don't rely on shared ioreq state for
> >> >> completion handling
> >> >>
> >> >> >>> On 31.07.15 at 17:34, <paul.durrant@citrix.com> wrote:
> >> >> > Both hvm_io_pending() and hvm_wait_for_io() use the shared (with
> >> >> emulator)
> >> >> > ioreq structure to determined whether there is a pending I/O. The
> latter
> >> >> > will
> >> >> > misbehave if the shared state is driven to STATE_IOREQ_NONE by
> the
> >> >> emulator,
> >> >> > or when the shared ioreq page is cleared for re-insertion into the
> guest
> >> >> > P2M when the ioreq server is disabled (STATE_IOREQ_NONE == 0)
> >> because
> >> >> it
> >> >> > will terminate its wait without calling hvm_io_assist() to adjust Xen's
> >> >> > internal I/O emulation state. This may then lead to an io completion
> >> >> > handler finding incorrect internal emulation state and calling
> >> >> > domain_crash().
> >> >> >
> >> >> > This patch fixes the problem by adding a pending flag to the ioreq
> >> server's
> >> >> > per-vcpu structure which cannot be directly manipulated by the
> >> emulator
> >> >> > and thus can be used to determine whether an I/O is actually
> pending
> >> for
> >> >> > that vcpu on that ioreq server. If an I/O is pending and the shared
> state
> >> >> > is seen to go to STATE_IOREQ_NONE then it can be treated as an
> >> abnormal
> >> >> > completion of emulation (hence the data placed in the shared
> structure
> >> >> > is not used) and the internal state is adjusted as for a normal
> >> completion.
> >> >> > Thus, when a completion handler subsequently runs, the internal
> state
> >> is as
> >> >> > expected and domain_crash() will not be called.
> >> >> >
> >> >> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> >> >> > Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
> >> >> > Tested-by: Roger Pau Monné <roger.pau@citrix.com>
> >> >>
> >> >> I realize this went in already, but ...
> >> >>
> >> >> > --- a/xen/arch/x86/hvm/hvm.c
> >> >> > +++ b/xen/arch/x86/hvm/hvm.c
> >> >> > @@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
> >> >> >                            &d->arch.hvm_domain.ioreq_server.list,
> >> >> >                            list_entry )
> >> >> >      {
> >> >> > -        ioreq_t *p = get_ioreq(s, v);
> >> >> > +        struct hvm_ioreq_vcpu *sv;
> >> >> >
> >> >> > -        if ( p->state != STATE_IOREQ_NONE )
> >> >> > -            return 1;
> >> >> > +        list_for_each_entry ( sv,
> >> >> > +                              &s->ioreq_vcpu_list,
> >> >> > +                              list_entry )
> >> >> > +        {
> >> >> > +            if ( sv->vcpu == v && sv->pending )
> >> >> > +                return 1;
> >> >> > +        }
> >> >>
> >> >> ... while from the review of the original series I recall that doing the
> >> >> outer loop without any lock is fine (due to using domain_pause()
> >> >> when registering servers) I'm not convinced this extends to the
> >> >> inner loop. Can you clarify please? (There are a couple more such
> >> >> loops that I can't immediately see being protected by a lock.)
> >> >
> >> > Yes, I think you are right. If a cpu were to disappear then the list walk
> >> > would be compromised. It should either be locked or rcu in all places.
> >>
> >> I don't think we need to be concerned of vCPU-s disappearing,
> >> since that doesn't happen during the lifetime of a VM. And the
> >> hvm_do_resume() path is used only for domains still alive. Of
> >> course, if any of the lockless loops sit on paths reachable after
> >> a domain got marked dying, that would need fixing.
> >>
> >> What I'm more concerned about are list manipulations behind
> >> the back of a list traversing CPU. Or do those happen only upon
> >> vCPU creation/destruction?
> >
> > Theoretically we could do vcpu hot remove, couldn't we? That's the case I
> > was thinking of.
> 
> But that only removes the vCPU from what the guest sees. The
> hypervisor never de-allocates a vCPU prior to domain death.
> 

Oh, in that case I think the loops are safe.

  Paul

> Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-08-11 16:17 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-31 15:34 [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling Paul Durrant
2015-07-31 15:53 ` Andrew Cooper
2015-08-03  9:48   ` Ian Campbell
2015-08-11 15:19 ` Jan Beulich
2015-08-11 15:32   ` Paul Durrant
2015-08-11 15:46     ` Jan Beulich
2015-08-11 15:49       ` Paul Durrant
2015-08-11 16:03         ` Jan Beulich
2015-08-11 16:17           ` Paul Durrant

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.