[PATCH 1/2] enable event channel wake-up for mem

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] enable event channel wake-up for mem_event interfaces
@ 2011-09-28 21:24 Adin Scannell
  2011-10-06 11:07 ` Tim Deegan
  0 siblings, 1 reply; 5+ messages in thread
From: Adin Scannell @ 2011-09-28 21:24 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: memevent-dont-lose-events.patch --]
[-- Type: application/octet-stream, Size: 13745 bytes --]

The memevent code currently has a mechanism for reserving space in the ring
before putting an event, but each caller must individually ensure that the
vCPUs are correctly paused if no space is available.

This fixes that issue by reversing the semantics, we ensure that ensure space
is always left in the ring for one event per vCPU per ring.  If this constraint
is above to violated by a vCPU putting a response, we pause the vCPU on the way
out.

Additionally, we ensure that no events on lost by Xen as a consumer if multiple
responses land on the ring for a single domctl. (This is currently impossible,
but safe anyways... this patch is required by the next in the series).

Signed-off-by: Adin Scannell <adin@scannell.ca>

diff -r a422e2a4451e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4017,7 +4017,6 @@ static int hvm_memory_event_traps(long p
     struct vcpu* v = current;
     struct domain *d = v->domain;
     mem_event_request_t req;
-    int rc;
 
     if ( !(p & HVMPME_MODE_MASK) ) 
         return 0;
@@ -4025,10 +4024,6 @@ static int hvm_memory_event_traps(long p
     if ( (p & HVMPME_onchangeonly) && (value == old) )
         return 1;
     
-    rc = mem_event_check_ring(d, &d->mem_access);
-    if ( rc )
-        return rc;
-    
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
     req.reason = reason;
diff -r a422e2a4451e xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -78,6 +78,9 @@ static int mem_event_enable(struct domai
     med->ring_page = map_domain_page(mfn_x(ring_mfn));
     med->shared_page = map_domain_page(mfn_x(shared_mfn));
 
+    /* Set the number of currently blocked vCPUs to 0. */
+    med->blocked = 0;
+
     /* Allocate event channel */
     rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                          current->domain->domain_id);
@@ -95,7 +98,7 @@ static int mem_event_enable(struct domai
     mem_event_ring_lock_init(med);
 
     /* Wake any VCPUs paused for memory events */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, med);
 
     return 0;
 
@@ -120,13 +123,41 @@ static int mem_event_disable(struct mem_
     return 0;
 }
 
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static inline int mem_event_ring_free(struct domain *d, struct mem_event_domain *med)
+{
+    int free_requests;
+
+    free_requests = RING_FREE_REQUESTS(&med->front_ring);
+    if ( unlikely(free_requests < d->max_vcpus) )
+    {
+        /* This may happen. */
+        gdprintk(XENLOG_INFO, "mem_event request slots for domain %d: %d\n",
+                               d->domain_id, free_requests);
+        WARN_ON(1);
+    }
+
+    return free_requests;
+}
+
+int mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX req_prod;
 
+    if( mem_event_check_ring(d, med) < 0 )
+        return 0;
+
     mem_event_ring_lock(med);
 
+    if( mem_event_ring_free(d, med) == 0 )
+    {
+        /* This shouldn't happen. */
+        gdprintk(XENLOG_WARNING, "possible lost mem_event for domain %d\n",
+                                 d->domain_id);
+        mem_event_mark_and_pause(current, med);
+        return 0;
+    }
+
     front_ring = &med->front_ring;
     req_prod = front_ring->req_prod_pvt;
 
@@ -135,16 +166,28 @@ void mem_event_put_request(struct domain
     req_prod++;
 
     /* Update ring */
-    med->req_producers--;
     front_ring->req_prod_pvt = req_prod;
     RING_PUSH_REQUESTS(front_ring);
 
+    /*
+     * We ensure that each vcpu can put at least *one* event -- because some
+     * events are not repeatable, such as dropping a page.  This will ensure no
+     * vCPU is left with an event that they must place on the ring, but cannot.
+     * They will be paused after the event is placed.
+     * See large comment below in mem_event_unpause_vcpus().
+     */
+    if( current->domain->domain_id == d->domain_id &&
+        mem_event_ring_free(d, med) < d->max_vcpus )
+        mem_event_mark_and_pause(current, med);
+
     mem_event_ring_unlock(med);
 
     notify_via_xen_event_channel(d, med->xen_port);
+
+    return 1;
 }
 
-void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX rsp_cons;
@@ -154,6 +197,12 @@ void mem_event_get_response(struct mem_e
     front_ring = &med->front_ring;
     rsp_cons = front_ring->rsp_cons;
 
+    if( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
+    {
+        mem_event_ring_unlock(med);
+        return 0;
+    }
+
     /* Copy response */
     memcpy(rsp, RING_GET_RESPONSE(front_ring, rsp_cons), sizeof(*rsp));
     rsp_cons++;
@@ -163,54 +212,65 @@ void mem_event_get_response(struct mem_e
     front_ring->sring->rsp_event = rsp_cons + 1;
 
     mem_event_ring_unlock(med);
+
+    return 1;
 }
 
-void mem_event_unpause_vcpus(struct domain *d)
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med)
 {
     struct vcpu *v;
+    int free;
+    int online = d->max_vcpus;
 
+    if( !med->blocked )
+        return;
+
+    mem_event_ring_lock(med);
+    free = mem_event_ring_free(d, med);
+
+    /*
+     * We ensure that we only have vCPUs online if there are enough free slots
+     * for their memory events to be processed.  This will ensure that no
+     * memory events are lost (due to the fact that certain types of events
+     * cannot be replayed, we need to ensure that there is space in the ring
+     * for when they are hit). 
+     * See large comment above in mem_event_put_request().
+     */
     for_each_vcpu ( d, v )
+        if ( test_bit(_VPF_mem_event, &v->pause_flags) )
+            online--;
+ 
+    for_each_vcpu ( d, v )
+    {
+        if ( !(med->blocked) || online >= mem_event_ring_free(d, med) )
+            break;
         if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+        {
             vcpu_wake(v);
+            online++;
+            med->blocked--;
+        }
+    }
+
+    mem_event_ring_unlock(med);
 }
 
-void mem_event_mark_and_pause(struct vcpu *v)
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
 {
-    set_bit(_VPF_mem_event, &v->pause_flags);
-    vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
-{
-    mem_event_ring_lock(med);
-    med->req_producers--;
-    mem_event_ring_unlock(med);
+    if ( !test_bit(_VPF_mem_event, &v->pause_flags) )
+    {
+        set_bit(_VPF_mem_event, &v->pause_flags);
+        vcpu_sleep_nosync(v);
+        med->blocked++;
+    }
 }
 
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
 {
-    struct vcpu *curr = current;
-    int free_requests;
-    int ring_full = 1;
-
     if ( !med->ring_page )
         return -1;
 
-    mem_event_ring_lock(med);
-
-    free_requests = RING_FREE_REQUESTS(&med->front_ring);
-    if ( med->req_producers < free_requests )
-    {
-        med->req_producers++;
-        ring_full = 0;
-    }
-
-    if ( ring_full && (curr->domain == d) )
-        mem_event_mark_and_pause(curr);
-
-    mem_event_ring_unlock(med);
-
-    return ring_full;
+    return 0;
 }
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
diff -r a422e2a4451e xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -300,12 +300,16 @@ int mem_sharing_sharing_resume(struct do
 {
     mem_event_response_t rsp;
 
-    /* Get request off the ring */
-    mem_event_get_response(&d->mem_share, &rsp);
+    /* Get all requests off the ring */
+    while( mem_event_get_response(d, &d->mem_share, &rsp) )
+    {
+        /* Unpause domain/vcpu */
+        if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    }
 
-    /* Unpause domain/vcpu */
-    if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    /* Unpause any domains that were paused because the ring was full */
+    mem_event_unpause_vcpus(d, &d->mem_paging);
 
     return 0;
 }
diff -r a422e2a4451e xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -803,7 +803,6 @@ void p2m_mem_paging_populate(struct doma
     else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
     {
         /* gfn is already on its way back and vcpu is not paused */
-        mem_event_put_req_producers(&d->mem_paging);
         return;
     }
 
@@ -841,26 +840,27 @@ void p2m_mem_paging_resume(struct domain
     p2m_type_t p2mt;
     mfn_t mfn;
 
-    /* Pull the response off the ring */
-    mem_event_get_response(&d->mem_paging, &rsp);
+    /* Pull all responses off the ring */
+    while( mem_event_get_response(d, &d->mem_paging, &rsp) )
+    {
+        /* Fix p2m entry if the page was not dropped */
+        if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
+        {
+            mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
+            p2m_lock(p2m);
+            set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
+            set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+            audit_p2m(p2m, 1);
+            p2m_unlock(p2m);
+        }
 
-    /* Fix p2m entry if the page was not dropped */
-    if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
-    {
-        mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
-        p2m_lock(p2m);
-        set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
-        set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
-        audit_p2m(p2m, 1);
-        p2m_unlock(p2m);
+        /* Unpause domain */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
     }
 
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
-
     /* Unpause any domains that were paused because the ring was full */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, &d->mem_paging);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
@@ -899,7 +899,7 @@ void p2m_mem_access_check(unsigned long 
                    "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
                    v->vcpu_id, d->domain_id);
 
-            mem_event_mark_and_pause(v);
+            mem_event_mark_and_pause(v, &d->mem_access);
         }
         else
         {
@@ -911,8 +911,6 @@ void p2m_mem_access_check(unsigned long 
 
         return;
     }
-    else if ( res > 0 )
-        return;  /* No space in buffer; VCPU paused */
 
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
@@ -943,15 +941,17 @@ void p2m_mem_access_resume(struct p2m_do
     struct domain *d = p2m->domain;
     mem_event_response_t rsp;
 
-    mem_event_get_response(&d->mem_access, &rsp);
-
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    /* Pull all responses off the ring */
+    while( mem_event_get_response(d, &d->mem_access, &rsp) )
+    {
+        /* Unpause domain */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    }
 
     /* Unpause any domains that were paused because the ring was full or no listener 
      * was available */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, &d->mem_access);
 }
 
 
diff -r a422e2a4451e xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -25,12 +25,18 @@
 #define __MEM_EVENT_H__
 
 /* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med);
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
-void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med);
+
+/* Returns 0 if ring is full/empty and 1 upon success.
+ * In either case, the vCPU may be blocked afterwards to ensure that the
+ * ring does not lose events.  In general, put_request should not fail, as
+ * it attempts to ensure that each vCPU can put at least one request. */
+int mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+                            mem_event_request_t *req);
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med,
+                            mem_event_response_t *rsp);
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE(void) u_domctl);
diff -r a422e2a4451e xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -183,13 +183,14 @@ struct mem_event_domain
 {
     /* ring lock */
     spinlock_t ring_lock;
-    unsigned int req_producers;
     /* shared page */
     mem_event_shared_page_t *shared_page;
     /* shared ring page */
     void *ring_page;
     /* front-end ring */
     mem_event_front_ring_t front_ring;
+    /* the number of vCPUs blocked */
+    unsigned int blocked;
     /* event channel port (vcpu0 only) */
     int xen_port;
 };

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] enable event channel wake-up for mem_event interfaces
  2011-09-28 21:24 [PATCH 1/2] enable event channel wake-up for mem_event interfaces Adin Scannell
@ 2011-10-06 11:07 ` Tim Deegan
  2011-10-13 17:18   ` Adin Scannell
  2011-10-27 14:22   ` Olaf Hering
  0 siblings, 2 replies; 5+ messages in thread
From: Tim Deegan @ 2011-10-06 11:07 UTC (permalink / raw)
  To: Adin Scannell; +Cc: xen-devel

Hi, 

The general approach seems good.  A few comments on the detail, below:

At 17:24 -0400 on 28 Sep (1317230698), Adin Scannell wrote:
> -void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
> +static inline int mem_event_ring_free(struct domain *d, struct mem_event_domain *med)
> +{
> +    int free_requests;
> +
> +    free_requests = RING_FREE_REQUESTS(&med->front_ring);
> +    if ( unlikely(free_requests < d->max_vcpus) )
> +    {
> +        /* This may happen. */
> +        gdprintk(XENLOG_INFO, "mem_event request slots for domain %d: %d\n",
> +                               d->domain_id, free_requests);
> +        WARN_ON(1);

If this is something that might happen on production systems (and is
basically benign except for the performance), we shouldn't print a full
WARN().  The printk is more than enough.

> +    }
> +
> +    return free_requests;
> +}
> +
> +int mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
>  {
>      mem_event_front_ring_t *front_ring;
>      RING_IDX req_prod;
>  
> +    if( mem_event_check_ring(d, med) < 0 )

Xen coding style has another space between the 'if' and the '(' (here
and elsewhere).

> @@ -135,16 +166,28 @@ void mem_event_put_request(struct domain
>      req_prod++;
>  
>      /* Update ring */
> -    med->req_producers--;
>      front_ring->req_prod_pvt = req_prod;
>      RING_PUSH_REQUESTS(front_ring);
>  
> +    /*
> +     * We ensure that each vcpu can put at least *one* event -- because some
> +     * events are not repeatable, such as dropping a page.  This will ensure no
> +     * vCPU is left with an event that they must place on the ring, but cannot.
> +     * They will be paused after the event is placed.
> +     * See large comment below in mem_event_unpause_vcpus().
> +     */
> +    if( current->domain->domain_id == d->domain_id &&
> +        mem_event_ring_free(d, med) < d->max_vcpus )
> +        mem_event_mark_and_pause(current, med);
> +

This idiom of comparing domain-ids cropped up in the earlier mem-event
patches and seems to be spreading, but the right check is just
(current->domain == d).

Also: are there cases where current->domain != d?  If so, can't those cases
cause the ring to fill up?
  
> -void mem_event_unpause_vcpus(struct domain *d)
> +void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med)
>  {
>      struct vcpu *v;
> +    int free;
> +    int online = d->max_vcpus;
>  
5A> +    if( !med->blocked )
> +        return;
> +
> +    mem_event_ring_lock(med);
> +    free = mem_event_ring_free(d, med);
> +
> +    /*
> +     * We ensure that we only have vCPUs online if there are enough free slots
> +     * for their memory events to be processed.  This will ensure that no
> +     * memory events are lost (due to the fact that certain types of events
> +     * cannot be replayed, we need to ensure that there is space in the ring
> +     * for when they are hit). 
> +     * See large comment above in mem_event_put_request().
> +     */
>      for_each_vcpu ( d, v )
> +        if ( test_bit(_VPF_mem_event, &v->pause_flags) )
> +            online--;
> + 
> +    for_each_vcpu ( d, v )
> +    {
> +        if ( !(med->blocked) || online >= mem_event_ring_free(d, med) )
> +            break;

Is there a risk that under heavy mem-event loads vcpu 0 might starve
other vcpus entirely because they're never allowed to unpause here?

>          if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
> +        {
>              vcpu_wake(v);
> +            online++;
> +            med->blocked--;
> +        }
> +    }
> +
> +    mem_event_ring_unlock(med);
>  }
>  
> -void mem_event_mark_and_pause(struct vcpu *v)
> +void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
>  {
> -    set_bit(_VPF_mem_event, &v->pause_flags);
> -    vcpu_sleep_nosync(v);
> -}
> -
> -void mem_event_put_req_producers(struct mem_event_domain *med)
> -{
> -    mem_event_ring_lock(med);
> -    med->req_producers--;
> -    mem_event_ring_unlock(med);
> +    if ( !test_bit(_VPF_mem_event, &v->pause_flags) )
> +    {
> +        set_bit(_VPF_mem_event, &v->pause_flags);

Does this need to be an atomic test-and-set?

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] enable event channel wake-up for mem_event interfaces
  2011-10-06 11:07 ` Tim Deegan
@ 2011-10-13 17:18   ` Adin Scannell
  2011-10-20  9:52     ` Tim Deegan
  2011-10-27 14:22   ` Olaf Hering
  1 sibling, 1 reply; 5+ messages in thread
From: Adin Scannell @ 2011-10-13 17:18 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel

I'll rework the patches incorporating the feedback and resend the
modified patches.

I've got a couple of quick notes, inline below.

>> @@ -135,16 +166,28 @@ void mem_event_put_request(struct domain
>> +    /*
>> +     * We ensure that each vcpu can put at least *one* event -- because some
>> +     * events are not repeatable, such as dropping a page.  This will ensure no
>> +     * vCPU is left with an event that they must place on the ring, but cannot.
>> +     * They will be paused after the event is placed.
>> +     * See large comment below in mem_event_unpause_vcpus().
>> +     */
>> +    if( current->domain->domain_id == d->domain_id &&
>> +        mem_event_ring_free(d, med) < d->max_vcpus )
>> +        mem_event_mark_and_pause(current, med);
>> +
>
> This idiom of comparing domain-ids cropped up in the earlier mem-event
> patches and seems to be spreading, but the right check is just
> (current->domain == d).
>
> Also: are there cases where current->domain != d?  If so, can't those cases
> cause the ring to fill up?

Yes, I believe there are there are cases where a different domain
(i.e. the domain w/ the device model) can map a page generating an
event (mapping a paged-out page, writeable mappings of pages with
non-writable access bits, etc.).  Unfortunately, we can't pause any
vcpu in those cases.

This is something that I intend to revisit, as guaranteeing that
absolutely no events are lost may be quite complicated (especially
when there are certain events which are not repeatable). I'm
considering the use of the new wait queues or other mechanisms to wait
for the ring to clear up while in the same context.... but that is
another sort of tricky.

I'm quite glad you pointed this out, because I believe the
mem_event_mark_and_pause following the 'possible lost mem_event for
domain' is incorrect.  If this is a different domain generating the
event, this line is very wrong.

>> +    for_each_vcpu ( d, v )
>> +    {
>> +        if ( !(med->blocked) || online >= mem_event_ring_free(d, med) )
>> +            break;
>
> Is there a risk that under heavy mem-event loads vcpu 0 might starve
> other vcpus entirely because they're never allowed to unpause here?

Unfortunately, yes.  With heavy mem event load (& a dom0 that isn't
taking them off the ring fast enough).  I think there are two fair
alternatives:
1) Unpausing a vcpu at random.
2) Waiting until the ring reaches a watermark and unpausing all vCPUs.

Any thoughts on these?

>> +    if ( !test_bit(_VPF_mem_event, &v->pause_flags) )
>> +    {
>> +        set_bit(_VPF_mem_event, &v->pause_flags);
>
> Does this need to be an atomic test-and-set?

I believe that it is currently always called within a locked
(mem_event_ring_lock) context, but it should be atomic test-and-set
anyways in case it's not.

Cheers!
-Adin

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] enable event channel wake-up for mem_event interfaces
  2011-10-13 17:18   ` Adin Scannell
@ 2011-10-20  9:52     ` Tim Deegan
  0 siblings, 0 replies; 5+ messages in thread
From: Tim Deegan @ 2011-10-20  9:52 UTC (permalink / raw)
  To: Adin Scannell; +Cc: xen-devel

At 13:18 -0400 on 13 Oct (1318511910), Adin Scannell wrote:
> I'll rework the patches incorporating the feedback and resend the
> modified patches.
> 
> I've got a couple of quick notes, inline below.
> 
> >> @@ -135,16 +166,28 @@ void mem_event_put_request(struct domain
> >> +    /*
> >> +     * We ensure that each vcpu can put at least *one* event -- because some
> >> +     * events are not repeatable, such as dropping a page.  This will ensure no
> >> +     * vCPU is left with an event that they must place on the ring, but cannot.
> >> +     * They will be paused after the event is placed.
> >> +     * See large comment below in mem_event_unpause_vcpus().
> >> +     */
> >> +    if( current->domain->domain_id == d->domain_id &&
> >> +        mem_event_ring_free(d, med) < d->max_vcpus )
> >> +        mem_event_mark_and_pause(current, med);
> >> +
> >
> > This idiom of comparing domain-ids cropped up in the earlier mem-event
> > patches and seems to be spreading, but the right check is just
> > (current->domain == d).
> >
> > Also: are there cases where current->domain != d?  If so, can't those cases
> > cause the ring to fill up?
> 
> Yes, I believe there are there are cases where a different domain
> (i.e. the domain w/ the device model) can map a page generating an
> event (mapping a paged-out page, writeable mappings of pages with
> non-writable access bits, etc.).  Unfortunately, we can't pause any
> vcpu in those cases.

True.

> This is something that I intend to revisit, as guaranteeing that
> absolutely no events are lost may be quite complicated (especially
> when there are certain events which are not repeatable). I'm
> considering the use of the new wait queues or other mechanisms to wait
> for the ring to clear up while in the same context.... but that is
> another sort of tricky.

Yep.  You could do what the timer code does in this situation and have a
linked list of events that didnt make it on to the queue (to be tidied
up later when there's space) but I can see that being messy too, and 
in extremis you might not be able to xmalloc another entry...

> > Is there a risk that under heavy mem-event loads vcpu 0 might starve
> > other vcpus entirely because they're never allowed to unpause here?
> 
> Unfortunately, yes.  With heavy mem event load (& a dom0 that isn't
> taking them off the ring fast enough).  I think there are two fair
> alternatives:
> 1) Unpausing a vcpu at random.
> 2) Waiting until the ring reaches a watermark and unpausing all vCPUs.
> 
> Any thoughts on these?

3) always start the scan from a different place (either a round-robin or
from the last-serviced-vcpu + 1)?

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] enable event channel wake-up for mem_event interfaces
  2011-10-06 11:07 ` Tim Deegan
  2011-10-13 17:18   ` Adin Scannell
@ 2011-10-27 14:22   ` Olaf Hering
  1 sibling, 0 replies; 5+ messages in thread
From: Olaf Hering @ 2011-10-27 14:22 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel, Adin Scannell

On Thu, Oct 06, Tim Deegan wrote:

> At 17:24 -0400 on 28 Sep (1317230698), Adin Scannell wrote:
> > -void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
> > +static inline int mem_event_ring_free(struct domain *d, struct mem_event_domain *med)
> > +{
> > +    int free_requests;
> > +
> > +    free_requests = RING_FREE_REQUESTS(&med->front_ring);
> > +    if ( unlikely(free_requests < d->max_vcpus) )
> > +    {
> > +        /* This may happen. */
> > +        gdprintk(XENLOG_INFO, "mem_event request slots for domain %d: %d\n",
> > +                               d->domain_id, free_requests);
> > +        WARN_ON(1);
> 
> If this is something that might happen on production systems (and is
> basically benign except for the performance), we shouldn't print a full
> WARN().  The printk is more than enough.

While I havent reviewed the whole patch (sorry for that), one thing that
will break is p2m_mem_paging_populate() called from dom0.

If the ring is full, the gfn state was eventually forwarded from
paging-out state to paging-in state. But since the ring was full, no
request was sent to xenpaging which means the gfn remains in
p2m_ram_paging_in_start until the guest eventually tries to access the
gfn as well. Dom0 will call p2m_mem_paging_populate() again and again (I
think), but there will be no attempt to send a new request once the ring
has free slots again, because the gfn is already in the page-in path and
the vcpu does not belong to the guest.

I have some wild ideas how to handle this situation, but the patch as is
will break page-in attempts from xenpaging itself.

Olaf

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-10-27 14:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-28 21:24 [PATCH 1/2] enable event channel wake-up for mem_event interfaces Adin Scannell
2011-10-06 11:07 ` Tim Deegan
2011-10-13 17:18   ` Adin Scannell
2011-10-20  9:52     ` Tim Deegan
2011-10-27 14:22   ` Olaf Hering

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.