* [PATCH] mem_event: use wait queue when ring is full
@ 2011-12-05 11:19 Olaf Hering
2011-12-05 11:33 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2011-12-05 11:19 UTC (permalink / raw)
To: xen-devel
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1323083831 -3600
# Node ID cd163bcd0f066e52ee74e42090188d632f0086bf
# Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00
mem_event: use wait queue when ring is full
This change is based on an idea/patch from Adin Scannell.
If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
will take the number of free slots into account.
A request from a foreign domain has to succeed once a slot was claimed,
because such vcpus cannot sleep.
This change also fixes a bug in p2m_mem_paging_drop_page(). Up to now a
full ring would lead to a harmless inconsistency in the pager.
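To illustrate the resulting calling convention (a condensed sketch only, built
from the functions introduced below; not part of the functional change):

/* Sketch of the caller-side protocol introduced by this patch. */
static void send_mem_event_example(struct domain *d,
                                   struct mem_event_domain *med,
                                   mem_event_request_t *req)
{
    /* Claim a slot: < 0 means no ring yet, > 0 means a foreign vcpu must
     * retry later; a target-domain vcpu always gets a claim. */
    if ( mem_event_claim_slot(d, med) != 0 )
        return;

    /* If the request turns out to be unnecessary, the claim has to be
     * given back with mem_event_release_slot(d, med) instead. */

    /* For a target-domain vcpu this may sleep on med->wq until the ring
     * has room; foreign vcpus never sleep here. */
    mem_event_put_request(d, med, req);
}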
v5:
- rename mem_event_check_ring() to mem_event_claim_slot()
- rename mem_event_put_req_producers() to mem_event_release_slot()
- add local/foreign request accounting
- keep room for at least one guest request
v4:
- fix off-by-one bug in _mem_event_put_request
- add mem_event_wake_requesters() and use wake_up_nr()
- rename the mem_event_mark_and_pause() and mem_event_unpause_vcpus() functions
- req_producers counts foreign request producers, rename member
v3:
- rename ->mem_event_bit to ->bit
- remove me_ from new VPF_ defines
v2:
- p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
vcpu will not wake_up after a wait_event because the pause_count was
increased twice. Fixes guest hangs.
- update free space check in _mem_event_put_request()
- simplify mem_event_put_request()
Signed-off-by: Olaf Hering <olaf@aepfle.de>
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_event->access);
- if ( rc )
+ rc = mem_event_claim_slot(d, &d->mem_event->access);
+ if ( rc < 0 )
return rc;
memset(&req, 0, sizeof(req));
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
@@ -39,6 +40,7 @@
static int mem_event_enable(struct domain *d,
xen_domctl_mem_event_op_t *mec,
+ int bit,
struct mem_event_domain *med)
{
int rc;
@@ -107,8 +109,12 @@ static int mem_event_enable(struct domai
mem_event_ring_lock_init(med);
+ med->bit = bit;
+
+ init_waitqueue_head(&med->wq);
+
/* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ mem_event_wake_waiters(d, med);
return 0;
@@ -124,6 +130,9 @@ static int mem_event_enable(struct domai
static int mem_event_disable(struct mem_event_domain *med)
{
+ if (!list_empty(&med->wq.list))
+ return -EBUSY;
+
unmap_domain_page(med->ring_page);
med->ring_page = NULL;
@@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
{
mem_event_front_ring_t *front_ring;
+ int free_req, claimed_req;
RING_IDX req_prod;
mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ /* Foreign requests must succeed because their vcpus can not sleep */
+ claimed_req = med->foreign_producers;
+ if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
+ mem_event_ring_unlock(med);
+ return 0;
+ }
+
front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
@@ -147,14 +167,35 @@ void mem_event_put_request(struct domain
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
+ /* Update accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
+
+ return 1;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ /* Go to sleep if request came from guest */
+ if (current->domain == d) {
+ wait_event(med->wq, _mem_event_put_request(d, med, req));
+ return;
+ }
+ /* Ring was full anyway, unable to sleep in non-guest context */
+ if (!_mem_event_put_request(d, med, req))
+ printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
+ req->type, req->flags, (unsigned long)req->gfn);
}
void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
@@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e
mem_event_ring_unlock(med);
}
-void mem_event_unpause_vcpus(struct domain *d)
+/**
+ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
+ * ring. Only as many as can place another request in the ring will
+ * resume execution.
+ */
+void mem_event_wake_requesters(struct mem_event_domain *med)
+{
+ int free_req;
+
+ mem_event_ring_lock(med);
+
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ if ( free_req )
+ wake_up_nr(&med->wq, free_req);
+
+ mem_event_ring_unlock(med);
+}
+
+/**
+ * mem_event_wake_waiters - Wake all vcpus waiting for the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
+ * become available.
+ */
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *v;
for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+ if ( test_and_clear_bit(med->bit, &v->pause_flags) )
vcpu_wake(v);
}
-void mem_event_mark_and_pause(struct vcpu *v)
+/**
+ * mem_event_mark_and_sleep - Put vcpu to sleep
+ * @v: guest vcpu
+ * @med: mem_event ring
+ *
+ * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
+ * The vcpu will resume execution in mem_event_wake_waiters().
+ */
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med)
{
- set_bit(_VPF_mem_event, &v->pause_flags);
+ set_bit(med->bit, &v->pause_flags);
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct mem_event_domain *med)
+/**
+ * mem_event_release_slot - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_release_slot() releases a claimed slot in the mem_event ring.
+ */
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+/**
+ * mem_event_claim_slot - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ * 0: the ring has some room
+ * > 0: the ring is full
+ *
+ * mem_event_claim_slot() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ * A guest can always place at least one request.
+ *
+ * If the current vcpu does not belong to the target domain the caller must try
+ * again until there is room. A slot is claimed and the caller can place a
+ * request. If the caller does not need to send a request, the claimed slot has
+ * to be released with mem_event_release_slot().
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
{
- struct vcpu *curr = current;
- int free_requests;
+ int free_req;
int ring_full = 1;
if ( !med->ring_page )
@@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+
+ if ( current->domain == d ) {
+ med->target_producers++;
+ ring_full = 0;
+ } else if ( med->foreign_producers + med->target_producers + 1 < free_req )
{
- med->req_producers++;
+ med->foreign_producers++;
ring_full = 0;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
-
mem_event_ring_unlock(med);
return ring_full;
@@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x
if ( p2m->pod.entry_count )
break;
- rc = mem_event_enable(d, mec, med);
+ rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
}
break;
@@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med);
+ rc = mem_event_enable(d, mec, _VPF_mem_access, med);
}
break;
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
#endif
-static struct page_info* mem_sharing_alloc_page(struct domain *d,
- unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
{
- struct page_info* page;
struct vcpu *v = current;
- mem_event_request_t req;
-
- page = alloc_domheap_page(d, 0);
- if(page != NULL) return page;
-
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_SHARED;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
if ( v->domain != d )
{
@@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
gdprintk(XENLOG_ERR,
"Failed alloc on unshare path for foreign (%d) lookup\n",
d->domain_id);
- return page;
+ return;
}
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+ return;
- if(mem_event_check_ring(d, &d->mem_event->share)) return page;
-
+ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
mem_event_put_request(d, &d->mem_event->share, &req);
-
- return page;
+ vcpu_pause_nosync(v);
}
unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -653,7 +643,7 @@ gfn_found:
if(ret == 0) goto private_page_found;
old_page = page;
- page = mem_sharing_alloc_page(d, gfn);
+ page = alloc_domheap_page(d, 0);
if(!page)
{
/* We've failed to obtain memory for private page. Need to re-add the
@@ -661,6 +651,7 @@ gfn_found:
list_add(&gfn_info->list, &hash_entry->gfns);
put_gfn(d, gfn);
shr_unlock();
+ mem_sharing_notify_helper(d, gfn);
return -ENOMEM;
}
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
*/
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
{
- struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* Send release notification to pager */
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
struct p2m_domain *p2m = p2m_get_hostp2m(d);
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ if ( mem_event_claim_slot(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_release_slot(d, &d->mem_event->paging);
return;
}
@@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->paging);
}
void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
@@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
+ res = mem_event_claim_slot(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long
"Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
v->vcpu_id, d->domain_id);
- mem_event_mark_and_pause(v);
+ mem_event_mark_and_sleep(v, &d->mem_event->access);
}
else
{
@@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->access);
+
+ /* Unpause all vcpus that were paused because no listener was available */
+ mem_event_wake_waiters(d, &d->mem_event->access);
}
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,13 +24,13 @@
#ifndef __MEM_EVENT_H__
#define __MEM_EVENT_H__
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med);
void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_wake_requesters(struct mem_event_domain *med);
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med);
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE(void) u_domctl);
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -14,6 +14,7 @@
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
+#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
@@ -183,7 +184,8 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned int req_producers;
+ unsigned short foreign_producers;
+ unsigned short target_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
@@ -192,6 +194,10 @@ struct mem_event_domain
mem_event_front_ring_t front_ring;
/* event channel port (vcpu0 only) */
int xen_port;
+ /* mem_event bit for vcpu->pause_flags */
+ int bit;
+ /* list of vcpus waiting for room in the ring */
+ struct waitqueue_head wq;
};
struct mem_event_per_domain
@@ -615,9 +621,12 @@ static inline struct domain *next_domain
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating 3
#define VPF_migrating (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event 4
-#define VPF_mem_event (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging 4
+#define VPF_mem_paging (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access 5
+#define VPF_mem_access (1UL<<_VPF_mem_access)
static inline int vcpu_runnable(struct vcpu *v)
{
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-05 11:19 Olaf Hering
@ 2011-12-05 11:33 ` Olaf Hering
0 siblings, 0 replies; 23+ messages in thread
From: Olaf Hering @ 2011-12-05 11:33 UTC (permalink / raw)
To: xen-devel
On Mon, Dec 05, Olaf Hering wrote:
> Wakeup will take the number of free slots into account.
This is not entirely true unless this additional change is applied:
diff -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -235,6 +235,7 @@ void mem_event_wake_requesters(struct me
mem_event_ring_lock(med);
free_req = RING_FREE_REQUESTS(&med->front_ring);
+ free_req -= med->foreign_producers;
if ( free_req )
wake_up_nr(&med->wq, free_req);
And perhaps the wake_up should be done outside the ring lock?
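Doing the wake-up outside the lock would look roughly like this (untested
sketch; the lock only needs to cover the free-slot accounting):

void mem_event_wake_requesters(struct mem_event_domain *med)
{
    int free_req;

    /* Compute the number of slots usable by guest vcpus under the lock. */
    mem_event_ring_lock(med);
    free_req = RING_FREE_REQUESTS(&med->front_ring);
    free_req -= med->foreign_producers;
    mem_event_ring_unlock(med);

    /* Wake the sleeping vcpus without holding the ring lock. */
    if ( free_req )
        wake_up_nr(&med->wq, free_req);
}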
Olaf
* Re: [PATCH] mem_event: use wait queue when ring is full
[not found] <mailman.3332.1323083995.12970.xen-devel@lists.xensource.com>
@ 2011-12-05 15:45 ` Andres Lagar-Cavilla
2011-12-05 16:20 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2011-12-05 15:45 UTC (permalink / raw)
To: xen-devel; +Cc: olaf
> Date: Mon, 05 Dec 2011 12:19:03 +0100
> From: Olaf Hering <olaf@aepfle.de>
> To: xen-devel@lists.xensource.com
> Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is
> full
> Message-ID: <cd163bcd0f066e52ee74.1323083943@probook.site>
> Content-Type: text/plain; charset="us-ascii"
>
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1323083831 -3600
> # Node ID cd163bcd0f066e52ee74e42090188d632f0086bf
> # Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
>
>
> v5:
> - rename mem_event_check_ring() to mem_event_claim_slot()
> - rename mem_event_put_req_producers() to mem_event_release_slot()
> - add local/foreign request accounting
> - keep room for at least one guest request
>
> v4:
> - fix off-by-one bug in _mem_event_put_request
> - add mem_event_wake_requesters() and use wake_up_nr()
> - rename mem_event_mark_and_pause() and mem_event_mark_and_pause()
> functions
> - req_producers counts foreign request producers, rename member
>
> v3:
> - rename ->mem_event_bit to ->bit
> - remove me_ from new VPF_ defines
>
> v2:
> - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise
> the
> vcpu will not wake_up after a wait_event because the pause_count was
> increased twice. Fixes guest hangs.
> - update free space check in _mem_event_put_request()
> - simplify mem_event_put_request()
>
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p
> if ( (p & HVMPME_onchangeonly) && (value == old) )
> return 1;
>
> - rc = mem_event_check_ring(d, &d->mem_event->access);
> - if ( rc )
> + rc = mem_event_claim_slot(d, &d->mem_event->access);
> + if ( rc < 0 )
> return rc;
>
> memset(&req, 0, sizeof(req));
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -23,6 +23,7 @@
>
> #include <asm/domain.h>
> #include <xen/event.h>
> +#include <xen/wait.h>
> #include <asm/p2m.h>
> #include <asm/mem_event.h>
> #include <asm/mem_paging.h>
> @@ -39,6 +40,7 @@
>
> static int mem_event_enable(struct domain *d,
> xen_domctl_mem_event_op_t *mec,
> + int bit,
> struct mem_event_domain *med)
> {
> int rc;
> @@ -107,8 +109,12 @@ static int mem_event_enable(struct domai
>
> mem_event_ring_lock_init(med);
>
> + med->bit = bit;
I think it's been asked before for this to have a more expressive name.
> +
> + init_waitqueue_head(&med->wq);
> +
> /* Wake any VCPUs paused for memory events */
> - mem_event_unpause_vcpus(d);
> + mem_event_wake_waiters(d, med);
>
> return 0;
>
> @@ -124,6 +130,9 @@ static int mem_event_enable(struct domai
>
> static int mem_event_disable(struct mem_event_domain *med)
> {
> + if (!list_empty(&med->wq.list))
> + return -EBUSY;
> +
What does the caller do with EBUSY? Retry?
> unmap_domain_page(med->ring_page);
> med->ring_page = NULL;
>
> @@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_
> return 0;
> }
>
> -void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req)
> +static int _mem_event_put_request(struct domain *d,
> + struct mem_event_domain *med,
> + mem_event_request_t *req)
> {
> mem_event_front_ring_t *front_ring;
> + int free_req, claimed_req;
> RING_IDX req_prod;
>
> mem_event_ring_lock(med);
>
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + /* Foreign requests must succeed because their vcpus can not sleep */
> + claimed_req = med->foreign_producers;
> + if ( !free_req || ( current->domain == d && free_req <= claimed_req )
> ) {
> + mem_event_ring_unlock(med);
> + return 0;
> + }
> +
> front_ring = &med->front_ring;
> req_prod = front_ring->req_prod_pvt;
>
> @@ -147,14 +167,35 @@ void mem_event_put_request(struct domain
> memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
> req_prod++;
>
> + /* Update accounting */
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> +
> /* Update ring */
> - med->req_producers--;
> front_ring->req_prod_pvt = req_prod;
> RING_PUSH_REQUESTS(front_ring);
>
> mem_event_ring_unlock(med);
>
> notify_via_xen_event_channel(d, med->xen_port);
> +
> + return 1;
> +}
> +
> +void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med,
> + mem_event_request_t *req)
> +{
> + /* Go to sleep if request came from guest */
> + if (current->domain == d) {
> + wait_event(med->wq, _mem_event_put_request(d, med, req));
> + return;
> + }
> + /* Ring was full anyway, unable to sleep in non-guest context */
> + if (!_mem_event_put_request(d, med, req))
> + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n",
> d->domain_id,
> + req->type, req->flags, (unsigned long)req->gfn);
> }
>
> void mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp)
> @@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e
> mem_event_ring_unlock(med);
> }
>
> -void mem_event_unpause_vcpus(struct domain *d)
> +/**
> + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
> + * ring. Only as many as can place another request in the ring will
> + * resume execution.
> + */
> +void mem_event_wake_requesters(struct mem_event_domain *med)
> +{
> + int free_req;
> +
> + mem_event_ring_lock(med);
> +
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + if ( free_req )
> + wake_up_nr(&med->wq, free_req);
> +
> + mem_event_ring_unlock(med);
> +}
> +
> +/**
> + * mem_event_wake_waiters - Wake all vcpus waiting for the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
> + * become available.
> + */
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med)
> {
> struct vcpu *v;
>
> for_each_vcpu ( d, v )
> - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
> + if ( test_and_clear_bit(med->bit, &v->pause_flags) )
> vcpu_wake(v);
> }
>
> -void mem_event_mark_and_pause(struct vcpu *v)
> +/**
> + * mem_event_mark_and_sleep - Put vcpu to sleep
> + * @v: guest vcpu
> + * @med: mem_event ring
> + *
> + * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
> + * The vcpu will resume execution in mem_event_wake_waiters().
> + */
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med)
> {
> - set_bit(_VPF_mem_event, &v->pause_flags);
> + set_bit(med->bit, &v->pause_flags);
> vcpu_sleep_nosync(v);
> }
>
> -void mem_event_put_req_producers(struct mem_event_domain *med)
> +/**
> + * mem_event_release_slot - Release a claimed slot
> + * @med: mem_event ring
> + *
> + * mem_event_release_slot() releases a claimed slot in the mem_event
> ring.
> + */
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med)
> {
> mem_event_ring_lock(med);
> - med->req_producers--;
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> mem_event_ring_unlock(med);
> }
>
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
> +/**
> + * mem_event_claim_slot - Check state of a mem_event ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * Return codes: < 0: the ring is not yet configured
> + * 0: the ring has some room
> + * > 0: the ring is full
> + *
> + * mem_event_claim_slot() checks the state of the given mem_event ring.
> + * If the current vcpu belongs to the guest domain, the function assumes
> that
> + * mem_event_put_request() will sleep until the ring has room again.
> + * A guest can always place at least one request.
> + *
> + * If the current vcpu does not belong to the target domain the caller
> must try
> + * again until there is room. A slot is claimed and the caller can place
> a
> + * request. If the caller does not need to send a request, the claimed
> slot has
> + * to be released with mem_event_release_slot().
> + */
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
> {
> - struct vcpu *curr = current;
> - int free_requests;
> + int free_req;
> int ring_full = 1;
>
> if ( !med->ring_page )
> @@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain *
>
> mem_event_ring_lock(med);
>
> - free_requests = RING_FREE_REQUESTS(&med->front_ring);
> - if ( med->req_producers < free_requests )
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> +
> + if ( current->domain == d ) {
> + med->target_producers++;
> + ring_full = 0;
> + } else if ( med->foreign_producers + med->target_producers + 1 <
> free_req )
> {
> - med->req_producers++;
> + med->foreign_producers++;
> ring_full = 0;
> }
>
> - if ( ring_full && (curr->domain == d) )
> - mem_event_mark_and_pause(curr);
> -
> mem_event_ring_unlock(med);
>
> return ring_full;
> @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x
> if ( p2m->pod.entry_count )
> break;
>
> - rc = mem_event_enable(d, mec, med);
> + rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
> }
> break;
>
> @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x
> if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> break;
>
> - rc = mem_event_enable(d, mec, med);
> + rc = mem_event_enable(d, mec, _VPF_mem_access, med);
Ok, the idea of bit is that different vcpus will sleep with different
pause flags, depending on the ring they're sleeping on. But this is only
used in wake_waiters, which is not used by all rings. In fact, why do we
need wake_waiters with wait queues?
> }
> break;
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
> #endif
>
>
> -static struct page_info* mem_sharing_alloc_page(struct domain *d,
> - unsigned long gfn)
> +static void mem_sharing_notify_helper(struct domain *d, unsigned long
> gfn)
> {
> - struct page_info* page;
> struct vcpu *v = current;
> - mem_event_request_t req;
> -
> - page = alloc_domheap_page(d, 0);
> - if(page != NULL) return page;
> -
> - memset(&req, 0, sizeof(req));
> - req.type = MEM_EVENT_TYPE_SHARED;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
>
> if ( v->domain != d )
> {
> @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
> gdprintk(XENLOG_ERR,
> "Failed alloc on unshare path for foreign (%d)
> lookup\n",
> d->domain_id);
> - return page;
> + return;
> }
>
> - vcpu_pause_nosync(v);
> - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
> + if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
> + return;
>
> - if(mem_event_check_ring(d, &d->mem_event->share)) return page;
> -
> + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
> req.gfn = gfn;
> req.p2mt = p2m_ram_shared;
> req.vcpu_id = v->vcpu_id;
> mem_event_put_request(d, &d->mem_event->share, &req);
> -
> - return page;
> + vcpu_pause_nosync(v);
> }
>
> unsigned int mem_sharing_get_nr_saved_mfns(void)
> @@ -653,7 +643,7 @@ gfn_found:
> if(ret == 0) goto private_page_found;
>
> old_page = page;
> - page = mem_sharing_alloc_page(d, gfn);
> + page = alloc_domheap_page(d, 0);
> if(!page)
> {
> /* We've failed to obtain memory for private page. Need to re-add
> the
> @@ -661,6 +651,7 @@ gfn_found:
> list_add(&gfn_info->list, &hash_entry->gfns);
> put_gfn(d, gfn);
> shr_unlock();
> + mem_sharing_notify_helper(d, gfn);
This is nice. Do you think PoD could use this, should it ever run into an
ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a
sharing ring (which is bit-rotted) we could have an ENOMEM ring with a
utility launched by xencommons listening. The problem, again, is what if
the ENOMEM is itself caused by dom0 (e.g. a writable mapping of a shared page).
> return -ENOMEM;
> }
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
> */
> void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
> {
> - struct vcpu *v = current;
> - mem_event_request_t req;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn
> };
>
> - /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
> - {
> - /* Send release notification to pager */
> - memset(&req, 0, sizeof(req));
> - req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
> - req.gfn = gfn;
> - req.vcpu_id = v->vcpu_id;
> + /* Send release notification to pager */
> + req.flags = MEM_EVENT_FLAG_DROP_PAGE;
>
> - mem_event_put_request(d, &d->mem_event->paging, &req);
> - }
> + mem_event_put_request(d, &d->mem_event->paging, &req);
> }
>
> /**
> @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) )
> + if ( mem_event_claim_slot(d, &d->mem_event->paging) )
> return;
>
> memset(&req, 0, sizeof(req));
> @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
> else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
> {
> /* gfn is already on its way back and vcpu is not paused */
> - mem_event_put_req_producers(&d->mem_event->paging);
> + mem_event_release_slot(d, &d->mem_event->paging);
> return;
> }
>
> @@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain
> if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
>
> - /* Unpause any domains that were paused because the ring was full */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->paging);
> }
>
> void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned
> long gla,
> @@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long
> p2m_unlock(p2m);
>
> /* Otherwise, check if there is a memory event listener, and send the
> message along */
> - res = mem_event_check_ring(d, &d->mem_event->access);
> + res = mem_event_claim_slot(d, &d->mem_event->access);
> if ( res < 0 )
> {
> /* No listener */
> @@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long
> "Memory access permissions failure, no mem_event
> listener: pausing VCPU %d, dom %d\n",
> v->vcpu_id, d->domain_id);
>
> - mem_event_mark_and_pause(v);
> + mem_event_mark_and_sleep(v, &d->mem_event->access);
> }
> else
> {
> @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain
> if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
>
> - /* Unpause any domains that were paused because the ring was full or
> no listener
> - * was available */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->access);
> +
> + /* Unpause all vcpus that were paused because no listener was
> available */
> + mem_event_wake_waiters(d, &d->mem_event->access);
Is this not used in p2m_mem_paging_resume? Why the difference? Why are two
mechanisms needed (wake_requesters, wake_sleepers)?
Thanks
Andres
> }
>
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/asm-x86/mem_event.h
> --- a/xen/include/asm-x86/mem_event.h
> +++ b/xen/include/asm-x86/mem_event.h
> @@ -24,13 +24,13 @@
> #ifndef __MEM_EVENT_H__
> #define __MEM_EVENT_H__
>
> -/* Pauses VCPU while marking pause flag for mem event */
> -void mem_event_mark_and_pause(struct vcpu *v);
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
> -void mem_event_put_req_producers(struct mem_event_domain *med);
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med);
> void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req);
> void mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp);
> -void mem_event_unpause_vcpus(struct domain *d);
> +void mem_event_wake_requesters(struct mem_event_domain *med);
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med);
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med);
>
> int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
> XEN_GUEST_HANDLE(void) u_domctl);
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -14,6 +14,7 @@
> #include <xen/nodemask.h>
> #include <xen/radix-tree.h>
> #include <xen/multicall.h>
> +#include <xen/wait.h>
> #include <public/xen.h>
> #include <public/domctl.h>
> #include <public/sysctl.h>
> @@ -183,7 +184,8 @@ struct mem_event_domain
> {
> /* ring lock */
> spinlock_t ring_lock;
> - unsigned int req_producers;
> + unsigned short foreign_producers;
> + unsigned short target_producers;
> /* shared page */
> mem_event_shared_page_t *shared_page;
> /* shared ring page */
> @@ -192,6 +194,10 @@ struct mem_event_domain
> mem_event_front_ring_t front_ring;
> /* event channel port (vcpu0 only) */
> int xen_port;
> + /* mem_event bit for vcpu->pause_flags */
> + int bit;
> + /* list of vcpus waiting for room in the ring */
> + struct waitqueue_head wq;
> };
>
> struct mem_event_per_domain
> @@ -615,9 +621,12 @@ static inline struct domain *next_domain
> /* VCPU affinity has changed: migrating to a new CPU. */
> #define _VPF_migrating 3
> #define VPF_migrating (1UL<<_VPF_migrating)
> - /* VCPU is blocked on memory-event ring. */
> -#define _VPF_mem_event 4
> -#define VPF_mem_event (1UL<<_VPF_mem_event)
> + /* VCPU is blocked due to missing mem_paging ring. */
> +#define _VPF_mem_paging 4
> +#define VPF_mem_paging (1UL<<_VPF_mem_paging)
> + /* VCPU is blocked due to missing mem_access ring. */
> +#define _VPF_mem_access 5
> +#define VPF_mem_access (1UL<<_VPF_mem_access)
>
> static inline int vcpu_runnable(struct vcpu *v)
> {
>
>
>
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-05 15:45 ` Andres Lagar-Cavilla
@ 2011-12-05 16:20 ` Olaf Hering
2011-12-05 16:34 ` Andres Lagar-Cavilla
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2011-12-05 16:20 UTC (permalink / raw)
To: Andres Lagar-Cavilla; +Cc: xen-devel
On Mon, Dec 05, Andres Lagar-Cavilla wrote:
> > + med->bit = bit;
> I think it's been asked before for this to have a more expressive name.
I have to recheck; AFAIK the earlier request was about ->mem_event_bit, where the mem_ prefix is redundant.
> > static int mem_event_disable(struct mem_event_domain *med)
> > {
> > + if (!list_empty(&med->wq.list))
> > + return -EBUSY;
> > +
> What does the caller do with EBUSY? Retry?
Yes, and mail the devs at xen-devel that something isn't right ;-)
At least the pager uses this just in the exit path. I don't know about
access and sharing, whether these tools enable/disable more than once at
guest runtime.
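On the tools side the retry would be something like this (illustrative sketch
only; it assumes the libxc wrapper xc_mem_paging_disable() exists in this form
and surfaces the domctl's -EBUSY as errno == EBUSY):

#include <errno.h>
#include <xenctrl.h>

/* Sketch: keep retrying the teardown while vcpus still sit on the ring's
 * wait queue and the hypervisor answers -EBUSY. */
static int disable_paging_with_retry(xc_interface *xch, domid_t domid)
{
    int rc;

    do {
        rc = xc_mem_paging_disable(xch, domid);
    } while ( rc < 0 && errno == EBUSY );

    return rc;
}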
> > @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x
> > if ( p2m->pod.entry_count )
> > break;
> >
> > - rc = mem_event_enable(d, mec, med);
> > + rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
> > }
> > break;
> >
> > @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x
> > if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> > break;
> >
> > - rc = mem_event_enable(d, mec, med);
> > + rc = mem_event_enable(d, mec, _VPF_mem_access, med);
> Ok, the idea of bit is that different vcpus will sleep with different
> pause flags, depending on the ring they're sleeping on. But this is only
> used in wake_waiters, which is not used by all rings. In fact, why do we
> need wake_waiters with wait queues?
Before this patch, mem_event_unpause_vcpus() was used to resume waiters
for the ring itself and for room in the ring.
Now there is mem_event_wake_waiters(), which indicates the ring is
active, and there is mem_event_wake_requesters() which indicates the
ring has room to place guest requests.
I agree that only _VPF_mem_access is really needed, and _VPF_mem_paging
could be removed because paging without having a ring first is not
possible.
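Condensed from the patch hunks, the two resume paths then differ like this
(sketch for comparison only, details elided):

void p2m_mem_paging_resume(struct domain *d)
{
    /* ... consume the response, unpause the vcpu if needed ... */

    /* A ring always exists here, so only wake vcpus waiting for room. */
    mem_event_wake_requesters(&d->mem_event->paging);
}

void p2m_mem_access_resume(struct domain *d)
{
    /* ... consume the response, unpause the vcpu if needed ... */

    /* Wake vcpus waiting for room in the ring ... */
    mem_event_wake_requesters(&d->mem_event->access);

    /* ... and vcpus parked because no listener/ring was available. */
    mem_event_wake_waiters(d, &d->mem_event->access);
}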
> > @@ -653,7 +643,7 @@ gfn_found:
> > if(ret == 0) goto private_page_found;
> >
> > old_page = page;
> > - page = mem_sharing_alloc_page(d, gfn);
> > + page = alloc_domheap_page(d, 0);
> > if(!page)
> > {
> > /* We've failed to obtain memory for private page. Need to re-add
> > the
> > @@ -661,6 +651,7 @@ gfn_found:
> > list_add(&gfn_info->list, &hash_entry->gfns);
> > put_gfn(d, gfn);
> > shr_unlock();
> > + mem_sharing_notify_helper(d, gfn);
> This is nice. Do you think PoD could use this, should it ever run into a
> ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a
> sharing ring (which is bit rotted) we could have an ENOMEM ring with a
> utility launched by xencommons listening. The problem, again, is what if
> ENOMEM is itself caused by dom0 (e.g. writable mapping of a shared page)
I have no idea about mem_sharing. I just move the existing code outside
the lock so that mem_event_put_request() is (hopefully) called without
any locks from mem_sharing_get_nr_saved_mfns().
Since there is apparently no user of a sharing ring, this whole new
mem_sharing_notify_helper() is a big no-op.
> > @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain
> > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
> > vcpu_unpause(d->vcpu[rsp.vcpu_id]);
> >
> > - /* Unpause any domains that were paused because the ring was full or
> > no listener
> > - * was available */
> > - mem_event_unpause_vcpus(d);
> > + /* Wake vcpus waiting for room in the ring */
> > + mem_event_wake_requesters(&d->mem_event->access);
> > +
> > + /* Unpause all vcpus that were paused because no listener was
> > available */
> > + mem_event_wake_waiters(d, &d->mem_event->access);
> Is this not used in p2m_mem_paging_resume? Why the difference? Why are two
> mechanisms needed (wake_requesters, wake_sleepers)?
As said above, wake_sleepers is for those who wait for the ring itself,
and wake_requesters is for room in the ring.
p2m_mem_paging_resume() always has a ring, so it does not need to call
wake_sleepers.
Do you have a suggestion for a better name?
Olaf
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-05 16:20 ` Olaf Hering
@ 2011-12-05 16:34 ` Andres Lagar-Cavilla
2011-12-07 13:20 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2011-12-05 16:34 UTC (permalink / raw)
To: Olaf Hering; +Cc: xen-devel
> On Mon, Dec 05, Andres Lagar-Cavilla wrote:
>
>> > + med->bit = bit;
>> I think it's been asked before for this to have a more expressive name.
>
> I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.
how about pause_flag?
>
>> > static int mem_event_disable(struct mem_event_domain *med)
>> > {
>> > + if (!list_empty(&med->wq.list))
>> > + return -EBUSY;
>> > +
>> What does the caller do with EBUSY? Retry?
>
> Yes, and mail the devs at xen-devel that something isn't right ;-)
Heh, good one :)
>
> At least the pager uses this just in the exit path. I dont know about
> access and sharing, wether these tools enable/disable more than once at
> guest runtime.
>
>> > @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x
>> > if ( p2m->pod.entry_count )
>> > break;
>> >
>> > - rc = mem_event_enable(d, mec, med);
>> > + rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
>> > }
>> > break;
>> >
>> > @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x
>> > if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
>> > break;
>> >
>> > - rc = mem_event_enable(d, mec, med);
>> > + rc = mem_event_enable(d, mec, _VPF_mem_access, med);
>
>> Ok, the idea of bit is that different vcpus will sleep with different
>> pause flags, depending on the ring they're sleeping on. But this is only
>> used in wake_waiters, which is not used by all rings. In fact, why do we
>> need wake_waiters with wait queues?
>
> Before this patch, mem_event_unpause_vcpus() was used to resume waiters
> for the ring itself and for room in the ring.
> Now there is mem_event_wake_waiters(), which indicates the ring is
> active, and there is mem_event_wake_requesters() which indicates the
> ring has room to place guest requests.
I think that if there is no ring where one is expected, harsher actions
should happen. That is what we do in our patch. e.g.
p2m_mem_paging_populate -> no ring -> crash domain, or
p2m_mem_access_check -> access_required -> no ring -> crash domain.
That would eliminate wake_waiters, methinks?
>
> I agree that only _VPF_mem_access is really needed, and _VPF_mem_paging
> could be removed because paging without having a ring first is not
> possible.
>
>
>> > @@ -653,7 +643,7 @@ gfn_found:
>> > if(ret == 0) goto private_page_found;
>> >
>> > old_page = page;
>> > - page = mem_sharing_alloc_page(d, gfn);
>> > + page = alloc_domheap_page(d, 0);
>> > if(!page)
>> > {
>> > /* We've failed to obtain memory for private page. Need to
>> re-add
>> > the
>> > @@ -661,6 +651,7 @@ gfn_found:
>> > list_add(&gfn_info->list, &hash_entry->gfns);
>> > put_gfn(d, gfn);
>> > shr_unlock();
>> > + mem_sharing_notify_helper(d, gfn);
>> This is nice. Do you think PoD could use this, should it ever run into a
>> ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a
>> sharing ring (which is bit rotted) we could have an ENOMEM ring with a
>> utility launched by xencommons listening. The problem, again, is what if
>> ENOMEM is itself caused by dom0 (e.g. writable mapping of a shared page)
>
> I have no idea about mem_sharing. I just move the existing code outside
> the lock so that mem_event_put_request() is (hopefully) called without
> any locks from mem_sharing_get_nr_saved_mfns().
> Since there is appearently no user of a sharing ring, this whole new
> mem_sharing_notify_helper() is a big no-op.
Fair enough. I do think that generally, for x86/mm, an ENOMEM mem_event
ring is a good idea. Later...
>
>> > @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain
>> > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
>> > vcpu_unpause(d->vcpu[rsp.vcpu_id]);
>> >
>> > - /* Unpause any domains that were paused because the ring was full
>> or
>> > no listener
>> > - * was available */
>> > - mem_event_unpause_vcpus(d);
>> > + /* Wake vcpus waiting for room in the ring */
>> > + mem_event_wake_requesters(&d->mem_event->access);
>> > +
>> > + /* Unpause all vcpus that were paused because no listener was
>> > available */
>> > + mem_event_wake_waiters(d, &d->mem_event->access);
>> Is this not used in p2m_mem_paging_resume? Why the difference? Why are
>> two
>> mechanisms needed (wake_requesters, wake_sleepers)?
>
> As said above, wake_sleepers is for those who wait for the ring itself,
> and wake_requesters is for room in the ring.
> p2m_mem_paging_resume() always has a ring, so it does not need to call
> wake_sleepers.
>
>
> Do you have a suggestion for a better name?
>
> Olaf
>
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-05 16:34 ` Andres Lagar-Cavilla
@ 2011-12-07 13:20 ` Olaf Hering
2011-12-07 16:27 ` Andres Lagar-Cavilla
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2011-12-07 13:20 UTC (permalink / raw)
To: Andres Lagar-Cavilla; +Cc: xen-devel
On Mon, Dec 05, Andres Lagar-Cavilla wrote:
> > On Mon, Dec 05, Andres Lagar-Cavilla wrote:
> >
> >> > + med->bit = bit;
> >> I think it's been asked before for this to have a more expressive name.
> >
> > I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.
> how about pause_flag?
I made this change in my patch.
> > Before this patch, mem_event_unpause_vcpus() was used to resume waiters
> > for the ring itself and for room in the ring.
> > Now there is mem_event_wake_waiters(), which indicates the ring is
> > active, and there is mem_event_wake_requesters() which indicates the
> > ring has room to place guest requests.
>
> I think that if there is no ring where one is expected, harsher actions
> should happen. That is what we do in our patch. e.g.
> p2m_mem_paging_populate -> no ring -> crash domain, or
> p2m_mem_access_check -> access_required -> no ring -> crash domain.
>
> That would eliminate wake_waiters, methinks?
In p2m_mem_paging_populate() a sanity check could be added. I think it
would indicate a bad p2mt state, because nominate was called without a ring.
How else can a gfn enter a paging state?
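A minimal sketch of such a check (illustrative only, untested):

/* Sketch: a gfn in a paging type but no connected ring means the pager
 * died or misbehaved -- crashing the domain is harsher but honest. */
if ( unlikely(!d->mem_event->paging.ring_page) )
{
    gdprintk(XENLOG_ERR, "gfn %lx in paging state without a ring, d %d\n",
             gfn, d->domain_id);
    domain_crash(d);
    return;
}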
Olaf
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-07 13:20 ` Olaf Hering
@ 2011-12-07 16:27 ` Andres Lagar-Cavilla
0 siblings, 0 replies; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2011-12-07 16:27 UTC (permalink / raw)
To: Olaf Hering; +Cc: xen-devel
> On Mon, Dec 05, Andres Lagar-Cavilla wrote:
>
>> > On Mon, Dec 05, Andres Lagar-Cavilla wrote:
>> >
>> >> > + med->bit = bit;
>> >> I think it's been asked before for this to have a more expressive
>> name.
>> >
>> > I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.
>> how about pause_flag?
>
> I made this change in my patch.
>
>> > Before this patch, mem_event_unpause_vcpus() was used to resume
>> waiters
>> > for the ring itself and for room in the ring.
>> > Now there is mem_event_wake_waiters(), which indicates the ring is
>> > active, and there is mem_event_wake_requesters() which indicates the
>> > ring has room to place guest requests.
>>
>> I think that if there is no ring where one is expected, harsher actions
>> should happen. That is what we do in our patch. e.g.
>> p2m_mem_paging_populate -> no ring -> crash domain, or
>> p2m_mem_access_check -> access_required -> no ring -> crash domain.
>>
>> That would eliminate wake_waiters, methinks?
>
> In p2m_mem_paging_populate() a sanity check could be added. I think it
> would indicate bad p2mt state because nominate was called without ring.
> How else can a gfn enter paging state?
Definitely. Crash that domain. Maybe the pager crashed and burned, or quit
carelessly.
Andres
>
> Olaf
>
* [PATCH] mem_event: use wait queue when ring is full
@ 2011-12-09 19:23 Olaf Hering
2011-12-15 12:43 ` Tim Deegan
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2011-12-09 19:23 UTC (permalink / raw)
To: xen-devel
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1323456817 -3600
# Node ID b25b2bcc2c6a987086bf37ec67a64d989813eb4d
# Parent 1c58bb664d8d55e475d179cb5f81693991859fc8
mem_event: use wait queue when ring is full
This change is based on an idea/patch from Adin Scannell.
If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
will take the number of free slots into account.
A request from a foreign domain has to succeed once a slot was claimed,
because such vcpus cannot sleep.
This change also fixes a bug in p2m_mem_paging_drop_page(). Up to now a
full ring would lead to a harmless inconsistency in the pager.
v6:
- take foreign requests into account before calling wake_up_nr()
- call wake_up_nr() outside of ring lock
- rename ->bit to ->pause_flag
- update comment in mem_event_enable()
v5:
- rename mem_event_check_ring() to mem_event_claim_slot()
- rename mem_event_put_req_producers() to mem_event_release_slot()
- add local/foreign request accounting
- keep room for at least one guest request
v4:
- fix off-by-one bug in _mem_event_put_request
- add mem_event_wake_requesters() and use wake_up_nr()
- rename the mem_event_mark_and_pause() and mem_event_unpause_vcpus() functions
- req_producers counts foreign request producers, rename member
v3:
- rename ->mem_event_bit to ->bit
- remove me_ from new VPF_ defines
v2:
- p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
vcpu will not wake_up after a wait_event because the pause_count was
increased twice. Fixes guest hangs.
- update free space check in _mem_event_put_request()
- simplify mem_event_put_request()
Signed-off-by: Olaf Hering <olaf@aepfle.de>
diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4172,8 +4172,8 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_event->access);
- if ( rc )
+ rc = mem_event_claim_slot(d, &d->mem_event->access);
+ if ( rc < 0 )
return rc;
memset(&req, 0, sizeof(req));
diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
@@ -41,6 +42,7 @@ static int mem_event_enable(
struct domain *d,
xen_domctl_mem_event_op_t *mec,
struct mem_event_domain *med,
+ int pause_flag,
xen_event_channel_notification_t notification_fn)
{
int rc;
@@ -110,8 +112,12 @@ static int mem_event_enable(
mem_event_ring_lock_init(med);
- /* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ med->pause_flag = pause_flag;
+
+ init_waitqueue_head(&med->wq);
+
+ /* Wake any VCPUs waiting for the ring to appear */
+ mem_event_wake_waiters(d, med);
return 0;
@@ -127,6 +133,9 @@ static int mem_event_enable(
static int mem_event_disable(struct mem_event_domain *med)
{
+ if (!list_empty(&med->wq.list))
+ return -EBUSY;
+
unmap_domain_page(med->ring_page);
med->ring_page = NULL;
@@ -136,13 +145,24 @@ static int mem_event_disable(struct mem_
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
{
mem_event_front_ring_t *front_ring;
+ int free_req, claimed_req;
RING_IDX req_prod;
mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ /* Foreign requests must succeed because their vcpus can not sleep */
+ claimed_req = med->foreign_producers;
+ if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
+ mem_event_ring_unlock(med);
+ return 0;
+ }
+
front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
@@ -156,14 +176,35 @@ void mem_event_put_request(struct domain
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
+ /* Update accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
+
+ return 1;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ /* Go to sleep if request came from guest */
+ if (current->domain == d) {
+ wait_event(med->wq, _mem_event_put_request(d, med, req));
+ return;
+ }
+ /* Ring was full anyway, unable to sleep in non-guest context */
+ if (!_mem_event_put_request(d, med, req))
+ printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
+ req->type, req->flags, (unsigned long)req->gfn);
}
int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
@@ -195,32 +236,97 @@ int mem_event_get_response(struct mem_ev
return 1;
}
-void mem_event_unpause_vcpus(struct domain *d)
+/**
+ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
+ * ring. Only as many as can place another request in the ring will
+ * resume execution.
+ */
+void mem_event_wake_requesters(struct mem_event_domain *med)
+{
+ int free_req;
+
+ mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ free_req -= med->foreign_producers;
+ mem_event_ring_unlock(med);
+
+ if ( free_req )
+ wake_up_nr(&med->wq, free_req);
+}
+
+/**
+ * mem_event_wake_waiters - Wake all vcpus waiting for the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
+ * become available.
+ */
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *v;
for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
vcpu_wake(v);
}
-void mem_event_mark_and_pause(struct vcpu *v)
+/**
+ * mem_event_mark_and_sleep - Put vcpu to sleep
+ * @v: guest vcpu
+ * @med: mem_event ring
+ *
+ * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
+ * The vcpu will resume execution in mem_event_wake_waiters().
+ */
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med)
{
- set_bit(_VPF_mem_event, &v->pause_flags);
+ set_bit(med->pause_flag, &v->pause_flags);
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct mem_event_domain *med)
+/**
+ * mem_event_release_slot - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_release_slot() releases a claimed slot in the mem_event ring.
+ */
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+/**
+ * mem_event_claim_slot - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ * 0: the ring has some room
+ * > 0: the ring is full
+ *
+ * mem_event_claim_slot() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ * A guest can always place at least one request.
+ *
+ * If the current vcpu does not belong to the target domain the caller must try
+ * again until there is room. A slot is claimed and the caller can place a
+ * request. If the caller does not need to send a request, the claimed slot has
+ * to be released with mem_event_release_slot().
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
{
- struct vcpu *curr = current;
- int free_requests;
+ int free_req;
int ring_full = 1;
if ( !med->ring_page )
@@ -228,16 +334,17 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+
+ if ( current->domain == d ) {
+ med->target_producers++;
+ ring_full = 0;
+ } else if ( med->foreign_producers + med->target_producers + 1 < free_req )
{
- med->req_producers++;
+ med->foreign_producers++;
ring_full = 0;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
-
mem_event_ring_unlock(med);
return ring_full;
@@ -316,7 +423,7 @@ int mem_event_domctl(struct domain *d, x
if ( p2m->pod.entry_count )
break;
- rc = mem_event_enable(d, mec, med, mem_paging_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification);
}
break;
@@ -355,7 +462,7 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med, mem_access_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification);
}
break;
diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
#endif
-static struct page_info* mem_sharing_alloc_page(struct domain *d,
- unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
{
- struct page_info* page;
struct vcpu *v = current;
- mem_event_request_t req;
-
- page = alloc_domheap_page(d, 0);
- if(page != NULL) return page;
-
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_SHARED;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
if ( v->domain != d )
{
@@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
gdprintk(XENLOG_ERR,
"Failed alloc on unshare path for foreign (%d) lookup\n",
d->domain_id);
- return page;
+ return;
}
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+ return;
- if(mem_event_check_ring(d, &d->mem_event->share)) return page;
-
+ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
mem_event_put_request(d, &d->mem_event->share, &req);
-
- return page;
+ vcpu_pause_nosync(v);
}
unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -656,7 +646,7 @@ gfn_found:
if(ret == 0) goto private_page_found;
old_page = page;
- page = mem_sharing_alloc_page(d, gfn);
+ page = alloc_domheap_page(d, 0);
if(!page)
{
/* We've failed to obtain memory for private page. Need to re-add the
@@ -664,6 +654,7 @@ gfn_found:
list_add(&gfn_info->list, &hash_entry->gfns);
put_gfn(d, gfn);
shr_unlock();
+ mem_sharing_notify_helper(d, gfn);
return -ENOMEM;
}
diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
*/
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
{
- struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* Send release notification to pager */
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
struct p2m_domain *p2m = p2m_get_hostp2m(d);
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ if ( mem_event_claim_slot(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_release_slot(d, &d->mem_event->paging);
return;
}
@@ -1080,8 +1072,8 @@ void p2m_mem_paging_resume(struct domain
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->paging);
}
bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
@@ -1115,7 +1107,7 @@ bool_t p2m_mem_access_check(unsigned lon
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
+ res = mem_event_claim_slot(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -1125,7 +1117,7 @@ bool_t p2m_mem_access_check(unsigned lon
"Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
v->vcpu_id, d->domain_id);
- mem_event_mark_and_pause(v);
+ mem_event_mark_and_sleep(v, &d->mem_event->access);
}
else
{
@@ -1186,9 +1178,11 @@ void p2m_mem_access_resume(struct domain
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->access);
+
+ /* Unpause all vcpus that were paused because no listener was available */
+ mem_event_wake_waiters(d, &d->mem_event->access);
}
diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,13 +24,13 @@
#ifndef __MEM_EVENT_H__
#define __MEM_EVENT_H__
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med);
void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_wake_requesters(struct mem_event_domain *med);
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med);
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE(void) u_domctl);
diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -14,6 +14,7 @@
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
+#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
@@ -183,7 +184,8 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned int req_producers;
+ unsigned short foreign_producers;
+ unsigned short target_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
@@ -192,6 +194,10 @@ struct mem_event_domain
mem_event_front_ring_t front_ring;
/* event channel port (vcpu0 only) */
int xen_port;
+ /* mem_event bit for vcpu->pause_flags */
+ int pause_flag;
+ /* list of vcpus waiting for room in the ring */
+ struct waitqueue_head wq;
};
struct mem_event_per_domain
@@ -615,9 +621,12 @@ static inline struct domain *next_domain
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating 3
#define VPF_migrating (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event 4
-#define VPF_mem_event (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging 4
+#define VPF_mem_paging (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access 5
+#define VPF_mem_access (1UL<<_VPF_mem_access)
static inline int vcpu_runnable(struct vcpu *v)
{
* Re: [PATCH] mem_event: use wait queue when ring is full
[not found] <mailman.3873.1323460242.12970.xen-devel@lists.xensource.com>
@ 2011-12-10 5:22 ` Andres Lagar-Cavilla
2011-12-13 13:40 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2011-12-10 5:22 UTC (permalink / raw)
To: xen-devel; +Cc: olaf, tim
Olaf,
Tim pointed out we need both solutions to ring management in the
hypervisor. With our patch ("Improve ring management for memory events. Do
not lose guest events."), we can handle the common case quickly, without
preempting VMs. With your patch, we can handle extreme situations of ring
congestion with the big hammer called wait queue.
After thinking a little bit about how to integrate both solutions, I've
come to one way of doing it. Might not be the best, but it's an option.
In our patch, we have this pseudo-code snippet
arch/x86/mm/mem_event.c: mem_event_put_request
{
if (foreign vcpu) and (space_in_ring < vcpus)
/* Foreign vcpus need to retry. No mercy */
return -EAGAIN
if ( mem_event_ring_free(d, med) == 0 )
{
/* This *should* never happen */
gdprintk(XENLOG_WARNING, "possible lost mem_event for domain %d\n",
d->domain_id);
ret = -EBUSY; <-- *go to sleep on a waitqueue here, instead*
}
else
put_event
if (space_in_ring < vcpus) && (guest vcpu)
pause this vcpu
ret = -EBUSY
}
I hope my pseudo-code description makes sense. All the metadata for queue
management from your patch is necessary. But there is one single,
well-defined point for wait queue invocation. And the rest is taken care
of (hopefully).
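For illustration only, here is a compilable toy model of the combined flow
sketched above; the names (ring_model, EV_*) are hypothetical and this is
not code from either patch:

#include <stdio.h>

enum put_result { EV_PUT, EV_PUT_AND_PAUSE, EV_RETRY, EV_WAITQUEUE };

struct ring_model {
    int free_slots;    /* what RING_FREE_REQUESTS() reports in the real code */
    int guest_vcpus;   /* d->max_vcpus */
};

/* Decide what happens to one request under the combined policy. */
static enum put_result model_put(struct ring_model *r, int is_foreign)
{
    if ( is_foreign && r->free_slots < r->guest_vcpus )
        return EV_RETRY;          /* foreign vcpus must retry, no mercy */
    if ( r->free_slots == 0 )
        return EV_WAITQUEUE;      /* guest vcpu sleeps on the wait queue */
    r->free_slots--;              /* place the event in the ring */
    if ( !is_foreign && r->free_slots < r->guest_vcpus )
        return EV_PUT_AND_PAUSE;  /* guest vcpu is paused after posting */
    return EV_PUT;
}

int main(void)
{
    struct ring_model r = { .free_slots = 2, .guest_vcpus = 4 };
    printf("foreign vcpu: %d\n", model_put(&r, 1));  /* EV_RETRY */
    printf("guest vcpu:   %d\n", model_put(&r, 0));  /* EV_PUT_AND_PAUSE */
    return 0;
}

The real patches of course operate on the shared ring and wait queue
directly; this only models the decision points.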
A thought,
Andres
> Date: Fri, 09 Dec 2011 20:23:55 +0100
> From: Olaf Hering <olaf@aepfle.de>
> To: xen-devel@lists.xensource.com
> Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is
> full
> Message-ID: <b25b2bcc2c6a987086bf.1323458635@probook.site>
> Content-Type: text/plain; charset="us-ascii"
>
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1323456817 -3600
> # Node ID b25b2bcc2c6a987086bf37ec67a64d989813eb4d
> # Parent 1c58bb664d8d55e475d179cb5f81693991859fc8
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
>
> v6:
> - take foreign requests into account before calling wake_up_nr()
> - call wake_up_nr() outside of ring lock
> - rename ->bit to ->pause_flag
> - update comment in mem_event_enable()
>
> v5:
> - rename mem_event_check_ring() to mem_event_claim_slot()
> - rename mem_event_put_req_producers() to mem_event_release_slot()
> - add local/foreign request accounting
> - keep room for at least one guest request
>
> v4:
> - fix off-by-one bug in _mem_event_put_request
> - add mem_event_wake_requesters() and use wake_up_nr()
> - rename mem_event_mark_and_pause() and mem_event_mark_and_pause()
> functions
> - req_producers counts foreign request producers, rename member
>
> v3:
> - rename ->mem_event_bit to ->bit
> - remove me_ from new VPF_ defines
>
> v2:
> - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise
> the
> vcpu will not wake_up after a wait_event because the pause_count was
> increased twice. Fixes guest hangs.
> - update free space check in _mem_event_put_request()
> - simplify mem_event_put_request()
>
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
>
> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4172,8 +4172,8 @@ static int hvm_memory_event_traps(long p
> if ( (p & HVMPME_onchangeonly) && (value == old) )
> return 1;
>
> - rc = mem_event_check_ring(d, &d->mem_event->access);
> - if ( rc )
> + rc = mem_event_claim_slot(d, &d->mem_event->access);
> + if ( rc < 0 )
> return rc;
>
> memset(&req, 0, sizeof(req));
> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -23,6 +23,7 @@
>
> #include <asm/domain.h>
> #include <xen/event.h>
> +#include <xen/wait.h>
> #include <asm/p2m.h>
> #include <asm/mem_event.h>
> #include <asm/mem_paging.h>
> @@ -41,6 +42,7 @@ static int mem_event_enable(
> struct domain *d,
> xen_domctl_mem_event_op_t *mec,
> struct mem_event_domain *med,
> + int pause_flag,
> xen_event_channel_notification_t notification_fn)
> {
> int rc;
> @@ -110,8 +112,12 @@ static int mem_event_enable(
>
> mem_event_ring_lock_init(med);
>
> - /* Wake any VCPUs paused for memory events */
> - mem_event_unpause_vcpus(d);
> + med->pause_flag = pause_flag;
> +
> + init_waitqueue_head(&med->wq);
> +
> + /* Wake any VCPUs waiting for the ring to appear */
> + mem_event_wake_waiters(d, med);
>
> return 0;
>
> @@ -127,6 +133,9 @@ static int mem_event_enable(
>
> static int mem_event_disable(struct mem_event_domain *med)
> {
> + if (!list_empty(&med->wq.list))
> + return -EBUSY;
> +
> unmap_domain_page(med->ring_page);
> med->ring_page = NULL;
>
> @@ -136,13 +145,24 @@ static int mem_event_disable(struct mem_
> return 0;
> }
>
> -void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req)
> +static int _mem_event_put_request(struct domain *d,
> + struct mem_event_domain *med,
> + mem_event_request_t *req)
> {
> mem_event_front_ring_t *front_ring;
> + int free_req, claimed_req;
> RING_IDX req_prod;
>
> mem_event_ring_lock(med);
>
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + /* Foreign requests must succeed because their vcpus can not sleep */
> + claimed_req = med->foreign_producers;
> + if ( !free_req || ( current->domain == d && free_req <= claimed_req )
> ) {
> + mem_event_ring_unlock(med);
> + return 0;
> + }
> +
> front_ring = &med->front_ring;
> req_prod = front_ring->req_prod_pvt;
>
> @@ -156,14 +176,35 @@ void mem_event_put_request(struct domain
> memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
> req_prod++;
>
> + /* Update accounting */
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> +
> /* Update ring */
> - med->req_producers--;
> front_ring->req_prod_pvt = req_prod;
> RING_PUSH_REQUESTS(front_ring);
>
> mem_event_ring_unlock(med);
>
> notify_via_xen_event_channel(d, med->xen_port);
> +
> + return 1;
> +}
> +
> +void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med,
> + mem_event_request_t *req)
> +{
> + /* Go to sleep if request came from guest */
> + if (current->domain == d) {
> + wait_event(med->wq, _mem_event_put_request(d, med, req));
> + return;
> + }
> + /* Ring was full anyway, unable to sleep in non-guest context */
> + if (!_mem_event_put_request(d, med, req))
> + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n",
> d->domain_id,
> + req->type, req->flags, (unsigned long)req->gfn);
> }
>
> int mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp)
> @@ -195,32 +236,97 @@ int mem_event_get_response(struct mem_ev
> return 1;
> }
>
> -void mem_event_unpause_vcpus(struct domain *d)
> +/**
> + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
> + * ring. Only as many as can place another request in the ring will
> + * resume execution.
> + */
> +void mem_event_wake_requesters(struct mem_event_domain *med)
> +{
> + int free_req;
> +
> + mem_event_ring_lock(med);
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + free_req -= med->foreign_producers;
> + mem_event_ring_unlock(med);
> +
> + if ( free_req )
> + wake_up_nr(&med->wq, free_req);
> +}
> +
> +/**
> + * mem_event_wake_waiters - Wake all vcpus waiting for the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
> + * become available.
> + */
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med)
> {
> struct vcpu *v;
>
> for_each_vcpu ( d, v )
> - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
> + if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
> vcpu_wake(v);
> }
>
> -void mem_event_mark_and_pause(struct vcpu *v)
> +/**
> + * mem_event_mark_and_sleep - Put vcpu to sleep
> + * @v: guest vcpu
> + * @med: mem_event ring
> + *
> + * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
> + * The vcpu will resume execution in mem_event_wake_waiters().
> + */
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med)
> {
> - set_bit(_VPF_mem_event, &v->pause_flags);
> + set_bit(med->pause_flag, &v->pause_flags);
> vcpu_sleep_nosync(v);
> }
>
> -void mem_event_put_req_producers(struct mem_event_domain *med)
> +/**
> + * mem_event_release_slot - Release a claimed slot
> + * @med: mem_event ring
> + *
> + * mem_event_release_slot() releases a claimed slot in the mem_event
> ring.
> + */
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med)
> {
> mem_event_ring_lock(med);
> - med->req_producers--;
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> mem_event_ring_unlock(med);
> }
>
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
> +/**
> + * mem_event_claim_slot - Check state of a mem_event ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * Return codes: < 0: the ring is not yet configured
> + * 0: the ring has some room
> + * > 0: the ring is full
> + *
> + * mem_event_claim_slot() checks the state of the given mem_event ring.
> + * If the current vcpu belongs to the guest domain, the function assumes
> that
> + * mem_event_put_request() will sleep until the ring has room again.
> + * A guest can always place at least one request.
> + *
> + * If the current vcpu does not belong to the target domain the caller
> must try
> + * again until there is room. A slot is claimed and the caller can place
> a
> + * request. If the caller does not need to send a request, the claimed
> slot has
> + * to be released with mem_event_release_slot().
> + */
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
> {
> - struct vcpu *curr = current;
> - int free_requests;
> + int free_req;
> int ring_full = 1;
>
> if ( !med->ring_page )
> @@ -228,16 +334,17 @@ int mem_event_check_ring(struct domain *
>
> mem_event_ring_lock(med);
>
> - free_requests = RING_FREE_REQUESTS(&med->front_ring);
> - if ( med->req_producers < free_requests )
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> +
> + if ( current->domain == d ) {
> + med->target_producers++;
> + ring_full = 0;
> + } else if ( med->foreign_producers + med->target_producers + 1 <
> free_req )
> {
> - med->req_producers++;
> + med->foreign_producers++;
> ring_full = 0;
> }
>
> - if ( ring_full && (curr->domain == d) )
> - mem_event_mark_and_pause(curr);
> -
> mem_event_ring_unlock(med);
>
> return ring_full;
> @@ -316,7 +423,7 @@ int mem_event_domctl(struct domain *d, x
> if ( p2m->pod.entry_count )
> break;
>
> - rc = mem_event_enable(d, mec, med, mem_paging_notification);
> + rc = mem_event_enable(d, mec, med, _VPF_mem_paging,
> mem_paging_notification);
> }
> break;
>
> @@ -355,7 +462,7 @@ int mem_event_domctl(struct domain *d, x
> if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> break;
>
> - rc = mem_event_enable(d, mec, med, mem_access_notification);
> + rc = mem_event_enable(d, mec, med, _VPF_mem_access,
> mem_access_notification);
> }
> break;
>
> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/mem_sharing.c
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
> #endif
>
>
> -static struct page_info* mem_sharing_alloc_page(struct domain *d,
> - unsigned long gfn)
> +static void mem_sharing_notify_helper(struct domain *d, unsigned long
> gfn)
> {
> - struct page_info* page;
> struct vcpu *v = current;
> - mem_event_request_t req;
> -
> - page = alloc_domheap_page(d, 0);
> - if(page != NULL) return page;
> -
> - memset(&req, 0, sizeof(req));
> - req.type = MEM_EVENT_TYPE_SHARED;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
>
> if ( v->domain != d )
> {
> @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
> gdprintk(XENLOG_ERR,
> "Failed alloc on unshare path for foreign (%d)
> lookup\n",
> d->domain_id);
> - return page;
> + return;
> }
>
> - vcpu_pause_nosync(v);
> - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
> + if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
> + return;
>
> - if(mem_event_check_ring(d, &d->mem_event->share)) return page;
> -
> + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
> req.gfn = gfn;
> req.p2mt = p2m_ram_shared;
> req.vcpu_id = v->vcpu_id;
> mem_event_put_request(d, &d->mem_event->share, &req);
> -
> - return page;
> + vcpu_pause_nosync(v);
> }
>
> unsigned int mem_sharing_get_nr_saved_mfns(void)
> @@ -656,7 +646,7 @@ gfn_found:
> if(ret == 0) goto private_page_found;
>
> old_page = page;
> - page = mem_sharing_alloc_page(d, gfn);
> + page = alloc_domheap_page(d, 0);
> if(!page)
> {
> /* We've failed to obtain memory for private page. Need to re-add
> the
> @@ -664,6 +654,7 @@ gfn_found:
> list_add(&gfn_info->list, &hash_entry->gfns);
> put_gfn(d, gfn);
> shr_unlock();
> + mem_sharing_notify_helper(d, gfn);
> return -ENOMEM;
> }
>
> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
> */
> void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
> {
> - struct vcpu *v = current;
> - mem_event_request_t req;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn
> };
>
> - /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
> - {
> - /* Send release notification to pager */
> - memset(&req, 0, sizeof(req));
> - req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
> - req.gfn = gfn;
> - req.vcpu_id = v->vcpu_id;
> + /* Send release notification to pager */
> + req.flags = MEM_EVENT_FLAG_DROP_PAGE;
>
> - mem_event_put_request(d, &d->mem_event->paging, &req);
> - }
> + mem_event_put_request(d, &d->mem_event->paging, &req);
> }
>
> /**
> @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) )
> + if ( mem_event_claim_slot(d, &d->mem_event->paging) )
> return;
>
> memset(&req, 0, sizeof(req));
> @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
> else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
> {
> /* gfn is already on its way back and vcpu is not paused */
> - mem_event_put_req_producers(&d->mem_event->paging);
> + mem_event_release_slot(d, &d->mem_event->paging);
> return;
> }
>
> @@ -1080,8 +1072,8 @@ void p2m_mem_paging_resume(struct domain
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
> }
>
> - /* Unpause any domains that were paused because the ring was full */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->paging);
> }
>
> bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned
> long gla,
> @@ -1115,7 +1107,7 @@ bool_t p2m_mem_access_check(unsigned lon
> p2m_unlock(p2m);
>
> /* Otherwise, check if there is a memory event listener, and send the
> message along */
> - res = mem_event_check_ring(d, &d->mem_event->access);
> + res = mem_event_claim_slot(d, &d->mem_event->access);
> if ( res < 0 )
> {
> /* No listener */
> @@ -1125,7 +1117,7 @@ bool_t p2m_mem_access_check(unsigned lon
> "Memory access permissions failure, no mem_event
> listener: pausing VCPU %d, dom %d\n",
> v->vcpu_id, d->domain_id);
>
> - mem_event_mark_and_pause(v);
> + mem_event_mark_and_sleep(v, &d->mem_event->access);
> }
> else
> {
> @@ -1186,9 +1178,11 @@ void p2m_mem_access_resume(struct domain
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
> }
>
> - /* Unpause any domains that were paused because the ring was full or
> no listener
> - * was available */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->access);
> +
> + /* Unpause all vcpus that were paused because no listener was
> available */
> + mem_event_wake_waiters(d, &d->mem_event->access);
> }
>
>
> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/include/asm-x86/mem_event.h
> --- a/xen/include/asm-x86/mem_event.h
> +++ b/xen/include/asm-x86/mem_event.h
> @@ -24,13 +24,13 @@
> #ifndef __MEM_EVENT_H__
> #define __MEM_EVENT_H__
>
> -/* Pauses VCPU while marking pause flag for mem event */
> -void mem_event_mark_and_pause(struct vcpu *v);
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
> -void mem_event_put_req_producers(struct mem_event_domain *med);
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med);
> void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req);
> int mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp);
> -void mem_event_unpause_vcpus(struct domain *d);
> +void mem_event_wake_requesters(struct mem_event_domain *med);
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med);
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med);
>
> int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
> XEN_GUEST_HANDLE(void) u_domctl);
> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -14,6 +14,7 @@
> #include <xen/nodemask.h>
> #include <xen/radix-tree.h>
> #include <xen/multicall.h>
> +#include <xen/wait.h>
> #include <public/xen.h>
> #include <public/domctl.h>
> #include <public/sysctl.h>
> @@ -183,7 +184,8 @@ struct mem_event_domain
> {
> /* ring lock */
> spinlock_t ring_lock;
> - unsigned int req_producers;
> + unsigned short foreign_producers;
> + unsigned short target_producers;
> /* shared page */
> mem_event_shared_page_t *shared_page;
> /* shared ring page */
> @@ -192,6 +194,10 @@ struct mem_event_domain
> mem_event_front_ring_t front_ring;
> /* event channel port (vcpu0 only) */
> int xen_port;
> + /* mem_event bit for vcpu->pause_flags */
> + int pause_flag;
> + /* list of vcpus waiting for room in the ring */
> + struct waitqueue_head wq;
> };
>
> struct mem_event_per_domain
> @@ -615,9 +621,12 @@ static inline struct domain *next_domain
> /* VCPU affinity has changed: migrating to a new CPU. */
> #define _VPF_migrating 3
> #define VPF_migrating (1UL<<_VPF_migrating)
> - /* VCPU is blocked on memory-event ring. */
> -#define _VPF_mem_event 4
> -#define VPF_mem_event (1UL<<_VPF_mem_event)
> + /* VCPU is blocked due to missing mem_paging ring. */
> +#define _VPF_mem_paging 4
> +#define VPF_mem_paging (1UL<<_VPF_mem_paging)
> + /* VCPU is blocked due to missing mem_access ring. */
> +#define _VPF_mem_access 5
> +#define VPF_mem_access (1UL<<_VPF_mem_access)
>
> static inline int vcpu_runnable(struct vcpu *v)
> {
>
>
>
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-10 5:22 ` Andres Lagar-Cavilla
@ 2011-12-13 13:40 ` Olaf Hering
0 siblings, 0 replies; 23+ messages in thread
From: Olaf Hering @ 2011-12-13 13:40 UTC (permalink / raw)
To: Andres Lagar-Cavilla; +Cc: xen-devel, tim
On Fri, Dec 09, Andres Lagar-Cavilla wrote:
> Olaf,
> Tim pointed out we need both solutions to ring management in the
> hypervisor. With our patch ("Improve ring management for memory events. Do
> not lose guest events."), we can handle the common case quickly, without
> preempting VMs. With your patch, we can handle extreme situations of ring
> congestion with the big hammer called wait queue.
With my patch the requests get processed as they come in; both foreign
and target requests are handled equally. There is no special accounting.
A few questions about your requirements:
- Is the goal that each guest vcpu can always put at least one request?
- How many requests should foreign vcpus place in the ring if the guest
has more vcpus than available slots in the ring? Just a single one so
that foreigners can also make some progress?
- Should access and paging have the same rules for accounting?
Olaf
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-09 19:23 Olaf Hering
@ 2011-12-15 12:43 ` Tim Deegan
2011-12-15 13:15 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Tim Deegan @ 2011-12-15 12:43 UTC (permalink / raw)
To: Olaf Hering; +Cc: xen-devel
Hi,
At 20:23 +0100 on 09 Dec (1323462235), Olaf Hering wrote:
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
[...]
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
This patch is OK by me as it stands, so
Acked-by: Tim Deegan <tim@xen.org>
but I'll wait for an explicit ack from Andres or Adin before applying
it.
Cheers,
Tim.
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-15 12:43 ` Tim Deegan
@ 2011-12-15 13:15 ` Olaf Hering
0 siblings, 0 replies; 23+ messages in thread
From: Olaf Hering @ 2011-12-15 13:15 UTC (permalink / raw)
To: Tim Deegan; +Cc: xen-devel
On Thu, Dec 15, Tim Deegan wrote:
> This patch is OK by me as it stands, so
> Acked-by: Tim Deegan <tim@xen.org>
>
> but I'll wait for an explicit ack from Andres or Adin before applying
> it.
Thanks.
I will add some sort of accounting as it is done in the version from
Andres, so that each vcpu can place at least one request.
Hopefully I will send v7 of my change out today.
Olaf
* Re: [PATCH] mem_event: use wait queue when ring is full
[not found] <mailman.4227.1323785898.12970.xen-devel@lists.xensource.com>
@ 2011-12-15 14:56 ` Andres Lagar-Cavilla
2011-12-16 16:40 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2011-12-15 14:56 UTC (permalink / raw)
To: xen-devel; +Cc: olaf, tim
> Date: Tue, 13 Dec 2011 14:40:16 +0100
> From: Olaf Hering <olaf@aepfle.de>
> To: Andres Lagar-Cavilla <andres@lagarcavilla.org>
> Cc: xen-devel@lists.xensource.com, tim@xen.org
> Subject: Re: [Xen-devel] [PATCH] mem_event: use wait queue when ring
> is full
> Message-ID: <20111213134016.GA20700@aepfle.de>
> Content-Type: text/plain; charset=utf-8
>
> On Fri, Dec 09, Andres Lagar-Cavilla wrote:
>
>> Olaf,
>> Tim pointed out we need both solutions to ring management in the
>> hypervisor. With our patch ("Improve ring management for memory events.
>> Do
>> not lose guest events."), we can handle the common case quickly, without
>> preempting VMs. With your patch, we can handle extreme situations of
>> ring
>> congestion with the big hammer called wait queue.
>
> With my patch the requests get processed as they come in, both foreign
> and target requests get handled equally. There is no special accounting.
>
> A few questions about your requirements:
> - Is the goal is that each guest vcpu can always put at least one request?
Yes
> - How many requests should foreign vcpus place in the ring if the guest
> has more vcpus than available slots in the ring? Just a single one so
> that foreigners can also make some progress?
The idea is that foreign vcpus can place as many events as they want as
long as each guest vcpu that is not blocked on a mem event has room to
send one mem event. When we reach that border condition, no more foreign
mem events.
The case in which there are far too many guest vcpus needs to be handled,
either by capping the max number of vcpus for domains using a mem event,
or by growing the ring size.
> - Should access and paging have the same rules for accounting?
Absolutely.
And both should use wait queues in extreme cases in which a guest vcpu
with a single action generates multiple memory events. Given that when we
hit a border condition the guest vcpu will place one event and be flagged
VPF_mem_event_paused (or whatever that flag is named), if a guest vcpu
generates another event when flagged, that's our queue for putting the
vcpu on a wait queue.
Thanks
Andres
>
> Olaf
>
>
>
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-15 14:56 ` Andres Lagar-Cavilla
@ 2011-12-16 16:40 ` Olaf Hering
2011-12-16 17:04 ` Andres Lagar-Cavilla
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2011-12-16 16:40 UTC (permalink / raw)
To: Andres Lagar-Cavilla; +Cc: xen-devel, tim
On Thu, Dec 15, Andres Lagar-Cavilla wrote:
> > - How many requests should foreign vcpus place in the ring if the guest
> > has more vcpus than available slots in the ring? Just a single one so
> > that foreigners can also make some progress?
> The idea is that foreign vcpus can place as many events as they want as
> long as each guest vcpu that is not blocked on a men event has room to
> send one men event. When we reach that border condition, no more foreign
> men events.
>
> The case for which there are way too many guest vcpus needs to be handled,
> either by capping the max number of vcpus for domains using a men event,
> or by growing the ring size.
Right now the ring is one page, so it cannot hold more than 64 entries.
If that ever changes, the accounting can be adjusted.
> > - Should access and paging have the same rules for accounting?
> Absolutely.
>
> And both should use wait queues in extreme cases in which a guest vcpu
> with a single action generates multiple memory events. Given that when we
> hit a border condition the guest vcpu will place one event and be flagged
> VPF_mem_event_paused (or whatever that flag is named), if a guest vcpu
> generates another event when flagged, that's our queue for putting the
> vcpu on a wait queue.
An extra flag is not needed.
Below is an incremental patch (on top of v6) which does some accounting.
It's just compile-tested.
Olaf
diff -r 5d5d10e1568b xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -114,6 +114,19 @@ static int mem_event_enable(
med->pause_flag = pause_flag;
+ /*
+ * Configure ring accounting:
+ * Each guest vcpu should be able to place at least one request.
+ * If there are more vcpus than available slots in the ring, not all vcpus
+ * can place requests in the ring anyway. A minimum (arbitrary) number of
+ * foreign requests will be allowed in this case.
+ */
+ if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
+ med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
+ if ( med->max_foreign < 13 )
+ med->max_foreign = 13;
+ med->max_target = RING_SIZE(&med->front_ring) - med->max_foreign;
+
init_waitqueue_head(&med->wq);
/* Wake any VCPUs waiting for the ring to appear */
@@ -147,23 +160,28 @@ static int mem_event_disable(struct mem_
static int _mem_event_put_request(struct domain *d,
struct mem_event_domain *med,
- mem_event_request_t *req)
+ mem_event_request_t *req,
+ int *done)
{
mem_event_front_ring_t *front_ring;
- int free_req, claimed_req;
+ int free_req, claimed_req, ret;
RING_IDX req_prod;
+ if ( *done )
+ return 1;
+
mem_event_ring_lock(med);
- free_req = RING_FREE_REQUESTS(&med->front_ring);
+ front_ring = &med->front_ring;
+
/* Foreign requests must succeed because their vcpus can not sleep */
claimed_req = med->foreign_producers;
+ free_req = RING_FREE_REQUESTS(front_ring);
if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
mem_event_ring_unlock(med);
return 0;
}
- front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
if ( current->domain != d )
@@ -176,9 +194,18 @@ static int _mem_event_put_request(struct
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
+ ret = 1;
+ *done = 1;
+ free_req--;
+
/* Update accounting */
if ( current->domain == d )
+ {
med->target_producers--;
+ /* Ring is full, no more requests from this vcpu, go to sleep */
+ if ( free_req < med->max_target )
+ ret = 0;
+ }
else
med->foreign_producers--;
@@ -190,19 +217,20 @@ static int _mem_event_put_request(struct
notify_via_xen_event_channel(d, med->xen_port);
- return 1;
+ return ret;
}
void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req)
{
+ int done = 0;
/* Go to sleep if request came from guest */
if (current->domain == d) {
- wait_event(med->wq, _mem_event_put_request(d, med, req));
+ wait_event(med->wq, _mem_event_put_request(d, med, req, &done));
return;
}
/* Ring was full anyway, unable to sleep in non-guest context */
- if (!_mem_event_put_request(d, med, req))
+ if (!_mem_event_put_request(d, med, req, &done))
printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
req->type, req->flags, (unsigned long)req->gfn);
}
@@ -341,7 +369,8 @@ int mem_event_claim_slot(struct domain *
med->target_producers++;
ring_full = 0;
}
- else if ( med->foreign_producers + med->target_producers + 1 < free_req )
+ else if ( med->foreign_producers + med->target_producers < free_req &&
+ med->foreign_producers < med->max_foreign )
{
med->foreign_producers++;
ring_full = 0;
diff -r 5d5d10e1568b xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -184,8 +184,11 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned short foreign_producers;
- unsigned short target_producers;
+ /* The ring has 64 entries */
+ unsigned char foreign_producers;
+ unsigned char max_foreign;
+ unsigned char target_producers;
+ unsigned char max_target;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-16 16:40 ` Olaf Hering
@ 2011-12-16 17:04 ` Andres Lagar-Cavilla
2011-12-16 17:33 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2011-12-16 17:04 UTC (permalink / raw)
To: Olaf Hering; +Cc: xen-devel, tim, adin
> On Thu, Dec 15, Andres Lagar-Cavilla wrote:
>
>> > - How many requests should foreign vcpus place in the ring if the
>> guest
>> > has more vcpus than available slots in the ring? Just a single one
>> so
>> > that foreigners can also make some progress?
>> The idea is that foreign vcpus can place as many events as they want as
>> long as each guest vcpu that is not blocked on a men event has room to
>> send one men event. When we reach that border condition, no more foreign
>> men events.
>>
>> The case for which there are way too many guest vcpus needs to be
>> handled,
>> either by capping the max number of vcpus for domains using a men event,
>> or by growing the ring size.
>
> Right now the ring is one page, so it can not hold more than 64 entries.
> If that ever changes, the accounting can be adjusted.
>
>> > - Should access and paging have the same rules for accounting?
>> Absolutely.
>>
>> And both should use wait queues in extreme cases in which a guest vcpu
>> with a single action generates multiple memory events. Given that when
>> we
>> hit a border condition the guest vcpu will place one event and be
>> flagged
>> VPF_mem_event_paused (or whatever that flag is named), if a guest vcpu
>> generates another event when flagged, that's our queue for putting the
>> vcpu on a wait queue.
>
> An extra flag is not needed.
Can you elaborate? Which flag is not needed? And why?
>
> Below is an incremental patch (on top of v6) which does some accounting.
> Its just compile tested.
>
> Olaf
>
>
> diff -r 5d5d10e1568b xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -114,6 +114,19 @@ static int mem_event_enable(
>
> med->pause_flag = pause_flag;
>
> + /*
> + * Configure ring accounting:
> + * Each guest vcpu should be able to place at least one request.
> + * If there are more vcpus than available slots in the ring, not all
> vcpus
> + * can place requests in the ring anyway. A minimum (arbitrary)
> number of
> + * foreign requests will be allowed in this case.
> + */
> + if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
> + med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
> + if ( med->max_foreign < 13 )
> + med->max_foreign = 13;
Magic number! Why?
More generally, does this patch apply on top of a previous patch? What's
the context here?
Thanks
Andres
> + med->max_target = RING_SIZE(&med->front_ring) - med->max_foreign;
> +
> init_waitqueue_head(&med->wq);
>
> /* Wake any VCPUs waiting for the ring to appear */
> @@ -147,23 +160,28 @@ static int mem_event_disable(struct mem_
>
> static int _mem_event_put_request(struct domain *d,
> struct mem_event_domain *med,
> - mem_event_request_t *req)
> + mem_event_request_t *req,
> + int *done)
> {
> mem_event_front_ring_t *front_ring;
> - int free_req, claimed_req;
> + int free_req, claimed_req, ret;
> RING_IDX req_prod;
>
> + if ( *done )
> + return 1;
> +
> mem_event_ring_lock(med);
>
> - free_req = RING_FREE_REQUESTS(&med->front_ring);
> + front_ring = &med->front_ring;
> +
> /* Foreign requests must succeed because their vcpus can not sleep */
> claimed_req = med->foreign_producers;
> + free_req = RING_FREE_REQUESTS(front_ring);
> if ( !free_req || ( current->domain == d && free_req <= claimed_req )
> ) {
> mem_event_ring_unlock(med);
> return 0;
> }
>
> - front_ring = &med->front_ring;
> req_prod = front_ring->req_prod_pvt;
>
> if ( current->domain != d )
> @@ -176,9 +194,18 @@ static int _mem_event_put_request(struct
> memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
> req_prod++;
>
> + ret = 1;
> + *done = 1;
> + free_req--;
> +
> /* Update accounting */
> if ( current->domain == d )
> + {
> med->target_producers--;
> + /* Ring is full, no more requests from this vcpu, go to sleep */
> + if ( free_req < med->max_target )
> + ret = 0;
> + }
> else
> med->foreign_producers--;
>
> @@ -190,19 +217,20 @@ static int _mem_event_put_request(struct
>
> notify_via_xen_event_channel(d, med->xen_port);
>
> - return 1;
> + return ret;
> }
>
> void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med,
> mem_event_request_t *req)
> {
> + int done = 0;
> /* Go to sleep if request came from guest */
> if (current->domain == d) {
> - wait_event(med->wq, _mem_event_put_request(d, med, req));
> + wait_event(med->wq, _mem_event_put_request(d, med, req, &done));
> return;
> }
> /* Ring was full anyway, unable to sleep in non-guest context */
> - if (!_mem_event_put_request(d, med, req))
> + if (!_mem_event_put_request(d, med, req, &done))
> printk("Failed to put memreq: d %u t %x f %x gfn %lx\n",
> d->domain_id,
> req->type, req->flags, (unsigned long)req->gfn);
> }
> @@ -341,7 +369,8 @@ int mem_event_claim_slot(struct domain *
> med->target_producers++;
> ring_full = 0;
> }
> - else if ( med->foreign_producers + med->target_producers + 1 <
> free_req )
> + else if ( med->foreign_producers + med->target_producers < free_req
> &&
> + med->foreign_producers < med->max_foreign )
> {
> med->foreign_producers++;
> ring_full = 0;
> diff -r 5d5d10e1568b xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -184,8 +184,11 @@ struct mem_event_domain
> {
> /* ring lock */
> spinlock_t ring_lock;
> - unsigned short foreign_producers;
> - unsigned short target_producers;
> + /* The ring has 64 entries */
> + unsigned char foreign_producers;
> + unsigned char max_foreign;
> + unsigned char target_producers;
> + unsigned char max_target;
> /* shared page */
> mem_event_shared_page_t *shared_page;
> /* shared ring page */
>
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-16 17:04 ` Andres Lagar-Cavilla
@ 2011-12-16 17:33 ` Olaf Hering
0 siblings, 0 replies; 23+ messages in thread
From: Olaf Hering @ 2011-12-16 17:33 UTC (permalink / raw)
To: Andres Lagar-Cavilla; +Cc: xen-devel, tim, adin
On Fri, Dec 16, Andres Lagar-Cavilla wrote:
> >> And both should use wait queues in extreme cases in which a guest
> >> vcpu with a single action generates multiple memory events. Given
> >> that when we hit a border condition the guest vcpu will place one
> >> event and be flagged VPF_mem_event_paused (or whatever that flag is
> >> named), if a guest vcpu generates another event when flagged,
> >> that's our queue for putting the vcpu on a wait queue.
> >
> > An extra flag is not needed.
> Can you elaborate? Which flag is not needed? And why?
The flag you mentioned in your earlier reply, VPF_mem_event_paused.
Since the vcpu is already preempted, a new pause_flag would gain nothing.
> > + /*
> > + * Configure ring accounting:
> > + * Each guest vcpu should be able to place at least one request.
> > + * If there are more vcpus than available slots in the ring, not all vcpus
> > + * can place requests in the ring anyway. A minimum (arbitrary) number of
> > + * foreign requests will be allowed in this case.
> > + */
> > + if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
> > + med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
> > + if ( med->max_foreign < 13 )
> > + med->max_foreign = 13;
> Magic number! Why?
Yes, an arbitrary number of slots for foreign requests.
Which amount is correct? 1? 5? 10?
1 is probably closer to the goal of 'let each vcpu put at least one
request'.
> More generally, does this patch apply on top of a previous patch? What's
> the context here?
As I said, it's on top of v6 of my patch. I will send out the full patch
later, but I won't be able to actually test the newer version this year.
Olaf
* [PATCH] mem_event: use wait queue when ring is full
@ 2011-12-19 11:39 Olaf Hering
2011-12-22 11:27 ` Tim Deegan
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2011-12-19 11:39 UTC (permalink / raw)
To: xen-devel
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1324294734 -3600
# Node ID d910d738f31b4ee1a6b0727ac0a66cff4c8933b0
# Parent 03138a08366b895d79e143119d4c9c72833cdbcd
mem_event: use wait queue when ring is full
This change is based on an idea/patch from Adin Scannell.
If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
will take the number of free slots into account.
A request from foreign domain has to succeed once a slot was claimed
because such vcpus can not sleep.
This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
full ring will lead to harmless inconsistency in the pager.
v7:
- add ring accounting so that each vcpu can place at least one request
v6:
- take foreign requests into account before calling wake_up_nr()
- call wake_up_nr() outside of ring lock
- rename ->bit to ->pause_flag
- update comment in mem_event_enable()
v5:
- rename mem_event_check_ring() to mem_event_claim_slot()
- rename mem_event_put_req_producers() to mem_event_release_slot()
- add local/foreign request accounting
- keep room for at least one guest request
v4:
- fix off-by-one bug in _mem_event_put_request
- add mem_event_wake_requesters() and use wake_up_nr()
- rename mem_event_mark_and_pause() and mem_event_mark_and_pause() functions
- req_producers counts foreign request producers, rename member
v3:
- rename ->mem_event_bit to ->bit
- remove me_ from new VPF_ defines
v2:
- p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
vcpu will not wake_up after a wait_event because the pause_count was
increased twice. Fixes guest hangs.
- update free space check in _mem_event_put_request()
- simplify mem_event_put_request()
Signed-off-by: Olaf Hering <olaf@aepfle.de>
diff -r 03138a08366b -r d910d738f31b xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4172,8 +4172,8 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_event->access);
- if ( rc )
+ rc = mem_event_claim_slot(d, &d->mem_event->access);
+ if ( rc < 0 )
return rc;
memset(&req, 0, sizeof(req));
diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
@@ -41,6 +42,7 @@ static int mem_event_enable(
struct domain *d,
xen_domctl_mem_event_op_t *mec,
struct mem_event_domain *med,
+ int pause_flag,
xen_event_channel_notification_t notification_fn)
{
int rc;
@@ -110,8 +112,25 @@ static int mem_event_enable(
mem_event_ring_lock_init(med);
- /* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ med->pause_flag = pause_flag;
+
+ /*
+ * Configure ring accounting:
+ * Each guest vcpu should be able to place at least one request.
+ * If there are more vcpus than available slots in the ring, not all vcpus
+ * can place requests in the ring anyway. A minimum (arbitrary) number of
+ * foreign requests will be allowed in this case.
+ */
+ if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
+ med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
+ if ( med->max_foreign < 13 )
+ med->max_foreign = 13;
+ med->max_target = RING_SIZE(&med->front_ring) - med->max_foreign;
+
+ init_waitqueue_head(&med->wq);
+
+ /* Wake any VCPUs waiting for the ring to appear */
+ mem_event_wake_waiters(d, med);
return 0;
@@ -127,6 +146,9 @@ static int mem_event_enable(
static int mem_event_disable(struct mem_event_domain *med)
{
+ if (!list_empty(&med->wq.list))
+ return -EBUSY;
+
unmap_domain_page(med->ring_page);
med->ring_page = NULL;
@@ -136,14 +158,30 @@ static int mem_event_disable(struct mem_
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req,
+ int *done)
{
mem_event_front_ring_t *front_ring;
+ int free_req, claimed_req, ret;
RING_IDX req_prod;
+ if ( *done )
+ return 1;
+
mem_event_ring_lock(med);
front_ring = &med->front_ring;
+
+ /* Foreign requests must succeed because their vcpus can not sleep */
+ claimed_req = med->foreign_producers;
+ free_req = RING_FREE_REQUESTS(front_ring);
+ if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
+ mem_event_ring_unlock(med);
+ return 0;
+ }
+
req_prod = front_ring->req_prod_pvt;
if ( current->domain != d )
@@ -156,14 +194,45 @@ void mem_event_put_request(struct domain
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
+ ret = 1;
+ *done = 1;
+ free_req--;
+
+ /* Update accounting */
+ if ( current->domain == d )
+ {
+ med->target_producers--;
+ /* Ring is full, no more requests from this vcpu, go to sleep */
+ if ( free_req < med->max_target )
+ ret = 0;
+ }
+ else
+ med->foreign_producers--;
+
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
+
+ return ret;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ int done = 0;
+ /* Go to sleep if request came from guest */
+ if (current->domain == d) {
+ wait_event(med->wq, _mem_event_put_request(d, med, req, &done));
+ return;
+ }
+ /* Ring was full anyway, unable to sleep in non-guest context */
+ if (!_mem_event_put_request(d, med, req, &done))
+ printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
+ req->type, req->flags, (unsigned long)req->gfn);
}
int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
@@ -195,32 +264,97 @@ int mem_event_get_response(struct mem_ev
return 1;
}
-void mem_event_unpause_vcpus(struct domain *d)
+/**
+ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_requesters() will wake up vcpus waiting for room in the
+ * ring. Only as many as can place another request in the ring will
+ * resume execution.
+ */
+void mem_event_wake_requesters(struct mem_event_domain *med)
+{
+ int free_req;
+
+ mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ free_req -= med->foreign_producers;
+ mem_event_ring_unlock(med);
+
+ if ( free_req )
+ wake_up_nr(&med->wq, free_req);
+}
+
+/**
+ * mem_event_wake_waiters - Wake all vcpus waiting for the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_waiters() will wake up all vcpus waiting for the ring to
+ * become available.
+ */
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *v;
for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
vcpu_wake(v);
}
-void mem_event_mark_and_pause(struct vcpu *v)
+/**
+ * mem_event_mark_and_sleep - Put vcpu to sleep
+ * @v: guest vcpu
+ * @med: mem_event ring
+ *
+ * mem_event_mark_and_sleep() tags the vcpu and puts it to sleep.
+ * The vcpu will resume execution in mem_event_wake_waiters().
+ */
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med)
{
- set_bit(_VPF_mem_event, &v->pause_flags);
+ set_bit(med->pause_flag, &v->pause_flags);
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct mem_event_domain *med)
+/**
+ * mem_event_release_slot - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_release_slot() releases a claimed slot in the mem_event ring.
+ */
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+/**
+ * mem_event_claim_slot - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ * 0: the ring has some room
+ * > 0: the ring is full
+ *
+ * mem_event_claim_slot() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ * A guest can always place at least one request.
+ *
+ * If the current vcpu does not belong to the target domain, the caller must
+ * retry until there is room. Once a slot has been claimed, the caller can
+ * place a request. If the caller does not need to send a request after all,
+ * the claimed slot has to be released with mem_event_release_slot().
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
{
- struct vcpu *curr = current;
- int free_requests;
+ int free_req;
int ring_full = 1;
if ( !med->ring_page )
@@ -228,15 +362,19 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+
+ if ( current->domain == d )
{
- med->req_producers++;
+ med->target_producers++;
ring_full = 0;
}
-
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
+ else if ( med->foreign_producers + med->target_producers < free_req &&
+ med->foreign_producers < med->max_foreign )
+ {
+ med->foreign_producers++;
+ ring_full = 0;
+ }
mem_event_ring_unlock(med);
@@ -316,7 +454,7 @@ int mem_event_domctl(struct domain *d, x
if ( p2m->pod.entry_count )
break;
- rc = mem_event_enable(d, mec, med, mem_paging_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification);
}
break;
@@ -355,7 +493,7 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med, mem_access_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification);
}
break;
diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
#endif
-static struct page_info* mem_sharing_alloc_page(struct domain *d,
- unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
{
- struct page_info* page;
struct vcpu *v = current;
- mem_event_request_t req;
-
- page = alloc_domheap_page(d, 0);
- if(page != NULL) return page;
-
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_SHARED;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
if ( v->domain != d )
{
@@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
gdprintk(XENLOG_ERR,
"Failed alloc on unshare path for foreign (%d) lookup\n",
d->domain_id);
- return page;
+ return;
}
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+ return;
- if(mem_event_check_ring(d, &d->mem_event->share)) return page;
-
+ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
mem_event_put_request(d, &d->mem_event->share, &req);
-
- return page;
+ vcpu_pause_nosync(v);
}
unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -656,7 +646,7 @@ gfn_found:
if(ret == 0) goto private_page_found;
old_page = page;
- page = mem_sharing_alloc_page(d, gfn);
+ page = alloc_domheap_page(d, 0);
if(!page)
{
/* We've failed to obtain memory for private page. Need to re-add the
@@ -664,6 +654,7 @@ gfn_found:
list_add(&gfn_info->list, &hash_entry->gfns);
put_gfn(d, gfn);
shr_unlock();
+ mem_sharing_notify_helper(d, gfn);
return -ENOMEM;
}
diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
*/
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
{
- struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* Send release notification to pager */
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
struct p2m_domain *p2m = p2m_get_hostp2m(d);
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ if ( mem_event_claim_slot(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_release_slot(d, &d->mem_event->paging);
return;
}
@@ -1080,8 +1072,8 @@ void p2m_mem_paging_resume(struct domain
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->paging);
}
bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
@@ -1115,7 +1107,7 @@ bool_t p2m_mem_access_check(unsigned lon
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
+ res = mem_event_claim_slot(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -1125,7 +1117,7 @@ bool_t p2m_mem_access_check(unsigned lon
"Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
v->vcpu_id, d->domain_id);
- mem_event_mark_and_pause(v);
+ mem_event_mark_and_sleep(v, &d->mem_event->access);
}
else
{
@@ -1186,9 +1178,11 @@ void p2m_mem_access_resume(struct domain
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->access);
+
+ /* Unpause all vcpus that were paused because no listener was available */
+ mem_event_wake_waiters(d, &d->mem_event->access);
}
diff -r 03138a08366b -r d910d738f31b xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,13 +24,13 @@
#ifndef __MEM_EVENT_H__
#define __MEM_EVENT_H__
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med);
void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_wake_requesters(struct mem_event_domain *med);
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med);
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE(void) u_domctl);
diff -r 03138a08366b -r d910d738f31b xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -14,6 +14,7 @@
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
+#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
@@ -183,7 +184,11 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned int req_producers;
+ /* The ring has 64 entries */
+ unsigned char foreign_producers;
+ unsigned char max_foreign;
+ unsigned char target_producers;
+ unsigned char max_target;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
@@ -192,6 +197,10 @@ struct mem_event_domain
mem_event_front_ring_t front_ring;
/* event channel port (vcpu0 only) */
int xen_port;
+ /* mem_event bit for vcpu->pause_flags */
+ int pause_flag;
+ /* list of vcpus waiting for room in the ring */
+ struct waitqueue_head wq;
};
struct mem_event_per_domain
@@ -615,9 +624,12 @@ static inline struct domain *next_domain
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating 3
#define VPF_migrating (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event 4
-#define VPF_mem_event (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging 4
+#define VPF_mem_paging (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access 5
+#define VPF_mem_access (1UL<<_VPF_mem_access)
static inline int vcpu_runnable(struct vcpu *v)
{
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] mem_event: use wait queue when ring is full
2011-12-19 11:39 [PATCH] mem_event: use wait queue when ring is full Olaf Hering
@ 2011-12-22 11:27 ` Tim Deegan
0 siblings, 0 replies; 23+ messages in thread
From: Tim Deegan @ 2011-12-22 11:27 UTC (permalink / raw)
To: Olaf Hering; +Cc: xen-devel
At 12:39 +0100 on 19 Dec (1324298386), Olaf Hering wrote:
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
Thanks. As before, I'm happy to apply this when Adin or Andres acks it.
Cheers,
Tim.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] mem_event: use wait queue when ring is full
[not found] <mailman.4853.1324294828.12970.xen-devel@lists.xensource.com>
@ 2012-01-11 18:02 ` Andres Lagar-Cavilla
2012-01-12 13:59 ` Olaf Hering
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2012-01-11 18:02 UTC (permalink / raw)
To: xen-devel; +Cc: olaf, tim, adin
> Date: Mon, 19 Dec 2011 12:39:46 +0100
> From: Olaf Hering <olaf@aepfle.de>
> To: xen-devel@lists.xensource.com
> Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is
> full
> Message-ID: <d910d738f31b4ee1a6b0.1324294786@probook.site>
> Content-Type: text/plain; charset="us-ascii"
>
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1324294734 -3600
> # Node ID d910d738f31b4ee1a6b0727ac0a66cff4c8933b0
> # Parent 03138a08366b895d79e143119d4c9c72833cdbcd
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
Olaf,
thanks for the post. We'll have to nack this patch in its current form: it
hard-reboots our host during our testing.
What we did is take this patch and amalgamate it with some bits from our ring
management approach. We're ready to submit that, along with a description
of how we test it. It works for us, and it involves wait queues for
corner cases.
Stay tuned, test, and hopefully, ack.
Thanks
Andres
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
>
> v7:
> - add ring accounting so that each vcpu can place at least one request
>
> v6:
> - take foreign requests into account before calling wake_up_nr()
> - call wake_up_nr() outside of ring lock
> - rename ->bit to ->pause_flag
> - update comment in mem_event_enable()
>
> v5:
> - rename mem_event_check_ring() to mem_event_claim_slot()
> - rename mem_event_put_req_producers() to mem_event_release_slot()
> - add local/foreign request accounting
> - keep room for at least one guest request
>
> v4:
> - fix off-by-one bug in _mem_event_put_request
> - add mem_event_wake_requesters() and use wake_up_nr()
> - rename mem_event_mark_and_pause() and mem_event_mark_and_pause()
> functions
> - req_producers counts foreign request producers, rename member
>
> v3:
> - rename ->mem_event_bit to ->bit
> - remove me_ from new VPF_ defines
>
> v2:
> - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise
> the
> vcpu will not wake_up after a wait_event because the pause_count was
> increased twice. Fixes guest hangs.
> - update free space check in _mem_event_put_request()
> - simplify mem_event_put_request()
>
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
>
> diff -r 03138a08366b -r d910d738f31b xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4172,8 +4172,8 @@ static int hvm_memory_event_traps(long p
> if ( (p & HVMPME_onchangeonly) && (value == old) )
> return 1;
>
> - rc = mem_event_check_ring(d, &d->mem_event->access);
> - if ( rc )
> + rc = mem_event_claim_slot(d, &d->mem_event->access);
> + if ( rc < 0 )
> return rc;
>
> memset(&req, 0, sizeof(req));
> diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -23,6 +23,7 @@
>
> #include <asm/domain.h>
> #include <xen/event.h>
> +#include <xen/wait.h>
> #include <asm/p2m.h>
> #include <asm/mem_event.h>
> #include <asm/mem_paging.h>
> @@ -41,6 +42,7 @@ static int mem_event_enable(
> struct domain *d,
> xen_domctl_mem_event_op_t *mec,
> struct mem_event_domain *med,
> + int pause_flag,
> xen_event_channel_notification_t notification_fn)
> {
> int rc;
> @@ -110,8 +112,25 @@ static int mem_event_enable(
>
> mem_event_ring_lock_init(med);
>
> - /* Wake any VCPUs paused for memory events */
> - mem_event_unpause_vcpus(d);
> + med->pause_flag = pause_flag;
> +
> + /*
> + * Configure ring accounting:
> + * Each guest vcpu should be able to place at least one request.
> + * If there are more vcpus than available slots in the ring, not all
> vcpus
> + * can place requests in the ring anyway. A minimum (arbitrary)
> number of
> + * foreign requests will be allowed in this case.
> + */
> + if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
> + med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
> + if ( med->max_foreign < 13 )
> + med->max_foreign = 13;
> + med->max_target = RING_SIZE(&med->front_ring) - med->max_foreign;
> +
> + init_waitqueue_head(&med->wq);
> +
> + /* Wake any VCPUs waiting for the ring to appear */
> + mem_event_wake_waiters(d, med);
>
> return 0;
>
> @@ -127,6 +146,9 @@ static int mem_event_enable(
>
> static int mem_event_disable(struct mem_event_domain *med)
> {
> + if (!list_empty(&med->wq.list))
> + return -EBUSY;
> +
> unmap_domain_page(med->ring_page);
> med->ring_page = NULL;
>
> @@ -136,14 +158,30 @@ static int mem_event_disable(struct mem_
> return 0;
> }
>
> -void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req)
> +static int _mem_event_put_request(struct domain *d,
> + struct mem_event_domain *med,
> + mem_event_request_t *req,
> + int *done)
> {
> mem_event_front_ring_t *front_ring;
> + int free_req, claimed_req, ret;
> RING_IDX req_prod;
>
> + if ( *done )
> + return 1;
> +
> mem_event_ring_lock(med);
>
> front_ring = &med->front_ring;
> +
> + /* Foreign requests must succeed because their vcpus can not sleep */
> + claimed_req = med->foreign_producers;
> + free_req = RING_FREE_REQUESTS(front_ring);
> + if ( !free_req || ( current->domain == d && free_req <= claimed_req )
> ) {
> + mem_event_ring_unlock(med);
> + return 0;
> + }
> +
> req_prod = front_ring->req_prod_pvt;
>
> if ( current->domain != d )
> @@ -156,14 +194,45 @@ void mem_event_put_request(struct domain
> memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
> req_prod++;
>
> + ret = 1;
> + *done = 1;
> + free_req--;
> +
> + /* Update accounting */
> + if ( current->domain == d )
> + {
> + med->target_producers--;
> + /* Ring is full, no more requests from this vcpu, go to sleep */
> + if ( free_req < med->max_target )
> + ret = 0;
> + }
> + else
> + med->foreign_producers--;
> +
> /* Update ring */
> - med->req_producers--;
> front_ring->req_prod_pvt = req_prod;
> RING_PUSH_REQUESTS(front_ring);
>
> mem_event_ring_unlock(med);
>
> notify_via_xen_event_channel(d, med->xen_port);
> +
> + return ret;
> +}
> +
> +void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med,
> + mem_event_request_t *req)
> +{
> + int done = 0;
> + /* Go to sleep if request came from guest */
> + if (current->domain == d) {
> + wait_event(med->wq, _mem_event_put_request(d, med, req, &done));
> + return;
> + }
> + /* Ring was full anyway, unable to sleep in non-guest context */
> + if (!_mem_event_put_request(d, med, req, &done))
> + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n",
> d->domain_id,
> + req->type, req->flags, (unsigned long)req->gfn);
> }
>
> int mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp)
> @@ -195,32 +264,97 @@ int mem_event_get_response(struct mem_ev
> return 1;
> }
>
> -void mem_event_unpause_vcpus(struct domain *d)
> +/**
> + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
> + * ring. Only as many as can place another request in the ring will
> + * resume execution.
> + */
> +void mem_event_wake_requesters(struct mem_event_domain *med)
> +{
> + int free_req;
> +
> + mem_event_ring_lock(med);
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + free_req -= med->foreign_producers;
> + mem_event_ring_unlock(med);
> +
> + if ( free_req )
> + wake_up_nr(&med->wq, free_req);
> +}
> +
> +/**
> + * mem_event_wake_waiters - Wake all vcpus waiting for the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
> + * become available.
> + */
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med)
> {
> struct vcpu *v;
>
> for_each_vcpu ( d, v )
> - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
> + if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
> vcpu_wake(v);
> }
>
> -void mem_event_mark_and_pause(struct vcpu *v)
> +/**
> + * mem_event_mark_and_sleep - Put vcpu to sleep
> + * @v: guest vcpu
> + * @med: mem_event ring
> + *
> + * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
> + * The vcpu will resume execution in mem_event_wake_waiters().
> + */
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med)
> {
> - set_bit(_VPF_mem_event, &v->pause_flags);
> + set_bit(med->pause_flag, &v->pause_flags);
> vcpu_sleep_nosync(v);
> }
>
> -void mem_event_put_req_producers(struct mem_event_domain *med)
> +/**
> + * mem_event_release_slot - Release a claimed slot
> + * @med: mem_event ring
> + *
> + * mem_event_release_slot() releases a claimed slot in the mem_event
> ring.
> + */
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med)
> {
> mem_event_ring_lock(med);
> - med->req_producers--;
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> mem_event_ring_unlock(med);
> }
>
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
> +/**
> + * mem_event_claim_slot - Check state of a mem_event ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * Return codes: < 0: the ring is not yet configured
> + * 0: the ring has some room
> + * > 0: the ring is full
> + *
> + * mem_event_claim_slot() checks the state of the given mem_event ring.
> + * If the current vcpu belongs to the guest domain, the function assumes
> that
> + * mem_event_put_request() will sleep until the ring has room again.
> + * A guest can always place at least one request.
> + *
> + * If the current vcpu does not belong to the target domain the caller
> must try
> + * again until there is room. A slot is claimed and the caller can place
> a
> + * request. If the caller does not need to send a request, the claimed
> slot has
> + * to be released with mem_event_release_slot().
> + */
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
> {
> - struct vcpu *curr = current;
> - int free_requests;
> + int free_req;
> int ring_full = 1;
>
> if ( !med->ring_page )
> @@ -228,15 +362,19 @@ int mem_event_check_ring(struct domain *
>
> mem_event_ring_lock(med);
>
> - free_requests = RING_FREE_REQUESTS(&med->front_ring);
> - if ( med->req_producers < free_requests )
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> +
> + if ( current->domain == d )
> {
> - med->req_producers++;
> + med->target_producers++;
> ring_full = 0;
> }
> -
> - if ( ring_full && (curr->domain == d) )
> - mem_event_mark_and_pause(curr);
> + else if ( med->foreign_producers + med->target_producers < free_req
> &&
> + med->foreign_producers < med->max_foreign )
> + {
> + med->foreign_producers++;
> + ring_full = 0;
> + }
>
> mem_event_ring_unlock(med);
>
> @@ -316,7 +454,7 @@ int mem_event_domctl(struct domain *d, x
> if ( p2m->pod.entry_count )
> break;
>
> - rc = mem_event_enable(d, mec, med, mem_paging_notification);
> + rc = mem_event_enable(d, mec, med, _VPF_mem_paging,
> mem_paging_notification);
> }
> break;
>
> @@ -355,7 +493,7 @@ int mem_event_domctl(struct domain *d, x
> if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> break;
>
> - rc = mem_event_enable(d, mec, med, mem_access_notification);
> + rc = mem_event_enable(d, mec, med, _VPF_mem_access,
> mem_access_notification);
> }
> break;
>
> diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/mem_sharing.c
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
> #endif
>
>
> -static struct page_info* mem_sharing_alloc_page(struct domain *d,
> - unsigned long gfn)
> +static void mem_sharing_notify_helper(struct domain *d, unsigned long
> gfn)
> {
> - struct page_info* page;
> struct vcpu *v = current;
> - mem_event_request_t req;
> -
> - page = alloc_domheap_page(d, 0);
> - if(page != NULL) return page;
> -
> - memset(&req, 0, sizeof(req));
> - req.type = MEM_EVENT_TYPE_SHARED;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
>
> if ( v->domain != d )
> {
> @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
> gdprintk(XENLOG_ERR,
> "Failed alloc on unshare path for foreign (%d)
> lookup\n",
> d->domain_id);
> - return page;
> + return;
> }
>
> - vcpu_pause_nosync(v);
> - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
> + if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
> + return;
>
> - if(mem_event_check_ring(d, &d->mem_event->share)) return page;
> -
> + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
> req.gfn = gfn;
> req.p2mt = p2m_ram_shared;
> req.vcpu_id = v->vcpu_id;
> mem_event_put_request(d, &d->mem_event->share, &req);
> -
> - return page;
> + vcpu_pause_nosync(v);
> }
>
> unsigned int mem_sharing_get_nr_saved_mfns(void)
> @@ -656,7 +646,7 @@ gfn_found:
> if(ret == 0) goto private_page_found;
>
> old_page = page;
> - page = mem_sharing_alloc_page(d, gfn);
> + page = alloc_domheap_page(d, 0);
> if(!page)
> {
> /* We've failed to obtain memory for private page. Need to re-add
> the
> @@ -664,6 +654,7 @@ gfn_found:
> list_add(&gfn_info->list, &hash_entry->gfns);
> put_gfn(d, gfn);
> shr_unlock();
> + mem_sharing_notify_helper(d, gfn);
> return -ENOMEM;
> }
>
> diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
> */
> void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
> {
> - struct vcpu *v = current;
> - mem_event_request_t req;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn
> };
>
> - /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
> - {
> - /* Send release notification to pager */
> - memset(&req, 0, sizeof(req));
> - req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
> - req.gfn = gfn;
> - req.vcpu_id = v->vcpu_id;
> + /* Send release notification to pager */
> + req.flags = MEM_EVENT_FLAG_DROP_PAGE;
>
> - mem_event_put_request(d, &d->mem_event->paging, &req);
> - }
> + mem_event_put_request(d, &d->mem_event->paging, &req);
> }
>
> /**
> @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) )
> + if ( mem_event_claim_slot(d, &d->mem_event->paging) )
> return;
>
> memset(&req, 0, sizeof(req));
> @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
> else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
> {
> /* gfn is already on its way back and vcpu is not paused */
> - mem_event_put_req_producers(&d->mem_event->paging);
> + mem_event_release_slot(d, &d->mem_event->paging);
> return;
> }
>
> @@ -1080,8 +1072,8 @@ void p2m_mem_paging_resume(struct domain
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
> }
>
> - /* Unpause any domains that were paused because the ring was full */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->paging);
> }
>
> bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned
> long gla,
> @@ -1115,7 +1107,7 @@ bool_t p2m_mem_access_check(unsigned lon
> p2m_unlock(p2m);
>
> /* Otherwise, check if there is a memory event listener, and send the
> message along */
> - res = mem_event_check_ring(d, &d->mem_event->access);
> + res = mem_event_claim_slot(d, &d->mem_event->access);
> if ( res < 0 )
> {
> /* No listener */
> @@ -1125,7 +1117,7 @@ bool_t p2m_mem_access_check(unsigned lon
> "Memory access permissions failure, no mem_event
> listener: pausing VCPU %d, dom %d\n",
> v->vcpu_id, d->domain_id);
>
> - mem_event_mark_and_pause(v);
> + mem_event_mark_and_sleep(v, &d->mem_event->access);
> }
> else
> {
> @@ -1186,9 +1178,11 @@ void p2m_mem_access_resume(struct domain
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
> }
>
> - /* Unpause any domains that were paused because the ring was full or
> no listener
> - * was available */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->access);
> +
> + /* Unpause all vcpus that were paused because no listener was
> available */
> + mem_event_wake_waiters(d, &d->mem_event->access);
> }
>
>
> diff -r 03138a08366b -r d910d738f31b xen/include/asm-x86/mem_event.h
> --- a/xen/include/asm-x86/mem_event.h
> +++ b/xen/include/asm-x86/mem_event.h
> @@ -24,13 +24,13 @@
> #ifndef __MEM_EVENT_H__
> #define __MEM_EVENT_H__
>
> -/* Pauses VCPU while marking pause flag for mem event */
> -void mem_event_mark_and_pause(struct vcpu *v);
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
> -void mem_event_put_req_producers(struct mem_event_domain *med);
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med);
> void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req);
> int mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp);
> -void mem_event_unpause_vcpus(struct domain *d);
> +void mem_event_wake_requesters(struct mem_event_domain *med);
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med);
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med);
>
> int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
> XEN_GUEST_HANDLE(void) u_domctl);
> diff -r 03138a08366b -r d910d738f31b xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -14,6 +14,7 @@
> #include <xen/nodemask.h>
> #include <xen/radix-tree.h>
> #include <xen/multicall.h>
> +#include <xen/wait.h>
> #include <public/xen.h>
> #include <public/domctl.h>
> #include <public/sysctl.h>
> @@ -183,7 +184,11 @@ struct mem_event_domain
> {
> /* ring lock */
> spinlock_t ring_lock;
> - unsigned int req_producers;
> + /* The ring has 64 entries */
> + unsigned char foreign_producers;
> + unsigned char max_foreign;
> + unsigned char target_producers;
> + unsigned char max_target;
> /* shared page */
> mem_event_shared_page_t *shared_page;
> /* shared ring page */
> @@ -192,6 +197,10 @@ struct mem_event_domain
> mem_event_front_ring_t front_ring;
> /* event channel port (vcpu0 only) */
> int xen_port;
> + /* mem_event bit for vcpu->pause_flags */
> + int pause_flag;
> + /* list of vcpus waiting for room in the ring */
> + struct waitqueue_head wq;
> };
>
> struct mem_event_per_domain
> @@ -615,9 +624,12 @@ static inline struct domain *next_domain
> /* VCPU affinity has changed: migrating to a new CPU. */
> #define _VPF_migrating 3
> #define VPF_migrating (1UL<<_VPF_migrating)
> - /* VCPU is blocked on memory-event ring. */
> -#define _VPF_mem_event 4
> -#define VPF_mem_event (1UL<<_VPF_mem_event)
> + /* VCPU is blocked due to missing mem_paging ring. */
> +#define _VPF_mem_paging 4
> +#define VPF_mem_paging (1UL<<_VPF_mem_paging)
> + /* VCPU is blocked due to missing mem_access ring. */
> +#define _VPF_mem_access 5
> +#define VPF_mem_access (1UL<<_VPF_mem_access)
>
> static inline int vcpu_runnable(struct vcpu *v)
> {
>
>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] mem_event: use wait queue when ring is full
2012-01-11 18:02 ` Andres Lagar-Cavilla
@ 2012-01-12 13:59 ` Olaf Hering
2012-01-12 16:11 ` Andres Lagar-Cavilla
0 siblings, 1 reply; 23+ messages in thread
From: Olaf Hering @ 2012-01-12 13:59 UTC (permalink / raw)
To: Andres Lagar-Cavilla; +Cc: xen-devel, tim, adin
On Wed, Jan 11, Andres Lagar-Cavilla wrote:
> > mem_event: use wait queue when ring is full
> >
> > This change is based on an idea/patch from Adin Scannell.
>
> Olaf,
> thanks for the post. We'll have to nack this patch in its current form. It
> hard reboots our host during our testing.
That's very unfortunate. I saw such unexpected reboots myself a few
weeks ago. I suspect they were caused by an incorrect debug change which
I had on top of my waitqueue changes. Also, the fixes Keir provided a few
weeks ago may have helped.
Is it an otherwise unmodified xen-unstable build, or do you use other
patches as well? What's your environment and workload in dom0 and domU,
anyway?
It would be very good to know why the reboots happen. Perhaps such
failures cannot be debugged without special hardware or a detailed
code review.
I just tested an otherwise unmodified xen-unstable build and did not
encounter reboots while ballooning a single 2G guest up and down. The
guest just hung after a few iterations, most likely because v7 of my
patch again (or still?) has the math wrong in the ring accounting. I
will check what the issue is. I think v6 was OK in that respect, but I
will double-check that older version as well.
> What we did is take this patch, amalgamate it with some bits from our ring
> management approach. We're ready to submit that, along with a description
> of how we test it. It works for us, and it involves wait queue's for
> corner cases.
Now, if the patch you just sent out uses wait queues as well, and using
wait queues causes sudden host reboots for reasons not yet known, how is
your patch any better, other than that the reboots don't appear to happen
anymore?
I did not use anything but paging for testing; perhaps I should also run
some access tests. How should I use tools/tests/xen-access/xen-access.c?
Olaf
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] mem_event: use wait queue when ring is full
2012-01-12 13:59 ` Olaf Hering
@ 2012-01-12 16:11 ` Andres Lagar-Cavilla
2012-01-12 17:50 ` Adin Scannell
0 siblings, 1 reply; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2012-01-12 16:11 UTC (permalink / raw)
To: Olaf Hering; +Cc: xen-devel, tim, adin
> On Wed, Jan 11, Andres Lagar-Cavilla wrote:
>
>> > mem_event: use wait queue when ring is full
>> >
>> > This change is based on an idea/patch from Adin Scannell.
>>
>> Olaf,
>> thanks for the post. We'll have to nack this patch in its current form.
>> It
>> hard reboots our host during our testing.
>
> Thats very unfortunate. I have seen such unexpected reboots myself a few
> weeks ago. I suspect they were caused by an incorrect debug change which
> I had on top of my waitqueue changes. Also the fixes Keir provided a few
> weeks ago may have helped.
>
> Is it an otherwise unmodified xen-unstable build, or do you use other
> patches as well? Whats your environment and workload anyway in dom0 and
> domU?
>
> It would be very good to know why the reboots happen. Perhaps such
> failures can not be debugged without special hardware, or a detailed
> code review.
>
>
> I just tested an otherwise unmodified xen-unstable build and did not
> encounter reboots while ballooning a single 2G guest up and down. The
> guest did just hang after a few iterations, most likely because v7 of my
> patch again (or still?) has the math wrong in the ring accounting. I
> will check what the issue is. I think v6 was ok in that respect, but I
> will double check that older version as well.
>
>
>> What we did is take this patch, amalgamate it with some bits from our
>> ring
>> management approach. We're ready to submit that, along with a
>> description
>> of how we test it. It works for us, and it involves wait queue's for
>> corner cases.
>
> Now if the patch you just sent out uses wait queues as well, and using
> wait queues causes sudden host reboots for reasons not yet known, how is
> your patch any better other that the reboots dont appear to happen
> anymore?
I believe you were missing some unlocks, which were triggering ASSERTs
when going into a wait queue.
In any case, the patch was crashing; we spent quite some time merging it
all towards the endgame we all want (wait queues and better ring logic),
and now it doesn't seem to crash.
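For context, a minimal sketch of the locking rule at play, using the names
from Olaf's patch above; the body is illustrative only, not code that either
side actually merged:

    /*
     * Sketch: the predicate handed to wait_event() must drop the ring lock
     * on every exit path before it returns, because a vcpu that returns 0
     * here is about to go to sleep on the wait queue.  Forgetting the
     * unlock on the "ring full" path is exactly the kind of slip that can
     * trip wait-queue ASSERTs and wedge other vcpus on the lock.
     */
    static int ring_put_or_retry(struct mem_event_domain *med,
                                 mem_event_request_t *req)
    {
        int rc = 0;

        mem_event_ring_lock(med);
        if ( RING_FREE_REQUESTS(&med->front_ring) )
        {
            /* ... copy *req into the ring and push it ... */
            rc = 1;
        }
        mem_event_ring_unlock(med);   /* must happen on both paths */

        return rc;   /* 0 => wait_event() puts the current vcpu to sleep */
    }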
But obviously our testing rigs are quite different, which is a good thing.
I'll post the mem access testing code, with a description of how we drive
that test.
Thanks!
Andres
>
> I did not use anything but paging for testing, perhaps I should also run
> some access tests. How should I use tools/tests/xen-access/xen-access.c?
>
> Olaf
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] mem_event: use wait queue when ring is full
2012-01-12 16:11 ` Andres Lagar-Cavilla
@ 2012-01-12 17:50 ` Adin Scannell
[not found] ` <B28ADCC9-CC5A-479D-8A7C-38FF4DB78A55@gridcentric.ca>
0 siblings, 1 reply; 23+ messages in thread
From: Adin Scannell @ 2012-01-12 17:50 UTC (permalink / raw)
To: Olaf Hering; +Cc: Andres Lagar-Cavilla, xen-devel, tim
On Thu, Jan 12, 2012 at 11:11 AM, Andres Lagar-Cavilla
<andres@lagarcavilla.org> wrote:
>>> What we did is take this patch, amalgamate it with some bits from our
>>> ring
>>> management approach. We're ready to submit that, along with a
>>> description
>>> of how we test it. It works for us, and it involves wait queue's for
>>> corner cases.
>>
>> Now if the patch you just sent out uses wait queues as well, and using
>> wait queues causes sudden host reboots for reasons not yet known, how is
>> your patch any better other that the reboots dont appear to happen
>> anymore?
I didn't spend a lot of time diagnosing exactly what was going wrong
with the patch. I did have some local patches applied (in the process
of being submitted to the list) and some debug changes to ensure that
the correct code paths were hit, so it's quite possible that it may
have been my mistake. If so, I apologize. I didn't want to spend a lot
of time debugging and I'd had a similar experience with waitqueues in
the fall.
As Andres pointed out, we spent time merging our local approach into
your patch and testing that one. As a result of the combination, I
also did a few interface changes to ensure that callers use the
mem_event code correctly (i.e. calls to wake() are handled internally,
rather than relying on callers), and dropped some of the complexity of
accounting separately for foreign mappers. With the waitqueue
failsafe in place, I don't think that's necessary. Anyway, I tried
to preserve the spirit of your code, and would love to hear thoughts.
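One possible reading of "calls to wake() are handled internally" (an
assumption about the not-yet-posted patch, expressed with the helpers from
Olaf's version above, not a quote from the actual code):

    /*
     * Fold the wakeup into the response-consuming path, so the
     * p2m_*_resume() callers cannot forget it: consuming a response
     * frees a ring slot, which is precisely when waiters can make
     * progress again.
     */
    static int mem_event_get_response_and_wake(struct mem_event_domain *med,
                                               mem_event_response_t *rsp)
    {
        int rc = mem_event_get_response(med, rsp);

        if ( rc )
            mem_event_wake_requesters(med);

        return rc;
    }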
We'll be doing more testing today to ensure that we've properly
exercised all the different code paths (including wait queues).
Cheers,
-Adin
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] mem_event: use wait queue when ring is full
[not found] ` <B28ADCC9-CC5A-479D-8A7C-38FF4DB78A55@gridcentric.ca>
@ 2012-01-12 19:22 ` Andres Lagar-Cavilla
0 siblings, 0 replies; 23+ messages in thread
From: Andres Lagar-Cavilla @ 2012-01-12 19:22 UTC (permalink / raw)
To: adin; +Cc: andres, xen-devel, tim, olaf
>
> I didn't spend a lot of time diagnosing exactly what was going wrong
> with the patch. I did have some local patches applied (in the process
> of being submitted to the list) and some debug changes to ensure that
> the correct code paths were hit, so it's quite possible that it may
> have been my mistake. If so, I apologize. I didn't want to spend a lot
> of time debugging and I'd had a similar experience with waitqueues in
> the fall.
>
> As Andres pointed out, we spent time merging our local approach into
> your patch and testing that one. As a result of the combination, I
> also did a few interface changes to ensure that callers use the
> mem_event code correctly (i.e. calls to wake() are handled internally,
> rather than relying on callers), and dropped some of the complexity of
> accounting separately for foreign mappers. With the waitqueue
> failsafe in place, I don't think that's necessary. Anyways, I tried
> to preserve the spirit of your code, and would love to hear thoughts.
>
> We'll be doing more testing today to ensure that we've properly
> exercised all the different code paths (including wait queues).
It works.
1. Fire off 1GB HVM with PV drivers. Enable balloon
2. Fire off xenpaging
3. xenstore write memory/target-tot_pages 524288
(wait until everything is paged)
4. xenstore write memory/target 524288
No crashes: neither the domain, nor the host, nor xenpaging. Phew ;)
Olaf, let us know if you have further concerns; afaict the patch is ready
for showtime.
Andres
>
> Cheers,
> -Adin
>>>> What we did is take this patch, amalgamate it with some bits from our
>>>> ring
>>>> management approach. We're ready to submit that, along with a
>>>> description
>>>> of how we test it. It works for us, and it involves wait queue's for
>>>> corner cases.
>>>
>>> Now if the patch you just sent out uses wait queues as well, and using
>>> wait queues causes sudden host reboots for reasons not yet known, how
>>> is
>>> your patch any better other that the reboots dont appear to happen
>>> anymore?
>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread
Thread overview: 23+ messages
2011-12-19 11:39 [PATCH] mem_event: use wait queue when ring is full Olaf Hering
2011-12-22 11:27 ` Tim Deegan
[not found] <mailman.4853.1324294828.12970.xen-devel@lists.xensource.com>
2012-01-11 18:02 ` Andres Lagar-Cavilla
2012-01-12 13:59 ` Olaf Hering
2012-01-12 16:11 ` Andres Lagar-Cavilla
2012-01-12 17:50 ` Adin Scannell
[not found] ` <B28ADCC9-CC5A-479D-8A7C-38FF4DB78A55@gridcentric.ca>
2012-01-12 19:22 ` Andres Lagar-Cavilla
[not found] <mailman.4227.1323785898.12970.xen-devel@lists.xensource.com>
2011-12-15 14:56 ` Andres Lagar-Cavilla
2011-12-16 16:40 ` Olaf Hering
2011-12-16 17:04 ` Andres Lagar-Cavilla
2011-12-16 17:33 ` Olaf Hering
[not found] <mailman.3873.1323460242.12970.xen-devel@lists.xensource.com>
2011-12-10 5:22 ` Andres Lagar-Cavilla
2011-12-13 13:40 ` Olaf Hering
-- strict thread matches above, loose matches on Subject: below --
2011-12-09 19:23 Olaf Hering
2011-12-15 12:43 ` Tim Deegan
2011-12-15 13:15 ` Olaf Hering
[not found] <mailman.3332.1323083995.12970.xen-devel@lists.xensource.com>
2011-12-05 15:45 ` Andres Lagar-Cavilla
2011-12-05 16:20 ` Olaf Hering
2011-12-05 16:34 ` Andres Lagar-Cavilla
2011-12-07 13:20 ` Olaf Hering
2011-12-07 16:27 ` Andres Lagar-Cavilla
2011-12-05 11:19 Olaf Hering
2011-12-05 11:33 ` Olaf Hering