From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	ian.jackson@eu.citrix.com, tim@xen.org, jbeulich@suse.com,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v5 6/8] mm: Keep heap accessible to others while scrubbing
Date: Thu, 22 Jun 2017 14:57:08 -0400	[thread overview]
Message-ID: <1498157830-21845-7-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1498157830-21845-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages while holding the heap lock, we can mark the
buddy's head as being scrubbed and drop the lock temporarily. If someone
(most likely alloc_heap_pages()) tries to access this chunk, it signals
the scrubber to abort the scrub by setting the head's BUDDY_SCRUB_ABORT
bit. The scrubber checks this bit after processing each page and stops
its work as soon as it sees the bit set.
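
As a rough illustration of the handshake, here is a minimal standalone
sketch using C11 atomics and pthreads. It only models the idea and is
not the Xen code: buddy_state stands in for page_info.u.free.scrub_state,
the pthread mutex stands in for the heap lock, and fake_page and the
loop bound are invented for the demo.

/*
 * Model of the scrub/abort handshake: the scrubber marks the buddy and
 * drops the lock; a would-be allocator requests an abort and spins until
 * the scrubber releases the buddy.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

enum { BUDDY_NOT_SCRUBBING, BUDDY_SCRUBBING, BUDDY_SCRUB_ABORT };

static _Atomic int buddy_state = BUDDY_NOT_SCRUBBING;
static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned char fake_page[4096];

/* Scrubber: mark the buddy, drop the lock, poll for an abort request. */
static void *scrubber(void *arg)
{
    (void)arg;

    pthread_mutex_lock(&heap_lock);
    atomic_store(&buddy_state, BUDDY_SCRUBBING);
    pthread_mutex_unlock(&heap_lock);             /* heap stays usable */

    for ( int i = 0; i < (1 << 16); i++ )
    {
        memset(fake_page, 0, sizeof(fake_page));  /* stand-in for scrub_one_page() */
        if ( atomic_load(&buddy_state) == BUDDY_SCRUB_ABORT )
        {
            printf("scrubber: abort requested after page %d\n", i);
            break;
        }
    }

    /* The real code publishes first_dirty here before releasing. */
    atomic_store(&buddy_state, BUDDY_NOT_SCRUBBING);
    return NULL;
}

/* Allocator: request an abort and wait until the scrubber lets go. */
static void *allocator(void *arg)
{
    (void)arg;

    pthread_mutex_lock(&heap_lock);               /* as in alloc_heap_pages() */
    if ( atomic_load(&buddy_state) == BUDDY_SCRUBBING )
    {
        atomic_store(&buddy_state, BUDDY_SCRUB_ABORT);
        while ( atomic_load(&buddy_state) == BUDDY_SCRUB_ABORT )
            ;                                     /* cpu_relax() in Xen */
    }
    pthread_mutex_unlock(&heap_lock);
    printf("allocator: buddy is free to use\n");
    return NULL;
}

int main(void)
{
    pthread_t s, a;

    pthread_create(&s, NULL, scrubber, NULL);
    usleep(1000);                                 /* give the scrubber a head start */
    pthread_create(&a, NULL, allocator, NULL);
    pthread_join(a, NULL);
    pthread_join(s, NULL);
    return 0;
}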

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v5:
* Fixed off-by-one error in setting first_dirty
* Changed struct page_info.u.free to a union to permit use of ACCESS_ONCE in
  check_and_stop_scrub() (see the sketch below this list)
* Renamed PAGE_SCRUBBING etc. macros to BUDDY_SCRUBBING etc.
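
To illustrate the ACCESS_ONCE point from the second item above: a
bitfield cannot be read with ACCESS_ONCE() directly, so the bitfields
are overlaid with a plain word that is read in a single access and then
decoded from a private copy. The union below is a simplified,
hypothetical stand-in for page_info.u.free, not the real layout, and the
unsigned long bitfields rely on GCC/Clang behaviour as in the patch.

#include <stdio.h>

#define MAX_ORDER 18   /* placeholder value for the demo */

union free_info {
    struct {
        unsigned long need_tlbflush:1;
        unsigned long first_dirty:MAX_ORDER + 2;
        unsigned long scrub_state:2;
    };
    unsigned long val; /* whole-word view of the bitfields above */
};

int main(void)
{
    union free_info shared = { .first_dirty = 5, .scrub_state = 1 };
    union free_info snap;

    /* One whole-word read (ACCESS_ONCE() on .val in the patch) ... */
    snap.val = *(volatile unsigned long *)&shared.val;

    /* ... then the bitfields of the private copy can be examined. */
    printf("first_dirty=%lu scrub_state=%lu\n",
           (unsigned long)snap.first_dirty, (unsigned long)snap.scrub_state);
    return 0;
}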

 xen/common/page_alloc.c  | 105 +++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/mm.h |  28 ++++++++-----
 xen/include/asm-x86/mm.h |  29 ++++++++-----
 3 files changed, 138 insertions(+), 24 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 4e2775f..f0e5399 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -687,6 +687,7 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
 {
     PFN_ORDER(pg) = order;
     pg->u.free.first_dirty = first_dirty;
+    pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
 
     if ( first_dirty != INVALID_DIRTY_IDX )
         page_list_add_tail(pg, &heap(node, zone, order));
@@ -694,6 +695,25 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state == BUDDY_SCRUBBING )
+    {
+        struct page_info pg;
+
+        head->u.free.scrub_state = BUDDY_SCRUB_ABORT;
+        spin_lock_kick();
+        for ( ; ; )
+        {
+            /* Can't ACCESS_ONCE() a bitfield. */
+            pg.u.free.val = ACCESS_ONCE(head->u.free.val);
+            if ( pg.u.free.scrub_state != BUDDY_SCRUB_ABORT )
+                break;
+            cpu_relax();
+        }
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -738,14 +758,19 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                        return pg;
                     /*
                      * We grab single pages (order=0) even if they are
                      * unscrubbed. Given that scrubbing one page is fairly quick
                      * it is not worth breaking higher orders.
                      */
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
                         return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -928,6 +953,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1084,6 +1110,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1106,25 +1155,53 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = BUDDY_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
+
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
                     else
                         cnt++;
 
+                    if ( pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order) - 1) ?
+                            INVALID_DIRTY_IDX : i + 1; 
+                        smp_wmb();
+                        pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemption. But also count non-scrubbing loop iterations
@@ -1138,6 +1215,17 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                st.first_dirty = (i >= (1UL << order) - 1) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i >= (1U << order) - 1 )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1146,6 +1234,8 @@ bool scrub_free_pages(void)
                 else
                     pg->u.free.first_dirty = i + 1;
 
+                pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1154,6 +1244,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1235,6 +1327,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1256,6 +1350,9 @@ static void free_heap_pages(
                  (PFN_ORDER(successor) != order) ||
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
+
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
 
             if ( successor->u.free.first_dirty != INVALID_DIRTY_IDX )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 889a85e..625aa16 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -42,18 +42,26 @@ struct page_info
             unsigned long type_info;
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            unsigned long need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more than maximum possible order (MAX_ORDER+1) to
-             * accommodate INVALID_DIRTY_IDX.
-             */
+        union {
+            struct {
+                /* Do TLBs need flushing for safety before next page use? */
+                unsigned long need_tlbflush:1;
+
+                /*
+                 * Index of the first *possibly* unscrubbed page in the buddy.
+                 * One more than maximum possible order (MAX_ORDER+1) to
+                 * accommodate INVALID_DIRTY_IDX.
+                 */
 #define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
-            unsigned long first_dirty:MAX_ORDER + 2;
+                unsigned long first_dirty:MAX_ORDER + 2;
+
+#define BUDDY_NOT_SCRUBBING   0
+#define BUDDY_SCRUBBING       1
+#define BUDDY_SCRUB_ABORT     2
+                unsigned long scrub_state:2;
+            };
 
+           unsigned long val;
         } free;
 
     } u;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index cd00bef..db6f3a5 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -86,17 +86,26 @@ struct page_info
         } sh;
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            unsigned long need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more than maximum possible order (MAX_ORDER+1) to
-             * accommodate INVALID_DIRTY_IDX.
-             */
+        union {
+	    struct {
+		/* Do TLBs need flushing for safety before next page use? */
+		unsigned long need_tlbflush:1;
+
+		/*
+		 * Index of the first *possibly* unscrubbed page in the buddy.
+		 * One more than maximum possible order (MAX_ORDER+1) to
+		 * accommodate INVALID_DIRTY_IDX.
+		 */
 #define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
-            unsigned long first_dirty:MAX_ORDER + 2;
+		unsigned long first_dirty:MAX_ORDER + 2;
+
+#define BUDDY_NOT_SCRUBBING   0
+#define BUDDY_SCRUBBING       1
+#define BUDDY_SCRUB_ABORT     2
+		unsigned long scrub_state:2;
+	    };
+
+	    unsigned long val;
         } free;
 
     } u;
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

Thread overview: 35+ messages
2017-06-22 18:57 [PATCH v5 0/8] Memory scrubbing from idle loop Boris Ostrovsky
2017-06-22 18:57 ` [PATCH v5 1/8] mm: Place unscrubbed pages at the end of pagelist Boris Ostrovsky
2017-06-27 17:06   ` Jan Beulich
2017-07-23  2:00     ` Boris Ostrovsky
2017-07-31 14:45       ` Jan Beulich
2017-07-31 16:03         ` Boris Ostrovsky
2017-08-02  9:24           ` Jan Beulich
2017-08-02 15:31             ` Boris Ostrovsky
2017-06-22 18:57 ` [PATCH v5 2/8] mm: Extract allocation loop from alloc_heap_pages() Boris Ostrovsky
2017-06-27 17:59   ` Jan Beulich
2017-06-22 18:57 ` [PATCH v5 3/8] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-06-27 18:00   ` Jan Beulich
2017-07-23  2:07     ` Boris Ostrovsky
2017-07-31 15:16       ` Jan Beulich
2017-07-31 16:07         ` Boris Ostrovsky
2017-06-22 18:57 ` [PATCH v5 4/8] mm: Scrub memory from idle loop Boris Ostrovsky
2017-06-23  8:36   ` Dario Faggioli
2017-06-27 18:01   ` Jan Beulich
2017-07-23  2:14     ` Boris Ostrovsky
2017-07-31 15:20       ` Jan Beulich
2017-07-31 16:15         ` Boris Ostrovsky
2017-08-02  9:27           ` Jan Beulich
2017-06-22 18:57 ` [PATCH v5 5/8] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-06-22 18:57 ` Boris Ostrovsky [this message]
2017-06-27 19:28   ` [PATCH v5 6/8] mm: Keep heap accessible to others while scrubbing Jan Beulich
2017-06-27 19:31     ` Jan Beulich
2017-07-23  2:28     ` Boris Ostrovsky
2017-08-02  8:34       ` Jan Beulich
2017-06-22 18:57 ` [PATCH v5 7/8] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-06-22 18:57 ` [PATCH v5 8/8] mm: Make sure pages are scrubbed Boris Ostrovsky
2017-06-27 19:29   ` Jan Beulich
2017-06-23  9:36 ` [PATCH v5 0/8] Memory scrubbing from idle loop Jan Beulich
2017-06-23 13:11   ` Boris Ostrovsky
2017-06-23 13:22     ` Jan Beulich
2017-06-23 13:29       ` Boris Ostrovsky
