From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	ian.jackson@eu.citrix.com, tim@xen.org, julien.grall@arm.com,
	jbeulich@suse.com, Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v7 7/9] mm: Keep heap accessible to others while scrubbing
Date: Tue,  8 Aug 2017 17:45:05 -0400
Message-ID: <1502228707-31883-8-git-send-email-boris.ostrovsky@oracle.com>
In-Reply-To: <1502228707-31883-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages while holding the heap lock we can mark the
buddy's head as being scrubbed and drop the lock temporarily. If
someone (most likely alloc_heap_pages()) tries to access this chunk it
will signal the scrubber to abort by setting the head's scrub_state to
BUDDY_SCRUB_ABORT. The scrubber checks this state after processing
each page and stops its work as soon as it sees the request.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v7:
* Replaced page_info with typeof(head->u.free) in check_and_stop_scrub()
* Replaced 1UL with 1U in scrub_free_pages()
* Fixed formatting in asm-*/mm.h
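
For reference, the synchronization added here reduces to the following
handshake (a standalone model for illustration only, not the patch's
code: the real implementation works on page_info bitfields, relies on
the heap lock for the state transitions at scrub start/finish, and
uses ACCESS_ONCE()/cpu_relax() rather than C11 atomics):

#include <stdatomic.h>

enum { BUDDY_NOT_SCRUBBING, BUDDY_SCRUBBING, BUDDY_SCRUB_ABORT };

static _Atomic int scrub_state = BUDDY_NOT_SCRUBBING;

/*
 * Waiter side, as in check_and_stop_scrub(): request an abort, then
 * spin until the scrubber acknowledges. The CAS stands in for the
 * heap lock, which in the real code makes the BUDDY_SCRUBBING check
 * and the abort request one atomic step.
 */
static void stop_scrub(void)
{
    int expected = BUDDY_SCRUBBING;

    if ( !atomic_compare_exchange_strong(&scrub_state, &expected,
                                         BUDDY_SCRUB_ABORT) )
        return; /* Scrubber wasn't running (or already finished). */

    while ( atomic_load(&scrub_state) == BUDDY_SCRUB_ABORT )
        ; /* cpu_relax() in the real code. */
}

/*
 * Scrubber side, as in scrub_free_pages(): poll for an abort request
 * after each page; on abort, acknowledge and bail out, reporting how
 * far we got (the real code publishes this via first_dirty).
 */
static unsigned int scrub(unsigned int nr_pages)
{
    unsigned int i;

    atomic_store(&scrub_state, BUDDY_SCRUBBING);
    for ( i = 0; i < nr_pages; i++ )
    {
        /* scrub_one_page(&pg[i]); */
        if ( atomic_load(&scrub_state) == BUDDY_SCRUB_ABORT )
            break;
    }
    atomic_store(&scrub_state, BUDDY_NOT_SCRUBBING);
    return i;
}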

 xen/common/page_alloc.c  | 110 +++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/mm.h |  28 +++++++-----
 xen/include/asm-x86/mm.h |  30 ++++++++-----
 3 files changed, 143 insertions(+), 25 deletions(-)
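
One more note, on the 'val' member the patch adds to the 'free' union:
ACCESS_ONCE() cannot be applied to a bitfield, so the union overlays
the bitfields with a plain word that can be snapshotted in a single
read and then picked apart locally, which is what the wait loop in
check_and_stop_scrub() does. A standalone model of the idiom
(illustration only; the bitfield widths here are made up and differ
from Xen's):

#include <stdio.h>

union free_state {
    struct {
        unsigned long need_tlbflush:1;
        unsigned long first_dirty:19;  /* stand-in for MAX_ORDER + 1 */
        unsigned long scrub_state:2;
    };
    unsigned long val;                 /* whole-word view of the above */
};

int main(void)
{
    union free_state f = { .val = 0 };
    union free_state snap;

    f.scrub_state = 2;                 /* BUDDY_SCRUB_ABORT */

    /*
     * Read the whole word once (ACCESS_ONCE(head->u.free.val) in the
     * patch), then extract the bitfield from the local snapshot.
     */
    snap.val = *(volatile unsigned long *)&f.val;

    printf("scrub_state = %lu\n", (unsigned long)snap.scrub_state);
    return 0;
}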

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index b886983..726f857 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -687,6 +687,7 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
 {
     PFN_ORDER(pg) = order;
     pg->u.free.first_dirty = first_dirty;
+    pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
 
     if ( first_dirty != INVALID_DIRTY_IDX )
     {
@@ -697,6 +698,25 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state == BUDDY_SCRUBBING )
+    {
+        typeof(head->u.free) pgfree;
+
+        head->u.free.scrub_state = BUDDY_SCRUB_ABORT;
+        spin_lock_kick();
+        for ( ; ; )
+        {
+            /* Can't ACCESS_ONCE() a bitfield. */
+            pgfree.val = ACCESS_ONCE(head->u.free.val);
+            if ( pgfree.scrub_state != BUDDY_SCRUB_ABORT )
+                break;
+            cpu_relax();
+        }
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -741,14 +761,19 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                        return pg;
                     /*
                      * We grab single pages (order=0) even if they are
                      * unscrubbed. Given that scrubbing one page is fairly quick
                      * it is not worth breaking higher orders.
                      */
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
                         return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -929,6 +954,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1079,6 +1105,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1101,25 +1150,53 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(pg->u.free.scrub_state == BUDDY_NOT_SCRUBBING);
+                pg->u.free.scrub_state = BUDDY_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
+
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
                     else
                         cnt++;
 
+                    if ( pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order) - 1) ?
+                            INVALID_DIRTY_IDX : i + 1;
+                        smp_wmb();
+                        pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemption. But also count non-scrubbing loop iterations
@@ -1133,6 +1210,23 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                /*
+                 * get_free_buddy() grabs a buddy with first_dirty set to
+                 * INVALID_DIRTY_IDX so we can't set pg's first_dirty here.
+                 * It will be set either below or in the lock callback (in
+                 * scrub_continue()).
+                 */
+                st.first_dirty = (i >= (1U << order) - 1) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i >= (1U << order) - 1 )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1141,6 +1235,8 @@ bool scrub_free_pages(void)
                 else
                     pg->u.free.first_dirty = i + 1;
 
+                pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1149,6 +1245,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return node_to_scrub(false) != NUMA_NO_NODE;
 }
@@ -1230,6 +1328,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             /* Keep predecessor's first_dirty if it is already set. */
@@ -1251,6 +1351,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
 
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
         }
 
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 572337c..d42b070 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -42,18 +42,26 @@ struct page_info
             unsigned long type_info;
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            bool need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more bit than maximum possible order to accommodate
-             * INVALID_DIRTY_IDX.
-             */
+        union {
+            struct {
+                /* Do TLBs need flushing for safety before next page use? */
+                bool need_tlbflush:1;
+
+                /*
+                 * Index of the first *possibly* unscrubbed page in the buddy.
+                 * One more bit than maximum possible order to accommodate
+                 * INVALID_DIRTY_IDX.
+                 */
 #define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
-            unsigned long first_dirty:MAX_ORDER + 1;
+                unsigned long first_dirty:MAX_ORDER + 1;
+
+#define BUDDY_NOT_SCRUBBING    0
+#define BUDDY_SCRUBBING        1
+#define BUDDY_SCRUB_ABORT      2
+                unsigned long scrub_state:2;
+            };
 
+            unsigned long val;
         } free;
 
     } u;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 07dc963..c9cf6c3 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -86,19 +86,27 @@ struct page_info
         } sh;
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            bool need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more bit than maximum possible order to accommodate
-             * INVALID_DIRTY_IDX.
-             */
+        union {
+            struct {
+                /* Do TLBs need flushing for safety before next page use? */
+                bool need_tlbflush:1;
+
+                /*
+                 * Index of the first *possibly* unscrubbed page in the buddy.
+                 * One more bit than maximum possible order to accommodate
+                 * INVALID_DIRTY_IDX.
+                 */
 #define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
-            unsigned long first_dirty:MAX_ORDER + 1;
-        } free;
+                unsigned long first_dirty:MAX_ORDER + 1;
 
+#define BUDDY_NOT_SCRUBBING    0
+#define BUDDY_SCRUBBING        1
+#define BUDDY_SCRUB_ABORT      2
+                unsigned long scrub_state:2;
+            };
+
+            unsigned long val;
+        } free;
     } u;
 
     union {
-- 
1.8.3.1


