[PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	ian.jackson@eu.citrix.com, tim@xen.org, jbeulich@suse.com,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing
Date: Fri, 19 May 2017 11:50:38 -0400	[thread overview]
Message-ID: <1495209040-11101-7-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1495209040-11101-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages while holding the heap lock, we can mark
the buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk, it will signal the scrubber to abort the scrub by setting
the head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
processing each page and stops its work as soon as it sees it.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v4:
* Drop unnecessary ACCESS_ONCE in scrub_free_pages, add smp_wmb()
  in scrub_continue()
* Keep track of first_dirty in scrub_wait_state

 xen/common/page_alloc.c  | 100 ++++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |   4 ++
 xen/include/asm-x86/mm.h |   4 ++
 3 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6e505b1..b365305 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{ /* If @head is being scrubbed, make the scrubber stop and wait for it. */
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING ) /* scrub in progress */
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT; /* request abort */
+        spin_lock_kick(); /* presumably nudges spin_lock_cb() waiters */
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax(); /* spin until the abort bit is cleared */
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -738,9 +749,15 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                         return pg;
+
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -925,6 +942,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1069,6 +1087,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg; /* head of the buddy just scrubbed */
+    unsigned int first_dirty; /* value to restore into pg on release */
+    bool drop; /* true once buddy was handed to a waiter */
+};
+
+static void scrub_continue(void *data)
+{ /* spin_lock_cb() callback: release the buddy if someone asked for it. */
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop ) /* already released on an earlier invocation */
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb(); /* publish first_dirty before clearing state */
+        st->pg->u.free.scrub_state = 0; /* unblocks check_and_stop_scrub() */
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1090,24 +1131,51 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     cnt++;
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
 
+                    if ( pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order)) ?
+                            INVALID_DIRTY_IDX : i + 1; 
+                        smp_wmb();
+                        pg->u.free.scrub_state = 0;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemtion. But also count non-scrubbing loop iteration
@@ -1121,6 +1189,17 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                st.first_dirty = (i == (1UL << order)) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i == (1U << order) )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1128,7 +1207,9 @@ bool scrub_free_pages(void)
                 }
                 else
                     pg->u.free.first_dirty = i + 1;
- 
+
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1137,6 +1218,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1175,6 +1258,8 @@ static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
@@ -1218,6 +1303,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1240,6 +1327,9 @@ static void free_heap_pages(
                  (PFN_ORDER(successor) != order) ||
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
+
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
 
             need_scrub |= (successor->u.free.first_dirty != INVALID_DIRTY_IDX);
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 38d4fba..148d60b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -46,6 +46,10 @@ struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index e20f161..f46b242 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -90,6 +90,10 @@ struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

next prev parent reply	other threads:[~2017-05-19 15:50 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-19 15:50 [PATCH v4 0/8] Memory scrubbing from idle loop Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 1/8] mm: Place unscrubbed pages at the end of pagelist Boris Ostrovsky
2017-06-09 14:50   ` Jan Beulich
2017-06-09 20:07     ` Boris Ostrovsky
2017-06-12  6:50       ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 2/8] mm: Extract allocation loop from alloc_heap_pages() Boris Ostrovsky
2017-06-09 15:08   ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 3/8] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-06-09 15:22   ` Jan Beulich
2017-06-09 20:55     ` Boris Ostrovsky
2017-06-12  6:54       ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 4/8] mm: Scrub memory from idle loop Boris Ostrovsky
2017-06-12  8:08   ` Jan Beulich
2017-06-12 17:01     ` Boris Ostrovsky
2017-06-12 21:28       ` Dario Faggioli
2017-06-13  8:19         ` Jan Beulich
2017-06-13 18:39           ` Boris Ostrovsky
2017-06-13 20:36             ` Dario Faggioli
2017-06-13 21:54               ` Boris Ostrovsky
2017-06-14  9:18             ` Jan Beulich
2017-06-13  8:12       ` Jan Beulich
2017-06-13 18:20         ` Boris Ostrovsky
2017-06-14  9:17           ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 5/8] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-06-12  8:23   ` Jan Beulich
2017-05-19 15:50 ` Boris Ostrovsky [this message]
2017-06-12  8:30   ` [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing Jan Beulich
2017-06-12 17:11     ` Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 7/8] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 8/8] mm: Make sure pages are scrubbed Boris Ostrovsky
2017-06-12  8:43   ` Jan Beulich

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:6e505b1 dfblob:b365305 dfblob:38d4fba dfblob:148d60b
dfblob:e20f161 dfblob:f46b242 )
 OR (
bs:"[PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1495209040-11101-7-git-send-email-boris.ostrovsky@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=sstabellini@kernel.org \
    --cc=tim@xen.org \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).