xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	ian.jackson@eu.citrix.com, tim@xen.org, jbeulich@suse.com,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v3 7/9] mm: Keep pages available for allocation while scrubbing
Date: Fri, 14 Apr 2017 11:37:36 -0400	[thread overview]
Message-ID: <1492184258-3277-8-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1492184258-3277-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages while holding the heap lock, we can mark
the buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk, it will signal the scrubber to abort the scrub by setting
the head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
processing each page and stops its work as soon as it sees it set.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v3:
* Adjusted page_info's scrub_state definitions but kept them as binary
  flags since I think having both PAGE_SCRUBBING and PAGE_SCRUB_ABORT
  bits set makes sense.

 xen/common/page_alloc.c  |   92 ++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |    4 ++
 xen/include/asm-x86/mm.h |    4 ++
 3 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 0b2dff1..514a4a1 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -780,10 +791,15 @@ static struct page_info *alloc_heap_pages(
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         !pg->u.free.dirty_head )
+                    if ( !pg->u.free.dirty_head )
                         goto found;
 
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        goto found;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -921,6 +937,8 @@ static int reserve_offlined_page(struct page_info *head)
 
     head->u.free.dirty_head = false;
 
+    check_and_stop_scrub(head);
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -1027,6 +1045,9 @@ merge_and_free_buddy(struct page_info *pg, unsigned int node,
              (phys_to_nid(page_to_maddr(buddy)) != node) )
             break;
 
+        if ( buddy->u.free.scrub_state & PAGE_SCRUBBING )
+            break;
+
         page_list_del(buddy, &heap(node, zone, order));
         need_scrub |= buddy->u.free.dirty_head;
         buddy->u.free.dirty_head = false;
@@ -1098,14 +1119,35 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
     unsigned int zone, order, scrub_order;
-    unsigned long i, num_processed, start, end;
+    unsigned long i, num_processed, start, end, dirty_cnt;
     unsigned int cpu = smp_processor_id();
     bool preempt = false, is_frag;
     nodeid_t node;
+    struct scrub_wait_state st;
 
     /* Scrubbing granularity. */
 #define SCRUB_CHUNK_ORDER  8
@@ -1134,8 +1176,13 @@ bool scrub_free_pages(void)
                 if ( !pg->u.free.dirty_head )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
                 scrub_order = MIN(order, SCRUB_CHUNK_ORDER);
-                num_processed = 0;
+                num_processed = dirty_cnt = 0;
                 is_frag = false;
                 while ( num_processed < (1UL << order) )
                 {
@@ -1145,8 +1192,24 @@ bool scrub_free_pages(void)
                         if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                         {
                             scrub_one_page(&pg[i]);
+                            /*
+                             * We can modify count_info without holding heap
+                             * lock since we effectively locked this buddy by
+                             * setting its scrub_state.
+                             */
                             pg[i].count_info &= ~PGC_need_scrub;
-                            node_need_scrub[node]--;
+                            dirty_cnt++;
+                        }
+
+                        if ( ACCESS_ONCE(pg->u.free.scrub_state) &
+                             PAGE_SCRUB_ABORT )
+                        {
+                            /* Someone wants this chunk. Drop everything. */
+                            pg->u.free.scrub_state = 0;
+                            spin_lock(&heap_lock);
+                            node_need_scrub[node] -= dirty_cnt;
+                            spin_unlock(&heap_lock);
+                            goto out_nolock;
                         }
                     }
 
@@ -1159,11 +1222,20 @@ bool scrub_free_pages(void)
                     }
                 }
 
-                start = 0;
-                end = num_processed;
+                st.pg = pg;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
 
                 page_list_del(pg, &heap(node, zone, order));
 
+                start = 0;
+                end = num_processed;
+
                 /* Merge clean pages */
                 while ( start < end )
                 {
@@ -1194,6 +1266,8 @@ bool scrub_free_pages(void)
                     end += (1UL << chunk_order);
                 }
 
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1202,6 +1276,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1240,6 +1316,8 @@ static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index abc3f6b..b333b16 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -43,6 +43,10 @@ struct page_info
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING      (1<<0)
+#define PAGE_SCRUB_ABORT    (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
             /* Set on a buddy head if the buddy has unscrubbed pages. */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 5cf528a..d00c4a1 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,10 @@ struct page_info
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING      (1<<0)
+#define PAGE_SCRUB_ABORT    (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
             /* Set on a buddy head if the buddy has unscrubbed pages. */
-- 
1.7.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2017-04-14 15:37 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-14 15:37 [PATCH v3 0/9] Memory scrubbing from idle loop Boris Ostrovsky
2017-04-14 15:37 ` [PATCH v3 1/9] mm: Separate free page chunk merging into its own routine Boris Ostrovsky
2017-05-04  9:45   ` Jan Beulich
2017-04-14 15:37 ` [PATCH v3 2/9] mm: Place unscrubbed pages at the end of pagelist Boris Ostrovsky
2017-05-04 10:17   ` Jan Beulich
2017-05-04 14:53     ` Boris Ostrovsky
2017-05-04 15:00       ` Jan Beulich
2017-05-08 16:41   ` George Dunlap
2017-05-08 16:59     ` Boris Ostrovsky
2017-04-14 15:37 ` [PATCH v3 3/9] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-05-04 14:44   ` Jan Beulich
2017-05-04 15:04     ` Boris Ostrovsky
2017-05-04 15:36       ` Jan Beulich
2017-04-14 15:37 ` [PATCH v3 4/9] mm: Scrub memory from idle loop Boris Ostrovsky
2017-05-04 15:31   ` Jan Beulich
2017-05-04 17:09     ` Boris Ostrovsky
2017-05-05 10:21       ` Jan Beulich
2017-05-05 13:42         ` Boris Ostrovsky
2017-05-05 14:10           ` Jan Beulich
2017-05-05 14:14             ` Jan Beulich
2017-05-05 14:27               ` Boris Ostrovsky
2017-05-05 14:51                 ` Jan Beulich
2017-05-05 15:23                   ` Boris Ostrovsky
2017-05-05 16:05                     ` Jan Beulich
2017-05-05 16:49                       ` Boris Ostrovsky
2017-05-08  7:14                         ` Jan Beulich
2017-05-11 10:26   ` Dario Faggioli
2017-05-11 14:19     ` Boris Ostrovsky
2017-05-11 15:48       ` Dario Faggioli
2017-05-11 17:05         ` Boris Ostrovsky
2017-05-12  8:17           ` Dario Faggioli
2017-05-12 14:42             ` Boris Ostrovsky
2017-04-14 15:37 ` [PATCH v3 5/9] mm: Do not discard already-scrubbed pages if softirqs are pending Boris Ostrovsky
2017-05-04 15:43   ` Jan Beulich
2017-05-04 17:18     ` Boris Ostrovsky
2017-05-05 10:27       ` Jan Beulich
2017-05-05 13:51         ` Boris Ostrovsky
2017-05-05 14:13           ` Jan Beulich
2017-04-14 15:37 ` [PATCH v3 6/9] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-04-14 15:37 ` Boris Ostrovsky [this message]
2017-05-04 16:03   ` [PATCH v3 7/9] mm: Keep pages available for allocation while scrubbing Jan Beulich
2017-05-04 17:26     ` Boris Ostrovsky
2017-05-05 10:28       ` Jan Beulich
2017-04-14 15:37 ` [PATCH v3 8/9] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-04-14 15:37 ` [PATCH v3 9/9] mm: Make sure pages are scrubbed Boris Ostrovsky
2017-05-05 15:05   ` Jan Beulich
2017-05-08 15:48     ` Konrad Rzeszutek Wilk
2017-05-08 16:23       ` Boris Ostrovsky
2017-05-02 14:46 ` [PATCH v3 0/9] Memory scrubbing from idle loop Boris Ostrovsky
2017-05-02 14:58   ` Jan Beulich
2017-05-02 15:07     ` Boris Ostrovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1492184258-3277-8-git-send-email-boris.ostrovsky@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=sstabellini@kernel.org \
    --cc=tim@xen.org \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).