xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	ian.jackson@eu.citrix.com, tim@xen.org, jbeulich@suse.com,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v4 4/8] mm: Scrub memory from idle loop
Date: Fri, 19 May 2017 11:50:36 -0400	[thread overview]
Message-ID: <1495209040-11101-5-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1495209040-11101-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages during guest destruction (from
free_heap_pages()) do this opportunistically, from the idle loop.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v4:
* Be careful with tasklets in idle_loop()
* Use per-cpu mapcache override
* Update node_to_scrub() algorithm to select closest node (and add comment
   explaining what it does)
* Put buddy back in the heap directly (as opposed to using merge_and_free_buddy()
  which is dropped anyway)
* Don't stop scrubbing immediately when softirq is pending, try to scrub at least
  a few (8) pages.

 xen/arch/arm/domain.c      |  16 ++++---
 xen/arch/x86/domain.c      |   3 +-
 xen/arch/x86/domain_page.c |   8 ++--
 xen/common/page_alloc.c    | 113 +++++++++++++++++++++++++++++++++++++++------
 xen/include/xen/mm.h       |   1 +
 5 files changed, 117 insertions(+), 24 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 76310ed..9931ca2 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -46,15 +46,19 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
 
-        local_irq_disable();
-        if ( cpu_is_haltable(smp_processor_id()) )
+        do_tasklet();
+
+        if ( cpu_is_haltable(smp_processor_id()) && !scrub_free_pages() )
         {
-            dsb(sy);
-            wfi();
+            local_irq_disable();
+            if ( cpu_is_haltable(smp_processor_id()) )
+            {
+                dsb(sy);
+                wfi();
+            }
+            local_irq_enable();
         }
-        local_irq_enable();
 
-        do_tasklet();
         do_softirq();
         /*
          * We MUST be last (or before dsb, wfi). Otherwise after we get the
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 13cdc50..229711f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -118,8 +118,9 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
-        (*pm_idle)();
         do_tasklet();
+        if ( cpu_is_haltable(smp_processor_id()) && !scrub_free_pages() )
+            (*pm_idle)();
         do_softirq();
         /*
          * We MUST be last (or before pm_idle). Otherwise after we get the
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 71baede..cfe7cc1 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -18,12 +18,14 @@
 #include <asm/hardirq.h>
 #include <asm/setup.h>
 
-static struct vcpu *__read_mostly override;
+static DEFINE_PER_CPU(struct vcpu *, override);
 
 static inline struct vcpu *mapcache_current_vcpu(void)
 {
+    struct vcpu *v, *this_vcpu = this_cpu(override);
+
     /* In the common case we use the mapcache of the running VCPU. */
-    struct vcpu *v = override ?: current;
+    v = this_vcpu ?: current;
 
     /*
      * When current isn't properly set up yet, this is equivalent to
@@ -59,7 +61,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
 
 void __init mapcache_override_current(struct vcpu *v)
 {
-    override = v;
+    this_cpu(override) = v;
 }
 
 #define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index b7c7426..6e505b1 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1010,15 +1010,79 @@ static int reserve_offlined_page(struct page_info *head)
     return count;
 }
 
-static void scrub_free_pages(unsigned int node)
+static nodemask_t node_scrubbing;
+
+/*
+ * If get_node is true this will return closest node that needs to be scrubbed,
+ * with appropriate bit in node_scrubbing set.
+ * If get_node is not set, this will return *a* node that needs to be scrubbed.
+ * node_scrubbing bitmask will not be updated.
+ * If no node needs scrubbing then NUMA_NO_NODE is returned.
+ */
+static unsigned int node_to_scrub(bool get_node)
 {
-    struct page_info *pg;
-    unsigned int zone;
+    nodeid_t node = cpu_to_node(smp_processor_id()), local_node;
+    nodeid_t closest = NUMA_NO_NODE;
+    u8 dist, shortest = 0xff;
 
-    ASSERT(spin_is_locked(&heap_lock));
+    if ( node == NUMA_NO_NODE )
+        node = 0;
 
-    if ( !node_need_scrub[node] )
-        return;
+    if ( node_need_scrub[node] &&
+         (!get_node || !node_test_and_set(node, node_scrubbing)) )
+        return node;
+
+    /*
+     * See if there are memory-only nodes that need scrubbing and choose
+     * the closest one.
+     */
+    local_node = node;
+    while ( 1 )
+    {
+        do {
+            node = cycle_node(node, node_online_map);
+        } while ( !cpumask_empty(&node_to_cpumask(node)) &&
+                  (node != local_node) );
+
+        if ( node == local_node )
+            break;
+
+        if ( node_need_scrub[node] )
+        {
+            if ( !get_node )
+                return node;
+
+            dist = __node_distance(local_node, node);
+            if ( dist < shortest || closest == NUMA_NO_NODE )
+            {
+                if ( !node_test_and_set(node, node_scrubbing) )
+                {
+                    if ( closest != NUMA_NO_NODE )
+                        node_clear(closest, node_scrubbing);
+                    shortest = dist;
+                    closest = node;
+                }
+            }
+        }
+    }
+
+    return closest;
+}
+
+bool scrub_free_pages(void)
+{
+    struct page_info *pg;
+    unsigned int zone;
+    unsigned int cpu = smp_processor_id();
+    bool preempt = false;
+    nodeid_t node;
+    unsigned int cnt = 0;
+  
+    node = node_to_scrub(true);
+    if ( node == NUMA_NO_NODE )
+        return false;
+ 
+    spin_lock(&heap_lock);
 
     for ( zone = 0; zone < NR_ZONES; zone++ )
     {
@@ -1035,22 +1099,46 @@ static void scrub_free_pages(unsigned int node)
 
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
+                    cnt++;
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
                         pg[i].count_info &= ~PGC_need_scrub;
                         node_need_scrub[node]--;
+                        cnt += 100; /* scrubbed pages add heavier weight. */
                     }
-                }
 
-                page_list_del(pg, &heap(node, zone, order));
-                page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+                    /*
+                     * Scrub a few (8) pages before becoming eligible for
+                     * preemption. But also count non-scrubbing loop iterations
+                     * so that we don't get stuck here with an almost clean
+                     * heap.
+                     */
+                    if ( softirq_pending(cpu) && cnt > 800 )
+                    {
+                        preempt = true;
+                        break;
+                    }
+                }
 
-                if ( node_need_scrub[node] == 0 )
-                    return;
+                if ( i == (1U << order) )
+                {
+                    page_list_del(pg, &heap(node, zone, order));
+                    page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+                }
+                else
+                    pg->u.free.first_dirty = i + 1;
+ 
+                if ( preempt || (node_need_scrub[node] == 0) )
+                    goto out;
             }
         } while ( order-- != 0 );
     }
+
+ out:
+    spin_unlock(&heap_lock);
+    node_clear(node, node_scrubbing);
+    return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
 
 /* Free 2^@order set of pages. */
@@ -1166,9 +1254,6 @@ static void free_heap_pages(
     if ( tainted )
         reserve_offlined_page(pg);
 
-    if ( need_scrub )
-        scrub_free_pages(node);
-
     spin_unlock(&heap_lock);
 }
 
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 0d4b7c2..ed90a61 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -138,6 +138,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe);
 void xenheap_max_mfn(unsigned long mfn);
 void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
 void free_xenheap_pages(void *v, unsigned int order);
+bool scrub_free_pages(void);
 #define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
 #define free_xenheap_page(v) (free_xenheap_pages(v,0))
 /* Map machine page range in Xen virtual address space. */
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2017-05-19 15:50 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-19 15:50 [PATCH v4 0/8] Memory scrubbing from idle loop Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 1/8] mm: Place unscrubbed pages at the end of pagelist Boris Ostrovsky
2017-06-09 14:50   ` Jan Beulich
2017-06-09 20:07     ` Boris Ostrovsky
2017-06-12  6:50       ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 2/8] mm: Extract allocation loop from alloc_heap_pages() Boris Ostrovsky
2017-06-09 15:08   ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 3/8] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-06-09 15:22   ` Jan Beulich
2017-06-09 20:55     ` Boris Ostrovsky
2017-06-12  6:54       ` Jan Beulich
2017-05-19 15:50 ` Boris Ostrovsky [this message]
2017-06-12  8:08   ` [PATCH v4 4/8] mm: Scrub memory from idle loop Jan Beulich
2017-06-12 17:01     ` Boris Ostrovsky
2017-06-12 21:28       ` Dario Faggioli
2017-06-13  8:19         ` Jan Beulich
2017-06-13 18:39           ` Boris Ostrovsky
2017-06-13 20:36             ` Dario Faggioli
2017-06-13 21:54               ` Boris Ostrovsky
2017-06-14  9:18             ` Jan Beulich
2017-06-13  8:12       ` Jan Beulich
2017-06-13 18:20         ` Boris Ostrovsky
2017-06-14  9:17           ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 5/8] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-06-12  8:23   ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing Boris Ostrovsky
2017-06-12  8:30   ` Jan Beulich
2017-06-12 17:11     ` Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 7/8] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 8/8] mm: Make sure pages are scrubbed Boris Ostrovsky
2017-06-12  8:43   ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1495209040-11101-5-git-send-email-boris.ostrovsky@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=sstabellini@kernel.org \
    --cc=tim@xen.org \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).