From: Bob Liu <lliubbo@gmail.com>
To: xen-devel@lists.xenproject.org
Cc: keir@xen.org, ian.campbell@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	JBeulich@suse.com
Subject: [PATCH v2 3/3] xen: use idle vcpus to scrub pages
Date: Mon, 30 Jun 2014 21:39:44 +0800
Message-ID: <1404135584-29206-3-git-send-email-bob.liu@oracle.com>
In-Reply-To: <1404135584-29206-1-git-send-email-bob.liu@oracle.com>

To cope with heavy contention on the heap lock, use two percpu lists
(flow sketched below):
 - Delist a batch of pages from the _heap[] free lists onto a percpu
   scrub list.
 - Scrub the pages on that list and move them to a percpu free list.
 - Return the clean pages to _heap[], merging with adjacent chunks
   where possible.
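
A minimal sketch of one idle-loop pass, assuming hypothetical helpers
take_scrub_batch(), scrub_chunks() and free_clean_chunks() that stand
in for the real hunks in the patch below:

    void scrub_free_pages_sketch(unsigned int cpu)
    {
        struct page_list_head *scrub = &this_cpu(scrub_list_cpu);
        struct page_list_head *clean = &this_cpu(free_list_cpu);

        while ( !softirq_pending(cpu) )
        {
            /* 1. Under heap_lock: delist up to one batch of dirty pages. */
            if ( page_list_empty(scrub) && !take_scrub_batch(scrub) )
                break;
            /* 2. Lock dropped: scrub each chunk, move it to 'clean'. */
            scrub_chunks(scrub, clean);
            /* 3. Under heap_lock again: merge clean chunks back into _heap[]. */
            free_clean_chunks(clean);
        }
    }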

v2:
* Avoid having two hyperthreads within the same core scrubbing at the
  same time (see the guard sketched after this list)
* Limit each batch moved to the percpu list to (1<<SCRUB_BATCH_ORDER)
  pages, i.e. 4096 pages or 16MB with 4KB pages
* Don't spin on the heap lock when there is nothing to scrub
* Partially NUMA-aware
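
The hyperthread item reduces to the following guard at the top of
scrub_free_pages() (reproduced from the hunk below for readability):

    /* Return if a hyperthread sibling already started scrubbing. */
    for_each_cpu( i, per_cpu(cpu_sibling_mask, cpu) )
        if ( per_cpu(is_scrubbing, i) )
            return;
    this_cpu(is_scrubbing) = 1;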

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 xen/arch/arm/domain.c   |    1 +
 xen/arch/x86/domain.c   |    1 +
 xen/common/page_alloc.c |  130 +++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/mm.h    |    1 +
 4 files changed, 133 insertions(+)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 04d0cd0..b6bc3ac 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -44,6 +44,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
 
+        scrub_free_pages();
         local_irq_disable();
         if ( cpu_is_haltable(smp_processor_id()) )
         {
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e896210..e8d4fe7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
+        scrub_free_pages();
         (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ab293c8..6ab1d1d 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -86,6 +86,12 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+/* Rough per-node flag: the node may hold pages that still need scrubbing. */
+static bool_t node_need_scrub[MAX_NUMNODES];
+static DEFINE_PER_CPU(bool_t, is_scrubbing);
+static DEFINE_PER_CPU(struct page_list_head, scrub_list_cpu);
+static DEFINE_PER_CPU(struct page_list_head, free_list_cpu);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -948,6 +954,7 @@ static void free_heap_pages(
     {
         if ( !tainted )
         {
+            node_need_scrub[node] = 1;
             for ( i = 0; i < (1 << order); i++ )
                 pg[i].count_info |= PGC_need_scrub;
         }
@@ -1525,7 +1532,130 @@ void __init scrub_heap_pages(void)
     setup_low_mem_virq();
 }
 
+#define SCRUB_BATCH_ORDER 12
+static void __scrub_free_pages(unsigned int node, unsigned int cpu)
+{
+    struct page_info *pg, *tmp;
+    unsigned int i;
+    int order;
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+    struct page_list_head *local_free_list = &this_cpu(free_list_cpu);
+
+    /* Scrub percpu list */
+    while ( !page_list_empty(local_scrub_list) )
+    {
+        pg = page_list_remove_head(local_scrub_list);
+        ASSERT( pg && PFN_ORDER(pg) <= SCRUB_BATCH_ORDER );
+        order = PFN_ORDER(pg);
+        for ( i = 0; i < (1 << order); i++ )
+        {
+            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+            scrub_one_page(&pg[i]);
+        }
+        page_list_add_tail(pg, local_free_list);
+        if ( softirq_pending(cpu) )
+            return;
+    }
+
+    /* Return the percpu free list to the heap, merging chunks. */
+    if ( !page_list_empty(local_free_list) )
+    {
+        spin_lock(&heap_lock);
+        page_list_for_each_safe( pg, tmp, local_free_list )
+        {
+            order = PFN_ORDER(pg);
+            page_list_del(pg, local_free_list);
+            for ( i = 0; i < (1 << order); i++ )
+            {
+                pg[i].count_info |= PGC_state_free;
+                pg[i].count_info &= ~PGC_need_scrub;
+            }
+            merge_free_trunks(pg, order, node, page_to_zone(pg), 0);
+        }
+        spin_unlock(&heap_lock);
+    }
+}
+
+void scrub_free_pages(void)
+{
+    int order;
+    struct page_info *pg, *tmp;
+    unsigned int i, zone, nr_delisted = 0;
+    unsigned int cpu = smp_processor_id();
+    unsigned int node = cpu_to_node(cpu);
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+
+    /* Return if our sibling already started scrubbing */
+    for_each_cpu( i, per_cpu(cpu_sibling_mask, cpu) )
+        if ( per_cpu(is_scrubbing, i) )
+            return;
+    this_cpu(is_scrubbing) = 1;
+
+    while ( !softirq_pending(cpu) )
+    {
+        if ( !node_need_scrub[node] )
+        {
+            /* Drain the local percpu lists before exiting. */
+            __scrub_free_pages(node, cpu);
+            goto out;
+        }
+
+        /* Delist a batch of dirty pages from the global heap free lists. */
+        if ( page_list_empty(local_scrub_list) )
+        {
+            spin_lock(&heap_lock);
+            for ( zone = 0; zone < NR_ZONES; zone++ )
+            {
+                for ( order = MAX_ORDER; order >= 0; order-- )
+                {
+                    page_list_for_each_safe( pg, tmp, &heap(node, zone, order) )
+                    {
+                        if ( !test_bit(_PGC_need_scrub, &pg->count_info) )
+                            continue;
+
+                        page_list_del(pg, &heap(node, zone, order));
+                        if ( order > SCRUB_BATCH_ORDER )
+                        {
+                            /* Put back the surplus beyond the batch order. */
+                            i = order;
+                            while ( i != SCRUB_BATCH_ORDER )
+                            {
+                                PFN_ORDER(pg) = --i;
+                                page_list_add_tail(pg, &heap(node, zone, i));
+                                pg += 1 << i;
+                            }
+                            PFN_ORDER(pg) = SCRUB_BATCH_ORDER;
+                        }
+
+                        for ( i = 0; i < (1 << PFN_ORDER(pg)); i++ )
+                        {
+                            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+                            ASSERT( !test_bit(_PGC_broken, &pg[i].count_info) );
+                            mark_page_offline(&pg[i], 0);
+                        }
+                        page_list_add_tail(pg, local_scrub_list);
+                        nr_delisted += ( 1 << PFN_ORDER(pg) );
+                        if ( nr_delisted >= (1 << SCRUB_BATCH_ORDER) )
+                        {
+                            nr_delisted = 0;
+                            spin_unlock(&heap_lock);
+                            goto start_scrub;
+                        }
+                    }
+                }
+            }
+
+            node_need_scrub[node] = 0;
+            spin_unlock(&heap_lock);
+        }
+
+ start_scrub:
+        __scrub_free_pages(node, cpu);
+    }
+
+ out:
+    this_cpu(is_scrubbing) = 0;
+}
 
 /*************************
  * XEN-HEAP SUB-ALLOCATOR
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..1fa8c3d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -78,6 +78,7 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 
 void scrub_heap_pages(void);
+void scrub_free_pages(void);
 
 int assign_pages(
     struct domain *d,
-- 
1.7.10.4


Thread overview: 25+ messages
2014-06-30 13:39 [PATCH v2 1/3] xen: delay page scrubbing to allocation path Bob Liu
2014-06-30 13:39 ` [PATCH v2 2/3] xen: introduce function merge_free_trunks Bob Liu
2014-06-30 15:58   ` Jan Beulich
2014-07-01  8:14     ` Bob Liu
2014-07-01  8:27       ` Jan Beulich
2014-06-30 13:39 ` Bob Liu [this message]
2014-07-01  9:12   ` [PATCH v2 3/3] xen: use idle vcpus to scrub pages Jan Beulich
2014-07-01 12:25     ` Bob Liu
2014-07-01 12:59       ` Jan Beulich
2014-07-02  6:27         ` Bob Liu
2014-07-07 12:20           ` Bob Liu
2014-07-15  9:16         ` Bob Liu
2014-07-23  0:38           ` Konrad Rzeszutek Wilk
2014-07-23  1:30             ` Bob Liu
2014-07-23  7:28           ` Jan Beulich
2014-07-24  2:08             ` Bob Liu
2014-07-24  6:24               ` Jan Beulich
2014-07-25  0:42                 ` Bob Liu
2014-07-25  6:51                   ` Jan Beulich
2014-07-25  7:28                     ` Bob Liu
2014-07-25  7:36                       ` Jan Beulich
2014-07-25  8:18                         ` Bob Liu
2014-07-25  8:28                           ` Jan Beulich
2014-06-30 15:56 ` [PATCH v2 1/3] xen: delay page scrubbing to allocation path Jan Beulich
2014-07-01  8:12   ` Bob Liu
