From: Bob Liu <lliubbo@gmail.com>
To: xen-devel@lists.xenproject.org
Cc: keir@xen.org, ian.campbell@citrix.com,
George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
JBeulich@suse.com
Subject: [PATCH v2 3/3] xen: use idle vcpus to scrub pages
Date: Mon, 30 Jun 2014 21:39:44 +0800
Message-ID: <1404135584-29206-3-git-send-email-bob.liu@oracle.com>
In-Reply-To: <1404135584-29206-1-git-send-email-bob.liu@oracle.com>

Under heavy heap-lock contention, do the work on two percpu lists:
- Delist a batch of pages from the _heap[] free lists onto a percpu
  scrub list.
- Scrub the pages on that percpu list and move them to a percpu free
  list.
- Free those clean pages back to _heap[], merging them with adjacent
  free chunks where possible.
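
As a rough illustration of the three phases, here is a minimal,
self-contained userspace model (NR_PAGES, BATCH and the int arrays are
hypothetical stand-ins; the real code below uses Xen's page_list
primitives, PGC_need_scrub and heap_lock):

#include <stdio.h>

#define NR_PAGES 64
#define BATCH    8    /* stands in for 1 << SCRUB_BATCH_ORDER */

static int heap_dirty[NR_PAGES];        /* models _heap[] pages flagged PGC_need_scrub */
static int nr_dirty = NR_PAGES;

static int scrub_list[BATCH], nr_scrub; /* percpu list of pages awaiting scrub */
static int free_list[BATCH], nr_free;   /* percpu list of already-scrubbed pages */

int main(void)
{
    int i;

    for ( i = 0; i < NR_PAGES; i++ )
        heap_dirty[i] = i;

    while ( nr_dirty )
    {
        /* Phase 1: with the (modeled) heap lock held, delist one batch. */
        while ( nr_dirty && nr_scrub < BATCH )
            scrub_list[nr_scrub++] = heap_dirty[--nr_dirty];

        /* Phase 2: scrub with no lock held, touching only percpu state. */
        while ( nr_scrub )
            free_list[nr_free++] = scrub_list[--nr_scrub]; /* scrub_one_page() */

        /* Phase 3: retake the lock and hand the clean pages back. */
        printf("returning %d clean pages to the heap\n", nr_free);
        nr_free = 0;
    }
    return 0;
}

Only phases 1 and 3 take the (modeled) heap lock; the scrubbing work in
phase 2 touches nothing but percpu state, which is what bounds lock
contention.
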
v2:
* Avoid having two hyperthreads within the same core doing scrubbing
* Move at most (1 << SCRUB_BATCH_ORDER) pages to the percpu list in one go
* Don't spin on the heap lock when there is nothing to scrub
* Partially NUMA-aware

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 xen/arch/arm/domain.c   |   1 +
 xen/arch/x86/domain.c   |   1 +
 xen/common/page_alloc.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/mm.h    |   1 +
 4 files changed, 133 insertions(+)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 04d0cd0..b6bc3ac 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -44,6 +44,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
 
+        scrub_free_pages();
         local_irq_disable();
         if ( cpu_is_haltable(smp_processor_id()) )
         {
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e896210..e8d4fe7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
+        scrub_free_pages();
         (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ab293c8..6ab1d1d 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -86,6 +86,12 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+/* A rough flag indicating whether a node has pages that need scrubbing */
+static bool_t node_need_scrub[MAX_NUMNODES];
+static DEFINE_PER_CPU(bool_t, is_scrubbing);
+static DEFINE_PER_CPU(struct page_list_head, scrub_list_cpu); /* to be scrubbed */
+static DEFINE_PER_CPU(struct page_list_head, free_list_cpu);  /* scrubbed, to be freed */
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -948,6 +954,7 @@ static void free_heap_pages(
     {
         if ( !tainted )
         {
+            node_need_scrub[node] = 1;
             for ( i = 0; i < (1 << order); i++ )
                 pg[i].count_info |= PGC_need_scrub;
         }
@@ -1525,7 +1532,130 @@ void __init scrub_heap_pages(void)
     setup_low_mem_virq();
 }
 
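+/* Process at most 1 << SCRUB_BATCH_ORDER pages (16MB with 4KB pages) per
+ * batch, so the heap lock is dropped at a bounded interval. */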
+#define SCRUB_BATCH_ORDER 12
+static void __scrub_free_pages(unsigned int node, unsigned int cpu)
+{
+    struct page_info *pg, *tmp;
+    unsigned int i;
+    int order;
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+    struct page_list_head *local_free_list = &this_cpu(free_list_cpu);
+
+    /* Scrub the percpu scrub list */
+    while ( !page_list_empty(local_scrub_list) )
+    {
+        pg = page_list_remove_head(local_scrub_list);
+        ASSERT( pg );
+        order = PFN_ORDER(pg);
+        ASSERT( order <= SCRUB_BATCH_ORDER );
+        for ( i = 0; i < (1 << order); i++ )
+        {
+            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+            scrub_one_page(&pg[i]);
+        }
+        page_list_add_tail(pg, local_free_list);
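+        /* Yield as soon as other work is pending; scrubbing resumes on
+         * the next idle-loop entry. */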
+        if ( softirq_pending(cpu) )
+            return;
+    }
+
+    /* Free the percpu free list */
+    if ( !page_list_empty(local_free_list) )
+    {
+        spin_lock(&heap_lock);
+        page_list_for_each_safe( pg, tmp, local_free_list )
+        {
+            order = PFN_ORDER(pg);
+            page_list_del(pg, local_free_list);
+            for ( i = 0; i < (1 << order); i++ )
+            {
+                pg[i].count_info |= PGC_state_free;
+                pg[i].count_info &= ~PGC_need_scrub;
+            }
+            merge_free_trunks(pg, order, node, page_to_zone(pg), 0);
+        }
+        spin_unlock(&heap_lock);
+    }
+}
+
+void scrub_free_pages(void)
+{
+    int order;
+    struct page_info *pg, *tmp;
+    unsigned int i, zone, nr_delisted = 0;
+    unsigned int cpu = smp_processor_id();
+    unsigned int node = cpu_to_node(cpu);
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+
+    /* Return if a sibling hyperthread has already started scrubbing */
+    for_each_cpu( i, per_cpu(cpu_sibling_mask, cpu) )
+        if ( per_cpu(is_scrubbing, i) )
+            return;
+    this_cpu(is_scrubbing) = 1;
+
+    while ( !softirq_pending(cpu) )
+    {
+        if ( !node_need_scrub[node] )
+        {
+            /* Free the local percpu free list before we exit */
+            __scrub_free_pages(node, cpu);
+            goto out;
+        }
+
+        /* Delist a batch of to-be-scrubbed pages from the heap free lists */
+        if ( page_list_empty(local_scrub_list) )
+        {
+            spin_lock(&heap_lock);
+            for ( zone = 0; zone < NR_ZONES; zone++ )
+            {
+                for ( order = MAX_ORDER; order >= 0; order-- )
+                {
+                    page_list_for_each_safe( pg, tmp, &heap(node, zone, order) )
+                    {
+                        if ( !test_bit(_PGC_need_scrub, &(pg->count_info)) )
+                            continue;
+
+                        page_list_del(pg, &heap(node, zone, order));
+                        if ( order > SCRUB_BATCH_ORDER )
+                        {
+                            /* Split buddy-style, putting the excess back and
+                             * keeping the last 2^SCRUB_BATCH_ORDER pages. */
+                            i = order;
+                            while ( i != SCRUB_BATCH_ORDER )
+                            {
+                                PFN_ORDER(pg) = --i;
+                                page_list_add_tail(pg, &heap(node, zone, i));
+                                pg += 1 << i;
+                            }
+                            PFN_ORDER(pg) = SCRUB_BATCH_ORDER;
+                        }
+
+                        for ( i = 0; i < (1 << PFN_ORDER(pg)); i++ )
+                        {
+                            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+                            ASSERT( !test_bit(_PGC_broken, &pg[i].count_info) );
+                            mark_page_offline(&pg[i], 0);
+                        }
+                        page_list_add_tail(pg, local_scrub_list);
+                        nr_delisted += 1 << PFN_ORDER(pg);
+                        if ( nr_delisted >= (1 << SCRUB_BATCH_ORDER) )
+                        {
+                            nr_delisted = 0;
+                            spin_unlock(&heap_lock);
+                            goto start_scrub;
+                        }
+                    }
+                }
+            }
+
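+            /* Walked every zone and order without filling a batch: nothing
+             * on this node needs scrubbing any more. */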
+            node_need_scrub[node] = 0;
+            spin_unlock(&heap_lock);
+        }
+ start_scrub:
+        __scrub_free_pages(node, cpu);
+    }
+
+ out:
+    this_cpu(is_scrubbing) = 0;
+}
+
 /*************************
  * XEN-HEAP SUB-ALLOCATOR
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..1fa8c3d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -78,6 +78,7 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 void scrub_heap_pages(void);
+void scrub_free_pages(void);
 
 int assign_pages(
     struct domain *d,
--
1.7.10.4