From: Bob Liu <bob.liu@oracle.com>
Subject: [PATCH v2 3/3] xen: use idle vcpus to scrub pages
Date: Mon, 30 Jun 2014 21:39:44 +0800
Message-ID: <1404135584-29206-3-git-send-email-bob.liu@oracle.com>
In-Reply-To: <1404135584-29206-1-git-send-email-bob.liu@oracle.com>
References: <1404135584-29206-1-git-send-email-bob.liu@oracle.com>
To: xen-devel@lists.xenproject.org
Cc: keir@xen.org, ian.campbell@citrix.com, George.Dunlap@eu.citrix.com,
    andrew.cooper3@citrix.com, JBeulich@suse.com

Scrub pages that have been marked PGC_need_scrub from the idle vcpu loop.
In case of heavy lock contention, use two percpu lists:
 - Delist a batch of pages from the _heap[] free page list onto a percpu
   scrub list.
 - Scrub the pages on this percpu list and add them to another percpu
   free list.
 - Free those clean pages back to _heap[], merging with other chunks if
   needed.

v2:
 * Avoid having two hyperthreads within the same core both doing scrubbing.
 * Limit the number of pages delisted each time to (1 << SCRUB_BATCH_ORDER).

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
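Notes (illustration only, not part of the patch): the per-cpu flow described
above can be modelled by the small standalone C program below. The names
(heap_dirty, cpu_scrub_list, cpu_free_list, delist_batch, ...) are invented
for the example, and locking, NUMA nodes and page orders are deliberately
left out; the program only mirrors the three steps of delist / scrub /
free-back. A second, equally simplified sketch of the
order > SCRUB_BATCH_ORDER splitting path follows the diff.

  /* Standalone model of the per-cpu scrub flow (NOT Xen code). */
  #include <stdio.h>

  struct page { int dirty; struct page *next; };

  static struct page *heap_dirty;     /* stands in for pages with PGC_need_scrub */
  static struct page *cpu_scrub_list; /* percpu list of pages to scrub */
  static struct page *cpu_free_list;  /* percpu list of already-clean pages */
  static struct page *heap_clean;     /* scrubbed pages returned to the heap */

  /* Step 1: delist a batch of dirty pages (heap lock held in the real code). */
  static void delist_batch(int batch)
  {
      while ( batch-- && heap_dirty )
      {
          struct page *pg = heap_dirty;
          heap_dirty = pg->next;
          pg->next = cpu_scrub_list;
          cpu_scrub_list = pg;
      }
  }

  /* Step 2: scrub without holding the heap lock. */
  static void scrub_batch(void)
  {
      while ( cpu_scrub_list )
      {
          struct page *pg = cpu_scrub_list;
          cpu_scrub_list = pg->next;
          pg->dirty = 0;                /* scrub_one_page() in the real code */
          pg->next = cpu_free_list;
          cpu_free_list = pg;
      }
  }

  /* Step 3: give the clean pages back in one short locked section. */
  static void free_batch(void)
  {
      while ( cpu_free_list )
      {
          struct page *pg = cpu_free_list;
          cpu_free_list = pg->next;
          pg->next = heap_clean;
          heap_clean = pg;
      }
  }

  int main(void)
  {
      struct page pages[8];
      int clean = 0;

      for ( int i = 0; i < 8; i++ )
      {
          pages[i].dirty = 1;
          pages[i].next = heap_dirty;
          heap_dirty = &pages[i];
      }
      delist_batch(4);
      scrub_batch();
      free_batch();
      for ( struct page *pg = heap_clean; pg; pg = pg->next )
          clean += !pg->dirty;
      printf("clean pages returned to heap: %d\n", clean); /* prints 4 */
      return 0;
  }

Compiled with any C99 compiler and run, it reports 4 clean pages back on the
heap, matching the batch size passed to delist_batch().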
 xen/arch/arm/domain.c   |    1 +
 xen/arch/x86/domain.c   |    1 +
 xen/common/page_alloc.c |  130 +++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/mm.h    |    1 +
 4 files changed, 133 insertions(+)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 04d0cd0..b6bc3ac 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -44,6 +44,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
 
+        scrub_free_pages();
         local_irq_disable();
         if ( cpu_is_haltable(smp_processor_id()) )
         {
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e896210..e8d4fe7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
+        scrub_free_pages();
         (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ab293c8..6ab1d1d 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -86,6 +86,12 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+/* A rough flag to indicate whether a node have need_scrub pages */
+static bool_t node_need_scrub[MAX_NUMNODES];
+static DEFINE_PER_CPU(bool_t, is_scrubbing);
+static DEFINE_PER_CPU(struct page_list_head, scrub_list_cpu);
+static DEFINE_PER_CPU(struct page_list_head, free_list_cpu);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -948,6 +954,7 @@ static void free_heap_pages(
     {
         if ( !tainted )
         {
+            node_need_scrub[node] = 1;
             for ( i = 0; i < (1 << order); i++ )
                 pg[i].count_info |= PGC_need_scrub;
         }
@@ -1525,7 +1532,130 @@ void __init scrub_heap_pages(void)
     setup_low_mem_virq();
 }
 
+#define SCRUB_BATCH_ORDER 12
+static void __scrub_free_pages(unsigned int node, unsigned int cpu)
+{
+    struct page_info *pg, *tmp;
+    unsigned int i;
+    int order;
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+    struct page_list_head *local_free_list = &this_cpu(free_list_cpu);
+
+    /* Scrub percpu list */
+    while ( !page_list_empty(local_scrub_list) )
+    {
+        pg = page_list_remove_head(local_scrub_list);
+        order = PFN_ORDER(pg);
+        ASSERT( pg && order <= SCRUB_BATCH_ORDER );
+        for ( i = 0; i < (1 << order); i++ )
+        {
+            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+            scrub_one_page(&pg[i]);
+        }
+        page_list_add_tail(pg, local_free_list);
+        if ( softirq_pending(cpu) )
+            return;
+    }
+
+    /* free percpu free list */
+    if ( !page_list_empty(local_free_list) )
+    {
+        spin_lock(&heap_lock);
+        page_list_for_each_safe( pg, tmp, local_free_list )
+        {
+            order = PFN_ORDER(pg);
+            page_list_del(pg, local_free_list);
+            for ( i = 0; i < (1 << order); i++ )
+            {
+                pg[i].count_info |= PGC_state_free;
+                pg[i].count_info &= ~PGC_need_scrub;
+            }
+            merge_free_trunks(pg, order, node, page_to_zone(pg), 0);
+        }
+        spin_unlock(&heap_lock);
+    }
+}
+
+void scrub_free_pages(void)
+{
+    int order;
+    struct page_info *pg, *tmp;
+    unsigned int i, zone, nr_delisted = 0;
+    unsigned int cpu = smp_processor_id();
+    unsigned int node = cpu_to_node(cpu);
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+
+    /* Return if our sibling already started scrubbing */
+    for_each_cpu( i, per_cpu(cpu_sibling_mask,cpu) )
+        if ( per_cpu(is_scrubbing, i) )
+            return;
+    this_cpu(is_scrubbing) = 1;
+
+    while ( !softirq_pending(cpu) )
+    {
+        if ( !node_need_scrub[node] )
+        {
+            /* Free local per cpu list before we exit */
+            __scrub_free_pages(node, cpu);
+            goto out;
+        }
+
+        /* Delist a batch of pages from global scrub list */
+        if ( page_list_empty(local_scrub_list) )
+        {
+            spin_lock(&heap_lock);
+            for ( zone = 0; zone < NR_ZONES; zone++ )
+            {
+                for ( order = MAX_ORDER; order >= 0; order-- )
+                {
+                    page_list_for_each_safe( pg, tmp, &heap(node, zone, order) )
+                    {
+                        if ( !test_bit(_PGC_need_scrub, &(pg->count_info)) )
+                            continue;
+
+                        page_list_del( pg, &heap(node, zone, order) );
+                        if ( order > SCRUB_BATCH_ORDER)
+                        {
+                            /* putback extra pages */
+                            i = order;
+                            while ( i != SCRUB_BATCH_ORDER )
+                            {
+                                PFN_ORDER(pg) = --i;
+                                page_list_add_tail(pg, &heap(node, zone, i));
+                                pg += 1 << i;
+                            }
+                            PFN_ORDER(pg) = SCRUB_BATCH_ORDER;
+                        }
+
+                        for ( i = 0; i < (1 << PFN_ORDER(pg)); i++ )
+                        {
+                            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+                            ASSERT( !test_bit(_PGC_broken, &pg[i].count_info) );
+                            mark_page_offline(&pg[i], 0);
+                        }
+                        page_list_add_tail(pg, local_scrub_list);
+                        nr_delisted += ( 1 << PFN_ORDER(pg) );
+                        if ( nr_delisted >= (1 << SCRUB_BATCH_ORDER) )
+                        {
+                            nr_delisted = 0;
+                            spin_unlock(&heap_lock);
+                            goto start_scrub;
+                        }
+                    }
+                }
+            }
+
+            node_need_scrub[node] = 0;
+            spin_unlock(&heap_lock);
+        }
+ start_scrub:
+        __scrub_free_pages(node, cpu);
+    }
+
+ out:
+    this_cpu(is_scrubbing) = 0;
+}
 
 /*************************
  * XEN-HEAP SUB-ALLOCATOR
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..1fa8c3d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -78,6 +78,7 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 
 void scrub_heap_pages(void);
+void scrub_free_pages(void);
 
 int assign_pages(
     struct domain *d,
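One more illustration (again not part of the patch): when scrub_free_pages()
finds a free chunk larger than the batch size, the inner while loop above
returns the leading halves of the chunk to the heap and keeps only the tail
2^SCRUB_BATCH_ORDER pages for scrubbing. The standalone model below uses
made-up pfn numbers and printf in place of the real heap lists, and shrinks
SCRUB_BATCH_ORDER to 3 so the output stays short.

  /* Standalone model of the "putback extra pages" split (NOT Xen code). */
  #include <stdio.h>

  #define SCRUB_BATCH_ORDER 3           /* 12 in the patch */

  int main(void)
  {
      unsigned int order = 6;           /* pretend we found a 2^6-page chunk */
      unsigned long pfn = 0;            /* start of the chunk */
      unsigned int i = order;

      while ( i != SCRUB_BATCH_ORDER )
      {
          --i;
          /* The patch does: PFN_ORDER(pg) = i; page_list_add_tail(pg, ...); */
          printf("put back pfns %lu..%lu as an order-%u chunk\n",
                 pfn, pfn + (1UL << i) - 1, i);
          pfn += 1UL << i;              /* pg += 1 << i in the patch */
      }
      printf("keep pfns %lu..%lu (order %u) for scrubbing\n",
             pfn, pfn + (1UL << SCRUB_BATCH_ORDER) - 1, SCRUB_BATCH_ORDER);
      return 0;
  }

Run with a plain C compiler, it prints the put-back ranges at orders 5, 4
and 3 followed by the kept order-3 range, which is the same split the patch
performs before scrubbing an oversized chunk.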
-- 
1.7.10.4