From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH 2/2] xen: spread page scrubbing across all idle CPU Date: Tue, 10 Jun 2014 13:42:29 +0100 Message-ID: <5396FD35.2010703@citrix.com> References: <1402402717-26736-1-git-send-email-bob.liu@oracle.com> <1402402717-26736-2-git-send-email-bob.liu@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail6.bemta4.messagelabs.com ([85.158.143.247]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1WuLNs-0001WQ-9f for xen-devel@lists.xenproject.org; Tue, 10 Jun 2014 12:42:36 +0000 In-Reply-To: <1402402717-26736-2-git-send-email-bob.liu@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Bob Liu Cc: keir@xen.org, ian.campbell@citrix.com, jbeulich@suse.com, xen-devel@lists.xenproject.org List-Id: xen-devel@lists.xenproject.org On 10/06/14 13:18, Bob Liu wrote: > Because of page scrubbing, it's very slow to destroy a domain with large memory. > It takes around 10 minutes when destroy a guest of nearly 1 TB of memory. > > Besides the CPU running 'xl destory', this patch try to accelerate the scrubbing destroy > time by using all idle CPUs to do the scrubbing work in parallel. > > Compared with other approaches, this way has a minimum effect to the whole > system. The disadvantage is if all CPUs are not idle, the scrubbing time can't > be reduced. > > On a 60 CPUs system, the 'xl destroy' time of a 30G guest reduced from 15s to > 8s. > > Signed-off-by: Bob Liu > --- > xen/arch/x86/domain.c | 1 + > xen/common/domain.c | 4 +++ > xen/common/page_alloc.c | 73 +++++++++++++++++++++++++++++++++++++++++++++-- > xen/include/xen/mm.h | 2 ++ > 4 files changed, 78 insertions(+), 2 deletions(-) > > diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c > index 6fddd4c..ffa9b91 100644 > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -116,6 +116,7 @@ static void idle_loop(void) > { > if ( cpu_is_offline(smp_processor_id()) ) > play_dead(); > + scrub_free_pages(0); > (*pm_idle)(); > do_tasklet(); > do_softirq(); > diff --git a/xen/common/domain.c b/xen/common/domain.c > index 4291e29..7a8c6bf 100644 > --- a/xen/common/domain.c > +++ b/xen/common/domain.c > @@ -593,6 +593,10 @@ int domain_kill(struct domain *d) > BUG_ON(rc != -EAGAIN); > break; > } > + > + tasklet_init(&global_scrub_tasklet, scrub_free_pages, 1); > + tasklet_schedule_on_cpu(&global_scrub_tasklet, smp_processor_id()); > + Doesn't this result in re-init()ing the global scrub tasklet if domain_kill() is called on two domains at the same time? > for_each_vcpu ( d, v ) > unmap_vcpu_info(v); > d->is_dying = DOMDYING_dead; > diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c > index 56826b4..d51926d 100644 > --- a/xen/common/page_alloc.c > +++ b/xen/common/page_alloc.c > @@ -79,6 +79,14 @@ PAGE_LIST_HEAD(page_offlined_list); > /* Broken page list, protected by heap_lock. */ > PAGE_LIST_HEAD(page_broken_list); > > +/* Global scrub page list, protected by scrub_lock */ > +PAGE_LIST_HEAD(global_scrub_list); > +DEFINE_SPINLOCK(scrub_lock); > +struct tasklet global_scrub_tasklet; > + > +DEFINE_PER_CPU(struct page_list_head, scrub_list); > +DEFINE_PER_CPU(struct page_list_head, scrubbed_list); > + > /************************* > * BOOT-TIME ALLOCATOR > */ > @@ -1569,9 +1577,13 @@ void free_domheap_pages(struct page_info *pg, unsigned int order) > * domain has died we assume responsibility for erasure. > */ > if ( unlikely(d->is_dying) ) > + { > + spin_lock(&scrub_lock); > for ( i = 0; i < (1 << order); i++ ) > - scrub_one_page(&pg[i]); > - > + page_list_add_tail(&pg[i], &global_scrub_list); > + spin_unlock(&scrub_lock); > + goto out; > + } > free_heap_pages(pg, order); > } > else if ( unlikely(d == dom_cow) ) > @@ -1588,6 +1600,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order) > drop_dom_ref = 0; > } > > +out: > if ( drop_dom_ref ) > put_domain(d); > } > @@ -1680,6 +1693,62 @@ void scrub_one_page(struct page_info *pg) > unmap_domain_page(p); > } > > +#define SCRUB_BATCH 1024 > +void scrub_free_pages(unsigned long is_tasklet) > +{ > + struct page_info *pg; > + unsigned int i, cpu, empty = 0; bool_t empty. cpu can be initialised when declared. > + > + cpu = smp_processor_id(); > + do > + { > + if ( page_list_empty(&this_cpu(scrub_list)) ) Repeatedly using this_cpu() is bad, due to the hidden RELOC_HIDE. Instead, use something like this: struct *page_list_head this_scrub_list = &this_cpu(scrub_list) > + { > + /* Delist a batch of pages from global scrub list */ > + spin_lock(&scrub_lock); > + for( i = 0; i < SCRUB_BATCH; i++ ) > + { > + pg = page_list_remove_head(&global_scrub_list); > + if ( !pg ) > + { > + empty = 1; > + break; > + } > + page_list_add_tail(pg, &this_cpu(scrub_list)); > + } > + spin_unlock(&scrub_lock); > + } > + > + /* Scrub percpu list */ > + while ( !page_list_empty(&this_cpu(scrub_list)) ) > + { > + pg = page_list_remove_head(&this_cpu(scrub_list)); > + ASSERT(pg); > + scrub_one_page(pg); > + page_list_add_tail(pg, &this_cpu(scrubbed_list)); > + } > + > + /* Free percpu scrubbed list */ > + if ( !page_list_empty(&this_cpu(scrubbed_list)) ) > + { > + spin_lock(&heap_lock); > + while ( !page_list_empty(&this_cpu(scrubbed_list)) ) > + { > + pg = page_list_remove_head(&this_cpu(scrubbed_list)); > + __free_heap_pages(pg, 0); > + } > + spin_unlock(&heap_lock); > + } > + > + /* Global list is empty */ > + if (empty) Spaces inside brackets. ~Andrew > + return; > + } while ( !softirq_pending(cpu) ); > + > + if( is_tasklet ) > + tasklet_schedule_on_cpu(&global_scrub_tasklet, cpu); > +} > + > static void dump_heap(unsigned char key) > { > s_time_t now = NOW(); > diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h > index b183189..29821c4 100644 > --- a/xen/include/xen/mm.h > +++ b/xen/include/xen/mm.h > @@ -78,6 +78,8 @@ int query_page_offline(unsigned long mfn, uint32_t *status); > unsigned long total_free_pages(void); > > void scrub_heap_pages(void); > +void scrub_free_pages(unsigned long is_tasklet); > +extern struct tasklet global_scrub_tasklet; > > int assign_pages( > struct domain *d,