From mboxrd@z Thu Jan 1 00:00:00 1970 From: Olaf Hering Subject: Re: [PATCH] xentrace: dynamic tracebuffer size allocation Date: Mon, 14 Mar 2011 18:33:11 +0100 Message-ID: <20110314173311.GA10655@aepfle.de> References: <20110205140717.GA3224@aepfle.de> <20110206133913.GA7487@aepfle.de> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable Return-path: Content-Disposition: inline In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: George Dunlap Cc: xen-devel@lists.xensource.com, Keir Fraser List-Id: xen-devel@lists.xenproject.org On Mon, Feb 07, George Dunlap wrote: > On Sun, Feb 6, 2011 at 1:39 PM, Olaf Hering wrote: > > @@ -85,20 +100,30 @@ static void calc_tinfo_first_offset(void > > =C2=A0} > > > > =C2=A0/** > > - * check_tbuf_size - check to make sure that the proposed size will = fit > > + * calculate_tbuf_size - check to make sure that the proposed size w= ill fit > > =C2=A0* in the currently sized struct t_info and allows prod and cons= to > > =C2=A0* reach double the value without overflow. > > =C2=A0*/ > > -static int check_tbuf_size(u32 pages) > > +static int calculate_tbuf_size(unsigned int pages) > > =C2=A0{ > > =C2=A0 =C2=A0 struct t_buf dummy; > > - =C2=A0 =C2=A0typeof(dummy.prod) size; > > - > > - =C2=A0 =C2=A0size =3D ((typeof(dummy.prod))pages) =C2=A0* PAGE_SIZE= ; > > - > > - =C2=A0 =C2=A0return (size / PAGE_SIZE !=3D pages) > > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 || (size + size < size) > > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 || (num_online_cpus() * pages + = t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t)); > > + =C2=A0 =C2=A0typeof(dummy.prod) size =3D -1; > > + > > + =C2=A0 =C2=A0/* max size holds up to n pages */ > > + =C2=A0 =C2=A0size /=3D PAGE_SIZE; >=20 > size=3D-1, then size /=3D PAGE_SIZE? Is this a clever way of finding t= he > maximum buffer size able to be pointed to? If so, it needs a comment > explaining why it works; I'm not convinced just by looking at it this > is will work properly. George, are you ok with the attached version? Allocate tracebuffers dynamically, based on the requested buffer size. Calculate t_info_size from requested t_buf size. Fix allocation failure path, free pages outside the spinlock. Remove casts for rawbuf, it can be a void pointer since no math is done. Signed-off-by: Olaf Hering --- v3: add comments to calculate_tbuf_size for side effects and max value v2: if per_cpu allocation fails, free also t_info to allow a retry with a smaller tbuf_size xen/common/trace.c | 249 ++++++++++++++++++++++------------------------= ------- 1 file changed, 104 insertions(+), 145 deletions(-) Index: xen-unstable.hg-4.1.23033/xen/common/trace.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- xen-unstable.hg-4.1.23033.orig/xen/common/trace.c +++ xen-unstable.hg-4.1.23033/xen/common/trace.c @@ -42,14 +42,14 @@ CHECK_t_buf; #define compat_t_rec t_rec #endif =20 -/* opt_tbuf_size: trace buffer size (in pages) */ -static unsigned int opt_tbuf_size =3D 0; +/* opt_tbuf_size: trace buffer size (in pages) for each cpu */ +static unsigned int opt_tbuf_size; integer_param("tbuf_size", opt_tbuf_size); =20 /* Pointers to the meta-data objects for all system trace buffers */ static struct t_info *t_info; -#define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */ -#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE)) +static unsigned int t_info_pages; + static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); @@ -78,6 +78,21 @@ static u32 tb_event_mask =3D TRC_ALL; * i.e., sizeof(_type) * ans >=3D _x. */ #define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type)) =20 +static int cpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu =3D (unsigned long)hcpu; + + if ( action =3D=3D CPU_UP_PREPARE ) + spin_lock_init(&per_cpu(t_lock, cpu)); + + return NOTIFY_DONE; +} + +static struct notifier_block cpu_nfb =3D { + .notifier_call =3D cpu_callback +}; + static void calc_tinfo_first_offset(void) { int offset_in_bytes =3D offsetof(struct t_info, mfn_offset[NR_CPUS])= ; @@ -85,20 +100,34 @@ static void calc_tinfo_first_offset(void } =20 /** - * check_tbuf_size - check to make sure that the proposed size will fit + * calculate_tbuf_size - check to make sure that the proposed size will = fit * in the currently sized struct t_info and allows prod and cons to * reach double the value without overflow. + * Initialize t_info_pages based on number of trace pages. */ -static int check_tbuf_size(u32 pages) +static int calculate_tbuf_size(unsigned int pages) { struct t_buf dummy; typeof(dummy.prod) size; - =20 - size =3D ((typeof(dummy.prod))pages) * PAGE_SIZE; - =20 - return (size / PAGE_SIZE !=3D pages) - || (size + size < size) - || (num_online_cpus() * pages + t_info_first_offset > T_INFO_= SIZE / sizeof(uint32_t)); + + /* force maximum value for an unsigned type */ + size =3D -1; + + /* max size holds up to n pages */ + size /=3D PAGE_SIZE; + if ( pages > size ) + { + gdprintk(XENLOG_INFO, "%s: requested number of %u pages reduced = to %u\n", + __func__, pages, (unsigned int)size); + pages =3D size; + } + + t_info_pages =3D num_online_cpus() * pages + t_info_first_offset; + t_info_pages *=3D sizeof(uint32_t); + t_info_pages /=3D PAGE_SIZE; + if ( t_info_pages % PAGE_SIZE ) + t_info_pages++; + return pages; } =20 /** @@ -111,47 +140,28 @@ static int check_tbuf_size(u32 pages) * This function may also be called later when enabling trace buffers=20 * via the SET_SIZE hypercall. */ -static int alloc_trace_bufs(void) +static int alloc_trace_bufs(unsigned int pages) { - int i, cpu, order; - unsigned long nr_pages; + int i, cpu, order; /* Start after a fixed-size array of NR_CPUS */ uint32_t *t_info_mfn_list; int offset; =20 - if ( opt_tbuf_size =3D=3D 0 ) - return -EINVAL; + if ( t_info ) + return -EBUSY; =20 - if ( check_tbuf_size(opt_tbuf_size) ) - { - printk("Xen trace buffers: tb size %d too large. " - "Tracing disabled.\n", - opt_tbuf_size); + if ( pages =3D=3D 0 ) return -EINVAL; - } =20 - /* t_info size is fixed for now. Currently this works great, so ther= e - * seems to be no need to make it dynamic. */ - t_info =3D alloc_xenheap_pages(get_order_from_pages(T_INFO_PAGES), 0= ); - if ( t_info =3D=3D NULL ) - { - printk("Xen trace buffers: t_info allocation failed! " - "Tracing disabled.\n"); - return -ENOMEM; - } - - for ( i =3D 0; i < T_INFO_PAGES; i++ ) - share_xen_page_with_privileged_guests( - virt_to_page(t_info) + i, XENSHARE_readonly); - - t_info_mfn_list =3D (uint32_t *)t_info; - offset =3D t_info_first_offset; + /* Calculate offset in u32 of first mfn */ + calc_tinfo_first_offset(); =20 - t_info->tbuf_size =3D opt_tbuf_size; - printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size); + pages =3D calculate_tbuf_size(pages); + order =3D get_order_from_pages(pages); =20 - nr_pages =3D opt_tbuf_size; - order =3D get_order_from_pages(nr_pages); + t_info =3D alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0= ); + if ( t_info =3D=3D NULL ) + goto out_dealloc; =20 /* * First, allocate buffers for all of the cpus. If any @@ -159,27 +169,29 @@ static int alloc_trace_bufs(void) */ for_each_online_cpu(cpu) { - int flags; - char *rawbuf; + void *rawbuf; struct t_buf *buf; =20 if ( (rawbuf =3D alloc_xenheap_pages( order, MEMF_bits(32 + PAGE_SHIFT))) =3D=3D NULL ) { - printk("Xen trace buffers: memory allocation failed\n"); - opt_tbuf_size =3D 0; + printk("Xen trace buffers: memory allocation failed on cpu %= d\n", cpu); goto out_dealloc; } =20 - spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); - - per_cpu(t_bufs, cpu) =3D buf =3D (struct t_buf *)rawbuf; + per_cpu(t_bufs, cpu) =3D buf =3D rawbuf; buf->cons =3D buf->prod =3D 0; per_cpu(t_data, cpu) =3D (unsigned char *)(buf + 1); + } =20 - spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags); + offset =3D t_info_first_offset; + t_info_mfn_list =3D (uint32_t *)t_info; =20 - } + for(i =3D 0; i < t_info_pages; i++) + share_xen_page_with_privileged_guests( + virt_to_page(t_info) + i, XENSHARE_readonly); + + t_info->tbuf_size =3D pages; =20 /* * Now share the pages to xentrace can map them, and write them in @@ -188,89 +200,75 @@ static int alloc_trace_bufs(void) for_each_online_cpu(cpu) { /* Share pages so that xentrace can map them. */ - char *rawbuf; + void *rawbuf =3D per_cpu(t_bufs, cpu); + struct page_info *p =3D virt_to_page(rawbuf); + uint32_t mfn =3D virt_to_mfn(rawbuf); =20 - if ( (rawbuf =3D (char *)per_cpu(t_bufs, cpu)) ) + for ( i =3D 0; i < pages; i++ ) { - struct page_info *p =3D virt_to_page(rawbuf); - uint32_t mfn =3D virt_to_mfn(rawbuf); + share_xen_page_with_privileged_guests(p + i, XENSHARE_writab= le); =20 - for ( i =3D 0; i < nr_pages; i++ ) - { - share_xen_page_with_privileged_guests( - p + i, XENSHARE_writable); - =20 - t_info_mfn_list[offset + i]=3Dmfn + i; - } - /* Write list first, then write per-cpu offset. */ - wmb(); - t_info->mfn_offset[cpu]=3Doffset; - printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", - cpu, mfn, offset); - offset+=3Di; + t_info_mfn_list[offset + i]=3Dmfn + i; } + t_info->mfn_offset[cpu]=3Doffset; + printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", + cpu, mfn, offset); + offset+=3Di; + + spin_lock_init(&per_cpu(t_lock, cpu)); } =20 - data_size =3D (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf)); + data_size =3D (pages * PAGE_SIZE - sizeof(struct t_buf)); t_buf_highwater =3D data_size >> 1; /* 50% high water */ + opt_tbuf_size =3D pages; + + register_cpu_notifier(&cpu_nfb); + + printk("Xen trace buffers: initialised\n"); + wmb(); /* above must be visible before tb_init_done flag set */ + tb_init_done =3D 1; =20 return 0; + out_dealloc: for_each_online_cpu(cpu) { - int flags; - char * rawbuf; - - spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); - if ( (rawbuf =3D (char *)per_cpu(t_bufs, cpu)) ) + void *rawbuf =3D per_cpu(t_bufs, cpu); + per_cpu(t_bufs, cpu) =3D NULL; + printk("Xen trace buffers: cpu %d p %p\n", cpu, rawbuf); + if ( rawbuf ) { - per_cpu(t_bufs, cpu) =3D NULL; ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); free_xenheap_pages(rawbuf, order); } - spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags); } - =20 + free_xenheap_pages(t_info, get_order_from_pages(t_info_pages)); + t_info =3D NULL; + printk("Xen trace buffers: allocation failed! Tracing disabled.\n"); return -ENOMEM; } =20 =20 /** - * tb_set_size - handle the logic involved with dynamically - * allocating and deallocating tbufs + * tb_set_size - handle the logic involved with dynamically allocating t= bufs * * This function is called when the SET_SIZE hypercall is done. */ -static int tb_set_size(int size) +static int tb_set_size(unsigned int pages) { /* * Setting size is a one-shot operation. It can be done either at * boot time or via control tools, but not by both. Once buffers * are created they cannot be destroyed. */ - int ret =3D 0; - - if ( opt_tbuf_size !=3D 0 ) + if ( opt_tbuf_size && pages !=3D opt_tbuf_size ) { - if ( size !=3D opt_tbuf_size ) - gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not impleme= nted\n", - opt_tbuf_size, size); + gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented= \n", + opt_tbuf_size, pages); return -EINVAL; } =20 - if ( size <=3D 0 ) - return -EINVAL; - - opt_tbuf_size =3D size; - - if ( (ret =3D alloc_trace_bufs()) !=3D 0 ) - { - opt_tbuf_size =3D 0; - return ret; - } - - printk("Xen trace buffers: initialized\n"); - return 0; + return alloc_trace_bufs(pages); } =20 int trace_will_trace_event(u32 event) @@ -299,21 +297,6 @@ int trace_will_trace_event(u32 event) return 1; } =20 -static int cpu_callback( - struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu =3D (unsigned long)hcpu; - - if ( action =3D=3D CPU_UP_PREPARE ) - spin_lock_init(&per_cpu(t_lock, cpu)); - - return NOTIFY_DONE; -} - -static struct notifier_block cpu_nfb =3D { - .notifier_call =3D cpu_callback -}; - /** * init_trace_bufs - performs initialization of the per-cpu trace buffer= s. * @@ -323,37 +306,13 @@ static struct notifier_block cpu_nfb =3D { */ void __init init_trace_bufs(void) { - int i; - - /* Calculate offset in u32 of first mfn */ - calc_tinfo_first_offset(); - - /* Per-cpu t_lock initialisation. */ - for_each_online_cpu ( i ) - spin_lock_init(&per_cpu(t_lock, i)); - register_cpu_notifier(&cpu_nfb); - - if ( opt_tbuf_size =3D=3D 0 ) - { - printk("Xen trace buffers: disabled\n"); - goto fail; - } - - if ( alloc_trace_bufs() !=3D 0 ) + if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) ) { - dprintk(XENLOG_INFO, "Xen trace buffers: " - "allocation size %d failed, disabling\n", - opt_tbuf_size); - goto fail; + gdprintk(XENLOG_INFO, "Xen trace buffers: " + "allocation size %d failed, disabling\n", + opt_tbuf_size); + opt_tbuf_size =3D 0; } - - printk("Xen trace buffers: initialised\n"); - wmb(); /* above must be visible before tb_init_done flag set */ - tb_init_done =3D 1; - return; - - fail: - opt_tbuf_size =3D 0; } =20 /** @@ -372,7 +331,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc case XEN_SYSCTL_TBUFOP_get_info: tbc->evt_mask =3D tb_event_mask; tbc->buffer_mfn =3D t_info ? virt_to_mfn(t_info) : 0; - tbc->size =3D T_INFO_PAGES * PAGE_SIZE; + tbc->size =3D t_info_pages * PAGE_SIZE; break; case XEN_SYSCTL_TBUFOP_set_cpu_mask: rc =3D xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);