From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758202Ab0EUJoa (ORCPT ); Fri, 21 May 2010 05:44:30 -0400 Received: from mail-ww0-f46.google.com ([74.125.82.46]:38155 "EHLO mail-ww0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756564Ab0EUJo2 (ORCPT ); Fri, 21 May 2010 05:44:28 -0400 DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=date:from:to:cc:subject:message-id:references:mime-version :content-type:content-disposition:in-reply-to:user-agent; b=P60BAhVT0IhclJrZfTvY/ZpqwGnfQzxsbWXo+1Mqh1WOAnDzkrOLVdoBuTRS9afjnQ F4gIzDZNV5mfBdszeg+Z0+n55j3nNLCzbKeGCra3V4TeHkgJ6bs6w7aNmkRgg8AVh03q i79bQTZv6FRyRDQCOPGzOldpzsctr37B16dFU= Date: Fri, 21 May 2010 11:44:35 +0200 From: Frederic Weisbecker To: Peter Zijlstra Cc: Ingo Molnar , Paul Mackerras , Arnaldo Carvalho de Melo , Steven Rostedt , LKML Subject: Re: [PATCH 05/10] perf-record: Share per-cpu buffers Message-ID: <20100521094434.GB30108@nowhere> References: <20100521090201.326791353@chello.nl> <20100521090710.634824884@chello.nl> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20100521090710.634824884@chello.nl> User-Agent: Mutt/1.5.18 (2008-05-17) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Fri, May 21, 2010 at 11:02:06AM +0200, Peter Zijlstra wrote: > It seems a waste of space to create a buffer per event, share it per-cpu. > > Signed-off-by: Peter Zijlstra > --- > tools/perf/builtin-record.c | 52 +++++++++++++++++++++++--------------------- > 1 file changed, 28 insertions(+), 24 deletions(-) > > Index: linux-2.6/tools/perf/builtin-record.c > =================================================================== > --- linux-2.6.orig/tools/perf/builtin-record.c > +++ linux-2.6/tools/perf/builtin-record.c > @@ -85,7 +85,7 @@ struct mmap_data { > unsigned int prev; > }; > > -static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; > +static struct mmap_data mmap_array[MAX_NR_CPUS]; > > static unsigned long mmap_read_head(struct mmap_data *md) > { > @@ -380,18 +380,29 @@ try_again: > if (group && group_fd == -1) > group_fd = fd[nr_cpu][counter][thread_index]; > > - event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; > - event_array[nr_poll].events = POLLIN; > - nr_poll++; > - > - mmap_array[nr_cpu][counter][thread_index].counter = counter; > - mmap_array[nr_cpu][counter][thread_index].prev = 0; > - mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1; > - mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, > - PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); > - if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) { > - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); > - exit(-1); > + if (counter || thread_index) { > + ret = ioctl(fd[nr_cpu][counter][thread_index], > + PERF_EVENT_IOC_SET_OUTPUT, > + fd[nr_cpu][0][0]); > + if (ret) { > + error("failed to set output: %d (%s)\n", errno, > + strerror(errno)); > + exit(-1); > + } > + } else { > + mmap_array[nr_cpu].counter = counter; > + mmap_array[nr_cpu].prev = 0; > + mmap_array[nr_cpu].mask = mmap_pages*page_size - 1; If you do this multiplex per cpu (which I think is a very good thing), you should also increase the default value of mmap_pages as it might not be big enough anymore. > + mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size, > + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); > + if (mmap_array[nr_cpu].base == MAP_FAILED) { > + error("failed to mmap with %d (%s)\n", errno, strerror(errno)); > + exit(-1); > + } > + > + event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; > + event_array[nr_poll].events = POLLIN; > + nr_poll++; > } > > if (filter != NULL) { > @@ -492,16 +503,11 @@ static struct perf_event_header finished > > static void mmap_read_all(void) > { > - int i, counter, thread; > + int i; > > for (i = 0; i < nr_cpu; i++) { > - for (counter = 0; counter < nr_counters; counter++) { > - for (thread = 0; thread < thread_num; thread++) { > - if (mmap_array[i][counter][thread].base) > - mmap_read(&mmap_array[i][counter][thread]); > - } > - > - } > + if (mmap_array[i].base) > + mmap_read(&mmap_array[i]); > } > > if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) > @@ -876,9 +882,7 @@ int cmd_record(int argc, const char **ar > for (i = 0; i < MAX_NR_CPUS; i++) { > for (j = 0; j < MAX_COUNTERS; j++) { > fd[i][j] = malloc(sizeof(int)*thread_num); > - mmap_array[i][j] = zalloc( > - sizeof(struct mmap_data)*thread_num); > - if (!fd[i][j] || !mmap_array[i][j]) > + if (!fd[i][j]) > return -ENOMEM; > } > } > >