public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Ingo Molnar <mingo@elte.hu>
Cc: linux-kernel@vger.kernel.org, Paul Mackerras <paulus@samba.org>,
	Mike Galbraith <efault@gmx.de>,
	Arjan van de Ven <arjan@infradead.org>,
	Wu Fengguang <fengguang.wu@intel.com>
Subject: Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
Date: Wed, 25 Mar 2009 15:52:54 +0100	[thread overview]
Message-ID: <1237992774.7972.1178.camel@twins> (raw)
In-Reply-To: <1237985832.7972.930.camel@twins>

On Wed, 2009-03-25 at 13:57 +0100, Peter Zijlstra wrote:
> On Wed, 2009-03-25 at 13:54 +0100, Ingo Molnar wrote:
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > 
> > > On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
> > > 
> > > > > Also, when mixing streams (events,mmap) is a single: you missed 
> > > > > 'n' events still good?
> > > > 
> > > > How would such mixing work? Multiple counters streaming into the 
> > > > same mmap area?
> > > 
> > > No basically having overflow events and mmap-vma changed events in 
> > > a single output stream.
> > 
> > ah, and i missed the impact of variable size records - that too 
> > makes it somewhat impractical to emit overflow records in situ. (the 
> > kernel does not really know the precise start of the previous 
> > record, typically.)
> 
> Alternatively, we could simply not emit new events until the read
> position increases,. that's much simpler.
> 
> Still don't like mapping the stuff writable though..

This is what it would look like I suppose...

Any thoughts?

Not-signed-off-by: me
---
 include/linux/perf_counter.h |    4 ++
 kernel/perf_counter.c        |   67 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6bf67ce..d5a599c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -165,6 +165,8 @@ struct perf_counter_mmap_page {
 	__s64	offset;			/* add to hardware counter value */
 
 	__u32   data_head;		/* head in the data section */
+	__u32	data_tail;		/* user-space written tail */
+	__u32	overflow;		/* number of lost events */
 };
 
 struct perf_event_header {
@@ -269,8 +271,10 @@ struct file;
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;
+	int				writable;
 	atomic_t			wakeup;
 	atomic_t			head;
+	atomic_t			overflow;
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b862a7..1f5c515 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1330,6 +1330,7 @@ static void __perf_counter_update_userpage(struct perf_counter *counter,
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
 	userpg->data_head = atomic_read(&data->head);
+	userpg->overflow = atomic_read(&data->overflow);
 	smp_wmb();
 	++userpg->lock;
 	preempt_enable();
@@ -1375,6 +1376,28 @@ unlock:
 	return ret;
 }
 
+static int perf_mmap_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	int ret = -EINVAL;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	/*
+	 * Only allow writes to the control page.
+	 */
+	if (page != virt_to_page(data->user_page))
+		goto unlock;
+
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
 {
 	struct perf_mmap_data *data;
@@ -1463,6 +1486,7 @@ static struct vm_operations_struct perf_mmap_vmops = {
 	.open = perf_mmap_open,
 	.close = perf_mmap_close,
 	.fault = perf_mmap_fault,
+	.page_mkwrite = perf_mmap_mkwrite,
 };
 
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1473,7 +1497,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long locked, lock_limit;
 	int ret = 0;
 
-	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
 	vma_size = vma->vm_end - vma->vm_start;
@@ -1503,16 +1527,19 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	mutex_lock(&counter->mmap_mutex);
 	if (atomic_inc_not_zero(&counter->mmap_count))
-		goto out;
+		goto unlock;
 
 	WARN_ON(counter->data);
 	ret = perf_mmap_data_alloc(counter, nr_pages);
-	if (!ret)
-		atomic_set(&counter->mmap_count, 1);
-out:
+	if (ret)
+		goto unlock;
+
+	atomic_set(&counter->mmap_count, 1);
+	if (vma->vm_flags & VM_WRITE)
+		counter->data->writable = 1;
+unlock:
 	mutex_unlock(&counter->mmap_mutex);
 
-	vma->vm_flags &= ~VM_MAYWRITE;
 	vma->vm_flags |= VM_RESERVED;
 	vma->vm_ops = &perf_mmap_vmops;
 
@@ -1540,6 +1567,28 @@ struct perf_output_handle {
 	int			wakeup;
 };
 
+static int perf_output_overflow(struct perf_mmap_data *data,
+				unsigned int offset, unsigned int head)
+{
+	unsigned int tail;
+	unsigned int mask;
+
+	if (!data->writable)
+		return 0;
+       
+	mask = (data->nr_pages << PAGE_SHIFT) - 1;
+	smp_rmb();
+	tail = ACCESS_ONCE(data->user_page->data_tail);
+
+	offset = (offset - tail) & mask;
+	head   = (head   - tail) & mask;
+
+	if ((int)(head - offset) < 0)
+		return 1;
+
+	return 0;
+}
+
 static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size)
 {
@@ -1552,11 +1601,13 @@ static int perf_output_begin(struct perf_output_handle *handle,
 		goto out;
 
 	if (!data->nr_pages)
-		goto out;
+		goto fail;
 
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
+		if (unlikely(perf_output_overflow(data, offset, head)))
+			goto fail;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
 	handle->counter	= counter;
@@ -1567,6 +1618,8 @@ static int perf_output_begin(struct perf_output_handle *handle,
 
 	return 0;
 
+fail:
+	atomic_inc(&data->overflow);
 out:
 	rcu_read_unlock();
 


  reply	other threads:[~2009-03-25 14:53 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
2009-03-25 11:30 ` [PATCH 1/6] perf_counter: more elaborate write API Peter Zijlstra
2009-03-25 12:06   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 2/6] perf_counter: output objects Peter Zijlstra
2009-03-25 12:06   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 3/6] perf_counter: sanity check on the output API Peter Zijlstra
2009-03-25 12:06   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 4/6] perf_counter: optionally provide the pid/tid of the sampled task Peter Zijlstra
2009-03-25 12:06   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument Peter Zijlstra
2009-03-25 12:07   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 12:18   ` [PATCH 5/6] " Ingo Molnar
2009-03-25 12:27     ` Peter Zijlstra
2009-03-25 12:35       ` Ingo Molnar
2009-03-25 12:41         ` Peter Zijlstra
2009-03-25 12:54           ` Ingo Molnar
2009-03-25 12:57             ` Peter Zijlstra
2009-03-25 14:52               ` Peter Zijlstra [this message]
2009-03-25 17:16                 ` Ingo Molnar
2009-03-25 21:18                   ` Peter Zijlstra
2009-03-26  2:22       ` Paul Mackerras
2009-03-25 11:30 ` [PATCH 6/6] perf_counter: kerneltop: output event support Peter Zijlstra
2009-03-25 12:07   ` [tip:perfcounters/core] " Peter Zijlstra
2009-04-04  0:21   ` [PATCH 6/6] " Corey Ashford
2009-04-04 12:17     ` Peter Zijlstra
2009-04-04 18:10       ` Corey Ashford
2009-03-25 12:05 ` [PATCH 0/6] perf_counter: new output ABI Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1237992774.7972.1178.camel@twins \
    --to=peterz@infradead.org \
    --cc=arjan@infradead.org \
    --cc=efault@gmx.de \
    --cc=fengguang.wu@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox