All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-tip-commits@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, paulus@samba.org, hpa@zytor.com,
	mingo@redhat.com, a.p.zijlstra@chello.nl, tglx@linutronix.de,
	mingo@elte.hu
Subject: [tip:perfcounters/core] perf_counter: fix update_userpage()
Date: Wed, 1 Apr 2009 10:12:57 GMT	[thread overview]
Message-ID: <tip-c48b1e4e8565e663bf085048afc61bf845a4801f@git.kernel.org> (raw)
In-Reply-To: <20090330171023.241410660@chello.nl>

Commit-ID:  c48b1e4e8565e663bf085048afc61bf845a4801f
Gitweb:     http://git.kernel.org/tip/c48b1e4e8565e663bf085048afc61bf845a4801f
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Mon, 30 Mar 2009 19:07:03 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 1 Apr 2009 11:33:32 +0200

perf_counter: fix update_userpage()

It just occured to me it is possible to have multiple contending
updates of the userpage (mmap information vs overflow vs counter).
This would break the seqlock logic.

It appear the arch code uses this from NMI context, so we cannot
possibly serialize its use, therefore separate the data_head update
from it and let it return to its original use.

The arch code needs to make sure there are no contending callers by
disabling the counter before using it -- powerpc appears to do this
nicely.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090330171023.241410660@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 include/linux/perf_counter.h |   35 +++++++++++++++++++++++++++++++++++
 kernel/perf_counter.c        |   38 +++++++++++++++++++++++---------------
 2 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0d83322..8ac1885 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -160,10 +160,45 @@ struct perf_counter_hw_event {
 struct perf_counter_mmap_page {
 	__u32	version;		/* version number of this structure */
 	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw counters in user-space.
+	 *
+	 * The index and offset should be read atomically using the seqlock:
+	 *
+	 *   __u32 seq, index;
+	 *   __s64 offset;
+	 *
+	 * again:
+	 *   rmb();
+	 *   seq = pc->lock;
+	 *
+	 *   if (unlikely(seq & 1)) {
+	 *     cpu_relax();
+	 *     goto again;
+	 *   }
+	 *
+	 *   index = pc->index;
+	 *   offset = pc->offset;
+	 *
+	 *   rmb();
+	 *   if (pc->lock != seq)
+	 *     goto again;
+	 *
+	 * After this, index contains architecture specific counter index + 1,
+	 * so that 0 means unavailable, offset contains the value to be added
+	 * to the result of the raw timer read to obtain this counter's value.
+	 */
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
 	__s64	offset;			/* add to hardware counter value */
 
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading this value should issue an rmb(), on SMP capable
+	 * platforms, after reading this value -- see perf_counter_wakeup().
+	 */
 	__u32   data_head;		/* head in the data section */
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f70ff80..c95e923 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1316,10 +1316,22 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-static void __perf_counter_update_userpage(struct perf_counter *counter,
-					   struct perf_mmap_data *data)
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We can not serialize this because the arch
+ * code calls this from NMI context.
+ */
+void perf_counter_update_userpage(struct perf_counter *counter)
 {
-	struct perf_counter_mmap_page *userpg = data->user_page;
+	struct perf_mmap_data *data;
+	struct perf_counter_mmap_page *userpg;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	userpg = data->user_page;
 
 	/*
 	 * Disable preemption so as to not let the corresponding user-space
@@ -1333,20 +1345,10 @@ static void __perf_counter_update_userpage(struct perf_counter *counter,
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
-	userpg->data_head = atomic_read(&data->head);
 	smp_wmb();
 	++userpg->lock;
 	preempt_enable();
-}
-
-void perf_counter_update_userpage(struct perf_counter *counter)
-{
-	struct perf_mmap_data *data;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (data)
-		__perf_counter_update_userpage(counter, data);
+unlock:
 	rcu_read_unlock();
 }
 
@@ -1547,7 +1549,13 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	data = rcu_dereference(counter->data);
 	if (data) {
 		(void)atomic_xchg(&data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(counter, data);
+		/*
+		 * Ensure all data writes are issued before updating the
+		 * user-space data head information. The matching rmb()
+		 * will be in userspace after reading this value.
+		 */
+		smp_wmb();
+		data->user_page->data_head = atomic_read(&data->head);
 	}
 	rcu_read_unlock();
 

  reply	other threads:[~2009-04-01 10:14 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-30 17:07 [PATCH 00/15] pending perf_counter bits Peter Zijlstra
2009-03-30 17:07 ` [PATCH 01/15] perf_counter: unify and fix delayed counter wakeup Peter Zijlstra
2009-03-31  5:45   ` Paul Mackerras
2009-03-31  6:37     ` Peter Zijlstra
2009-04-01  9:04       ` Ingo Molnar
2009-04-01 10:12   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 02/15] perf_counter: fix update_userpage() Peter Zijlstra
2009-04-01 10:12   ` Peter Zijlstra [this message]
2009-03-30 17:07 ` [PATCH 03/15] perf_counter: kerneltop: simplify data_head read Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 04/15] perf_counter: executable mmap() information Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 05/15] perf_counter: kerneltop: parse the mmap data stream Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 06/15] perf_counter: powerpc: only reserve PMU hardware when we need it Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Paul Mackerras
2009-03-30 17:07 ` [PATCH 07/15] perf_counter: make it possible for hw_perf_counter_init to return error codes Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Paul Mackerras
2009-03-30 17:07 ` [PATCH 08/15] perf_counter: x86: proper error propagation for the x86 hw_perf_counter_init() Peter Zijlstra
2009-04-01 10:13   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 09/15] perf_counter tools: optionally scale counter values in perfstat mode Peter Zijlstra
2009-04-01  9:39   ` Ingo Molnar
2009-04-01 10:50     ` Paul Mackerras
2009-04-01 10:14   ` [tip:perfcounters/core] " Paul Mackerras
2009-03-30 17:07 ` [PATCH 10/15] perf_counter: small cleanup of the output routines Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 11/15] perf_counter: re-arrange the perf_event_type Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 12/15] pref_counter: kerneltop: update event_types Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] perf_counter tools: " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 13/15] perf_counter: provide generic callchain bits Peter Zijlstra
2009-03-31  6:12   ` Paul Mackerras
2009-03-31  6:39     ` Peter Zijlstra
2009-03-31  7:24       ` Corey Ashford
2009-03-31  8:43         ` Peter Zijlstra
2009-03-31  9:09           ` Paul Mackerras
2009-03-31  9:12         ` Paul Mackerras
2009-03-31 14:00           ` Peter Zijlstra
2009-03-31 17:11             ` Corey Ashford
2009-04-01  3:48             ` Paul Mackerras
2009-04-01  7:59               ` Peter Zijlstra
2009-04-01  8:45                 ` Paul Mackerras
2009-04-01 10:00                   ` Ingo Molnar
2009-04-01 11:53                     ` Paul Mackerras
2009-04-01 23:25                 ` Corey Ashford
2009-04-02  6:43                   ` Peter Zijlstra
2009-04-02  7:41                     ` Peter Zijlstra
2009-04-02  9:10                       ` Paul Mackerras
2009-04-02  9:14                         ` Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 14/15] perf_counter: x86: callchain support Peter Zijlstra
2009-04-01 10:14   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-30 17:07 ` [PATCH 15/15] perf_counter: pmc arbitration Peter Zijlstra
2009-04-01 10:15   ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-31  5:43 ` [PATCH 00/15] pending perf_counter bits Paul Mackerras

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-c48b1e4e8565e663bf085048afc61bf845a4801f@git.kernel.org \
    --to=a.p.zijlstra@chello.nl \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mingo@redhat.com \
    --cc=paulus@samba.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.