Re: [RFC] perf_events: support for uncore a.k.a. nest units

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Lin Ming <ming.m.lin@intel.com>
To: "eranian@gmail.com" <eranian@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	"Gary.Mohr@Bull.com" <Gary.Mohr@bull.com>,
	Corey Ashford <cjashfor@linux.vnet.ibm.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [RFC] perf_events: support for uncore a.k.a. nest units
Date: Wed, 21 Apr 2010 17:42:54 +0800	[thread overview]
Message-ID: <1271842974.3794.16.camel@minggr.sh.intel.com> (raw)
In-Reply-To: <i2j7c86c4471004210144kd8223bfbve5d6bee2820d0ccf@mail.gmail.com>

On Wed, 2010-04-21 at 16:44 +0800, stephane eranian wrote:
> On Wed, Apr 21, 2010 at 10:39 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> > On Wed, 2010-04-21 at 16:32 +0800, stephane eranian wrote:
> >> Seems to me that struct pmu is a shared resource across all CPUs.
> >> I don't understand why scheduling on one CPU would have to impact
> >> all the other CPUs, unless I am missing something here.
> >
> > Do you mean the pmu->flag?
> 
> Yes.

Thanks for the review.

Changes log:

v2.
pmu->flag should be per cpu (Stephane Eranian)

change definition of "const struct pmu" to "struct pmu" since it's not
read only now.

v1.
remove hw_perf_group_sched_in() based on Peter's idea.

---
 arch/x86/kernel/cpu/perf_event.c |  183 ++++++++++++++------------------------
 include/linux/perf_event.h       |   16 +++-
 kernel/perf_event.c              |   55 ++++++------
 3 files changed, 106 insertions(+), 148 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 626154a..1113fd5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -598,7 +598,7 @@ static void x86_pmu_enable_all(int added)
 	}
 }
 
-static const struct pmu pmu;
+static struct pmu pmu;
 
 static inline int is_x86_event(struct perf_event *event)
 {
@@ -935,6 +935,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc;
 	int assign[X86_PMC_IDX_MAX];
+	u8 *flag;
 	int n, n0, ret;
 
 	hwc = &event->hw;
@@ -944,6 +945,10 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
+	flag = perf_pmu_flag((struct pmu *)event->pmu);
+	if (!(*flag & PERF_EVENT_TRAN_STARTED))
+		goto out;
+
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
@@ -953,6 +958,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
+out:
 	cpuc->n_events = n;
 	cpuc->n_added += n - n0;
 
@@ -1210,119 +1216,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return &unconstrained;
 }
 
-static int x86_event_sched_in(struct perf_event *event,
-			  struct perf_cpu_context *cpuctx)
-{
-	int ret = 0;
-
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
-	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
-
-	if (!is_x86_event(event))
-		ret = event->pmu->enable(event);
-
-	if (!ret && !is_software_event(event))
-		cpuctx->active_oncpu++;
-
-	if (!ret && event->attr.exclusive)
-		cpuctx->exclusive = 1;
-
-	return ret;
-}
-
-static void x86_event_sched_out(struct perf_event *event,
-			    struct perf_cpu_context *cpuctx)
-{
-	event->state = PERF_EVENT_STATE_INACTIVE;
-	event->oncpu = -1;
-
-	if (!is_x86_event(event))
-		event->pmu->disable(event);
-
-	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
-
-	if (!is_software_event(event))
-		cpuctx->active_oncpu--;
-
-	if (event->attr.exclusive || !cpuctx->active_oncpu)
-		cpuctx->exclusive = 0;
-}
-
-/*
- * Called to enable a whole group of events.
- * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
- * Assumes the caller has disabled interrupts and has
- * frozen the PMU with hw_perf_save_disable.
- *
- * called with PMU disabled. If successful and return value 1,
- * then guaranteed to call perf_enable() and hw_perf_enable()
- */
-int hw_perf_group_sched_in(struct perf_event *leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct perf_event *sub;
-	int assign[X86_PMC_IDX_MAX];
-	int n0, n1, ret;
-
-	if (!x86_pmu_initialized())
-		return 0;
-
-	/* n0 = total number of events */
-	n0 = collect_events(cpuc, leader, true);
-	if (n0 < 0)
-		return n0;
-
-	ret = x86_pmu.schedule_events(cpuc, n0, assign);
-	if (ret)
-		return ret;
-
-	ret = x86_event_sched_in(leader, cpuctx);
-	if (ret)
-		return ret;
-
-	n1 = 1;
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state > PERF_EVENT_STATE_OFF) {
-			ret = x86_event_sched_in(sub, cpuctx);
-			if (ret)
-				goto undo;
-			++n1;
-		}
-	}
-	/*
-	 * copy new assignment, now we know it is possible
-	 * will be used by hw_perf_enable()
-	 */
-	memcpy(cpuc->assign, assign, n0*sizeof(int));
-
-	cpuc->n_events  = n0;
-	cpuc->n_added  += n1;
-	ctx->nr_active += n1;
-
-	/*
-	 * 1 means successful and events are active
-	 * This is not quite true because we defer
-	 * actual activation until hw_perf_enable() but
-	 * this way we* ensure caller won't try to enable
-	 * individual events
-	 */
-	return 1;
-undo:
-	x86_event_sched_out(leader, cpuctx);
-	n0  = 1;
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-			x86_event_sched_out(sub, cpuctx);
-			if (++n0 == n1)
-				break;
-		}
-	}
-	return ret;
-}
-
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
 #include "perf_event_p4.c"
@@ -1397,6 +1290,14 @@ void __init init_hw_perf_events(void)
 		return;
 	}
 
+	/*
+	 * TBD: will move this to pmu register function
+	 * when multiple hw pmu support is added
+	 */
+	pmu.flag = alloc_percpu(u8);
+	if (!pmu.flag)
+		return;
+
 	pmu_check_apic();
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -1454,13 +1355,61 @@ static inline void x86_pmu_read(struct perf_event *event)
 	x86_perf_event_update(event);
 }
 
-static const struct pmu pmu = {
+/*
+ * Set the flag to make pmu::enable() not perform the
+ * schedulablilty test.
+ */
+static void x86_pmu_start_txn(struct pmu *pmu)
+{
+	u8 *flag = perf_pmu_flag(pmu);
+
+	*flag |= PERF_EVENT_TRAN_STARTED;
+}
+
+static void x86_pmu_stop_txn(struct pmu *pmu)
+{
+	u8 *flag = perf_pmu_flag(pmu);
+
+	*flag &= ~PERF_EVENT_TRAN_STARTED;
+}
+
+/*
+ * Return 0 if commit transaction success
+ */
+static int x86_pmu_commit_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int assign[X86_PMC_IDX_MAX];
+	int n, ret;
+
+	n = cpuc->n_events;
+
+	if (!x86_pmu_initialized())
+		return -EAGAIN;
+
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	if (ret)
+		return ret;
+
+	/*
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n*sizeof(int));
+
+	return 0;
+}
+
+static struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
 	.start		= x86_pmu_start,
 	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 	.unthrottle	= x86_pmu_unthrottle,
+	.start_txn	= x86_pmu_start_txn,
+	.stop_txn	= x86_pmu_stop_txn,
+	.commit_txn	= x86_pmu_commit_txn,
 };
 
 /*
@@ -1537,9 +1486,9 @@ out:
 	return ret;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+struct pmu *hw_perf_event_init(struct perf_event *event)
 {
-	const struct pmu *tmp;
+	struct pmu *tmp;
 	int err;
 
 	err = __hw_perf_event_init(event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index bf896d0..9fa3f46 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -524,6 +524,8 @@ struct hw_perf_event {
 
 struct perf_event;
 
+#define PERF_EVENT_TRAN_STARTED 1
+
 /**
  * struct pmu - generic performance monitoring unit
  */
@@ -534,6 +536,12 @@ struct pmu {
 	void (*stop)			(struct perf_event *event);
 	void (*read)			(struct perf_event *event);
 	void (*unthrottle)		(struct perf_event *event);
+	void (*start_txn)		(struct pmu *pmu);
+	void (*stop_txn)		(struct pmu *pmu);
+	int (*commit_txn)		(struct pmu *pmu);
+
+	/* percpu flag */
+	u8 *flag;
 };
 
 /**
@@ -610,7 +618,7 @@ struct perf_event {
 	int				group_flags;
 	struct perf_event		*group_leader;
 	struct perf_event		*output;
-	const struct pmu		*pmu;
+	struct pmu			*pmu;
 
 	enum perf_event_active_state	state;
 	atomic64_t			count;
@@ -782,7 +790,7 @@ struct perf_output_handle {
  */
 extern int perf_max_events;
 
-extern const struct pmu *hw_perf_event_init(struct perf_event *event);
+extern struct pmu *hw_perf_event_init(struct perf_event *event);
 
 extern void perf_event_task_sched_in(struct task_struct *task);
 extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
@@ -799,9 +807,6 @@ extern void perf_disable(void);
 extern void perf_enable(void);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
-extern int hw_perf_group_sched_in(struct perf_event *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
@@ -811,6 +816,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
+extern u8 *perf_pmu_flag(struct pmu *pmu);
 
 struct perf_sample_data {
 	u64				type;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 07b7a43..4820090 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -75,7 +75,7 @@ static DEFINE_SPINLOCK(perf_resource_lock);
 /*
  * Architecture provided APIs - weak aliases:
  */
-extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
+extern __weak struct pmu *hw_perf_event_init(struct perf_event *event)
 {
 	return NULL;
 }
@@ -83,15 +83,14 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
 void __weak hw_perf_disable(void)		{ barrier(); }
 void __weak hw_perf_enable(void)		{ barrier(); }
 
-int __weak
-hw_perf_group_sched_in(struct perf_event *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
+void __weak perf_event_print_debug(void)	{ }
+
+u8 *perf_pmu_flag(struct pmu *pmu)
 {
-	return 0;
-}
+	int cpu = smp_processor_id();
 
-void __weak perf_event_print_debug(void)	{ }
+	return per_cpu_ptr(pmu->flag, cpu);
+}
 
 static DEFINE_PER_CPU(int, perf_disable_count);
 
@@ -642,14 +641,13 @@ group_sched_in(struct perf_event *group_event,
 	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group;
+	struct pmu *pmu = group_event->pmu;
 	int ret;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
-	if (ret)
-		return ret < 0 ? ret : 0;
+	pmu->start_txn(pmu);
 
 	if (event_sched_in(group_event, cpuctx, ctx))
 		return -EAGAIN;
@@ -664,16 +662,21 @@ group_sched_in(struct perf_event *group_event,
 		}
 	}
 
-	return 0;
+	ret = pmu->commit_txn(pmu);
+	if (!ret) {
+		pmu->stop_txn(pmu);
+		return 0;
+	}
 
 group_error:
+	pmu->stop_txn(pmu);
+
 	/*
-	 * Groups can be scheduled in as one unit only, so undo any
-	 * partial group before returning:
+	 * Commit transaction fails, rollback
+	 * Groups can be scheduled in as one unit only, so undo
+	 * whole group before returning:
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		if (event == partial_group)
-			break;
 		event_sched_out(event, cpuctx, ctx);
 	}
 	event_sched_out(group_event, cpuctx, ctx);
@@ -4139,7 +4142,7 @@ static void perf_swevent_disable(struct perf_event *event)
 	hlist_del_rcu(&event->hlist_entry);
 }
 
-static const struct pmu perf_ops_generic = {
+static struct pmu perf_ops_generic = {
 	.enable		= perf_swevent_enable,
 	.disable	= perf_swevent_disable,
 	.read		= perf_swevent_read,
@@ -4250,7 +4253,7 @@ static void cpu_clock_perf_event_read(struct perf_event *event)
 	cpu_clock_perf_event_update(event);
 }
 
-static const struct pmu perf_ops_cpu_clock = {
+static struct pmu perf_ops_cpu_clock = {
 	.enable		= cpu_clock_perf_event_enable,
 	.disable	= cpu_clock_perf_event_disable,
 	.read		= cpu_clock_perf_event_read,
@@ -4307,7 +4310,7 @@ static void task_clock_perf_event_read(struct perf_event *event)
 	task_clock_perf_event_update(event, time);
 }
 
-static const struct pmu perf_ops_task_clock = {
+static struct pmu perf_ops_task_clock = {
 	.enable		= task_clock_perf_event_enable,
 	.disable	= task_clock_perf_event_disable,
 	.read		= task_clock_perf_event_read,
@@ -4448,7 +4451,7 @@ static void tp_perf_event_destroy(struct perf_event *event)
 	swevent_hlist_put(event);
 }
 
-static const struct pmu *tp_perf_event_init(struct perf_event *event)
+static struct pmu *tp_perf_event_init(struct perf_event *event)
 {
 	int err;
 
@@ -4505,7 +4508,7 @@ static int perf_tp_event_match(struct perf_event *event,
 	return 1;
 }
 
-static const struct pmu *tp_perf_event_init(struct perf_event *event)
+static struct pmu *tp_perf_event_init(struct perf_event *event)
 {
 	return NULL;
 }
@@ -4527,7 +4530,7 @@ static void bp_perf_event_destroy(struct perf_event *event)
 	release_bp_slot(event);
 }
 
-static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+static struct pmu *bp_perf_event_init(struct perf_event *bp)
 {
 	int err;
 
@@ -4551,7 +4554,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
 		perf_swevent_add(bp, 1, 1, &sample, regs);
 }
 #else
-static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+static struct pmu *bp_perf_event_init(struct perf_event *bp)
 {
 	return NULL;
 }
@@ -4573,9 +4576,9 @@ static void sw_perf_event_destroy(struct perf_event *event)
 	swevent_hlist_put(event);
 }
 
-static const struct pmu *sw_perf_event_init(struct perf_event *event)
+static struct pmu *sw_perf_event_init(struct perf_event *event)
 {
-	const struct pmu *pmu = NULL;
+	struct pmu *pmu = NULL;
 	u64 event_id = event->attr.config;
 
 	/*
@@ -4637,7 +4640,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   perf_overflow_handler_t overflow_handler,
 		   gfp_t gfpflags)
 {
-	const struct pmu *pmu;
+	struct pmu *pmu;
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
 	long err;


> 
> > You are right, pmu->flag should be per cpu data.
> >
> > Will update the patch.
> >
> > Thanks,
> > Lin Ming
> >
> >>

next prev parent reply	other threads:[~2010-04-21  9:42 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-19 19:41 [RFC] perf_events: support for uncore a.k.a. nest units Corey Ashford
2010-01-20  0:44 ` Andi Kleen
2010-01-20  1:49   ` Corey Ashford
2010-01-20  9:35     ` Andi Kleen
2010-01-20 19:28       ` Corey Ashford
2010-01-20 13:34 ` Peter Zijlstra
2010-01-20 21:33   ` Peter Zijlstra
2010-01-20 23:23     ` Corey Ashford
2010-01-21  7:21       ` Ingo Molnar
2010-01-21 19:13         ` Corey Ashford
2010-01-21 19:28           ` Corey Ashford
2010-01-27 10:28             ` Ingo Molnar
2010-01-27 19:50               ` Corey Ashford
2010-01-28 10:57                 ` Peter Zijlstra
2010-01-28 18:00                   ` Corey Ashford
2010-01-28 19:06                     ` Peter Zijlstra
2010-01-28 19:44                       ` Corey Ashford
2010-01-28 22:08                       ` Corey Ashford
2010-01-29  9:52                         ` Peter Zijlstra
2010-01-29 23:05                           ` Corey Ashford
2010-01-30  8:42                             ` Peter Zijlstra
2010-02-01 19:39                               ` Corey Ashford
2010-02-01 19:54                                 ` Peter Zijlstra
2010-01-21  8:36       ` Peter Zijlstra
2010-01-21  8:47     ` stephane eranian
2010-01-21  8:59       ` Peter Zijlstra
2010-01-21  9:16         ` stephane eranian
2010-01-21  9:43         ` stephane eranian
     [not found] ` <d3f22a1003290213x7d7904an59d50eb6a8616133@mail.gmail.com>
2010-03-30  7:42   ` Lin Ming
2010-03-30 16:49     ` Corey Ashford
2010-03-30 17:15       ` Peter Zijlstra
2010-03-30 22:12         ` Corey Ashford
2010-03-31 14:01           ` Peter Zijlstra
2010-03-31 14:13             ` stephane eranian
2010-03-31 15:49             ` Maynard Johnson
2010-03-31 17:50             ` Corey Ashford
2010-04-15 21:16         ` Gary.Mohr
2010-04-16 13:24           ` Peter Zijlstra
2010-04-19  9:08             ` Lin Ming
2010-04-19  9:27               ` Peter Zijlstra
2010-04-20 11:55             ` Lin Ming
2010-04-20 12:03               ` Peter Zijlstra
2010-04-21  8:08                 ` Lin Ming
2010-04-21  8:32                   ` stephane eranian
2010-04-21  8:39                     ` Lin Ming
2010-04-21  8:44                       ` stephane eranian
2010-04-21  9:42                         ` Lin Ming [this message]
2010-04-21  9:57                           ` Peter Zijlstra
2010-04-21 22:12                             ` Lin Ming
2010-04-21 14:22                               ` Peter Zijlstra
2010-04-21 22:38                                 ` Lin Ming
2010-04-21 14:53                                   ` Peter Zijlstra
2010-03-30 21:28       ` stephane eranian
2010-03-30 23:11         ` Corey Ashford
2010-03-31 13:43           ` stephane eranian

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:626154a dfblob:1113fd5 dfblob:bf896d0 dfblob:9fa3f46
dfblob:07b7a43 dfblob:4820090 )
 OR (
bs:"Re: [RFC] perf_events: support for uncore a.k.a. nest units" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1271842974.3794.16.camel@minggr.sh.intel.com \
    --to=ming.m.lin@intel.com \
    --cc=Gary.Mohr@bull.com \
    --cc=cjashfor@linux.vnet.ibm.com \
    --cc=eranian@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.