All of lore.kernel.org
 help / color / mirror / Atom feed
From: Borislav Petkov <bp@amd64.org>
To: Frederic Weisbecker <fweisbec@gmail.com>,
	Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-kernel@vger.kernel.org
Subject: [RFC PATCH] perf: Carve out cgroup-related code
Date: Wed, 11 May 2011 14:11:35 +0200	[thread overview]
Message-ID: <20110511121135.GA25865@aftab> (raw)

Hi guys,

here's a first prototype carving out cgroup perf code. It builds fine
with both CONFIG_CGROUP_PERF enabled and disabled. Please take a look
while I do the same with the callchain stuff and let me know whether I
should do something differently.

Thanks.
---

Move cgroup perf support into a different compilation module -
kernel/events/cgroup.c - thus slimming perf_event.c some more.

While at it,

* push some oneliners into perf_event.h now that they're used in
multiple .c files.

* drop is_cgroup_event() check for perf_cgroup_defer_enabled() at its
callsite in __perf_event_enable since the latter does the check anyway.

No functional change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 include/linux/perf_event.h |  138 ++++++++++++-
 kernel/events/Makefile     |    1 +
 kernel/events/cgroup.c     |  324 +++++++++++++++++++++++++++++
 kernel/events/core.c       |  496 +-------------------------------------------
 4 files changed, 473 insertions(+), 486 deletions(-)
 create mode 100644 kernel/events/cgroup.c

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3412684..ef65f34 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -954,8 +954,15 @@ struct perf_output_handle {
 	int				sample;
 };
 
+enum event_type_t {
+	EVENT_FLEXIBLE = 0x1,
+	EVENT_PINNED = 0x2,
+	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
 #ifdef CONFIG_PERF_EVENTS
 
+extern struct list_head pmus;
 extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
@@ -1153,6 +1160,47 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_task_tick(void);
+
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static inline u64 perf_clock(void)
+{
+	return local_clock();
+}
+
+extern void ctx_sched_out(struct perf_event_context *ctx,
+			  struct perf_cpu_context *cpuctx,
+			  enum event_type_t event_type);
+/*
+ * Called with IRQs disabled
+ */
+static inline void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+				     enum event_type_t event_type)
+{
+	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
+}
+
+extern void ctx_sched_in(struct perf_event_context *ctx,
+			 struct perf_cpu_context *cpuctx,
+			 enum event_type_t event_type,
+			 struct task_struct *task);
+
+static inline void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+				    enum event_type_t event_type,
+				    struct task_struct *task)
+{
+	struct perf_event_context *ctx = &cpuctx->ctx;
+
+	ctx_sched_in(ctx, cpuctx, event_type, task);
+}
+
+extern int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info);
+extern u64 perf_event_time(struct perf_event *event);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task)			{ }
@@ -1187,7 +1235,95 @@ static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }
 static inline void perf_event_task_tick(void)				{ }
-#endif
+static inline void ctx_sched_out(struct perf_event_context *ctx,
+				 struct perf_cpu_context *cpuctx,
+				 enum event_type_t event_type)		{ }
+
+static void ctx_sched_in(struct perf_event_context *ctx,
+			 struct perf_cpu_context *cpuctx,
+			 enum event_type_t event_type,
+			 struct task_struct *task)			{ }
+static inline int
+task_function_call(struct task_struct *p,
+		   int (*func) (void *info), void *info)		{ return -EINVAL; }
+static inline u64 perf_event_time(struct perf_event *event)		{ return 0; }
+#endif /* CONFIG_PERF_EVENTS */
+
+#ifdef CONFIG_CGROUP_PERF
+extern struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task);
+extern bool perf_cgroup_match(struct perf_event *event);
+extern int
+perf_cgroup_connect(pid_t pid, struct perf_event *event,
+		    struct perf_event_attr *attr,
+		    struct perf_event *group_leader);
+extern void perf_detach_cgroup(struct perf_event *event);
+
+static inline int is_cgroup_event(struct perf_event *event)
+{
+	return event->cgrp != NULL;
+}
+
+extern u64 perf_cgroup_event_time(struct perf_event *event);
+extern void update_cgrp_time_from_event(struct perf_event *event);
+extern void
+update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx);
+extern void
+perf_cgroup_set_timestamp(struct task_struct *task,
+			  struct perf_event_context *ctx);
+
+extern void perf_cgroup_sched_out(struct task_struct *task);
+extern void perf_cgroup_sched_in(struct task_struct *task);
+
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+{
+	struct perf_cgroup_info *t;
+	t = per_cpu_ptr(event->cgrp->info, event->cpu);
+	event->shadow_ctx_time = now - t->timestamp;
+}
+
+static inline void
+perf_cgroup_defer_enabled(struct perf_event *event)
+{
+	/*
+	 * when the current task's perf cgroup does not match
+	 * the event's, we need to remember to call the
+	 * perf_mark_enable() function the first time a task with
+	 * a matching perf cgroup is scheduled in.
+	 */
+	if (is_cgroup_event(event) && !perf_cgroup_match(event))
+		event->cgrp_defer_enabled = 1;
+}
+extern inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+			 struct perf_event_context *ctx);
+#else
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task)				{ return NULL; }
+static inline bool perf_cgroup_match(struct perf_event *event)		{ return true; }
+static inline int
+perf_cgroup_connect(pid_t pid, struct perf_event *event,
+		    struct perf_event_attr *attr,
+		    struct perf_event *group_leader)			{ return -EINVAL; }
+static inline void perf_detach_cgroup(struct perf_event *event)		{ }
+static inline int is_cgroup_event(struct perf_event *event)		{ return 0; }
+static inline u64 perf_cgroup_event_time(struct perf_event *event)	{ return 0; }
+static inline void update_cgrp_time_from_event(struct perf_event *e)	{ }
+static inline void
+update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)		{ }
+static inline void
+perf_cgroup_set_timestamp(struct task_struct *task,
+			  struct perf_event_context *ctx)		{ }
+static inline void perf_cgroup_sched_out(struct task_struct *task)	{ }
+static inline void perf_cgroup_sched_in(struct task_struct *task)	{ }
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)		{ }
+static inline void perf_cgroup_defer_enabled(struct perf_event *event)	{ }
+static inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+			 struct perf_event_context *ctx)		{ }
+#endif /* CONFIG_CGROUP_PERF */
 
 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 1ce23d3..21b7da7 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -4,3 +4,4 @@ endif
 
 obj-y := core.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_CGROUP_PERF)	 += cgroup.o
diff --git a/kernel/events/cgroup.c b/kernel/events/cgroup.c
new file mode 100644
index 0000000..5516928
--- /dev/null
+++ b/kernel/events/cgroup.c
@@ -0,0 +1,324 @@
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ */
+inline struct perf_cgroup *perf_cgroup_from_task(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, perf_subsys_id),
+			    struct perf_cgroup, css);
+}
+
+inline bool perf_cgroup_match(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+
+	return !event->cgrp || event->cgrp == cpuctx->cgrp;
+}
+
+static inline void perf_get_cgroup(struct perf_event *event)
+{
+	css_get(&event->cgrp->css);
+}
+
+inline int perf_cgroup_connect(int fd, struct perf_event *event,
+			       struct perf_event_attr *attr,
+			       struct perf_event *group_leader)
+{
+	struct perf_cgroup *cgrp;
+	struct cgroup_subsys_state *css;
+	struct file *file;
+	int ret = 0, fput_needed;
+
+	file = fget_light(fd, &fput_needed);
+	if (!file)
+		return -EBADF;
+
+	css = cgroup_css_from_dir(file, perf_subsys_id);
+	if (IS_ERR(css)) {
+		ret = PTR_ERR(css);
+		goto out;
+	}
+
+	cgrp = container_of(css, struct perf_cgroup, css);
+	event->cgrp = cgrp;
+
+	/* must be done before we fput() the file */
+	perf_get_cgroup(event);
+
+	/*
+	 * all events in a group must monitor
+	 * the same cgroup because a task belongs
+	 * to only one perf cgroup at a time
+	 */
+	if (group_leader && group_leader->cgrp != cgrp) {
+		perf_detach_cgroup(event);
+		ret = -EINVAL;
+	}
+out:
+	fput_light(file, fput_needed);
+	return ret;
+}
+
+static inline void perf_put_cgroup(struct perf_event *event)
+{
+	css_put(&event->cgrp->css);
+}
+
+inline void perf_detach_cgroup(struct perf_event *event)
+{
+	perf_put_cgroup(event);
+	event->cgrp = NULL;
+}
+
+inline u64 perf_cgroup_event_time(struct perf_event *event)
+{
+	struct perf_cgroup_info *t;
+
+	t = per_cpu_ptr(event->cgrp->info, event->cpu);
+	return t->time;
+}
+
+inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+			 struct perf_event_context *ctx)
+{
+	struct perf_event *sub;
+	u64 tstamp = perf_event_time(event);
+
+	if (!event->cgrp_defer_enabled)
+		return;
+
+	event->cgrp_defer_enabled = 0;
+
+	event->tstamp_enabled = tstamp - event->total_time_enabled;
+	list_for_each_entry(sub, &event->sibling_list, group_entry) {
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
+			sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+			sub->cgrp_defer_enabled = 0;
+		}
+	}
+}
+
+static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
+{
+	struct perf_cgroup_info *info;
+	u64 now;
+
+	now = perf_clock();
+
+	info = this_cpu_ptr(cgrp->info);
+
+	info->time += now - info->timestamp;
+	info->timestamp = now;
+}
+
+inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+{
+	struct perf_cgroup *cgrp_out = cpuctx->cgrp;
+	if (cgrp_out)
+		__update_cgrp_time(cgrp_out);
+}
+
+inline void update_cgrp_time_from_event(struct perf_event *event)
+{
+	struct perf_cgroup *cgrp;
+
+	/*
+	 * ensure we access cgroup data only when needed and
+	 * when we know the cgroup is pinned (css_get)
+	 */
+	if (!is_cgroup_event(event))
+		return;
+
+	cgrp = perf_cgroup_from_task(current);
+	/*
+	 * Do not update time when cgroup is not active
+	 */
+	if (cgrp == event->cgrp)
+		__update_cgrp_time(event->cgrp);
+}
+
+inline void perf_cgroup_set_timestamp(struct task_struct *task,
+				      struct perf_event_context *ctx)
+{
+	struct perf_cgroup *cgrp;
+	struct perf_cgroup_info *info;
+
+	/*
+	 * ctx->lock held by caller
+	 * ensure we do not access cgroup data
+	 * unless we have the cgroup pinned (css_get)
+	 */
+	if (!task || !ctx->nr_cgroups)
+		return;
+
+	cgrp = perf_cgroup_from_task(task);
+	info = this_cpu_ptr(cgrp->info);
+	info->timestamp = ctx->timestamp;
+}
+
+#define PERF_CGROUP_SWOUT	0x1 /* cgroup switch out every event */
+#define PERF_CGROUP_SWIN	0x2 /* cgroup switch in events based on task */
+
+/*
+ * reschedule events based on the cgroup constraint of task.
+ *
+ * mode SWOUT : schedule out everything
+ * mode SWIN : schedule in based on cgroup for next
+ */
+void perf_cgroup_switch(struct task_struct *task, int mode)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	/*
+	 * disable interrupts to avoid geting nr_cgroup
+	 * changes via __perf_event_disable(). Also
+	 * avoids preemption.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * we reschedule only in the presence of cgroup
+	 * constrained events.
+	 */
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+
+		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+		perf_pmu_disable(cpuctx->ctx.pmu);
+
+		/*
+		 * perf_cgroup_events says at least one
+		 * context on this CPU has cgroup events.
+		 *
+		 * ctx->nr_cgroups reports the number of cgroup
+		 * events for a context.
+		 */
+		if (cpuctx->ctx.nr_cgroups > 0) {
+
+			if (mode & PERF_CGROUP_SWOUT) {
+				cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+				/*
+				 * must not be done before ctxswout due
+				 * to event_filter_match() in event_sched_out()
+				 */
+				cpuctx->cgrp = NULL;
+			}
+
+			if (mode & PERF_CGROUP_SWIN) {
+				WARN_ON_ONCE(cpuctx->cgrp);
+				/* set cgrp before ctxsw in to
+				 * allow event_filter_match() to not
+				 * have to pass task around
+				 */
+				cpuctx->cgrp = perf_cgroup_from_task(task);
+				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
+			}
+		}
+
+		perf_pmu_enable(cpuctx->ctx.pmu);
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
+inline void perf_cgroup_sched_out(struct task_struct *task)
+{
+	perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+}
+
+inline void perf_cgroup_sched_in(struct task_struct *task)
+{
+	perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+}
+
+static int __perf_cgroup_move(void *info)
+{
+	struct task_struct *task = info;
+	perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+	return 0;
+}
+
+static void perf_cgroup_move(struct task_struct *task)
+{
+	task_function_call(task, __perf_cgroup_move, task);
+}
+
+static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+		struct cgroup *old_cgrp, struct task_struct *task,
+		bool threadgroup)
+{
+	perf_cgroup_move(task);
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+			perf_cgroup_move(c);
+		}
+		rcu_read_unlock();
+	}
+}
+
+static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
+		struct cgroup *old_cgrp, struct task_struct *task)
+{
+	/*
+	 * cgroup_exit() is called in the copy_process() failure path.
+	 * Ignore this case since the task hasn't ran yet, this avoids
+	 * trying to poke a half freed task state from generic code.
+	 */
+	if (!(task->flags & PF_EXITING))
+		return;
+
+	perf_cgroup_move(task);
+}
+
+static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup_subsys *ss,
+						      struct cgroup *cont)
+{
+	struct perf_cgroup *jc;
+
+	jc = kzalloc(sizeof(*jc), GFP_KERNEL);
+	if (!jc)
+		return ERR_PTR(-ENOMEM);
+
+	jc->info = alloc_percpu(struct perf_cgroup_info);
+	if (!jc->info) {
+		kfree(jc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return &jc->css;
+}
+
+static void perf_cgroup_destroy(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+	struct perf_cgroup *jc;
+	jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
+			  struct perf_cgroup, css);
+	free_percpu(jc->info);
+	kfree(jc);
+}
+
+struct cgroup_subsys perf_subsys = {
+	.name		= "perf_event",
+	.subsys_id	= perf_subsys_id,
+	.create		= perf_cgroup_create,
+	.destroy	= perf_cgroup_destroy,
+	.exit		= perf_cgroup_exit,
+	.attach		= perf_cgroup_attach,
+};
+
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0fc34a3..b65905f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -72,7 +72,7 @@ static void remote_function(void *data)
  *	    -ESRCH  - when the process isn't running
  *	    -EAGAIN - when the process moved away
  */
-static int
+int
 task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
 {
 	struct remote_function_call data = {
@@ -115,12 +115,6 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP)
 
-enum event_type_t {
-	EVENT_FLEXIBLE = 0x1,
-	EVENT_PINNED = 0x2,
-	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
-};
-
 /*
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
@@ -132,7 +126,7 @@ static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 
-static LIST_HEAD(pmus);
+LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
 
@@ -172,15 +166,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 
 static atomic64_t perf_event_id;
 
-static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
-			      enum event_type_t event_type);
-
-static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-			     enum event_type_t event_type,
-			     struct task_struct *task);
-
 static void update_context_time(struct perf_event_context *ctx);
-static u64 perf_event_time(struct perf_event *event);
 
 void __weak perf_event_print_debug(void)	{ }
 
@@ -189,366 +175,6 @@ extern __weak const char *perf_pmu_name(void)
 	return "pmu";
 }
 
-static inline u64 perf_clock(void)
-{
-	return local_clock();
-}
-
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-#ifdef CONFIG_CGROUP_PERF
-
-/*
- * Must ensure cgroup is pinned (css_get) before calling
- * this function. In other words, we cannot call this function
- * if there is no cgroup event for the current CPU context.
- */
-static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
-{
-	return container_of(task_subsys_state(task, perf_subsys_id),
-			struct perf_cgroup, css);
-}
-
-static inline bool
-perf_cgroup_match(struct perf_event *event)
-{
-	struct perf_event_context *ctx = event->ctx;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-	return !event->cgrp || event->cgrp == cpuctx->cgrp;
-}
-
-static inline void perf_get_cgroup(struct perf_event *event)
-{
-	css_get(&event->cgrp->css);
-}
-
-static inline void perf_put_cgroup(struct perf_event *event)
-{
-	css_put(&event->cgrp->css);
-}
-
-static inline void perf_detach_cgroup(struct perf_event *event)
-{
-	perf_put_cgroup(event);
-	event->cgrp = NULL;
-}
-
-static inline int is_cgroup_event(struct perf_event *event)
-{
-	return event->cgrp != NULL;
-}
-
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
-{
-	struct perf_cgroup_info *t;
-
-	t = per_cpu_ptr(event->cgrp->info, event->cpu);
-	return t->time;
-}
-
-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
-{
-	struct perf_cgroup_info *info;
-	u64 now;
-
-	now = perf_clock();
-
-	info = this_cpu_ptr(cgrp->info);
-
-	info->time += now - info->timestamp;
-	info->timestamp = now;
-}
-
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
-{
-	struct perf_cgroup *cgrp_out = cpuctx->cgrp;
-	if (cgrp_out)
-		__update_cgrp_time(cgrp_out);
-}
-
-static inline void update_cgrp_time_from_event(struct perf_event *event)
-{
-	struct perf_cgroup *cgrp;
-
-	/*
-	 * ensure we access cgroup data only when needed and
-	 * when we know the cgroup is pinned (css_get)
-	 */
-	if (!is_cgroup_event(event))
-		return;
-
-	cgrp = perf_cgroup_from_task(current);
-	/*
-	 * Do not update time when cgroup is not active
-	 */
-	if (cgrp == event->cgrp)
-		__update_cgrp_time(event->cgrp);
-}
-
-static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
-			  struct perf_event_context *ctx)
-{
-	struct perf_cgroup *cgrp;
-	struct perf_cgroup_info *info;
-
-	/*
-	 * ctx->lock held by caller
-	 * ensure we do not access cgroup data
-	 * unless we have the cgroup pinned (css_get)
-	 */
-	if (!task || !ctx->nr_cgroups)
-		return;
-
-	cgrp = perf_cgroup_from_task(task);
-	info = this_cpu_ptr(cgrp->info);
-	info->timestamp = ctx->timestamp;
-}
-
-#define PERF_CGROUP_SWOUT	0x1 /* cgroup switch out every event */
-#define PERF_CGROUP_SWIN	0x2 /* cgroup switch in events based on task */
-
-/*
- * reschedule events based on the cgroup constraint of task.
- *
- * mode SWOUT : schedule out everything
- * mode SWIN : schedule in based on cgroup for next
- */
-void perf_cgroup_switch(struct task_struct *task, int mode)
-{
-	struct perf_cpu_context *cpuctx;
-	struct pmu *pmu;
-	unsigned long flags;
-
-	/*
-	 * disable interrupts to avoid geting nr_cgroup
-	 * changes via __perf_event_disable(). Also
-	 * avoids preemption.
-	 */
-	local_irq_save(flags);
-
-	/*
-	 * we reschedule only in the presence of cgroup
-	 * constrained events.
-	 */
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-		perf_pmu_disable(cpuctx->ctx.pmu);
-
-		/*
-		 * perf_cgroup_events says at least one
-		 * context on this CPU has cgroup events.
-		 *
-		 * ctx->nr_cgroups reports the number of cgroup
-		 * events for a context.
-		 */
-		if (cpuctx->ctx.nr_cgroups > 0) {
-
-			if (mode & PERF_CGROUP_SWOUT) {
-				cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-				/*
-				 * must not be done before ctxswout due
-				 * to event_filter_match() in event_sched_out()
-				 */
-				cpuctx->cgrp = NULL;
-			}
-
-			if (mode & PERF_CGROUP_SWIN) {
-				WARN_ON_ONCE(cpuctx->cgrp);
-				/* set cgrp before ctxsw in to
-				 * allow event_filter_match() to not
-				 * have to pass task around
-				 */
-				cpuctx->cgrp = perf_cgroup_from_task(task);
-				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
-			}
-		}
-
-		perf_pmu_enable(cpuctx->ctx.pmu);
-	}
-
-	rcu_read_unlock();
-
-	local_irq_restore(flags);
-}
-
-static inline void perf_cgroup_sched_out(struct task_struct *task)
-{
-	perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *task)
-{
-	perf_cgroup_switch(task, PERF_CGROUP_SWIN);
-}
-
-static inline int perf_cgroup_connect(int fd, struct perf_event *event,
-				      struct perf_event_attr *attr,
-				      struct perf_event *group_leader)
-{
-	struct perf_cgroup *cgrp;
-	struct cgroup_subsys_state *css;
-	struct file *file;
-	int ret = 0, fput_needed;
-
-	file = fget_light(fd, &fput_needed);
-	if (!file)
-		return -EBADF;
-
-	css = cgroup_css_from_dir(file, perf_subsys_id);
-	if (IS_ERR(css)) {
-		ret = PTR_ERR(css);
-		goto out;
-	}
-
-	cgrp = container_of(css, struct perf_cgroup, css);
-	event->cgrp = cgrp;
-
-	/* must be done before we fput() the file */
-	perf_get_cgroup(event);
-
-	/*
-	 * all events in a group must monitor
-	 * the same cgroup because a task belongs
-	 * to only one perf cgroup at a time
-	 */
-	if (group_leader && group_leader->cgrp != cgrp) {
-		perf_detach_cgroup(event);
-		ret = -EINVAL;
-	}
-out:
-	fput_light(file, fput_needed);
-	return ret;
-}
-
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
-	struct perf_cgroup_info *t;
-	t = per_cpu_ptr(event->cgrp->info, event->cpu);
-	event->shadow_ctx_time = now - t->timestamp;
-}
-
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
-	/*
-	 * when the current task's perf cgroup does not match
-	 * the event's, we need to remember to call the
-	 * perf_mark_enable() function the first time a task with
-	 * a matching perf cgroup is scheduled in.
-	 */
-	if (is_cgroup_event(event) && !perf_cgroup_match(event))
-		event->cgrp_defer_enabled = 1;
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
-			 struct perf_event_context *ctx)
-{
-	struct perf_event *sub;
-	u64 tstamp = perf_event_time(event);
-
-	if (!event->cgrp_defer_enabled)
-		return;
-
-	event->cgrp_defer_enabled = 0;
-
-	event->tstamp_enabled = tstamp - event->total_time_enabled;
-	list_for_each_entry(sub, &event->sibling_list, group_entry) {
-		if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
-			sub->tstamp_enabled = tstamp - sub->total_time_enabled;
-			sub->cgrp_defer_enabled = 0;
-		}
-	}
-}
-#else /* !CONFIG_CGROUP_PERF */
-
-static inline bool
-perf_cgroup_match(struct perf_event *event)
-{
-	return true;
-}
-
-static inline void perf_detach_cgroup(struct perf_event *event)
-{}
-
-static inline int is_cgroup_event(struct perf_event *event)
-{
-	return 0;
-}
-
-static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
-{
-	return 0;
-}
-
-static inline void update_cgrp_time_from_event(struct perf_event *event)
-{
-}
-
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
-{
-}
-
-static inline void perf_cgroup_sched_out(struct task_struct *task)
-{
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *task)
-{
-}
-
-static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
-				      struct perf_event_attr *attr,
-				      struct perf_event *group_leader)
-{
-	return -EINVAL;
-}
-
-static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
-			  struct perf_event_context *ctx)
-{
-}
-
-void
-perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
-{
-}
-
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
-}
-
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
-{
-	return 0;
-}
-
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
-			 struct perf_event_context *ctx)
-{
-}
-#endif
-
 void perf_pmu_disable(struct pmu *pmu)
 {
 	int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -727,7 +353,7 @@ static void update_context_time(struct perf_event_context *ctx)
 	ctx->timestamp = now;
 }
 
-static u64 perf_event_time(struct perf_event *event)
+u64 perf_event_time(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 
@@ -1641,8 +1267,7 @@ static int __perf_event_enable(void *info)
 	__perf_event_mark_enabled(event, ctx);
 
 	if (!event_filter_match(event)) {
-		if (is_cgroup_event(event))
-			perf_cgroup_defer_enabled(event);
+		perf_cgroup_defer_enabled(event);
 		goto unlock;
 	}
 
@@ -1761,9 +1386,9 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	return 0;
 }
 
-static void ctx_sched_out(struct perf_event_context *ctx,
-			  struct perf_cpu_context *cpuctx,
-			  enum event_type_t event_type)
+void ctx_sched_out(struct perf_event_context *ctx,
+		   struct perf_cpu_context *cpuctx,
+		   enum event_type_t event_type)
 {
 	struct perf_event *event;
 
@@ -1988,15 +1613,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
 	cpuctx->task_ctx = NULL;
 }
 
-/*
- * Called with IRQs disabled
- */
-static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
-			      enum event_type_t event_type)
-{
-	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
-}
-
 static void
 ctx_pinned_sched_in(struct perf_event_context *ctx,
 		    struct perf_cpu_context *cpuctx)
@@ -2056,11 +1672,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 	}
 }
 
-static void
-ctx_sched_in(struct perf_event_context *ctx,
-	     struct perf_cpu_context *cpuctx,
-	     enum event_type_t event_type,
-	     struct task_struct *task)
+void ctx_sched_in(struct perf_event_context *ctx,
+		  struct perf_cpu_context *cpuctx,
+		  enum event_type_t event_type,
+		  struct task_struct *task)
 {
 	u64 now;
 
@@ -2087,15 +1702,6 @@ out:
 	raw_spin_unlock(&ctx->lock);
 }
 
-static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-			     enum event_type_t event_type,
-			     struct task_struct *task)
-{
-	struct perf_event_context *ctx = &cpuctx->ctx;
-
-	ctx_sched_in(ctx, cpuctx, event_type, task);
-}
-
 static void task_ctx_sched_in(struct perf_event_context *ctx,
 			      enum event_type_t event_type)
 {
@@ -7373,83 +6979,3 @@ unlock:
 	return ret;
 }
 device_initcall(perf_event_sysfs_init);
-
-#ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_create(
-	struct cgroup_subsys *ss, struct cgroup *cont)
-{
-	struct perf_cgroup *jc;
-
-	jc = kzalloc(sizeof(*jc), GFP_KERNEL);
-	if (!jc)
-		return ERR_PTR(-ENOMEM);
-
-	jc->info = alloc_percpu(struct perf_cgroup_info);
-	if (!jc->info) {
-		kfree(jc);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return &jc->css;
-}
-
-static void perf_cgroup_destroy(struct cgroup_subsys *ss,
-				struct cgroup *cont)
-{
-	struct perf_cgroup *jc;
-	jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
-			  struct perf_cgroup, css);
-	free_percpu(jc->info);
-	kfree(jc);
-}
-
-static int __perf_cgroup_move(void *info)
-{
-	struct task_struct *task = info;
-	perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
-	return 0;
-}
-
-static void perf_cgroup_move(struct task_struct *task)
-{
-	task_function_call(task, __perf_cgroup_move, task);
-}
-
-static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		struct cgroup *old_cgrp, struct task_struct *task,
-		bool threadgroup)
-{
-	perf_cgroup_move(task);
-	if (threadgroup) {
-		struct task_struct *c;
-		rcu_read_lock();
-		list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-			perf_cgroup_move(c);
-		}
-		rcu_read_unlock();
-	}
-}
-
-static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		struct cgroup *old_cgrp, struct task_struct *task)
-{
-	/*
-	 * cgroup_exit() is called in the copy_process() failure path.
-	 * Ignore this case since the task hasn't ran yet, this avoids
-	 * trying to poke a half freed task state from generic code.
-	 */
-	if (!(task->flags & PF_EXITING))
-		return;
-
-	perf_cgroup_move(task);
-}
-
-struct cgroup_subsys perf_subsys = {
-	.name		= "perf_event",
-	.subsys_id	= perf_subsys_id,
-	.create		= perf_cgroup_create,
-	.destroy	= perf_cgroup_destroy,
-	.exit		= perf_cgroup_exit,
-	.attach		= perf_cgroup_attach,
-};
-#endif /* CONFIG_CGROUP_PERF */
-- 
1.7.4.rc2


-- 
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

             reply	other threads:[~2011-05-11 15:49 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-11 12:11 Borislav Petkov [this message]
2011-05-11 13:46 ` [RFC PATCH] perf: Carve out cgroup-related code Peter Zijlstra
2011-05-11 14:00   ` Ingo Molnar
2011-05-11 14:02   ` Borislav Petkov
2011-05-11 14:13     ` Ingo Molnar
2011-05-11 14:31       ` Borislav Petkov
2011-05-11 15:21         ` Ingo Molnar
2011-05-11 17:09           ` Borislav Petkov
2011-05-12  8:51             ` Peter Zijlstra
2011-05-12  8:54               ` Peter Zijlstra
2011-05-12 10:18               ` Borislav Petkov
2011-05-12 14:31                 ` Frederic Weisbecker
2011-05-12 15:13                   ` Lin Ming
2011-05-14  8:44                   ` Borislav Petkov
2011-05-14 13:02                     ` Frederic Weisbecker
2011-05-12 10:36             ` Ingo Molnar
2011-05-12 10:54               ` Borislav Petkov
2011-05-12 11:03                 ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110511121135.GA25865@aftab \
    --to=bp@amd64.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.