All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Carrillo-Cisneros <davidcc@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Ingo Molnar <mingo@redhat.com>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>,
	Matt Fleming <matt@codeblueprint.co.uk>,
	Tony Luck <tony.luck@intel.com>,
	Stephane Eranian <eranian@google.com>,
	Paul Turner <pjt@google.com>,
	David Carrillo-Cisneros <davidcc@google.com>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v2 16/32] perf/x86/intel/cqm: add cgroup support
Date: Wed, 11 May 2016 16:02:16 -0700	[thread overview]
Message-ID: <1463007752-116802-17-git-send-email-davidcc@google.com> (raw)
In-Reply-To: <1463007752-116802-1-git-send-email-davidcc@google.com>

Create a monr per monitored cgroup and insert it into the monr hierarchy.
Task events are leaves of the lowest monitored ancestor cgroup (the lowest
cgroup ancestor with a monr).

CQM starts after the cgroup subsystem, and uses the cqm_initialized_key
static key to avoid interfering with the perf cgroup logic until
properly initialized. The cqm_init_mutex protects the initialization.

Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
---
 arch/x86/events/intel/cqm.c       | 595 +++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cqm.h       |  16 +
 arch/x86/include/asm/perf_event.h |  32 ++
 3 files changed, 639 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 0771154..fb62bac 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -89,6 +89,13 @@ struct monr *monr_hrchy_root;
 
 struct pkg_data **cqm_pkgs_data;
 
+/*
+ * Synchronizes initialization of cqm with cgroups.
+ */
+static DEFINE_MUTEX(cqm_init_mutex);
+
+DEFINE_STATIC_KEY_FALSE(cqm_initialized_key);
+
 static inline bool __pmonr__in_astate(struct pmonr *pmonr)
 {
 	lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
@@ -119,6 +126,9 @@ static inline bool __pmonr__in_instate(struct pmonr *pmonr)
 	return __pmonr__in_istate(pmonr) && !__pmonr__in_ilstate(pmonr);
 }
 
+/* Whether the monr is root. Recall that a cgroup may be non-root and yet
+ * point to the root monr.
+ */
 static inline bool monr__is_root(struct monr *monr)
 {
 	return monr_hrchy_root == monr;
@@ -165,6 +175,19 @@ static inline void __monr__clear_mon_active(struct monr *monr)
 	monr->flags &= ~MONR_MON_ACTIVE;
 }
 
+/* True when monr has no mon_cgrp set but does have a mon_event_group. */
+static inline bool monr_is_event_type(struct monr *monr)
+{
+	if (monr->mon_cgrp)
+		return false;
+	return monr->mon_event_group != NULL;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct cgroup_subsys_state *get_root_perf_css(void)
+{
+	struct cgroup_subsys_state *root_css;
+
+	/* The perf_event css of init_css_set is the root cgroup's css. */
+	root_css = init_css_set.subsys[perf_event_cgrp_id];
+	return root_css;
+}
+#endif
+
 static inline bool __valid_pkg_id(u16 pkg_id)
 {
 	return pkg_id < topology_max_packages();
@@ -706,6 +729,7 @@ static struct monr *monr_alloc(void)
 	monr->parent = NULL;
 	INIT_LIST_HEAD(&monr->children);
 	INIT_LIST_HEAD(&monr->parent_entry);
+	monr->mon_cgrp = NULL;
 	monr->mon_event_group = NULL;
 
 	monr->pmonrs = kmalloc(
@@ -934,7 +958,7 @@ retry:
 }
 
 /*
- * Wrappers for monr manipulation in events.
+ * Wrappers for monr manipulation in events and cgroups.
  *
  */
 static inline struct monr *monr_from_event(struct perf_event *event)
@@ -947,6 +971,100 @@ static inline void event_set_monr(struct perf_event *event, struct monr *monr)
 	WRITE_ONCE(event->hw.cqm_monr, monr);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+/*
+ * Read the monr stored in cgrp's arch_info.
+ * Warns once when arch_info is not set.
+ */
+static inline struct monr *monr_from_perf_cgroup(struct perf_cgroup *cgrp)
+{
+	struct cgrp_cqm_info *info;
+
+	info = (struct cgrp_cqm_info *)READ_ONCE(cgrp->arch_info);
+	WARN_ON_ONCE(!info);
+	return READ_ONCE(info->monr);
+}
+
+/* Read monr's back pointer to its cgroup. Warns once on a NULL monr. */
+static inline struct perf_cgroup *monr__get_mon_cgrp(struct monr *monr)
+{
+	struct perf_cgroup *mon_cgrp;
+
+	WARN_ON_ONCE(!monr);
+	mon_cgrp = READ_ONCE(monr->mon_cgrp);
+	return mon_cgrp;
+}
+
+/* Store cgrp (possibly NULL) as monr's back pointer to its cgroup. */
+static inline void
+monr__set_mon_cgrp(struct monr *monr, struct perf_cgroup *cgrp)
+{
+	WRITE_ONCE(monr->mon_cgrp, cgrp);
+}
+
+/* Store monr in cgrp's cqm info as the monr that cgrp points to. */
+static inline void
+perf_cgroup_set_monr(struct perf_cgroup *cgrp, struct monr *monr)
+{
+	struct cgrp_cqm_info *info = cgrp_to_cqm_info(cgrp);
+
+	WRITE_ONCE(info->monr, monr);
+}
+
+/*
+ * A perf_cgroup is monitored when it's set in a monr->mon_cgrp.
+ * Several perf_cgroups may point to the same monr, but that monr's
+ * mon_cgrp points back to at most one of them. A monitored cgroup is
+ * necessarily referenced back by its monr's mon_cgrp.
+ *
+ * Returns true only when cgrp's monr points back at cgrp itself.
+ */
+static inline bool perf_cgroup_is_monitored(struct perf_cgroup *cgrp)
+{
+	struct monr *monr;
+	struct perf_cgroup *monr_cgrp;
+
+	/* monr can be referenced by a cgroup other than the one in its
+	 * mon_cgrp, be careful.
+	 */
+	monr = monr_from_perf_cgroup(cgrp);
+
+	/* Read mon_cgrp once and test that single snapshot, so the NULL
+	 * check and the comparison cannot observe different values.
+	 * The root monr has no cgroup associated before initialization,
+	 * hence the explicit NULL check.
+	 */
+	monr_cgrp = monr__get_mon_cgrp(monr);
+	return monr_cgrp && monr_cgrp == cgrp;
+}
+
+/* Set css's monr to the monr of its lowest monitored ancestor. */
+static inline void __css_set_monr_to_lma(struct cgroup_subsys_state *css)
+{
+	struct monr *lma_monr;
+
+	lockdep_assert_held(&cqm_mutex);
+
+	/* A css without parent uses the root monr. Any other css takes
+	 * whatever monr its parent currently points to.
+	 */
+	if (css->parent)
+		lma_monr = monr_from_perf_cgroup(
+				css_to_perf_cgroup(css->parent));
+	else
+		lma_monr = monr_hrchy_root;
+
+	perf_cgroup_set_monr(css_to_perf_cgroup(css), lma_monr);
+}
+
+/*
+ * Make cgrp monitored by monr: point cgrp at monr first, then set
+ * monr->mon_cgrp back to cgrp. Asserts that the monr hierarchy mutexes
+ * are held.
+ */
+static inline void
+perf_cgroup_make_monitored(struct perf_cgroup *cgrp, struct monr *monr)
+{
+	monr_hrchy_assert_held_mutexes();
+	perf_cgroup_set_monr(cgrp, monr);
+	/* Make sure that monr is a valid monr for css before it's visible
+	 * to any reader of css.
+	 */
+	smp_wmb();
+	monr__set_mon_cgrp(monr, cgrp);
+}
+
+/*
+ * Make cgrp unmonitored: point cgrp at the monr of its lowest monitored
+ * ancestor first, then clear the mon_cgrp back pointer of the monr it
+ * used before. Asserts that the monr hierarchy mutexes are held.
+ */
+static inline void
+perf_cgroup_make_unmonitored(struct perf_cgroup *cgrp)
+{
+	struct monr *monr = monr_from_perf_cgroup(cgrp);
+
+	monr_hrchy_assert_held_mutexes();
+	__css_set_monr_to_lma(&cgrp->css);
+	/* Make sure that all readers of css's monr see the lma's monr
+	 * before monr stops being a valid monr for css.
+	 */
+	smp_wmb();
+	monr__set_mon_cgrp(monr, NULL);
+}
+#endif
+
 /*
  * Always finds a rmid_entry to schedule. To be called during scheduler.
  * A fast path that only uses read_lock for common case when rmid for current
@@ -1055,6 +1173,286 @@ __monr_hrchy_remove_leaf(struct monr *monr)
 	monr->parent = NULL;
 }
 
+#ifdef CONFIG_CGROUP_PERF
+/* Parent perf_cgroup of cgrp, or NULL when cgrp's css has no parent. */
+static struct perf_cgroup *__perf_cgroup_parent(struct perf_cgroup *cgrp)
+{
+	if (!cgrp->css.parent)
+		return NULL;
+	return css_to_perf_cgroup(cgrp->css.parent);
+}
+
+/* Get cgroup for both task and cgroup event.
+ * Events without PERF_ATTACH_TASK return event->cgrp directly; task events
+ * return the perf_cgroup containing the perf_event css of the target task.
+ */
+static inline struct perf_cgroup *
+perf_cgroup_from_event(struct perf_event *event)
+{
+#ifdef CONFIG_LOCKDEP
+	/* NOTE(review): rcu_safe only exists under CONFIG_LOCKDEP, yet it is
+	 * named unconditionally below. This presumably relies on
+	 * task_css_check() not expanding its last argument when lock
+	 * debugging is off - confirm.
+	 */
+	u16 pkg_id = topology_physical_package_id(smp_processor_id());
+	bool rcu_safe = lockdep_is_held(
+		&cqm_pkgs_data[pkg_id]->pkg_data_lock);
+#endif
+
+	if (!(event->attach_state & PERF_ATTACH_TASK))
+		return event->cgrp;
+
+	return container_of(
+		task_css_check(event->hw.target, perf_event_cgrp_id, rcu_safe),
+		struct perf_cgroup, css);
+}
+
+/* Find the lowest monitored ancestor of cgrp, excluding cgrp itself.
+ * Return NULL if no ancestor is monitored.
+ */
+struct perf_cgroup *__cgroup_find_lma(struct perf_cgroup *cgrp)
+{
+	struct perf_cgroup *ancestor;
+
+	for (ancestor = __perf_cgroup_parent(cgrp); ancestor;
+	     ancestor = __perf_cgroup_parent(ancestor)) {
+		if (perf_cgroup_is_monitored(ancestor))
+			break;
+	}
+	return ancestor;
+}
+
+/* Similar to css_next_descendant_pre but skips the subtree rooted by pos.
+ * Returns NULL once the walk climbs back up to root.
+ */
+struct cgroup_subsys_state *
+css_skip_subtree_pre(struct cgroup_subsys_state *pos,
+		     struct cgroup_subsys_state *root)
+{
+	struct cgroup_subsys_state *candidate;
+
+	WARN_ON_ONCE(!pos);
+	/* Climb towards root, returning the first non-NULL result of
+	 * css_next_child() found on the way up.
+	 */
+	for (; pos != root; pos = pos->parent) {
+		candidate = css_next_child(pos, pos->parent);
+		if (candidate)
+			return candidate;
+	}
+	return NULL;
+}
+
+/* Make the monrs of all descendants of css depend on new_monr.
+ * @css:	css whose descendants are walked (css itself is not updated).
+ * @new_monr:	monr that unmonitored descendants are pointed to, and that
+ *		becomes the parent of the monrs of monitored descendants.
+ *
+ * Asserts that cqm_mutex and the monr hierarchy mutexes are held.
+ */
+inline void __css_subtree_update_monrs(struct cgroup_subsys_state *css,
+				       struct monr *new_monr)
+{
+	struct cgroup_subsys_state *pos_css;
+	int i;
+	unsigned long flags;
+
+	lockdep_assert_held(&cqm_mutex);
+	monr_hrchy_assert_held_mutexes();
+
+	rcu_read_lock();
+
+	/* Iterate over descendants of css in pre-order, in a way
+	 * similar to css_for_each_descendant_pre, but skipping the subtrees
+	 * rooted by css's with a monitored cgroup, since the elements
+	 * in those subtrees do not need to be updated.
+	 */
+	pos_css = css_next_descendant_pre(css, css);
+	while (pos_css) {
+		struct perf_cgroup *pos_cgrp = css_to_perf_cgroup(pos_css);
+		struct monr *pos_monr = monr_from_perf_cgroup(pos_cgrp);
+
+		/* Skip css that are not online, sync'ed with cqm_mutex. */
+		if (!(pos_css->flags & CSS_ONLINE)) {
+			pos_css = css_next_descendant_pre(pos_css, css);
+			continue;
+		}
+		/* Unmonitored descendant: point its monr pointer to new_monr
+		 * and keep descending.
+		 */
+		if (!perf_cgroup_is_monitored(pos_cgrp)) {
+			perf_cgroup_set_monr(pos_cgrp, new_monr);
+			pos_css = css_next_descendant_pre(pos_css, css);
+			continue;
+		}
+		/* Monitored descendant: re-parent its own monr to new_monr. */
+		monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+		pos_monr->parent = new_monr;
+		list_move_tail(&pos_monr->parent_entry, &new_monr->children);
+		monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+		/* Don't go down the subtree in pos_css since pos_monr is the
+		 * lma for all its descendants.
+		 */
+		pos_css = css_skip_subtree_pre(pos_css, css);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Start monitoring the cgroup of css.
+ *
+ * For the root cgroup this attaches it to the pre-existing root monr.
+ * Otherwise a new monr is allocated and inserted as a child of the monr of
+ * the lowest monitored ancestor, the cgroups below css are updated to
+ * depend on it, and task-event monrs whose cgroup descends from css are
+ * moved underneath it.
+ *
+ * Must be called with cqm_mutex held. The monr hierarchy mutexes are
+ * acquired here and released on every return path.
+ *
+ * Returns 0 on success, -1 if the cgroup is already monitored, or the
+ * error carried by monr_alloc().
+ */
+static inline int __css_start_monitoring(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp, *cgrp_lma, *pos_cgrp;
+	struct monr *monr, *monr_parent, *pos_monr, *tmp_monr;
+	unsigned long flags;
+	int i;
+
+	lockdep_assert_held(&cqm_mutex);
+
+	/* Hold mutexes to prevent all rotation threads in all packages from
+	 * messing with this.
+	 */
+	monr_hrchy_acquire_mutexes();
+	cgrp = css_to_perf_cgroup(css);
+	if (WARN_ON_ONCE(perf_cgroup_is_monitored(cgrp))) {
+		/* Do not leak the hierarchy mutexes on the error path. */
+		monr_hrchy_release_mutexes();
+		return -1;
+	}
+
+	/* When css is root cgroup's css, attach to the pre-existing
+	 * and active root monr.
+	 */
+	cgrp_lma = __cgroup_find_lma(cgrp);
+	if (!cgrp_lma) {
+		/* monr of root cgrp must be monr_hrchy_root. */
+		WARN_ON_ONCE(!monr__is_root(monr_from_perf_cgroup(cgrp)));
+		perf_cgroup_make_monitored(cgrp, monr_hrchy_root);
+		monr_hrchy_release_mutexes();
+		return 0;
+	}
+	/* The monr for the lowest monitored ancestor is direct ancestor
+	 * of monr in the monr hierarchy.
+	 */
+	monr_parent = monr_from_perf_cgroup(cgrp_lma);
+
+	/* Create new monr. */
+	monr = monr_alloc();
+	if (IS_ERR(monr)) {
+		monr_hrchy_release_mutexes();
+		return PTR_ERR(monr);
+	}
+
+	/* monr has no children yet so it is to be inserted in hierarchy with
+	 * all its pmonrs in (U)state.
+	 * We hold locks until monr_hrchy changes are complete, to prevent
+	 * possible state transition for the pmonrs in monr while still
+	 * allowing to read the prmid_summary in the scheduler path.
+	 */
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+	__monr_hrchy_insert_leaf(monr, monr_parent);
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	/* Make sure monr is in hierarchy before attaching monr to cgroup. */
+	barrier();
+
+	perf_cgroup_make_monitored(cgrp, monr);
+	__css_subtree_update_monrs(css, monr);
+
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+	/* Move task-event monrs that are descendant from css's cgroup. */
+	list_for_each_entry_safe(pos_monr, tmp_monr,
+				 &monr_parent->children, parent_entry) {
+		if (!monr_is_event_type(pos_monr))
+			continue;
+		/* all events in event group must have the same cgroup.
+		 * No RCU read lock necessary for task_css_check since calling
+		 * inside critical section.
+		 */
+		pos_cgrp = perf_cgroup_from_event(pos_monr->mon_event_group);
+		if (!cgroup_is_descendant(pos_cgrp->css.cgroup,
+					  cgrp->css.cgroup))
+			continue;
+		pos_monr->parent = monr;
+		list_move_tail(&pos_monr->parent_entry, &monr->children);
+	}
+	/* Make sure monitoring starts after all monrs have moved. */
+	barrier();
+
+	__monr__set_mon_active(monr);
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	monr_hrchy_release_mutexes();
+	return 0;
+}
+
+/*
+ * Stop monitoring the cgroup of css.
+ *
+ * For the root cgroup this only detaches the cgroup from the root monr,
+ * which is kept. Otherwise the cgroup's monr is moved to (U)state and
+ * deactivated, the cgroups below css and the monr's children are handed
+ * over to the monr of the lowest monitored ancestor, and the monr is
+ * removed from the hierarchy and deallocated.
+ *
+ * Must be called with cqm_mutex held. The monr hierarchy mutexes are
+ * acquired here and released on every return path.
+ *
+ * Returns 0 on success or -1 if the cgroup is not monitored.
+ */
+static inline int __css_stop_monitoring(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp, *cgrp_lma;
+	struct monr *monr, *monr_parent, *pos_monr;
+	unsigned long flags;
+	int i;
+
+	lockdep_assert_held(&cqm_mutex);
+
+	monr_hrchy_acquire_mutexes();
+	cgrp = css_to_perf_cgroup(css);
+	if (WARN_ON_ONCE(!perf_cgroup_is_monitored(cgrp))) {
+		/* Do not leak the hierarchy mutexes on the error path. */
+		monr_hrchy_release_mutexes();
+		return -1;
+	}
+
+	monr = monr_from_perf_cgroup(cgrp);
+
+	/* When css is root cgroup's css, detach cgroup but do not
+	 * destroy monr.
+	 */
+	cgrp_lma = __cgroup_find_lma(cgrp);
+	if (!cgrp_lma) {
+		/* monr of root cgrp must be monr_hrchy_root. */
+		WARN_ON_ONCE(!monr__is_root(monr_from_perf_cgroup(cgrp)));
+		perf_cgroup_make_unmonitored(cgrp);
+		monr_hrchy_release_mutexes();
+		return 0;
+	}
+	/* The monr for the lowest monitored ancestor is direct ancestor
+	 * of monr in the monr hierarchy.
+	 */
+	monr_parent = monr_from_perf_cgroup(cgrp_lma);
+
+	/* Lock together the transition to (U)state and clearing
+	 * MONR_MON_ACTIVE to prevent prmids to return to (A)state
+	 * or (I)state in between.
+	 */
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+	cqm_pkg_id_for_each_online(i)
+		__pmonr__to_ustate(monr->pmonrs[i]);
+	barrier();
+	__monr__clear_mon_active(monr);
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	__css_subtree_update_monrs(css, monr_parent);
+
+	/*
+	 * Move the children monrs that are not cgroups.
+	 */
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+
+	list_for_each_entry(pos_monr, &monr->children, parent_entry)
+		pos_monr->parent = monr_parent;
+	list_splice_tail_init(&monr->children, &monr_parent->children);
+	perf_cgroup_make_unmonitored(cgrp);
+	__monr_hrchy_remove_leaf(monr);
+
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	monr_hrchy_release_mutexes();
+	monr_dealloc(monr);
+	return 0;
+}
+
+/* Attaching an event to a cgroup starts monitoring in the cgroup.
+ * If the cgroup is already monitored, just use its pre-existing monr.
+ *
+ * Must be called with cqm_mutex held. Returns 0 on success or the error
+ * returned by __css_start_monitoring().
+ */
+static int __monr_hrchy_attach_cgroup_event(struct perf_event *event,
+					    struct perf_cgroup *perf_cgrp)
+{
+	struct monr *monr;
+	int ret;
+
+	lockdep_assert_held(&cqm_mutex);
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_TASK);
+	WARN_ON_ONCE(monr_from_event(event));
+	WARN_ON_ONCE(!perf_cgrp);
+
+	if (!perf_cgroup_is_monitored(perf_cgrp)) {
+		/* Hold a css reference while monitoring is being started. */
+		css_get(&perf_cgrp->css);
+		ret = __css_start_monitoring(&perf_cgrp->css);
+		css_put(&perf_cgrp->css);
+		if (ret)
+			return ret;
+	}
+
+	/* At this point, cgrp is always monitored, use its monr. */
+	monr = monr_from_perf_cgroup(perf_cgrp);
+
+	/* Link event and monr to each other. */
+	event_set_monr(event, monr);
+	monr->mon_event_group = event;
+	return 0;
+}
+#endif
+
 static int __monr_hrchy_attach_cpu_event(struct perf_event *event)
 {
 	lockdep_assert_held(&cqm_mutex);
@@ -1096,12 +1494,30 @@ static int __monr_hrchy_attach_task_event(struct perf_event *event,
 static int monr_hrchy_attach_event(struct perf_event *event)
 {
 	struct monr *monr_parent;
+	bool has_cgrp = false;
+#ifdef CONFIG_CGROUP_PERF
+	struct perf_cgroup *perf_cgrp;
+
+	has_cgrp = event->cgrp;
+#endif
 
-	if (!event->cgrp && !(event->attach_state & PERF_ATTACH_TASK))
+	if (!has_cgrp && !(event->attach_state & PERF_ATTACH_TASK))
 		return __monr_hrchy_attach_cpu_event(event);
 
+#ifdef CONFIG_CGROUP_PERF
+	/* Task events become leaves, cgroup events reuse the cgroup's monr */
+	if (event->cgrp)
+		return __monr_hrchy_attach_cgroup_event(event, event->cgrp);
+
+	rcu_read_lock();
+	perf_cgrp = perf_cgroup_from_event(event);
+	rcu_read_unlock();
+
+	monr_parent = monr_from_perf_cgroup(perf_cgrp);
+#else
 	/* Two-levels hierarchy: Root and all event monr underneath it. */
 	monr_parent = monr_hrchy_root;
+#endif
 	return __monr_hrchy_attach_task_event(event, monr_parent);
 }
 
@@ -1113,7 +1529,7 @@ static int monr_hrchy_attach_event(struct perf_event *event)
  */
 static bool __match_event(struct perf_event *a, struct perf_event *b)
 {
-	/* Per-cpu and task events don't mix */
+	/* Cgroup/non-task per-cpu and task events don't mix */
 	if ((a->attach_state & PERF_ATTACH_TASK) !=
 	    (b->attach_state & PERF_ATTACH_TASK))
 		return false;
@@ -2171,6 +2587,129 @@ static struct pmu intel_cqm_pmu = {
 	.read		     = intel_cqm_event_read,
 };
 
+#ifdef CONFIG_CGROUP_PERF
+/* XXX: Add hooks to attach/detach a task with a monr to/from a cgroup. */
+int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+				      struct cgroup_subsys_state *new_css)
+{
+	struct cgrp_cqm_info *cqm_info;
+
+	cqm_info = kmalloc(sizeof(*cqm_info), GFP_KERNEL);
+	if (!cqm_info)
+		return -ENOMEM;
+
+	/* Start with no monr and continuous monitoring disabled. */
+	cqm_info->monr = NULL;
+	cqm_info->cont_monitoring = false;
+	css_to_perf_cgroup(new_css)->arch_info = cqm_info;
+
+	return 0;
+}
+
+/* Free the cqm info of css's perf_cgroup and clear the stale pointer. */
+void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp = css_to_perf_cgroup(css);
+	struct cgrp_cqm_info *cqm_info = cgrp_to_cqm_info(cgrp);
+
+	kfree(cqm_info);
+	cgrp->arch_info = NULL;
+}
+
+/* Do the bulk of arch_css_online. To be called when CQM starts after
+ * css has gone online. Must be called with cqm_mutex held.
+ */
+static inline int __css_go_online(struct cgroup_subsys_state *css)
+{
+	struct cgrp_cqm_info *cqm_info = css_to_cqm_info(css);
+
+	lockdep_assert_held(&cqm_mutex);
+
+	/* css must not be used in monr hierarchy before having
+	 * set its monr in this step.
+	 */
+	__css_set_monr_to_lma(css);
+
+	/* Root monr is always monitoring. */
+	if (!css->parent)
+		cqm_info->cont_monitoring = true;
+
+	if (!cqm_info->cont_monitoring)
+		return 0;
+	return __css_start_monitoring(css);
+}
+
+/* Bring css online in CQM. A no-op (returning 0) until CQM is initialized. */
+int perf_cgroup_arch_css_online(struct cgroup_subsys_state *css)
+{
+	int ret = 0;
+
+	/* use cqm_init_mutex to synchronize with
+	 * __start_monitoring_all_cgroups.
+	 */
+	mutex_lock(&cqm_init_mutex);
+	if (!static_branch_unlikely(&cqm_initialized_key))
+		goto out;
+
+	mutex_lock(&cqm_mutex);
+	ret = __css_go_online(css);
+	mutex_unlock(&cqm_mutex);
+	WARN_ON_ONCE(ret);
+out:
+	mutex_unlock(&cqm_init_mutex);
+	return ret;
+}
+
+/* Take css offline in CQM. Does nothing until CQM is initialized or when
+ * the cgroup is not monitored.
+ */
+void perf_cgroup_arch_css_offline(struct cgroup_subsys_state *css)
+{
+	int ret = 0;
+	struct monr *monr;
+	struct perf_cgroup *cgrp = css_to_perf_cgroup(css);
+
+	/* Serialize against CQM initialization. */
+	mutex_lock(&cqm_init_mutex);
+
+	if (!static_branch_unlikely(&cqm_initialized_key))
+		goto out;
+
+	mutex_lock(&cqm_mutex);
+
+	monr = monr_from_perf_cgroup(cgrp);
+	if (!perf_cgroup_is_monitored(cgrp))
+		goto out_cqm;
+
+	/* Stop monitoring for the css's monr only if no more events need it.
+	 * If events need the monr, it will be destroyed when the events that
+	 * use it are destroyed.
+	 */
+	if (monr->mon_event_group) {
+		monr_hrchy_acquire_mutexes();
+		perf_cgroup_make_unmonitored(cgrp);
+		monr_hrchy_release_mutexes();
+	} else {
+		ret = __css_stop_monitoring(css);
+		WARN_ON_ONCE(ret);
+	}
+
+out_cqm:
+	mutex_unlock(&cqm_mutex);
+out:
+	mutex_unlock(&cqm_init_mutex);
+	/* NOTE(review): ret is only ever set right before the WARN_ON_ONCE
+	 * above, so this second check looks redundant - confirm.
+	 */
+	WARN_ON_ONCE(ret);
+}
+
+/* Clear css's monr pointer on release. A no-op until CQM is initialized. */
+void perf_cgroup_arch_css_released(struct cgroup_subsys_state *css)
+{
+	mutex_lock(&cqm_init_mutex);
+	if (!static_branch_unlikely(&cqm_initialized_key))
+		goto out;
+
+	/*
+	 * Remove css from monr hierarchy now that css is about to
+	 * leave the cgroup hierarchy.
+	 */
+	mutex_lock(&cqm_mutex);
+	perf_cgroup_set_monr(css_to_perf_cgroup(css), NULL);
+	mutex_unlock(&cqm_mutex);
+out:
+	mutex_unlock(&cqm_init_mutex);
+}
+
+#endif
+
 static inline void cqm_pick_event_reader(int cpu)
 {
 	u16 pkg_id = topology_physical_package_id(cpu);
@@ -2235,6 +2774,39 @@ static const struct x86_cpu_id intel_cqm_match[] = {
 	{}
 };
 
+#ifdef CONFIG_CGROUP_PERF
+/* Start monitoring for all cgroups in cgroup hierarchy.
+ *
+ * Walks every online perf_event css in pre-order and brings it online in
+ * CQM. Must be called with cqm_init_mutex held.
+ *
+ * Returns 0 on success or the first error from __css_go_online().
+ */
+static int __start_monitoring_all_cgroups(void)
+{
+	int ret = 0;
+	struct cgroup_subsys_state *css, *css_root;
+
+	lockdep_assert_held(&cqm_init_mutex);
+
+	rcu_read_lock();
+	/* Get css for root cgroup */
+	css_root = get_root_perf_css();
+
+	css_for_each_descendant_pre(css, css_root) {
+		if (!css_tryget_online(css))
+			continue;
+
+		/* The reference taken above keeps css alive while RCU is
+		 * dropped to take the sleeping cqm_mutex.
+		 */
+		rcu_read_unlock();
+		mutex_lock(&cqm_mutex);
+		ret = __css_go_online(css);
+		mutex_unlock(&cqm_mutex);
+
+		/* Re-enter the RCU read section before dropping the
+		 * reference: css is the iteration cursor and must not be
+		 * freed before the walk advances past it.
+		 */
+		rcu_read_lock();
+		css_put(css);
+		if (ret)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+#endif
+
 static int __init intel_cqm_init(void)
 {
 	char *str, scale[20];
@@ -2316,17 +2888,32 @@ static int __init intel_cqm_init(void)
 
 	__perf_cpu_notifier(intel_cqm_cpu_notifier);
 
+	/* Use cqm_init_mutex to synchronize with css's online/offline. */
+	mutex_lock(&cqm_init_mutex);
+
+#ifdef CONFIG_CGROUP_PERF
+	ret = __start_monitoring_all_cgroups();
+	if (ret)
+		goto error_init_mutex;
+#endif
+
 	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
 	if (ret)
-		goto error;
+		goto error_init_mutex;
 
 	cpu_notifier_register_done();
 
+	static_branch_enable(&cqm_initialized_key);
+
+	mutex_unlock(&cqm_init_mutex);
+
 	pr_info("Intel CQM monitoring enabled with at least %u rmids per package.\n",
 		min_max_rmid + 1);
 
 	return ret;
 
+error_init_mutex:
+	mutex_unlock(&cqm_init_mutex);
 error:
 	pr_err("Intel CQM perf registration failed: %d\n", ret);
 	cpu_notifier_register_done();
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index 0467c52..a66fe02 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -316,6 +316,7 @@ struct pkg_data {
  * struct monr: MONitored Resource.
  * @flags:		Flags field for monr (XXX: More flags will be added
  *			with MBM).
+ * @mon_cgrp:		The cgroup associated with this monr, if any
  * @mon_event_group:	The head of event's group that use this monr, if any.
  * @parent:		Parent in monr hierarchy.
  * @children:		List of children in monr hierarchy.
@@ -336,6 +337,7 @@ struct pkg_data {
 struct monr {
 	u16				flags;
 	/* Back reference pointers */
+	struct perf_cgroup		*mon_cgrp;
 	struct perf_event		*mon_event_group;
 
 	struct monr			*parent;
@@ -514,3 +516,17 @@ static unsigned int __cqm_min_progress_rate = CQM_DEFAULT_MIN_PROGRESS_RATE;
  * It's units are bytes must be scaled by cqm_l3_scale to obtain cache lines.
  */
 static unsigned int __intel_cqm_max_threshold;
+
+#ifdef CONFIG_CGROUP_PERF
+
+/* Per-perf_cgroup CQM state, stored in perf_cgroup's arch_info. */
+struct cgrp_cqm_info {
+	/* Should the cgroup be continuously monitored? */
+	bool		cont_monitoring;
+	/* monr the cgroup currently points to; NULL when none is set. */
+	struct monr	*monr;
+};
+
+/* Accessors between a css, its perf_cgroup and the cgroup's cqm info.
+ * The cgrp_ argument is parenthesized so that passing an expression
+ * (e.g. a cast or an address-of) binds before the -> operator.
+ */
+# define css_to_perf_cgroup(css_) container_of(css_, struct perf_cgroup, css)
+# define cgrp_to_cqm_info(cgrp_) ((struct cgrp_cqm_info *)(cgrp_)->arch_info)
+# define css_to_cqm_info(css_) cgrp_to_cqm_info(css_to_perf_cgroup(css_))
+
+#endif
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061..2246443 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -299,4 +299,36 @@ static inline void perf_check_microcode(void) { }
 
 #define arch_perf_out_copy_user copy_from_user_nmi
 
+
+/*
+ * Hooks for architecture specific features of perf_event cgroup.
+ * Currently used by Intel's CQM.
+ */
+#ifdef CONFIG_CGROUP_PERF
+#ifdef CONFIG_INTEL_RDT
+
+#define perf_cgroup_arch_css_alloc \
+	perf_cgroup_arch_css_alloc
+int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+				      struct cgroup_subsys_state *new_css);
+
+#define perf_cgroup_arch_css_online \
+	perf_cgroup_arch_css_online
+int perf_cgroup_arch_css_online(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_css_offline \
+	perf_cgroup_arch_css_offline
+void perf_cgroup_arch_css_offline(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_css_released \
+	perf_cgroup_arch_css_released
+void perf_cgroup_arch_css_released(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_css_free \
+	perf_cgroup_arch_css_free
+void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css);
+
+#endif
+#endif
+
 #endif /* _ASM_X86_PERF_EVENT_H */
-- 
2.8.0.rc3.226.g39d4020

  parent reply	other threads:[~2016-05-11 23:07 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-11 23:02 [PATCH v2 00/32] 2nd Iteration of Cache QoS Monitoring support David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 01/32] perf/x86/intel/cqm: remove previous version of CQM and MBM David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 02/32] perf/x86/intel/cqm: software cache for MSR_IA32_PQR_ASSOC David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 03/32] x86/intel,cqm: add CONFIG_INTEL_RDT configuration flag David Carrillo-Cisneros
2016-05-18 17:30   ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 04/32] perf/x86/intel/cqm: add constants for CQM David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 05/32] perf/x86/intel/cqm: encapsulate per-package RMIDs David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 06/32] perf/x86/intel/cqm: add per-package RMIDs, data and locks David Carrillo-Cisneros
2016-05-18 16:08   ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 07/32] perf/x86/intel/cqm: add helpers for per-package locking David Carrillo-Cisneros
2016-05-18 17:35   ` Thomas Gleixner
2016-05-18 19:09     ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 08/32] perf/x86/intel/cqm: add pmu sysfs attribute David Carrillo-Cisneros
2016-05-18 17:38   ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 09/32] perf/x86/intel/cqm: basic RMID hierarchy with per package RMIDs David Carrillo-Cisneros
2016-05-18 19:51   ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 10/32] perf/x86/intel/cqm: introduce (I)state and limbo prmids David Carrillo-Cisneros
2016-05-18 20:36   ` Thomas Gleixner
2016-05-25  0:52     ` David Carrillo-Cisneros
2016-05-25  8:51       ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 11/32] perf/x86/intel/cqm: add per-package RMID rotation David Carrillo-Cisneros
2016-05-18 21:37   ` Thomas Gleixner
2016-05-24 21:01     ` David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 12/32] perf/x86/intel/cqm: schedule work for rotation task David Carrillo-Cisneros
2016-05-18 20:41   ` Thomas Gleixner
2016-05-11 23:02 ` [PATCH v2 13/32] perf/x86/intel/cqm: add polled update of RMID's llc_occupancy David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 14/32] perf/x86/intel/cqm: add preallocation of anodes David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 15/32] perf/core: add hooks to expose architecture specific features in perf_cgroup David Carrillo-Cisneros
2016-05-11 23:02 ` David Carrillo-Cisneros [this message]
2016-05-11 23:02 ` [PATCH v2 17/32] perf/core,perf/x86/intel/cqm: add pmu::event_terminate David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 18/32] perf/core: introduce PMU event flag PERF_CGROUP_NO_RECURSION David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 19/32] x86/intel/cqm: use PERF_CGROUP_NO_RECURSION in CQM David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 20/32] perf/x86/intel/cqm: handle inherit event and inherit_stat flag David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 21/32] perf/x86/intel/cqm: introduce read_subtree David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 22/32] perf/core: introduce PERF_INACTIVE_*_READ_* flags David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 23/32] perf/x86/intel/cqm: use PERF_INACTIVE_*_READ_* flags in CQM David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 24/32] sched: introduce the finish_arch_pre_lock_switch() scheduler hook David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 25/32] perf/x86/intel/cqm: integrate CQM cgroups with scheduler David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 26/32] perf/x86/intel/cqm: make one write of PQR_ASSOC per ctx switch David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 27/32] perf/core: add perf_event cgroup hooks for subsystem attributes David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 28/32] perf/x86/intel/cqm: add CQM attributes to perf_event cgroup David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 29/32] perf,perf/x86,perf/powerpc,perf/arm,perf/*: add int error return to pmu::read David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 30/32] perf,perf/x86: add hook perf_event_arch_exec David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 31/32] perf/stat: fix bug in handling events in error state David Carrillo-Cisneros
2016-05-11 23:02 ` [PATCH v2 32/32] perf/stat: revamp read error handling, snapshot and per_pkg events David Carrillo-Cisneros

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1463007752-116802-17-git-send-email-davidcc@google.com \
    --to=davidcc@google.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt@codeblueprint.co.uk \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.