All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Carrillo-Cisneros <davidcc@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Ingo Molnar <mingo@redhat.com>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>,
	Matt Fleming <matt.fleming@intel.com>,
	Tony Luck <tony.luck@intel.com>,
	Stephane Eranian <eranian@google.com>,
	Paul Turner <pjt@google.com>,
	David Carrillo-Cisneros <davidcc@google.com>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 16/32] perf/x86/intel/cqm: add cgroup support
Date: Thu, 28 Apr 2016 21:43:22 -0700	[thread overview]
Message-ID: <1461905018-86355-17-git-send-email-davidcc@google.com> (raw)
In-Reply-To: <1461905018-86355-1-git-send-email-davidcc@google.com>

Create a monr per monitored cgroup. Inserts monrs in the monr hierarchy.
Task events are leaves of the lowest monitored ancestor cgroup (the lowest
cgroup ancestor with a monr).

CQM starts after the cgroup subsystem, and uses the cqm_initialized_key
static key to avoid interfering with the perf cgroup logic until
properly initialized. The cqm_init_mutex protects the initialization.

Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
---
 arch/x86/events/intel/cqm.c       | 594 +++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cqm.h       |  13 +
 arch/x86/include/asm/perf_event.h |  33 +++
 3 files changed, 637 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 98a919f..f000fd0 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -35,10 +35,17 @@ static struct perf_pmu_events_attr event_attr_##v = {				\
 static LIST_HEAD(cache_groups);
 static DEFINE_MUTEX(cqm_mutex);
 
+/*
+ * Synchronizes initialization of cqm with cgroups.
+ */
+static DEFINE_MUTEX(cqm_init_mutex);
+
 struct monr *monr_hrchy_root;
 
 struct pkg_data *cqm_pkgs_data[PQR_MAX_NR_PKGS];
 
+DEFINE_STATIC_KEY_FALSE(cqm_initialized_key);
+
 static inline bool __pmonr__in_istate(struct pmonr *pmonr)
 {
 	lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
@@ -69,6 +76,9 @@ static inline bool __pmonr__in_ustate(struct pmonr *pmonr)
 	return !pmonr->prmid && !pmonr->ancestor_pmonr;
 }
 
+/* Whether the monr is the root monr. Note that a cgroup that is not the
+ * root cgroup may still point to the root monr.
+ */
 static inline bool monr__is_root(struct monr *monr)
 {
 	return monr_hrchy_root == monr;
@@ -115,6 +125,23 @@ static inline void __monr__clear_mon_active(struct monr *monr)
 	monr->flags &= ~MONR_MON_ACTIVE;
 }
 
+static inline bool monr__is_cgroup_type(struct monr *monr)
+{
+	return monr->mon_cgrp;
+}
+
+static inline bool monr_is_event_type(struct monr *monr)
+{
+	return !monr->mon_cgrp && monr->mon_event_group;
+}
+
+
+static inline struct cgroup_subsys_state *get_root_perf_css(void)
+{
+	/* Get css for root cgroup */
+	return  init_css_set.subsys[perf_event_cgrp_id];
+}
+
 /*
  * Update if enough time has passed since last read.
  *
@@ -725,6 +752,7 @@ static struct monr *monr_alloc(void)
 	monr->parent = NULL;
 	INIT_LIST_HEAD(&monr->children);
 	INIT_LIST_HEAD(&monr->parent_entry);
+	monr->mon_cgrp = NULL;
 	monr->mon_event_group = NULL;
 
 	/* Iterate over all pkgs, even unitialized ones. */
@@ -947,7 +975,7 @@ retry:
 }
 
 /*
- * Wrappers for monr manipulation in events.
+ * Wrappers for monr manipulation in events and cgroups.
  *
  */
 static inline struct monr *monr_from_event(struct perf_event *event)
@@ -960,6 +988,100 @@ static inline void event_set_monr(struct perf_event *event, struct monr *monr)
 	WRITE_ONCE(event->hw.cqm_monr, monr);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static inline struct monr *monr_from_perf_cgroup(struct perf_cgroup *cgrp)
+{
+	struct monr *monr;
+	struct cgrp_cqm_info *cqm_info;
+
+	cqm_info = (struct cgrp_cqm_info *)READ_ONCE(cgrp->arch_info);
+	WARN_ON_ONCE(!cqm_info);
+	monr = READ_ONCE(cqm_info->monr);
+	return monr;
+}
+
+static inline struct perf_cgroup *monr__get_mon_cgrp(struct monr *monr)
+{
+	WARN_ON_ONCE(!monr);
+	return READ_ONCE(monr->mon_cgrp);
+}
+
+static inline void
+monr__set_mon_cgrp(struct monr *monr, struct perf_cgroup *cgrp)
+{
+	WRITE_ONCE(monr->mon_cgrp, cgrp);
+}
+
+static inline void
+perf_cgroup_set_monr(struct perf_cgroup *cgrp, struct monr *monr)
+{
+	WRITE_ONCE(cgrp_to_cqm_info(cgrp)->monr, monr);
+}
+
+/*
+ * A perf_cgroup is monitored when it's set in a monr->mon_cgrp.
+ * There is a many-to-one relationship between perf_cgroup's monrs
+ * and monrs' mon_cgrp. A monitored cgroup is necessarily referenced
+ * back by its monr's mon_cgrp.
+ */
+static inline bool perf_cgroup_is_monitored(struct perf_cgroup *cgrp)
+{
+	struct monr *monr;
+	struct perf_cgroup *monr_cgrp;
+
+	/* monr can be referenced by a cgroup other than the one in its
+	 * mon_cgrp, be careful.
+	 */
+	monr = monr_from_perf_cgroup(cgrp);
+
+	monr_cgrp = monr__get_mon_cgrp(monr);
+	/* Root monr do not have a cgroup associated before initialization.
+	 * mon_cgrp and mon_event_group are union, so the pointer must be set
+	 * for all non-root monrs.
+	 */
+	return  monr_cgrp && monr__get_mon_cgrp(monr) == cgrp;
+}
+
+/* Set css's monr to the monr of its lowest monitored ancestor. */
+static inline void __css_set_monr_to_lma(struct cgroup_subsys_state *css)
+{
+	lockdep_assert_held(&cqm_mutex);
+	if (!css->parent) {
+		perf_cgroup_set_monr(css_to_perf_cgroup(css), monr_hrchy_root);
+		return;
+	}
+	perf_cgroup_set_monr(
+		css_to_perf_cgroup(css),
+		monr_from_perf_cgroup(css_to_perf_cgroup(css->parent)));
+}
+
+static inline void
+perf_cgroup_make_monitored(struct perf_cgroup *cgrp, struct monr *monr)
+{
+	monr_hrchy_assert_held_mutexes();
+	perf_cgroup_set_monr(cgrp, monr);
+	/* Make sure that monr is a valid monr for css before it's visible
+	 * to any reader of css.
+	 */
+	smp_wmb();
+	monr__set_mon_cgrp(monr, cgrp);
+}
+
+static inline void
+perf_cgroup_make_unmonitored(struct perf_cgroup *cgrp)
+{
+	struct monr *monr = monr_from_perf_cgroup(cgrp);
+
+	monr_hrchy_assert_held_mutexes();
+	__css_set_monr_to_lma(&cgrp->css);
+	/* Make sure that all readers of css's monr see lma css before
+	 * monr stops being a valid monr for css.
+	 */
+	smp_wmb();
+	monr__set_mon_cgrp(monr, NULL);
+}
+#endif
+
 /*
  * Always finds a rmid_entry to schedule. To be called during scheduler.
  * A fast path that only uses read_lock for common case when rmid for current
@@ -1068,6 +1190,286 @@ __monr_hrchy_remove_leaf(struct monr *monr)
 	monr->parent = NULL;
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static struct perf_cgroup *__perf_cgroup_parent(struct perf_cgroup *cgrp)
+{
+	struct cgroup_subsys_state *parent_css = cgrp->css.parent;
+
+	if (parent_css)
+		return css_to_perf_cgroup(parent_css);
+	return NULL;
+}
+
+/* Get cgroup for both task and cgroup event. */
+static inline struct perf_cgroup *
+perf_cgroup_from_event(struct perf_event *event)
+{
+#ifdef CONFIG_LOCKDEP
+	u16 pkg_id = topology_physical_package_id(smp_processor_id());
+	bool rcu_safe = lockdep_is_held(
+		&cqm_pkgs_data[pkg_id]->pkg_data_lock);
+#endif
+
+	if (!(event->attach_state & PERF_ATTACH_TASK))
+		return event->cgrp;
+
+	return container_of(
+		task_css_check(event->hw.target, perf_event_cgrp_id, rcu_safe),
+		struct perf_cgroup, css);
+}
+
+/* Find lowest ancestor that is monitored, not including this cgrp.
+ * Return NULL if no ancestor is monitored.
+ */
+struct perf_cgroup *__cgroup_find_lma(struct perf_cgroup *cgrp)
+{
+	do {
+		cgrp = __perf_cgroup_parent(cgrp);
+	} while (cgrp && !perf_cgroup_is_monitored(cgrp));
+	return cgrp;
+}
+
+/* Similar to css_next_descendant_pre but skips the subtree rooted by pos. */
+struct cgroup_subsys_state *
+css_skip_subtree_pre(struct cgroup_subsys_state *pos,
+		     struct cgroup_subsys_state *root)
+{
+	struct cgroup_subsys_state *next;
+
+	WARN_ON_ONCE(!pos);
+	while (pos != root) {
+		next = css_next_child(pos, pos->parent);
+		if (next)
+			return next;
+		pos = pos->parent;
+	}
+	return NULL;
+}
+
+/* Make all monrs of css descendants of css to depend on new_monr. */
+inline void __css_subtree_update_monrs(struct cgroup_subsys_state *css,
+				       struct monr *new_monr)
+{
+	struct cgroup_subsys_state *pos_css;
+	int i;
+	unsigned long flags;
+
+	lockdep_assert_held(&cqm_mutex);
+	monr_hrchy_assert_held_mutexes();
+
+	rcu_read_lock();
+
+	/* Iterate over descendants of css in pre-order, in a way
+	 * similar to css_for_each_descendant_pre, but skipping the subtrees
+	 * rooted by css's with a monitored cgroup, since the elements
+	 * in those subtrees do not need to be updated.
+	 */
+	pos_css = css_next_descendant_pre(css, css);
+	while (pos_css) {
+		struct perf_cgroup *pos_cgrp = css_to_perf_cgroup(pos_css);
+		struct monr *pos_monr = monr_from_perf_cgroup(pos_cgrp);
+
+		/* Skip css that are not online, sync'ed with cqm_mutex. */
+		if (!(pos_css->flags & CSS_ONLINE)) {
+			pos_css = css_next_descendant_pre(pos_css, css);
+			continue;
+		}
+		/* Update descendant pos's monr pointers to new_monr. */
+		if (!perf_cgroup_is_monitored(pos_cgrp)) {
+			perf_cgroup_set_monr(pos_cgrp, new_monr);
+			pos_css = css_next_descendant_pre(pos_css, css);
+			continue;
+		}
+		monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+		pos_monr->parent = new_monr;
+		list_move_tail(&pos_monr->parent_entry, &new_monr->children);
+		monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+		/* Dont go down the subtree in pos_css since pos_monr is the
+		 * lma for all its descendants.
+		 */
+		pos_css = css_skip_subtree_pre(pos_css, css);
+	}
+	rcu_read_unlock();
+}
+
+static inline int __css_start_monitoring(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp, *cgrp_lma, *pos_cgrp;
+	struct monr *monr, *monr_parent, *pos_monr, *tmp_monr;
+	unsigned long flags;
+	int i;
+
+	lockdep_assert_held(&cqm_mutex);
+
+	/* Check before taking the hierarchy mutexes (which keep the rotation
+	 * threads of all packages away) so the error path holds nothing.
+	 */
+	cgrp = css_to_perf_cgroup(css);
+	if (WARN_ON_ONCE(perf_cgroup_is_monitored(cgrp)))
+		return -1;
+	monr_hrchy_acquire_mutexes();
+
+	/* When css is root cgroup's css, attach to the pre-existing
+	 * and active root monr.
+	 */
+	cgrp_lma = __cgroup_find_lma(cgrp);
+	if (!cgrp_lma) {
+		/* monr of root cgrp must be monr_hrchy_root. */
+		WARN_ON_ONCE(!monr__is_root(monr_from_perf_cgroup(cgrp)));
+		perf_cgroup_make_monitored(cgrp, monr_hrchy_root);
+		monr_hrchy_release_mutexes();
+		return 0;
+	}
+	/* The monr for the lowest monitored ancestor is direct ancestor
+	 * of monr in the monr hierarchy.
+	 */
+	monr_parent = monr_from_perf_cgroup(cgrp_lma);
+
+	/* Create new monr. */
+	monr = monr_alloc();
+	if (IS_ERR(monr)) {
+		monr_hrchy_release_mutexes();
+		return PTR_ERR(monr);
+	}
+
+	/* monr has no children yet so it is to be inserted in hierarchy with
+	 * all its pmonrs in (U)state.
+	 * We hold locks until monr_hrchy changes are complete, to prevent
+	 * possible state transition for the pmonrs in monr while still
+	 * allowing to read the prmid_summary in the scheduler path.
+	 */
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+	__monr_hrchy_insert_leaf(monr, monr_parent);
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	/* Make sure monr is in hierarchy before attaching monr to cgroup. */
+	barrier();
+
+	perf_cgroup_make_monitored(cgrp, monr);
+	__css_subtree_update_monrs(css, monr);
+
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+	/* Move task-event monrs that are descendant from css's cgroup. */
+	list_for_each_entry_safe(pos_monr, tmp_monr,
+				 &monr_parent->children, parent_entry) {
+		if (!monr_is_event_type(pos_monr))
+			continue;
+		/* all events in event group must have the same cgroup.
+		 * No RCU read lock necessary for task_css_check since calling
+		 * inside critical section.
+		 */
+		pos_cgrp = perf_cgroup_from_event(pos_monr->mon_event_group);
+		if (!cgroup_is_descendant(pos_cgrp->css.cgroup,
+					  cgrp->css.cgroup))
+			continue;
+		pos_monr->parent = monr;
+		list_move_tail(&pos_monr->parent_entry, &monr->children);
+	}
+	/* Make sure monitoring starts after all monrs have moved. */
+	barrier();
+
+	__monr__set_mon_active(monr);
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	monr_hrchy_release_mutexes();
+	return 0;
+}
+
+static inline int __css_stop_monitoring(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp, *cgrp_lma;
+	struct monr *monr, *monr_parent, *pos_monr;
+	unsigned long flags;
+	int i;
+
+	lockdep_assert_held(&cqm_mutex);
+
+	/* Validate first so the error path returns with no mutexes held. */
+	cgrp = css_to_perf_cgroup(css);
+	if (WARN_ON_ONCE(!perf_cgroup_is_monitored(cgrp)))
+		return -1;
+	monr_hrchy_acquire_mutexes();
+
+	monr = monr_from_perf_cgroup(cgrp);
+
+	/* When css is root cgroup's css, detach cgroup but do not
+	 * destroy monr.
+	 */
+	cgrp_lma = __cgroup_find_lma(cgrp);
+	if (!cgrp_lma) {
+		/* monr of root cgrp must be monr_hrchy_root. */
+		WARN_ON_ONCE(!monr__is_root(monr_from_perf_cgroup(cgrp)));
+		perf_cgroup_make_unmonitored(cgrp);
+		monr_hrchy_release_mutexes();
+		return 0;
+	}
+	/* The monr for the lowest monitored ancestor is direct ancestor
+	 * of monr in the monr hierarchy.
+	 */
+	monr_parent = monr_from_perf_cgroup(cgrp_lma);
+
+	/* Lock together the transition to (U)state and clearing
+	 * MONR_MON_ACTIVE to prevent prmids to return to (A)state
+	 * or (I)state in between.
+	 */
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+	cqm_pkg_id_for_each_online(i)
+		__pmonr__to_ustate(monr->pmonrs[i]);
+	barrier();
+	__monr__clear_mon_active(monr);
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	__css_subtree_update_monrs(css, monr_parent);
+
+	/*
+	 * Move the children monrs that are not cgroups.
+	 */
+	monr_hrchy_acquire_raw_spin_locks_irq_save(flags, i);
+
+	list_for_each_entry(pos_monr, &monr->children, parent_entry)
+		pos_monr->parent = monr_parent;
+	list_splice_tail_init(&monr->children, &monr_parent->children);
+	perf_cgroup_make_unmonitored(cgrp);
+	__monr_hrchy_remove_leaf(monr);
+
+	monr_hrchy_release_raw_spin_locks_irq_restore(flags, i);
+
+	monr_hrchy_release_mutexes();
+	monr_dealloc(monr);
+	return 0;
+}
+
+/* Attaching an event to a cgroup starts monitoring in the cgroup.
+ * If the cgroup is already monitoring, just use its pre-existing monr.
+ */
+static int __monr_hrchy_attach_cgroup_event(struct perf_event *event,
+					    struct perf_cgroup *perf_cgrp)
+{
+	struct monr *monr;
+	int ret;
+
+	lockdep_assert_held(&cqm_mutex);
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_TASK);
+	WARN_ON_ONCE(monr_from_event(event));
+	WARN_ON_ONCE(!perf_cgrp);
+
+	if (!perf_cgroup_is_monitored(perf_cgrp)) {
+		css_get(&perf_cgrp->css);
+		ret = __css_start_monitoring(&perf_cgrp->css);
+		css_put(&perf_cgrp->css);
+		if (ret)
+			return ret;
+	}
+
+	/* At this point, cgrp is always monitored, use its monr. */
+	monr = monr_from_perf_cgroup(perf_cgrp);
+
+	event_set_monr(event, monr);
+	monr->mon_event_group = event;
+	return 0;
+}
+#endif
+
 static int __monr_hrchy_attach_cpu_event(struct perf_event *event)
 {
 	lockdep_assert_held(&cqm_mutex);
@@ -1109,12 +1511,27 @@ static int __monr_hrchy_attach_task_event(struct perf_event *event,
 static int monr_hrchy_attach_event(struct perf_event *event)
 {
 	struct monr *monr_parent;
+#ifdef CONFIG_CGROUP_PERF
+	struct perf_cgroup *perf_cgrp;
+#endif
 
 	if (!event->cgrp && !(event->attach_state & PERF_ATTACH_TASK))
 		return __monr_hrchy_attach_cpu_event(event);
 
+#ifdef CONFIG_CGROUP_PERF
+	/* Task events become leaves, cgroup events reuse the cgroup's monr */
+	if (event->cgrp)
+		return __monr_hrchy_attach_cgroup_event(event, event->cgrp);
+
+	rcu_read_lock();
+	perf_cgrp = perf_cgroup_from_event(event);
+	rcu_read_unlock();
+
+	monr_parent = monr_from_perf_cgroup(perf_cgrp);
+#else
 	/* Two-levels hierarchy: Root and all event monr underneath it. */
 	monr_parent = monr_hrchy_root;
+#endif
 	return __monr_hrchy_attach_task_event(event, monr_parent);
 }
 
@@ -1126,7 +1543,7 @@ static int monr_hrchy_attach_event(struct perf_event *event)
  */
 static bool __match_event(struct perf_event *a, struct perf_event *b)
 {
-	/* Per-cpu and task events don't mix */
+	/* Cgroup/non-task per-cpu and task events don't mix */
 	if ((a->attach_state & PERF_ATTACH_TASK) !=
 	    (b->attach_state & PERF_ATTACH_TASK))
 		return false;
@@ -2185,6 +2602,129 @@ static struct pmu intel_cqm_pmu = {
 	.read		     = intel_cqm_event_read,
 };
 
+#ifdef CONFIG_CGROUP_PERF
+/* XXX: Add hooks for attaching/detaching a task with monr to a cgroup. */
+inline int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+				      struct cgroup_subsys_state *new_css)
+{
+	struct perf_cgroup *new_cgrp;
+	struct cgrp_cqm_info *cqm_info;
+
+	new_cgrp = css_to_perf_cgroup(new_css);
+	cqm_info = kmalloc(sizeof(struct cgrp_cqm_info), GFP_KERNEL);
+	if (!cqm_info)
+		return -ENOMEM;
+	cqm_info->cont_monitoring = false;
+	cqm_info->monr = NULL;
+	new_cgrp->arch_info = cqm_info;
+
+	return 0;
+}
+
+inline void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp = css_to_perf_cgroup(css);
+
+	kfree(cgrp_to_cqm_info(cgrp));
+	cgrp->arch_info = NULL;
+}
+
+/* Do the bulk of arch_css_online. To be called when CQM starts after
+ * css has gone online.
+ */
+static inline int __css_go_online(struct cgroup_subsys_state *css)
+{
+	lockdep_assert_held(&cqm_mutex);
+
+	/* css must not be used in monr hierarchy before having
+	 * set its monr in this step.
+	 */
+	__css_set_monr_to_lma(css);
+	/* Root monr is always monitoring. */
+	if (!css->parent)
+		css_to_cqm_info(css)->cont_monitoring = true;
+
+	if (css_to_cqm_info(css)->cont_monitoring)
+		return __css_start_monitoring(css);
+	return 0;
+}
+
+inline int perf_cgroup_arch_css_online(struct cgroup_subsys_state *css)
+{
+	int ret = 0;
+
+	/* use cqm_init_mutex to synchronize with
+	 * __start_monitoring_all_cgroups.
+	 */
+	mutex_lock(&cqm_init_mutex);
+
+	if (static_branch_unlikely(&cqm_initialized_key)) {
+		mutex_lock(&cqm_mutex);
+		ret = __css_go_online(css);
+		mutex_unlock(&cqm_mutex);
+		WARN_ON_ONCE(ret);
+	}
+
+	mutex_unlock(&cqm_init_mutex);
+	return ret;
+}
+
+inline void perf_cgroup_arch_css_offline(struct cgroup_subsys_state *css)
+{
+	int ret = 0;
+	struct monr *monr;
+	struct perf_cgroup *cgrp = css_to_perf_cgroup(css);
+
+	mutex_lock(&cqm_init_mutex);
+
+	if (!static_branch_unlikely(&cqm_initialized_key))
+		goto out;
+
+	mutex_lock(&cqm_mutex);
+
+	monr = monr_from_perf_cgroup(cgrp);
+	if (!perf_cgroup_is_monitored(cgrp))
+		goto out_cqm;
+
+	/* Stop monitoring for the css's monr only if no more events need it.
+	 * If events need the monr, it will be destroyed when the events that
+	 * use it are destroyed.
+	 */
+	if (monr->mon_event_group) {
+		monr_hrchy_acquire_mutexes();
+		perf_cgroup_make_unmonitored(cgrp);
+		monr_hrchy_release_mutexes();
+	} else {
+		ret = __css_stop_monitoring(css);
+		WARN_ON_ONCE(ret);
+	}
+
+out_cqm:
+	mutex_unlock(&cqm_mutex);
+out:
+	mutex_unlock(&cqm_init_mutex);
+	WARN_ON_ONCE(ret);
+}
+
+inline void perf_cgroup_arch_css_released(struct cgroup_subsys_state *css)
+{
+	mutex_lock(&cqm_init_mutex);
+
+	if (static_branch_unlikely(&cqm_initialized_key)) {
+		mutex_lock(&cqm_mutex);
+		/*
+		 * Remove css from monr hierarchy now that css is about to
+		 * leave the cgroup hierarchy.
+		 */
+		perf_cgroup_set_monr(css_to_perf_cgroup(css), NULL);
+		mutex_unlock(&cqm_mutex);
+	}
+
+	mutex_unlock(&cqm_init_mutex);
+}
+
+#endif
+
 static inline void cqm_pick_event_reader(int cpu)
 {
 	u16 pkg_id = topology_physical_package_id(cpu);
@@ -2249,6 +2789,39 @@ static const struct x86_cpu_id intel_cqm_match[] = {
 	{}
 };
 
+#ifdef CONFIG_CGROUP_PERF
+/* Start monitoring for all cgroups in cgroup hierarchy. */
+static int __start_monitoring_all_cgroups(void)
+{
+	int ret = 0;
+	struct cgroup_subsys_state *css, *css_root;
+
+	lockdep_assert_held(&cqm_init_mutex);
+
+	rcu_read_lock();
+	/* Get css for root cgroup */
+	css_root =  get_root_perf_css();
+
+	css_for_each_descendant_pre(css, css_root) {
+		if (!css_tryget_online(css))
+			continue;
+
+		rcu_read_unlock();
+		mutex_lock(&cqm_mutex);
+		ret = __css_go_online(css);
+		mutex_unlock(&cqm_mutex);
+
+		/* Re-enter RCU before the put: the walk still needs css. */
+		rcu_read_lock();
+		css_put(css);
+		if (ret)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+#endif
+
 static int __init intel_cqm_init(void)
 {
 	char *str, scale[20];
@@ -2324,17 +2897,32 @@ static int __init intel_cqm_init(void)
 
 	__perf_cpu_notifier(intel_cqm_cpu_notifier);
 
+	/* Use cqm_init_mutex to synchronize with css's online/offline. */
+	mutex_lock(&cqm_init_mutex);
+
+#ifdef CONFIG_CGROUP_PERF
+	ret = __start_monitoring_all_cgroups();
+	if (ret)
+		goto error_init_mutex;
+#endif
+
 	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
 	if (ret)
-		goto error;
+		goto error_init_mutex;
 
 	cpu_notifier_register_done();
 
+	static_branch_enable(&cqm_initialized_key);
+
+	mutex_unlock(&cqm_init_mutex);
+
 	pr_info("Intel CQM monitoring enabled with at least %u rmids per package.\n",
 		min_max_rmid + 1);
 
 	return ret;
 
+error_init_mutex:
+	mutex_unlock(&cqm_init_mutex);
 error:
 	pr_err("Intel CQM perf registration failed: %d\n", ret);
 	cpu_notifier_register_done();
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index 25646a2..0f3da94 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -313,6 +313,7 @@ struct pkg_data {
  * struct monr: MONitored Resource.
  * @flags:		Flags field for monr (XXX: More flags will be added
  *			with MBM).
+ * @mon_cgrp:		The cgroup associated with this monr, if any
  * @mon_event_group:	The head of event's group that use this monr, if any.
  * @parent:		Parent in monr hierarchy.
  * @children:		List of children in monr hierarchy.
@@ -333,6 +334,7 @@ struct pkg_data {
 struct monr {
 	u16				flags;
 	/* Back reference pointers */
+	struct perf_cgroup		*mon_cgrp;
 	struct perf_event		*mon_event_group;
 
 	struct monr			*parent;
@@ -506,3 +508,14 @@ static unsigned int __cqm_min_progress_rate = CQM_DEFAULT_MIN_PROGRESS_RATE;
  * It's units are bytes must be scaled by cqm_l3_scale to obtain cache lines.
  */
 static unsigned int __intel_cqm_max_threshold;
+
+
+struct cgrp_cqm_info {
+	/* Should the cgroup be continuously monitored? */
+	bool		cont_monitoring;
+	struct monr	*monr;
+};
+
+# define css_to_perf_cgroup(css_) container_of(css_, struct perf_cgroup, css)
+# define cgrp_to_cqm_info(cgrp_) ((struct cgrp_cqm_info *)cgrp_->arch_info)
+# define css_to_cqm_info(css_) cgrp_to_cqm_info(css_to_perf_cgroup(css_))
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061..c22d9e0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -299,4 +299,37 @@ static inline void perf_check_microcode(void) { }
 
 #define arch_perf_out_copy_user copy_from_user_nmi
 
+
+/*
+ * Hooks for architecture specific features of perf_event cgroup.
+ * Currently used by Intel's CQM.
+ */
+#ifdef CONFIG_INTEL_RDT
+#define perf_cgroup_arch_css_alloc \
+	perf_cgroup_arch_css_alloc
+inline int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+				      struct cgroup_subsys_state *new_css);
+
+#define perf_cgroup_arch_css_online \
+	perf_cgroup_arch_css_online
+inline int perf_cgroup_arch_css_online(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_css_offline \
+	perf_cgroup_arch_css_offline
+inline void perf_cgroup_arch_css_offline(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_css_released \
+	perf_cgroup_arch_css_released
+inline void perf_cgroup_arch_css_released(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_css_free \
+	perf_cgroup_arch_css_free
+inline void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css);
+
+#else
+
+#define PERF_CGROUP_ARCH_CGRP_SUBSYS_ATTS
+
+#endif
+
 #endif /* _ASM_X86_PERF_EVENT_H */
-- 
2.8.0.rc3.226.g39d4020

  parent reply	other threads:[~2016-04-29  4:50 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-29  4:43 [PATCH 00/32] 2nd Iteration of Cache QoS Monitoring support David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 01/32] perf/x86/intel/cqm: temporarily remove MBM from CQM and cleanup David Carrillo-Cisneros
2016-04-29 20:19   ` Vikas Shivappa
2016-04-29  4:43 ` [PATCH 02/32] perf/x86/intel/cqm: remove check for conflicting events David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 03/32] perf/x86/intel/cqm: remove all code for rotation of RMIDs David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 04/32] perf/x86/intel/cqm: make read of RMIDs per package (Temporal) David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 05/32] perf/core: remove unused pmu->count David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 06/32] x86/intel,cqm: add CONFIG_INTEL_RDT configuration flag and refactor PQR David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 07/32] perf/x86/intel/cqm: separate CQM PMU's attributes from x86 PMU David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 08/32] perf/x86/intel/cqm: prepare for next patches David Carrillo-Cisneros
2016-04-29  9:18   ` Peter Zijlstra
2016-04-29  4:43 ` [PATCH 09/32] perf/x86/intel/cqm: add per-package RMIDs, data and locks David Carrillo-Cisneros
2016-04-29 20:56   ` Vikas Shivappa
2016-04-29  4:43 ` [PATCH 10/32] perf/x86/intel/cqm: basic RMID hierarchy with per package rmids David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 11/32] perf/x86/intel/cqm: (I)state and limbo prmids David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 12/32] perf/x86/intel/cqm: add per-package RMID rotation David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 13/32] perf/x86/intel/cqm: add polled update of RMID's llc_occupancy David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 14/32] perf/x86/intel/cqm: add preallocation of anodes David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 15/32] perf/core: add hooks to expose architecture specific features in perf_cgroup David Carrillo-Cisneros
2016-04-29  4:43 ` David Carrillo-Cisneros [this message]
2016-04-29  4:43 ` [PATCH 17/32] perf/core: adding pmu::event_terminate David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 18/32] perf/x86/intel/cqm: use pmu::event_terminate David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 19/32] perf/core: introduce PMU event flag PERF_CGROUP_NO_RECURSION David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 20/32] x86/intel/cqm: use PERF_CGROUP_NO_RECURSION in CQM David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 21/32] perf/x86/intel/cqm: handle inherit event and inherit_stat flag David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 22/32] perf/x86/intel/cqm: introduce read_subtree David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 23/32] perf/core: introduce PERF_INACTIVE_*_READ_* flags David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 24/32] perf/x86/intel/cqm: use PERF_INACTIVE_*_READ_* flags in CQM David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 25/32] sched: introduce the finish_arch_pre_lock_switch() scheduler hook David Carrillo-Cisneros
2016-04-29  8:52   ` Peter Zijlstra
     [not found]     ` <CALcN6miyq9_4GQfO9=bjFb-X_2LSQdwfWnm+KvT=UrYRCAb6Og@mail.gmail.com>
2016-04-29 18:40       ` David Carrillo-Cisneros
2016-04-29 20:21         ` Vikas Shivappa
2016-04-29 20:50           ` David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 26/32] perf/x86/intel/cqm: integrate CQM cgroups with scheduler David Carrillo-Cisneros
2016-04-29 20:25   ` Vikas Shivappa
2016-04-29 20:48     ` David Carrillo-Cisneros
2016-04-29 21:01       ` Vikas Shivappa
2016-04-29 21:26         ` David Carrillo-Cisneros
2016-04-29 21:32           ` Vikas Shivappa
2016-04-29 21:49             ` David Carrillo-Cisneros
2016-04-29 23:49               ` Vikas Shivappa
2016-04-30 17:50                 ` David Carrillo-Cisneros
2016-05-02 13:22                   ` Thomas Gleixner
2016-04-29  4:43 ` [PATCH 27/32] perf/core: add perf_event cgroup hooks for subsystem attributes David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 28/32] perf/x86/intel/cqm: add CQM attributes to perf_event cgroup David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 29/32] perf,perf/x86,perf/powerpc,perf/arm,perf/*: add int error return to pmu::read David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 30/32] perf,perf/x86: add hook perf_event_arch_exec David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 31/32] perf/stat: fix bug in handling events in error state David Carrillo-Cisneros
2016-04-29  4:43 ` [PATCH 32/32] perf/stat: revamp error handling for snapshot and per_pkg events David Carrillo-Cisneros
2016-04-29 21:06 ` [PATCH 00/32] 2nd Iteration of Cache QoS Monitoring support Vikas Shivappa
2016-04-29 21:10   ` David Carrillo-Cisneros

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1461905018-86355-17-git-send-email-davidcc@google.com \
    --to=davidcc@google.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt.fleming@intel.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.