linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Carrillo-Cisneros <davidcc@google.com>
To: linux-kernel@vger.kernel.org
Cc: "x86@kernel.org" <x86@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Andi Kleen <ak@linux.intel.com>, Kan Liang <kan.liang@intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Vegard Nossum <vegard.nossum@gmail.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Nilay Vaish <nilayvaish@gmail.com>, Borislav Petkov <bp@suse.de>,
	Vikas Shivappa <vikas.shivappa@linux.intel.com>,
	Ravi V Shankar <ravi.v.shankar@intel.com>,
	Fenghua Yu <fenghua.yu@intel.com>, Paul Turner <pjt@google.com>,
	Stephane Eranian <eranian@google.com>,
	David Carrillo-Cisneros <davidcc@google.com>
Subject: [PATCH v3 38/46] perf/x86/intel/cmt: introduce read SLOs for rotation
Date: Sat, 29 Oct 2016 17:38:35 -0700	[thread overview]
Message-ID: <1477787923-61185-39-git-send-email-davidcc@google.com> (raw)
In-Reply-To: <1477787923-61185-1-git-send-email-davidcc@google.com>

To make rmid rotation more dependable, this patch series introduces
rotation Service Level Objectives (SLOs) that are described in
the code's documentation.

This patch introduces the cmt_{pre,min}_mon_slice SLOs, which protect against
bogus values when a rmid has not been available since the beginning of
monitoring. It also introduces the auxiliary variables necessary for the
SLOs to work, and the checks in intel_cmt_event_read that enforce the SLOs
for reads of the llc_occupancy event.

Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
---
 arch/x86/events/intel/cmt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cmt.h | 28 +++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3ade923..649eb5f 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -51,6 +51,25 @@ static size_t pkg_uflags_size;
 static struct pkg_data **cmt_pkgs_data;
 
 /*
+ * Rotation Service Level Objectives (SLO) for monrs with llc_occupancy
+ * monitoring. Note that these are monr level SLOs, therefore all pmonrs in
+ * the monr meet or exceed them.
+ * (A "monitored"  monr is a monr with no pmonr in a Dependent state).
+ *
+ * SLOs:
+ *
+ * @__cmt_pre_mon_slice: Min time a monr is monitored before being readable.
+ * @__cmt_min_mon_slice: Min time a monr stays monitored after becoming
+ *                       readable.
+ */
+#define CMT_DEFAULT_PRE_MON_SLICE 2000		/* ms */
+static u64 __cmt_pre_mon_slice;
+
+#define CMT_DEFAULT_MIN_MON_SLICE 5000		/* ms */
+static u64 __cmt_min_mon_slice;
+
+
+/*
  * If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
  * Otherwise next online pkg_data or NULL if no more.
  */
@@ -300,6 +319,7 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 			pmonr_move_all_dependants(pmonr, lender);
 		}
 		__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+		pkgd->nr_dirty_rmids++;
 
 	} else if (pmonr->state == PMONR_DEP_IDLE ||
 		   pmonr->state == PMONR_DEP_DIRTY) {
@@ -312,6 +332,11 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 			__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
 		else
 			pkgd->nr_dep_pmonrs--;
+
+
+		if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+			atomic64_set(&pmonr->monr->last_rmid_recoup,
+				     get_jiffies_64());
 	} else {
 		WARN_ON_ONCE(true);
 		return;
@@ -372,6 +397,7 @@ static inline void __pmonr_to_dep_helper(
 
 	lender_rmids.value = atomic64_read(&lender->atomic_rmids);
 	pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+	atomic_inc(&pmonr->monr->nr_dep_pmonrs);
 }
 
 static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
@@ -390,6 +416,7 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
 
 static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 {
+	struct pkg_data *pkgd = pmonr->pkgd;
 	struct pmonr *lender;
 	union pmonr_rmids rmids;
 
@@ -398,6 +425,7 @@ static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 
 	rmids.value = atomic64_read(&pmonr->atomic_rmids);
 	__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+	pkgd->nr_dirty_rmids++;
 }
 
 static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
@@ -408,6 +436,9 @@ static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
 	pmonr_move_dependants(pmonr->lender, pmonr);
 	pmonr->lender = NULL;
 	__pmonr_to_active_helper(pmonr, rmid);
+
+	if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+		atomic64_set(&pmonr->monr->last_rmid_recoup, get_jiffies_64());
 }
 
 static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
@@ -422,6 +453,7 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
 	union pmonr_rmids rmids;
 
 	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	pmonr->pkgd->nr_dirty_rmids--;
 	__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
 }
 
@@ -1599,7 +1631,7 @@ static int read_all_pkgs(struct monr *monr, int wait_time_ms, u64 *count)
 static int intel_cmt_event_read(struct perf_event *event)
 {
 	struct monr *monr = monr_from_event(event);
-	u64 count;
+	u64 count, recoup, wait_end;
 	u16 pkgid = topology_logical_package_id(smp_processor_id());
 	int err;
 
@@ -1614,6 +1646,15 @@ static int intel_cmt_event_read(struct perf_event *event)
 		return -ENXIO;
 
 	/*
+	 * If a rmid has been stolen, only read if enough time has elapsed
+	 * since the rmids were recovered.
+	 */
+	recoup = atomic64_read(&monr->last_rmid_recoup);
+	wait_end = recoup + __cmt_pre_mon_slice;
+	if (recoup && time_before64(get_jiffies_64(), wait_end))
+		return -EAGAIN;
+
+	/*
 	 * Only event parent can return a value, everyone else share its
 	 * rmid and therefore doesn't track occupancy independently.
 	 */
@@ -2267,6 +2308,9 @@ static int __init intel_cmt_init(void)
 	struct pkg_data *pkgd = NULL;
 	int err = 0;
 
+	__cmt_pre_mon_slice = msecs_to_jiffies(CMT_DEFAULT_PRE_MON_SLICE);
+	__cmt_min_mon_slice = msecs_to_jiffies(CMT_DEFAULT_MIN_MON_SLICE);
+
 	if (!x86_match_cpu(intel_cmt_match)) {
 		err = -ENODEV;
 		goto err_exit;
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 8bb43bd..8756666 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -52,6 +52,24 @@
  * schedule and read.
  *
  *
+ * Rotation
+ *
+ * The number of rmids in hw is relatively small with respect to the number
+ * of potential monitored resources. rmids are rotated among pmonrs that
+ * need one to give a fair-ish usage of this resource.
+ *
+ * A hw constraint is that occupancy for a rmid cannot be restarted, therefore
+ * a rmid with llc_occupancy needs some time unscheduled until all cache lines
+ * tagged to it are evicted from cache (if this ever happens).
+ *
+ * When a rmid is "rotated", it is stolen from a pmonr and must wait until its
+ * llc_occupancy has decreased enough to be considered "clean". Meanwhile, that
+ * rmid is considered "dirty".
+ *
+ * Rotation logic periodically reads occupancy of these "dirty" rmids and, when
+ * clean, the rmid is either reused or placed in a free pool.
+ *
+ *
  * Locking
  *
  * One global cmt_mutex. One mutex and spin_lock per package.
@@ -62,6 +80,7 @@
  *  cgroup start/stop.
  *  - Hold pkg->mutex and pkg->lock in _all_ active packages to traverse or
  *  change the monr hierarchy.
+ *  - pkgd->mutex: Hold in current package for rotation in that pkgd.
  *  - pkgd->lock: Hold in current package to access that pkgd's members. Hold
  *  a pmonr's package pkgd->lock for non-atomic access to pmonr.
  */
@@ -225,6 +244,7 @@ struct cmt_csd {
  * @dep_dirty_pmonrs:		LRU of Dep_Dirty pmonrs.
  * @dep_pmonrs:			LRU of Dep_Idle and Dep_Dirty pmonrs.
  * @nr_dep_pmonrs:		nr Dep_Idle + nr Dep_Dirty pmonrs.
+ * @nr_dirty_rmids:		nr of "dirty" rmids, both with and without a pmonr.
  * @mutex:			Hold when modifying this pkg_data.
  * @mutex_key:			lockdep class for pkg_data's mutex.
  * @lock:			Hold to protect pmonrs in this pkg_data.
@@ -243,6 +263,7 @@ struct pkg_data {
 	struct list_head	dep_dirty_pmonrs;
 	struct list_head	dep_pmonrs;
 	int			nr_dep_pmonrs;
+	int			nr_dirty_rmids;
 
 	struct mutex		mutex;
 	raw_spinlock_t		lock;
@@ -280,6 +301,10 @@ enum cmt_user_flags {
  * @parent:		Parent in monr hierarchy.
  * @children:		List of children in monr hierarchy.
  * @parent_entry:	Entry in parent's children list.
+ * @last_rmid_recoup:	Last time that nr_dep_pmonrs decreased to zero. It's
+ *			zero if a rmid has never been stolen from this monr.
+ * @nr_dep_pmonrs:	nr of Dep_* pmonrs in this monr. A zero implies that
+ *			monr is monitoring in all required packages.
  * @flags:		monr_flags.
  * @nr_has_user:	nr of CMT_UF_HAS_USER set in events in mon_events.
  * @nr_nolazy_user:	nr of CMT_UF_NOLAZY_RMID set in events in mon_events.
@@ -303,6 +328,9 @@ struct monr {
 	struct list_head		children;
 	struct list_head		parent_entry;
 
+	atomic64_t			last_rmid_recoup;
+	atomic_t			nr_dep_pmonrs;
+
 	enum monr_flags			flags;
 	int				nr_has_user;
 	int				nr_nolazy_rmid;
-- 
2.8.0.rc3.226.g39d4020

  parent reply	other threads:[~2016-10-30  0:40 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-30  0:37 [PATCH v3 00/46] Cache Monitoring Technology (aka CQM) David Carrillo-Cisneros
2016-10-30  0:37 ` [PATCH v3 01/46] perf/x86/intel/cqm: remove previous version of CQM and MBM David Carrillo-Cisneros
2016-10-30  0:37 ` [PATCH v3 02/46] perf/x86/intel: rename CQM cpufeatures to CMT David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 03/46] x86/intel: add CONFIG_INTEL_RDT_M configuration flag David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 04/46] perf/x86/intel/cmt: add device initialization and CPU hotplug support David Carrillo-Cisneros
2016-11-10 15:19   ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 05/46] perf/x86/intel/cmt: add per-package locks David Carrillo-Cisneros
2016-11-10 21:23   ` Thomas Gleixner
2016-11-11  2:22     ` David Carrillo-Cisneros
2016-11-11  7:21       ` Peter Zijlstra
2016-11-11  7:32         ` Ingo Molnar
2016-11-11  9:41         ` Thomas Gleixner
2016-11-11 17:21           ` David Carrillo-Cisneros
2016-11-13 10:58             ` Thomas Gleixner
2016-11-15  4:53         ` David Carrillo-Cisneros
2016-11-16 19:00           ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 06/46] perf/x86/intel/cmt: add intel_cmt pmu David Carrillo-Cisneros
2016-11-10 21:27   ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 07/46] perf/core: add RDT Monitoring attributes to struct hw_perf_event David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 08/46] perf/x86/intel/cmt: add MONitored Resource (monr) initialization David Carrillo-Cisneros
2016-11-10 23:09   ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 09/46] perf/x86/intel/cmt: add basic monr hierarchy David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 10/46] perf/x86/intel/cmt: add Package MONitored Resource (pmonr) initialization David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 11/46] perf/x86/intel/cmt: add cmt_user_flags (uflags) to monr David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 12/46] perf/x86/intel/cmt: add per-package rmid pools David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 13/46] perf/x86/intel/cmt: add pmonr's Off and Unused states David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 14/46] perf/x86/intel/cmt: add Active and Dep_{Idle, Dirty} states David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 15/46] perf/x86/intel: encapsulate rmid and closid updates in pqr cache David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 16/46] perf/x86/intel/cmt: set sched rmid and complete pmu start/stop/add/del David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 17/46] perf/x86/intel/cmt: add uflag CMT_UF_NOLAZY_RMID David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 18/46] perf/core: add arch_info field to struct perf_cgroup David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 19/46] perf/x86/intel/cmt: add support for cgroup events David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 20/46] perf/core: add pmu::event_terminate David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 21/46] perf/x86/intel/cmt: use newly introduced event_terminate David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 22/46] perf/x86/intel/cmt: sync cgroups and intel_cmt device start/stop David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 23/46] perf/core: hooks to add architecture specific features in perf_cgroup David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 24/46] perf/x86/intel/cmt: add perf_cgroup_arch_css_{online,offline} David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 25/46] perf/x86/intel/cmt: add monr->flags and CMT_MONR_ZOMBIE David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 26/46] sched: introduce the finish_arch_pre_lock_switch() scheduler hook David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 27/46] perf/x86/intel: add pqr cache flags and intel_pqr_ctx_switch David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 28/46] perf,perf/x86,perf/powerpc,perf/arm,perf/*: add int error return to pmu::read David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 29/46] perf/x86/intel/cmt: add error handling to intel_cmt_event_read David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 30/46] perf/x86/intel/cmt: add asynchronous read for task events David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 31/46] perf/x86/intel/cmt: add subtree read for cgroup events David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 32/46] perf/core: Add PERF_EV_CAP_READ_ANY_{CPU_,}PKG flags David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 33/46] perf/x86/intel/cmt: use PERF_EV_CAP_READ_{,CPU_}PKG flags in Intel cmt David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 34/46] perf/core: introduce PERF_EV_CAP_CGROUP_NO_RECURSION David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 35/46] perf/x86/intel/cmt: use PERF_EV_CAP_CGROUP_NO_RECURSION in intel_cmt David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 36/46] perf/core: add perf_event cgroup hooks for subsystem attributes David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 37/46] perf/x86/intel/cmt: add cont_monitoring to perf cgroup David Carrillo-Cisneros
2016-10-30  0:38 ` David Carrillo-Cisneros [this message]
2016-10-30  0:38 ` [PATCH v3 39/46] perf/x86/intel/cmt: add max_recycle_threshold sysfs attribute David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 40/46] perf/x86/intel/cmt: add rotation scheduled work David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 41/46] perf/x86/intel/cmt: add rotation minimum progress SLO David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 42/46] perf/x86/intel/cmt: add rmid stealing David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 43/46] perf/x86/intel/cmt: add CMT_UF_NOSTEAL_RMID flag David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 44/46] perf/x86/intel/cmt: add debugfs intel_cmt directory David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 45/46] perf/stat: fix bug in handling events in error state David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 46/46] perf/stat: revamp read error handling, snapshot and per_pkg events David Carrillo-Cisneros

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1477787923-61185-39-git-send-email-davidcc@google.com \
    --to=davidcc@google.com \
    --cc=ak@linux.intel.com \
    --cc=bp@suse.de \
    --cc=eranian@google.com \
    --cc=fenghua.yu@intel.com \
    --cc=kan.liang@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=nilayvaish@gmail.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=ravi.v.shankar@intel.com \
    --cc=tglx@linutronix.de \
    --cc=vegard.nossum@gmail.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).