From: David Carrillo-Cisneros <davidcc@google.com>
To: linux-kernel@vger.kernel.org
Cc: "x86@kernel.org" <x86@kernel.org>, Ingo Molnar <mingo@redhat.com>,
Thomas Gleixner <tglx@linutronix.de>,
Andi Kleen <ak@linux.intel.com>, Kan Liang <kan.liang@intel.com>,
Peter Zijlstra <peterz@infradead.org>,
Vegard Nossum <vegard.nossum@gmail.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Nilay Vaish <nilayvaish@gmail.com>, Borislav Petkov <bp@suse.de>,
Vikas Shivappa <vikas.shivappa@linux.intel.com>,
Ravi V Shankar <ravi.v.shankar@intel.com>,
Fenghua Yu <fenghua.yu@intel.com>, Paul Turner <pjt@google.com>,
Stephane Eranian <eranian@google.com>,
David Carrillo-Cisneros <davidcc@google.com>
Subject: [PATCH v3 38/46] perf/x86/intel/cmt: introduce read SLOs for rotation
Date: Sat, 29 Oct 2016 17:38:35 -0700 [thread overview]
Message-ID: <1477787923-61185-39-git-send-email-davidcc@google.com> (raw)
In-Reply-To: <1477787923-61185-1-git-send-email-davidcc@google.com>
To make rmid rotation more dependable, this patch series introduces
rotation Service Level Objectives (SLOs) that are described in
the code's documentation.
This patch introduces the cmt_{pre,min}_mon_slice SLOs, which protect against
bogus values when a rmid has not been available since the beginning of
monitoring. It also introduces the auxiliary variables necessary for the
SLOs to work and the checks in intel_cmt_event_read that enforce the SLOs
for reads of the llc_occupancy event.
Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
---
arch/x86/events/intel/cmt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
arch/x86/events/intel/cmt.h | 28 +++++++++++++++++++++++++++
2 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3ade923..649eb5f 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -51,6 +51,25 @@ static size_t pkg_uflags_size;
static struct pkg_data **cmt_pkgs_data;
/*
+ * Rotation Service Level Objectives (SLO) for monrs with llc_occupancy
+ * monitoring. Note that these are monr level SLOs, therefore all pmonrs in
+ * the monr meet or exceed them.
+ * (A "monitored" monr is a monr with no pmonr in a Dependent state).
+ *
+ * SLOs:
+ *
+ * @__cmt_pre_mon_slice: Min time a monr is monitored before being readable.
+ * @__cmt_min_mon_slice: Min time a monr stays monitored after becoming
+ * readable.
+ */
+#define CMT_DEFAULT_PRE_MON_SLICE 2000 /* ms */
+static u64 __cmt_pre_mon_slice;
+
+#define CMT_DEFAULT_MIN_MON_SLICE 5000 /* ms */
+static u64 __cmt_min_mon_slice;
+
+
+/*
* If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
* Otherwise next online pkg_data or NULL if no more.
*/
@@ -300,6 +319,7 @@ static void pmonr_to_unused(struct pmonr *pmonr)
pmonr_move_all_dependants(pmonr, lender);
}
__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+ pkgd->nr_dirty_rmids++;
} else if (pmonr->state == PMONR_DEP_IDLE ||
pmonr->state == PMONR_DEP_DIRTY) {
@@ -312,6 +332,11 @@ static void pmonr_to_unused(struct pmonr *pmonr)
__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
else
pkgd->nr_dep_pmonrs--;
+
+
+ if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+ atomic64_set(&pmonr->monr->last_rmid_recoup,
+ get_jiffies_64());
} else {
WARN_ON_ONCE(true);
return;
@@ -372,6 +397,7 @@ static inline void __pmonr_to_dep_helper(
lender_rmids.value = atomic64_read(&lender->atomic_rmids);
pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+ atomic_inc(&pmonr->monr->nr_dep_pmonrs);
}
static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
@@ -390,6 +416,7 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
{
+ struct pkg_data *pkgd = pmonr->pkgd;
struct pmonr *lender;
union pmonr_rmids rmids;
@@ -398,6 +425,7 @@ static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
rmids.value = atomic64_read(&pmonr->atomic_rmids);
__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+ pkgd->nr_dirty_rmids++;
}
static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
@@ -408,6 +436,9 @@ static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
pmonr_move_dependants(pmonr->lender, pmonr);
pmonr->lender = NULL;
__pmonr_to_active_helper(pmonr, rmid);
+
+ if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+ atomic64_set(&pmonr->monr->last_rmid_recoup, get_jiffies_64());
}
static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
@@ -422,6 +453,7 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
union pmonr_rmids rmids;
rmids.value = atomic64_read(&pmonr->atomic_rmids);
+ pmonr->pkgd->nr_dirty_rmids--;
__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
}
@@ -1599,7 +1631,7 @@ static int read_all_pkgs(struct monr *monr, int wait_time_ms, u64 *count)
static int intel_cmt_event_read(struct perf_event *event)
{
struct monr *monr = monr_from_event(event);
- u64 count;
+ u64 count, recoup, wait_end;
u16 pkgid = topology_logical_package_id(smp_processor_id());
int err;
@@ -1614,6 +1646,15 @@ static int intel_cmt_event_read(struct perf_event *event)
return -ENXIO;
/*
+ * If a rmid has been stolen, only read once enough time has elapsed
+ * since the rmids were recovered.
+ */
+ recoup = atomic64_read(&monr->last_rmid_recoup);
+ wait_end = recoup + __cmt_pre_mon_slice;
+ if (recoup && time_before64(get_jiffies_64(), wait_end))
+ return -EAGAIN;
+
+ /*
* Only event parent can return a value, everyone else share its
* rmid and therefore doesn't track occupancy independently.
*/
@@ -2267,6 +2308,9 @@ static int __init intel_cmt_init(void)
struct pkg_data *pkgd = NULL;
int err = 0;
+ __cmt_pre_mon_slice = msecs_to_jiffies(CMT_DEFAULT_PRE_MON_SLICE);
+ __cmt_min_mon_slice = msecs_to_jiffies(CMT_DEFAULT_MIN_MON_SLICE);
+
if (!x86_match_cpu(intel_cmt_match)) {
err = -ENODEV;
goto err_exit;
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 8bb43bd..8756666 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -52,6 +52,24 @@
* schedule and read.
*
*
+ * Rotation
+ *
+ * The number of rmids in hw is relatively small with respect to the number
+ * of potential monitored resources. rmids are rotated among the pmonrs that
+ * need one to give a fair-ish usage of this resource.
+ *
+ * A hw constraint is that occupancy for a rmid cannot be restarted, therefore
+ * a rmid with llc_occupancy needs some time unscheduled until all cache lines
+ * tagged to it are evicted from cache (if this ever happens).
+ *
+ * When a rmid is "rotated", it is stolen from a pmonr and must wait until its
+ * llc_occupancy has decreased enough to be considered "clean". Meanwhile, that
+ * rmid is considered "dirty".
+ *
+ * Rotation logic periodically reads occupancy of these "dirty" rmids and, once
+ * a rmid is clean, it is either reused or placed in a free pool.
+ *
+ *
* Locking
*
* One global cmt_mutex. One mutex and spin_lock per package.
@@ -62,6 +80,7 @@
* cgroup start/stop.
* - Hold pkg->mutex and pkg->lock in _all_ active packages to traverse or
* change the monr hierarchy.
+ * - pkgd->mutex: Hold in current package for rotation in that pkgd.
* - pkgd->lock: Hold in current package to access that pkgd's members. Hold
* a pmonr's package pkgd->lock for non-atomic access to pmonr.
*/
@@ -225,6 +244,7 @@ struct cmt_csd {
* @dep_dirty_pmonrs: LRU of Dep_Dirty pmonrs.
* @dep_pmonrs: LRU of Dep_Idle and Dep_Dirty pmonrs.
* @nr_dep_pmonrs: nr Dep_Idle + nr Dep_Dirty pmonrs.
+ * @nr_dirty_rmids: nr of "dirty" rmids, both with and without a pmonr.
* @mutex: Hold when modifying this pkg_data.
* @mutex_key: lockdep class for pkg_data's mutex.
* @lock: Hold to protect pmonrs in this pkg_data.
@@ -243,6 +263,7 @@ struct pkg_data {
struct list_head dep_dirty_pmonrs;
struct list_head dep_pmonrs;
int nr_dep_pmonrs;
+ int nr_dirty_rmids;
struct mutex mutex;
raw_spinlock_t lock;
@@ -280,6 +301,10 @@ enum cmt_user_flags {
* @parent: Parent in monr hierarchy.
* @children: List of children in monr hierarchy.
* @parent_entry: Entry in parent's children list.
+ * @last_rmid_recoup: Last time that nr_dep_pmonrs decreased to zero. It's
+ * zero if a rmid has never been stolen from this monr.
+ * @nr_dep_pmonrs: nr of Dep_* pmonrs in this monr. A zero implies that
+ * monr is monitoring in all required packages.
* @flags: monr_flags.
* @nr_has_user: nr of CMT_UF_HAS_USER set in events in mon_events.
* @nr_nolazy_user: nr of CMT_UF_NOLAZY_RMID set in events in mon_events.
@@ -303,6 +328,9 @@ struct monr {
struct list_head children;
struct list_head parent_entry;
+ atomic64_t last_rmid_recoup;
+ atomic_t nr_dep_pmonrs;
+
enum monr_flags flags;
int nr_has_user;
int nr_nolazy_rmid;
--
2.8.0.rc3.226.g39d4020
next prev parent reply other threads:[~2016-10-30 0:40 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-30 0:37 [PATCH v3 00/46] Cache Monitoring Technology (aka CQM) David Carrillo-Cisneros
2016-10-30 0:37 ` [PATCH v3 01/46] perf/x86/intel/cqm: remove previous version of CQM and MBM David Carrillo-Cisneros
2016-10-30 0:37 ` [PATCH v3 02/46] perf/x86/intel: rename CQM cpufeatures to CMT David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 03/46] x86/intel: add CONFIG_INTEL_RDT_M configuration flag David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 04/46] perf/x86/intel/cmt: add device initialization and CPU hotplug support David Carrillo-Cisneros
2016-11-10 15:19 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 05/46] perf/x86/intel/cmt: add per-package locks David Carrillo-Cisneros
2016-11-10 21:23 ` Thomas Gleixner
2016-11-11 2:22 ` David Carrillo-Cisneros
2016-11-11 7:21 ` Peter Zijlstra
2016-11-11 7:32 ` Ingo Molnar
2016-11-11 9:41 ` Thomas Gleixner
2016-11-11 17:21 ` David Carrillo-Cisneros
2016-11-13 10:58 ` Thomas Gleixner
2016-11-15 4:53 ` David Carrillo-Cisneros
2016-11-16 19:00 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 06/46] perf/x86/intel/cmt: add intel_cmt pmu David Carrillo-Cisneros
2016-11-10 21:27 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 07/46] perf/core: add RDT Monitoring attributes to struct hw_perf_event David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 08/46] perf/x86/intel/cmt: add MONitored Resource (monr) initialization David Carrillo-Cisneros
2016-11-10 23:09 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 09/46] perf/x86/intel/cmt: add basic monr hierarchy David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 10/46] perf/x86/intel/cmt: add Package MONitored Resource (pmonr) initialization David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 11/46] perf/x86/intel/cmt: add cmt_user_flags (uflags) to monr David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 12/46] perf/x86/intel/cmt: add per-package rmid pools David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 13/46] perf/x86/intel/cmt: add pmonr's Off and Unused states David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 14/46] perf/x86/intel/cmt: add Active and Dep_{Idle, Dirty} states David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 15/46] perf/x86/intel: encapsulate rmid and closid updates in pqr cache David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 16/46] perf/x86/intel/cmt: set sched rmid and complete pmu start/stop/add/del David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 17/46] perf/x86/intel/cmt: add uflag CMT_UF_NOLAZY_RMID David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 18/46] perf/core: add arch_info field to struct perf_cgroup David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 19/46] perf/x86/intel/cmt: add support for cgroup events David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 20/46] perf/core: add pmu::event_terminate David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 21/46] perf/x86/intel/cmt: use newly introduced event_terminate David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 22/46] perf/x86/intel/cmt: sync cgroups and intel_cmt device start/stop David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 23/46] perf/core: hooks to add architecture specific features in perf_cgroup David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 24/46] perf/x86/intel/cmt: add perf_cgroup_arch_css_{online,offline} David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 25/46] perf/x86/intel/cmt: add monr->flags and CMT_MONR_ZOMBIE David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 26/46] sched: introduce the finish_arch_pre_lock_switch() scheduler hook David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 27/46] perf/x86/intel: add pqr cache flags and intel_pqr_ctx_switch David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 28/46] perf,perf/x86,perf/powerpc,perf/arm,perf/*: add int error return to pmu::read David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 29/46] perf/x86/intel/cmt: add error handling to intel_cmt_event_read David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 30/46] perf/x86/intel/cmt: add asynchronous read for task events David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 31/46] perf/x86/intel/cmt: add subtree read for cgroup events David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 32/46] perf/core: Add PERF_EV_CAP_READ_ANY_{CPU_,}PKG flags David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 33/46] perf/x86/intel/cmt: use PERF_EV_CAP_READ_{,CPU_}PKG flags in Intel cmt David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 34/46] perf/core: introduce PERF_EV_CAP_CGROUP_NO_RECURSION David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 35/46] perf/x86/intel/cmt: use PERF_EV_CAP_CGROUP_NO_RECURSION in intel_cmt David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 36/46] perf/core: add perf_event cgroup hooks for subsystem attributes David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 37/46] perf/x86/intel/cmt: add cont_monitoring to perf cgroup David Carrillo-Cisneros
2016-10-30 0:38 ` David Carrillo-Cisneros [this message]
2016-10-30 0:38 ` [PATCH v3 39/46] perf/x86/intel/cmt: add max_recycle_threshold sysfs attribute David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 40/46] perf/x86/intel/cmt: add rotation scheduled work David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 41/46] perf/x86/intel/cmt: add rotation minimum progress SLO David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 42/46] perf/x86/intel/cmt: add rmid stealing David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 43/46] perf/x86/intel/cmt: add CMT_UF_NOSTEAL_RMID flag David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 44/46] perf/x86/intel/cmt: add debugfs intel_cmt directory David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 45/46] perf/stat: fix bug in handling events in error state David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 46/46] perf/stat: revamp read error handling, snapshot and per_pkg events David Carrillo-Cisneros
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1477787923-61185-39-git-send-email-davidcc@google.com \
--to=davidcc@google.com \
--cc=ak@linux.intel.com \
--cc=bp@suse.de \
--cc=eranian@google.com \
--cc=fenghua.yu@intel.com \
--cc=kan.liang@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=nilayvaish@gmail.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=ravi.v.shankar@intel.com \
--cc=tglx@linutronix.de \
--cc=vegard.nossum@gmail.com \
--cc=vikas.shivappa@linux.intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).