linux-kernel.vger.kernel.org archive mirror
From: David Carrillo-Cisneros <davidcc@google.com>
To: linux-kernel@vger.kernel.org
Cc: "x86@kernel.org" <x86@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Andi Kleen <ak@linux.intel.com>, Kan Liang <kan.liang@intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Vegard Nossum <vegard.nossum@gmail.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Nilay Vaish <nilayvaish@gmail.com>, Borislav Petkov <bp@suse.de>,
	Vikas Shivappa <vikas.shivappa@linux.intel.com>,
	Ravi V Shankar <ravi.v.shankar@intel.com>,
	Fenghua Yu <fenghua.yu@intel.com>, Paul Turner <pjt@google.com>,
	Stephane Eranian <eranian@google.com>,
	David Carrillo-Cisneros <davidcc@google.com>
Subject: [PATCH v3 04/46] perf/x86/intel/cmt: add device initialization and CPU hotplug support
Date: Sat, 29 Oct 2016 17:38:01 -0700	[thread overview]
Message-ID: <1477787923-61185-5-git-send-email-davidcc@google.com> (raw)
In-Reply-To: <1477787923-61185-1-git-send-email-davidcc@google.com>

Introduce struct pkg_data to store the per-package locks and data for
the new CMT driver.

Each pkg_data is initialized/terminated on demand when the first/last
CPU in its package goes online/offline.

More details are in the code's comments.
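
For reference, a minimal sketch of how the per-package data is meant to
be walked under RCU (the iterator and the pkg_data fields exist in this
patch; the printout is only illustrative):

	struct pkg_data *pkgd = NULL;

	rcu_read_lock();
	/* A NULL argument yields the first online package's pkg_data. */
	while ((pkgd = cmt_pkgs_data_next_rcu(pkgd)))
		pr_info("pkg %d: max_rmid %u\n", pkgd->pkgid, pkgd->max_rmid);
	rcu_read_unlock();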

Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
---
 arch/x86/events/intel/Makefile |   1 +
 arch/x86/events/intel/cmt.c    | 268 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/cmt.h    |  29 +++++
 include/linux/cpuhotplug.h     |   2 +
 4 files changed, 300 insertions(+)
 create mode 100644 arch/x86/events/intel/cmt.c
 create mode 100644 arch/x86/events/intel/cmt.h

diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index e9d8520..02fecbc 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel-uncore.o
 intel-uncore-objs			:= uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE)	+= intel-cstate.o
 intel-cstate-objs			:= cstate.o
+obj-$(CONFIG_INTEL_RDT_M)		+= cmt.o
diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
new file mode 100644
index 0000000..267a9ec
--- /dev/null
+++ b/arch/x86/events/intel/cmt.c
@@ -0,0 +1,268 @@
+/*
+ * Intel Cache Monitoring Technology (CMT) support.
+ */
+
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "cmt.h"
+#include "../perf_event.h"
+
+static DEFINE_MUTEX(cmt_mutex);
+
+static unsigned int cmt_l3_scale;	/* cmt hw units to bytes. */
+
+static unsigned int __min_max_rmid;	/* minimum max_rmid across all pkgs. */
+
+/* Array of per-package data (pkg_data pointers), protected by RCU or cmt_mutex. */
+static struct pkg_data **cmt_pkgs_data;
+
+/*
+ * If @pkgd == NULL, return the first online pkg_data in cmt_pkgs_data.
+ * Otherwise return the next online pkg_data, or NULL if there are no more.
+ */
+static struct pkg_data *cmt_pkgs_data_next_rcu(struct pkg_data *pkgd)
+{
+	u16 p, nr_pkgs = topology_max_packages();
+
+	if (!pkgd)
+		return rcu_dereference_check(cmt_pkgs_data[0],
+					     lockdep_is_held(&cmt_mutex));
+	p = pkgd->pkgid + 1;
+	pkgd = NULL;
+
+	while (!pkgd && p < nr_pkgs) {
+		pkgd = rcu_dereference_check(cmt_pkgs_data[p++],
+					     lockdep_is_held(&cmt_mutex));
+	}
+
+	return pkgd;
+}
+
+static void free_pkg_data(struct pkg_data *pkg_data)
+{
+	kfree(pkg_data);
+}
+
+/* Init pkg_data for @cpu's package. */
+static struct pkg_data *alloc_pkg_data(int cpu)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct pkg_data *pkgd;
+	int numa_node = cpu_to_node(cpu);
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	if (c->x86_cache_occ_scale != cmt_l3_scale) {
+		/* 0 scale must have been converted to 1 automatically. */
+		if (c->x86_cache_occ_scale || cmt_l3_scale != 1) {
+			pr_err("Multiple LLC scale values, disabling CMT support.\n");
+			return ERR_PTR(-ENXIO);
+		}
+	}
+
+	pkgd = kzalloc_node(sizeof(*pkgd), GFP_KERNEL, numa_node);
+	if (!pkgd)
+		return ERR_PTR(-ENOMEM);
+
+	pkgd->max_rmid = c->x86_cache_max_rmid;
+
+	pkgd->work_cpu = cpu;
+	pkgd->pkgid = pkgid;
+
+	__min_max_rmid = min(__min_max_rmid, pkgd->max_rmid);
+
+	return pkgd;
+}
+
+static void __terminate_pkg_data(struct pkg_data *pkgd)
+{
+	lockdep_assert_held(&cmt_mutex);
+
+	free_pkg_data(pkgd);
+}
+
+static int init_pkg_data(int cpu)
+{
+	struct pkg_data *pkgd;
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	lockdep_assert_held(&cmt_mutex);
+
+	/* Verify that this pkgid isn't already initialized. */
+	if (WARN_ON_ONCE(cmt_pkgs_data[pkgid]))
+		return -EPERM;
+
+	pkgd = alloc_pkg_data(cpu);
+	if (IS_ERR(pkgd))
+		return PTR_ERR(pkgd);
+
+	rcu_assign_pointer(cmt_pkgs_data[pkgid], pkgd);
+	synchronize_rcu();
+
+	return 0;
+}
+
+static int intel_cmt_hp_online_enter(unsigned int cpu)
+{
+	struct pkg_data *pkgd;
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	rcu_read_lock();
+	pkgd = rcu_dereference(cmt_pkgs_data[pkgid]);
+	if (pkgd->work_cpu >= nr_cpu_ids)
+		pkgd->work_cpu = cpu;
+
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int intel_cmt_hp_online_exit(unsigned int cpu)
+{
+	struct pkg_data *pkgd;
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	rcu_read_lock();
+	pkgd = rcu_dereference(cmt_pkgs_data[pkgid]);
+	if (pkgd->work_cpu == cpu)
+		pkgd->work_cpu = cpumask_any_but(
+				topology_core_cpumask(cpu), cpu);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int intel_cmt_prep_up(unsigned int cpu)
+{
+	struct pkg_data *pkgd;
+	int err = 0;
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	mutex_lock(&cmt_mutex);
+	pkgd = rcu_dereference_protected(cmt_pkgs_data[pkgid],
+					 lockdep_is_held(&cmt_mutex));
+	if (!pkgd)
+		err = init_pkg_data(cpu);
+	mutex_unlock(&cmt_mutex);
+
+	return err;
+}
+
+static int intel_cmt_prep_down(unsigned int cpu)
+{
+	struct pkg_data *pkgd;
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	mutex_lock(&cmt_mutex);
+	pkgd = rcu_dereference_protected(cmt_pkgs_data[pkgid],
+					 lockdep_is_held(&cmt_mutex));
+	if (pkgd->work_cpu >= nr_cpu_ids) {
+		/* will destroy pkgd */
+		__terminate_pkg_data(pkgd);
+		RCU_INIT_POINTER(cmt_pkgs_data[pkgid], NULL);
+		synchronize_rcu();
+	}
+	mutex_unlock(&cmt_mutex);
+
+	return 0;
+}
+
+static const struct x86_cpu_id intel_cmt_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CMT_OCCUP_LLC },
+	{}
+};
+
+static void cmt_dealloc(void)
+{
+	kfree(cmt_pkgs_data);
+	cmt_pkgs_data = NULL;
+}
+
+static int __init cmt_alloc(void)
+{
+	cmt_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+	if (cmt_l3_scale == 0)
+		cmt_l3_scale = 1;
+
+	cmt_pkgs_data = kcalloc(topology_max_packages(),
+				sizeof(*cmt_pkgs_data), GFP_KERNEL);
+	if (!cmt_pkgs_data)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int __init cmt_start(void)
+{
+	char *str, scale[20];
+	int err;
+
+	/* will be modified by init_pkg_data() in intel_cmt_prep_up(). */
+	__min_max_rmid = UINT_MAX;
+	err = cpuhp_setup_state(CPUHP_PERF_X86_CMT_PREP,
+				"PERF_X86_CMT_PREP",
+				intel_cmt_prep_up,
+				intel_cmt_prep_down);
+	if (err)
+		return err;
+
+	err = cpuhp_setup_state(CPUHP_AP_PERF_X86_CMT_ONLINE,
+				"AP_PERF_X86_CMT_ONLINE",
+				intel_cmt_hp_online_enter,
+				intel_cmt_hp_online_exit);
+	if (err)
+		goto rm_prep;
+
+	snprintf(scale, sizeof(scale), "%u", cmt_l3_scale);
+	str = kstrdup(scale, GFP_KERNEL);
+	if (!str) {
+		err = -ENOMEM;
+		goto rm_online;
+	}
+
+	return 0;
+
+rm_online:
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_CMT_ONLINE);
+rm_prep:
+	cpuhp_remove_state(CPUHP_PERF_X86_CMT_PREP);
+
+	return err;
+}
+
+static int __init intel_cmt_init(void)
+{
+	struct pkg_data *pkgd = NULL;
+	int err = 0;
+
+	if (!x86_match_cpu(intel_cmt_match)) {
+		err = -ENODEV;
+		goto err_exit;
+	}
+
+	err = cmt_alloc();
+	if (err)
+		goto err_exit;
+
+	err = cmt_start();
+	if (err)
+		goto err_dealloc;
+
+	pr_info("Intel CMT enabled with ");
+	rcu_read_lock();
+	while ((pkgd = cmt_pkgs_data_next_rcu(pkgd))) {
+		pr_cont("%d RMIDs for pkg %d, ",
+			pkgd->max_rmid + 1, pkgd->pkgid);
+	}
+	rcu_read_unlock();
+	pr_cont("and l3 scale of %d KBs.\n", cmt_l3_scale);
+
+	return err;
+
+err_dealloc:
+	cmt_dealloc();
+err_exit:
+	pr_err("Intel CMT registration failed with error: %d\n", err);
+	return err;
+}
+
+device_initcall(intel_cmt_init);
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
new file mode 100644
index 0000000..8c16797
--- /dev/null
+++ b/arch/x86/events/intel/cmt.h
@@ -0,0 +1,29 @@
+/*
+ * Intel Cache Monitoring Technology (CMT) support.
+ * (formerly Intel Cache QoS Monitoring, CQM)
+ *
+ *
+ * Locking
+ *
+ * One global cmt_mutex. One mutex and spin_lock per package.
+ * cmt_pkgs_data is RCU protected.
+ *
+ * Rules:
+ *  - cmt_mutex: Hold for CMT init/terminate, event init/terminate,
+ *  cgroup start/stop.
+ */
+
+/**
+ * struct pkg_data - Per-package CMT data.
+ *
+ * @work_cpu:			CPU to run rotation and other batch jobs.
+ *				It must be in the package associated to its
+ *				instance of pkg_data.
+ * @max_rmid:			Max rmid valid for CPUs in this package.
+ * @pkgid:			The logical package id for this pkgd.
+ */
+struct pkg_data {
+	unsigned int		work_cpu;
+	u32			max_rmid;
+	u16			pkgid;
+};
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 320a3be..604660a 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -11,6 +11,7 @@ enum cpuhp_state {
 	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
 	CPUHP_PERF_X86_RAPL_PREP,
+	CPUHP_PERF_X86_CMT_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
 	CPUHP_PERF_SUPERH,
@@ -110,6 +111,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
 	CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
 	CPUHP_AP_PERF_X86_RAPL_ONLINE,
+	CPUHP_AP_PERF_X86_CMT_ONLINE,
 	CPUHP_AP_PERF_X86_CSTATE_ONLINE,
 	CPUHP_AP_PERF_S390_CF_ONLINE,
 	CPUHP_AP_PERF_S390_SF_ONLINE,
-- 
2.8.0.rc3.226.g39d4020

Thread overview: 59+ messages
2016-10-30  0:37 [PATCH v3 00/46] Cache Monitoring Technology (aka CQM) David Carrillo-Cisneros
2016-10-30  0:37 ` [PATCH v3 01/46] perf/x86/intel/cqm: remove previous version of CQM and MBM David Carrillo-Cisneros
2016-10-30  0:37 ` [PATCH v3 02/46] perf/x86/intel: rename CQM cpufeatures to CMT David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 03/46] x86/intel: add CONFIG_INTEL_RDT_M configuration flag David Carrillo-Cisneros
2016-10-30  0:38 ` David Carrillo-Cisneros [this message]
2016-11-10 15:19   ` [PATCH v3 04/46] perf/x86/intel/cmt: add device initialization and CPU hotplug support Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 05/46] perf/x86/intel/cmt: add per-package locks David Carrillo-Cisneros
2016-11-10 21:23   ` Thomas Gleixner
2016-11-11  2:22     ` David Carrillo-Cisneros
2016-11-11  7:21       ` Peter Zijlstra
2016-11-11  7:32         ` Ingo Molnar
2016-11-11  9:41         ` Thomas Gleixner
2016-11-11 17:21           ` David Carrillo-Cisneros
2016-11-13 10:58             ` Thomas Gleixner
2016-11-15  4:53         ` David Carrillo-Cisneros
2016-11-16 19:00           ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 06/46] perf/x86/intel/cmt: add intel_cmt pmu David Carrillo-Cisneros
2016-11-10 21:27   ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 07/46] perf/core: add RDT Monitoring attributes to struct hw_perf_event David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 08/46] perf/x86/intel/cmt: add MONitored Resource (monr) initialization David Carrillo-Cisneros
2016-11-10 23:09   ` Thomas Gleixner
2016-10-30  0:38 ` [PATCH v3 09/46] perf/x86/intel/cmt: add basic monr hierarchy David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 10/46] perf/x86/intel/cmt: add Package MONitored Resource (pmonr) initialization David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 11/46] perf/x86/intel/cmt: add cmt_user_flags (uflags) to monr David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 12/46] perf/x86/intel/cmt: add per-package rmid pools David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 13/46] perf/x86/intel/cmt: add pmonr's Off and Unused states David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 14/46] perf/x86/intel/cmt: add Active and Dep_{Idle, Dirty} states David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 15/46] perf/x86/intel: encapsulate rmid and closid updates in pqr cache David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 16/46] perf/x86/intel/cmt: set sched rmid and complete pmu start/stop/add/del David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 17/46] perf/x86/intel/cmt: add uflag CMT_UF_NOLAZY_RMID David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 18/46] perf/core: add arch_info field to struct perf_cgroup David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 19/46] perf/x86/intel/cmt: add support for cgroup events David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 20/46] perf/core: add pmu::event_terminate David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 21/46] perf/x86/intel/cmt: use newly introduced event_terminate David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 22/46] perf/x86/intel/cmt: sync cgroups and intel_cmt device start/stop David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 23/46] perf/core: hooks to add architecture specific features in perf_cgroup David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 24/46] perf/x86/intel/cmt: add perf_cgroup_arch_css_{online,offline} David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 25/46] perf/x86/intel/cmt: add monr->flags and CMT_MONR_ZOMBIE David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 26/46] sched: introduce the finish_arch_pre_lock_switch() scheduler hook David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 27/46] perf/x86/intel: add pqr cache flags and intel_pqr_ctx_switch David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 28/46] perf,perf/x86,perf/powerpc,perf/arm,perf/*: add int error return to pmu::read David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 29/46] perf/x86/intel/cmt: add error handling to intel_cmt_event_read David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 30/46] perf/x86/intel/cmt: add asynchronous read for task events David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 31/46] perf/x86/intel/cmt: add subtree read for cgroup events David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 32/46] perf/core: Add PERF_EV_CAP_READ_ANY_{CPU_,}PKG flags David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 33/46] perf/x86/intel/cmt: use PERF_EV_CAP_READ_{,CPU_}PKG flags in Intel cmt David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 34/46] perf/core: introduce PERF_EV_CAP_CGROUP_NO_RECURSION David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 35/46] perf/x86/intel/cmt: use PERF_EV_CAP_CGROUP_NO_RECURSION in intel_cmt David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 36/46] perf/core: add perf_event cgroup hooks for subsystem attributes David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 37/46] perf/x86/intel/cmt: add cont_monitoring to perf cgroup David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 38/46] perf/x86/intel/cmt: introduce read SLOs for rotation David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 39/46] perf/x86/intel/cmt: add max_recycle_threshold sysfs attribute David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 40/46] perf/x86/intel/cmt: add rotation scheduled work David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 41/46] perf/x86/intel/cmt: add rotation minimum progress SLO David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 42/46] perf/x86/intel/cmt: add rmid stealing David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 43/46] perf/x86/intel/cmt: add CMT_UF_NOSTEAL_RMID flag David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 44/46] perf/x86/intel/cmt: add debugfs intel_cmt directory David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 45/46] perf/stat: fix bug in handling events in error state David Carrillo-Cisneros
2016-10-30  0:38 ` [PATCH v3 46/46] perf/stat: revamp read error handling, snapshot and per_pkg events David Carrillo-Cisneros
