All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, tj@kernel.org,
	peterz@infradead.org, matt.fleming@intel.com,
	will.auld@intel.com, glenn.p.williamson@intel.com,
	kanaka.d.juvva@intel.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 6/9] x86/intel_rdt: Add support for cache bit mask management
Date: Thu,  6 Aug 2015 14:55:14 -0700	[thread overview]
Message-ID: <1438898117-3692-7-git-send-email-vikas.shivappa@linux.intel.com> (raw)
In-Reply-To: <1438898117-3692-1-git-send-email-vikas.shivappa@linux.intel.com>

Adds a file l3_cbm to the intel_rdt cgroup which represents the cache
capacity bit mask for the cgroup. The tasks in the cgroup would get to
fill the L3 cache represented by the cgroup's l3_cbm file. The bit mask
may map to ways in the cache but could be hardware implementation
specific.

The l3_cbm would represent one of IA32_L3_MASK_n MSRs, there by any
updates to the l3_cbm end up in an MSR write to the appropriate
IA32_L3_MASK_n. The IA32_L3_MASK_n MSRs are per package but the l3_cbm
represents the global value of the MSR on all packages.

When a child cgroup is created it inherits the CLOSid and the l3_cbm
from its parent. When a user changes the default l3_cbm for a cgroup, a
new CLOSid may be allocated if the l3_cbm was not used before. If the
new l3_cbm is the one that is already used, the count for that CLOSid
<-> l3_cbm is incremented. The changing of 'l3_cbm' may fail with
-ENOSPC once the kernel runs out of maximum CLOSids it can support.

User can create as many cgroups as he wants, but having different l3_cbm
at the same time is restricted by the maximum number of CLOSids. Kernel
maintains a CLOSid <-> l3_cbm mapping which keeps count of cgroups using
a CLOSid.

Reuse of CLOSids for cgroups with same bitmask also has following
advantages:
 - This helps to use the scant CLOSids optimally.
 - This also implies that during context switch, write to PQR-MSR is
 done only when a task with a different bitmask is scheduled in.

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
 arch/x86/include/asm/intel_rdt.h |   3 +
 arch/x86/kernel/cpu/intel_rdt.c  | 202 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index a887004..58bac91 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,6 +4,9 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#define MAX_CBM_LENGTH			32
+#define IA32_L3_CBM_BASE		0xc90
+#define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
 
 struct rdt_subsys_info {
 	unsigned long *closmap;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 52e1fd6..115f136 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -34,6 +34,13 @@ static struct clos_cbm_table *cctable;
 static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
+/*
+ * Mask of CPUs for writing CBM values. We only need one CPU per-socket.
+ */
+static cpumask_t rdt_cpumask;
+
+#define rdt_for_each_child(pos_css, parent_ir)		\
+	css_for_each_child((pos_css), &(parent_ir)->css)
 
 static inline void closid_get(u32 closid)
 {
@@ -117,12 +124,192 @@ static void intel_rdt_css_free(struct cgroup_subsys_state *css)
 	mutex_unlock(&rdt_group_mutex);
 }
 
+static int intel_cache_alloc_cbm_read(struct seq_file *m, void *v)
+{
+	struct intel_rdt *ir = css_rdt(seq_css(m));
+
+	seq_printf(m, "%08lx\n", cctable[ir->closid].l3_cbm);
+
+	return 0;
+}
+
+static inline bool cbm_is_contiguous(unsigned long var)
+{
+	unsigned long maxcbm = MAX_CBM_LENGTH;
+	unsigned long first_bit, zero_bit;
+
+	if (!var)
+		return false;
+
+	first_bit = find_first_bit(&var, maxcbm);
+	zero_bit = find_next_zero_bit(&var, maxcbm, first_bit);
+
+	if (find_next_bit(&var, maxcbm, zero_bit) < maxcbm)
+		return false;
+
+	return true;
+}
+
+static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
+{
+	struct cgroup_subsys_state *css;
+	struct intel_rdt *par, *c;
+	unsigned long *cbm_tmp;
+	int err = 0;
+
+	if (!cbm_is_contiguous(cbmvalue)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	par = parent_rdt(ir);
+	cbm_tmp = &cctable[par->closid].l3_cbm;
+	if (!bitmap_subset(&cbmvalue, cbm_tmp, MAX_CBM_LENGTH)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rcu_read_lock();
+	rdt_for_each_child(css, ir) {
+		c = css_rdt(css);
+		cbm_tmp = &cctable[c->closid].l3_cbm;
+		if (!bitmap_subset(cbm_tmp, &cbmvalue, MAX_CBM_LENGTH)) {
+			rcu_read_unlock();
+			err = -EINVAL;
+			goto out_err;
+		}
+	}
+	rcu_read_unlock();
+out_err:
+
+	return err;
+}
+
+static bool cbm_search(unsigned long cbm, u32 *closid)
+{
+	u32 maxid = boot_cpu_data.x86_cache_max_closid;
+	u32 i;
+
+	for (i = 0; i < maxid; i++) {
+		if (bitmap_equal(&cbm, &cctable[i].l3_cbm, MAX_CBM_LENGTH)) {
+			*closid = i;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void closcbm_map_dump(void)
+{
+	u32 i;
+
+	pr_debug("CBMMAP\n");
+	for (i = 0; i < boot_cpu_data.x86_cache_max_closid; i++) {
+		pr_debug("l3_cbm: 0x%x,clos_refcnt: %u\n",
+		 (unsigned int)cctable[i].l3_cbm, cctable[i].clos_refcnt);
+	}
+}
+
+static void cbm_cpu_update(void *info)
+{
+	u32 closid = (u32) info;
+
+	wrmsrl(CBM_FROM_INDEX(closid), cctable[closid].l3_cbm);
+}
+
+/*
+ * cbm_update_all() - Update the cache bit mask for all packages.
+ */
+static inline void cbm_update_all(u32 closid)
+{
+	on_each_cpu_mask(&rdt_cpumask, cbm_cpu_update, (void *)closid, 1);
+}
+
+/*
+ * intel_cache_alloc_cbm_write() - Validates and writes the
+ * cache bit mask(cbm) to the IA32_L3_MASK_n
+ * and also store the same in the cctable.
+ *
+ * CLOSids are reused for cgroups which have same bitmask.
+ * This helps to use the scant CLOSids optimally. This also
+ * implies that at context switch write to PQR-MSR is done
+ * only when a task with a different bitmask is scheduled in.
+ */
+static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
+				 struct cftype *cft, u64 cbmvalue)
+{
+	u32 max_cbm = boot_cpu_data.x86_cache_max_cbm_len;
+	struct intel_rdt *ir = css_rdt(css);
+	u64 max_mask;
+	int err = 0;
+	u32 closid;
+
+	if (ir == &rdt_root_group)
+		return -EPERM;
+
+	/*
+	 * Need global mutex as cbm write may allocate a closid.
+	 */
+	mutex_lock(&rdt_group_mutex);
+
+	max_mask = (1ULL << max_cbm) - 1;
+	if (cbmvalue & ~max_mask) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (cbmvalue == cctable[ir->closid].l3_cbm)
+		goto out;
+
+	err = cbm_validate(ir, cbmvalue);
+	if (err)
+		goto out;
+
+	/*
+	 * Try to get a reference for a different CLOSid and release the
+	 * reference to the current CLOSid.
+	 * Need to put down the reference here and get it back in case we
+	 * run out of closids. Otherwise we run into a problem when
+	 * we could be using the last closid that could have been available.
+	 */
+	closid_put(ir->closid);
+	if (cbm_search(cbmvalue, &closid)) {
+		ir->closid = closid;
+		closid_get(closid);
+	} else {
+		closid = ir->closid;
+		err = closid_alloc(ir);
+		if (err) {
+			closid_get(ir->closid);
+			goto out;
+		}
+
+		cctable[ir->closid].l3_cbm = cbmvalue;
+		cbm_update_all(ir->closid);
+	}
+	closcbm_map_dump();
+out:
+	mutex_unlock(&rdt_group_mutex);
+
+	return err;
+}
+
+static inline void rdt_cpumask_update(int cpu)
+{
+	static cpumask_t tmp;
+
+	cpumask_and(&tmp, &rdt_cpumask, topology_core_cpumask(cpu));
+	if (cpumask_empty(&tmp))
+		cpumask_set_cpu(cpu, &rdt_cpumask);
+}
+
 static int __init intel_rdt_late_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	static struct clos_cbm_table *cct;
 	u32 maxid, max_cbm_len;
-	int err = 0, size;
+	int err = 0, size, i;
 
 	if (!cpu_has(c, X86_FEATURE_CAT_L3)) {
 		rdt_root_group.css.ss->disabled = 1;
@@ -152,6 +339,9 @@ static int __init intel_rdt_late_init(void)
 	cct->l3_cbm = (1ULL << max_cbm_len) - 1;
 	cct->clos_refcnt = 1;
 
+	for_each_online_cpu(i)
+		rdt_cpumask_update(i);
+
 	pr_info("Intel cache allocation enabled\n");
 out_err:
 
@@ -160,8 +350,18 @@ out_err:
 
 late_initcall(intel_rdt_late_init);
 
+static struct cftype rdt_files[] = {
+	{
+		.name		= "l3_cbm",
+		.seq_show	= intel_cache_alloc_cbm_read,
+		.write_u64	= intel_cache_alloc_cbm_write,
+	},
+	{ }	/* terminate */
+};
+
 struct cgroup_subsys intel_rdt_cgrp_subsys = {
 	.css_alloc		= intel_rdt_css_alloc,
 	.css_free		= intel_rdt_css_free,
+	.legacy_cftypes		= rdt_files,
 	.early_init		= 0,
 };
-- 
1.9.1


  parent reply	other threads:[~2015-08-06 21:55 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-06 21:55 [PATCH V13 0/9] Intel cache allocation and Hot cpu handling changes to cqm, rapl Vikas Shivappa
2015-08-06 21:55 ` [PATCH 1/9] x86/intel_cqm: Modify hot cpu notification handling Vikas Shivappa
2015-08-06 21:55 ` [PATCH 2/9] x86/intel_rapl: " Vikas Shivappa
2015-08-06 21:55 ` [PATCH 3/9] x86/intel_rdt: Cache Allocation documentation and cgroup usage guide Vikas Shivappa
2015-08-06 21:55 ` [PATCH 4/9] x86/intel_rdt: Add support for Cache Allocation detection Vikas Shivappa
2015-08-06 21:55 ` [PATCH 5/9] x86/intel_rdt: Add new cgroup and Class of service management Vikas Shivappa
2015-08-06 21:55 ` Vikas Shivappa [this message]
2015-08-06 21:55 ` [PATCH 7/9] x86/intel_rdt: Implement scheduling support for Intel RDT Vikas Shivappa
2015-08-06 23:03   ` Andy Lutomirski
2015-08-07 18:52     ` Vikas Shivappa
2015-08-06 21:55 ` [PATCH 8/9] x86/intel_rdt: Hot cpu support for Cache Allocation Vikas Shivappa
2015-08-06 21:55 ` [PATCH 9/9] x86/intel_rdt: Intel haswell Cache Allocation enumeration Vikas Shivappa
  -- strict thread matches above, loose matches on Subject: below --
2015-07-01 22:21 [PATCH V12 0/9] Hot cpu handling changes to cqm, rapl and Intel Cache Allocation support Vikas Shivappa
2015-07-01 22:21 ` [PATCH 6/9] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa
2015-07-28 16:35   ` Peter Zijlstra
2015-07-28 22:08     ` Vikas Shivappa
2015-07-28 16:37   ` Peter Zijlstra
2015-07-30 17:54     ` Vikas Shivappa
2015-06-25 19:25 [PATCH V11 0/9] Hot cpu handling changes to cqm,rapl and Intel Cache Allocation support Vikas Shivappa
2015-06-25 19:25 ` [PATCH 6/9] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1438898117-3692-7-git-send-email-vikas.shivappa@linux.intel.com \
    --to=vikas.shivappa@linux.intel.com \
    --cc=glenn.p.williamson@intel.com \
    --cc=hpa@zytor.com \
    --cc=kanaka.d.juvva@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt.fleming@intel.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=vikas.shivappa@intel.com \
    --cc=will.auld@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.