From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, hpa@zytor.com,
tglx@linutronix.de, mingo@kernel.org, tj@kernel.org,
peterz@infradead.org, matt.fleming@intel.com,
will.auld@intel.com, glenn.p.williamson@intel.com,
kanaka.d.juvva@intel.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 6/9] x86/intel_rdt: Add support for cache bit mask management
Date: Thu, 6 Aug 2015 14:55:14 -0700
Message-ID: <1438898117-3692-7-git-send-email-vikas.shivappa@linux.intel.com>
In-Reply-To: <1438898117-3692-1-git-send-email-vikas.shivappa@linux.intel.com>
Add a file, l3_cbm, to the intel_rdt cgroup which represents the cache
capacity bit mask (CBM) for the cgroup. Tasks in the cgroup are allowed
to fill only the portion of the L3 cache represented by the cgroup's
l3_cbm. The bit mask typically maps to ways in the cache, but the exact
mapping is hardware implementation specific.
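For illustration only, here is a minimal userspace sketch of driving the
new file; the cgroup mount point and group name are assumptions and not
part of this patch, the interface itself is just a hex mask written to
the l3_cbm file:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Hypothetical mount point and group; only the file name comes from this patch. */
	const char *path = "/sys/fs/cgroup/intel_rdt/low_prio/intel_rdt.l3_cbm";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* Four consecutive bits -> roughly four ways of L3 for this group. */
	if (fprintf(f, "0xf\n") < 0)
		perror("fprintf");
	fclose(f);
	return 0;
}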
Each l3_cbm is backed by one of the IA32_L3_MASK_n MSRs, so any update
to l3_cbm results in an MSR write to the corresponding IA32_L3_MASK_n.
The IA32_L3_MASK_n MSRs are per package, but l3_cbm represents the
global value of that MSR across all packages.
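For reference, a rough sketch of that fan-out, assuming one CPU has
already been picked per package; the helper name and cpumask argument
are illustrative, the mechanism actually added by this patch is
cbm_update_all()/cbm_cpu_update() in the hunk below:

/* Illustrative only; not the code added by this patch. */
#include <linux/cpumask.h>
#include <linux/types.h>
#include <asm/intel_rdt.h>	/* CBM_FROM_INDEX(), added by this patch */
#include <asm/msr.h>

static void fan_out_l3_cbm(u32 closid, u64 cbm,
			   const struct cpumask *one_cpu_per_package)
{
	int cpu;

	/* Write the same value to IA32_L3_MASK_n on every package. */
	for_each_cpu(cpu, one_cpu_per_package)
		wrmsrl_on_cpu(cpu, CBM_FROM_INDEX(closid), cbm);
}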
When a child cgroup is created it inherits the CLOSid and the l3_cbm
from its parent. When a user changes the l3_cbm for a cgroup, a new
CLOSid may be allocated if that l3_cbm is not already in use. If the
new l3_cbm matches one that is already in use, the reference count for
that CLOSid <-> l3_cbm pairing is incremented instead. Changing
'l3_cbm' may fail with -ENOSPC once the kernel runs out of CLOSids.
Users can create as many cgroups as they want, but the number of
distinct l3_cbm values in use at any one time is limited by the maximum
number of CLOSids. The kernel maintains a CLOSid <-> l3_cbm mapping
which counts the cgroups using each CLOSid.
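To make the bookkeeping concrete, here is a simplified sketch of that
mapping; the real table is cctable (added earlier in this series), and
the type, field and function names below are illustrative:

/* Illustrative only; the real table is cctable, indexed by CLOSid. */
#include <linux/errno.h>
#include <linux/types.h>

struct clos_cbm_entry {
	unsigned long	l3_cbm;		/* bit mask programmed for this CLOSid */
	unsigned int	refcnt;		/* number of cgroups using this CLOSid */
};

/* Reuse a CLOSid whose mask already matches; otherwise take a free one. */
static int assign_closid(struct clos_cbm_entry *tbl, u32 max_closid,
			 unsigned long cbm, u32 *closid)
{
	u32 i;

	for (i = 0; i < max_closid; i++) {
		if (tbl[i].refcnt && tbl[i].l3_cbm == cbm) {
			tbl[i].refcnt++;
			*closid = i;
			return 0;
		}
	}
	for (i = 0; i < max_closid; i++) {
		if (!tbl[i].refcnt) {
			tbl[i].l3_cbm = cbm;
			tbl[i].refcnt = 1;
			*closid = i;
			return 0;
		}
	}
	return -ENOSPC;		/* every CLOSid already carries a distinct mask */
}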
Reusing CLOSids for cgroups with the same bitmask also has the
following advantages:
- It makes optimal use of the scarce CLOSids.
- It also means that during a context switch, the PQR MSR is written
  only when a task with a different bitmask is scheduled in (see the
  sketch below).
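A minimal sketch of that second point, assuming the IA32_PQR_ASSOC MSR
at 0xc8f and a per-cpu cache of the last written CLOSid; RMID handling
is omitted and the real scheduling hook is added in a later patch of
this series:

/* Illustrative only; the real hook arrives with the scheduling patch. */
#include <linux/percpu.h>
#include <linux/types.h>
#include <asm/msr.h>

#define PQR_ASSOC_MSR	0x0c8f		/* IA32_PQR_ASSOC (assumed address) */

static DEFINE_PER_CPU(u32, cached_closid);

static inline void rdt_sched_in_sketch(u32 next_closid)
{
	/* Skip the MSR write when the incoming task shares the CLOSid. */
	if (this_cpu_read(cached_closid) == next_closid)
		return;

	this_cpu_write(cached_closid, next_closid);
	/* Low word carries the RMID (omitted here), high word the CLOSid. */
	wrmsr(PQR_ASSOC_MSR, 0, next_closid);
}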
Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
arch/x86/include/asm/intel_rdt.h | 3 +
arch/x86/kernel/cpu/intel_rdt.c | 202 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 204 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index a887004..58bac91 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,6 +4,9 @@
#ifdef CONFIG_CGROUP_RDT
#include <linux/cgroup.h>
+#define MAX_CBM_LENGTH 32
+#define IA32_L3_CBM_BASE 0xc90
+#define CBM_FROM_INDEX(x) (IA32_L3_CBM_BASE + x)
struct rdt_subsys_info {
unsigned long *closmap;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 52e1fd6..115f136 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -34,6 +34,13 @@ static struct clos_cbm_table *cctable;
static struct rdt_subsys_info rdtss_info;
static DEFINE_MUTEX(rdt_group_mutex);
struct intel_rdt rdt_root_group;
+/*
+ * Mask of CPUs for writing CBM values. We only need one CPU per-socket.
+ */
+static cpumask_t rdt_cpumask;
+
+#define rdt_for_each_child(pos_css, parent_ir) \
+ css_for_each_child((pos_css), &(parent_ir)->css)
static inline void closid_get(u32 closid)
{
@@ -117,12 +124,192 @@ static void intel_rdt_css_free(struct cgroup_subsys_state *css)
mutex_unlock(&rdt_group_mutex);
}
+static int intel_cache_alloc_cbm_read(struct seq_file *m, void *v)
+{
+ struct intel_rdt *ir = css_rdt(seq_css(m));
+
+ seq_printf(m, "%08lx\n", cctable[ir->closid].l3_cbm);
+
+ return 0;
+}
+
+static inline bool cbm_is_contiguous(unsigned long var)
+{
+ unsigned long maxcbm = MAX_CBM_LENGTH;
+ unsigned long first_bit, zero_bit;
+
+ if (!var)
+ return false;
+
+ first_bit = find_first_bit(&var, maxcbm);
+ zero_bit = find_next_zero_bit(&var, maxcbm, first_bit);
+
+ if (find_next_bit(&var, maxcbm, zero_bit) < maxcbm)
+ return false;
+
+ return true;
+}
+
+static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
+{
+ struct cgroup_subsys_state *css;
+ struct intel_rdt *par, *c;
+ unsigned long *cbm_tmp;
+ int err = 0;
+
+ if (!cbm_is_contiguous(cbmvalue)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ par = parent_rdt(ir);
+ cbm_tmp = &cctable[par->closid].l3_cbm;
+ if (!bitmap_subset(&cbmvalue, cbm_tmp, MAX_CBM_LENGTH)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ rcu_read_lock();
+ rdt_for_each_child(css, ir) {
+ c = css_rdt(css);
+ cbm_tmp = &cctable[c->closid].l3_cbm;
+ if (!bitmap_subset(cbm_tmp, &cbmvalue, MAX_CBM_LENGTH)) {
+ rcu_read_unlock();
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+ rcu_read_unlock();
+out_err:
+
+ return err;
+}
+
+static bool cbm_search(unsigned long cbm, u32 *closid)
+{
+ u32 maxid = boot_cpu_data.x86_cache_max_closid;
+ u32 i;
+
+ for (i = 0; i < maxid; i++) {
+ if (bitmap_equal(&cbm, &cctable[i].l3_cbm, MAX_CBM_LENGTH)) {
+ *closid = i;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void closcbm_map_dump(void)
+{
+ u32 i;
+
+ pr_debug("CBMMAP\n");
+ for (i = 0; i < boot_cpu_data.x86_cache_max_closid; i++) {
+ pr_debug("l3_cbm: 0x%x,clos_refcnt: %u\n",
+ (unsigned int)cctable[i].l3_cbm, cctable[i].clos_refcnt);
+ }
+}
+
+static void cbm_cpu_update(void *info)
+{
+ u32 closid = (u32)(unsigned long)info;
+
+ wrmsrl(CBM_FROM_INDEX(closid), cctable[closid].l3_cbm);
+}
+
+/*
+ * cbm_update_all() - Update the cache bit mask for all packages.
+ */
+static inline void cbm_update_all(u32 closid)
+{
+ on_each_cpu_mask(&rdt_cpumask, cbm_cpu_update, (void *)(unsigned long)closid, 1);
+}
+
+/*
+ * intel_cache_alloc_cbm_write() - Validates and writes the
+ * cache bit mask(cbm) to the IA32_L3_MASK_n
+ * and also store the same in the cctable.
+ *
+ * CLOSids are reused for cgroups which have same bitmask.
+ * This helps to use the scant CLOSids optimally. This also
+ * implies that at context switch write to PQR-MSR is done
+ * only when a task with a different bitmask is scheduled in.
+ */
+static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 cbmvalue)
+{
+ u32 max_cbm = boot_cpu_data.x86_cache_max_cbm_len;
+ struct intel_rdt *ir = css_rdt(css);
+ u64 max_mask;
+ int err = 0;
+ u32 closid;
+
+ if (ir == &rdt_root_group)
+ return -EPERM;
+
+ /*
+ * Need global mutex as cbm write may allocate a closid.
+ */
+ mutex_lock(&rdt_group_mutex);
+
+ max_mask = (1ULL << max_cbm) - 1;
+ if (cbmvalue & ~max_mask) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (cbmvalue == cctable[ir->closid].l3_cbm)
+ goto out;
+
+ err = cbm_validate(ir, cbmvalue);
+ if (err)
+ goto out;
+
+ /*
+ * Try to get a reference for a different CLOSid and release the
+ * reference to the current CLOSid.
+ * Need to put down the reference here and get it back in case we
+ * run out of closids. Otherwise we run into a problem when
+ * we could be using the last closid that could have been available.
+ */
+ closid_put(ir->closid);
+ if (cbm_search(cbmvalue, &closid)) {
+ ir->closid = closid;
+ closid_get(closid);
+ } else {
+ closid = ir->closid;
+ err = closid_alloc(ir);
+ if (err) {
+ closid_get(ir->closid);
+ goto out;
+ }
+
+ cctable[ir->closid].l3_cbm = cbmvalue;
+ cbm_update_all(ir->closid);
+ }
+ closcbm_map_dump();
+out:
+ mutex_unlock(&rdt_group_mutex);
+
+ return err;
+}
+
+static inline void rdt_cpumask_update(int cpu)
+{
+ static cpumask_t tmp;
+
+ cpumask_and(&tmp, &rdt_cpumask, topology_core_cpumask(cpu));
+ if (cpumask_empty(&tmp))
+ cpumask_set_cpu(cpu, &rdt_cpumask);
+}
+
static int __init intel_rdt_late_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
static struct clos_cbm_table *cct;
u32 maxid, max_cbm_len;
- int err = 0, size;
+ int err = 0, size, i;
if (!cpu_has(c, X86_FEATURE_CAT_L3)) {
rdt_root_group.css.ss->disabled = 1;
@@ -152,6 +339,9 @@ static int __init intel_rdt_late_init(void)
cct->l3_cbm = (1ULL << max_cbm_len) - 1;
cct->clos_refcnt = 1;
+ for_each_online_cpu(i)
+ rdt_cpumask_update(i);
+
pr_info("Intel cache allocation enabled\n");
out_err:
@@ -160,8 +350,18 @@ out_err:
late_initcall(intel_rdt_late_init);
+static struct cftype rdt_files[] = {
+ {
+ .name = "l3_cbm",
+ .seq_show = intel_cache_alloc_cbm_read,
+ .write_u64 = intel_cache_alloc_cbm_write,
+ },
+ { } /* terminate */
+};
+
struct cgroup_subsys intel_rdt_cgrp_subsys = {
.css_alloc = intel_rdt_css_alloc,
.css_free = intel_rdt_css_free,
+ .legacy_cftypes = rdt_files,
.early_init = 0,
};
--
1.9.1