* [PATCH 1/2] powerpc/cacheinfo: Lookup cache by dt node and thread-group id
2021-01-19 7:36 [PATCH 0/2] powerpc/cacheinfo: Add "ibm,thread-groups" awareness Gautham R. Shenoy
@ 2021-01-19 7:36 ` Gautham R. Shenoy
2021-01-19 7:36 ` [PATCH 2/2] powerpc/cacheinfo: Remove the redundant get_shared_cpu_map() Gautham R. Shenoy
1 sibling, 0 replies; 3+ messages in thread
From: Gautham R. Shenoy @ 2021-01-19 7:36 UTC (permalink / raw)
To: Michael Ellerman, Nathan Lynch, Srikar Dronamraju
Cc: Gautham R. Shenoy, linuxppc-dev, linux-kernel
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Currently the cacheinfo code on powerpc indexes the "cache" objects
(modelling the L1/L2/L3 caches) where the key is device-tree node
corresponding to that cache. On some of the POWER server platforms
thread-groups within the core share different sets of caches (Eg: On
SMT8 POWER9 systems, threads 0,2,4,6 of a core share L1 cache and
threads 1,3,5,7 of the same core share another L1 cache). On such
platforms, there is a single device-tree node corresponding to that
cache and the cache-configuration within the threads of the core is
indicated via "ibm,thread-groups" device-tree property.
Since the current code is not aware of the "ibm,thread-groups"
property, on the aforementoined systems, cacheinfo code still treats
all the threads in the core to be sharing the cache because of the
single device-tree node (In the earlier example, the cacheinfo code
would says CPUs 0-7 share L1 cache).
In this patch, we make the powerpc cacheinfo code aware of the
"ibm,thread-groups" property. We indexe the "cache" objects by the
key-pair (device-tree node, thread-group id). For any CPUX, for a
given level of cache, the thread-group id is defined to be the first
CPU in the "ibm,thread-groups" cache-group containing CPUX. For levels
of cache which are not represented in "ibm,thread-groups" property,
the thread-group id is -1.
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/smp.h | 3 ++
arch/powerpc/kernel/cacheinfo.c | 80 +++++++++++++++++++++++++++++------------
2 files changed, 61 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index c4e2d53..39de24b 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -32,6 +32,9 @@
extern int cpu_to_chip_id(int cpu);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
#ifdef CONFIG_SMP
struct smp_ops_t {
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 6f903e9a..5a6925d 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -120,6 +120,7 @@ struct cache {
struct cpumask shared_cpu_map; /* online CPUs using this cache */
int type; /* split cache disambiguation */
int level; /* level not explicit in device tree */
+ int group_id; /* id of the group of threads that share this cache */
struct list_head list; /* global list of cache objects */
struct cache *next_local; /* next cache of >= level */
};
@@ -142,22 +143,24 @@ static const char *cache_type_string(const struct cache *cache)
}
static void cache_init(struct cache *cache, int type, int level,
- struct device_node *ofnode)
+ struct device_node *ofnode, int group_id)
{
cache->type = type;
cache->level = level;
cache->ofnode = of_node_get(ofnode);
+ cache->group_id = group_id;
INIT_LIST_HEAD(&cache->list);
list_add(&cache->list, &cache_list);
}
-static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level,
+ struct device_node *ofnode, int group_id)
{
struct cache *cache;
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache)
- cache_init(cache, type, level, ofnode);
+ cache_init(cache, type, level, ofnode, group_id);
return cache;
}
@@ -309,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
return cache;
list_for_each_entry(iter, &cache_list, list)
- if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ if (iter->ofnode == cache->ofnode &&
+ iter->group_id == cache->group_id &&
+ iter->next_local == cache)
return iter;
return cache;
}
-/* return the first cache on a local list matching node */
-static struct cache *cache_lookup_by_node(const struct device_node *node)
+/* return the first cache on a local list matching node and thread-group id */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+ int group_id)
{
struct cache *cache = NULL;
struct cache *iter;
list_for_each_entry(iter, &cache_list, list) {
- if (iter->ofnode != node)
+ if (iter->ofnode != node ||
+ iter->group_id != group_id)
continue;
cache = cache_find_first_sibling(iter);
break;
@@ -352,14 +359,15 @@ static int cache_is_unified_d(const struct device_node *np)
CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
}
-static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+ int level)
{
pr_debug("creating L%d ucache for %pOFP\n", level, node);
- return new_cache(cache_is_unified_d(node), level, node);
+ return new_cache(cache_is_unified_d(node), level, node, group_id);
}
-static struct cache *cache_do_one_devnode_split(struct device_node *node,
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
int level)
{
struct cache *dcache, *icache;
@@ -367,8 +375,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
pr_debug("creating L%d dcache and icache for %pOFP\n", level,
node);
- dcache = new_cache(CACHE_TYPE_DATA, level, node);
- icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+ dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
if (!dcache || !icache)
goto err;
@@ -382,31 +390,32 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
return NULL;
}
-static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
{
struct cache *cache;
if (cache_node_is_unified(node))
- cache = cache_do_one_devnode_unified(node, level);
+ cache = cache_do_one_devnode_unified(node, group_id, level);
else
- cache = cache_do_one_devnode_split(node, level);
+ cache = cache_do_one_devnode_split(node, group_id, level);
return cache;
}
static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+ int group_id,
int level)
{
struct cache *cache;
- cache = cache_lookup_by_node(node);
+ cache = cache_lookup_by_node_group(node, group_id);
WARN_ONCE(cache && cache->level != level,
"cache level mismatch on lookup (got %d, expected %d)\n",
cache->level, level);
if (!cache)
- cache = cache_do_one_devnode(node, level);
+ cache = cache_do_one_devnode(node, group_id, level);
return cache;
}
@@ -443,7 +452,27 @@ static void do_subsidiary_caches_debugcheck(struct cache *cache)
of_node_get_device_type(cache->ofnode));
}
-static void do_subsidiary_caches(struct cache *cache)
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+ if (has_big_cores && level == 1)
+ return cpumask_first(per_cpu(thread_group_l1_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l2 && level == 2)
+ return cpumask_first(per_cpu(thread_group_l2_cache_map,
+ cpu_id));
+ return -1;
+}
+
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
{
struct device_node *subcache_node;
int level = cache->level;
@@ -452,9 +481,11 @@ static void do_subsidiary_caches(struct cache *cache)
while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
struct cache *subcache;
+ int group_id;
level++;
- subcache = cache_lookup_or_instantiate(subcache_node, level);
+ group_id = get_group_id(cpu_id, level);
+ subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
of_node_put(subcache_node);
if (!subcache)
break;
@@ -468,6 +499,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cpu_cache = NULL;
+ int group_id;
pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
@@ -476,11 +508,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
if (!cpu_node)
goto out;
- cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ group_id = get_group_id(cpu_id, 1);
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
if (!cpu_cache)
goto out;
- do_subsidiary_caches(cpu_cache);
+ do_subsidiary_caches(cpu_cache, cpu_id);
cache_cpu_set(cpu_cache, cpu_id);
out:
@@ -848,13 +882,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cache;
+ int group_id;
cpu_node = of_get_cpu_node(cpu_id, NULL);
WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
if (!cpu_node)
return NULL;
- cache = cache_lookup_by_node(cpu_node);
+ group_id = get_group_id(cpu_id, 1);
+ cache = cache_lookup_by_node_group(cpu_node, group_id);
of_node_put(cpu_node);
return cache;
--
1.9.4
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH 2/2] powerpc/cacheinfo: Remove the redundant get_shared_cpu_map()
2021-01-19 7:36 [PATCH 0/2] powerpc/cacheinfo: Add "ibm,thread-groups" awareness Gautham R. Shenoy
2021-01-19 7:36 ` [PATCH 1/2] powerpc/cacheinfo: Lookup cache by dt node and thread-group id Gautham R. Shenoy
@ 2021-01-19 7:36 ` Gautham R. Shenoy
1 sibling, 0 replies; 3+ messages in thread
From: Gautham R. Shenoy @ 2021-01-19 7:36 UTC (permalink / raw)
To: Michael Ellerman, Nathan Lynch, Srikar Dronamraju
Cc: Gautham R. Shenoy, linuxppc-dev, linux-kernel
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
The helper function get_shared_cpu_map() was added in
'commit 500fe5f550ec ("powerpc/cacheinfo: Report the correct
shared_cpu_map on big-cores")'
and subsequently expanded upon in
'commit 0be47634db0b ("powerpc/cacheinfo: Print correct cache-sibling
map/list for L2 cache")'
in order to help report the correct groups of threads sharing these caches
on big-core systems where groups of threads within a core can share
different sets of caches.
Now that powerpc/cacheinfo is aware of "ibm,thread-groups" property,
cache->shared_cpu_map contains the correct set of thread-siblings
sharing the cache. Hence we no longer need the functions
get_shared_cpu_map(). This patch removes this function. We also remove
the helper function index_dir_to_cpu() which was only called by
get_shared_cpu_map().
With these functions removed, we can still see the correct
cache-sibling map/list for L1 and L2 caches on systems with L1 and L2
caches distributed among groups of threads in a core.
With this patch, on a SMT8 POWER10 system where the L1 and L2 caches
are split between the two groups of threads in a core, for CPUs 8,9,
the L1-Data, L1-Instruction, L2, L3 cache CPU sibling list is as
follows:
$ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8,10,12,14
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8,10,12,14
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8,10,12,14
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-15
/sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9,11,13,15
/sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9,11,13,15
/sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9,11,13,15
/sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-15
$ ppc64_cpu --smt=4
$ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8,10
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8,10
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8,10
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-11
/sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9,11
/sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9,11
/sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9,11
/sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-11
$ ppc64_cpu --smt=2
$ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-9
/sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9
/sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9
/sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9
/sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-9
$ ppc64_cpu --smt=1
$ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
arch/powerpc/kernel/cacheinfo.c | 41 +----------------------------------------
1 file changed, 1 insertion(+), 40 deletions(-)
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 5a6925d..20d9169 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -675,45 +675,6 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
-static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
-{
- struct kobject *index_dir_kobj = &index->kobj;
- struct kobject *cache_dir_kobj = index_dir_kobj->parent;
- struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
- struct device *dev = kobj_to_dev(cpu_dev_kobj);
-
- return dev->id;
-}
-
-/*
- * On big-core systems, each core has two groups of CPUs each of which
- * has its own L1-cache. The thread-siblings which share l1-cache with
- * @cpu can be obtained via cpu_smallcore_mask().
- *
- * On some big-core systems, the L2 cache is shared only between some
- * groups of siblings. This is already parsed and encoded in
- * cpu_l2_cache_mask().
- *
- * TODO: cache_lookup_or_instantiate() needs to be made aware of the
- * "ibm,thread-groups" property so that cache->shared_cpu_map
- * reflects the correct siblings on platforms that have this
- * device-tree property. This helper function is only a stop-gap
- * solution so that we report the correct siblings to the
- * userspace via sysfs.
- */
-static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache)
-{
- if (has_big_cores) {
- int cpu = index_dir_to_cpu(index);
- if (cache->level == 1)
- return cpu_smallcore_mask(cpu);
- if (cache->level == 2 && thread_group_shares_l2)
- return cpu_l2_cache_mask(cpu);
- }
-
- return &cache->shared_cpu_map;
-}
-
static ssize_t
show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
{
@@ -724,7 +685,7 @@ static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, s
index = kobj_to_cache_index_dir(k);
cache = index->cache;
- mask = get_shared_cpu_map(index, cache);
+ mask = &cache->shared_cpu_map;
return cpumap_print_to_pagebuf(list, buf, mask);
}
--
1.9.4
^ permalink raw reply related [flat|nested] 3+ messages in thread