lustre-devel-lustre.org archive mirror
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	devel@driverdev.osuosl.org,
	Andreas Dilger <andreas.dilger@intel.com>,
	Oleg Drokin <oleg.drokin@intel.com>, NeilBrown <neilb@suse.com>
Cc: Amir Shehata <amir.shehata@intel.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 09/25] staging: lustre: libcfs: use distance in cpu and node handling
Date: Mon, 16 Apr 2018 00:09:51 -0400	[thread overview]
Message-ID: <1523851807-16573-10-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1523851807-16573-1-git-send-email-jsimmons@infradead.org>

From: Amir Shehata <amir.shehata@intel.com>

Take into consideration the location of NUMA nodes and cores
when calling cfs_cpt_[un]set_cpu() and cfs_cpt_[un]set_node().
This enables functioning on platforms with 100s of cores and
NUMA nodes.

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <olaf@sgi.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 192 +++++++++++++++------
 1 file changed, 143 insertions(+), 49 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 1e184b1..bbf89b8 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -300,11 +300,134 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 }
 EXPORT_SYMBOL(cfs_cpt_distance);
 
+/*
+ * Return the largest node_distance() over every (from, to) node pair taken
+ * from from_mask x to_mask; the result is 0 if either mask is empty.
+ */
+static unsigned int cfs_cpt_distance_calculate(nodemask_t *from_mask,
+					       nodemask_t *to_mask)
+{
+	unsigned int maximum;
+	unsigned int distance;
+	int from;
+	int to;
+
+	maximum = 0;
+	for_each_node_mask(from, *from_mask) {
+		for_each_node_mask(to, *to_mask) {
+			distance = node_distance(from, to);
+			if (maximum < distance)
+				maximum = distance;
+		}
+	}
+	return maximum;
+}
+
+static void cfs_cpt_add_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	cptab->ctb_cpu2cpt[cpu] = cpt;
+	/* set the CPU in the table-wide mask and in partition @cpt's mask */
+	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+}
+
+static void cfs_cpt_del_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+	/* the CPU is out of both masks; reset its cpu->cpt entry to -1 */
+	cptab->ctb_cpu2cpt[cpu] = -1;
+}
+
+static void cfs_cpt_add_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	struct cfs_cpu_partition *part;
+
+	if (!node_isset(node, *cptab->ctb_nodemask)) {
+		unsigned int dist;
+
+		/* node is new to the table; recompute ctb_distance below */
+		node_set(node, *cptab->ctb_nodemask);
+		cptab->ctb_node2cpt[node] = cpt;
+
+		dist = cfs_cpt_distance_calculate(cptab->ctb_nodemask,
+						  cptab->ctb_nodemask);
+		cptab->ctb_distance = dist;
+	}
+
+	part = &cptab->ctb_parts[cpt];
+	if (!node_isset(node, *part->cpt_nodemask)) {
+		int cpt2;
+
+		/* node is new to this CPT; refresh cpt_distance both ways */
+		node_set(node, *part->cpt_nodemask);
+		for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+			struct cfs_cpu_partition *part2;
+			unsigned int dist;
+
+			part2 = &cptab->ctb_parts[cpt2];
+			dist = cfs_cpt_distance_calculate(part->cpt_nodemask,
+							  part2->cpt_nodemask);
+			part->cpt_distance[cpt2] = dist;
+			dist = cfs_cpt_distance_calculate(part2->cpt_nodemask,
+							  part->cpt_nodemask);
+			part2->cpt_distance[cpt] = dist;
+		}
+	}
+}
+
+static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	struct cfs_cpu_partition *part = &cptab->ctb_parts[cpt];
+	int cpu;
+
+	for_each_cpu(cpu, part->cpt_cpumask) {
+		/* does this CPT still hold a CPU that belongs to this node? */
+		if (cpu_to_node(cpu) == node)
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids && node_isset(node,  *part->cpt_nodemask)) {
+		int cpt2;
+
+		/* No CPU of this node is left in this CPT: drop the node. */
+		node_clear(node, *part->cpt_nodemask);
+		for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+			struct cfs_cpu_partition *part2;
+			unsigned int dist;
+
+			part2 = &cptab->ctb_parts[cpt2];
+			if (node_isset(node, *part2->cpt_nodemask))
+				cptab->ctb_node2cpt[node] = cpt2;
+
+			dist = cfs_cpt_distance_calculate(part->cpt_nodemask,
+							  part2->cpt_nodemask);
+			part->cpt_distance[cpt2] = dist;
+			dist = cfs_cpt_distance_calculate(part2->cpt_nodemask,
+							  part->cpt_nodemask);
+			part2->cpt_distance[cpt] = dist;
+		}
+	}
+
+	for_each_cpu(cpu, cptab->ctb_cpumask) {
+		/* does any CPU left in the table belong to this node? */
+		if (cpu_to_node(cpu) == node)
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids && node_isset(node, *cptab->ctb_nodemask)) {
+		/* No CPU of this node is left in the table: drop the node. */
+		node_clear(node, *cptab->ctb_nodemask);
+		cptab->ctb_node2cpt[node] = -1;
+		cptab->ctb_distance =
+			cfs_cpt_distance_calculate(cptab->ctb_nodemask,
+						   cptab->ctb_nodemask);
+	}
+}
+
 int
 cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
 {
-	int node;
-
 	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
 
 	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
@@ -318,23 +441,11 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 		return 0;
 	}
 
-	cptab->ctb_cpu2cpt[cpu] = cpt;
-
 	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
 	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
 
-	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
-	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
-	node = cpu_to_node(cpu);
-
-	/* first CPU of @node in this CPT table */
-	if (!node_isset(node, *cptab->ctb_nodemask))
-		node_set(node, *cptab->ctb_nodemask);
-
-	/* first CPU of @node in this partition */
-	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
-		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+	cfs_cpt_add_cpu(cptab, cpt, cpu);
+	cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu));
 
 	return 1;
 }
@@ -343,9 +454,6 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 void
 cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
 {
-	int node;
-	int i;
-
 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
 
 	if (cpu < 0 || cpu >= nr_cpu_ids) {
@@ -371,32 +479,8 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
 	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
 
-	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
-	cptab->ctb_cpu2cpt[cpu] = -1;
-
-	node = cpu_to_node(cpu);
-
-	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
-	LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
-	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
-		/* this CPT has other CPU belonging to this node? */
-		if (cpu_to_node(i) == node)
-			break;
-	}
-
-	if (i >= nr_cpu_ids)
-		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-	for_each_cpu(i, cptab->ctb_cpumask) {
-		/* this CPT-table has other CPU belonging to this node? */
-		if (cpu_to_node(i) == node)
-			break;
-	}
-
-	if (i >= nr_cpu_ids)
-		node_clear(node, *cptab->ctb_nodemask);
+	cfs_cpt_del_cpu(cptab, cpt, cpu);
+	cfs_cpt_del_node(cptab, cpt, cpu_to_node(cpu));
 }
 EXPORT_SYMBOL(cfs_cpt_unset_cpu);
 
@@ -413,8 +497,8 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 	}
 
 	for_each_cpu(cpu, mask) {
-		if (!cfs_cpt_set_cpu(cptab, cpt, cpu))
-			return 0;
+		cfs_cpt_add_cpu(cptab, cpt, cpu);
+		cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu));
 	}
 
 	return 1;
@@ -436,6 +520,7 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
 {
 	const cpumask_t *mask;
+	int cpu;
 
 	if (node < 0 || node >= nr_node_ids) {
 		CDEBUG(D_INFO,
@@ -445,7 +530,12 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 
 	mask = cpumask_of_node(node);
 
-	return cfs_cpt_set_cpumask(cptab, cpt, mask);
+	for_each_cpu(cpu, mask)
+		cfs_cpt_add_cpu(cptab, cpt, cpu);
+
+	cfs_cpt_add_node(cptab, cpt, node);
+
+	return 1;
 }
 EXPORT_SYMBOL(cfs_cpt_set_node);
 
@@ -453,6 +543,7 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
 {
 	const cpumask_t *mask;
+	int cpu;
 
 	if (node < 0 || node >= nr_node_ids) {
 		CDEBUG(D_INFO,
@@ -462,7 +553,10 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
 
 	mask = cpumask_of_node(node);
 
-	cfs_cpt_unset_cpumask(cptab, cpt, mask);
+	for_each_cpu(cpu, mask)
+		cfs_cpt_del_cpu(cptab, cpt, cpu);
+
+	cfs_cpt_del_node(cptab, cpt, node);
 }
 EXPORT_SYMBOL(cfs_cpt_unset_node);
 
-- 
1.8.3.1

  parent reply	other threads:[~2018-04-16  4:09 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-16  4:09 [lustre-devel] [PATCH 00/25] staging: lustre: libcfs: SMP rework James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 01/25] staging: lustre: libcfs: remove useless CPU partition code James Simmons
2018-04-16 13:42   ` Dan Carpenter
2018-04-16  4:09 ` [lustre-devel] [PATCH 02/25] staging: lustre: libcfs: rename variable i to cpu James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 03/25] staging: lustre: libcfs: implement cfs_cpt_cpumask for UMP case James Simmons
2018-04-16 13:51   ` Dan Carpenter
2018-04-16  4:09 ` [lustre-devel] [PATCH 04/25] staging: lustre: libcfs: replace MAX_NUMNODES with nr_node_ids James Simmons
2018-04-16 13:55   ` Dan Carpenter
2018-04-16  4:09 ` [lustre-devel] [PATCH 05/25] staging: lustre: libcfs: remove excess space James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 06/25] staging: lustre: libcfs: replace num_possible_cpus() with nr_cpu_ids James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 07/25] staging: lustre: libcfs: NUMA support James Simmons
2018-04-16 14:27   ` Dan Carpenter
2018-04-16  4:09 ` [lustre-devel] [PATCH 08/25] staging: lustre: libcfs: add cpu distance handling James Simmons
2018-04-16 14:45   ` Dan Carpenter
2018-04-16  4:09 ` James Simmons [this message]
2018-04-16  4:09 ` [lustre-devel] [PATCH 10/25] staging: lustre: libcfs: provide debugfs files for " James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 11/25] staging: lustre: libcfs: invert error handling for cfs_cpt_table_print James Simmons
2018-04-17  7:14   ` Dan Carpenter
2018-04-16  4:09 ` [lustre-devel] [PATCH 12/25] staging: lustre: libcfs: fix libcfs_cpu coding style James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 13/25] staging: lustre: libcfs: use int type for CPT identification James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 14/25] staging: lustre: libcfs: rename i to node for cfs_cpt_set_nodemask James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 15/25] staging: lustre: libcfs: rename i to cpu for cfs_cpt_bind James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 16/25] staging: lustre: libcfs: rename cpumask_var_t variables to *_mask James Simmons
2018-04-16  4:09 ` [lustre-devel] [PATCH 17/25] staging: lustre: libcfs: rename goto label in cfs_cpt_table_print James Simmons
2018-04-17  7:34   ` Dan Carpenter
2018-04-16  4:10 ` [lustre-devel] [PATCH 18/25] staging: lustre: libcfs: clear up failure patch in cfs_cpt_*_print James Simmons
2018-04-17  7:39   ` Dan Carpenter
2018-04-16  4:10 ` [lustre-devel] [PATCH 19/25] staging: lustre: libcfs: update debug messages James Simmons
2018-04-16  4:10 ` [lustre-devel] [PATCH 20/25] staging: lustre: libcfs: make tolerant to offline CPUs and empty NUMA nodes James Simmons
2018-04-16  4:10 ` [lustre-devel] [PATCH 21/25] staging: lustre: libcfs: report NUMA node instead of just node James Simmons
2018-04-16  4:10 ` [lustre-devel] [PATCH 22/25] staging: lustre: libcfs: update debug messages in CPT code James Simmons
2018-04-16  4:10 ` [lustre-devel] [PATCH 23/25] staging: lustre: libcfs: rework CPU pattern parsing code James Simmons
2018-04-16  4:10 ` [lustre-devel] [PATCH 24/25] staging: lustre: libcfs: change CPT estimate algorithm James Simmons
2018-04-16  4:10 ` [lustre-devel] [PATCH 25/25] staging: lustre: libcfs: merge UMP and SMP libcfs cpu header code James Simmons
2018-04-23 12:58 ` [lustre-devel] [PATCH 00/25] staging: lustre: libcfs: SMP rework Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1523851807-16573-10-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=amir.shehata@intel.com \
    --cc=andreas.dilger@intel.com \
    --cc=devel@driverdev.osuosl.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.com \
    --cc=oleg.drokin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).