All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched/topology: Use Identity node only if required
@ 2018-08-08  7:09 Srikar Dronamraju
  2018-08-08  7:58 ` Peter Zijlstra
  0 siblings, 1 reply; 26+ messages in thread
From: Srikar Dronamraju @ 2018-08-08  7:09 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: LKML, Mel Gorman, Rik van Riel, Srikar Dronamraju,
	Thomas Gleixner, Michael Ellerman, Heiko Carstens,
	Suravee Suthikulpanit, Andre Wild

With commit 051f3ca02e46 ("sched/topology: Introduce NUMA identity node
sched domain"), the scheduler introduces an extra NUMA level. However, that
leads to the following problems:

 - NUMA topology on 2-node systems is no longer marked as NUMA_DIRECT.
   After this commit, it gets reported as NUMA_BACKPLANE. This is because
   sched_domains_numa_levels now equals 2 on 2-node systems.

 - Extra numa sched domain that gets added and degenerated on most
   machines.  The Identity node is only needed on very few systems.
   Also all non-numa systems will end up populating
   sched_domains_numa_distance and sched_domains_numa_masks tables.

 - On shared LPARs, such as on powerpc, this extra sched domain creation
   can lead to repeated RCU stalls, sometimes even causing unresponsive
   systems on boot. On such stalls, it was noticed that the CPU keeps
   looping in init_sched_groups_capacity() (sg != sd->groups is always
   true).

INFO: rcu_sched self-detected stall on CPU
 1-....: (240039 ticks this GP) idle=c32/1/4611686018427387906 softirq=782/782 fqs=80012
  (t=240039 jiffies g=6272 c=6271 q=263040)
NMI backtrace for cpu 1
CPU: 1 PID: 1576 Comm: kworker/1:1 Kdump: loaded Tainted: G            E     4.18.0-rc7-master+ #42
Workqueue: events topology_work_fn
Call Trace:
[c00000832132f190] [c0000000009557ac] dump_stack+0xb0/0xf4 (unreliable)
[c00000832132f1d0] [c00000000095ed54] nmi_cpu_backtrace+0x1b4/0x230
[c00000832132f270] [c00000000095efac] nmi_trigger_cpumask_backtrace+0x1dc/0x220
[c00000832132f310] [c00000000005f77c] arch_trigger_cpumask_backtrace+0x2c/0x40
[c00000832132f330] [c0000000001a32d4] rcu_dump_cpu_stacks+0x100/0x15c
[c00000832132f380] [c0000000001a2024] rcu_check_callbacks+0x894/0xaa0
[c00000832132f4a0] [c0000000001ace9c] update_process_times+0x4c/0xa0
[c00000832132f4d0] [c0000000001c5400] tick_sched_handle.isra.13+0x50/0x80
[c00000832132f4f0] [c0000000001c549c] tick_sched_timer+0x6c/0xd0
[c00000832132f530] [c0000000001ae044] __hrtimer_run_queues+0x134/0x360
[c00000832132f5b0] [c0000000001aeea4] hrtimer_interrupt+0x124/0x300
[c00000832132f660] [c000000000024a04] timer_interrupt+0x114/0x2f0
[c00000832132f6c0] [c0000000000090f4] decrementer_common+0x114/0x120
--- interrupt: 901 at __bitmap_weight+0x70/0x100
    LR = __bitmap_weight+0x78/0x100
[c00000832132f9b0] [c0000000009bb738] __func__.61127+0x0/0x20 (unreliable)
[c00000832132fa00] [c00000000016c178] build_sched_domains+0xf98/0x13f0
[c00000832132fb30] [c00000000016d73c] partition_sched_domains+0x26c/0x440
[c00000832132fc20] [c0000000001ee284] rebuild_sched_domains_locked+0x64/0x80
[c00000832132fc50] [c0000000001f11ec] rebuild_sched_domains+0x3c/0x60
[c00000832132fc80] [c00000000007e1c4] topology_work_fn+0x24/0x40
[c00000832132fca0] [c000000000126704] process_one_work+0x1a4/0x470
[c00000832132fd30] [c000000000126a68] worker_thread+0x98/0x540
[c00000832132fdc0] [c00000000012f078] kthread+0x168/0x1b0
[c00000832132fe30] [c00000000000b65c] ret_from_kernel_thread+0x5c/0x80

A similar problem was also reported earlier at
https://lwn.net/ml/linux-kernel/20180512100233.GB3738@osiris/

One easy alternative would be to use a hint from the architectures that
actually need the identity node.

Fixes: 051f3ca02e46 ("sched/topology: Introduce NUMA identity node sched domain")
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/topology.h |  2 ++
 arch/x86/kernel/smpboot.c       |  5 +++++
 kernel/sched/topology.c         | 34 ++++++++++++++++++++++++----------
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c1d2a9892352..524cb900e273 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -79,7 +79,9 @@ extern void setup_node_to_cpumask_map(void);
 
 extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
+#define arch_supports_identity_node arch_supports_identity_node
 
+extern int arch_supports_identity_node(void);
 #else /* !CONFIG_NUMA */
 
 static inline int numa_node_id(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index db9656e13ea0..08de8ca06232 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1346,6 +1346,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	mtrr_aps_init();
 }
 
+int arch_supports_identity_node(void)
+{
+	return x86_has_numa_in_package;
+}
+
 static int __initdata setup_possible_cpus = -1;
 static int __init _setup_possible_cpus(char *str)
 {
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 56a0fed30c0a..8f61df23948a 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1322,20 +1322,30 @@ static void init_numa_topology_type(void)
 	}
 }
 
+#ifndef arch_supports_identity_node
+static inline int arch_supports_identity_node(void)
+{
+	return 0;
+}
+#endif
+
 void sched_init_numa(void)
 {
 	int next_distance, curr_distance = node_distance(0, 0);
 	struct sched_domain_topology_level *tl;
-	int level = 0;
+	int numa_in_package, level = 0;
 	int i, j, k;
 
 	sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
 	if (!sched_domains_numa_distance)
 		return;
 
-	/* Includes NUMA identity node at level 0. */
-	sched_domains_numa_distance[level++] = curr_distance;
-	sched_domains_numa_levels = level;
+	numa_in_package = arch_supports_identity_node();
+	if (numa_in_package) {
+		/* Includes NUMA identity node at level 0. */
+		sched_domains_numa_distance[level++] = curr_distance;
+		sched_domains_numa_levels = level;
+	}
 
 	/*
 	 * O(nr_nodes^2) deduplicating selection sort -- in order to find the
@@ -1445,19 +1455,23 @@ void sched_init_numa(void)
 	for (i = 0; sched_domain_topology[i].mask; i++)
 		tl[i] = sched_domain_topology[i];
 
+	j  = 0;
 	/*
 	 * Add the NUMA identity distance, aka single NODE.
 	 */
-	tl[i++] = (struct sched_domain_topology_level){
-		.mask = sd_numa_mask,
-		.numa_level = 0,
-		SD_INIT_NAME(NODE)
-	};
+	if (numa_in_package) {
+		tl[i++] = (struct sched_domain_topology_level){
+			.mask = sd_numa_mask,
+			.numa_level = 0,
+			SD_INIT_NAME(NODE)
+		};
+		j++;
+	}
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
-	for (j = 1; j < level; i++, j++) {
+	for (; j < level; i++, j++) {
 		tl[i] = (struct sched_domain_topology_level){
 			.mask = sd_numa_mask,
 			.sd_flags = cpu_numa_flags,
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread
[parent not found: <reply-to=<20180808081942.GA37418@linux.vnet.ibm.com>]

end of thread, other threads:[~2018-09-10 10:46 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-08-08  7:09 [PATCH] sched/topology: Use Identity node only if required Srikar Dronamraju
2018-08-08  7:58 ` Peter Zijlstra
2018-08-08  8:19   ` Srikar Dronamraju
2018-08-08  8:43     ` Peter Zijlstra
2018-08-08  9:30       ` Peter Zijlstra
2018-08-10 16:45   ` Srikar Dronamraju
2018-08-29  8:43     ` Peter Zijlstra
2018-08-29  8:57       ` Peter Zijlstra
2018-08-31 10:22       ` Srikar Dronamraju
2018-08-31 10:22         ` Srikar Dronamraju
2018-08-31 10:41         ` Peter Zijlstra
2018-08-31 11:26           ` Srikar Dronamraju
2018-08-31 12:06             ` Peter Zijlstra
     [not found] <reply-to=<20180808081942.GA37418@linux.vnet.ibm.com>
2018-08-10 17:00 ` [PATCH 1/2] sched/topology: Set correct numa topology type Srikar Dronamraju
2018-08-10 17:00   ` [PATCH 2/2] sched/topology: Expose numa_mask set/clear functions to arch Srikar Dronamraju
2018-08-29  8:02     ` Peter Zijlstra
2018-08-31 10:27       ` Srikar Dronamraju
2018-08-31 11:12         ` Peter Zijlstra
2018-08-31 11:26           ` Peter Zijlstra
2018-08-31 11:53             ` Srikar Dronamraju
2018-08-31 11:53               ` Srikar Dronamraju
2018-08-31 12:05               ` Peter Zijlstra
2018-08-31 12:08               ` Peter Zijlstra
2018-08-21 11:02   ` [PATCH 1/2] sched/topology: Set correct numa topology type Srikar Dronamraju
2018-08-21 13:59     ` Peter Zijlstra
2018-09-10 10:06   ` [tip:sched/core] sched/topology: Set correct NUMA " tip-bot for Srikar Dronamraju

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.