public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* PXM/Nid/SLIT patch
@ 2004-02-17 13:53 Robert Picco
  2004-02-17 22:32 ` Jesse Barnes
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: Robert Picco @ 2004-02-17 13:53 UTC (permalink / raw)
  To: linux-ia64

This patch enables cell based HP machines to boot with a NUMA configured 2.6 Linux kernel.  The
problem is the default hardware configuration reports N-1 CPUs nodes without
memory.  The Nth node has all interleaved memory.  This resulted in the NUMA kernel panicing
very early.  The patch eliminates CPU nodes with no memory and reassigns these CPUs to the
interleaved memory node. The NID space is compressed during the CPU reassignments.

Bob

--- linux-2.6.2-orig/arch/ia64/kernel/acpi.c	2004-02-16 10:14:53.000000000 -0500
+++ linux-2.6.2/arch/ia64/kernel/acpi.c	2004-02-17 06:10:20.000000000 -0500
@@ -338,11 +338,16 @@
 #undef SLIT_DEBUG
 
 #define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
+#define	PXM_MAGIC	(255)
 
 static int __initdata srat_num_cpus;			/* number of cpus */
 static u32 __initdata pxm_flag[PXM_FLAG_LEN];
+static u32 __initdata mpxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
+#define	pxm_bit_clear(bit)	(clear_bit(bit, (void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
+#define	mpxm_bit_set(bit)	(set_bit(bit, (void *) mpxm_flag))
+#define	mpxm_bit_test(bit)	(test_bit(bit, (void *) mpxm_flag))
 /* maps to convert between proximity domain and logical node ID */
 int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS];
 int __initdata nid_to_pxm_map[MAX_NUMNODES];
@@ -424,6 +429,110 @@
 	num_memblks++;
 }
 
+static void __init
+acpi_pxm_magic_slit_fix(void)
+{
+	u8		distance, x;
+	int		i, j, nid;
+#define	SLIT_IDENTITY	10
+
+
+	if (!pxm_bit_test(PXM_MAGIC) || slit_table->localities >= PXM_MAGIC)
+		return;
+
+	nid = pxm_to_nid_map[PXM_MAGIC];
+
+	for (distance = SLIT_IDENTITY*2, i = 0; i < slit_table->localities; i++) {
+		if (!pxm_bit_test(i))
+			continue;
+		for (j = 0; j < slit_table->localities; j++) {
+			if (!pxm_bit_test(j) || (i = j))
+				continue;
+
+			x = (slit_table->entry[i*slit_table->localities + j] + SLIT_IDENTITY) / 2;
+			distance = min(x, distance);
+		}
+	}
+
+	/*
+	 * Fill in distances for PXM magic.
+	 */
+
+	for (i = 0; i < numnodes;  i++) 
+		node_distance(i, nid) = distance;
+
+	for (i = 0; i < (numnodes - 1); i++)
+		node_distance(nid, i) = distance;
+
+	node_distance(nid, nid) = SLIT_IDENTITY;
+
+
+	return;
+}
+
+static void __init
+acpi_pxm_magic_fix(void)
+{
+	struct node_memblk_s	*p;
+	int			i, nnode, nid, cpu, pxm;
+
+
+	/*
+	 * If every nid has memory then we are done.
+	 */
+
+	for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_memblks]; p++) 
+		if (!mpxm_bit_test(p->nid)) {
+			mpxm_bit_set(p->nid);
+			nnode++;
+		}
+	
+	/*
+	 * All nids with memory.
+	 */
+
+	if (nnode = numnodes) 
+		return;
+
+	/*
+	 * Change logical node id for nids without memory.
+	 * If we are removing a nid without memory, then
+	 * move that nid's cpus to nnode-1 which will become
+	 * the magic PXM's logical node id.  The node_cpu[X].nid
+	 * is the PXM but will change later to logical node
+	 * id.
+	 */
+
+	for (nid = 0, i = 0; i < numnodes; i++) 
+		if (mpxm_bit_test(i)) {
+			if (i = nid) {
+				nid++;
+				continue;
+			}
+
+			for (p = &node_memblk[0]; p < &node_memblk[num_memblks]; p++)
+				if (p->nid = i)
+					p->nid = nid;
+
+			pxm = nid_to_pxm_map[i];
+			pxm_to_nid_map[pxm] = nid;
+			nid_to_pxm_map[nid] = pxm;
+			nid++;
+		}
+		else {
+			for (cpu = 0; cpu < srat_num_cpus; cpu++)
+				if (node_cpuid[cpu].nid = nid_to_pxm_map[i])
+					node_cpuid[cpu].nid = PXM_MAGIC;
+
+			pxm_to_nid_map[i] = nnode - 1;
+			pxm_bit_clear(nid_to_pxm_map[i]);
+		}
+
+	numnodes = nnode;
+
+	return;
+}
+
 void __init
 acpi_numa_arch_fixup (void)
 {
@@ -451,6 +560,8 @@
 	for (i = 0; i < num_memblks; i++)
 		node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
 
+	acpi_pxm_magic_fix();
+
 	/* assign memory bank numbers for each chunk on each node */
 	for (i = 0; i < numnodes; i++) {
 		int bank;
@@ -468,8 +579,13 @@
 	printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes);
 	printk(KERN_INFO "Number of memory chunks in system = %d\n", num_memblks);
 
-	if (!slit_table) return;
+	if (!slit_table) 
+		return;
+
 	memset(numa_slit, -1, sizeof(numa_slit));
+
+	acpi_pxm_magic_slit_fix();
+
 	for (i=0; i<slit_table->localities; i++) {
 		if (!pxm_bit_test(i))
 			continue;



^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2004-02-18 19:43 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-02-17 13:53 PXM/Nid/SLIT patch Robert Picco
2004-02-17 22:32 ` Jesse Barnes
2004-02-18 15:33 ` Robert Picco
2004-02-18 17:08 ` Christoph Hellwig
2004-02-18 18:56 ` Robert Picco
2004-02-18 18:59 ` David Mosberger
2004-02-18 19:04 ` Jesse Barnes
2004-02-18 19:06 ` Jesse Barnes
2004-02-18 19:13 ` Christoph Hellwig
2004-02-18 19:19 ` Robert Picco
2004-02-18 19:36 ` Jesse Barnes
2004-02-18 19:43 ` David Mosberger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox