All of lore.kernel.org
 help / color / mirror / Atom feed
* percpu-2.5.63-bkcurr
@ 2003-03-01  5:59 William Lee Irwin III
  2003-03-01  7:36 ` percpu-2.5.63-bkcurr William Lee Irwin III
  0 siblings, 1 reply; 5+ messages in thread
From: William Lee Irwin III @ 2003-03-01  5:59 UTC (permalink / raw)
  To: linux-kernel

Shove per-cpu areas into node-local memory for i386 discontigmem,
or at least NUMA-Q. You'll have to plop down early_cpu_to_node()
and early_node_to_cpumask() stubs to use it on, say, Summit.


-- wli

===== arch/i386/mm/discontig.c 1.9 vs edited =====
--- 1.9/arch/i386/mm/discontig.c	Fri Feb 28 15:08:58 2003
+++ edited/arch/i386/mm/discontig.c	Fri Feb 28 21:48:54 2003
@@ -48,8 +48,6 @@
 extern unsigned long totalram_pages;
 extern unsigned long totalhigh_pages;
 
-#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
-
 unsigned long node_remap_start_pfn[MAX_NUMNODES];
 unsigned long node_remap_size[MAX_NUMNODES];
 unsigned long node_remap_offset[MAX_NUMNODES];
@@ -67,6 +65,20 @@
 		node_end_pfn[nid] = max_pfn;
 }
 
+extern char __per_cpu_start[], __per_cpu_end[];
+unsigned long __per_cpu_offset[NR_CPUS];
+
+#define PER_CPU_PAGES	PFN_UP((unsigned long)(__per_cpu_end-__per_cpu_start))
+#define MEM_MAP_SIZE(n)	PFN_UP((node_end_pfn[n]-node_start_pfn[n]+1)*sizeof(struct page))
+
+#ifdef CONFIG_X86_NUMAQ
+#define early_cpu_to_node(cpu)		((cpu)/4)
+#define early_node_to_cpumask(node)	(0xFUL << (4*(node)))
+#else
+#define early_cpu_to_node(cpu)		cpu_to_node(cpu)
+#define early_node_to_cpumask(node)	node_to_cpumask(node)
+#endif
+
 /* 
  * Allocate memory for the pg_data_t via a crude pre-bootmem method
  * We ought to relocate these onto their own node later on during boot.
@@ -82,6 +94,44 @@
 	}
 }
 
+static void __init allocate_one_cpu_area(int cpu)
+{
+	int cpu_in_node, node = early_cpu_to_node(cpu);
+	unsigned long nodemask = early_node_to_cpumask(node);
+	unsigned long node_vaddr = (unsigned long)node_remap_start_vaddr[node];
+
+	if (!PER_CPU_PAGES)
+		return;
+
+	if (!node) {
+		__per_cpu_offset[cpu] = min_low_pfn*PAGE_SIZE
+					+ PAGE_OFFSET
+					- (unsigned long)__per_cpu_start;
+		min_low_pfn += PER_CPU_PAGES;
+		return;
+	}
+
+	cpu_in_node = hweight32(nodemask & ((1UL << cpu) - 1));
+	__per_cpu_offset[cpu] = node_vaddr + MEM_MAP_SIZE(node)*PAGE_SIZE
+					+ PFN_UP(sizeof(pg_data_t))*PAGE_SIZE
+					+ PER_CPU_PAGES*cpu_in_node*PAGE_SIZE
+					- (unsigned long)__per_cpu_start;
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	int node, cpu;
+	for (node = 0; node < numnodes; ++node) {
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			if (early_cpu_to_node(cpu) == node) {
+				memcpy(RELOC_HIDE((char *)__per_cpu_start, __per_cpu_offset[cpu]),
+						__per_cpu_start,
+						PER_CPU_PAGES*PAGE_SIZE);
+			}
+		}
+	}
+}
+
 /*
  * Register fully available low RAM pages with the bootmem allocator.
  */
@@ -144,13 +194,11 @@
 	unsigned long size, reserve_pages = 0;
 
 	for (nid = 1; nid < numnodes; nid++) {
-		/* calculate the size of the mem_map needed in bytes */
-		size = (node_end_pfn[nid] - node_start_pfn[nid] + 1) 
-			* sizeof(struct page) + sizeof(pg_data_t);
-		/* convert size to large (pmd size) pages, rounding up */
-		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-		/* now the roundup is correct, convert to PAGE_SIZE pages */
-		size = size * PTRS_PER_PTE;
+		/* calculate the size of the mem_map needed in pages */
+		size = MEM_MAP_SIZE(nid) + PFN_UP(sizeof(pg_data_t))
+			+ PER_CPU_PAGES*hweight32(early_node_to_cpumask(nid));
+		/* round up to nearest pmd boundary */
+		size = (size + PTRS_PER_PTE - 1) & ~(PTRS_PER_PTE - 1);
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
@@ -196,9 +244,14 @@
 	printk("Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for (nid = 0; nid < numnodes; nid++) {
+		int cpu;
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
 			highstart_pfn - node_remap_offset[nid]);
 		allocate_pgdat(nid);
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			if (early_cpu_to_node(cpu) == nid)
+				allocate_one_cpu_area(cpu);
+		}
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
 			(ulong) pfn_to_kaddr(highstart_pfn
===== include/asm-i386/numaq.h 1.7 vs edited =====
--- 1.7/include/asm-i386/numaq.h	Fri Feb 28 15:03:59 2003
+++ edited/include/asm-i386/numaq.h	Fri Feb 28 18:37:53 2003
@@ -169,9 +169,9 @@
         struct	eachquadmem eq[MAX_NUMNODES];	/* indexed by quad id */
 };
 
-static inline unsigned long get_zholes_size(int nid)
+static inline unsigned long *get_zholes_size(int nid)
 {
-	return 0;
+	return NULL;
 }
 #endif /* CONFIG_X86_NUMAQ */
 #endif /* NUMAQ_H */
===== include/asm-i386/percpu.h 1.1 vs edited =====
--- 1.1/include/asm-i386/percpu.h	Fri Mar 15 04:55:35 2002
+++ edited/include/asm-i386/percpu.h	Fri Feb 28 18:31:26 2003
@@ -1,6 +1,30 @@
 #ifndef __ARCH_I386_PERCPU__
 #define __ARCH_I386_PERCPU__
 
+#include <linux/config.h>
+#include <linux/compiler.h>
+
+#ifndef CONFIG_DISCONTIGMEM
 #include <asm-generic/percpu.h>
+#else /* CONFIG_DISCONTIGMEM */
+
+extern unsigned long __per_cpu_offset[NR_CPUS];
+void setup_per_cpu_areas(void);
+
+/* Separate out the type, so (int[3], foo) works. */
+#ifndef MODULE
+#define DEFINE_PER_CPU(type, name) \
+    __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
+#endif
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu)
+
+#endif /* CONFIG_DISCONTIGMEM */
 
 #endif /* __ARCH_I386_PERCPU__ */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2003-03-01  8:16 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-03-01  5:59 percpu-2.5.63-bkcurr William Lee Irwin III
2003-03-01  7:36 ` percpu-2.5.63-bkcurr William Lee Irwin III
2003-03-01  7:40   ` percpu-2.5.63-bkcurr William Lee Irwin III
2003-03-01  7:46     ` percpu-2.5.63-bkcurr William Lee Irwin III
2003-03-01  8:27       ` percpu-2.5.63-bkcurr William Lee Irwin III

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.