[PATCH 19/20] sparc64: use embedding percpu first chunk allocator

public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed

From: Tejun Heo <tj@kernel.org>
To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
	mingo@redhat.com, benh@kernel.crashing.org, davem@davemloft.net,
	dhowells@redhat.com, npiggin@suse.de, JBeulich@novell.comc
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 19/20] sparc64: use embedding percpu first chunk allocator
Date: Tue, 21 Jul 2009 19:26:18 +0900	[thread overview]
Message-ID: <1248171979-29166-20-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1248171979-29166-1-git-send-email-tj@kernel.org>

sparc64 currently allocates a large page for each cpu and partially
remap them into vmalloc area much like what lpage first chunk
allocator did.  As a 4M page is used for each cpu, this results in
very large unit size and also adds TLB pressure due to the double
mapping of pages in the first chunk.

This patch converts sparc64 to use the embedding percpu first chunk
allocator which now knows how to handle NUMA configurations.  This
simplifies the code a lot, doesn't incur any extra TLB pressure and
results in better utilization of address space.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
---
 arch/sparc/Kconfig         |    3 +
 arch/sparc/kernel/smp_64.c |  128 ++++++-------------------------------------
 2 files changed, 21 insertions(+), 110 deletions(-)

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4f6ed0f..fbd1233 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -95,6 +95,9 @@ config AUDIT_ARCH
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y if SPARC64
 
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y if SPARC64
+
 config GENERIC_HARDIRQS_NO__DO_IRQ
 	bool
 	def_bool y if SPARC64
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b03fd36..ff68373 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1389,8 +1389,8 @@ void smp_send_stop(void)
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
-					unsigned long align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1415,123 +1415,31 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 #endif
 }
 
-#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL)
-
-static void __init pcpu_map_range(unsigned long start, unsigned long end,
-				  struct page *page)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	unsigned long pfn = page_to_pfn(page);
-	unsigned long pte_base;
-
-	BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL));
-
-	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
-		    _PAGE_CP_4U | _PAGE_CV_4U |
-		    _PAGE_P_4U | _PAGE_W_4U);
-	if (tlb_type == hypervisor)
-		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
-			    _PAGE_CP_4V | _PAGE_CV_4V |
-			    _PAGE_P_4V | _PAGE_W_4V);
-
-	while (start < end) {
-		pgd_t *pgd = pgd_offset_k(start);
-		unsigned long this_end;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *pte;
-
-		pud = pud_offset(pgd, start);
-		if (pud_none(*pud)) {
-			pmd_t *new;
-
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-			pud_populate(&init_mm, pud, new);
-		}
-
-		pmd = pmd_offset(pud, start);
-		if (!pmd_present(*pmd)) {
-			pte_t *new;
-
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-			pmd_populate_kernel(&init_mm, pmd, new);
-		}
-
-		pte = pte_offset_kernel(pmd, start);
-		this_end = (start + PMD_SIZE) & PMD_MASK;
-		if (this_end > end)
-			this_end = end;
-
-		while (start < this_end) {
-			unsigned long paddr = pfn << PAGE_SHIFT;
-
-			pte_val(*pte) = (paddr | pte_base);
+	free_bootmem(__pa(ptr), size);
+}
 
-			start += PAGE_SIZE;
-			pte++;
-			pfn++;
-		}
-	}
+static int pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (cpu_to_node(from) == cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
 }
 
 void __init setup_per_cpu_areas(void)
 {
-	static struct vm_struct vm;
-	struct pcpu_alloc_info *ai;
-	unsigned long delta, cpu;
-	size_t size_sum;
-	size_t ptrs_size;
-	void **ptrs;
+	unsigned long delta;
+	unsigned int cpu;
 	int rc;
 
-	ai = pcpu_alloc_alloc_info(1, nr_cpu_ids);
-
-	ai->static_size = __per_cpu_end - __per_cpu_start;
-	ai->reserved_size = PERCPU_MODULE_RESERVE;
-
-	size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size +
-			     PERCPU_DYNAMIC_RESERVE);
-
-	ai->dyn_size = size_sum - ai->static_size - ai->reserved_size;
-	ai->unit_size = PCPU_CHUNK_SIZE;
-	ai->atom_size = PCPU_CHUNK_SIZE;
-	ai->alloc_size = PCPU_CHUNK_SIZE;
-	ai->groups[0].nr_units = nr_cpu_ids;
-
-	for_each_possible_cpu(cpu)
-		ai->groups[0].cpu_map[cpu] = cpu;
-
-	ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0]));
-	ptrs = alloc_bootmem(ptrs_size);
-
-	for_each_possible_cpu(cpu) {
-		ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
-					       PCPU_CHUNK_SIZE);
-
-		free_bootmem(__pa(ptrs[cpu] + size_sum),
-			     PCPU_CHUNK_SIZE - size_sum);
-
-		memcpy(ptrs[cpu], __per_cpu_load, ai->static_size);
-	}
-
-	/* allocate address and map */
-	vm.flags = VM_ALLOC;
-	vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE;
-	vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
-
-	for_each_possible_cpu(cpu) {
-		unsigned long start = (unsigned long) vm.addr;
-		unsigned long end;
-
-		start += cpu * PCPU_CHUNK_SIZE;
-		end = start + PCPU_CHUNK_SIZE;
-		pcpu_map_range(start, end, virt_to_page(ptrs[cpu]));
-	}
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, 4 << 20,
+				    pcpu_cpu_distance, pcpu_alloc_bootmem,
+				    pcpu_free_bootmem);
 	if (rc)
-		panic("failed to setup percpu first chunk (%d)", rc);
-
-	free_bootmem(__pa(ptrs), ptrs_size);
+		panic("failed to initialize first chunk (%d)", rc);
 
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu)
-- 
1.6.0.2

WARNING: multiple messages have this Message-ID (diff)

From: Tejun Heo <tj@kernel.org>
To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
	mingo@redhat.com, benh@kernel.crashing.org, davem@davemloft.net,
	dhowells@redhat.com, npiggin@suse.de, JBeulich@novell.com,
	cl@linux-foundation.org, rusty@rustcorp.com.au, hpa@zytor.com,
	tglx@linutronix.de, akpm@linux-foundation.org, x86@kernel.org,
	andi@firstfloor.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 19/20] sparc64: use embedding percpu first chunk allocator
Date: Tue, 21 Jul 2009 19:26:18 +0900	[thread overview]
Message-ID: <1248171979-29166-20-git-send-email-tj@kernel.org> (raw)
Message-ID: <20090721102618.wbmxSWImyOTvnBVjVar4mZZufbfAnVYQd-Dai9UfVBU@z> (raw)
In-Reply-To: <1248171979-29166-1-git-send-email-tj@kernel.org>

sparc64 currently allocates a large page for each cpu and partially
remap them into vmalloc area much like what lpage first chunk
allocator did.  As a 4M page is used for each cpu, this results in
very large unit size and also adds TLB pressure due to the double
mapping of pages in the first chunk.

This patch converts sparc64 to use the embedding percpu first chunk
allocator which now knows how to handle NUMA configurations.  This
simplifies the code a lot, doesn't incur any extra TLB pressure and
results in better utilization of address space.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
---
 arch/sparc/Kconfig         |    3 +
 arch/sparc/kernel/smp_64.c |  128 ++++++-------------------------------------
 2 files changed, 21 insertions(+), 110 deletions(-)

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4f6ed0f..fbd1233 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -95,6 +95,9 @@ config AUDIT_ARCH
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y if SPARC64
 
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y if SPARC64
+
 config GENERIC_HARDIRQS_NO__DO_IRQ
 	bool
 	def_bool y if SPARC64
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b03fd36..ff68373 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1389,8 +1389,8 @@ void smp_send_stop(void)
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
-					unsigned long align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1415,123 +1415,31 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 #endif
 }
 
-#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL)
-
-static void __init pcpu_map_range(unsigned long start, unsigned long end,
-				  struct page *page)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	unsigned long pfn = page_to_pfn(page);
-	unsigned long pte_base;
-
-	BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL));
-
-	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
-		    _PAGE_CP_4U | _PAGE_CV_4U |
-		    _PAGE_P_4U | _PAGE_W_4U);
-	if (tlb_type == hypervisor)
-		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
-			    _PAGE_CP_4V | _PAGE_CV_4V |
-			    _PAGE_P_4V | _PAGE_W_4V);
-
-	while (start < end) {
-		pgd_t *pgd = pgd_offset_k(start);
-		unsigned long this_end;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *pte;
-
-		pud = pud_offset(pgd, start);
-		if (pud_none(*pud)) {
-			pmd_t *new;
-
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-			pud_populate(&init_mm, pud, new);
-		}
-
-		pmd = pmd_offset(pud, start);
-		if (!pmd_present(*pmd)) {
-			pte_t *new;
-
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-			pmd_populate_kernel(&init_mm, pmd, new);
-		}
-
-		pte = pte_offset_kernel(pmd, start);
-		this_end = (start + PMD_SIZE) & PMD_MASK;
-		if (this_end > end)
-			this_end = end;
-
-		while (start < this_end) {
-			unsigned long paddr = pfn << PAGE_SHIFT;
-
-			pte_val(*pte) = (paddr | pte_base);
+	free_bootmem(__pa(ptr), size);
+}
 
-			start += PAGE_SIZE;
-			pte++;
-			pfn++;
-		}
-	}
+static int pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (cpu_to_node(from) == cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
 }
 
 void __init setup_per_cpu_areas(void)
 {
-	static struct vm_struct vm;
-	struct pcpu_alloc_info *ai;
-	unsigned long delta, cpu;
-	size_t size_sum;
-	size_t ptrs_size;
-	void **ptrs;
+	unsigned long delta;
+	unsigned int cpu;
 	int rc;
 
-	ai = pcpu_alloc_alloc_info(1, nr_cpu_ids);
-
-	ai->static_size = __per_cpu_end - __per_cpu_start;
-	ai->reserved_size = PERCPU_MODULE_RESERVE;
-
-	size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size +
-			     PERCPU_DYNAMIC_RESERVE);
-
-	ai->dyn_size = size_sum - ai->static_size - ai->reserved_size;
-	ai->unit_size = PCPU_CHUNK_SIZE;
-	ai->atom_size = PCPU_CHUNK_SIZE;
-	ai->alloc_size = PCPU_CHUNK_SIZE;
-	ai->groups[0].nr_units = nr_cpu_ids;
-
-	for_each_possible_cpu(cpu)
-		ai->groups[0].cpu_map[cpu] = cpu;
-
-	ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0]));
-	ptrs = alloc_bootmem(ptrs_size);
-
-	for_each_possible_cpu(cpu) {
-		ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
-					       PCPU_CHUNK_SIZE);
-
-		free_bootmem(__pa(ptrs[cpu] + size_sum),
-			     PCPU_CHUNK_SIZE - size_sum);
-
-		memcpy(ptrs[cpu], __per_cpu_load, ai->static_size);
-	}
-
-	/* allocate address and map */
-	vm.flags = VM_ALLOC;
-	vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE;
-	vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
-
-	for_each_possible_cpu(cpu) {
-		unsigned long start = (unsigned long) vm.addr;
-		unsigned long end;
-
-		start += cpu * PCPU_CHUNK_SIZE;
-		end = start + PCPU_CHUNK_SIZE;
-		pcpu_map_range(start, end, virt_to_page(ptrs[cpu]));
-	}
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, 4 << 20,
+				    pcpu_cpu_distance, pcpu_alloc_bootmem,
+				    pcpu_free_bootmem);
 	if (rc)
-		panic("failed to setup percpu first chunk (%d)", rc);
-
-	free_bootmem(__pa(ptrs), ptrs_size);
+		panic("failed to initialize first chunk (%d)", rc);
 
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu)
-- 
1.6.0.2

next prev parent reply	other threads:[~2009-07-21 10:28 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-21 10:25 [PATCHSET percpu#for-next] implement and use sparse embedding first chunk allocator Tejun Heo
2009-07-21 10:25 ` Tejun Heo
2009-07-21 10:26 ` [PATCH 01/20] percpu: fix pcpu_reclaim() locking Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 21:41   ` Christoph Lameter
2009-07-21 10:26 ` [PATCH 02/20] percpu: improve boot messages Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 21:43   ` Christoph Lameter
2009-07-21 10:26 ` [PATCH 03/20] percpu: rename 4k first chunk allocator to page Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 21:47   ` Christoph Lameter
2009-07-22  4:38     ` Tejun Heo
2009-07-21 10:26 ` [PATCH 04/20] percpu: build first chunk allocators selectively Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 05/20] percpu: generalize first chunk allocator selection Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 06/20] percpu: drop @static_size from first chunk allocators Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 07/20] percpu: make @dyn_size mandatory for pcpu_setup_first_chunk() Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 08/20] percpu: add @align to pcpu_fc_alloc_fn_t Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 09/20] percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 10/20] percpu: introduce pcpu_alloc_info and pcpu_group_info Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 11/20] percpu: add pcpu_unit_offsets[] Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 12/20] percpu: add chunk->base_addr Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 13/20] vmalloc: separate out insert_vmalloc_vm() Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 14/20] vmalloc: implement pcpu_get_vm_areas() Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-08-14  6:07   ` Tejun Heo
2009-08-14  6:07     ` Tejun Heo
2009-07-21 10:26 ` [PATCH 15/20] percpu: use group information to allocate vmap areas sparsely Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 16/20] percpu: update embedding first chunk allocator to handle sparse units Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 17/20] x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` [PATCH 18/20] percpu: kill lpage first chunk allocator Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 10:26 ` Tejun Heo [this message]
2009-07-21 10:26   ` [PATCH 19/20] sparc64: use embedding percpu " Tejun Heo
2009-07-22  3:54   ` David Miller
2009-07-21 10:26 ` [PATCH 20/20] powerpc64: convert to dynamic percpu allocator Tejun Heo
2009-07-21 10:26   ` Tejun Heo
2009-07-21 12:22 ` [RFC PATCH] percpu: kill legacy " Tejun Heo
2009-07-21 12:22   ` Tejun Heo
2009-07-22  4:30   ` Rusty Russell
2009-08-14  6:09 ` [PATCHSET percpu#for-next] implement and use sparse embedding first chunk allocator Tejun Heo
2009-08-14  6:09   ` Tejun Heo

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4f6ed0f dfblob:fbd1233 dfblob:b03fd36 dfblob:ff68373
dfblob:4f6ed0f dfblob:fbd1233 dfblob:b03fd36 dfblob:ff68373 )
 OR (
bs:"[PATCH 19/20] sparc64: use embedding percpu first chunk allocator" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1248171979-29166-20-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=JBeulich@novell.comc \
    --cc=benh@kernel.crashing.org \
    --cc=davem@davemloft.net \
    --cc=dhowells@redhat.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=npiggin@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox