From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, x86@kernel.org,
	linux-arch@vger.kernel.org, mingo@elte.hu, andi@firstfloor.org,
	hpa@zytor.com, tglx@linutronix.de, cl@linux-foundation.org,
	akpm@linux-foundation.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 04/10] percpu: make 4k first chunk allocator map memory
Date: Wed, 24 Jun 2009 22:30:10 +0900	[thread overview]
Message-ID: <1245850216-31653-5-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1245850216-31653-1-git-send-email-tj@kernel.org>

At first, the percpu first chunk was always set up page-by-page by
the generic code.  To add other allocators, different parts of the
generic initialization were made optional.  Now we have three
allocators - embed, remap and 4k.  embed and remap fully handle
allocation and mapping of the first chunk while 4k still depends on
the generic code for those.  This makes the generic alloc/map paths
specific to 4k and makes the code unnecessarily complicated with
optional generic behaviors.

This patch makes the 4k allocator allocate and map memory directly
instead of depending on the generic code.  The only outside-visible
change is that the dynamic area in the first chunk is now allocated
up-front instead of on-demand.  This doesn't make any meaningful
difference as the area is minimal (usually less than a page, just
enough to fill the alignment) with the 4k allocator.  Plus, the
dynamic area in the first chunk usually gets fully used anyway.
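
As a rough illustration (hypothetical numbers, not taken from this
patch, assuming static_size + reserved_size already exceeds
PCPU_MIN_UNIT_SIZE and 4k pages):

	/* the unit is sized in whole pages; the round-up slack
	 * becomes the up-front dynamic area */
	unit_pages = PFN_UP(45000 + 8192);		/* 53192 bytes -> 13 pages */
	dyn_size   = 13 * PAGE_SIZE - 45000 - 8192;	/* 56 bytes of slack */

so in this case the up-front dynamic area stays below PAGE_SIZE.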

This will allow simplification of pcpu_setup_first_chunk() and removal
of chunk->page array.

[ Impact: no outside visible change other than up-front allocation of dyn area ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 mm/percpu.c |   71 ++++++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 39f4022..c173763 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -632,6 +632,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size,
 		pcpu_unmap(chunk, unmap_start, unmap_end, flush);
 }
 
+static int __pcpu_map_pages(unsigned long addr, struct page **pages,
+			    int nr_pages)
+{
+	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
+					PAGE_KERNEL, pages);
+}
+
 /**
  * pcpu_map - map pages into a pcpu_chunk
  * @chunk: chunk of interest
@@ -651,11 +658,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 	WARN_ON(chunk->immutable);
 
 	for_each_possible_cpu(cpu) {
-		err = map_kernel_range_noflush(
-				pcpu_chunk_addr(chunk, cpu, page_start),
-				(page_end - page_start) << PAGE_SHIFT,
-				PAGE_KERNEL,
-				pcpu_chunk_pagep(chunk, cpu, page_start));
+		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
+				       pcpu_chunk_pagep(chunk, cpu, page_start),
+				       page_end - page_start);
 		if (err < 0)
 			return err;
 	}
@@ -1274,12 +1279,12 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
  * 4k page first chunk setup helper.
  */
 static struct page **pcpu4k_pages __initdata;
-static int pcpu4k_nr_static_pages __initdata;
+static int pcpu4k_unit_pages __initdata;
 
 static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
 {
-	if (pageno < pcpu4k_nr_static_pages)
-		return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
+	if (pageno < pcpu4k_unit_pages)
+		return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno];
 	return NULL;
 }
 
@@ -1306,22 +1311,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size,
 				   pcpu_fc_free_fn_t free_fn,
 				   pcpu_fc_populate_pte_fn_t populate_pte_fn)
 {
+	static struct vm_struct vm;
 	size_t pages_size;
 	unsigned int cpu;
 	int i, j;
 	ssize_t ret;
 
-	pcpu4k_nr_static_pages = PFN_UP(static_size);
+	pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size,
+					 PCPU_MIN_UNIT_SIZE));
 
 	/* unaligned allocations can't be freed, round up to page size */
-	pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() *
+	pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() *
 			       sizeof(pcpu4k_pages[0]));
 	pcpu4k_pages = alloc_bootmem(pages_size);
 
-	/* allocate and copy */
+	/* allocate pages */
 	j = 0;
 	for_each_possible_cpu(cpu)
-		for (i = 0; i < pcpu4k_nr_static_pages; i++) {
+		for (i = 0; i < pcpu4k_unit_pages; i++) {
 			void *ptr;
 
 			ptr = alloc_fn(cpu, PAGE_SIZE);
@@ -1330,18 +1337,48 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size,
 					   "4k page for cpu%u\n", cpu);
 				goto enomem;
 			}
-
-			memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
 			pcpu4k_pages[j++] = virt_to_page(ptr);
 		}
 
+	/* allocate vm area, map the pages and copy static data */
+	vm.flags = VM_ALLOC;
+	vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT;
+	vm_area_register_early(&vm, PAGE_SIZE);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long unit_addr = (unsigned long)vm.addr +
+			(cpu * pcpu4k_unit_pages << PAGE_SHIFT);
+
+		for (i = 0; i < pcpu4k_unit_pages; i++)
+			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+
+		/* pte already populated, the following shouldn't fail */
+		ret = __pcpu_map_pages(unit_addr,
+				       &pcpu4k_pages[cpu * pcpu4k_unit_pages],
+				       pcpu4k_unit_pages);
+		if (ret < 0)
+			panic("failed to map percpu area, err=%zd\n", ret);
+
+		/*
+		 * FIXME: Archs with virtual cache should flush local
+		 * cache for the linear mapping here - something
+		 * equivalent to flush_cache_vmap() on the local cpu.
+		 * flush_cache_vmap() can't be used as most supporting
+		 * data structures are not set up yet.
+		 */
+
+		/* copy static data */
+		memcpy((void *)unit_addr, __per_cpu_load, static_size);
+	}
+
 	/* we're ready, commit */
-	pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
-		pcpu4k_nr_static_pages, static_size);
+	pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n",
+		pcpu4k_unit_pages, static_size);
 
 	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
 				     reserved_size, -1,
-				     -1, NULL, populate_pte_fn);
+				     pcpu4k_unit_pages << PAGE_SHIFT, vm.addr,
+				     NULL);
 	goto out_free_ar;
 
 enomem:
-- 
1.6.0.2

Thread overview: 66+ messages
2009-06-24 13:30 [PATCHSET] percpu: generalize first chunk allocators and improve lpage NUMA support Tejun Heo
2009-06-24 13:30 ` Tejun Heo
2009-06-24 13:30 ` [PATCH 01/10] x86: make pcpu_chunk_addr_search() matching stricter Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 02/10] percpu: drop @unit_size from embed first chunk allocator Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 03/10] x86,percpu: generalize 4k " Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` Tejun Heo [this message]
2009-06-24 13:30   ` [PATCH 04/10] percpu: make 4k first chunk allocator map memory Tejun Heo
2009-06-24 13:30 ` [PATCH 05/10] x86,percpu: generalize lpage first chunk allocator Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 06/10] percpu: simplify pcpu_setup_first_chunk() Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 07/10] percpu: reorder a few functions in mm/percpu.c Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 08/10] percpu: drop pcpu_chunk->page[] Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 09/10] percpu: allow non-linear / sparse cpu -> unit mapping Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 13:30 ` [PATCH 10/10] percpu: teach large page allocator about NUMA Tejun Heo
2009-06-24 13:30   ` Tejun Heo
2009-06-24 23:55 ` [PATCHSET] percpu: generalize first chunk allocators and improve lpage NUMA support Andrew Morton
2009-06-25  0:02   ` Andi Kleen
2009-06-25  0:13     ` H. Peter Anvin
2009-06-25  9:19       ` Andi Kleen
2009-06-25 14:18         ` H. Peter Anvin
2009-06-25 19:54           ` Andi Kleen
2009-06-25 20:15             ` H. Peter Anvin
2009-06-25 20:26               ` Andi Kleen
2009-06-26  0:40                 ` Tejun Heo
2009-06-26  2:02                   ` H. Peter Anvin
2009-06-26  6:54                   ` Andi Kleen
2009-06-25  2:35   ` Tejun Heo
2009-06-25  9:20     ` Ingo Molnar
2009-06-29 23:20   ` Christoph Lameter
2009-06-29 23:39     ` Andrew Morton
2009-06-30 14:24       ` Christoph Lameter
2009-06-30 19:15       ` Ingo Molnar
2009-06-30 19:39         ` Christoph Lameter
2009-06-30 20:21           ` Scott Lurndal
2009-06-30 21:31           ` Ingo Molnar
2009-06-30 22:16             ` Christoph Lameter
2009-06-30 22:31               ` Ingo Molnar
2009-06-30 22:40                 ` Andi Kleen
2009-07-01  0:48                   ` Tejun Heo
2009-06-30 22:55                 ` Christoph Lameter
2009-06-30 22:55                   ` Christoph Lameter
2009-06-30 23:07                   ` Ingo Molnar
2009-06-30 23:18                     ` Christoph Lameter
2009-06-30 23:30                       ` Ingo Molnar
2009-07-01  6:34                     ` Andi Kleen
2009-06-30 23:20               ` Tejun Heo
2009-06-30 23:31                 ` Ingo Molnar
2009-06-30 23:34                   ` H. Peter Anvin
2009-07-01  6:42                 ` Andi Kleen
2009-07-01 10:21                   ` Tejun Heo
2009-07-01 12:23                     ` Andi Kleen
2009-07-01 12:53                       ` Tejun Heo
2009-07-01 13:11                         ` Andi Kleen
2009-07-01 17:33                           ` Christoph Lameter
2009-07-01 22:42                             ` Tejun Heo
2009-07-03 23:14 ` Tejun Heo
2009-07-03 23:14   ` Tejun Heo
2009-07-13 10:12 ` [PATCH 04/10] percpu: make 4k first chunk allocator map memory David Howells
2009-07-15  3:17   ` Tejun Heo
