From: Tejun Heo <tj@kernel.org>
To: JBeulich@novell.com, andi@firstfloor.org, mingo@elte.hu,
linux-kernel-owner@vger.kernel.org, hpa@zytor.com,
tglx@linutronix.de, linux-kernel@vger.kernel.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 4/5] x86: implement percpu_alloc kernel parameter
Date: Fri, 15 May 2009 13:28:58 +0900 [thread overview]
Message-ID: <1242361739-21186-5-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1242361739-21186-1-git-send-email-tj@kernel.org>
According to Andi, it isn't clear whether the remap allocator is worth the
trouble as there are many processors where PMD TLB is far scarcer than
PTE TLB. The advantage or disadvantage probably depends on the actual
size of the percpu area and the specific processor. As performance
degradation due to TLB pressure tends to be highly workload specific
and subtle, it is difficult to decide which way to go without more
data.
This patch implements the percpu_alloc kernel parameter to allow selecting
which first chunk allocator to use, to ease debugging and testing.
While at it, make sure all the failure paths report why something
failed, to help determine why a certain allocator isn't working. Also,
kill the "Great future plan" comment which had already been realized
quite some time ago.
[ Impact: allow explicit percpu first chunk allocator selection ]
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jan Beulich <JBeulich@novell.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
Documentation/kernel-parameters.txt | 6 +++
arch/x86/kernel/setup_percpu.c | 69 +++++++++++++++++++++++++----------
mm/percpu.c | 13 +++++--
3 files changed, 65 insertions(+), 23 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbf..929bb3a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1837,6 +1837,12 @@ and is between 256 and 4096 characters. It is defined in the file
Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c
+ percpu_alloc= [X86] Select which percpu first chunk allocator to use.
+ Allowed values are one of "remap", "embed" and "4k".
+ See comments in arch/x86/kernel/setup_percpu.c for
+ details on each allocator. This parameter is primarily
+ for debugging and performance comparison.
+
pf. [PARIDE]
See Documentation/blockdev/paride.txt.
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index dd567a7..29be178 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -156,18 +156,21 @@ static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
return virt_to_page(pcpur_map[cpu].ptr + off);
}
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_remap(size_t static_size, bool chosen)
{
size_t map_size, dyn_size;
unsigned int cpu;
int i, j;
ssize_t ret;
- /*
- * If large page isn't supported, there's no benefit in doing
- * this. Also, on non-NUMA, embedding is better.
- */
- if (!cpu_has_pse || !pcpu_need_numa())
+ /* need PSE */
+ if (!cpu_has_pse) {
+ pr_warning("PERCPU: remap allocator requires PSE\n");
+ return -EINVAL;
+ }
+
+ /* on non-NUMA, embedding is better */
+ if (!chosen && !pcpu_need_numa())
return -EINVAL;
/*
@@ -191,8 +194,11 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
pcpur_map[cpu].cpu = cpu;
pcpur_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
PMD_SIZE);
- if (!pcpur_map[cpu].ptr)
+ if (!pcpur_map[cpu].ptr) {
+ pr_warning("PERCPU: failed to allocate large page "
+ "for cpu%u\n", cpu);
goto enomem;
+ }
/*
* Only use pcpur_size bytes and give back the rest.
@@ -297,7 +303,7 @@ void *pcpu_pmd_remapped(void *kaddr)
return NULL;
}
#else
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_remap(size_t static_size, bool chosen)
{
return -EINVAL;
}
@@ -311,7 +317,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
* mapping so that it can use PMD mapping without additional TLB
* pressure.
*/
-static ssize_t __init setup_pcpu_embed(size_t static_size)
+static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
{
size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
@@ -320,7 +326,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
* this. Also, embedding allocation doesn't play well with
* NUMA.
*/
- if (!cpu_has_pse || pcpu_need_numa())
+ if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
return -EINVAL;
return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -370,8 +376,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
void *ptr;
ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
- if (!ptr)
+ if (!ptr) {
+ pr_warning("PERCPU: failed to allocate "
+ "4k page for cpu%u\n", cpu);
goto enomem;
+ }
memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -395,6 +404,16 @@ out_free_ar:
return ret;
}
+/* for explicit first chunk allocator selection */
+static char pcpu_chosen_alloc[16] __initdata;
+
+static int __init percpu_alloc_setup(char *str)
+{
+ strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
+ return 0;
+}
+early_param("percpu_alloc", percpu_alloc_setup);
+
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
@@ -408,11 +427,6 @@ static inline void setup_percpu_segment(int cpu)
#endif
}
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
void __init setup_per_cpu_areas(void)
{
size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -429,9 +443,26 @@ void __init setup_per_cpu_areas(void)
* of large page mappings. Please read comments on top of
* each allocator for details.
*/
- ret = setup_pcpu_remap(static_size);
- if (ret < 0)
- ret = setup_pcpu_embed(static_size);
+ ret = -EINVAL;
+ if (strlen(pcpu_chosen_alloc)) {
+ if (strcmp(pcpu_chosen_alloc, "4k")) {
+ if (!strcmp(pcpu_chosen_alloc, "remap"))
+ ret = setup_pcpu_remap(static_size, true);
+ else if (!strcmp(pcpu_chosen_alloc, "embed"))
+ ret = setup_pcpu_embed(static_size, true);
+ else
+ pr_warning("PERCPU: unknown allocator %s "
+ "specified\n", pcpu_chosen_alloc);
+ if (ret < 0)
+ pr_warning("PERCPU: %s allocator failed (%zd), "
+ "falling back to 4k\n",
+ pcpu_chosen_alloc, ret);
+ }
+ } else {
+ ret = setup_pcpu_remap(static_size, false);
+ if (ret < 0)
+ ret = setup_pcpu_embed(static_size, false);
+ }
if (ret < 0)
ret = setup_pcpu_4k(static_size);
if (ret < 0)
diff --git a/mm/percpu.c b/mm/percpu.c
index 1aa5d8f..d42f2ce 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1287,6 +1287,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
ssize_t dyn_size, ssize_t unit_size)
{
+ size_t chunk_size;
unsigned int cpu;
/* determine parameters and allocate */
@@ -1301,11 +1302,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
} else
pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
- pcpue_ptr = __alloc_bootmem_nopanic(
- num_possible_cpus() * pcpue_unit_size,
- PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
- if (!pcpue_ptr)
+ chunk_size = pcpue_unit_size * num_possible_cpus();
+
+ pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
+ __pa(MAX_DMA_ADDRESS));
+ if (!pcpue_ptr) {
+ pr_warning("PERCPU: failed to allocate %zu bytes for "
+ "embedding\n", chunk_size);
return -ENOMEM;
+ }
/* return the leftover and copy */
for_each_possible_cpu(cpu) {
--
1.6.0.2
next prev parent reply other threads:[~2009-05-15 4:30 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-05-15 4:28 [GIT PATCH 2.6.30-rc5] x86,percpu: fix pageattr handling with remap allocator, take#2 Tejun Heo
2009-05-15 4:28 ` [PATCH 1/5] x86: prepare setup_pcpu_remap() for pageattr fix Tejun Heo
2009-05-15 4:28 ` [PATCH 2/5] x86: reorganize cpa_process_alias() Tejun Heo
2009-05-15 9:35 ` [PATCH UPDATED " Tejun Heo
2009-05-15 4:28 ` [PATCH 3/5] x86: fix pageattr handling for remap percpu allocator Tejun Heo
2009-05-15 7:58 ` Jan Beulich
2009-05-15 8:18 ` Tejun Heo
2009-05-15 9:35 ` [PATCH UPDATED " Tejun Heo
2009-05-15 4:28 ` Tejun Heo [this message]
2009-05-15 8:04 ` [PATCH 4/5] x86: implement percpu_alloc kernel parameter Jan Beulich
2009-05-15 9:27 ` Tejun Heo
2009-05-15 4:28 ` [PATCH 5/5] x86: ensure percpu remap doesn't consume too much vmalloc space Tejun Heo
2009-05-15 9:36 ` [GIT PATCH 2.6.30-rc5] x86,percpu: fix pageattr handling with remap allocator, take#2 Tejun Heo
2009-05-15 9:52 ` [GIT PATCH 2.6.30-rc5] x86,percpu: fix pageattr handling withremap " Jan Beulich
[not found] ` <4A0D57920200007800001150@novell.com>
2009-05-15 10:08 ` Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2009-06-01 6:34 [GIT PATCH core/percpu] x86,percpu: fix pageattr handling with remap allocator, take#3 Tejun Heo
2009-06-01 6:34 ` [PATCH 4/5] x86: implement percpu_alloc kernel parameter Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1242361739-21186-5-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=JBeulich@novell.com \
--cc=andi@firstfloor.org \
--cc=hpa@zytor.com \
--cc=linux-kernel-owner@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox