From: Tejun Heo <tj@kernel.org>
To: mingo@elte.hu, rusty@rustcorp.com.au, tglx@linutronix.de,
x86@kernel.org, linux-kernel@vger.kernel.org, hpa@zytor.com,
jeremy@goop.org, cpw@sgi.com, nickpiggin@yahoo.com.au,
ink@jurassic.park.msu.ru
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 09/10] x86: add embedding percpu first chunk allocator
Date: Tue, 24 Feb 2009 12:11:40 +0900 [thread overview]
Message-ID: <1235445101-7882-10-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1235445101-7882-1-git-send-email-tj@kernel.org>
Impact: add better first percpu allocation for !NUMA
On !NUMA, we can simply allocate contiguous memory and use it for the
first chunk without mapping it into the vmalloc area. As the memory area
is covered by the large page physical memory mapping, the dynamic
percpu allocator adds no TLB overhead for the static percpu area or
for whatever else falls into the first chunk. The implementation is
very simple, too.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
arch/x86/kernel/setup_percpu.c | 86 +++++++++++++++++++++++++++++++++++++++-
1 files changed, 85 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 4a17c96..fd4c399 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -43,6 +43,35 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
EXPORT_SYMBOL(__per_cpu_offset);
/**
+ * pcpu_need_numa - determine whether percpu allocation needs to consider NUMA
+ *
+ * If NUMA is not configured or there is only one NUMA node available,
+ * there is no reason to consider NUMA. This function determines
+ * whether percpu allocation should consider NUMA or not.
+ *
+ * RETURNS:
+ * true if NUMA should be considered; otherwise, false.
+ */
+static bool __init pcpu_need_numa(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	pg_data_t *prev = NULL;
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int nid = early_cpu_to_node(cpu);
+		pg_data_t *cur = NODE_DATA(nid);
+
+		/*
+		 * Report NUMA as soon as an online node's data differs
+		 * from the node data seen for the previous CPU.
+		 */
+		if (node_online(nid) && cur && prev && prev != cur)
+			return true;
+
+		/* remembered unconditionally, even if @nid is offline */
+		prev = cur;
+	}
+#endif
+	/* !CONFIG_NEED_MULTIPLE_NODES or every CPU mapped to the same node */
+	return false;
+}
+
+/**
* pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
* @cpu: cpu to allocate for
* @size: size allocation in bytes
@@ -82,6 +111,59 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
}
/*
+ * Embedding allocator
+ *
+ * The first chunk is sized to just contain the static area plus
+ * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using
+ * bootmem allocator and used as-is without being mapped into vmalloc
+ * area. This enables the first chunk to piggy back on the linear
+ * physical PMD mapping and doesn't add any additional pressure to
+ * TLB.
+ */
+static void *pcpue_ptr __initdata; /* start of the bootmem area holding all CPUs' units */
+static size_t pcpue_unit_size __initdata; /* size in bytes of one CPU's unit in that area */
+
+/*
+ * Page lookup callback handed to pcpu_setup_first_chunk(): translate
+ * (@cpu, @pageno) into the struct page backing that page of @cpu's
+ * unit inside the embedded area.
+ */
+static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
+{
+	/* byte offset of @cpu's unit from the start of the area */
+	size_t unit_off = cpu * pcpue_unit_size;
+	/* byte offset of page @pageno inside that unit */
+	size_t page_off = (size_t)pageno << PAGE_SHIFT;
+
+	return virt_to_page(pcpue_ptr + unit_off + page_off);
+}
+
+/**
+ * setup_pcpu_embed - set up the first percpu chunk as an embedded area
+ * @static_size: size of the static percpu area in bytes
+ *
+ * Allocate one contiguous bootmem area containing a unit for every
+ * possible CPU, copy the static percpu data into each unit and hand
+ * the area to pcpu_setup_first_chunk() as-is.
+ *
+ * RETURNS:
+ * The return value of pcpu_setup_first_chunk() once the area has been
+ * set up, -EINVAL if embedding is not applicable (no PSE or NUMA needs
+ * to be considered), -ENOMEM if the bootmem allocation fails.
+ */
+static ssize_t __init setup_pcpu_embed(size_t static_size)
+{
+	unsigned int cpu;
+
+	/*
+	 * If large page isn't supported, there's no benefit in doing
+	 * this.  Also, embedding allocation doesn't play well with
+	 * NUMA.
+	 */
+	if (!cpu_has_pse || pcpu_need_numa())
+		return -EINVAL;
+
+	/*
+	 * allocate and copy
+	 *
+	 * max_t() is used because pcpue_unit_size is size_t while
+	 * PCPU_MIN_UNIT_SIZE may expand to a different integer type;
+	 * the type-checking max() would then warn about comparing
+	 * distinct types.
+	 */
+	pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE);
+	pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE);
+	pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
+				       PAGE_SIZE);
+	if (!pcpue_ptr)
+		return -ENOMEM;
+
+	/* every unit starts out as a copy of the static percpu image */
+	for_each_possible_cpu(cpu)
+		memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load,
+		       static_size);
+
+	/* we're ready, commit */
+	pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
+		pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size);
+
+	/* the space left in each unit after the static data is passed on too */
+	return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+				      pcpue_unit_size,
+				      pcpue_unit_size - static_size, pcpue_ptr,
+				      NULL);
+}
+
+/*
* 4k page allocator
*
* This is the basic allocator. Static percpu area is allocated
@@ -178,7 +260,9 @@ void __init setup_per_cpu_areas(void)
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/* allocate percpu area */
- ret = setup_pcpu_4k(static_size);
+ ret = setup_pcpu_embed(static_size);
+ if (ret < 0)
+ ret = setup_pcpu_4k(static_size);
if (ret < 0)
panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
static_size, ret);
--
1.6.0.2
next prev parent reply other threads:[~2009-02-24 3:16 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-02-24 3:11 [PATCHSET x86/core/percpu] improve the first percpu chunk allocation Tejun Heo
2009-02-24 3:11 ` [PATCH 01/10] percpu: fix pcpu_chunk_struct_size Tejun Heo
2009-02-24 3:11 ` [PATCH 02/10] bootmem: clean up arch-specific bootmem wrapping Tejun Heo
2009-02-24 11:30 ` Johannes Weiner
2009-02-24 11:39 ` Tejun Heo
2009-02-24 3:11 ` [PATCH 03/10] bootmem: reorder interface functions and add a missing one Tejun Heo
2009-02-24 3:11 ` [PATCH 04/10] vmalloc: add @align to vm_area_register_early() Tejun Heo
2009-02-24 3:11 ` [PATCH 05/10] x86: update populate_extra_pte() and add populate_extra_pmd() Tejun Heo
2009-02-24 3:11 ` [PATCH 06/10] percpu: remove unit_size power-of-2 restriction Tejun Heo
2009-02-24 3:11 ` [PATCH 07/10] percpu: give more latitude to arch specific first chunk initialization Tejun Heo
2009-02-24 3:11 ` [PATCH 08/10] x86: separate out setup_pcpu_4k() from setup_per_cpu_areas() Tejun Heo
2009-02-24 3:11 ` Tejun Heo [this message]
2009-02-24 3:11 ` [PATCH 10/10] x86: add remapping percpu first chunk allocator Tejun Heo
2009-02-24 9:57 ` [PATCHSET x86/core/percpu] improve the first percpu chunk allocation Ingo Molnar
2009-02-24 11:48 ` Tejun Heo
2009-02-24 12:40 ` Ingo Molnar
2009-02-24 13:27 ` Tejun Heo
2009-02-24 14:12 ` Ingo Molnar
2009-02-24 14:37 ` Tejun Heo
2009-02-24 15:15 ` Ingo Molnar
2009-02-24 23:33 ` Tejun Heo
2009-03-04 0:03 ` Rusty Russell
2009-03-04 0:15 ` H. Peter Anvin
2009-03-04 0:50 ` Ingo Molnar
2009-02-24 12:51 ` Ingo Molnar
2009-02-24 14:47 ` Tejun Heo
2009-02-24 15:19 ` Ingo Molnar
2009-02-24 15:30 ` Nick Piggin
2009-02-24 13:02 ` Ingo Molnar
2009-02-24 14:40 ` Tejun Heo
2009-02-24 20:17 ` Ingo Molnar
2009-02-24 20:51 ` Ingo Molnar
2009-02-24 21:02 ` Yinghai Lu
2009-02-24 21:12 ` [PATCH] x86: check range in reserve_early() -v2 Yinghai Lu
2009-02-24 21:16 ` [PATCHSET x86/core/percpu] improve the first percpu chunk allocation Ingo Molnar
2009-02-25 2:09 ` [PATCH x86/core/percpu 1/2] x86, percpu: fix minor bugs in setup_percpu.c Tejun Heo
2009-02-25 2:10 ` [PATCH x86/core/percpu 2/2] x86: convert cacheflush macros inline functions Tejun Heo
2009-02-25 2:23 ` [PATCHSET x86/core/percpu] improve the first percpu chunk allocation Tejun Heo
2009-02-25 2:56 ` Tejun Heo
2009-02-25 12:59 ` Ingo Molnar
2009-02-25 13:43 ` WARNING: at include/linux/percpu.h:159 __create_workqueue_key+0x1f6/0x220() Ingo Molnar
2009-02-26 2:03 ` [PATCH core/percpu] percpu: fix too low alignment restriction on UP Tejun Heo
2009-02-26 3:26 ` Ingo Molnar
2009-02-25 6:40 ` [PATCHSET x86/core/percpu] improve the first percpu chunk allocation Rusty Russell
2009-02-25 12:54 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1235445101-7882-10-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=cpw@sgi.com \
--cc=hpa@zytor.com \
--cc=ink@jurassic.park.msu.ru \
--cc=jeremy@goop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=nickpiggin@yahoo.com.au \
--cc=rusty@rustcorp.com.au \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox