From: Manfred Spraul <manfred@colorfullife.com>
To: Jack Steiner <steiner@sgi.com>
Cc: Jes Sorensen <jes@wildopensource.com>,
linux-kernel@vger.kernel.org, anton@samba.org,
"Martin J. Bligh" <mbligh@aracnet.com>,
William Lee Irwin III <wli@holomorphy.com>,
Andrew Morton <akpm@digeo.com>
Subject: Re: hash table sizes
Date: Sat, 29 Nov 2003 11:39:44 +0100 [thread overview]
Message-ID: <3FC87770.1080400@colorfullife.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 627 bytes --]
What about the attached patch?
- add command line overrides for the hash tables sizes. It's impossible
to guess if the system is used as a HPC or as a file server. Doc update
missing.
- limit the dcache hash to 8 mio entries, and the inode hash to 1 mio
entries, regardless of the amount of memory in the system. The system
admin can override the defaults at boot time if needed.
- distribute the memory allocations that happen during boot to all nodes
- the memory will be touched by all cpus, binding all allocs to the boot
node is wrong.
The patch compiles, but is untested due to lack of hardware.
--
Manfred
[-- Attachment #2: patch-numa --]
[-- Type: text/plain, Size: 3591 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 6
// SUBLEVEL = 0
// EXTRAVERSION = -test11
--- 2.6/mm/page_alloc.c 2003-11-29 09:46:35.000000000 +0100
+++ build-2.6/mm/page_alloc.c 2003-11-29 11:34:04.000000000 +0100
@@ -681,6 +681,42 @@
EXPORT_SYMBOL(__alloc_pages);
+#ifdef CONFIG_NUMA
+/* Early boot: Everything is done by one cpu, but the data structures will be
+ * used by all cpus - spread them on all nodes.
+ */
+static __init unsigned long get_boot_pages(unsigned int gfp_mask, unsigned int order)
+{
+static int nodenr;
+ int i = nodenr;
+ struct page *page;
+
+ for (;;) {
+ if (i > nodenr + numnodes)
+ return 0;
+ if (node_present_pages(i%numnodes)) {
+ struct zone **z;
+ /* The node contains memory. Check that there is
+ * memory in the intended zonelist.
+ */
+ z = NODE_DATA(i%numnodes)->node_zonelists[gfp_mask & GFP_ZONEMASK].zones;
+ while (*z) {
+ if ( (*z)->free_pages > (1UL<<order))
+ goto found_node;
+ z++;
+ }
+ }
+ i++;
+ }
+found_node:
+ nodenr = i+1;
+ page = alloc_pages_node(i%numnodes, gfp_mask, order);
+ if (!page)
+ return 0;
+ return (unsigned long) page_address(page);
+}
+#endif
+
/*
* Common helper functions.
*/
@@ -688,6 +724,10 @@
{
struct page * page;
+#ifdef CONFIG_NUMA
+ if (unlikely(!system_running))
+ return get_boot_pages(gfp_mask, order);
+#endif
page = alloc_pages(gfp_mask, order);
if (!page)
return 0;
--- 2.6/fs/inode.c 2003-11-29 09:46:34.000000000 +0100
+++ build-2.6/fs/inode.c 2003-11-29 10:19:21.000000000 +0100
@@ -1327,6 +1327,20 @@
wake_up_all(wq);
}
+static __initdata int ihash_entries;
+
+static int __init set_ihash_entries(char *str)
+{
+ get_option(&str, &ihash_entries);
+ if (ihash_entries <= 0) {
+ ihash_entries = 0;
+ return 0;
+ }
+ return 1;
+}
+
+__setup("ihash_entries=", set_ihash_entries);
+
/*
* Initialize the waitqueues and inode hash table.
*/
@@ -1340,8 +1354,16 @@
for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
init_waitqueue_head(&i_wait_queue_heads[i].wqh);
- mempages >>= (14 - PAGE_SHIFT);
- mempages *= sizeof(struct hlist_head);
+ if (!ihash_entries) {
+ ihash_entries = mempages >> (14 - PAGE_SHIFT);
+ /* Limit inode hash size. Override for nfs servers
+ * that handle lots of files.
+ */
+ if (ihash_entries > 1024*1024)
+ ihash_entries = 1024*1024;
+ }
+
+ mempages = ihash_entries*sizeof(struct hlist_head);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
;
--- 2.6/fs/dcache.c 2003-11-29 09:46:34.000000000 +0100
+++ build-2.6/fs/dcache.c 2003-11-29 10:53:15.000000000 +0100
@@ -1546,6 +1546,20 @@
return ino;
}
+static __initdata int dhash_entries;
+
+static int __init set_dhash_entries(char *str)
+{
+ get_option(&str, &dhash_entries);
+ if (dhash_entries <= 0) {
+ dhash_entries = 0;
+ return 0;
+ }
+ return 1;
+}
+
+__setup("dhash_entries=", set_dhash_entries);
+
static void __init dcache_init(unsigned long mempages)
{
struct hlist_head *d;
@@ -1571,10 +1585,18 @@
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+ if (!dhash_entries) {
#if PAGE_SHIFT < 13
- mempages >>= (13 - PAGE_SHIFT);
+ mempages >>= (13 - PAGE_SHIFT);
#endif
- mempages *= sizeof(struct hlist_head);
+ dhash_entries = mempages;
+ /* 8 mio is enough for general purpose systems.
+ * For file servers, override with "dhash_entries="
+ */
+ if (dhash_entries > 8*1024*1024)
+ dhash_entries = 8*1024*1024;
+ }
+ mempages = dhash_entries*sizeof(struct hlist_head);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
;
next reply other threads:[~2003-11-29 10:40 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-11-29 10:39 Manfred Spraul [this message]
-- strict thread matches above, loose matches on Subject: below --
2003-11-26 5:53 hash table sizes Zhang, Yanmin
2003-11-25 13:35 Jes Sorensen
2003-11-25 13:42 ` William Lee Irwin III
2003-11-25 13:54 ` Jes Sorensen
2003-11-25 16:25 ` Thomas Schlichter
2003-11-25 17:52 ` Antonio Vargas
2003-11-25 17:54 ` William Lee Irwin III
2003-11-25 20:48 ` Jack Steiner
2003-11-25 21:07 ` Andrew Morton
2003-11-25 21:14 ` Jesse Barnes
2003-11-25 21:24 ` Andrew Morton
2003-11-26 2:14 ` David S. Miller
2003-11-26 5:27 ` Matt Mackall
2003-11-28 14:15 ` Jes Sorensen
2003-11-28 14:52 ` Jack Steiner
2003-11-28 16:22 ` Jes Sorensen
2003-11-28 19:35 ` Jack Steiner
2003-11-28 21:18 ` Jörn Engel
2003-12-01 9:46 ` Jes Sorensen
2003-12-01 21:06 ` Anton Blanchard
2003-12-01 22:57 ` Martin J. Bligh
2003-11-25 21:16 ` Anton Blanchard
2003-11-25 23:11 ` Jack Steiner
2003-11-26 3:39 ` Rik van Riel
2003-11-26 3:59 ` William Lee Irwin III
2003-11-26 4:25 ` Andrew Morton
2003-11-26 4:23 ` William Lee Irwin III
2003-11-26 5:14 ` Martin J. Bligh
2003-11-26 9:51 ` William Lee Irwin III
2003-11-26 16:17 ` Martin J. Bligh
2003-11-26 7:25 ` Anton Blanchard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3FC87770.1080400@colorfullife.com \
--to=manfred@colorfullife.com \
--cc=akpm@digeo.com \
--cc=anton@samba.org \
--cc=jes@wildopensource.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mbligh@aracnet.com \
--cc=steiner@sgi.com \
--cc=wli@holomorphy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox