From: Manfred Spraul <manfred@colorfullife.com>
To: Jack Steiner <steiner@sgi.com>
Cc: Jes Sorensen <jes@wildopensource.com>,
linux-kernel@vger.kernel.org, anton@samba.org,
"Martin J. Bligh" <mbligh@aracnet.com>,
William Lee Irwin III <wli@holomorphy.com>,
Andrew Morton <akpm@digeo.com>
Subject: Re: hash table sizes
Date: Sat, 29 Nov 2003 11:39:44 +0100 [thread overview]
Message-ID: <3FC87770.1080400@colorfullife.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 627 bytes --]
What about the attached patch?
- add command line overrides for the hash table sizes. It's impossible
to guess whether the system is used for HPC or as a file server. The
documentation update is still missing.
- limit the dcache hash to 8 million (8*1024*1024) entries, and the inode
hash to 1 million (1024*1024) entries, regardless of the amount of memory
in the system. The system admin can override the defaults at boot time if needed.
- distribute the memory allocations that happen during boot across all
nodes: the memory will be touched by all CPUs, so binding all allocations
to the boot node is wrong.
The patch compiles, but is untested due to lack of hardware.
--
Manfred
[-- Attachment #2: patch-numa --]
[-- Type: text/plain, Size: 3591 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 6
// SUBLEVEL = 0
// EXTRAVERSION = -test11
--- 2.6/mm/page_alloc.c 2003-11-29 09:46:35.000000000 +0100
+++ build-2.6/mm/page_alloc.c 2003-11-29 11:34:04.000000000 +0100
@@ -681,6 +681,42 @@
EXPORT_SYMBOL(__alloc_pages);
+#ifdef CONFIG_NUMA
+/* Early boot: one cpu does all allocations, but every cpu will use the data.
+ * Round-robin over the nodes that have memory. Returns page_address() of the
+ * allocated block, or 0 if no node qualifies or the allocation fails.
+ */
+static __init unsigned long get_boot_pages(unsigned int gfp_mask, unsigned int order)
+{
+	static int nodenr;	/* node to try first on the next call */
+	int i = nodenr;
+	struct page *page;
+
+	for (;;) {
+		/* Give up once each node has been examined exactly once. */
+		if (i >= nodenr + numnodes)
+			return 0;
+		if (node_present_pages(i%numnodes)) {
+			struct zone **z;
+			/* The node has memory: look for a zone with enough free pages. */
+			z = NODE_DATA(i%numnodes)->node_zonelists[gfp_mask & GFP_ZONEMASK].zones;
+			while (*z) {
+				if ( (*z)->free_pages > (1UL<<order))
+					goto found_node;
+				z++;
+			}
+		}
+		i++;
+	}
+found_node:
+	nodenr = (i+1) % numnodes;	/* keep the cursor within [0, numnodes) */
+	page = alloc_pages_node(i%numnodes, gfp_mask, order);
+	if (!page)
+		return 0;
+	return (unsigned long) page_address(page);
+}
+#endif
+
/*
* Common helper functions.
*/
@@ -688,6 +724,10 @@
{
struct page * page;
+#ifdef CONFIG_NUMA
+ if (unlikely(!system_running))
+ return get_boot_pages(gfp_mask, order);
+#endif
page = alloc_pages(gfp_mask, order);
if (!page)
return 0;
--- 2.6/fs/inode.c 2003-11-29 09:46:34.000000000 +0100
+++ build-2.6/fs/inode.c 2003-11-29 10:19:21.000000000 +0100
@@ -1327,6 +1327,20 @@
wake_up_all(wq);
}
+static __initdata int ihash_entries;
+
+static int __init set_ihash_entries(char *str)
+{
+	/* Boot option "ihash_entries=": a non-positive value is reset to 0. */
+	get_option(&str, &ihash_entries);
+	if (ihash_entries > 0)
+		return 1;
+	ihash_entries = 0;
+	return 0;
+}
+
+__setup("ihash_entries=", set_ihash_entries);
+
/*
* Initialize the waitqueues and inode hash table.
*/
@@ -1340,8 +1354,16 @@
for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
init_waitqueue_head(&i_wait_queue_heads[i].wqh);
- mempages >>= (14 - PAGE_SHIFT);
- mempages *= sizeof(struct hlist_head);
+ if (!ihash_entries) {
+ ihash_entries = mempages >> (14 - PAGE_SHIFT);
+ /* Limit inode hash size. Override for nfs servers
+ * that handle lots of files.
+ */
+ if (ihash_entries > 1024*1024)
+ ihash_entries = 1024*1024;
+ }
+
+ mempages = ihash_entries*sizeof(struct hlist_head);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
;
--- 2.6/fs/dcache.c 2003-11-29 09:46:34.000000000 +0100
+++ build-2.6/fs/dcache.c 2003-11-29 10:53:15.000000000 +0100
@@ -1546,6 +1546,20 @@
return ino;
}
+static __initdata int dhash_entries;
+
+static int __init set_dhash_entries(char *str)
+{
+	/* Boot option "dhash_entries=": a non-positive value is reset to 0. */
+	get_option(&str, &dhash_entries);
+	if (dhash_entries > 0)
+		return 1;
+	dhash_entries = 0;
+	return 0;
+}
+
+__setup("dhash_entries=", set_dhash_entries);
+
static void __init dcache_init(unsigned long mempages)
{
struct hlist_head *d;
@@ -1571,10 +1585,18 @@
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+ if (!dhash_entries) {
#if PAGE_SHIFT < 13
- mempages >>= (13 - PAGE_SHIFT);
+ mempages >>= (13 - PAGE_SHIFT);
#endif
- mempages *= sizeof(struct hlist_head);
+ dhash_entries = mempages;
+ /* 8 mio is enough for general purpose systems.
+ * For file servers, override with "dhash_entries="
+ */
+ if (dhash_entries > 8*1024*1024)
+ dhash_entries = 8*1024*1024;
+ }
+ mempages = dhash_entries*sizeof(struct hlist_head);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
;
next reply other threads:[~2003-11-29 10:40 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-11-29 10:39 Manfred Spraul [this message]
-- strict thread matches above, loose matches on Subject: below --
2003-11-26 5:53 hash table sizes Zhang, Yanmin
2003-11-25 13:35 Jes Sorensen
2003-11-25 13:42 ` William Lee Irwin III
2003-11-25 13:54 ` Jes Sorensen
2003-11-25 16:25 ` Thomas Schlichter
2003-11-25 17:52 ` Antonio Vargas
2003-11-25 17:54 ` William Lee Irwin III
2003-11-25 20:48 ` Jack Steiner
2003-11-25 21:07 ` Andrew Morton
2003-11-25 21:14 ` Jesse Barnes
2003-11-25 21:24 ` Andrew Morton
2003-11-26 2:14 ` David S. Miller
2003-11-26 5:27 ` Matt Mackall
2003-11-28 14:15 ` Jes Sorensen
2003-11-28 14:52 ` Jack Steiner
2003-11-28 16:22 ` Jes Sorensen
2003-11-28 19:35 ` Jack Steiner
2003-11-28 21:18 ` Jörn Engel
2003-12-01 9:46 ` Jes Sorensen
2003-12-01 21:06 ` Anton Blanchard
2003-12-01 22:57 ` Martin J. Bligh
2003-11-25 21:16 ` Anton Blanchard
2003-11-25 23:11 ` Jack Steiner
2003-11-26 3:39 ` Rik van Riel
2003-11-26 3:59 ` William Lee Irwin III
2003-11-26 4:25 ` Andrew Morton
2003-11-26 4:23 ` William Lee Irwin III
2003-11-26 5:14 ` Martin J. Bligh
2003-11-26 9:51 ` William Lee Irwin III
2003-11-26 16:17 ` Martin J. Bligh
2003-11-26 7:25 ` Anton Blanchard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3FC87770.1080400@colorfullife.com \
--to=manfred@colorfullife.com \
--cc=akpm@digeo.com \
--cc=anton@samba.org \
--cc=jes@wildopensource.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mbligh@aracnet.com \
--cc=steiner@sgi.com \
--cc=wli@holomorphy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.