public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: torvalds@osdl.org, akpm@osdl.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size [#2]
Date: Fri, 11 Jun 2004 15:21:45 +0100	[thread overview]
Message-ID: <6567.1086963705@redhat.com> (raw)
In-Reply-To: <567.1086950642@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 255 bytes --]


Hi Linus, Andrew,

Here's an update to my patch. Thanks to Ingo Oeser for noticing that the patch
had a couple of problems in the allocation loop (it would never end if an
allocation failed, and the result of the allocation didn't need casting).

David


[-- Attachment #2: bootmem-hash-267rc3-2.diff --]
[-- Type: text/plain, Size: 10515 bytes --]

diff -uNrp linux-2.6.7-rc3/fs/dcache.c linux-2.6.7-rc3-hash/fs/dcache.c
--- linux-2.6.7-rc3/fs/dcache.c	2004-06-11 09:36:25.000000000 +0100
+++ linux-2.6.7-rc3-hash/fs/dcache.c	2004-06-11 10:52:23.000000000 +0100
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
+#include <linux/bootmem.h>
 
 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
@@ -1561,13 +1562,31 @@ static int __init set_dhash_entries(char
 }
 __setup("dhash_entries=", set_dhash_entries);
 
-static void __init dcache_init(unsigned long mempages)
+static void __init dcache_init_early(void)
 {
-	struct hlist_head *d;
-	unsigned long order;
-	unsigned int nr_hash;
-	int i;
+	struct hlist_head *p;
+	int loop;
+
+	dentry_hashtable =
+		alloc_large_system_hash("Dentry cache",
+					sizeof(struct hlist_head),
+					dhash_entries,
+					13,
+					0,
+					&d_hash_shift,
+					&d_hash_mask);
+
+	p = dentry_hashtable;
+	loop = 1 << d_hash_shift;
+	do {
+		INIT_HLIST_HEAD(p);
+		p++;
+		loop--;
+	} while (loop);
+}
 
+static void __init dcache_init(unsigned long mempages)
+{
 	/* 
 	 * A constructor could be added for stable state like the lists,
 	 * but it is probably not worth it because of the cache nature
@@ -1580,45 +1599,6 @@ static void __init dcache_init(unsigned 
 					 NULL, NULL);
 	
 	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
-
-	if (!dhash_entries)
-		dhash_entries = PAGE_SHIFT < 13 ?
-				mempages >> (13 - PAGE_SHIFT) :
-				mempages << (PAGE_SHIFT - 13);
-
-	dhash_entries *= sizeof(struct hlist_head);
-	for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
-		;
-
-	do {
-		unsigned long tmp;
-
-		nr_hash = (1UL << order) * PAGE_SIZE /
-			sizeof(struct hlist_head);
-		d_hash_mask = (nr_hash - 1);
-
-		tmp = nr_hash;
-		d_hash_shift = 0;
-		while ((tmp >>= 1UL) != 0UL)
-			d_hash_shift++;
-
-		dentry_hashtable = (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (dentry_hashtable == NULL && --order >= 0);
-
-	printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
-
-	if (!dentry_hashtable)
-		panic("Failed to allocate dcache hash table\n");
-
-	d = dentry_hashtable;
-	i = nr_hash;
-	do {
-		INIT_HLIST_HEAD(d);
-		d++;
-		i--;
-	} while (i);
 }
 
 /* SLAB cache for __getname() consumers */
@@ -1632,6 +1612,12 @@ EXPORT_SYMBOL(d_genocide);
 extern void bdev_cache_init(void);
 extern void chrdev_init(void);
 
+void __init vfs_caches_init_early(void)
+{
+	dcache_init_early();
+	inode_init_early();
+}
+
 void __init vfs_caches_init(unsigned long mempages)
 {
 	unsigned long reserve;
diff -uNrp linux-2.6.7-rc3/fs/inode.c linux-2.6.7-rc3-hash/fs/inode.c
--- linux-2.6.7-rc3/fs/inode.c	2004-06-11 09:36:27.000000000 +0100
+++ linux-2.6.7-rc3-hash/fs/inode.c	2004-06-11 10:52:48.744705411 +0100
@@ -20,6 +20,7 @@
 #include <linux/security.h>
 #include <linux/pagemap.h>
 #include <linux/cdev.h>
+#include <linux/bootmem.h>
 
 /*
  * This is needed for the following functions:
@@ -1345,54 +1346,35 @@ __setup("ihash_entries=", set_ihash_entr
 /*
  * Initialize the waitqueues and inode hash table.
  */
-void __init inode_init(unsigned long mempages)
+void __init inode_init_early(void)
 {
-	struct hlist_head *head;
-	unsigned long order;
-	unsigned int nr_hash;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
-		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
+	struct hlist_head *p;
+	int loop;
 
-	if (!ihash_entries)
-		ihash_entries = PAGE_SHIFT < 14 ?
-				mempages >> (14 - PAGE_SHIFT) :
-				mempages << (PAGE_SHIFT - 14);
-
-	ihash_entries *= sizeof(struct hlist_head);
-	for (order = 0; ((1UL << order) << PAGE_SHIFT) < ihash_entries; order++)
-		;
+	inode_hashtable =
+		alloc_large_system_hash("Inode-cache",
+					sizeof(struct hlist_head),
+					ihash_entries,
+					14,
+					0,
+					&i_hash_shift,
+					&i_hash_mask);
 
+	p = inode_hashtable;
+	loop = 1 << i_hash_shift;
 	do {
-		unsigned long tmp;
-
-		nr_hash = (1UL << order) * PAGE_SIZE /
-			sizeof(struct hlist_head);
-		i_hash_mask = (nr_hash - 1);
-
-		tmp = nr_hash;
-		i_hash_shift = 0;
-		while ((tmp >>= 1UL) != 0UL)
-			i_hash_shift++;
-
-		inode_hashtable = (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (inode_hashtable == NULL && --order >= 0);
-
-	printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
+		INIT_HLIST_HEAD(p);
+		p++;
+		loop--;
+	} while (loop);
+}
 
-	if (!inode_hashtable)
-		panic("Failed to allocate inode hash table\n");
+void __init inode_init(unsigned long mempages)
+{
+	int i;
 
-	head = inode_hashtable;
-	i = nr_hash;
-	do {
-		INIT_HLIST_HEAD(head);
-		head++;
-		i--;
-	} while (i);
+	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
+		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
 
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
diff -uNrp linux-2.6.7-rc3/include/linux/bootmem.h linux-2.6.7-rc3-hash/include/linux/bootmem.h
--- linux-2.6.7-rc3/include/linux/bootmem.h	2004-06-11 09:25:03.000000000 +0100
+++ linux-2.6.7-rc3-hash/include/linux/bootmem.h	2004-06-11 10:38:23.000000000 +0100
@@ -67,4 +67,12 @@ extern void * __init __alloc_bootmem_nod
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+extern void *__init alloc_large_system_hash(const char *tablename,
+					    unsigned long bucketsize,
+					    unsigned long numentries,
+					    int scale,
+					    int consider_highmem,
+					    unsigned int *_hash_shift,
+					    unsigned int *_hash_mask);
+
 #endif /* _LINUX_BOOTMEM_H */
diff -uNrp linux-2.6.7-rc3/include/linux/fs.h linux-2.6.7-rc3-hash/include/linux/fs.h
--- linux-2.6.7-rc3/include/linux/fs.h	2004-06-11 09:36:36.000000000 +0100
+++ linux-2.6.7-rc3-hash/include/linux/fs.h	2004-06-11 10:46:44.290698753 +0100
@@ -221,6 +221,7 @@ extern int leases_enable, dir_notify_ena
 extern void update_atime (struct inode *);
 
 extern void inode_init(unsigned long);
+extern void inode_init_early(void);
 extern void mnt_init(unsigned long);
 extern void files_init(unsigned long);
 
@@ -1199,6 +1200,7 @@ extern int filp_close(struct file *, fl_
 extern char * getname(const char __user *);
 
 /* fs/dcache.c */
+extern void vfs_caches_init_early(void);
 extern void vfs_caches_init(unsigned long);
 
 #define __getname()	kmem_cache_alloc(names_cachep, SLAB_KERNEL)
diff -uNrp linux-2.6.7-rc3/include/linux/mmzone.h linux-2.6.7-rc3-hash/include/linux/mmzone.h
--- linux-2.6.7-rc3/include/linux/mmzone.h	2004-06-11 09:36:36.000000000 +0100
+++ linux-2.6.7-rc3-hash/include/linux/mmzone.h	2004-06-11 09:44:41.531812981 +0100
@@ -20,6 +20,18 @@
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif
 
+/*
+ * system hash table size limits
+ * - on large memory machines, we may want to allocate a bigger hash than that
+ *   permitted by MAX_ORDER, so we allocate with the bootmem allocator, and are
+ *   limited to this size
+ */
+#if MAX_ORDER > 14
+#define MAX_SYS_HASH_TABLE_ORDER MAX_ORDER
+#else
+#define MAX_SYS_HASH_TABLE_ORDER 14
+#endif
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		*map;
diff -uNrp linux-2.6.7-rc3/init/main.c linux-2.6.7-rc3-hash/init/main.c
--- linux-2.6.7-rc3/init/main.c	2004-06-11 09:36:36.565153337 +0100
+++ linux-2.6.7-rc3-hash/init/main.c	2004-06-11 10:48:37.028742509 +0100
@@ -454,6 +454,7 @@ asmlinkage void __init start_kernel(void
 		initrd_start = 0;
 	}
 #endif
+	vfs_caches_init_early();
 	mem_init();
 	kmem_cache_init();
 	if (late_time_init)
diff -uNrp linux-2.6.7-rc3/mm/page_alloc.c linux-2.6.7-rc3-hash/mm/page_alloc.c
--- linux-2.6.7-rc3/mm/page_alloc.c	2004-06-11 09:36:36.000000000 +0100
+++ linux-2.6.7-rc3-hash/mm/page_alloc.c	2004-06-11 11:21:59.761918397 +0100
@@ -55,6 +55,9 @@ EXPORT_SYMBOL(zone_table);
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
+static unsigned long __initdata nr_kernel_pages;
+static unsigned long __initdata nr_all_pages;
+
 /*
  * Temporary debugging check for pages not lying within a given zone.
  */
@@ -1454,6 +1457,10 @@ static void __init free_area_init_core(s
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
+		if (j == ZONE_DMA || j == ZONE_NORMAL)
+			nr_kernel_pages += realsize;
+		nr_all_pages += realsize;
+
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 		zone->name = zone_names[j];
@@ -1994,3 +2001,78 @@ int lower_zone_protection_sysctl_handler
 	setup_per_zone_protection();
 	return 0;
 }
+
+/*
+ * allocate a large system hash table from bootmem
+ * - it is assumed that the hash table must contain an exact power-of-2
+ *   quantity of entries
+ */
+static inline int log2(unsigned long x) __attribute__((pure));
+static inline int log2(unsigned long x)
+{
+	int r = 0;
+	for (x >>= 1; x > 0; x >>= 1)
+		r++;
+	return r;
+}
+
+void *__init alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int consider_highmem,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask)
+{
+	unsigned long mem, max, log2qty, size;
+	void *table;
+
+	/* round applicable memory size up to nearest megabyte */
+	mem = consider_highmem ? nr_all_pages : nr_kernel_pages;
+	mem += (1UL << (20 - PAGE_SHIFT)) - 1;
+	mem >>= 20 - PAGE_SHIFT;
+	mem <<= 20 - PAGE_SHIFT;
+
+	/* limit to 1 bucket per 2^scale bytes of applicable memory (rounded
+	 * up to the next power of 2 in size) */
+	if (scale > PAGE_SHIFT)
+		mem >>= (scale - PAGE_SHIFT);
+	else
+		mem <<= (PAGE_SHIFT - scale);
+
+	mem = 1UL << (log2(mem) + 1);
+
+	/* limit allocation size */
+	max = (1UL << (PAGE_SHIFT + MAX_SYS_HASH_TABLE_ORDER)) / bucketsize;
+	if (max > mem)
+		max = mem;
+
+	/* allow the kernel cmdline to have a say */
+	if (!numentries || numentries > max)
+		numentries = max;
+
+	log2qty = log2(numentries);
+
+	do {
+		size = bucketsize << log2qty;
+
+		table = alloc_bootmem(size);
+
+	} while (!table && size > PAGE_SIZE && --log2qty);
+
+	if (!table)
+		panic("Failed to allocate %s hash table\n", tablename);
+
+	printk("%s hash table entries: %d (order: %d, %lu bytes)\n",
+	       tablename,
+	       (1U << log2qty),
+	       log2(size) - PAGE_SHIFT,
+	       size);
+
+	if (_hash_shift)
+		*_hash_shift = log2qty;
+	if (_hash_mask)
+		*_hash_mask = (1 << log2qty) - 1;
+
+	return table;
+}

  parent reply	other threads:[~2004-06-11 14:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-06-11 10:44 [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size David Howells
2004-06-11 10:48 ` Andrew Morton
2004-06-11 11:12   ` David Howells
2004-06-11 22:04     ` Andrew Morton
2004-06-11 23:03       ` Martin J. Bligh
2004-06-11 23:19         ` Andrew Morton
2004-06-11 23:18           ` Martin J. Bligh
2004-06-11 23:30             ` Andrew Morton
2004-06-12 12:45               ` Andy Whitcroft
2004-06-13 16:09           ` Linus Torvalds
2004-06-11 14:21 ` David Howells [this message]
2004-06-11 22:01   ` [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size [#2] Andrew Morton
2004-06-14 10:47     ` [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size [try #3] David Howells
2004-06-14 11:04       ` Andrew Morton
2004-06-14 11:41         ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6567.1086963705@redhat.com \
    --to=dhowells@redhat.com \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox