All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: torvalds@osdl.org, akpm@osdl.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size [#2]
Date: Fri, 11 Jun 2004 15:21:45 +0100	[thread overview]
Message-ID: <6567.1086963705@redhat.com> (raw)
In-Reply-To: <567.1086950642@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 255 bytes --]


Hi Linus, Andrew,

Here's an update to my patch. Thanks to Ingo Oeser for noticing that the patch
had a couple of problems in the allocation loop (it would never end if an
allocation failed, and the result of the allocation didn't need casting).

David


[-- Attachment #2: bootmem-hash-267rc3-2.diff --]
[-- Type: text/plain, Size: 10515 bytes --]

diff -uNrp linux-2.6.7-rc3/fs/dcache.c linux-2.6.7-rc3-hash/fs/dcache.c
--- linux-2.6.7-rc3/fs/dcache.c	2004-06-11 09:36:25.000000000 +0100
+++ linux-2.6.7-rc3-hash/fs/dcache.c	2004-06-11 10:52:23.000000000 +0100
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
+#include <linux/bootmem.h>
 
 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
@@ -1561,13 +1562,31 @@ static int __init set_dhash_entries(char
 }
 __setup("dhash_entries=", set_dhash_entries);
 
-static void __init dcache_init(unsigned long mempages)
+static void __init dcache_init_early(void)
 {
-	struct hlist_head *d;
-	unsigned long order;
-	unsigned int nr_hash;
-	int i;
+	struct hlist_head *p;
+	int loop;
+
+	dentry_hashtable =
+		alloc_large_system_hash("Dentry cache",
+					sizeof(struct hlist_head),
+					dhash_entries,
+					13,
+					0,
+					&d_hash_shift,
+					&d_hash_mask);
+
+	p = dentry_hashtable;
+	loop = 1 << d_hash_shift;
+	do {
+		INIT_HLIST_HEAD(p);
+		p++;
+		loop--;
+	} while (loop);
+}
 
+static void __init dcache_init(unsigned long mempages)
+{
 	/* 
 	 * A constructor could be added for stable state like the lists,
 	 * but it is probably not worth it because of the cache nature
@@ -1580,45 +1599,6 @@ static void __init dcache_init(unsigned 
 					 NULL, NULL);
 	
 	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
-
-	if (!dhash_entries)
-		dhash_entries = PAGE_SHIFT < 13 ?
-				mempages >> (13 - PAGE_SHIFT) :
-				mempages << (PAGE_SHIFT - 13);
-
-	dhash_entries *= sizeof(struct hlist_head);
-	for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
-		;
-
-	do {
-		unsigned long tmp;
-
-		nr_hash = (1UL << order) * PAGE_SIZE /
-			sizeof(struct hlist_head);
-		d_hash_mask = (nr_hash - 1);
-
-		tmp = nr_hash;
-		d_hash_shift = 0;
-		while ((tmp >>= 1UL) != 0UL)
-			d_hash_shift++;
-
-		dentry_hashtable = (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (dentry_hashtable == NULL && --order >= 0);
-
-	printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
-
-	if (!dentry_hashtable)
-		panic("Failed to allocate dcache hash table\n");
-
-	d = dentry_hashtable;
-	i = nr_hash;
-	do {
-		INIT_HLIST_HEAD(d);
-		d++;
-		i--;
-	} while (i);
 }
 
 /* SLAB cache for __getname() consumers */
@@ -1632,6 +1612,12 @@ EXPORT_SYMBOL(d_genocide);
 extern void bdev_cache_init(void);
 extern void chrdev_init(void);
 
+void __init vfs_caches_init_early(void)
+{
+	dcache_init_early();
+	inode_init_early();
+}
+
 void __init vfs_caches_init(unsigned long mempages)
 {
 	unsigned long reserve;
diff -uNrp linux-2.6.7-rc3/fs/inode.c linux-2.6.7-rc3-hash/fs/inode.c
--- linux-2.6.7-rc3/fs/inode.c	2004-06-11 09:36:27.000000000 +0100
+++ linux-2.6.7-rc3-hash/fs/inode.c	2004-06-11 10:52:48.744705411 +0100
@@ -20,6 +20,7 @@
 #include <linux/security.h>
 #include <linux/pagemap.h>
 #include <linux/cdev.h>
+#include <linux/bootmem.h>
 
 /*
  * This is needed for the following functions:
@@ -1345,54 +1346,35 @@ __setup("ihash_entries=", set_ihash_entr
 /*
  * Initialize the waitqueues and inode hash table.
  */
-void __init inode_init(unsigned long mempages)
+void __init inode_init_early(void)
 {
-	struct hlist_head *head;
-	unsigned long order;
-	unsigned int nr_hash;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
-		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
+	struct hlist_head *p;
+	int loop;
 
-	if (!ihash_entries)
-		ihash_entries = PAGE_SHIFT < 14 ?
-				mempages >> (14 - PAGE_SHIFT) :
-				mempages << (PAGE_SHIFT - 14);
-
-	ihash_entries *= sizeof(struct hlist_head);
-	for (order = 0; ((1UL << order) << PAGE_SHIFT) < ihash_entries; order++)
-		;
+	inode_hashtable =
+		alloc_large_system_hash("Inode-cache",
+					sizeof(struct hlist_head),
+					ihash_entries,
+					14,
+					0,
+					&i_hash_shift,
+					&i_hash_mask);
 
+	p = inode_hashtable;
+	loop = 1 << i_hash_shift;
 	do {
-		unsigned long tmp;
-
-		nr_hash = (1UL << order) * PAGE_SIZE /
-			sizeof(struct hlist_head);
-		i_hash_mask = (nr_hash - 1);
-
-		tmp = nr_hash;
-		i_hash_shift = 0;
-		while ((tmp >>= 1UL) != 0UL)
-			i_hash_shift++;
-
-		inode_hashtable = (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (inode_hashtable == NULL && --order >= 0);
-
-	printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
+		INIT_HLIST_HEAD(p);
+		p++;
+		loop--;
+	} while (loop);
+}
 
-	if (!inode_hashtable)
-		panic("Failed to allocate inode hash table\n");
+void __init inode_init(unsigned long mempages)
+{
+	int i;
 
-	head = inode_hashtable;
-	i = nr_hash;
-	do {
-		INIT_HLIST_HEAD(head);
-		head++;
-		i--;
-	} while (i);
+	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
+		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
 
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
diff -uNrp linux-2.6.7-rc3/include/linux/bootmem.h linux-2.6.7-rc3-hash/include/linux/bootmem.h
--- linux-2.6.7-rc3/include/linux/bootmem.h	2004-06-11 09:25:03.000000000 +0100
+++ linux-2.6.7-rc3-hash/include/linux/bootmem.h	2004-06-11 10:38:23.000000000 +0100
@@ -67,4 +67,12 @@ extern void * __init __alloc_bootmem_nod
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+extern void *__init alloc_large_system_hash(const char *tablename,
+					    unsigned long bucketsize,
+					    unsigned long numentries,
+					    int scale,
+					    int consider_highmem,
+					    unsigned int *_hash_shift,
+					    unsigned int *_hash_mask);
+
 #endif /* _LINUX_BOOTMEM_H */
diff -uNrp linux-2.6.7-rc3/include/linux/fs.h linux-2.6.7-rc3-hash/include/linux/fs.h
--- linux-2.6.7-rc3/include/linux/fs.h	2004-06-11 09:36:36.000000000 +0100
+++ linux-2.6.7-rc3-hash/include/linux/fs.h	2004-06-11 10:46:44.290698753 +0100
@@ -221,6 +221,7 @@ extern int leases_enable, dir_notify_ena
 extern void update_atime (struct inode *);
 
 extern void inode_init(unsigned long);
+extern void inode_init_early(void);
 extern void mnt_init(unsigned long);
 extern void files_init(unsigned long);
 
@@ -1199,6 +1200,7 @@ extern int filp_close(struct file *, fl_
 extern char * getname(const char __user *);
 
 /* fs/dcache.c */
+extern void vfs_caches_init_early(void);
 extern void vfs_caches_init(unsigned long);
 
 #define __getname()	kmem_cache_alloc(names_cachep, SLAB_KERNEL)
diff -uNrp linux-2.6.7-rc3/include/linux/mmzone.h linux-2.6.7-rc3-hash/include/linux/mmzone.h
--- linux-2.6.7-rc3/include/linux/mmzone.h	2004-06-11 09:36:36.000000000 +0100
+++ linux-2.6.7-rc3-hash/include/linux/mmzone.h	2004-06-11 09:44:41.531812981 +0100
@@ -20,6 +20,18 @@
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif
 
+/*
+ * system hash table size limits
+ * - on large memory machines, we may want to allocate a bigger hash than that
+ *   permitted by MAX_ORDER, so we allocate with the bootmem allocator, and are
+ *   limited to this size
+ */
+#if MAX_ORDER > 14
+#define MAX_SYS_HASH_TABLE_ORDER MAX_ORDER
+#else
+#define MAX_SYS_HASH_TABLE_ORDER 14
+#endif
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		*map;
diff -uNrp linux-2.6.7-rc3/init/main.c linux-2.6.7-rc3-hash/init/main.c
--- linux-2.6.7-rc3/init/main.c	2004-06-11 09:36:36.565153337 +0100
+++ linux-2.6.7-rc3-hash/init/main.c	2004-06-11 10:48:37.028742509 +0100
@@ -454,6 +454,7 @@ asmlinkage void __init start_kernel(void
 		initrd_start = 0;
 	}
 #endif
+	vfs_caches_init_early();
 	mem_init();
 	kmem_cache_init();
 	if (late_time_init)
diff -uNrp linux-2.6.7-rc3/mm/page_alloc.c linux-2.6.7-rc3-hash/mm/page_alloc.c
--- linux-2.6.7-rc3/mm/page_alloc.c	2004-06-11 09:36:36.000000000 +0100
+++ linux-2.6.7-rc3-hash/mm/page_alloc.c	2004-06-11 11:21:59.761918397 +0100
@@ -55,6 +55,9 @@ EXPORT_SYMBOL(zone_table);
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
+static unsigned long __initdata nr_kernel_pages;
+static unsigned long __initdata nr_all_pages;
+
 /*
  * Temporary debugging check for pages not lying within a given zone.
  */
@@ -1454,6 +1457,10 @@ static void __init free_area_init_core(s
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
+		if (j == ZONE_DMA || j == ZONE_NORMAL)
+			nr_kernel_pages += realsize;
+		nr_all_pages += realsize;
+
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 		zone->name = zone_names[j];
@@ -1994,3 +2001,78 @@ int lower_zone_protection_sysctl_handler
 	setup_per_zone_protection();
 	return 0;
 }
+
+/*
+ * allocate a large system hash table from bootmem
+ * - it is assumed that the hash table must contain an exact power-of-2
+ *   quantity of entries
+ */
+static inline int log2(unsigned long x) __attribute__((pure));
+static inline int log2(unsigned long x)
+{
+	int r = 0;
+	for (x >>= 1; x > 0; x >>= 1)
+		r++;
+	return r;
+}
+
+void *__init alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int consider_highmem,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask)
+{
+	unsigned long mem, max, log2qty, size;
+	void *table;
+
+	/* round applicable memory size up to nearest megabyte */
+	mem = consider_highmem ? nr_all_pages : nr_kernel_pages;
+	mem += (1UL << (20 - PAGE_SHIFT)) - 1;
+	mem >>= 20 - PAGE_SHIFT;
+	mem <<= 20 - PAGE_SHIFT;
+
+	/* limit to 1 bucket per 2^scale bytes of low memory (rounded up to
+	 * nearest power of 2 in size) */
+	if (scale > PAGE_SHIFT)
+		mem >>= (scale - PAGE_SHIFT);
+	else
+		mem <<= (PAGE_SHIFT - scale);
+
+	mem = 1UL << (log2(mem) + 1);
+
+	/* limit allocation size */
+	max = (1UL << (PAGE_SHIFT + MAX_SYS_HASH_TABLE_ORDER)) / bucketsize;
+	if (max > mem)
+		max = mem;
+
+	/* allow the kernel cmdline to have a say */
+	if (!numentries || numentries > max)
+		numentries = max;
+
+	log2qty = log2(numentries);
+
+	do {
+		size = bucketsize << log2qty;
+
+		table = alloc_bootmem(size);
+
+	} while (!table && size > PAGE_SIZE && --log2qty);
+
+	if (!table)
+		panic("Failed to allocate %s hash table\n", tablename);
+
+	printk("%s hash table entries: %d (order: %d, %lu bytes)\n",
+	       tablename,
+	       (1U << log2qty),
+	       log2(size) - PAGE_SHIFT,
+	       size);
+
+	if (_hash_shift)
+		*_hash_shift = log2qty;
+	if (_hash_mask)
+		*_hash_mask = (1 << log2qty) - 1;
+
+	return table;
+}

  parent reply	other threads:[~2004-06-11 14:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-06-11 10:44 [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size David Howells
2004-06-11 10:48 ` Andrew Morton
2004-06-11 11:12   ` David Howells
2004-06-11 22:04     ` Andrew Morton
2004-06-11 23:03       ` Martin J. Bligh
2004-06-11 23:19         ` Andrew Morton
2004-06-11 23:18           ` Martin J. Bligh
2004-06-11 23:30             ` Andrew Morton
2004-06-12 12:45               ` Andy Whitcroft
2004-06-13 16:09           ` Linus Torvalds
2004-06-11 14:21 ` David Howells [this message]
2004-06-11 22:01   ` [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size [#2] Andrew Morton
2004-06-14 10:47     ` [PATCH] Permit inode & dentry hash tables to be allocated > MAX_ORDER size [try #3] David Howells
2004-06-14 11:04       ` Andrew Morton
2004-06-14 11:41         ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6567.1086963705@redhat.com \
    --to=dhowells@redhat.com \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.