All of lore.kernel.org
 help / color / mirror / Atom feed
From: npiggin@suse.de
To: akpm@linux-foundation.org
Cc: linux-mm@kvack.org
Subject: [patch 03/21] hugetlb: multiple hstates for multiple page sizes
Date: Wed, 04 Jun 2008 21:29:42 +1000	[thread overview]
Message-ID: <20080604113111.396631146@amd.local0.net> (raw)
In-Reply-To: 20080604112939.789444496@amd.local0.net

[-- Attachment #1: hugetlb-multiple-hstates.patch --]
[-- Type: text/plain, Size: 9300 bytes --]

Add basic support for more than one hstate in hugetlbfs. This is the
key to supporting multiple hugetlbfs page sizes at once.

- Rather than a single hstate, we now have an array, with an iterator
- default_hstate continues to be the struct hstate which we use by default
- Add functions for architectures to register new hstates

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 include/linux/hugetlb.h |   16 +++++++
 mm/hugetlb.c            |   98 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 95 insertions(+), 19 deletions(-)

Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c	2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/mm/hugetlb.c	2008-06-04 20:51:18.000000000 +1000
@@ -22,12 +22,19 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-unsigned long max_huge_pages;
-unsigned long sysctl_overcommit_huge_pages;
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
 
-struct hstate default_hstate;
+static int max_hstate = 0;
+unsigned int default_hstate_idx;
+struct hstate hstates[HUGE_MAX_HSTATE];
+
+/* for command line parsing */
+static struct hstate * __initdata parsed_hstate = NULL;
+static unsigned long __initdata default_hstate_max_huge_pages = 0;
+
+#define for_each_hstate(h) \
+	for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -272,13 +279,24 @@ static void update_and_free_page(struct 
 	__free_pages(page, huge_page_order(h));
 }
 
+struct hstate *size_to_hstate(unsigned long size)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		if (huge_page_size(h) == size)
+			return h;
+	}
+	return NULL;
+}
+
 static void free_huge_page(struct page *page)
 {
 	/*
 	 * Can't pass hstate in here because it is called from the
 	 * compound page destructor.
 	 */
-	struct hstate *h = &default_hstate;
+	struct hstate *h = page_hstate(page);
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
@@ -812,39 +830,94 @@ static struct page *alloc_huge_page(stru
 	return page;
 }
 
-static int __init hugetlb_init(void)
+static void __init hugetlb_init_one_hstate(struct hstate *h)
 {
 	unsigned long i;
-	struct hstate *h = &default_hstate;
-
-	if (HPAGE_SHIFT == 0)
-		return 0;
-
-	if (!h->order) {
-		h->order = HPAGE_SHIFT - PAGE_SHIFT;
-		h->mask = HPAGE_MASK;
-	}
 
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
 
 	h->hugetlb_next_nid = first_node(node_online_map);
 
-	for (i = 0; i < max_huge_pages; ++i) {
+	for (i = 0; i < h->max_huge_pages; ++i) {
 		if (!alloc_fresh_huge_page(h))
 			break;
 	}
-	max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
-	printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n",
-			h->free_huge_pages);
+	h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+}
+
+static void __init hugetlb_init_hstates(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		hugetlb_init_one_hstate(h);
+	}
+}
+
+static void __init report_hugepages(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		printk(KERN_INFO "Total HugeTLB memory allocated, "
+				"%ld %dMB pages\n",
+				h->free_huge_pages,
+				1 << (h->order + PAGE_SHIFT - 20));
+	}
+}
+
+static int __init hugetlb_init(void)
+{
+	BUILD_BUG_ON(HPAGE_SHIFT == 0);
+
+	if (!size_to_hstate(HPAGE_SIZE)) {
+		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	}
+	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+
+	hugetlb_init_hstates();
+
+	report_hugepages();
+
 	return 0;
 }
 module_init(hugetlb_init);
 
+/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_add_hstate(unsigned order)
+{
+	struct hstate *h;
+	if (size_to_hstate(PAGE_SIZE << order)) {
+		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
+		return;
+	}
+	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
+	BUG_ON(order == 0);
+	h = &hstates[max_hstate++];
+	h->order = order;
+	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	hugetlb_init_one_hstate(h);
+	parsed_hstate = h;
+}
+
 static int __init hugetlb_setup(char *s)
 {
-	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
-		max_huge_pages = 0;
+	unsigned long *mhp;
+
+	/*
+	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
+	 * so this hugepages= parameter goes to the "default hstate".
+	 */
+	if (!max_hstate)
+		mhp = &default_hstate_max_huge_pages;
+	else
+		mhp = &parsed_hstate->max_huge_pages;
+
+	if (sscanf(s, "%lu", mhp) <= 0)
+		*mhp = 0;
+
 	return 1;
 }
 __setup("hugepages=", hugetlb_setup);
@@ -875,7 +948,7 @@ static void try_to_free_low(struct hstat
 			if (PageHighMem(page))
 				continue;
 			list_del(&page->lru);
-			update_and_free_page(page);
+			update_and_free_page(h, page);
 			h->free_huge_pages--;
 			h->free_huge_pages_node[page_to_nid(page)]--;
 		}
@@ -888,10 +961,9 @@ static inline void try_to_free_low(struc
 #endif
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 {
 	unsigned long min_count, ret;
-	struct hstate *h = &default_hstate;
 
 	/*
 	 * Increase the pool size
@@ -962,8 +1034,19 @@ int hugetlb_sysctl_handler(struct ctl_ta
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
 {
+	struct hstate *h = &default_hstate;
+	unsigned long tmp;
+
+	if (!write)
+		tmp = h->max_huge_pages;
+
+	table->data = &tmp;
+	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
-	max_huge_pages = set_max_huge_pages(max_huge_pages);
+
+	if (write)
+		h->max_huge_pages = set_max_huge_pages(h, tmp);
+
 	return 0;
 }
 
@@ -984,10 +1067,21 @@ int hugetlb_overcommit_handler(struct ct
 			size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
+	unsigned long tmp;
+
+	if (!write)
+		tmp = h->nr_overcommit_huge_pages;
+
+	table->data = &tmp;
+	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
-	spin_lock(&hugetlb_lock);
-	h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
-	spin_unlock(&hugetlb_lock);
+
+	if (write) {
+		spin_lock(&hugetlb_lock);
+		h->nr_overcommit_huge_pages = tmp;
+		spin_unlock(&hugetlb_lock);
+	}
+
 	return 0;
 }
 
Index: linux-2.6/include/linux/hugetlb.h
===================================================================
--- linux-2.6.orig/include/linux/hugetlb.h	2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/include/linux/hugetlb.h	2008-06-04 20:51:18.000000000 +1000
@@ -36,8 +36,6 @@ int hugetlb_reserve_pages(struct inode *
 						struct vm_area_struct *vma);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
-extern unsigned long max_huge_pages;
-extern unsigned long sysctl_overcommit_huge_pages;
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
@@ -181,7 +179,17 @@ struct hstate {
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 };
 
-extern struct hstate default_hstate;
+void __init hugetlb_add_hstate(unsigned order);
+struct hstate *size_to_hstate(unsigned long size);
+
+#ifndef HUGE_MAX_HSTATE
+#define HUGE_MAX_HSTATE 1
+#endif
+
+extern struct hstate hstates[HUGE_MAX_HSTATE];
+extern unsigned int default_hstate_idx;
+
+#define default_hstate (hstates[default_hstate_idx])
 
 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
@@ -230,6 +238,11 @@ static inline unsigned int blocks_per_hu
 
 #include <asm/hugetlb.h>
 
+static inline struct hstate *page_hstate(struct page *page)
+{
+	return size_to_hstate(PAGE_SIZE << compound_order(page));
+}
+
 #else
 struct hstate {};
 #define hstate_file(f) NULL
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c	2008-06-04 20:51:09.000000000 +1000
+++ linux-2.6/kernel/sysctl.c	2008-06-04 20:51:18.000000000 +1000
@@ -926,7 +926,7 @@ static struct ctl_table vm_table[] = {
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.procname	= "nr_hugepages",
-		.data		= &max_huge_pages,
+		.data		= NULL,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_sysctl_handler,
@@ -952,10 +952,12 @@ static struct ctl_table vm_table[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nr_overcommit_hugepages",
-		.data		= &sysctl_overcommit_huge_pages,
-		.maxlen		= sizeof(sysctl_overcommit_huge_pages),
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_overcommit_handler,
+		.extra1		= (void *)&hugetlb_zero,
+		.extra2		= (void *)&hugetlb_infinity,
 	},
 #endif
 	{

-- 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-06-04 11:29 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-04 11:29 [patch 00/21] hugetlb patches resend npiggin
2008-06-04 11:29 ` [patch 01/21] hugetlb: factor out prep_new_huge_page npiggin
2008-06-04 11:29 ` [patch 02/21] hugetlb: modular state for hugetlb page size npiggin
2008-06-04 11:29 ` npiggin [this message]
2008-06-04 11:29 ` [patch 04/21] hugetlbfs: per mount huge page sizes npiggin
2008-06-04 11:29 ` [patch 05/21] hugetlb: new sysfs interface npiggin
2008-06-08 18:59   ` Andrew Morton
2008-06-10  3:02     ` Nick Piggin
2008-06-12  1:11       ` Nishanth Aravamudan
2008-07-02  0:24         ` Nishanth Aravamudan
2008-06-20 15:18   ` Dave Hansen
2008-06-23  2:48     ` Nick Piggin
2008-06-23  3:31       ` Andrew Morton
2008-06-23  3:52         ` Nick Piggin
2008-06-04 11:29 ` [patch 06/21] hugetlb: abstract numa round robin selection npiggin
2008-06-04 11:29 ` [patch 07/21] mm: introduce non panic alloc_bootmem npiggin
2008-06-04 11:29 ` [patch 08/21] mm: export prep_compound_page to mm npiggin
2008-06-04 11:29 ` [patch 09/21] hugetlb: support larger than MAX_ORDER npiggin
2008-06-04 11:29 ` [patch 10/21] hugetlb: support boot allocate different sizes npiggin
2008-06-04 11:29 ` [patch 11/21] hugetlb: printk cleanup npiggin
2008-06-04 11:29 ` [patch 12/21] hugetlb: introduce pud_huge npiggin
2008-06-11 23:16   ` Andrew Morton
2008-06-12  0:45     ` Nick Piggin
2008-06-04 11:29 ` [patch 13/21] x86: support GB hugepages on 64-bit npiggin
2008-06-04 11:29 ` [patch 14/21] x86: add hugepagesz option " npiggin
2008-06-04 17:51   ` Randy Dunlap
2008-06-05  2:01     ` Nick Piggin
2008-06-04 11:29 ` [patch 15/21] hugetlb: override default huge page size npiggin
2008-06-09 10:41   ` Andrew Morton
2008-06-10  3:22     ` Nick Piggin
2008-06-04 11:29 ` [patch 16/21] hugetlb: allow arch overried hugepage allocation npiggin
2008-06-08 19:14   ` Andrew Morton
2008-06-10  3:26     ` Nick Piggin
2008-06-12  8:08     ` Andy Whitcroft
2008-06-04 11:29 ` [patch 17/21] powerpc: function to allocate gigantic hugepages npiggin
2008-06-04 11:29 ` [patch 18/21] powerpc: scan device tree for gigantic pages npiggin
2008-06-04 11:29 ` [patch 19/21] powerpc: define support for 16G hugepages npiggin
2008-06-08 19:05   ` Andrew Morton
2008-06-10  3:05     ` Nick Piggin
2008-06-04 11:29 ` [patch 20/21] fs: check for statfs overflow npiggin
2008-06-08 19:06   ` Andrew Morton
2008-06-10  3:12     ` Nick Piggin
2008-06-04 11:30 ` [patch 21/21] powerpc: support multiple hugepage sizes npiggin
2008-07-14 16:32   ` [patch] powerpc: hugetlb pgtable cache access cleanup Jon Tollefson
2008-07-14 16:32     ` Jon Tollefson
2008-07-14 22:56     ` Andrew Morton
2008-07-14 22:56       ` Andrew Morton
2008-07-15 22:49       ` [patch v2] " Jon Tollefson
2008-07-15 22:49         ` Jon Tollefson
2008-07-15 22:57         ` Andrew Morton
2008-07-15 22:57           ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080604113111.396631146@amd.local0.net \
    --to=npiggin@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.