From: Nishanth Aravamudan <nacc@us.ibm.com>
To: William Lee Irwin III <wli@holomorphy.com>
Cc: Christoph Lameter <clameter@sgi.com>,
lee.schermerhorn@hp.com, anton@samba.org,
akpm@linux-foundation.org, linux-mm@kvack.org
Subject: [PATCH v7][RFC] Fix hugetlb pool allocation with empty nodes
Date: Tue, 12 Jun 2007 17:04:46 -0700 [thread overview]
Message-ID: <20070613000446.GL3798@us.ibm.com> (raw)
In-Reply-To: <20070612191347.GE11781@holomorphy.com>
On 12.06.2007 [12:13:47 -0700], William Lee Irwin III wrote:
> On 11.06.2007 [22:15:12 -0700], William Lee Irwin III wrote:
> >> For initially filling the pool one can just loop over nid's modulo the
> >> number of populated nodes and pass down a stack-allocated variable.
>
> On Tue, Jun 12, 2007 at 10:45:03AM -0700, Nishanth Aravamudan wrote:
> > But how does one differentiate between "initally filling" the pool and a
> > later attempt to add to the pool (or even just marginally later).
> > I guess I don't see why folks are so against this static variable :) It
> > does the job and removing it seems like it could be an independent
> > cleanup?
>
> Well, another approach is to just statically initialize it to something
> and then always check to make sure the node for the nid has memory, and
> if not, find the next nid with a node with memory from the populated map.
How does something like this look? Or is it overkill?
[PATCH 2.6.22-rc4-mm2] Fix hugetlb pool allocation with empty nodes V7
Anton found a problem with the hugetlb pool allocation when some nodes
have no memory (http://marc.info/?l=linux-mm&m=118133042025995&w=2). Lee
worked on versions that tried to fix it, but none were accepted.
Christoph has created a set of patches which allow for GFP_THISNODE
allocations to fail if the node has no memory and for exporting a
node_memory_map indicating which nodes have memory. Since mempolicy.c
already has a number of functions which support interleaving, create a
mempolicy when we invoke alloc_fresh_huge_page() that specifies
interleaving across all the nodes in node_memory_map, rather than custom
interleaving code in hugetlb.c. This requires adding some dummy
functions, and some declarations, in mempolicy.h to compile with NUMA or
!NUMA.
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Lee Schermerhorn <lee.schermerhon@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 22b668c..c8a68b8 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -76,6 +76,8 @@ struct mempolicy {
* The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
*/
+extern struct mempolicy *mpol_new(int mode, nodemask_t *nodes);
+
extern void __mpol_free(struct mempolicy *pol);
static inline void mpol_free(struct mempolicy *pol)
{
@@ -164,6 +166,8 @@ static inline void check_highest_zone(enum zone_type k)
policy_zone = k;
}
+extern unsigned interleave_nodes(struct mempolicy *policy);
+
int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
@@ -179,6 +183,11 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
#define mpol_set_vma_default(vma) do {} while(0)
+static inline struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
+{
+ return NULL;
+}
+
static inline void mpol_free(struct mempolicy *p)
{
}
@@ -267,6 +276,11 @@ static inline int do_migrate_pages(struct mm_struct *mm,
static inline void check_highest_zone(int k)
{
}
+
+static inline unsigned interleave_nodes(struct mempolicy *policy)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA */
#endif /* __KERNEL__ */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 858c0b3..1c13687 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -103,15 +103,20 @@ static void free_huge_page(struct page *page)
spin_unlock(&hugetlb_lock);
}
-static int alloc_fresh_huge_page(void)
+static int alloc_fresh_huge_page(struct mempolicy *policy)
{
- static int nid = 0;
+ int nid;
struct page *page;
- page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
- HUGETLB_PAGE_ORDER);
- nid = next_node(nid, node_online_map);
- if (nid == MAX_NUMNODES)
- nid = first_node(node_online_map);
+ int start_nid = interleave_nodes(policy);
+
+ nid = start_nid;
+
+ do {
+ page = alloc_pages_node(nid,
+ htlb_alloc_mask|__GFP_COMP|GFP_THISNODE,
+ HUGETLB_PAGE_ORDER);
+ nid = interleave_nodes(policy);
+ } while (!page && nid != start_nid);
if (page) {
set_compound_page_dtor(page, free_huge_page);
spin_lock(&hugetlb_lock);
@@ -153,6 +158,7 @@ fail:
static int __init hugetlb_init(void)
{
unsigned long i;
+ struct mempolicy *pol;
if (HPAGE_SHIFT == 0)
return 0;
@@ -160,11 +166,16 @@ static int __init hugetlb_init(void)
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&hugepage_freelists[i]);
+ pol = mpol_new(MPOL_INTERLEAVE, &node_memory_map);
+ if (IS_ERR(pol))
+ goto quit;
for (i = 0; i < max_huge_pages; ++i) {
- if (!alloc_fresh_huge_page())
+ if (!alloc_fresh_huge_page(pol))
break;
}
+ mpol_free(pol);
max_huge_pages = free_huge_pages = nr_huge_pages = i;
+quit:
printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
return 0;
}
@@ -232,10 +243,16 @@ static inline void try_to_free_low(unsigned long count)
static unsigned long set_max_huge_pages(unsigned long count)
{
+ struct mempolicy *pol;
+
+ pol = mpol_new(MPOL_INTERLEAVE, &node_memory_map);
+ if (IS_ERR(pol))
+ return nr_huge_pages;
while (count > nr_huge_pages) {
- if (!alloc_fresh_huge_page())
- return nr_huge_pages;
+ if (!alloc_fresh_huge_page(pol))
+ break;
}
+ mpol_free(pol);
if (count >= nr_huge_pages)
return nr_huge_pages;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 21458ca..c576d32 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -171,7 +171,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
}
/* Create a new policy */
-static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
+struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
{
struct mempolicy *policy;
@@ -1121,7 +1121,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
}
/* Do dynamic interleaving for a process */
-static unsigned interleave_nodes(struct mempolicy *policy)
+unsigned interleave_nodes(struct mempolicy *policy)
{
unsigned nid, next;
struct task_struct *me = current;
--
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-06-13 0:04 UTC|newest]
Thread overview: 140+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-06-11 20:27 [PATCH] Add populated_map to account for memoryless nodes Nishanth Aravamudan, Lee Schermerhorn
2007-06-11 21:25 ` Christoph Lameter
2007-06-11 22:10 ` [PATCH v2] " Nishanth Aravamudan
2007-06-11 22:42 ` Christoph Lameter
2007-06-11 22:52 ` [PATCH v3] " Nishanth Aravamudan
2007-06-11 23:00 ` Christoph Lameter
2007-06-11 23:41 ` [PATCH v4] " Nishanth Aravamudan
2007-06-11 23:45 ` Christoph Lameter
2007-06-12 0:07 ` [PATCH] populated_map: fix !NUMA case, remove comment Nishanth Aravamudan
2007-06-12 0:41 ` Christoph Lameter
2007-06-12 1:43 ` Nishanth Aravamudan
2007-06-12 1:45 ` Christoph Lameter
2007-06-12 1:52 ` Nishanth Aravamudan
2007-06-12 2:39 ` Nishanth Aravamudan
2007-06-12 2:02 ` Nishanth Aravamudan
2007-06-12 2:20 ` Christoph Lameter
2007-06-12 2:32 ` Nishanth Aravamudan
2007-06-12 2:54 ` Christoph Lameter
2007-06-12 3:20 ` Nishanth Aravamudan
2007-06-12 3:21 ` Christoph Lameter
2007-06-12 3:31 ` Nishanth Aravamudan
2007-06-12 15:06 ` Lee Schermerhorn
2007-06-12 17:28 ` Nishanth Aravamudan
2007-06-12 18:43 ` Christoph Lameter
2007-06-12 18:48 ` Lee Schermerhorn
2007-06-12 18:51 ` Christoph Lameter
2007-06-12 19:44 ` Lee Schermerhorn
2007-06-12 19:48 ` Christoph Lameter
2007-06-12 19:58 ` Christoph Lameter
2007-06-12 20:01 ` Nishanth Aravamudan
2007-06-13 15:30 ` Lee Schermerhorn
2007-06-13 17:58 ` Nishanth Aravamudan
2007-06-13 18:21 ` Lee Schermerhorn
2007-06-13 19:01 ` Nishanth Aravamudan
2007-06-13 22:51 ` Christoph Lameter
2007-06-14 15:50 ` Lee Schermerhorn
2007-06-14 15:57 ` Christoph Lameter
2007-06-14 16:54 ` Lee Schermerhorn
2007-06-14 16:09 ` Nishanth Aravamudan
2007-06-14 16:15 ` Christoph Lameter
2007-06-14 17:07 ` Lee Schermerhorn
2007-06-14 17:16 ` Christoph Lameter
2007-06-14 18:04 ` Lee Schermerhorn
2007-06-14 22:35 ` Nishanth Aravamudan
2007-06-13 22:50 ` Christoph Lameter
2007-06-13 23:09 ` Nishanth Aravamudan
2007-06-13 23:12 ` Christoph Lameter
2007-06-13 23:18 ` Nishanth Aravamudan
2007-06-13 23:26 ` Christoph Lameter
2007-06-13 23:56 ` Nishanth Aravamudan
2007-06-14 14:23 ` Lee Schermerhorn
2007-06-13 22:49 ` Christoph Lameter
2007-06-12 19:55 ` Nishanth Aravamudan
2007-06-12 18:41 ` Christoph Lameter
2007-06-12 19:07 ` Lee Schermerhorn
2007-06-12 19:13 ` Christoph Lameter
2007-06-11 23:08 ` [PATCH][RFC] Fix INTERLEAVE with memoryless nodes Nishanth Aravamudan
2007-06-11 23:10 ` [PATCH v6][RFC] Fix hugetlb pool allocation with empty nodes Nishanth Aravamudan
2007-06-11 23:11 ` [PATCH][RFC] hugetlb: numafy several functions Nishanth Aravamudan
2007-06-11 23:13 ` [PATCH][RFC] hugetlb: add per-node nr_hugepages sysfs attribute Nishanth Aravamudan
2007-06-11 23:40 ` Christoph Lameter
2007-06-11 23:42 ` Christoph Lameter
2007-06-12 0:19 ` Nishanth Aravamudan
2007-06-12 0:43 ` Christoph Lameter
2007-06-12 2:19 ` Nishanth Aravamudan
2007-06-12 2:22 ` Christoph Lameter
2007-06-12 2:34 ` Nishanth Aravamudan
2007-06-11 23:38 ` [PATCH][RFC] hugetlb: numafy several functions Christoph Lameter
2007-06-11 23:17 ` [PATCH v6][RFC] Fix hugetlb pool allocation with empty nodes Christoph Lameter
2007-06-12 0:15 ` Nishanth Aravamudan
2007-06-12 0:47 ` Christoph Lameter
2007-06-12 2:12 ` Nishanth Aravamudan
2007-06-12 2:21 ` Christoph Lameter
2007-06-12 2:25 ` Christoph Lameter
2007-06-12 2:34 ` Nishanth Aravamudan
2007-06-12 2:55 ` Christoph Lameter
2007-06-12 3:17 ` Nishanth Aravamudan
2007-06-12 3:19 ` Christoph Lameter
2007-06-12 3:30 ` Nishanth Aravamudan
2007-06-12 3:48 ` Christoph Lameter
2007-06-12 5:07 ` Nishanth Aravamudan
2007-06-12 18:47 ` Christoph Lameter
2007-06-12 17:43 ` Nishanth Aravamudan
2007-06-12 18:49 ` Christoph Lameter
2007-06-12 2:33 ` Nishanth Aravamudan
2007-06-12 3:44 ` William Lee Irwin III
2007-06-12 3:50 ` Christoph Lameter
2007-06-12 3:53 ` William Lee Irwin III
2007-06-12 3:53 ` Christoph Lameter
2007-06-12 4:14 ` William Lee Irwin III
2007-06-12 5:09 ` Nishanth Aravamudan
2007-06-12 5:15 ` William Lee Irwin III
2007-06-12 17:36 ` Nishanth Aravamudan
2007-06-12 18:50 ` Christoph Lameter
2007-06-12 17:45 ` Nishanth Aravamudan
2007-06-12 19:13 ` William Lee Irwin III
2007-06-13 0:04 ` Nishanth Aravamudan [this message]
2007-06-13 15:26 ` [PATCH v3][RFC] hugetlb: numafy several functions Nishanth Aravamudan
2007-06-13 15:28 ` [PATCH v3][RFC] hugetlb: add per-node nr_hugepages sysfs attribute Nishanth Aravamudan
2007-06-13 18:23 ` Lee Schermerhorn
2007-06-13 19:19 ` [PATCH v4][RFC] " Nishanth Aravamudan
2007-06-13 20:05 ` Lee Schermerhorn
2007-06-13 20:29 ` Nishanth Aravamudan
2007-06-13 21:02 ` Lee Schermerhorn
2007-07-23 19:23 ` Christoph Lameter
2007-07-23 20:14 ` Lee Schermerhorn
2007-06-13 21:04 ` [PATCH v7][RFC] Fix hugetlb pool allocation with empty nodes Lee Schermerhorn
2007-06-13 21:50 ` [PATCH v7][UPDATE][RFC] " Nishanth Aravamudan
2007-06-12 14:28 ` [PATCH v6][RFC] " Lee Schermerhorn
2007-06-11 23:15 ` [PATCH][RFC] Fix INTERLEAVE with memoryless nodes Christoph Lameter
2007-06-12 0:14 ` [PATCH v2][RFC] " Nishanth Aravamudan
2007-06-12 0:42 ` Christoph Lameter
2007-06-12 0:57 ` Andrew Morton
2007-06-12 1:12 ` Christoph Lameter
2007-06-12 1:41 ` Nishanth Aravamudan
2007-06-12 1:52 ` Andrew Morton
2007-06-12 2:03 ` Nishanth Aravamudan
2007-06-12 14:19 ` [PATCH v2] Add populated_map to account for " Lee Schermerhorn
2007-06-12 17:32 ` Nishanth Aravamudan
2007-06-12 18:45 ` Christoph Lameter
2007-06-12 19:17 ` Lee Schermerhorn
2007-06-12 19:22 ` Christoph Lameter
2007-06-12 19:49 ` Nishanth Aravamudan
2007-06-12 19:51 ` Christoph Lameter
2007-06-12 20:00 ` Nishanth Aravamudan
2007-06-12 20:03 ` Christoph Lameter
2007-06-12 20:10 ` Christoph Lameter
2007-06-12 19:52 ` Christoph Lameter
2007-06-12 19:58 ` Christoph Lameter
2007-06-12 20:00 ` Nishanth Aravamudan
2007-06-12 20:06 ` Christoph Lameter
2007-06-12 14:10 ` [PATCH] " Lee Schermerhorn
2007-06-12 17:35 ` Nishanth Aravamudan
2007-06-12 18:39 ` Christoph Lameter
2007-06-12 18:54 ` Lee Schermerhorn
2007-06-12 19:00 ` Christoph Lameter
2007-06-12 2:27 ` KAMEZAWA Hiroyuki
2007-06-12 2:46 ` Nishanth Aravamudan
2007-06-12 2:53 ` Christoph Lameter
2007-06-12 3:04 ` KAMEZAWA Hiroyuki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070613000446.GL3798@us.ibm.com \
--to=nacc@us.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=anton@samba.org \
--cc=clameter@sgi.com \
--cc=lee.schermerhorn@hp.com \
--cc=linux-mm@kvack.org \
--cc=wli@holomorphy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.