All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
To: linux-mm@kvack.org, linux-numa@vger.kernel.org
Cc: akpm@linux-foundation.org, Mel Gorman <mel@csn.ul.ie>,
	Greg KH <gregkh@suse.de>, Nishanth Aravamudan <nacc@us.ibm.com>,
	andi@firstfloor.org, David Rientjes <rientjes@google.com>,
	Adam Litke <agl@us.ibm.com>, Andy Whitcroft <apw@canonical.com>,
	eric.whitney@hp.com
Subject: [PATCH 2/4] hugetlb:  numafy several functions
Date: Wed, 29 Jul 2009 14:11:52 -0400	[thread overview]
Message-ID: <20090729181152.23716.22375.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20090729181139.23716.85986.sendpatchset@localhost.localdomain>

PATCH/RFC 2/4 hugetlb:  numafy several functions

Against: 2.6.31-rc3-mmotm-090716-1432
atop the previously posted alloc_bootmem_hugepages fix.
[http://marc.info/?l=linux-mm&m=124775468226290&w=4]

Based on a patch by Nishanth Aravamudan <nacc@us.ibm.com>, circa
april2008.

Factor out functions to dequeue and free huge pages and/or adjust
surplus huge page count for a specific node in support of subsequent
patch to alloc or free huge pages on a specified node.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>

---
 mm/hugetlb.c |  126 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 72 insertions(+), 54 deletions(-)

Index: linux-2.6.31-rc3-mmotm-090716-1432/mm/hugetlb.c
===================================================================
--- linux-2.6.31-rc3-mmotm-090716-1432.orig/mm/hugetlb.c	2009-07-23 11:10:29.000000000 -0400
+++ linux-2.6.31-rc3-mmotm-090716-1432/mm/hugetlb.c	2009-07-27 15:26:39.000000000 -0400
@@ -456,6 +456,17 @@ static void enqueue_huge_page(struct hst
 	h->free_huge_pages_node[nid]++;
 }
 
+static struct page *hstate_dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	h->free_huge_pages--;
+	h->free_huge_pages_node[nid]--;
+	return page;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
@@ -487,11 +498,7 @@ static struct page *dequeue_huge_page_vm
 		nid = zone_to_nid(zone);
 		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
+			page = hstate_dequeue_huge_page_node(h, nid);
 
 			if (!avoid_reserve)
 				decrement_hugepage_resv_vma(h, vma);
@@ -599,12 +606,12 @@ int PageHuge(struct page *page)
 	return dtor == free_huge_page;
 }
 
-static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
+static int alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
 	if (h->order >= MAX_ORDER)
-		return NULL;
+		return 0;
 
 	page = alloc_pages_exact_node(nid,
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
@@ -613,12 +620,12 @@ static struct page *alloc_fresh_huge_pag
 	if (page) {
 		if (arch_prepare_hugepage(page)) {
 			__free_pages(page, huge_page_order(h));
-			return NULL;
+			return 0;
 		}
 		prep_new_huge_page(h, page, nid);
 	}
 
-	return page;
+	return 1;
 }
 
 /*
@@ -658,7 +665,6 @@ static int hstate_next_node_to_alloc(str
 
 static int alloc_fresh_huge_page(struct hstate *h)
 {
-	struct page *page;
 	int start_nid;
 	int next_nid;
 	int ret = 0;
@@ -667,11 +673,9 @@ static int alloc_fresh_huge_page(struct 
 	next_nid = start_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(h, next_nid);
-		if (page) {
-			ret = 1;
+		ret = alloc_fresh_huge_page_node(h, next_nid);
+		if (ret)
 			break;
-		}
 		next_nid = hstate_next_node_to_alloc(h);
 	} while (next_nid != start_nid);
 
@@ -699,6 +703,23 @@ static int hstate_next_node_to_free(stru
 	return nid;
 }
 
+static int hstate_free_huge_page_node(struct hstate *h, bool acct_surplus,
+							int nid)
+{
+	struct page *page;
+
+	if (list_empty(&h->hugepage_freelists[nid]))
+		return 0;
+
+	page = hstate_dequeue_huge_page_node(h, nid);
+	if (acct_surplus) {
+		h->surplus_huge_pages--;
+		h->surplus_huge_pages_node[nid]--;
+	}
+	update_and_free_page(h, page);
+	return 1;
+}
+
 /*
  * Free huge page from pool from next node to free.
  * Attempt to keep persistent huge pages more or less
@@ -719,21 +740,11 @@ static int free_pool_huge_page(struct hs
 		 * If we're returning unused surplus pages, only examine
 		 * nodes with surplus pages.
 		 */
-		if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
-		    !list_empty(&h->hugepage_freelists[next_nid])) {
-			struct page *page =
-				list_entry(h->hugepage_freelists[next_nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[next_nid]--;
-			if (acct_surplus) {
-				h->surplus_huge_pages--;
-				h->surplus_huge_pages_node[next_nid]--;
-			}
-			update_and_free_page(h, page);
-			ret = 1;
-			break;
+		if ((!acct_surplus || h->surplus_huge_pages_node[next_nid])) {
+			ret = hstate_free_huge_page_node(h, acct_surplus,
+			                                    next_nid);
+			if (ret)
+				break;
 		}
 		next_nid = hstate_next_node_to_free(h);
 	} while (next_nid != start_nid);
@@ -1173,6 +1184,31 @@ static inline void try_to_free_low(struc
 #endif
 
 /*
+ * Increment or decrement surplus_huge_pages for a specified node,
+ * if conditions permit.  Note that decrementing the surplus huge
+ * page count effective promotes a page to persistent, while
+ * incrementing the surplus count demotes a page to surplus.
+ */
+static int adjust_pool_surplus_node(struct hstate *h, int delta, int nid)
+{
+	int ret = 0;
+
+	/*
+	 * To shrink on this node, there must be a surplus page.
+	 * Surplus cannot exceed the total number of pages.
+	 */
+	if ((delta < 0 && h->surplus_huge_pages_node[nid]) ||
+	    (delta > 0 && h->surplus_huge_pages_node[nid] <
+					h->nr_huge_pages_node[nid])) {
+
+		h->surplus_huge_pages += delta;
+		h->surplus_huge_pages_node[nid] += delta;
+		ret = 1;
+	}
+	return ret;
+}
+
+/*
  * Increment or decrement surplus_huge_pages.  Keep node-specific counters
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
@@ -1191,31 +1227,13 @@ static int adjust_pool_surplus(struct hs
 	next_nid = start_nid;
 
 	do {
-		int nid = next_nid;
-		if (delta < 0)  {
-			/*
-			 * To shrink on this node, there must be a surplus page
-			 */
-			if (!h->surplus_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_alloc(h);
-				continue;
-			}
-		}
-		if (delta > 0) {
-			/*
-			 * Surplus cannot exceed the total number of pages
-			 */
-			if (h->surplus_huge_pages_node[nid] >=
-						h->nr_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_free(h);
-				continue;
-			}
-		}
-
-		h->surplus_huge_pages += delta;
-		h->surplus_huge_pages_node[nid] += delta;
-		ret = 1;
-		break;
+		ret = adjust_pool_surplus_node(h, delta, next_nid);
+		if (ret)
+			break;
+		if (delta < 0)
+			next_nid = hstate_next_node_to_alloc(h);
+		else
+			next_nid = hstate_next_node_to_free(h);
 	} while (next_nid != start_nid);
 
 	return ret;

WARNING: multiple messages have this Message-ID (diff)
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
To: linux-mm@kvack.org, linux-numa@vger.kernel.org
Cc: akpm@linux-foundation.org, Mel Gorman <mel@csn.ul.ie>,
	Greg KH <gregkh@suse.de>, Nishanth Aravamudan <nacc@us.ibm.com>,
	andi@firstfloor.org, David Rientjes <rientjes@google.com>,
	Adam Litke <agl@us.ibm.com>, Andy Whitcroft <apw@canonical.com>,
	eric.whitney@hp.com
Subject: [PATCH 2/4] hugetlb:  numafy several functions
Date: Wed, 29 Jul 2009 14:11:52 -0400	[thread overview]
Message-ID: <20090729181152.23716.22375.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20090729181139.23716.85986.sendpatchset@localhost.localdomain>

PATCH/RFC 2/4 hugetlb:  numafy several functions

Against: 2.6.31-rc3-mmotm-090716-1432
atop the previously posted alloc_bootmem_hugepages fix.
[http://marc.info/?l=linux-mm&m=124775468226290&w=4]

Based on a patch by Nishanth Aravamudan <nacc@us.ibm.com>, circa
april2008.

Factor out functions to dequeue and free huge pages and/or adjust
surplus huge page count for a specific node in support of subsequent
patch to alloc or free huge pages on a specified node.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>

---
 mm/hugetlb.c |  126 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 72 insertions(+), 54 deletions(-)

Index: linux-2.6.31-rc3-mmotm-090716-1432/mm/hugetlb.c
===================================================================
--- linux-2.6.31-rc3-mmotm-090716-1432.orig/mm/hugetlb.c	2009-07-23 11:10:29.000000000 -0400
+++ linux-2.6.31-rc3-mmotm-090716-1432/mm/hugetlb.c	2009-07-27 15:26:39.000000000 -0400
@@ -456,6 +456,17 @@ static void enqueue_huge_page(struct hst
 	h->free_huge_pages_node[nid]++;
 }
 
+static struct page *hstate_dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	h->free_huge_pages--;
+	h->free_huge_pages_node[nid]--;
+	return page;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
@@ -487,11 +498,7 @@ static struct page *dequeue_huge_page_vm
 		nid = zone_to_nid(zone);
 		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
+			page = hstate_dequeue_huge_page_node(h, nid);
 
 			if (!avoid_reserve)
 				decrement_hugepage_resv_vma(h, vma);
@@ -599,12 +606,12 @@ int PageHuge(struct page *page)
 	return dtor == free_huge_page;
 }
 
-static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
+static int alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
 	if (h->order >= MAX_ORDER)
-		return NULL;
+		return 0;
 
 	page = alloc_pages_exact_node(nid,
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
@@ -613,12 +620,12 @@ static struct page *alloc_fresh_huge_pag
 	if (page) {
 		if (arch_prepare_hugepage(page)) {
 			__free_pages(page, huge_page_order(h));
-			return NULL;
+			return 0;
 		}
 		prep_new_huge_page(h, page, nid);
 	}
 
-	return page;
+	return 1;
 }
 
 /*
@@ -658,7 +665,6 @@ static int hstate_next_node_to_alloc(str
 
 static int alloc_fresh_huge_page(struct hstate *h)
 {
-	struct page *page;
 	int start_nid;
 	int next_nid;
 	int ret = 0;
@@ -667,11 +673,9 @@ static int alloc_fresh_huge_page(struct 
 	next_nid = start_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(h, next_nid);
-		if (page) {
-			ret = 1;
+		ret = alloc_fresh_huge_page_node(h, next_nid);
+		if (ret)
 			break;
-		}
 		next_nid = hstate_next_node_to_alloc(h);
 	} while (next_nid != start_nid);
 
@@ -699,6 +703,23 @@ static int hstate_next_node_to_free(stru
 	return nid;
 }
 
+static int hstate_free_huge_page_node(struct hstate *h, bool acct_surplus,
+							int nid)
+{
+	struct page *page;
+
+	if (list_empty(&h->hugepage_freelists[nid]))
+		return 0;
+
+	page = hstate_dequeue_huge_page_node(h, nid);
+	if (acct_surplus) {
+		h->surplus_huge_pages--;
+		h->surplus_huge_pages_node[nid]--;
+	}
+	update_and_free_page(h, page);
+	return 1;
+}
+
 /*
  * Free huge page from pool from next node to free.
  * Attempt to keep persistent huge pages more or less
@@ -719,21 +740,11 @@ static int free_pool_huge_page(struct hs
 		 * If we're returning unused surplus pages, only examine
 		 * nodes with surplus pages.
 		 */
-		if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
-		    !list_empty(&h->hugepage_freelists[next_nid])) {
-			struct page *page =
-				list_entry(h->hugepage_freelists[next_nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[next_nid]--;
-			if (acct_surplus) {
-				h->surplus_huge_pages--;
-				h->surplus_huge_pages_node[next_nid]--;
-			}
-			update_and_free_page(h, page);
-			ret = 1;
-			break;
+		if ((!acct_surplus || h->surplus_huge_pages_node[next_nid])) {
+			ret = hstate_free_huge_page_node(h, acct_surplus,
+			                                    next_nid);
+			if (ret)
+				break;
 		}
 		next_nid = hstate_next_node_to_free(h);
 	} while (next_nid != start_nid);
@@ -1173,6 +1184,31 @@ static inline void try_to_free_low(struc
 #endif
 
 /*
+ * Increment or decrement surplus_huge_pages for a specified node,
+ * if conditions permit.  Note that decrementing the surplus huge
+ * page count effective promotes a page to persistent, while
+ * incrementing the surplus count demotes a page to surplus.
+ */
+static int adjust_pool_surplus_node(struct hstate *h, int delta, int nid)
+{
+	int ret = 0;
+
+	/*
+	 * To shrink on this node, there must be a surplus page.
+	 * Surplus cannot exceed the total number of pages.
+	 */
+	if ((delta < 0 && h->surplus_huge_pages_node[nid]) ||
+	    (delta > 0 && h->surplus_huge_pages_node[nid] <
+					h->nr_huge_pages_node[nid])) {
+
+		h->surplus_huge_pages += delta;
+		h->surplus_huge_pages_node[nid] += delta;
+		ret = 1;
+	}
+	return ret;
+}
+
+/*
  * Increment or decrement surplus_huge_pages.  Keep node-specific counters
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
@@ -1191,31 +1227,13 @@ static int adjust_pool_surplus(struct hs
 	next_nid = start_nid;
 
 	do {
-		int nid = next_nid;
-		if (delta < 0)  {
-			/*
-			 * To shrink on this node, there must be a surplus page
-			 */
-			if (!h->surplus_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_alloc(h);
-				continue;
-			}
-		}
-		if (delta > 0) {
-			/*
-			 * Surplus cannot exceed the total number of pages
-			 */
-			if (h->surplus_huge_pages_node[nid] >=
-						h->nr_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_free(h);
-				continue;
-			}
-		}
-
-		h->surplus_huge_pages += delta;
-		h->surplus_huge_pages_node[nid] += delta;
-		ret = 1;
-		break;
+		ret = adjust_pool_surplus_node(h, delta, next_nid);
+		if (ret)
+			break;
+		if (delta < 0)
+			next_nid = hstate_next_node_to_alloc(h);
+		else
+			next_nid = hstate_next_node_to_free(h);
 	} while (next_nid != start_nid);
 
 	return ret;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2009-07-29 18:11 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-29 18:11 [PATCH 0/4] hugetlb: V1 Per Node Hugepages attributes Lee Schermerhorn
2009-07-29 18:11 ` Lee Schermerhorn
2009-07-29 18:11 ` [PATCH 1/4] hugetlb: rework hstate_next_node_* functions Lee Schermerhorn
2009-07-29 18:11 ` Lee Schermerhorn [this message]
2009-07-29 18:11   ` [PATCH 2/4] hugetlb: numafy several functions Lee Schermerhorn
2009-07-29 18:11 ` [PATCH 3/4] hugetlb: add private bit-field to kobject structure Lee Schermerhorn
2009-07-29 18:11   ` Lee Schermerhorn
2009-07-29 18:25   ` Greg KH
2009-07-31 18:59     ` Lee Schermerhorn
2009-07-31 18:59       ` Lee Schermerhorn
2009-07-29 18:12 ` [PATCH 4/4] hugetlb: add per node hstate attributes Lee Schermerhorn
2009-07-30 19:39   ` David Rientjes
2009-07-30 19:39     ` David Rientjes
2009-07-31 10:36     ` Mel Gorman
2009-07-31 10:36       ` Mel Gorman
2009-07-31 19:10       ` Lee Schermerhorn
2009-07-31 19:10         ` Lee Schermerhorn
2009-08-14 22:38         ` David Rientjes
2009-08-14 23:08           ` Andrew Morton
2009-08-14 23:19             ` Greg KH
2009-08-14 23:19               ` Greg KH
2009-08-14 23:53             ` David Rientjes
2009-08-14 23:53               ` David Rientjes
2009-08-17  1:10               ` Lee Schermerhorn
2009-08-17  1:10                 ` Lee Schermerhorn
2009-08-17 10:07                 ` David Rientjes
2009-08-17 10:07                   ` David Rientjes
2009-08-15 10:08           ` Mel Gorman
2009-08-15 10:08             ` Mel Gorman
2009-07-31 19:55       ` David Rientjes
2009-07-31 19:55         ` David Rientjes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090729181152.23716.22375.sendpatchset@localhost.localdomain \
    --to=lee.schermerhorn@hp.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=apw@canonical.com \
    --cc=eric.whitney@hp.com \
    --cc=gregkh@suse.de \
    --cc=linux-mm@kvack.org \
    --cc=linux-numa@vger.kernel.org \
    --cc=mel@csn.ul.ie \
    --cc=nacc@us.ibm.com \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.