linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH 0/5] hugetlb NUMA improvements
@ 2007-08-06 16:32 Nishanth Aravamudan
  2007-08-06 16:37 ` [RFC][PATCH 1/5] Fix hugetlb pool allocation with empty nodes V9 Nishanth Aravamudan
  2007-08-06 16:39 ` [RFC][PATCH 0/5] hugetlb NUMA improvements Nishanth Aravamudan
  0 siblings, 2 replies; 27+ messages in thread
From: Nishanth Aravamudan @ 2007-08-06 16:32 UTC (permalink / raw)
  To: clameter; +Cc: lee.schermerhorn, wli, melgor, akpm, linux-mm, agl

The following stack of 5 patches give hugetlbfs improved NUMA support.

1/5: Fix hugetlb pool allocation with empty nodes V9
	The most important of the patches, fix hugetlb pool allocation
	in the presence of memoryless nodes.

2/5: hugetlb: numafy several functions
3/5: hugetlb: add per-node nr_hugepages sysfs attribute
	Together, add a per-node sysfs attribute for the number of
	hugepages allocated on the node.  This gives system
	administrators more fine-grained control of the global pool's
	distribution.

4/5: hugetlb: fix cpuset-constrained pool resizing
	fix cpuset-constrained resizing in the presence of the previous
	3 patches.

5/5: hugetlb: interleave dequeueing of huge pages
	add interleaving to the dequeue path for hugetlb, so that
	hugepages are removed from all available nodes when the pool
	shrinks. Given the sysfs attribute, the current node-at-a-time
	dequeueing is still possible.

Thanks,
Nish

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread
* [PATCH 1/5] hugetlb: numafy several functions
@ 2008-04-11 23:44 Nishanth Aravamudan
  2008-04-11 23:47 ` [RFC][PATCH 2/5] " Nishanth Aravamudan
  0 siblings, 1 reply; 27+ messages in thread
From: Nishanth Aravamudan @ 2008-04-11 23:44 UTC (permalink / raw)
  To: wli; +Cc: clameter, agl, luick, Lee.Schermerhorn, linux-mm, npiggin

Add node-parameterized helpers for dequeue_huge_page,
alloc_fresh_huge_page, adjust_pool_surplus and try_to_free_low. Also
have update_and_free_page() take a nid parameter. These changes are
necessary to add sysfs attributes to specify the number of static
hugepages on NUMA nodes.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e13a7b2..8faaa16 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,6 +71,20 @@ static void enqueue_huge_page(struct page *page)
 	free_huge_pages_node[nid]++;
 }
 
+static struct page *dequeue_huge_page_node(struct vm_area_struct *vma,
+								int nid)
+{
+	struct page *page;
+
+	page = list_entry(hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	free_huge_pages--;
+	free_huge_pages_node[nid]--;
+	if (vma && vma->vm_flags & VM_MAYSHARE)
+		resv_huge_pages--;
+	return page;
+}
+
 static struct page *dequeue_huge_page(void)
 {
 	int nid;
@@ -78,11 +92,7 @@ static struct page *dequeue_huge_page(void)
 
 	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
 		if (!list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
+			page = dequeue_huge_page_node(NULL, nid);
 			break;
 		}
 	}
@@ -106,13 +116,7 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 		nid = zone_to_nid(zone);
 		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
-			if (vma && vma->vm_flags & VM_MAYSHARE)
-				resv_huge_pages--;
+			page = dequeue_huge_page_node(vma, nid);
 			break;
 		}
 	}
@@ -120,11 +124,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	return page;
 }
 
-static void update_and_free_page(struct page *page)
+static void update_and_free_page(int nid, struct page *page)
 {
 	int i;
 	nr_huge_pages--;
-	nr_huge_pages_node[page_to_nid(page)]--;
+	nr_huge_pages_node[nid]--;
 	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
 				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
@@ -148,7 +152,7 @@ static void free_huge_page(struct page *page)
 
 	spin_lock(&hugetlb_lock);
 	if (surplus_huge_pages_node[nid]) {
-		update_and_free_page(page);
+		update_and_free_page(nid, page);
 		surplus_huge_pages--;
 		surplus_huge_pages_node[nid]--;
 	} else {
@@ -164,6 +168,20 @@ static void free_huge_page(struct page *page)
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
  */
+static int adjust_pool_surplus_node(int delta, int nid)
+{
+	/* To shrink on this node, there must be a surplus page */
+	if (delta < 0 && !surplus_huge_pages_node[nid])
+		return 0;
+	/* Surplus cannot exceed the total number of pages */
+	if (delta > 0 && surplus_huge_pages_node[nid] >=
+					nr_huge_pages_node[nid])
+		return 0;
+	surplus_huge_pages += delta;
+	surplus_huge_pages_node[nid] += delta;
+	return 1;
+}
+
 static int adjust_pool_surplus(int delta)
 {
 	static int prev_nid;
@@ -175,19 +193,9 @@ static int adjust_pool_surplus(int delta)
 		nid = next_node(nid, node_online_map);
 		if (nid == MAX_NUMNODES)
 			nid = first_node(node_online_map);
-
-		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !surplus_huge_pages_node[nid])
-			continue;
-		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && surplus_huge_pages_node[nid] >=
-						nr_huge_pages_node[nid])
-			continue;
-
-		surplus_huge_pages += delta;
-		surplus_huge_pages_node[nid] += delta;
-		ret = 1;
-		break;
+		ret = adjust_pool_surplus_node(delta, nid);
+		if (ret == 1)
+			break;
 	} while (nid != prev_nid);
 
 	prev_nid = nid;
@@ -450,7 +458,7 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 			page = list_entry(hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			update_and_free_page(page);
+			update_and_free_page(nid, page);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
 			surplus_huge_pages--;
@@ -556,25 +564,35 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
+static void try_to_free_low_node(unsigned long count, int nid)
+{
+	struct page *page, *next;
+	list_for_each_entry_safe(page, next, &hugepage_freelists[nid], lru) {
+		if (count >= nr_huge_pages_node[nid])
+			return;
+		if (PageHighMem(page))
+			continue;
+		list_del(&page->lru);
+		update_and_free_page(nid, page);
+		free_huge_pages--;
+		free_huge_pages_node[nid]--;
+	}
+}
+
 static void try_to_free_low(unsigned long count)
 {
 	int i;
 
 	for (i = 0; i < MAX_NUMNODES; ++i) {
-		struct page *page, *next;
-		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
-			if (count >= nr_huge_pages)
-				return;
-			if (PageHighMem(page))
-				continue;
-			list_del(&page->lru);
-			update_and_free_page(page);
-			free_huge_pages--;
-			free_huge_pages_node[page_to_nid(page)]--;
-		}
+		if (count >= nr_huge_pages)
+			return;
+		try_to_free_low_node(count, i);
 	}
 }
 #else
+static inline void try_to_free_low_node(unsigned long count, int nid)
+{
+}
 static inline void try_to_free_low(unsigned long count)
 {
 }
@@ -639,7 +657,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
 		struct page *page = dequeue_huge_page();
 		if (!page)
 			break;
-		update_and_free_page(page);
+		update_and_free_page(page_to_nid(page), page);
 	}
 	while (count < persistent_huge_pages) {
 		if (!adjust_pool_surplus(1))

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2008-04-14 21:10 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-06 16:32 [RFC][PATCH 0/5] hugetlb NUMA improvements Nishanth Aravamudan
2007-08-06 16:37 ` [RFC][PATCH 1/5] Fix hugetlb pool allocation with empty nodes V9 Nishanth Aravamudan
2007-08-06 16:38   ` [RFC][PATCH 2/5] hugetlb: numafy several functions Nishanth Aravamudan
2007-08-06 16:40     ` [RFC][PATCH 3/5] hugetlb: add per-node nr_hugepages sysfs attribute Nishanth Aravamudan
2007-08-06 16:44       ` [RFC][PATCH 4/5] hugetlb: fix cpuset-constrained pool resizing Nishanth Aravamudan
2007-08-06 16:45         ` Nishanth Aravamudan
2007-08-06 16:48         ` [RFC][PATCH 5/5] hugetlb: interleave dequeueing of huge pages Nishanth Aravamudan
2007-08-06 18:04         ` [RFC][PATCH 4/5] hugetlb: fix cpuset-constrained pool resizing Christoph Lameter
2007-08-06 18:26           ` Nishanth Aravamudan
2007-08-06 18:41             ` Christoph Lameter
2007-08-07  0:03               ` Nishanth Aravamudan
2007-08-06 19:37           ` Lee Schermerhorn
2007-08-08  1:50           ` Nishanth Aravamudan
2007-08-08 13:26             ` Lee Schermerhorn
2007-08-06 17:59     ` [RFC][PATCH 2/5] hugetlb: numafy several functions Christoph Lameter
2007-08-06 18:15       ` Nishanth Aravamudan
2007-08-07  0:34         ` Nishanth Aravamudan
2007-08-06 18:00   ` [RFC][PATCH 1/5] Fix hugetlb pool allocation with empty nodes V9 Christoph Lameter
2007-08-06 18:19     ` Nishanth Aravamudan
2007-08-06 18:37       ` Christoph Lameter
2007-08-06 19:52         ` Lee Schermerhorn
2007-08-06 20:15           ` Christoph Lameter
2007-08-07  0:04             ` Nishanth Aravamudan
2007-08-06 16:39 ` [RFC][PATCH 0/5] hugetlb NUMA improvements Nishanth Aravamudan
  -- strict thread matches above, loose matches on Subject: below --
2008-04-11 23:44 [PATCH 1/5] hugetlb: numafy several functions Nishanth Aravamudan
2008-04-11 23:47 ` [RFC][PATCH 2/5] " Nishanth Aravamudan
2008-04-14 14:52   ` Adam Litke
2008-04-14 21:10     ` Nishanth Aravamudan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).