alloc_pages_bulk - Martin J. Bligh

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>
To: Andrew Morton <akpm@zip.com.au>
Cc: Bill Irwin <wli@holomorphy.com>, linux-mm@kvack.org
Subject: alloc_pages_bulk
Date: Mon, 22 Jul 2002 11:40:48 -0700	[thread overview]
Message-ID: <1615040000.1027363248@flay> (raw)

Below is a first cut at a bulk page allocator. This has no testing whatsoever,
not even being compiled ... I just want to get some feedback on the approach,
so if I get slapped, I'm less far down the path that I have to back out of.
The __alloc_pages cleanup is also tacked on the end because I'm lazy at
creating diff trees - sorry ;-)

Comments, opinions, abuse?

M.

diff -urN virgin-2.5.25/include/linux/gfp.h 2.5.25-A04-alloc_pages_bulk/include/linux/gfp.h
--- virgin-2.5.25/include/linux/gfp.h	Fri Jul  5 16:42:19 2002
+++ 2.5.25-A04-alloc_pages_bulk/include/linux/gfp.h	Mon Jul 22 11:27:49 2002
@@ -45,12 +45,23 @@
 
 static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
 {
+	struct page *page;
 	/*
 	 * Gets optimized away by the compiler.
 	 */
 	if (order >= MAX_ORDER)
 		return NULL;
-	return _alloc_pages(gfp_mask, order);
+	return _alloc_pages(gfp_mask, order, 1, &page);
+}
+
+static inline struct page * alloc_pages_bulk(unsigned int gfp_mask, unsigned int order, unsigned long count, struct page **pages)
+{
+	/*
+	 * Gets optimized away by the compiler.
+	 */
+	if (order >= MAX_ORDER)
+		return NULL;
+	return _alloc_pages(gfp_mask, order, count, pages);
 }
 
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
diff -urN virgin-2.5.25/mm/numa.c 2.5.25-A04-alloc_pages_bulk/mm/numa.c
--- virgin-2.5.25/mm/numa.c	Fri Jul  5 16:42:20 2002
+++ 2.5.25-A04-alloc_pages_bulk/mm/numa.c	Mon Jul 22 11:14:53 2002
@@ -31,32 +31,29 @@
 
 #endif /* !CONFIG_DISCONTIGMEM */
 
-struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order)
+struct page * alloc_pages_node(int nid, unsigned int gfp_mask, 
+		unsigned int order)
 {
-#ifdef CONFIG_NUMA
-	return __alloc_pages(gfp_mask, order, NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
-#else
-	return alloc_pages(gfp_mask, order);
-#endif
+	struct page *page;
+
+	return __alloc_pages(gfp_mask, order, 
+		NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK)
+		1, &page);
 }
 
 #ifdef CONFIG_DISCONTIGMEM
 
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
-static spinlock_t node_lock = SPIN_LOCK_UNLOCKED;
-
 void show_free_areas_node(pg_data_t *pgdat)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&node_lock, flags);
 	show_free_areas_core(pgdat);
-	spin_unlock_irqrestore(&node_lock, flags);
 }
 
 /*
- * Nodes can be initialized parallely, in no particular order.
+ * Nodes can be initialized in parallel, in no particular order.
  */
 void __init free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
 	unsigned long *zones_size, unsigned long zone_start_paddr, 
@@ -82,49 +79,69 @@
 	memset(pgdat->valid_addr_bitmap, 0, size);
 }
 
-static struct page * alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask,
-	unsigned int order)
+/* 
+ * Walk the global list of pg_data_t's starting at specified start point 
+ */
+static inline unsigned long _alloc_pages_pgdat_walk(pg_data_t *start,
+		unsigned int gfp_mask, unsigned int order, 
+		unsigned long count, struct page **pages)
 {
-	return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+	unsigned long page_count = 0;
+	pg_data_t *pgdat = start;
+	
+	/* walk the second part of the list from start to the list tail */
+	for (pgdat = start; pgdat != NULL; pgdat = pgdat->node_next) {
+		if ((page_count = __alloc_pages(pgdat, gfp_mask, order, 
+						pgdat->node_zonelists + 
+						(gfp_mask & GFP_ZONEMASK)
+						count, pages)))
+			return(page_count);
+	}
+	/* walk the first part of the list from the list head to start */
+	for (pgdat = pgdat_list; pgdat != start; pgdat = pgdat->node_next) {
+		if ((page_count = __alloc_pages(pgdat, gfp_mask, order, 
+						pgdat->node_zonelists + 
+						(gfp_mask & GFP_ZONEMASK)
+						count, pages)))
+			return(page_count);
+	}
+	return(0);
 }
 
+#ifdef CONFIG_NUMA
+
 /*
  * This can be refined. Currently, tries to do round robin, instead
  * should do concentratic circle search, starting from current node.
  */
-struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order)
+unsigned long _alloc_pages(unsigned int gfp_mask, unsigned int order,
+		unsigned long count, struct page **pages)
+{
+	/* start at the current node, then do round robin */	
+	return _alloc_pages_pgdat_walk(NODE_DATA(numa_node_id()), 
+		gfp_mask, order, count, pages);
+}
+
+#else /* !CONFIG_NUMA */
+
+unsigned long _alloc_pages(unsigned int gfp_mask, unsigned int order,
+		unsigned long count, struct page **pages)
 {
-	struct page *ret = 0;
-	pg_data_t *start, *temp;
-#ifndef CONFIG_NUMA
 	unsigned long flags;
 	static pg_data_t *next = 0;
-#endif
+	pgdat_t *temp;
 
-	if (order >= MAX_ORDER)
-		return NULL;
-#ifdef CONFIG_NUMA
-	temp = NODE_DATA(numa_node_id());
-#else
-	spin_lock_irqsave(&node_lock, flags);
+	/* 
+	 * As the next ptr is static, it saves position between calls
+	 * and we round robin between memory segments to balance pressure
+	 */
 	if (!next) next = pgdat_list;
 	temp = next;
 	next = next->node_next;
-	spin_unlock_irqrestore(&node_lock, flags);
-#endif
-	start = temp;
-	while (temp) {
-		if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
-			return(ret);
-		temp = temp->node_next;
-	}
-	temp = pgdat_list;
-	while (temp != start) {
-		if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
-			return(ret);
-		temp = temp->node_next;
-	}
-	return(0);
+
+	return _alloc_pages_pgdat_walk(temp, gfp_mask, order, count, pages);
 }
+
+#endif /* CONFIG_NUMA */
 
 #endif /* CONFIG_DISCONTIGMEM */
diff -urN virgin-2.5.25/mm/page_alloc.c 2.5.25-A04-alloc_pages_bulk/mm/page_alloc.c
--- virgin-2.5.25/mm/page_alloc.c	Fri Jul  5 16:42:03 2002
+++ 2.5.25-A04-alloc_pages_bulk/mm/page_alloc.c	Mon Jul 22 11:15:00 2002
@@ -187,16 +187,13 @@
 	set_page_count(page, 1);
 }
 
-static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
-static struct page * rmqueue(zone_t *zone, unsigned int order)
+static inline struct page * __rmqueue(zone_t *zone, unsigned int order)
 {
 	free_area_t * area = zone->free_area + order;
 	unsigned int curr_order = order;
 	struct list_head *head, *curr;
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&zone->lock, flags);
 	do {
 		head = &area->free_list;
 		curr = head->next;
@@ -213,21 +210,40 @@
 			zone->free_pages -= 1UL << order;
 
 			page = expand(zone, page, index, order, curr_order, area);
-			spin_unlock_irqrestore(&zone->lock, flags);
-
-			if (bad_range(zone, page))
-				BUG();
-			prep_new_page(page);
 			return page;	
 		}
 		curr_order++;
 		area++;
 	} while (curr_order < MAX_ORDER);
-	spin_unlock_irqrestore(&zone->lock, flags);
 
 	return NULL;
 }
 
+static FASTCALL(unsigned long rmqueue(zone_t *zone, unsigned int order, 
+			unsigned long count, struct page *pages[]));
+static unsigned long rmqueue(zone_t *zone, unsigned int order, 
+		unsigned long count, struct page **pages)
+{
+	unsigned long flags;
+	int i, allocated = 0;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (i = 0; i < count; ++i) {
+		pages[i] = __rmqueue(zone, order);
+		if (pages[i] == NULL)
+			break;
+		++allocated;
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+	
+	for (i = 0; i < allocated; ++i) {
+		if (bad_range(zone, pages[i]))
+			BUG();
+		prep_new_page(pages[i]);
+	}
+	return allocated;	
+}
+
 #ifdef CONFIG_SOFTWARE_SUSPEND
 int is_head_of_free_region(struct page *page)
 {
@@ -253,10 +269,12 @@
 #endif /* CONFIG_SOFTWARE_SUSPEND */
 
 #ifndef CONFIG_DISCONTIGMEM
-struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
+unsigned long _alloc_pages(unsigned int gfp_mask, unsigned int order,
+		unsigned long count, struct page **pages)
 {
 	return __alloc_pages(gfp_mask, order,
-		contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+		contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK)
+		count, pages);
 }
 #endif
 
@@ -316,26 +334,27 @@
 /*
  * This is the 'heart' of the zoned buddy allocator:
  */
-struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
+unsigned long __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist, unsigned long count, struct page **pages)
 {
 	unsigned long min;
-	zone_t **zone, * classzone;
+	zone_t **zones, * classzone;
 	struct page * page;
-	int freed;
+	int freed, i;
 
-	zone = zonelist->zones;
-	classzone = *zone;
-	if (classzone == NULL)
+	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+	classzone = zones[0]; 
+	if (classzone == NULL)    /* no zones in the zonelist */
 		return NULL;
-	min = 1UL << order;
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
 
+	/* Go through the zonelist once, looking for a zone with enough free */
+	min = count << order;
+	for (i = 0; zones[i] != NULL; i++) {
+		zone_t *z = *zones[i];
+
+		/* the incremental min is allegedly to discourage fallback */
 		min += z->pages_low;
 		if (z->free_pages > min) {
-			page = rmqueue(z, order);
+			page = rmqueue(z, order, count, pages);
 			if (page)
 				return page;
 		}
@@ -343,23 +362,22 @@
 
 	classzone->need_balance = 1;
 	mb();
+	/* we're somewhat low on memory, failed to find what we needed */
 	if (waitqueue_active(&kswapd_wait))
 		wake_up_interruptible(&kswapd_wait);
 
-	zone = zonelist->zones;
-	min = 1UL << order;
-	for (;;) {
+	/* Go through the zonelist again, taking __GFP_HIGH into account */
+	min = count << order;
+	for (i = 0; zones[i] != NULL; i++) {
 		unsigned long local_min;
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
+		zone_t *z = *zones[i];
 
 		local_min = z->pages_min;
 		if (gfp_mask & __GFP_HIGH)
 			local_min >>= 2;
 		min += local_min;
 		if (z->free_pages > min) {
-			page = rmqueue(z, order);
+			page = rmqueue(z, order, count, pages);
 			if (page)
 				return page;
 		}
@@ -369,13 +387,11 @@
 
 rebalance:
 	if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
-		zone = zonelist->zones;
-		for (;;) {
-			zone_t *z = *(zone++);
-			if (!z)
-				break;
+		/* go through the zonelist yet again, ignoring mins */
+		for (i = 0; zones[i] != NULL; i++) {
+			zone_t *z = *zones[i];
 
-			page = rmqueue(z, order);
+			page = rmqueue(z, order, count, pages);
 			if (page)
 				return page;
 		}
@@ -396,16 +412,14 @@
 	if (page)
 		return page;
 
-	zone = zonelist->zones;
-	min = 1UL << order;
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
+	/* go through the zonelist yet one more time */
+	min = count << order;
+	for (i = 0; zones[i] != NULL; i++) {
+		zone_t *z = *zones[i];
 
 		min += z->pages_min;
 		if (z->free_pages > min) {
-			page = rmqueue(z, order);
+			page = rmqueue(z, order, count, pages);
 			if (page)
 				return page;
 		}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

next             reply	other threads:[~2002-07-22 18:40 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-07-22 18:40 Martin J. Bligh [this message]
2002-07-22 18:56 ` alloc_pages_bulk Benjamin LaHaise
2002-07-22 20:05   ` alloc_pages_bulk Martin J. Bligh
2002-07-22 20:26 ` alloc_pages_bulk Andrew Morton
2002-07-22 20:35   ` alloc_pages_bulk Martin J. Bligh
2002-07-22 20:58     ` alloc_pages_bulk Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1615040000.1027363248@flay \
    --to=martin.bligh@us.ibm.com \
    --cc=akpm@zip.com.au \
    --cc=linux-mm@kvack.org \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.