[PATCH 10/9] percpu splitout

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Joel Schopp <jschopp@austin.ibm.com>
To: Joel Schopp <jschopp@austin.ibm.com>
Cc: Andrew Morton <akpm@osdl.org>,
	lhms <lhms-devel@lists.sourceforge.net>,
	Linux Memory Management List <linux-mm@kvack.org>,
	linux-kernel@vger.kernel.org, Mel Gorman <mel@csn.ul.ie>,
	Mike Kravetz <kravetz@us.ibm.com>
Subject: [PATCH 10/9] percpu splitout
Date: Mon, 26 Sep 2005 15:19:29 -0500	[thread overview]
Message-ID: <433857D1.3050903@austin.ibm.com> (raw)
In-Reply-To: <4338537E.8070603@austin.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 554 bytes --]

NOT READY FOR MERGING!
Only works with NUMA off on 2.6.13.  On 2.6.13 with NUMA on, free_hot_cold_page
calls __free_pages_bulk, which then trips BUG_ON(bad_range(zone,page)).  This
does not happen on 2.6.13-rc1 kernels.  Released under the "release early,
release often" doctrine.

This patch splits the percpu allocations into two types.  Kernel reclaimable
and kernel non-reclaimable types are treated as a single PCPU_KERN type, and
user types are the PCPU_USER type.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>


[-- Attachment #2: 10_percpu_splitout --]
[-- Type: text/plain, Size: 5504 bytes --]

Index: 2.6.13-joel2/include/linux/mmzone.h
===================================================================
--- 2.6.13-joel2.orig/include/linux/mmzone.h	2005-09-26 13:58:59.%N -0500
+++ 2.6.13-joel2/include/linux/mmzone.h	2005-09-26 13:59:38.%N -0500
@@ -57,13 +57,28 @@ struct zone_padding {
 #else
 #define ZONE_PADDING(name)
 #endif
+/*
+ * The pcpu_list is to keep kernel and userrclm allocations
+ * apart while still allowing all allocation types to have
+ * per-cpu lists
+ */
+struct pcpu_list {
+	int count;
+	struct list_head list;
+} ____cacheline_aligned_in_smp;
+
+
+/* Indices into pcpu_list */
+#define PCPU_KERN 0
+#define PCPU_USER 1
+#define PCPU_LIST_SIZE 2
 
 struct per_cpu_pages {
-	int count;		/* number of pages in the list */
-	int low;		/* low watermark, refill needed */
-	int high;		/* high watermark, emptying needed */
-	int batch;		/* chunk size for buddy add/remove */
-	struct list_head list;	/* the list of pages */
+	int count;			/* number of pages in the list */
+	struct pcpu_list pcpu_list[PCPU_LIST_SIZE];
+	int low;			/* low watermark, refill needed */
+	int high;			/* high watermark, emptying needed */
+	int batch;			/* chunk size for buddy add/remove */
 };
 
 struct per_cpu_pageset {
Index: 2.6.13-joel2/mm/page_alloc.c
===================================================================
--- 2.6.13-joel2.orig/mm/page_alloc.c	2005-09-26 13:59:27.%N -0500
+++ 2.6.13-joel2/mm/page_alloc.c	2005-09-26 13:59:38.%N -0500
@@ -775,9 +775,18 @@ void drain_remote_pages(void)
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			if (pcp->count)
-				pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			if (pcp->pcpu_list[PCPU_KERN].count)
+				pcp->pcpu_list[PCPU_KERN].count -=
+					free_pages_bulk(zone,
+							pcp->pcpu_list[PCPU_KERN].count,
+							&pcp->pcpu_list[PCPU_KERN].list,
+							0);
+			if (pcp->pcpu_list[PCPU_USER].count)
+				pcp->pcpu_list[PCPU_USER].count -=
+					free_pages_bulk(zone,
+							pcp->pcpu_list[PCPU_USER].count,
+							&pcp->pcpu_list[PCPU_USER].list,
+							0);
 		}
 	}
 	local_irq_restore(flags);
@@ -798,8 +807,18 @@ static void __drain_pages(unsigned int c
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			pcp->pcpu_list[PCPU_KERN].count -=
+				free_pages_bulk(zone,
+						pcp->pcpu_list[PCPU_KERN].count,
+						&pcp->pcpu_list[PCPU_KERN].list,
+						0);
+
+			pcp->pcpu_list[PCPU_USER].count -=
+				free_pages_bulk(zone,
+						pcp->pcpu_list[PCPU_USER].count,
+						&pcp->pcpu_list[PCPU_USER].list,
+						0);
+
 		}
 	}
 }
@@ -881,6 +900,7 @@ static void fastcall free_hot_cold_page(
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
+	struct pcpu_list *plist;
 
 	arch_free_page(page, 0);
 
@@ -890,11 +910,24 @@ static void fastcall free_hot_cold_page(
 		page->mapping = NULL;
 	free_pages_check(__FUNCTION__, page);
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+
+	/*
+	 * Strictly speaking, we should not be accessing the zone information
+	 * here without the zone lock. In this case, it does not matter if
+	 * the read is incorrect.
+	 */
+	if (get_pageblock_type(zone, page) == RCLM_USER)
+		plist = &pcp->pcpu_list[PCPU_USER];
+	else
+		plist = &pcp->pcpu_list[PCPU_KERN];
+
+	if (plist->count >= pcp->high)
+		plist->count -= free_pages_bulk(zone, pcp->batch,
+						&plist->list, 0);
+
 	local_irq_save(flags);
-	list_add(&page->lru, &pcp->list);
-	pcp->count++;
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+	list_add(&page->lru, &plist->list);
+	plist->count++;
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -930,19 +963,28 @@ buffered_rmqueue(struct zone *zone, int 
 	unsigned long flags;
 	struct page *page = NULL;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	struct pcpu_list *plist;
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
-			pcp->count += rmqueue_bulk(zone, pcp->batch,
-						   &pcp->list, alloctype);
-		if (pcp->count) {
-			page = list_entry(pcp->list.next, struct page, lru);
+
+		if (alloctype == __GFP_USER)
+			plist = &pcp->pcpu_list[PCPU_USER];
+		else
+			plist = &pcp->pcpu_list[PCPU_KERN];
+
+		if (plist->count <= pcp->low)
+			plist->count += rmqueue_bulk(zone,
+						     pcp->batch,
+						     &plist->list,
+						     alloctype);
+		if (plist->count) {
+			page = list_entry(plist->list.next, struct page, lru);
 			list_del(&page->lru);
-			pcp->count--;
+			plist->count--;
 		}
 		local_irq_restore(flags);
 		put_cpu();
@@ -2001,18 +2043,23 @@ inline void setup_pageset(struct per_cpu
 	struct per_cpu_pages *pcp;
 
 	pcp = &p->pcp[0];		/* hot */
-	pcp->count = 0;
+	pcp->pcpu_list[PCPU_KERN].count = 0;
+	pcp->pcpu_list[PCPU_USER].count = 0;
 	pcp->low = 2 * batch;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_KERN].list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_USER].list);
 
 	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
+	pcp->pcpu_list[PCPU_KERN].count = 0;
+	pcp->pcpu_list[PCPU_USER].count = 0;
 	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_KERN].list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_USER].list);
+
 }
 
 #ifdef CONFIG_NUMA

next prev parent reply	other threads:[~2005-09-26 20:19 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-09-26 20:01 [PATCH 0/9] fragmentation avoidance Joel Schopp
2005-09-26 20:03 ` [PATCH 1/9] add defrag flags Joel Schopp
2005-09-27  0:16   ` Kyle Moffett
2005-09-27  0:16     ` Kyle Moffett
2005-09-27  0:24     ` Dave Hansen
2005-09-27  0:24       ` Dave Hansen
2005-09-27  0:43       ` Kyle Moffett
2005-09-27  0:43         ` Kyle Moffett
2005-09-27  5:44       ` Paul Jackson
2005-09-27  5:44         ` Paul Jackson
2005-09-27 13:34         ` Mel Gorman
2005-09-27 13:34           ` Mel Gorman
2005-09-27 16:26           ` [Lhms-devel] " Paul Jackson
2005-09-27 16:26             ` Paul Jackson
2005-09-27 18:38         ` Joel Schopp
2005-09-27 19:30           ` Paul Jackson
2005-09-27 19:30             ` Paul Jackson
2005-09-27 21:00             ` [Lhms-devel] " Joel Schopp
2005-09-27 21:00               ` Joel Schopp
2005-09-27 21:23               ` Paul Jackson
2005-09-27 21:23                 ` Paul Jackson
2005-09-27 22:03                 ` Joel Schopp
2005-09-27 22:45                   ` Paul Jackson
2005-09-27 22:45                     ` Paul Jackson
2005-09-26 20:05 ` [PATCH 2/9] declare defrag structs Joel Schopp
2005-09-26 20:06 ` [PATCH 3/9] initialize defrag Joel Schopp
2005-09-26 20:09 ` [PATCH 4/9] defrag helper functions Joel Schopp
2005-09-26 22:29   ` Alex Bligh - linux-kernel
2005-09-26 22:29     ` Alex Bligh - linux-kernel
2005-09-27 16:08     ` Joel Schopp
2005-09-27 16:08       ` Joel Schopp
2005-09-26 20:11 ` [PATCH 5/9] propagate defrag alloc types Joel Schopp
2005-09-26 20:13 ` [PATCH 6/9] fragmentation avoidance core Joel Schopp
2005-09-26 20:14 ` [PATCH 7/9] try harder on large allocations Joel Schopp
2005-09-27  7:21   ` Coywolf Qi Hunt
2005-09-27  7:21     ` Coywolf Qi Hunt
2005-09-27 16:17     ` Joel Schopp
2005-09-27 16:17       ` Joel Schopp
2005-09-26 20:16 ` [PATCH 8/9] defrag fallback Joel Schopp
2005-09-26 20:17 ` [PATCH 9/9] free memory is user reclaimable Joel Schopp
2005-09-26 20:19 ` Joel Schopp [this message]
2005-09-26 21:49 ` [Lhms-devel] [PATCH 0/9] fragmentation avoidance Joel Schopp
2005-09-26 21:49   ` Joel Schopp

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=433857D1.3050903@austin.ibm.com \
    --to=jschopp@austin.ibm.com \
    --cc=akpm@osdl.org \
    --cc=kravetz@us.ibm.com \
    --cc=lhms-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.