public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Linux Kernel Mailing List <Linux-Kernel@vger.kernel.org>
Subject: [patch 10/14] mm: single pcp list
Date: Sun, 06 Nov 2005 19:26:14 +1100	[thread overview]
Message-ID: <436DBE26.5080504@yahoo.com.au> (raw)
In-Reply-To: <436DBE03.90009@yahoo.com.au>

[-- Attachment #1: Type: text/plain, Size: 35 bytes --]

10/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-single-pcp-list.patch --]
[-- Type: text/plain, Size: 8572 bytes --]

Use a single pcp list.

Having a hot and a cold pcp list means that cold pages are overlooked
when a hot page is needed but none is available. So a workload that is
doing heavy page reclaim will not take much advantage of the pcps for
minimising zone lock contention for the pages it is freeing up.

The same wastage applies the other way (e.g. when the hot list fills up
and the cold list is empty). The patch also takes care of that.

Disallow cold page allocation from taking hot pages though.

Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -44,15 +44,13 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
-struct per_cpu_pages {
+struct per_cpu_pageset {
+	struct list_head list;	/* the list of pages */
 	int count;		/* number of pages in the list */
+	int cold_count;		/* number of cold pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
-	struct list_head list;	/* the list of pages */
-};
 
-struct per_cpu_pageset {
-	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
 #ifdef CONFIG_NUMA
 	unsigned long numa_hit;		/* allocated in intended node */
 	unsigned long numa_miss;	/* allocated in non intended node */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -533,10 +533,8 @@ static int rmqueue_bulk(struct zone *zon
 void drain_remote_pages(void)
 {
 	struct zone *zone;
-	int i;
 	unsigned long flags;
 
-	local_irq_save(flags);
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
@@ -544,17 +542,16 @@ void drain_remote_pages(void)
 		if (zone->zone_pgdat->node_id == numa_node_id())
 			continue;
 
-		pset = zone->pageset[smp_processor_id()];
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			if (pcp->count)
-				pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+		local_irq_save(flags);
+		if (zone->zone_pgdat->node_id != numa_node_id()) {
+			pset = zone->pageset[smp_processor_id()];
+			if (pset->count)
+				pset->count -= free_pages_bulk(zone,
+						pset->count, &pset->list, 0);
+			pset->cold_count = min(pset->cold_count, pset->count);
 		}
+		local_irq_restore(flags);
 	}
-	local_irq_restore(flags);
 }
 #endif
 
@@ -563,21 +560,16 @@ static void __drain_pages(unsigned int c
 {
 	unsigned long flags;
 	struct zone *zone;
-	int i;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
 		pset = zone_pcp(zone, cpu);
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			local_irq_save(flags);
-			pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
-			local_irq_restore(flags);
-		}
+		local_irq_save(flags);
+		pset->count -= free_pages_bulk(zone, pset->count,
+							&pset->list, 0);
+		pset->cold_count = min(pset->cold_count, pset->count);
+		local_irq_restore(flags);
 	}
 }
 #endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
@@ -655,7 +647,8 @@ static void FASTCALL(free_hot_cold_page(
 static void fastcall free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
-	struct per_cpu_pages *pcp;
+	struct per_cpu_pageset *pset;
+	struct list_head *entry;
 	unsigned long flags;
 
 	arch_free_page(page, 0);
@@ -664,13 +657,21 @@ static void fastcall free_hot_cold_page(
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(page);
-	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	pset = zone_pcp(zone, get_cpu());
 	local_irq_save(flags);
 	page_state(pgfree)++;
-	list_add(&page->lru, &pcp->list);
-	pcp->count++;
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+	pset->count++;
+	entry = &pset->list;
+	if (cold) {
+		pset->cold_count++;
+		entry = entry->prev; /* tail */
+	}
+	list_add(&page->lru, entry);
+	if (pset->count > pset->high) {
+		pset->count -= free_pages_bulk(zone, pset->batch,
+							&pset->list, 0);
+		pset->cold_count = min(pset->cold_count, pset->count);
+	}
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -708,19 +709,31 @@ buffered_rmqueue(struct zone *zone, int 
 	int cpu = get_cpu();
 
 	if (order == 0) {
-		struct per_cpu_pages *pcp;
+		struct per_cpu_pageset *pset;
+		struct list_head *entry;
 
-		pcp = &zone_pcp(zone, cpu)->pcp[cold];
+		pset = zone_pcp(zone, cpu);
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count += rmqueue_bulk(zone, 0,
-						pcp->batch, &pcp->list);
-			if (unlikely(!pcp->count))
+		if (!pset->count || (cold && !pset->cold_count &&
+				pset->count <= pset->high - (pset->high>>2))) {
+			int count;
+			count = rmqueue_bulk(zone, 0,pset->batch, &pset->list);
+			if (unlikely(!count))
 				goto failed;
+			pset->count += count;
+			pset->cold_count += count;
 		}
-		page = list_entry(pcp->list.next, struct page, lru);
+
+		pset->count--;
+		entry = pset->list.next;
+		if (cold) {
+			if (pset->cold_count)
+				pset->cold_count--;
+			entry = pset->list.prev;
+		}
+		pset->cold_count = min(pset->cold_count, pset->count);
+		page = list_entry(entry, struct page, lru);
 		list_del(&page->lru);
-		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
@@ -1318,7 +1331,7 @@ void si_meminfo_node(struct sysinfo *val
 void show_free_areas(void)
 {
 	struct page_state ps;
-	int cpu, temperature;
+	int cpu;
 	unsigned long active;
 	unsigned long inactive;
 	unsigned long free;
@@ -1335,17 +1348,11 @@ void show_free_areas(void)
 			printk("\n");
 
 		for_each_cpu(cpu) {
-			struct per_cpu_pageset *pageset;
+			struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
 
-			pageset = zone_pcp(zone, cpu);
-
-			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: high %d, batch %d used:%d\n",
-					cpu,
-					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].high,
-					pageset->pcp[temperature].batch,
-					pageset->pcp[temperature].count);
+			printk("cpu %d: high %d, batch %d, pages %d, cold %d\n",
+				cpu, pset->high, pset->batch,
+				pset->count, pset->cold_count);
 		}
 	}
 
@@ -1774,21 +1781,12 @@ static int __devinit zone_batchsize(stru
 
 inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
-	struct per_cpu_pages *pcp;
-
 	memset(p, 0, sizeof(*p));
-
-	pcp = &p->pcp[0];		/* hot */
-	pcp->count = 0;
-	pcp->high = 4 * batch;
-	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
+	p->count = 0;
+	p->cold_count = 0;
+	p->high = 6 * batch;
+	p->batch = max(1UL, 1 * batch);
+	INIT_LIST_HEAD(&p->list);
 }
 
 #ifdef CONFIG_NUMA
@@ -2168,27 +2166,15 @@ static int zoneinfo_show(struct seq_file
 			   ")"
 			   "\n  pagesets");
 		for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
-			struct per_cpu_pageset *pageset;
-			int j;
+			struct per_cpu_pageset *pset;
 
-			pageset = zone_pcp(zone, i);
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				if (pageset->pcp[j].count)
-					break;
-			}
-			if (j == ARRAY_SIZE(pageset->pcp))
-				continue;
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				seq_printf(m,
-					   "\n    cpu: %i pcp: %i"
-					   "\n              count: %i"
-					   "\n              high:  %i"
-					   "\n              batch: %i",
-					   i, j,
-					   pageset->pcp[j].count,
-					   pageset->pcp[j].high,
-					   pageset->pcp[j].batch);
-			}
+			pset = zone_pcp(zone, i);
+			seq_printf(m,
+				   "\n    cpu: %i"
+				   "\n              count: %i"
+				   "\n              high:  %i"
+				   "\n              batch: %i",
+				   i, pset->count, pset->high, pset->batch);
 #ifdef CONFIG_NUMA
 			seq_printf(m,
 				   "\n            numa_hit:       %lu"
@@ -2197,12 +2183,12 @@ static int zoneinfo_show(struct seq_file
 				   "\n            interleave_hit: %lu"
 				   "\n            local_node:     %lu"
 				   "\n            other_node:     %lu",
-				   pageset->numa_hit,
-				   pageset->numa_miss,
-				   pageset->numa_foreign,
-				   pageset->interleave_hit,
-				   pageset->local_node,
-				   pageset->other_node);
+				   pset->numa_hit,
+				   pset->numa_miss,
+				   pset->numa_foreign,
+				   pset->interleave_hit,
+				   pset->local_node,
+				   pset->other_node);
 #endif
 		}
 		seq_printf(m,

  reply	other threads:[~2005-11-06  8:24 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-11-06  8:11 [rfc][patch 0/14] mm: performance improvements Nick Piggin
2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
2005-11-06  8:20   ` [patch 2/14] mm: Nick Piggin
2005-11-06  8:20   ` [patch 2/14] mm: pte prefetch Nick Piggin
2005-11-06  8:21     ` [patch 3/14] mm: release opt Nick Piggin
2005-11-06  8:22       ` [patch 4/14] mm: rmap opt Nick Piggin
2005-11-06  8:23         ` [patch 5/14] mm: set_page_refs opt Nick Piggin
2005-11-06  8:24           ` [patch 6/14] mm: microopt conditions Nick Piggin
2005-11-06  8:24             ` [patch 7/14] mm: remove bad_range Nick Piggin
2005-11-06  8:25               ` [patch 8/14] mm: remove pcp_low Nick Piggin
2005-11-06  8:25                 ` [patch 9/14] mm: page_state opt Nick Piggin
2005-11-06  8:26                   ` Nick Piggin [this message]
2005-11-06  8:26                     ` [patch 11/14] mm: increase pcp size Nick Piggin
2005-11-06  8:27                       ` [patch 12/14] mm: variable " Nick Piggin
2005-11-06  8:27                         ` [patch 13/14] mm: cleanup zone_pcp Nick Piggin
2005-11-06  8:28                           ` [patch 14/14] mm: page_alloc cleanups Nick Piggin
2005-11-13  2:38                   ` [patch 9/14] mm: page_state opt Andi Kleen
2005-11-06 17:37               ` [patch 7/14] mm: remove bad_range Bob Picco
2005-11-07  0:58                 ` Nick Piggin
2005-11-07  3:00                   ` Bob Picco
2005-11-07  3:05                     ` Nick Piggin
2005-11-07  1:40           ` [patch 5/14] mm: set_page_refs opt Christoph Hellwig
2005-11-07  1:45             ` Nick Piggin
2005-11-06  8:35     ` [patch 2/14] mm: pte prefetch Arjan van de Ven
2005-11-06  8:51       ` Nick Piggin
2005-11-06 17:37   ` [patch 1/14] mm: opt rmqueue Andi Kleen
2005-11-07  1:06     ` Nick Piggin
2005-11-07  3:23       ` Andi Kleen
2005-11-07  3:43         ` Nick Piggin
2005-11-07  1:39 ` [rfc][patch 0/14] mm: performance improvements Christoph Hellwig
2005-11-07  1:51   ` Nick Piggin
2005-11-07  3:57     ` Paul Jackson
2005-11-07  4:51       ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=436DBE26.5080504@yahoo.com.au \
    --to=nickpiggin@yahoo.com.au \
    --cc=Linux-Kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox