linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Mel Gorman <mel@csn.ul.ie>,
	Linux Memory Management List <linux-mm@kvack.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>,
	Rik van Riel <riel@redhat.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Nick Piggin <npiggin@suse.de>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Lin Ming <ming.m.lin@intel.com>,
	Zhang Yanmin <yanmin_zhang@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH 29/35] Do not store the PCP high and batch watermarks in the per-cpu structure
Date: Mon, 16 Mar 2009 09:46:24 +0000	[thread overview]
Message-ID: <1237196790-7268-30-git-send-email-mel@csn.ul.ie> (raw)
In-Reply-To: <1237196790-7268-1-git-send-email-mel@csn.ul.ie>

Currently, there are high and batch counters in the per-cpu structure.
This might have made sense when there were hot and cold per-cpu
structures, but that is no longer the case. In practice, all the per-cpu
structures for a zone contain the same values and they are read-mostly.
This patch stores them in the zone with the watermarks which are also
read-mostly.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---
 include/linux/mmzone.h |    8 ++++++--
 mm/page_alloc.c        |   43 +++++++++++++++++++++++--------------------
 mm/vmstat.c            |    4 ++--
 3 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index eed6867..b4fba09 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -166,8 +166,6 @@ static inline int is_unevictable_lru(enum lru_list l)
 
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
-	int high;		/* high watermark, emptying needed */
-	int batch;		/* chunk size for buddy add/remove */
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[MIGRATE_PCPTYPES];
@@ -285,6 +283,12 @@ struct zone {
 		unsigned long pages_mark[3];
 	};
 
+	/* high watermark for per-cpu lists, emptying needed */
+	u16 pcp_high;
+
+	/* chunk size for buddy add/remove to per-cpu lists*/
+	u16 pcp_batch;
+
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index edadab1..77e9970 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -975,8 +975,8 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	int to_drain;
 
 	local_irq_save(flags);
-	if (pcp->count >= pcp->batch)
-		to_drain = pcp->batch;
+	if (pcp->count >= zone->pcp_batch)
+		to_drain = zone->pcp_batch;
 	else
 		to_drain = pcp->count;
 	free_pcppages_bulk(zone, to_drain, pcp, 0);
@@ -1119,8 +1119,8 @@ static void free_hot_cold_page(struct page *page, int order, int cold)
 	set_page_private(page, migratetype);
 	page->index = order;
 	add_pcp_page(pcp, page, cold);
-	if (pcp->count >= pcp->high)
-		free_pcppages_bulk(zone, pcp->batch, pcp, migratetype);
+	if (pcp->count >= zone->pcp_high)
+		free_pcppages_bulk(zone, zone->pcp_batch, pcp, migratetype);
 
 out:
 	local_irq_restore(flags);
@@ -1184,7 +1184,7 @@ again:
 
 		pcp = &zone_pcp(zone, cpu)->pcp;
 		list = &pcp->lists[migratetype];
-		batch = max(1, pcp->batch >> order);
+		batch = max(1, zone->pcp_batch >> order);
 		local_irq_save(flags);
 		if (list_empty(list)) {
 			delta = rmqueue_bulk(zone, order, batch,
@@ -2144,8 +2144,8 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
 			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
-			       cpu, pageset->pcp.high,
-			       pageset->pcp.batch, pageset->pcp.count);
+			       cpu, zone->pcp_high,
+			       zone->pcp_batch, pageset->pcp.count);
 		}
 	}
 
@@ -2975,7 +2975,8 @@ static int zone_batchsize(struct zone *zone)
 	return batch;
 }
 
-static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static void setup_pageset(struct zone *zone,
+			struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
 	int migratetype;
@@ -2984,8 +2985,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
 	pcp = &p->pcp;
 	pcp->count = 0;
-	pcp->high = 6 * batch;
-	pcp->batch = max(1UL, 1 * batch);
+	zone->pcp_high = 6 * batch;
+	zone->pcp_batch = max(1UL, 1 * batch);
 	for (migratetype = 0; migratetype < MIGRATE_TYPES; migratetype++)
 		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
@@ -2995,16 +2996,17 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
  * to the value high for the pageset p.
  */
 
-static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+static void setup_pagelist_highmark(struct zone *zone,
+				struct per_cpu_pageset *p,
 				unsigned long high)
 {
 	struct per_cpu_pages *pcp;
 
 	pcp = &p->pcp;
-	pcp->high = high;
-	pcp->batch = max(1UL, high/4);
+	zone->pcp_high = high;
+	zone->pcp_batch = max(1UL, high/4);
 	if ((high/4) > (PAGE_SHIFT * 8))
-		pcp->batch = PAGE_SHIFT * 8;
+		zone->pcp_batch = PAGE_SHIFT * 8;
 }
 
 
@@ -3049,10 +3051,10 @@ static int __cpuinit process_zones(int cpu)
 		if (!zone_pcp(zone, cpu))
 			goto bad;
 
-		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+		setup_pageset(zone, zone_pcp(zone, cpu), zone_batchsize(zone));
 
 		if (percpu_pagelist_fraction)
-			setup_pagelist_highmark(zone_pcp(zone, cpu),
+			setup_pagelist_highmark(zone, zone_pcp(zone, cpu),
 			 	(zone->present_pages / percpu_pagelist_fraction));
 	}
 
@@ -3178,9 +3180,9 @@ static __meminit void zone_pcp_init(struct zone *zone)
 #ifdef CONFIG_NUMA
 		/* Early boot. Slab allocator not functional yet */
 		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
+		setup_pageset(zone, &boot_pageset[cpu], 0);
 #else
-		setup_pageset(zone_pcp(zone,cpu), batch);
+		setup_pageset(zone, zone_pcp(zone, cpu), batch);
 #endif
 	}
 	if (zone->present_pages)
@@ -4771,7 +4773,7 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 /*
- * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * percpu_pagelist_fraction - changes the zone->pcp_high for each zone on each
  * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
  * can have before it gets flushed back to buddy allocator.
  */
@@ -4790,7 +4792,8 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 		for_each_online_cpu(cpu) {
 			unsigned long  high;
 			high = zone->present_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+			setup_pagelist_highmark(zone, zone_pcp(zone, cpu),
+									high);
 		}
 	}
 	return 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9114974..3be59b1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -766,8 +766,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 			   "\n              batch: %i",
 			   i,
 			   pageset->pcp.count,
-			   pageset->pcp.high,
-			   pageset->pcp.batch);
+			   zone->pcp_high,
+			   zone->pcp_batch);
 #ifdef CONFIG_SMP
 		seq_printf(m, "\n  vm stats threshold: %d",
 				pageset->stat_threshold);
-- 
1.5.6.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  parent reply	other threads:[~2009-03-16  9:44 UTC|newest]

Thread overview: 94+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-16  9:45 [PATCH 00/35] Cleanup and optimise the page allocator V3 Mel Gorman
2009-03-16  9:45 ` [PATCH 01/35] Replace __alloc_pages_internal() with __alloc_pages_nodemask() Mel Gorman
2009-03-16 15:49   ` Christoph Lameter
2009-03-16  9:45 ` [PATCH 02/35] Do not sanity check order in the fast path Mel Gorman
2009-03-16 15:52   ` Christoph Lameter
2009-03-16  9:45 ` [PATCH 03/35] Do not check NUMA node ID when the caller knows the node is valid Mel Gorman
2009-03-16  9:45 ` [PATCH 04/35] Check only once if the zonelist is suitable for the allocation Mel Gorman
2009-03-16  9:46 ` [PATCH 05/35] Break up the allocator entry point into fast and slow paths Mel Gorman
2009-03-16  9:46 ` [PATCH 06/35] Move check for disabled anti-fragmentation out of fastpath Mel Gorman
2009-03-16 15:54   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 07/35] Check in advance if the zonelist needs additional filtering Mel Gorman
2009-03-16  9:46 ` [PATCH 08/35] Calculate the preferred zone for allocation only once Mel Gorman
2009-03-16  9:46 ` [PATCH 09/35] Calculate the migratetype " Mel Gorman
2009-03-16  9:46 ` [PATCH 10/35] Calculate the alloc_flags " Mel Gorman
2009-03-16  9:46 ` [PATCH 11/35] Calculate the cold parameter " Mel Gorman
2009-03-16  9:46 ` [PATCH 12/35] Remove a branch by assuming __GFP_HIGH == ALLOC_HIGH Mel Gorman
2009-03-16  9:46 ` [PATCH 13/35] Inline __rmqueue_smallest() Mel Gorman
2009-03-16  9:46 ` [PATCH 14/35] Inline buffered_rmqueue() Mel Gorman
2009-03-16  9:46 ` [PATCH 15/35] Inline __rmqueue_fallback() Mel Gorman
2009-03-16 15:57   ` Christoph Lameter
2009-03-16 16:25     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 16/35] Save text by reducing call sites of __rmqueue() Mel Gorman
2009-03-16  9:46 ` [PATCH 17/35] Do not call get_pageblock_migratetype() more than necessary Mel Gorman
2009-03-16 16:00   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 18/35] Do not disable interrupts in free_page_mlock() Mel Gorman
2009-03-16 16:05   ` Christoph Lameter
2009-03-16 16:29     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 19/35] Do not setup zonelist cache when there is only one node Mel Gorman
2009-03-16 16:06   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 20/35] Use a pre-calculated value for num_online_nodes() Mel Gorman
2009-03-16 11:42   ` Nick Piggin
2009-03-16 11:46     ` Nick Piggin
2009-03-16 16:08   ` Christoph Lameter
2009-03-16 16:36     ` Mel Gorman
2009-03-16 16:47       ` Christoph Lameter
2009-03-18 15:08         ` Mel Gorman
2009-03-18 16:58           ` Christoph Lameter
2009-03-18 18:01             ` Mel Gorman
2009-03-18 19:10               ` Christoph Lameter
2009-03-19 20:43                 ` Christoph Lameter
2009-03-19 21:29                   ` Mel Gorman
2009-03-19 22:22                     ` Christoph Lameter
2009-03-19 22:33                       ` Mel Gorman
2009-03-19 22:42                         ` Christoph Lameter
2009-03-19 22:52                           ` Mel Gorman
2009-03-19 22:06                   ` Mel Gorman
2009-03-19 22:39                     ` Christoph Lameter
2009-03-19 22:21                   ` Mel Gorman
2009-03-19 22:24                     ` Christoph Lameter
2009-03-19 23:04                       ` Mel Gorman
2009-03-16  9:46 ` [PATCH 21/35] Do not check for compound pages during the page allocator sanity checks Mel Gorman
2009-03-16 16:09   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 22/35] Use allocation flags as an index to the zone watermark Mel Gorman
2009-03-16 16:11   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 23/35] Update NR_FREE_PAGES only as necessary Mel Gorman
2009-03-16 16:17   ` Christoph Lameter
2009-03-16 16:42     ` Mel Gorman
2009-03-16 16:48       ` Christoph Lameter
2009-03-16 16:58         ` Mel Gorman
2009-03-16  9:46 ` [PATCH 24/35] Convert gfp_zone() to use a table of precalculated values Mel Gorman
2009-03-16 16:19   ` Christoph Lameter
2009-03-16 16:45     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 25/35] Re-sort GFP flags and fix whitespace alignment for easier reading Mel Gorman
2009-03-16  9:46 ` [PATCH 26/35] Use the per-cpu allocator for orders up to PAGE_ALLOC_COSTLY_ORDER Mel Gorman
2009-03-16 16:26   ` Christoph Lameter
2009-03-16 16:47     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 27/35] Split per-cpu list into one-list-per-migrate-type Mel Gorman
2009-03-16  9:46 ` [PATCH 28/35] Batch free pages from migratetype per-cpu lists Mel Gorman
2009-03-16  9:46 ` Mel Gorman [this message]
2009-03-16 16:30   ` [PATCH 29/35] Do not store the PCP high and batch watermarks in the per-cpu structure Christoph Lameter
2009-03-16  9:46 ` [PATCH 30/35] Skip the PCP list search by counting the order and type of pages on list Mel Gorman
2009-03-16 16:31   ` Christoph Lameter
2009-03-16 16:51     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 31/35] Optimistically check the first page on the PCP free list is suitable Mel Gorman
2009-03-16 16:33   ` Christoph Lameter
2009-03-16 16:52     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 32/35] Inline next_zones_zonelist() of the zonelist scan in the fastpath Mel Gorman
2009-03-16  9:46 ` [PATCH 33/35] Do not merge buddies until they are needed by a high-order allocation or anti-fragmentation Mel Gorman
2009-03-16  9:46 ` [PATCH 34/35] Allow compound pages to be stored on the PCP lists Mel Gorman
2009-03-16 16:47   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 35/35] Allow up to 4MB PCP lists due to compound pages Mel Gorman
2009-03-16 10:40 ` [PATCH 00/35] Cleanup and optimise the page allocator V3 Nick Piggin
2009-03-16 11:19   ` Mel Gorman
2009-03-16 11:33     ` Nick Piggin
2009-03-16 12:02       ` Mel Gorman
2009-03-16 12:25         ` Nick Piggin
2009-03-16 13:32           ` Mel Gorman
2009-03-16 15:53             ` Nick Piggin
2009-03-16 16:56               ` Mel Gorman
2009-03-16 17:05                 ` Nick Piggin
2009-03-18 15:07                   ` Mel Gorman
2009-03-16 11:45 ` Nick Piggin
2009-03-16 12:11   ` Mel Gorman
2009-03-16 12:28     ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1237196790-7268-30-git-send-email-mel@csn.ul.ie \
    --to=mel@csn.ul.ie \
    --cc=cl@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ming.m.lin@intel.com \
    --cc=npiggin@suse.de \
    --cc=penberg@cs.helsinki.fi \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=yanmin_zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).