All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Hansen <haveblue@us.ibm.com>
To: Andrew Morton <akpm@zip.com.au>
Cc: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>,
	William Lee Irwin III <wli@holomorphy.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH] per-zone kswapd process
Date: Thu, 12 Sep 2002 20:33:32 -0700	[thread overview]
Message-ID: <3D815C8C.4050000@us.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1119 bytes --]

This patch implements a kswapd process for each memory zone.  The original code 
came from Bill Irwin, but the current VM is quite a bit different from the one 
that he wrote it for, so not much remains.  The current kswapd interface is much 
simpler than before because there is a single waitqueue and a single place where 
it is emptied.

kswapd_can_sleep() and kswapd_balance() are simpler now that the extra pgdat 
level of indirection is gone.

Tested on an 8-way PIII with highmem off and then with 4GB support.  With 4GB support, I 
did 20 parallel greps through a 10GB fileset while some other processes 
allocated and freed 1-2GB chunks of memory.  That gave kswapd a good workout, 
and I observed it running the zone Highmem and zone Normal kswapd threads.  So, 
it survives my torture test.  It also removes more code than it adds.

include/linux/mmzone.h |    2 +
include/linux/swap.h   |    1
mm/page_alloc.c        |   11 +++++-
mm/vmscan.c            |   88 +++++++++++++++++--------------------------------
4 files changed, 42 insertions(+), 60 deletions(-)

-- 
Dave Hansen
haveblue@us.ibm.com




[-- Attachment #2: per-zone-kswapd-2.5.34-mm2-3.patch --]
[-- Type: text/plain, Size: 6092 bytes --]

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.625   -> 1.628  
#	include/linux/mmzone.h	1.19    -> 1.20   
#	include/linux/swap.h	1.57    -> 1.58   
#	     mm/page_alloc.c	1.98    -> 1.101  
#	         mm/vmscan.c	1.102   -> 1.105  
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/12	haveblue@elm3b96.(none)	1.626
# add per-zone kswapd
# --------------------------------------------
# 02/09/12	haveblue@elm3b96.(none)	1.627
# fix some wli-indicated formatting bits
# --------------------------------------------
# 02/09/12	haveblue@elm3b96.(none)	1.628
# move waitqueue init to a more appropriate place 
# --------------------------------------------
#
diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h	Thu Sep 12 20:24:39 2002
+++ b/include/linux/mmzone.h	Thu Sep 12 20:24:39 2002
@@ -108,6 +108,8 @@
 	unsigned long		wait_table_size;
 	unsigned long		wait_table_bits;
 
+	wait_queue_head_t       kswapd_wait;	
+	
 	/*
 	 * Discontig memory support fields.
 	 */
diff -Nru a/include/linux/swap.h b/include/linux/swap.h
--- a/include/linux/swap.h	Thu Sep 12 20:24:39 2002
+++ b/include/linux/swap.h	Thu Sep 12 20:24:39 2002
@@ -162,7 +162,6 @@
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern wait_queue_head_t kswapd_wait;
 extern int try_to_free_pages(struct zone *, unsigned int, unsigned int);
 
 /* linux/mm/page_io.c */
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	Thu Sep 12 20:24:39 2002
+++ b/mm/page_alloc.c	Thu Sep 12 20:24:39 2002
@@ -345,8 +345,15 @@
 	classzone->need_balance = 1;
 	mb();
 	/* we're somewhat low on memory, failed to find what we needed */
-	if (waitqueue_active(&kswapd_wait))
-		wake_up_interruptible(&kswapd_wait);
+	for (i = 0; zones[i] != NULL; i++) {
+		struct zone *z = zones[i];
+
+		/* We don't want to go swapping on zones that aren't actually
+		 * low.  This accounts for "incremental min" from last loop */
+		if (z->free_pages <= z->pages_low &&
+		    waitqueue_active(&z->kswapd_wait)) 
+			wake_up_interruptible(&z->kswapd_wait);
+	}
 
 	/* Go through the zonelist again, taking __GFP_HIGH into account */
 	min = 1UL << order;
@@ -874,6 +881,8 @@
 		for(i = 0; i < zone->wait_table_size; ++i)
 			init_waitqueue_head(zone->wait_table + i);
 
+		init_waitqueue_head(&zone->kswapd_wait);
+		
 		pgdat->nr_zones = j+1;
 
 		mask = (realsize / zone_balance_ratio[j]);
diff -Nru a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	Thu Sep 12 20:24:39 2002
+++ b/mm/vmscan.c	Thu Sep 12 20:24:39 2002
@@ -713,8 +713,6 @@
 	return 0;
 }
 
-DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
-
 static int check_classzone_need_balance(struct zone *classzone)
 {
 	struct zone *first_classzone;
@@ -728,71 +726,33 @@
 	return 1;
 }
 
-static int kswapd_balance_pgdat(pg_data_t * pgdat)
+static int kswapd_balance_zone(struct zone *zone)
 {
-	int need_more_balance = 0, i;
-	struct zone *zone;
-
-	for (i = pgdat->nr_zones-1; i >= 0; i--) {
-		zone = pgdat->node_zones + i;
+	int need_more_balance = 0;
+	
+	do {
 		cond_resched();
 		if (!zone->need_balance)
-			continue;
+			break;
 		if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
 			zone->need_balance = 0;
 			__set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(HZ);
-			continue;
+			break;
 		}
 		if (check_classzone_need_balance(zone))
 			need_more_balance = 1;
 		else
 			zone->need_balance = 0;
-	}
-
-	return need_more_balance;
-}
-
-static void kswapd_balance(void)
-{
-	int need_more_balance;
-	pg_data_t * pgdat;
-
-	do {
-		need_more_balance = 0;
-		pgdat = pgdat_list;
-		do
-			need_more_balance |= kswapd_balance_pgdat(pgdat);
-		while ((pgdat = pgdat->pgdat_next));
 	} while (need_more_balance);
-}
 
-static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
-{
-	struct zone *zone;
-	int i;
-
-	for (i = pgdat->nr_zones-1; i >= 0; i--) {
-		zone = pgdat->node_zones + i;
-		if (!zone->need_balance)
-			continue;
-		return 0;
-	}
-
-	return 1;
+	return 0;
 }
 
-static int kswapd_can_sleep(void)
+static int kswapd_can_sleep_zone(struct zone *zone)
 {
-	pg_data_t * pgdat;
-
-	pgdat = pgdat_list;
-	do {
-		if (kswapd_can_sleep_pgdat(pgdat))
-			continue;
-		return 0;
-	} while ((pgdat = pgdat->pgdat_next));
-
+	if (zone->need_balance)
+		return 0;	
 	return 1;
 }
 
@@ -809,13 +769,18 @@
  * If there are applications that are active memory-allocators
  * (most normal use), this basically shouldn't matter.
  */
-int kswapd(void *unused)
+int kswapd_zone(void *p)
 {
+	struct zone *zone = (struct zone *)p;
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
+	
+	printk( "kswapd%d starting for %s\n", 
+			zone - zone->zone_pgdat->node_zones, 
+			zone->name);
 
 	daemonize();
-	strcpy(tsk->comm, "kswapd");
+	sprintf(tsk->comm, "kswapd%d", zone - zone->zone_pgdat->node_zones);
 	sigfillset(&tsk->blocked);
 	
 	/*
@@ -839,30 +804,37 @@
 		if (current->flags & PF_FREEZE)
 			refrigerator(PF_IOTHREAD);
 		__set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kswapd_wait, &wait);
+		add_wait_queue(&zone->kswapd_wait, &wait);
 
 		mb();
-		if (kswapd_can_sleep())
+		if (kswapd_can_sleep_zone(zone))
 			schedule();
 
 		__set_current_state(TASK_RUNNING);
-		remove_wait_queue(&kswapd_wait, &wait);
+		remove_wait_queue(&zone->kswapd_wait, &wait);
 
 		/*
 		 * If we actually get into a low-memory situation,
 		 * the processes needing more memory will wake us
 		 * up on a more timely basis.
 		 */
-		kswapd_balance();
+		kswapd_balance_zone(zone);
 		blk_run_queues();
 	}
 }
 
 static int __init kswapd_init(void)
 {
+	struct zone* zone;
+
 	printk("Starting kswapd\n");
 	swap_setup();
-	kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
+	for_each_zone(zone)
+		if (zone->size)
+			kernel_thread(kswapd_zone, 
+				      zone, 
+				      CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
+	
 	return 0;
 }
 

             reply	other threads:[~2002-09-13  3:30 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-09-13  3:33 Dave Hansen [this message]
2002-09-13  4:06 ` [PATCH] per-zone kswapd process Andrew Morton
2002-09-13  4:06   ` Andrew Morton
2002-09-13  4:59   ` William Lee Irwin III
2002-09-13  4:59     ` William Lee Irwin III
2002-09-13  5:10     ` Martin J. Bligh
2002-09-13  5:10       ` Martin J. Bligh
     [not found]       ` <3D8232DE.9090000@us.ibm.com>
     [not found]         ` <3D823702.8E29AB4F@digeo.com>
     [not found]           ` <3D8251D6.3060704@us.ibm.com>
     [not found]             ` <3D82566B.EB2939D5@digeo.com>
2002-09-13 22:52               ` [PATCH] per-zone^Wnode " Dave Hansen
2002-09-13 23:24                 ` Matthew Dobson
2002-09-13 23:29                 ` Matthew Dobson
2002-09-13 23:46                 ` William Lee Irwin III
2002-09-14  0:02                   ` Andrew Morton
2002-09-14  0:12                     ` William Lee Irwin III
2002-09-14  1:19                       ` Andrew Morton
2002-09-13  5:46     ` [PATCH] per-zone " Andrew Morton
2002-09-13  5:46       ` Andrew Morton
2002-09-13  5:38       ` Martin J. Bligh
2002-09-13  5:38         ` Martin J. Bligh
2002-09-13  6:03         ` Andrew Morton
2002-09-13  6:03           ` Andrew Morton
2002-09-13 13:05     ` Alan Cox
2002-09-13 13:05       ` Alan Cox
2002-09-13 21:30       ` William Lee Irwin III
2002-09-13 21:30         ` William Lee Irwin III
2002-09-18 16:07         ` [PATCH] recognize MAP_LOCKED in mmap() call Hubertus Franke
2002-09-18 16:29           ` Andrew Morton
2002-09-18 16:29             ` Andrew Morton
2002-09-16  5:44     ` [PATCH] per-zone kswapd process Daniel Phillips
2002-09-16  5:44       ` Daniel Phillips
2002-09-16  7:46       ` William Lee Irwin III
2002-09-16  7:46         ` William Lee Irwin III
2002-09-16 15:12         ` Rik van Riel
2002-09-16 15:12           ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D815C8C.4050000@us.ibm.com \
    --to=haveblue@us.ibm.com \
    --cc=Martin.Bligh@us.ibm.com \
    --cc=akpm@zip.com.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.