Re: [patch] real-time enhanced page allocator and throttling

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Robert Love <rml@tech9.net>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Valdis.Kletnieks@vt.edu,
	piggin@cyberone.com.au, kernel@kolivas.org, linux-mm@kvack.org
Subject: Re: [patch] real-time enhanced page allocator and throttling
Date: 05 Aug 2003 17:39:28 -0700	[thread overview]
Message-ID: <1060130368.4494.166.camel@localhost> (raw)
In-Reply-To: <20030805170954.59385c78.akpm@osdl.org>

On Tue, 2003-08-05 at 17:09, Andrew Morton wrote:

> -void balance_dirty_pages(struct address_space *mapping)
> +static void balance_dirty_pages(struct address_space *mapping)

Hrm. void? I have this as an int in my tree (test2-mm4), did you change
something? The function returns stuff.. I made it a 'static int'

>  		dirty_exceeded = 1;
> +		if (rt_task(current))
> +			break;

OK, this was my other option. I think this is better because, as we have
both said, it allows us to wake up pdflush.

Here is what I have right now, now ..

	Robert Love


 include/linux/sched.h |    4 +++-
 kernel/sched.c        |    1 -
 mm/page-writeback.c   |   11 +++++++++--
 mm/page_alloc.c       |   31 ++++++++++++++++++++++---------
 4 files changed, 34 insertions(+), 13 deletions(-)


diff -urN linux-2.6.0-test2-mm4/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.0-test2-mm4/include/linux/sched.h	2003-08-05 14:53:47.000000000 -0700
+++ linux/include/linux/sched.h	2003-08-05 12:38:41.000000000 -0700
@@ -282,7 +282,9 @@
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
- 
+
+#define rt_task(p)		((p)->prio < MAX_RT_PRIO)
+
 /*
  * Some day this will be a full-fledged user tracking system..
  */
diff -urN linux-2.6.0-test2-mm4/kernel/sched.c linux/kernel/sched.c
--- linux-2.6.0-test2-mm4/kernel/sched.c	2003-08-05 14:53:47.000000000 -0700
+++ linux/kernel/sched.c	2003-08-05 12:38:29.000000000 -0700
@@ -199,7 +199,6 @@
 #define this_rq()		(cpu_rq(smp_processor_id())) /* not __get_cpu_var(runqueues)! */
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-#define rt_task(p)		((p)->prio < MAX_RT_PRIO)
 
 /*
  * Default context-switch locking:
diff -urN linux-2.6.0-test2-mm4/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.6.0-test2-mm4/mm/page_alloc.c	2003-08-05 14:48:38.000000000 -0700
+++ linux/mm/page_alloc.c	2003-08-05 17:22:30.000000000 -0700
@@ -518,7 +518,8 @@
  *
  * Herein lies the mysterious "incremental min".  That's the
  *
- *	min += z->pages_low;
+ *	local_low = z->pages_low;
+ *	min += local_low;
  *
  * thing.  The intent here is to provide additional protection to low zones for
  * allocation requests which _could_ use higher zones.  So a GFP_HIGHMEM
@@ -536,10 +537,11 @@
 	unsigned long min;
 	struct zone **zones, *classzone;
 	struct page *page;
+	struct reclaim_state reclaim_state;
+	struct task_struct *p = current;
 	int i;
 	int cold;
 	int do_retry;
-	struct reclaim_state reclaim_state;
 
 	if (wait)
 		might_sleep();
@@ -557,8 +559,17 @@
 	min = 1UL << order;
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *z = zones[i];
+		unsigned long local_low;
+
+		/*
+		 * This is the fabled 'incremental min'. We let real-time tasks
+		 * dip their real-time paws a little deeper into reserves.
+		 */
+		local_low = z->pages_low;
+		if (rt_task(p))
+			local_low >>= 1;
+		min += local_low;
 
-		min += z->pages_low;
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
@@ -581,6 +592,8 @@
 		local_min = z->pages_min;
 		if (gfp_mask & __GFP_HIGH)
 			local_min >>= 2;
+		if (rt_task(p))
+			local_min >>= 1;
 		min += local_min;
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
@@ -594,7 +607,7 @@
 	/* here we're in the low on memory slow path */
 
 rebalance:
-	if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+	if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
 		/* go through the zonelist yet again, ignoring mins */
 		for (i = 0; zones[i] != NULL; i++) {
 			struct zone *z = zones[i];
@@ -610,14 +623,14 @@
 	if (!wait)
 		goto nopage;
 
-	current->flags |= PF_MEMALLOC;
+	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
-	current->reclaim_state = &reclaim_state;
+	p->reclaim_state = &reclaim_state;
 
 	try_to_free_pages(classzone, gfp_mask, order);
 
-	current->reclaim_state = NULL;
-	current->flags &= ~PF_MEMALLOC;
+	p->reclaim_state = NULL;
+	p->flags &= ~PF_MEMALLOC;
 
 	/* go through the zonelist yet one more time */
 	min = 1UL << order;
@@ -657,7 +670,7 @@
 	if (!(gfp_mask & __GFP_NOWARN)) {
 		printk("%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
-			current->comm, order, gfp_mask);
+			p->comm, order, gfp_mask);
 	}
 	return NULL;
 got_pg:
diff -urN linux-2.6.0-test2-mm4/mm/page-writeback.c linux/mm/page-writeback.c
--- linux-2.6.0-test2-mm4/mm/page-writeback.c	2003-08-05 14:53:47.000000000 -0700
+++ linux/mm/page-writeback.c	2003-08-05 17:35:36.095648523 -0700
@@ -145,7 +145,7 @@
  * If we're over `background_thresh' then pdflush is woken to perform some
  * writeout.
  */
-int balance_dirty_pages(struct address_space *mapping)
+static int balance_dirty_pages(struct address_space *mapping)
 {
 	struct page_state ps;
 	long nr_reclaimable;
@@ -169,9 +169,16 @@
 		nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
 		if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
 			break;
-
 		dirty_exceeded = 1;
 
+		/*
+		 * We do not want to throttle a real-time task here. Ever.
+		 * But we do want to update the accounting and possibly poke
+		 * pdflush below.
+		 */
+		if (rt_task(current))
+			break;
+
 		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been

WARNING: multiple messages have this Message-ID (diff)

From: Robert Love <rml@tech9.net>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Valdis.Kletnieks@vt.edu,
	piggin@cyberone.com.au, kernel@kolivas.org, linux-mm@kvack.org
Subject: Re: [patch] real-time enhanced page allocator and throttling
Date: 05 Aug 2003 17:39:28 -0700	[thread overview]
Message-ID: <1060130368.4494.166.camel@localhost> (raw)
In-Reply-To: <20030805170954.59385c78.akpm@osdl.org>

On Tue, 2003-08-05 at 17:09, Andrew Morton wrote:

> -void balance_dirty_pages(struct address_space *mapping)
> +static void balance_dirty_pages(struct address_space *mapping)

Hrm. void? I have this as an int in my tree (test2-mm4), did you change
something? The function returns stuff.. I made it a 'static int'

>  		dirty_exceeded = 1;
> +		if (rt_task(current))
> +			break;

OK, this was my other option. I think this is better because, as we have
both said, it allows us to wake up pdflush.

Here is what I have right now, now ..

	Robert Love


 include/linux/sched.h |    4 +++-
 kernel/sched.c        |    1 -
 mm/page-writeback.c   |   11 +++++++++--
 mm/page_alloc.c       |   31 ++++++++++++++++++++++---------
 4 files changed, 34 insertions(+), 13 deletions(-)


diff -urN linux-2.6.0-test2-mm4/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.0-test2-mm4/include/linux/sched.h	2003-08-05 14:53:47.000000000 -0700
+++ linux/include/linux/sched.h	2003-08-05 12:38:41.000000000 -0700
@@ -282,7 +282,9 @@
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
- 
+
+#define rt_task(p)		((p)->prio < MAX_RT_PRIO)
+
 /*
  * Some day this will be a full-fledged user tracking system..
  */
diff -urN linux-2.6.0-test2-mm4/kernel/sched.c linux/kernel/sched.c
--- linux-2.6.0-test2-mm4/kernel/sched.c	2003-08-05 14:53:47.000000000 -0700
+++ linux/kernel/sched.c	2003-08-05 12:38:29.000000000 -0700
@@ -199,7 +199,6 @@
 #define this_rq()		(cpu_rq(smp_processor_id())) /* not __get_cpu_var(runqueues)! */
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-#define rt_task(p)		((p)->prio < MAX_RT_PRIO)
 
 /*
  * Default context-switch locking:
diff -urN linux-2.6.0-test2-mm4/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.6.0-test2-mm4/mm/page_alloc.c	2003-08-05 14:48:38.000000000 -0700
+++ linux/mm/page_alloc.c	2003-08-05 17:22:30.000000000 -0700
@@ -518,7 +518,8 @@
  *
  * Herein lies the mysterious "incremental min".  That's the
  *
- *	min += z->pages_low;
+ *	local_low = z->pages_low;
+ *	min += local_low;
  *
  * thing.  The intent here is to provide additional protection to low zones for
  * allocation requests which _could_ use higher zones.  So a GFP_HIGHMEM
@@ -536,10 +537,11 @@
 	unsigned long min;
 	struct zone **zones, *classzone;
 	struct page *page;
+	struct reclaim_state reclaim_state;
+	struct task_struct *p = current;
 	int i;
 	int cold;
 	int do_retry;
-	struct reclaim_state reclaim_state;
 
 	if (wait)
 		might_sleep();
@@ -557,8 +559,17 @@
 	min = 1UL << order;
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *z = zones[i];
+		unsigned long local_low;
+
+		/*
+		 * This is the fabled 'incremental min'. We let real-time tasks
+		 * dip their real-time paws a little deeper into reserves.
+		 */
+		local_low = z->pages_low;
+		if (rt_task(p))
+			local_low >>= 1;
+		min += local_low;
 
-		min += z->pages_low;
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
@@ -581,6 +592,8 @@
 		local_min = z->pages_min;
 		if (gfp_mask & __GFP_HIGH)
 			local_min >>= 2;
+		if (rt_task(p))
+			local_min >>= 1;
 		min += local_min;
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
@@ -594,7 +607,7 @@
 	/* here we're in the low on memory slow path */
 
 rebalance:
-	if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+	if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
 		/* go through the zonelist yet again, ignoring mins */
 		for (i = 0; zones[i] != NULL; i++) {
 			struct zone *z = zones[i];
@@ -610,14 +623,14 @@
 	if (!wait)
 		goto nopage;
 
-	current->flags |= PF_MEMALLOC;
+	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
-	current->reclaim_state = &reclaim_state;
+	p->reclaim_state = &reclaim_state;
 
 	try_to_free_pages(classzone, gfp_mask, order);
 
-	current->reclaim_state = NULL;
-	current->flags &= ~PF_MEMALLOC;
+	p->reclaim_state = NULL;
+	p->flags &= ~PF_MEMALLOC;
 
 	/* go through the zonelist yet one more time */
 	min = 1UL << order;
@@ -657,7 +670,7 @@
 	if (!(gfp_mask & __GFP_NOWARN)) {
 		printk("%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
-			current->comm, order, gfp_mask);
+			p->comm, order, gfp_mask);
 	}
 	return NULL;
 got_pg:
diff -urN linux-2.6.0-test2-mm4/mm/page-writeback.c linux/mm/page-writeback.c
--- linux-2.6.0-test2-mm4/mm/page-writeback.c	2003-08-05 14:53:47.000000000 -0700
+++ linux/mm/page-writeback.c	2003-08-05 17:35:36.095648523 -0700
@@ -145,7 +145,7 @@
  * If we're over `background_thresh' then pdflush is woken to perform some
  * writeout.
  */
-int balance_dirty_pages(struct address_space *mapping)
+static int balance_dirty_pages(struct address_space *mapping)
 {
 	struct page_state ps;
 	long nr_reclaimable;
@@ -169,9 +169,16 @@
 		nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
 		if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
 			break;
-
 		dirty_exceeded = 1;
 
+		/*
+		 * We do not want to throttle a real-time task here. Ever.
+		 * But we do want to update the accounting and possibly poke
+		 * pdflush below.
+		 */
+		if (rt_task(current))
+			break;
+
 		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

next prev parent reply	other threads:[~2003-08-06  0:39 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-08-05 22:13 [patch] real-time enhanced page allocator and throttling Robert Love
2003-08-05 22:13 ` Robert Love
2003-08-06  0:09 ` Andrew Morton
2003-08-06  0:09   ` Andrew Morton
2003-08-06  0:39   ` Robert Love [this message]
2003-08-06  0:39     ` Robert Love
2003-08-06  0:45     ` Andrew Morton
2003-08-06  0:45       ` Andrew Morton
2003-08-06  3:58       ` Robert Love
2003-08-06  3:58         ` Robert Love
2003-08-06  8:41         ` Andrew Morton
2003-08-06  8:41           ` Andrew Morton
2003-08-06 17:01           ` Robert Love
2003-08-06 17:01             ` Robert Love

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1060130368.4494.166.camel@localhost \
    --to=rml@tech9.net \
    --cc=Valdis.Kletnieks@vt.edu \
    --cc=akpm@osdl.org \
    --cc=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=piggin@cyberone.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.