From: Robert Love <rml@tech9.net>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Valdis.Kletnieks@vt.edu,
piggin@cyberone.com.au, kernel@kolivas.org, linux-mm@kvack.org
Subject: Re: [patch] real-time enhanced page allocator and throttling
Date: 05 Aug 2003 17:39:28 -0700 [thread overview]
Message-ID: <1060130368.4494.166.camel@localhost> (raw)
In-Reply-To: <20030805170954.59385c78.akpm@osdl.org>
On Tue, 2003-08-05 at 17:09, Andrew Morton wrote:
> -void balance_dirty_pages(struct address_space *mapping)
> +static void balance_dirty_pages(struct address_space *mapping)
Hrm. void? I have this as an int in my tree (test2-mm4); did you change
something? The function returns a value, so I made it a 'static int'.
> dirty_exceeded = 1;
> + if (rt_task(current))
> + break;
OK, this was my other option. I think this is better because, as we have
both said, it allows us to wake up pdflush.
Here is what I have right now:
Robert Love
include/linux/sched.h | 4 +++-
kernel/sched.c | 1 -
mm/page-writeback.c | 11 +++++++++--
mm/page_alloc.c | 31 ++++++++++++++++++++++---------
4 files changed, 34 insertions(+), 13 deletions(-)
diff -urN linux-2.6.0-test2-mm4/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.0-test2-mm4/include/linux/sched.h 2003-08-05 14:53:47.000000000 -0700
+++ linux/include/linux/sched.h 2003-08-05 12:38:41.000000000 -0700
@@ -282,7 +282,9 @@
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
-
+
+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
+
/*
* Some day this will be a full-fledged user tracking system..
*/
diff -urN linux-2.6.0-test2-mm4/kernel/sched.c linux/kernel/sched.c
--- linux-2.6.0-test2-mm4/kernel/sched.c 2003-08-05 14:53:47.000000000 -0700
+++ linux/kernel/sched.c 2003-08-05 12:38:29.000000000 -0700
@@ -199,7 +199,6 @@
#define this_rq() (cpu_rq(smp_processor_id())) /* not __get_cpu_var(runqueues)! */
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
/*
* Default context-switch locking:
diff -urN linux-2.6.0-test2-mm4/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.6.0-test2-mm4/mm/page_alloc.c 2003-08-05 14:48:38.000000000 -0700
+++ linux/mm/page_alloc.c 2003-08-05 17:22:30.000000000 -0700
@@ -518,7 +518,8 @@
*
* Herein lies the mysterious "incremental min". That's the
*
- * min += z->pages_low;
+ * local_low = z->pages_low;
+ * min += local_low;
*
* thing. The intent here is to provide additional protection to low zones for
* allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
@@ -536,10 +537,11 @@
unsigned long min;
struct zone **zones, *classzone;
struct page *page;
+ struct reclaim_state reclaim_state;
+ struct task_struct *p = current;
int i;
int cold;
int do_retry;
- struct reclaim_state reclaim_state;
if (wait)
might_sleep();
@@ -557,8 +559,17 @@
min = 1UL << order;
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
+ unsigned long local_low;
+
+ /*
+ * This is the fabled 'incremental min'. We let real-time tasks
+ * dip their real-time paws a little deeper into reserves.
+ */
+ local_low = z->pages_low;
+ if (rt_task(p))
+ local_low >>= 1;
+ min += local_low;
- min += z->pages_low;
if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
@@ -581,6 +592,8 @@
local_min = z->pages_min;
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
+ if (rt_task(p))
+ local_min >>= 1;
min += local_min;
if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) {
@@ -594,7 +607,7 @@
/* here we're in the low on memory slow path */
rebalance:
- if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+ if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
/* go through the zonelist yet again, ignoring mins */
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
@@ -610,14 +623,14 @@
if (!wait)
goto nopage;
- current->flags |= PF_MEMALLOC;
+ p->flags |= PF_MEMALLOC;
reclaim_state.reclaimed_slab = 0;
- current->reclaim_state = &reclaim_state;
+ p->reclaim_state = &reclaim_state;
try_to_free_pages(classzone, gfp_mask, order);
- current->reclaim_state = NULL;
- current->flags &= ~PF_MEMALLOC;
+ p->reclaim_state = NULL;
+ p->flags &= ~PF_MEMALLOC;
/* go through the zonelist yet one more time */
min = 1UL << order;
@@ -657,7 +670,7 @@
if (!(gfp_mask & __GFP_NOWARN)) {
printk("%s: page allocation failure."
" order:%d, mode:0x%x\n",
- current->comm, order, gfp_mask);
+ p->comm, order, gfp_mask);
}
return NULL;
got_pg:
diff -urN linux-2.6.0-test2-mm4/mm/page-writeback.c linux/mm/page-writeback.c
--- linux-2.6.0-test2-mm4/mm/page-writeback.c 2003-08-05 14:53:47.000000000 -0700
+++ linux/mm/page-writeback.c 2003-08-05 17:35:36.095648523 -0700
@@ -145,7 +145,7 @@
* If we're over `background_thresh' then pdflush is woken to perform some
* writeout.
*/
-int balance_dirty_pages(struct address_space *mapping)
+static int balance_dirty_pages(struct address_space *mapping)
{
struct page_state ps;
long nr_reclaimable;
@@ -169,9 +169,16 @@
nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
break;
-
dirty_exceeded = 1;
+ /*
+ * We do not want to throttle a real-time task here. Ever.
+ * But we do want to update the accounting and possibly poke
+ * pdflush below.
+ */
+ if (rt_task(current))
+ break;
+
/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
* Unstable writes are a feature of certain networked
* filesystems (i.e. NFS) in which data may have been
WARNING: multiple messages have this Message-ID (diff)
From: Robert Love <rml@tech9.net>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Valdis.Kletnieks@vt.edu,
piggin@cyberone.com.au, kernel@kolivas.org, linux-mm@kvack.org
Subject: Re: [patch] real-time enhanced page allocator and throttling
Date: 05 Aug 2003 17:39:28 -0700 [thread overview]
Message-ID: <1060130368.4494.166.camel@localhost> (raw)
In-Reply-To: <20030805170954.59385c78.akpm@osdl.org>
On Tue, 2003-08-05 at 17:09, Andrew Morton wrote:
> -void balance_dirty_pages(struct address_space *mapping)
> +static void balance_dirty_pages(struct address_space *mapping)
Hrm. void? I have this as an int in my tree (test2-mm4); did you change
something? The function returns a value, so I made it a 'static int'.
> dirty_exceeded = 1;
> + if (rt_task(current))
> + break;
OK, this was my other option. I think this is better because, as we have
both said, it allows us to wake up pdflush.
Here is what I have right now:
Robert Love
include/linux/sched.h | 4 +++-
kernel/sched.c | 1 -
mm/page-writeback.c | 11 +++++++++--
mm/page_alloc.c | 31 ++++++++++++++++++++++---------
4 files changed, 34 insertions(+), 13 deletions(-)
diff -urN linux-2.6.0-test2-mm4/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.0-test2-mm4/include/linux/sched.h 2003-08-05 14:53:47.000000000 -0700
+++ linux/include/linux/sched.h 2003-08-05 12:38:41.000000000 -0700
@@ -282,7 +282,9 @@
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
-
+
+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
+
/*
* Some day this will be a full-fledged user tracking system..
*/
diff -urN linux-2.6.0-test2-mm4/kernel/sched.c linux/kernel/sched.c
--- linux-2.6.0-test2-mm4/kernel/sched.c 2003-08-05 14:53:47.000000000 -0700
+++ linux/kernel/sched.c 2003-08-05 12:38:29.000000000 -0700
@@ -199,7 +199,6 @@
#define this_rq() (cpu_rq(smp_processor_id())) /* not __get_cpu_var(runqueues)! */
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
/*
* Default context-switch locking:
diff -urN linux-2.6.0-test2-mm4/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.6.0-test2-mm4/mm/page_alloc.c 2003-08-05 14:48:38.000000000 -0700
+++ linux/mm/page_alloc.c 2003-08-05 17:22:30.000000000 -0700
@@ -518,7 +518,8 @@
*
* Herein lies the mysterious "incremental min". That's the
*
- * min += z->pages_low;
+ * local_low = z->pages_low;
+ * min += local_low;
*
* thing. The intent here is to provide additional protection to low zones for
* allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
@@ -536,10 +537,11 @@
unsigned long min;
struct zone **zones, *classzone;
struct page *page;
+ struct reclaim_state reclaim_state;
+ struct task_struct *p = current;
int i;
int cold;
int do_retry;
- struct reclaim_state reclaim_state;
if (wait)
might_sleep();
@@ -557,8 +559,17 @@
min = 1UL << order;
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
+ unsigned long local_low;
+
+ /*
+ * This is the fabled 'incremental min'. We let real-time tasks
+ * dip their real-time paws a little deeper into reserves.
+ */
+ local_low = z->pages_low;
+ if (rt_task(p))
+ local_low >>= 1;
+ min += local_low;
- min += z->pages_low;
if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
@@ -581,6 +592,8 @@
local_min = z->pages_min;
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
+ if (rt_task(p))
+ local_min >>= 1;
min += local_min;
if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) {
@@ -594,7 +607,7 @@
/* here we're in the low on memory slow path */
rebalance:
- if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+ if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
/* go through the zonelist yet again, ignoring mins */
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
@@ -610,14 +623,14 @@
if (!wait)
goto nopage;
- current->flags |= PF_MEMALLOC;
+ p->flags |= PF_MEMALLOC;
reclaim_state.reclaimed_slab = 0;
- current->reclaim_state = &reclaim_state;
+ p->reclaim_state = &reclaim_state;
try_to_free_pages(classzone, gfp_mask, order);
- current->reclaim_state = NULL;
- current->flags &= ~PF_MEMALLOC;
+ p->reclaim_state = NULL;
+ p->flags &= ~PF_MEMALLOC;
/* go through the zonelist yet one more time */
min = 1UL << order;
@@ -657,7 +670,7 @@
if (!(gfp_mask & __GFP_NOWARN)) {
printk("%s: page allocation failure."
" order:%d, mode:0x%x\n",
- current->comm, order, gfp_mask);
+ p->comm, order, gfp_mask);
}
return NULL;
got_pg:
diff -urN linux-2.6.0-test2-mm4/mm/page-writeback.c linux/mm/page-writeback.c
--- linux-2.6.0-test2-mm4/mm/page-writeback.c 2003-08-05 14:53:47.000000000 -0700
+++ linux/mm/page-writeback.c 2003-08-05 17:35:36.095648523 -0700
@@ -145,7 +145,7 @@
* If we're over `background_thresh' then pdflush is woken to perform some
* writeout.
*/
-int balance_dirty_pages(struct address_space *mapping)
+static int balance_dirty_pages(struct address_space *mapping)
{
struct page_state ps;
long nr_reclaimable;
@@ -169,9 +169,16 @@
nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
break;
-
dirty_exceeded = 1;
+ /*
+ * We do not want to throttle a real-time task here. Ever.
+ * But we do want to update the accounting and possibly poke
+ * pdflush below.
+ */
+ if (rt_task(current))
+ break;
+
/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
* Unstable writes are a feature of certain networked
* filesystems (i.e. NFS) in which data may have been
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:aart@kvack.org">aart@kvack.org</a>
next prev parent reply other threads:[~2003-08-06 0:39 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-08-05 22:13 [patch] real-time enhanced page allocator and throttling Robert Love
2003-08-05 22:13 ` Robert Love
2003-08-06 0:09 ` Andrew Morton
2003-08-06 0:09 ` Andrew Morton
2003-08-06 0:39 ` Robert Love [this message]
2003-08-06 0:39 ` Robert Love
2003-08-06 0:45 ` Andrew Morton
2003-08-06 0:45 ` Andrew Morton
2003-08-06 3:58 ` Robert Love
2003-08-06 3:58 ` Robert Love
2003-08-06 8:41 ` Andrew Morton
2003-08-06 8:41 ` Andrew Morton
2003-08-06 17:01 ` Robert Love
2003-08-06 17:01 ` Robert Love
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1060130368.4494.166.camel@localhost \
--to=rml@tech9.net \
--cc=Valdis.Kletnieks@vt.edu \
--cc=akpm@osdl.org \
--cc=kernel@kolivas.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=piggin@cyberone.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.