From: Wu Fengguang <fengguang.wu@intel.com>
To: linux-fsdevel@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Wu Fengguang <fengguang.wu@intel.com>, Jan Kara <jack@suse.cz>,
Christoph Hellwig <hch@lst.de>,
Dave Chinner <david@fromorbit.com>,
Greg Thelen <gthelen@google.com>,
Minchan Kim <minchan.kim@gmail.com>,
Vivek Goyal <vgoyal@redhat.com>,
Andrea Righi <arighi@develer.com>, linux-mm <linux-mm@kvack.org>,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 4/5] writeback: per task dirty rate limit
Date: Sat, 06 Aug 2011 16:44:51 +0800 [thread overview]
Message-ID: <20110806094527.002914580@intel.com> (raw)
In-Reply-To: 20110806084447.388624428@intel.com
[-- Attachment #1: per-task-ratelimit --]
[-- Type: text/plain, Size: 7408 bytes --]
Add two fields to task_struct:
1) nr_dirtied: accounts dirtied pages in the individual tasks, for accuracy
2) nr_dirtied_pause: per-task balance_dirty_pages() call interval, for flexibility
The balance_dirty_pages() call interval (i.e. nr_dirtied_pause) scales
roughly as the square root of the safety gap between the number of dirty
pages and the dirty threshold.
XXX: The main problem with per-task nr_dirtied is that if 10k tasks start
dirtying pages at exactly the same time, each task will be assigned a
large initial nr_dirtied_pause, so the dirty threshold will be
exceeded long before any task reaches its nr_dirtied_pause and hence
calls balance_dirty_pages().
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
include/linux/sched.h | 7 ++
mm/memory_hotplug.c | 3 -
mm/page-writeback.c | 106 +++++++++-------------------------------
3 files changed, 32 insertions(+), 84 deletions(-)
--- linux-next.orig/include/linux/sched.h 2011-08-05 15:36:23.000000000 +0800
+++ linux-next/include/linux/sched.h 2011-08-05 15:39:52.000000000 +0800
@@ -1525,6 +1525,13 @@ struct task_struct {
int make_it_fail;
#endif
struct prop_local_single dirties;
+ /*
+ * when (nr_dirtied >= nr_dirtied_pause), it's time to call
+ * balance_dirty_pages() for some dirty throttling pause
+ */
+ int nr_dirtied;
+ int nr_dirtied_pause;
+
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
--- linux-next.orig/mm/page-writeback.c 2011-08-05 15:39:48.000000000 +0800
+++ linux-next/mm/page-writeback.c 2011-08-05 15:39:52.000000000 +0800
@@ -48,26 +48,6 @@
#define BANDWIDTH_CALC_SHIFT 10
-/*
- * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
- * will look to see if it needs to force writeback or throttling.
- */
-static long ratelimit_pages = 32;
-
-/*
- * When balance_dirty_pages decides that the caller needs to perform some
- * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
- * large amounts of I/O are submitted.
- */
-static inline long sync_writeback_pages(unsigned long dirtied)
-{
- if (dirtied < ratelimit_pages)
- dirtied = ratelimit_pages;
-
- return dirtied + dirtied / 2;
-}
-
/* The following parameters are exported via /proc/sys/vm */
/*
@@ -868,6 +848,23 @@ static void bdi_update_bandwidth(struct
}
/*
+ * After a task has dirtied this many pages, it is time for
+ * balance_dirty_pages_ratelimited_nr() to see if dirty throttling is needed.
+ *
+ * If ratelimit_pages is too low then big NUMA machines will call the expensive
+ * global_page_state() too often. So scale it to about the square root of the
+ * safety margin (the pages we may still dirty without exceeding the limits).
+ */
+static unsigned long ratelimit_pages(unsigned long dirty,
+ unsigned long thresh)
+{
+ if (thresh > dirty)
+ return 1UL << (ilog2(thresh - dirty) >> 1);
+
+ return 1;
+}
+
+/*
* balance_dirty_pages() must be called by processes which are generating dirty
* data. It looks at the number of dirty pages in the machine and will force
* the caller to perform writeback if the system is over `vm_dirty_ratio'.
@@ -1008,6 +1005,9 @@ static void balance_dirty_pages(struct a
if (clear_dirty_exceeded && bdi->dirty_exceeded)
bdi->dirty_exceeded = 0;
+ current->nr_dirtied = 0;
+ current->nr_dirtied_pause = ratelimit_pages(nr_dirty, dirty_thresh);
+
if (writeback_in_progress(bdi))
return;
@@ -1034,8 +1034,6 @@ void set_page_dirty_balance(struct page
}
}
-static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
-
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
@@ -1055,30 +1053,17 @@ void balance_dirty_pages_ratelimited_nr(
{
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long ratelimit;
- unsigned long *p;
if (!bdi_cap_account_dirty(bdi))
return;
- ratelimit = ratelimit_pages;
- if (mapping->backing_dev_info->dirty_exceeded)
+ ratelimit = current->nr_dirtied_pause;
+ if (bdi->dirty_exceeded)
ratelimit = 8;
- /*
- * Check the rate limiting. Also, we do not want to throttle real-time
- * tasks in balance_dirty_pages(). Period.
- */
- preempt_disable();
- p = &__get_cpu_var(bdp_ratelimits);
- *p += nr_pages_dirtied;
- if (unlikely(*p >= ratelimit)) {
- ratelimit = sync_writeback_pages(*p);
- *p = 0;
- preempt_enable();
- balance_dirty_pages(mapping, ratelimit);
- return;
- }
- preempt_enable();
+ current->nr_dirtied += nr_pages_dirtied;
+ if (unlikely(current->nr_dirtied >= ratelimit))
+ balance_dirty_pages(mapping, current->nr_dirtied);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
@@ -1166,44 +1151,6 @@ void laptop_sync_completion(void)
#endif
/*
- * If ratelimit_pages is too high then we can get into dirty-data overload
- * if a large number of processes all perform writes at the same time.
- * If it is too low then SMP machines will call the (expensive)
- * get_writeback_state too often.
- *
- * Here we set ratelimit_pages to a level which ensures that when all CPUs are
- * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
- * thresholds before writeback cuts in.
- *
- * But the limit should not be set too high. Because it also controls the
- * amount of memory which the balance_dirty_pages() caller has to write back.
- * If this is too large then the caller will block on the IO queue all the
- * time. So limit it to four megabytes - the balance_dirty_pages() caller
- * will write six megabyte chunks, max.
- */
-
-void writeback_set_ratelimit(void)
-{
- ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
- if (ratelimit_pages < 16)
- ratelimit_pages = 16;
- if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
- ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
-}
-
-static int __cpuinit
-ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
-{
- writeback_set_ratelimit();
- return NOTIFY_DONE;
-}
-
-static struct notifier_block __cpuinitdata ratelimit_nb = {
- .notifier_call = ratelimit_handler,
- .next = NULL,
-};
-
-/*
* Called early on to tune the page writeback dirty limits.
*
* We used to scale dirty pages according to how total memory
@@ -1225,9 +1172,6 @@ void __init page_writeback_init(void)
{
int shift;
- writeback_set_ratelimit();
- register_cpu_notifier(&ratelimit_nb);
-
shift = calc_period_shift();
prop_descriptor_init(&vm_completions, shift);
prop_descriptor_init(&vm_dirties, shift);
--- linux-next.orig/mm/memory_hotplug.c 2011-08-05 15:36:23.000000000 +0800
+++ linux-next/mm/memory_hotplug.c 2011-08-05 15:39:52.000000000 +0800
@@ -527,8 +527,6 @@ int __ref online_pages(unsigned long pfn
vm_total_pages = nr_free_pagecache_pages();
- writeback_set_ratelimit();
-
if (onlined_pages)
memory_notify(MEM_ONLINE, &arg);
unlock_memory_hotplug();
@@ -970,7 +968,6 @@ repeat:
}
vm_total_pages = nr_free_pagecache_pages();
- writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg);
unlock_memory_hotplug();
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2011-08-06 12:20 UTC|newest]
Thread overview: 128+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-06 8:44 [PATCH 0/5] IO-less dirty throttling v8 Wu Fengguang
2011-08-06 8:44 ` [PATCH 1/5] writeback: account per-bdi accumulated dirtied pages Wu Fengguang
2011-08-06 8:44 ` [PATCH 2/5] writeback: dirty position control Wu Fengguang
2011-08-08 13:46 ` Peter Zijlstra
2011-08-08 14:11 ` Wu Fengguang
2011-08-08 14:31 ` Peter Zijlstra
2011-08-08 22:47 ` Wu Fengguang
2011-08-09 9:31 ` Peter Zijlstra
2011-08-10 12:28 ` Wu Fengguang
2011-08-08 14:41 ` Peter Zijlstra
2011-08-08 23:05 ` Wu Fengguang
2011-08-09 10:32 ` Peter Zijlstra
2011-08-09 17:20 ` Peter Zijlstra
2011-08-10 22:34 ` Jan Kara
2011-08-11 2:29 ` Wu Fengguang
2011-08-11 11:14 ` Jan Kara
2011-08-16 8:35 ` Wu Fengguang
2011-08-12 13:19 ` Wu Fengguang
2011-08-10 21:40 ` Vivek Goyal
2011-08-16 8:55 ` Wu Fengguang
2011-08-11 22:56 ` Peter Zijlstra
2011-08-12 2:43 ` Wu Fengguang
2011-08-12 3:18 ` Wu Fengguang
2011-08-12 5:45 ` Wu Fengguang
2011-08-12 9:45 ` Peter Zijlstra
2011-08-12 11:07 ` Wu Fengguang
2011-08-12 12:17 ` Peter Zijlstra
2011-08-12 9:47 ` Peter Zijlstra
2011-08-12 11:11 ` Wu Fengguang
2011-08-12 12:54 ` Peter Zijlstra
2011-08-12 12:59 ` Wu Fengguang
2011-08-12 13:08 ` Peter Zijlstra
2011-08-12 13:04 ` Peter Zijlstra
2011-08-12 14:20 ` Wu Fengguang
2011-08-22 15:38 ` Peter Zijlstra
2011-08-23 3:40 ` Wu Fengguang
2011-08-23 10:01 ` Peter Zijlstra
2011-08-23 14:15 ` Wu Fengguang
2011-08-23 17:47 ` Vivek Goyal
2011-08-24 0:12 ` Wu Fengguang
2011-08-24 16:12 ` Peter Zijlstra
2011-08-26 0:18 ` Wu Fengguang
2011-08-26 9:04 ` Peter Zijlstra
2011-08-26 10:04 ` Wu Fengguang
2011-08-26 10:42 ` Peter Zijlstra
2011-08-26 10:52 ` Wu Fengguang
2011-08-26 11:26 ` Wu Fengguang
2011-08-26 12:11 ` Peter Zijlstra
2011-08-26 12:20 ` Wu Fengguang
2011-08-26 13:13 ` Wu Fengguang
2011-08-26 13:18 ` Peter Zijlstra
2011-08-26 13:24 ` Wu Fengguang
2011-08-24 18:00 ` Vivek Goyal
2011-08-25 3:19 ` Wu Fengguang
2011-08-25 22:20 ` Vivek Goyal
2011-08-26 1:56 ` Wu Fengguang
2011-08-26 8:56 ` Peter Zijlstra
2011-08-26 9:53 ` Wu Fengguang
2011-08-29 13:12 ` Peter Zijlstra
2011-08-29 13:37 ` Wu Fengguang
2011-09-02 12:16 ` Peter Zijlstra
2011-09-06 12:40 ` Peter Zijlstra
2011-08-24 15:57 ` Peter Zijlstra
2011-08-25 5:30 ` Wu Fengguang
2011-08-23 14:36 ` Vivek Goyal
2011-08-09 2:08 ` Vivek Goyal
2011-08-16 8:59 ` Wu Fengguang
2011-08-06 8:44 ` [PATCH 3/5] writeback: dirty rate control Wu Fengguang
2011-08-09 14:54 ` Vivek Goyal
2011-08-11 3:42 ` Wu Fengguang
2011-08-09 14:57 ` Peter Zijlstra
2011-08-10 11:07 ` Wu Fengguang
2011-08-10 16:17 ` Peter Zijlstra
2011-08-15 14:08 ` Wu Fengguang
2011-08-09 15:50 ` Vivek Goyal
2011-08-09 16:16 ` Peter Zijlstra
2011-08-09 16:19 ` Peter Zijlstra
2011-08-10 14:07 ` Wu Fengguang
2011-08-10 14:00 ` Wu Fengguang
2011-08-10 17:10 ` Peter Zijlstra
2011-08-15 14:11 ` Wu Fengguang
2011-08-09 16:56 ` Peter Zijlstra
2011-08-10 14:10 ` Wu Fengguang
2011-08-09 17:02 ` Peter Zijlstra
2011-08-10 14:15 ` Wu Fengguang
2011-08-06 8:44 ` Wu Fengguang [this message]
2011-08-06 14:35 ` [PATCH 4/5] writeback: per task dirty rate limit Andrea Righi
2011-08-07 6:19 ` Wu Fengguang
2011-08-08 13:47 ` Peter Zijlstra
2011-08-08 14:21 ` Wu Fengguang
2011-08-08 23:32 ` Wu Fengguang
2011-08-08 14:23 ` Wu Fengguang
2011-08-08 14:26 ` Peter Zijlstra
2011-08-08 22:38 ` Wu Fengguang
2011-08-13 16:28 ` Andrea Righi
2011-08-15 14:21 ` Wu Fengguang
2011-08-15 14:26 ` Andrea Righi
2011-08-09 17:46 ` Vivek Goyal
2011-08-10 3:29 ` Wu Fengguang
2011-08-10 18:18 ` Vivek Goyal
2011-08-11 0:55 ` Wu Fengguang
2011-08-09 18:35 ` Peter Zijlstra
2011-08-10 3:40 ` Wu Fengguang
2011-08-10 10:25 ` Peter Zijlstra
2011-08-10 11:13 ` Wu Fengguang
2011-08-06 8:44 ` [PATCH 5/5] writeback: IO-less balance_dirty_pages() Wu Fengguang
2011-08-06 14:48 ` Andrea Righi
2011-08-07 6:44 ` Wu Fengguang
2011-08-06 16:46 ` Andrea Righi
2011-08-07 7:18 ` Wu Fengguang
2011-08-07 9:50 ` Andrea Righi
2011-08-09 18:15 ` Vivek Goyal
2011-08-09 18:41 ` Peter Zijlstra
2011-08-10 3:22 ` Wu Fengguang
2011-08-10 3:26 ` Wu Fengguang
2011-08-09 19:16 ` Vivek Goyal
2011-08-10 4:33 ` Wu Fengguang
2011-08-09 2:01 ` [PATCH 0/5] IO-less dirty throttling v8 Vivek Goyal
2011-08-09 5:55 ` Dave Chinner
2011-08-09 14:04 ` Vivek Goyal
2011-08-10 7:41 ` Greg Thelen
2011-08-10 18:40 ` Vivek Goyal
2011-08-11 3:21 ` Wu Fengguang
2011-08-11 20:42 ` Vivek Goyal
2011-08-11 21:00 ` Vivek Goyal
-- strict thread matches above, loose matches on Subject: below --
2011-08-16 2:20 [PATCH 0/5] IO-less dirty throttling v9 Wu Fengguang
2011-08-16 2:20 ` [PATCH 4/5] writeback: per task dirty rate limit Wu Fengguang
2011-08-16 7:17 ` Andrea Righi
2011-08-16 7:22 ` Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110806094527.002914580@intel.com \
--to=fengguang.wu@intel.com \
--cc=akpm@linux-foundation.org \
--cc=arighi@develer.com \
--cc=david@fromorbit.com \
--cc=gthelen@google.com \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=minchan.kim@gmail.com \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).