From: Fengguang Wu <fengguang.wu@intel.com>
To: Linux Memory Management List <linux-mm@kvack.org>
Cc: Vivek Goyal <vgoyal@redhat.com>,
Andrea Righi <arighi@develer.com>,
Wu Fengguang <fengguang.wu@intel.com>
Cc: Suresh Jayaraman <sjayaraman@suse.com>,
Andrea Righi <andrea@betterlinux.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: <linux-fsdevel@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 4/6] blk-cgroup: buffered write IO controller - bandwidth limit
Date: Wed, 28 Mar 2012 20:13:12 +0800 [thread overview]
Message-ID: <20120328131153.382173637@intel.com> (raw)
In-Reply-To: 20120328121308.568545879@intel.com
[-- Attachment #1: writeback-io-controller.patch --]
[-- Type: text/plain, Size: 4945 bytes --]
A bare-bones per-cgroup buffered write IO controller.
Basically, when there are N dd tasks running in the blkcg,
blkcg->dirty_ratelimit will be balanced around
blkcg->buffered_write_bps / N
and each blkcg task will be throttled under
blkcg->dirty_ratelimit
or
min(blkcg->dirty_ratelimit, bdi->dirty_ratelimit)
when there are other tasks dirtying pages in the system.
CC: Vivek Goyal <vgoyal@redhat.com>
CC: Andrea Righi <arighi@develer.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
include/linux/blk-cgroup.h | 20 +++++++++++
mm/page-writeback.c | 59 +++++++++++++++++++++++++++++++++++
2 files changed, 79 insertions(+)
--- linux-next.orig/mm/page-writeback.c 2012-03-28 15:36:16.414093131 +0800
+++ linux-next/mm/page-writeback.c 2012-03-28 15:40:25.446088022 +0800
@@ -1145,6 +1145,54 @@ static long bdi_min_pause(struct backing
return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}
+#ifdef CONFIG_BLK_DEV_THROTTLING
+static void blkcg_update_dirty_ratelimit(struct blkio_cgroup *blkcg, /* re-estimates blkcg->dirty_ratelimit from the recently observed dirtying rate */
+ unsigned long dirtied, /* current nr_dirtied reading supplied by the caller */
+ unsigned long elapsed) /* jiffies since blkcg->bw_time_stamp; used as a divisor, so must be non-zero */
+{
+ unsigned long long bps = blkcg_buffered_write_bps(blkcg); /* configured limit; presumably bytes/s given the PAGE_SHIFT conversion below */
+ unsigned long long ratelimit;
+ unsigned long dirty_rate;
+
+ dirty_rate = (dirtied - blkcg->dirtied_stamp) * HZ; /* amount dirtied since the last snapshot, scaled by HZ ... */
+ dirty_rate /= elapsed; /* ... then divided by elapsed jiffies: a per-second dirtying rate */
+
+ ratelimit = blkcg->dirty_ratelimit;
+ ratelimit *= div_u64(bps, dirty_rate + 1); /* scale by limit / observed rate; "+ 1" avoids a zero divisor; the quotient is an integer, so the ratio is truncated */
+ ratelimit = min(ratelimit, bps); /* cap the new estimate at the configured limit */
+ ratelimit >>= PAGE_SHIFT; /* divide by the page size (presumably bytes/s -> pages/s) */
+
+ blkcg->dirty_ratelimit = (blkcg->dirty_ratelimit + ratelimit) / 2 + 1; /* average the old and new values; "+ 1" keeps the result non-zero */
+}
+
+void blkcg_update_bandwidth(struct blkio_cgroup *blkcg) /* refreshes blkcg->dirty_ratelimit, but only when more than MAX_PAUSE jiffies have passed since the last snapshot */
+{
+ unsigned long now = jiffies;
+ unsigned long dirtied;
+ unsigned long elapsed;
+
+ if (!blkcg) /* no cgroup given: nothing to update */
+ return;
+ if (!spin_trylock(&blkcg->lock)) /* lock not available: skip this round rather than wait for it */
+ return;
+
+ elapsed = now - blkcg->bw_time_stamp; /* jiffies since the last snapshot */
+ dirtied = percpu_counter_read(&blkcg->nr_dirtied); /* current value of the cgroup's nr_dirtied counter */
+
+ if (elapsed > MAX_PAUSE * 2) /* interval too long: skip the estimate and only restart the sampling window */
+ goto snapshot;
+ if (elapsed <= MAX_PAUSE) /* interval too short: leave both stamps and the ratelimit untouched */
+ goto unlock;
+
+ blkcg_update_dirty_ratelimit(blkcg, dirtied, elapsed); /* here MAX_PAUSE < elapsed <= 2 * MAX_PAUSE */
+snapshot: /* record the baseline that the next call measures against */
+ blkcg->dirtied_stamp = dirtied;
+ blkcg->bw_time_stamp = now;
+unlock:
+ spin_unlock(&blkcg->lock);
+}
+#endif
+
/*
* balance_dirty_pages() must be called by processes which are generating dirty
* data. It looks at the number of dirty pages in the machine and will force
@@ -1174,6 +1222,7 @@ static void balance_dirty_pages(struct a
unsigned long pos_ratio;
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long start_time = jiffies;
+ struct blkio_cgroup *blkcg = task_blkio_cgroup(current);
for (;;) {
unsigned long now = jiffies;
@@ -1198,6 +1247,8 @@ static void balance_dirty_pages(struct a
freerun = dirty_freerun_ceiling(dirty_thresh,
background_thresh);
if (nr_dirty <= freerun) {
+ if (blkcg_buffered_write_bps(blkcg))
+ goto blkcg_bps;
current->dirty_paused_when = now;
current->nr_dirtied = 0;
current->nr_dirtied_pause =
@@ -1263,6 +1314,14 @@ static void balance_dirty_pages(struct a
task_ratelimit = (u64)task_ratelimit *
blkcg_weight(blkcg) / BLKIO_WEIGHT_DEFAULT;
+ if (blkcg_buffered_write_bps(blkcg) &&
+ task_ratelimit > blkcg_dirty_ratelimit(blkcg)) {
+blkcg_bps:
+ blkcg_update_bandwidth(blkcg);
+ dirty_ratelimit = blkcg_dirty_ratelimit(blkcg);
+ task_ratelimit = dirty_ratelimit;
+ }
+
max_pause = bdi_max_pause(bdi, bdi_dirty);
min_pause = bdi_min_pause(bdi, max_pause,
task_ratelimit, dirty_ratelimit,
--- linux-next.orig/include/linux/blk-cgroup.h 2012-03-28 15:36:16.414093131 +0800
+++ linux-next/include/linux/blk-cgroup.h 2012-03-28 15:39:46.730088815 +0800
@@ -122,6 +122,10 @@ struct blkio_cgroup {
struct hlist_head blkg_list;
struct list_head policy_list; /* list of blkio_policy_node */
struct percpu_counter nr_dirtied;
+ unsigned long bw_time_stamp;
+ unsigned long dirtied_stamp;
+ unsigned long dirty_ratelimit;
+ unsigned long long buffered_write_bps;
};
struct blkio_group_stats {
@@ -217,6 +221,14 @@ static inline unsigned int blkcg_weight(
{
return blkcg->weight;
}
+static inline uint64_t blkcg_buffered_write_bps(struct blkio_cgroup *blkcg) /* accessor for the cgroup's buffered write bandwidth limit; balance_dirty_pages() treats 0 as "no limit set" */
+{
+ return blkcg->buffered_write_bps; /* NOTE(review): dereferences blkcg unconditionally, whereas blkcg_update_bandwidth() tolerates NULL - confirm callers never pass NULL */
+}
+static inline unsigned long blkcg_dirty_ratelimit(struct blkio_cgroup *blkcg) /* accessor for the per-cgroup rate limit maintained by blkcg_update_dirty_ratelimit() */
+{
+ return blkcg->dirty_ratelimit; /* read without taking blkcg->lock */
+}
typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
@@ -272,6 +284,14 @@ static inline unsigned int blkcg_weight(
{
return BLKIO_WEIGHT_DEFAULT;
}
+static inline uint64_t blkcg_buffered_write_bps(struct blkio_cgroup *blkcg) /* stub variant; presumably for builds without blk-cgroup support (the enclosing #if is not visible in this hunk) */
+{
+ return 0; /* 0 makes the bandwidth-limit checks in balance_dirty_pages() evaluate false */
+}
+static inline unsigned long blkcg_dirty_ratelimit(struct blkio_cgroup *blkcg) /* stub variant; presumably for builds without blk-cgroup support (the enclosing #if is not visible in this hunk) */
+{
+ return 0; /* constant placeholder; blkcg is not dereferenced */
+}
#endif
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2012-03-28 12:13 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-28 12:13 [PATCH 0/6] buffered write IO controller in balance_dirty_pages() Fengguang Wu
2012-03-28 12:13 ` [PATCH 1/6] blk-cgroup: move blk-cgroup.h in include/linux/blk-cgroup.h Fengguang Wu
2012-03-28 12:13 ` [PATCH 2/6] blk-cgroup: account dirtied pages Fengguang Wu
2012-03-28 12:13 ` [PATCH 3/6] blk-cgroup: buffered write IO controller - bandwidth weight Fengguang Wu
2012-03-28 12:13 ` Fengguang Wu [this message]
2012-03-28 12:13 ` [PATCH 5/6] blk-cgroup: buffered write IO controller - bandwidth limit interface Fengguang Wu
2012-03-28 12:13 ` [PATCH 6/6] blk-cgroup: buffered write IO controller - debug trace Fengguang Wu
2012-03-28 21:10 ` [PATCH 0/6] buffered write IO controller in balance_dirty_pages() Vivek Goyal
2012-03-28 22:35 ` Fengguang Wu
2012-03-29 2:48 ` Suresh Jayaraman
2012-03-29 0:34 ` KAMEZAWA Hiroyuki
2012-03-29 1:22 ` Fengguang Wu
2012-04-01 4:16 ` Suresh Jayaraman
2012-04-01 8:30 ` Fengguang Wu
2012-04-01 20:56 ` Vivek Goyal
2012-04-03 8:00 ` Fengguang Wu
2012-04-03 14:53 ` Vivek Goyal
2012-04-03 23:32 ` Fengguang Wu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120328131153.382173637@intel.com \
--to=fengguang.wu@intel.com \
--cc=arighi@develer.com \
--cc=linux-mm@kvack.org \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).