From: Andrea Righi <andrea@betterlinux.com>
To: Vivek Goyal <vgoyal@redhat.com>
Cc: linux-kernel@vger.kernel.org, jaxboe@fusionio.com,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 6/8] blk-throttle: core logic to throttle task while dirtying pages
Date: Wed, 29 Jun 2011 11:30:50 +0200 [thread overview]
Message-ID: <20110629093050.GA1183@thinkpad> (raw)
In-Reply-To: <1309275309-12889-7-git-send-email-vgoyal@redhat.com>
On Tue, Jun 28, 2011 at 11:35:07AM -0400, Vivek Goyal wrote:
...
> + /*
> + * We dispatched one task. Set the charge for other queued tasks,
> + * if any.
> + */
> + tg_set_active_task_charge(tg, rw);
> + throtl_log_tg(td, tg, "[%c] Wake up a task. bdisp=%u sz=%u bps=%llu"
> + " iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
> + rw == READ ? 'R' : 'W', tg->bytes_disp[rw], sz,
> + tg->bps[rw], tg->io_disp[rw], tg->iops[rw],
> + tg->nr_queued_bio[READ], tg->nr_queued_bio[WRITE],
> + tg->nr_queued_tsk[READ], tg->nr_queued_tsk[WRITE]);
A small nitpick:
tg->bytes_disp[rw] is uint64_t, we should use bdisp=%llu.
> +}
> +
> +static void tg_switch_active_dispatch(struct throtl_data *td,
> + struct throtl_grp *tg, bool rw)
> +{
> + unsigned int nr_tasks = tg->nr_queued_tsk[rw];
> + unsigned int nr_bios = tg->nr_queued_bio[rw];
> + enum dispatch_type curr_dispatch = tg->active_dispatch[rw];
> +
> + /* Nothing queued. Whoever gets queued next first, sets dispatch type */
> + if (!nr_bios && !nr_tasks)
> + return;
> +
> + if (curr_dispatch == DISPATCH_BIO && nr_tasks) {
> + tg->active_dispatch[rw] = DISPATCH_TASK;
> + return;
> + }
> +
> + if (curr_dispatch == DISPATCH_TASK && nr_bios)
> + tg->active_dispatch[rw] = DISPATCH_BIO;
> +}
> +
> +static void tg_update_active_dispatch(struct throtl_data *td,
> + struct throtl_grp *tg, bool rw)
> +{
> + unsigned int nr_tasks = tg->nr_queued_tsk[rw];
> + unsigned int nr_bios = tg->nr_queued_bio[rw];
> + enum dispatch_type curr_dispatch = tg->active_dispatch[rw];
> +
> + BUG_ON(nr_bios < 0 || nr_tasks < 0);
> +
> + if (curr_dispatch == DISPATCH_BIO && !nr_bios) {
> + tg->active_dispatch[rw] = DISPATCH_TASK;
> + return;
> }
>
> + if (curr_dispatch == DISPATCH_TASK && !nr_tasks)
> + tg->active_dispatch[rw] = DISPATCH_BIO;
> +}
> +
> +static int throtl_dispatch_tg_rw(struct throtl_data *td, struct throtl_grp *tg,
> + bool rw, struct bio_list *bl, unsigned int max)
> +{
> + unsigned int nr_disp = 0;
> +
> + if (tg->active_dispatch[rw] == DISPATCH_BIO)
> + nr_disp = throtl_dispatch_tg_bio(td, tg, rw, bl, max);
> + else
> + /* Only number of bios dispatched is kept track of here */
> + throtl_dispatch_tg_task(td, tg, rw, max);
> +
> + tg_switch_active_dispatch(td, tg, rw);
> + return nr_disp;
> +}
> +
> +static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
> + struct bio_list *bl)
> +{
> + /* Try to dispatch 75% READS and 25% WRITES */
> + unsigned int nr_reads = 0, nr_writes = 0;
> + unsigned int max_nr_reads = throtl_grp_quantum*3/4;
> + unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
> +
> + nr_reads = throtl_dispatch_tg_rw(td, tg, READ, bl, max_nr_reads);
> + nr_writes = throtl_dispatch_tg_rw(td, tg, WRITE, bl, max_nr_writes);
> +
> return nr_reads + nr_writes;
> }
>
> +static bool tg_should_requeue(struct throtl_grp *tg)
> +{
> + /* If there are queued bios, requeue */
> + if (tg->nr_queued_bio[0] || tg->nr_queued_bio[1])
> + return 1;
> +
> + /* If there are queued tasks reueue */
> + if (tg->nr_queued_tsk[0] || tg->nr_queued_tsk[1])
> + return 1;
> +
> + return 0;
> +}
> +
> static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl)
> {
> unsigned int nr_disp = 0;
> @@ -832,7 +1016,7 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl)
>
> nr_disp += throtl_dispatch_tg(td, tg, bl);
>
> - if (tg->nr_queued[0] || tg->nr_queued[1]) {
> + if (tg_should_requeue(tg)) {
> tg_update_disptime(td, tg);
> throtl_enqueue_tg(td, tg);
> }
> @@ -899,9 +1083,9 @@ static int throtl_dispatch(struct request_queue *q)
>
> bio_list_init(&bio_list_on_stack);
>
> - throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
> - total_nr_queued(td), td->nr_queued[READ],
> - td->nr_queued[WRITE]);
> + throtl_log(td, "dispatch bioq=%u/%u tskq=%u/%u",
> + td->nr_queued_bio[READ], td->nr_queued_bio[WRITE],
> + td->nr_queued_tsk[READ], td->nr_queued_tsk[WRITE]);
>
> nr_disp = throtl_select_dispatch(td, &bio_list_on_stack);
>
> @@ -1122,7 +1306,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
>
> if (tg_no_rule_group(tg, rw)) {
> blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
> - rw, bio->bi_rw & REQ_SYNC);
> + 1, rw, bio->bi_rw & REQ_SYNC);
> rcu_read_unlock();
> return 0;
> }
> @@ -1146,14 +1330,14 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
> }
> }
>
> - if (tg->nr_queued[rw]) {
> + /* If there are already queued bio or task in same dir, queue bio */
> + if (tg->nr_queued_bio[rw] || tg->nr_queued_tsk[rw]) {
> /*
> - * There is already another bio queued in same dir. No
> + * There is already another bio/task queued in same dir. No
> * need to update dispatch time.
> */
> update_disptime = false;
> goto queue_bio;
> -
> }
>
> /* Bio is with-in rate limit of group */
> @@ -1178,16 +1362,18 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
>
> queue_bio:
> throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
> - " iodisp=%u iops=%u queued=%d/%d",
> + " iodisp=%u iops=%u bioq=%u/%u taskq=%u/%u",
> rw == READ ? 'R' : 'W',
> tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
> tg->io_disp[rw], tg->iops[rw],
> - tg->nr_queued[READ], tg->nr_queued[WRITE]);
> + tg->nr_queued_bio[READ], tg->nr_queued_bio[WRITE],
> + tg->nr_queued_tsk[READ], tg->nr_queued_tsk[WRITE]);
>
> throtl_add_bio_tg(q->td, tg, bio);
> *biop = NULL;
>
> if (update_disptime) {
> + tg_update_active_dispatch(td, tg, rw);
> tg_update_disptime(td, tg);
> throtl_schedule_next_dispatch(td);
> }
> @@ -1197,6 +1383,137 @@ out:
> return 0;
> }
>
> +static void
> +__blk_throtl_dirty_pages(struct request_queue *q, unsigned long nr_dirty)
> +{
> + struct throtl_data *td = q->td;
> + struct throtl_grp *tg;
> + struct blkio_cgroup *blkcg;
> + bool rw = WRITE, update_disptime = true, first_task = false;
> + unsigned int sz = nr_dirty << PAGE_SHIFT;
> + DEFINE_WAIT(wait);
> +
> + /*
> + * A throtl_grp pointer retrieved under rcu can be used to access
> + * basic fields like stats and io rates. If a group has no rules,
> + * just update the dispatch stats in lockless manner and return.
> + */
> +
> + rcu_read_lock();
> + blkcg = task_blkio_cgroup(current);
> + tg = throtl_find_tg(td, blkcg);
> + if (tg) {
> + throtl_tg_fill_dev_details(td, tg);
> +
> + if (tg_no_rule_group(tg, rw)) {
> + blkiocg_update_dispatch_stats(&tg->blkg, sz, nr_dirty,
> + rw, 0);
> + rcu_read_unlock();
> + return;
> + }
> + }
> + rcu_read_unlock();
> +
> + spin_lock_irq(q->queue_lock);
> +
> + tg = throtl_get_tg(td);
> +
> + /* Queue is gone. No queue lock held here. */
> + if (IS_ERR(tg))
> + return;
> +
> + tg->unaccounted_dirty += nr_dirty;
> +
> + /* If there are already queued task, put this task also on waitq */
> + if (tg->nr_queued_tsk[rw]) {
> + update_disptime = false;
> + goto queue_task;
> + } else
> + first_task = true;
> +
> + /* If there are bios already throttled in same dir, queue task */
> + if (!bio_list_empty(&tg->bio_lists[rw])) {
> + update_disptime = false;
> + goto queue_task;
> + }
> +
> + /*
> + * Task is with-in rate limit of group.
> + *
> + * Note: How many IOPS we should charge for this operation. For
> + * the time being I am sticking to number of pages as number of
> + * IOPS.
> + */
> + if (!tg_wait_dispatch(td, tg, rw, sz, nr_dirty)) {
> + throtl_charge_dirty_io(tg, rw, sz, nr_dirty, 0);
> + throtl_trim_slice(td, tg, rw);
> + goto out;
> + }
> +
> +queue_task:
> + throtl_log_tg(td, tg, "[%c] task. bdisp=%u sz=%u bps=%llu"
> + " iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
> + rw == READ ? 'R' : 'W',
> + tg->bytes_disp[rw], sz, tg->bps[rw],
> + tg->io_disp[rw], tg->iops[rw],
> + tg->nr_queued_bio[READ], tg->nr_queued_bio[WRITE],
> + tg->nr_queued_tsk[READ], tg->nr_queued_tsk[WRITE]);
Ditto.
See the fix below.
Thanks,
-Andrea
---
block/blk-throttle.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 623cc05..f94a2a8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -909,7 +909,7 @@ static void throtl_dispatch_tg_task(struct throtl_data *td,
* if any.
*/
tg_set_active_task_charge(tg, rw);
- throtl_log_tg(td, tg, "[%c] Wake up a task. bdisp=%u sz=%u bps=%llu"
+ throtl_log_tg(td, tg, "[%c] Wake up a task. bdisp=%llu sz=%u bps=%llu"
" iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
rw == READ ? 'R' : 'W', tg->bytes_disp[rw], sz,
tg->bps[rw], tg->io_disp[rw], tg->iops[rw],
@@ -1459,7 +1459,7 @@ __blk_throtl_dirty_pages(struct request_queue *q, unsigned long nr_dirty)
}
queue_task:
- throtl_log_tg(td, tg, "[%c] task. bdisp=%u sz=%u bps=%llu"
+ throtl_log_tg(td, tg, "[%c] task. bdisp=%llu sz=%u bps=%llu"
" iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
rw == READ ? 'R' : 'W',
tg->bytes_disp[rw], sz, tg->bps[rw],
next prev parent reply other threads:[~2011-06-29 9:31 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-28 15:35 [PATCH 0/8][V2] blk-throttle: Throttle buffered WRITEs in balance_dirty_pages() Vivek Goyal
2011-06-28 15:35 ` [PATCH 1/8] blk-throttle: convert wait routines to return jiffies to wait Vivek Goyal
2011-06-28 15:35 ` [PATCH 2/8] blk-throttle: do not enforce first queued bio check in tg_wait_dispatch Vivek Goyal
2011-06-28 15:35 ` [PATCH 3/8] blk-throttle: use io size and direction as parameters to wait routines Vivek Goyal
2011-06-28 15:35 ` [PATCH 4/8] blk-throttle: specify number of ios during dispatch update Vivek Goyal
2011-06-28 15:35 ` [PATCH 5/8] blk-throttle: get rid of extend slice trace message Vivek Goyal
2011-06-28 15:35 ` [PATCH 6/8] blk-throttle: core logic to throttle task while dirtying pages Vivek Goyal
2011-06-29 9:30 ` Andrea Righi [this message]
2011-06-29 15:25 ` Andrea Righi
2011-06-29 20:03 ` Vivek Goyal
2011-06-28 15:35 ` [PATCH 7/8] blk-throttle: do not throttle writes at device level except direct io Vivek Goyal
2011-06-28 15:35 ` [PATCH 8/8] blk-throttle: enable throttling of task while dirtying pages Vivek Goyal
2011-06-30 14:52 ` Andrea Righi
2011-06-30 15:06 ` Andrea Righi
2011-06-30 17:14 ` Vivek Goyal
2011-06-30 21:22 ` Andrea Righi
2011-06-28 16:21 ` [PATCH 0/8][V2] blk-throttle: Throttle buffered WRITEs in balance_dirty_pages() Andrea Righi
2011-06-28 17:06 ` Vivek Goyal
2011-06-28 17:39 ` Andrea Righi
2011-06-29 16:05 ` Andrea Righi
2011-06-29 20:04 ` Vivek Goyal
2011-06-29 0:42 ` Dave Chinner
2011-06-29 1:53 ` Vivek Goyal
2011-06-30 20:04 ` fsync serialization on ext4 with blkio throttling (Was: Re: [PATCH 0/8][V2] blk-throttle: Throttle buffered WRITEs in balance_dirty_pages()) Vivek Goyal
2011-06-30 20:44 ` Vivek Goyal
2011-07-01 0:16 ` Dave Chinner
-- strict thread matches above, loose matches on Subject: below --
2011-06-03 21:06 [RFC PATCH 0/8] blk-throttle: Throttle buffered WRITE in balance_dirty_pages() Vivek Goyal
2011-06-03 21:06 ` [PATCH 6/8] blk-throttle: core logic to throttle task while dirtying pages Vivek Goyal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110629093050.GA1183@thinkpad \
--to=andrea@betterlinux.com \
--cc=jaxboe@fusionio.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.