From: Andrea Righi <andrea@betterlinux.com>
To: Vivek Goyal <vgoyal@redhat.com>
Cc: linux-kernel@vger.kernel.org, jaxboe@fusionio.com,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 6/8] blk-throttle: core logic to throttle task while dirtying pages
Date: Wed, 29 Jun 2011 11:30:50 +0200
Message-ID: <20110629093050.GA1183@thinkpad>
In-Reply-To: <1309275309-12889-7-git-send-email-vgoyal@redhat.com>
On Tue, Jun 28, 2011 at 11:35:07AM -0400, Vivek Goyal wrote:
...
> +	/*
> +	 * We dispatched one task. Set the charge for other queued tasks,
> +	 * if any.
> +	 */
> +	tg_set_active_task_charge(tg, rw);
> +	throtl_log_tg(td, tg, "[%c] Wake up a task. bdisp=%u sz=%u bps=%llu"
> +			" iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
> +			rw == READ ? 'R' : 'W', tg->bytes_disp[rw], sz,
> +			tg->bps[rw], tg->io_disp[rw], tg->iops[rw],
> +			tg->nr_queued_bio[READ], tg->nr_queued_bio[WRITE],
> +			tg->nr_queued_tsk[READ], tg->nr_queued_tsk[WRITE]);
A small nitpick: tg->bytes_disp[rw] is a uint64_t, so we should print it
with bdisp=%llu.
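For illustration, a quick userspace sketch (plain C, not the kernel code
itself) of what goes wrong when a 64-bit counter is handed to %u; the
variable name bytes_disp here just mirrors the field above:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A dispatched-bytes counter that has grown past UINT32_MAX. */
	uint64_t bytes_disp = 5000000000ULL;

	/*
	 * Passing the u64 straight to %u is undefined behavior; the cast
	 * below keeps this sketch well-defined, but it shows the problem:
	 * the printed value is the truncated 705032704, not 5000000000.
	 * gcc -Wformat warns about the uncast mismatch.
	 */
	printf("bdisp=%u\n", (unsigned int)bytes_disp);

	/* Correct: match the 64-bit type explicitly. */
	printf("bdisp=%llu\n", (unsigned long long)bytes_disp);

	/* Equivalent, via the portable <inttypes.h> macro. */
	printf("bdisp=%" PRIu64 "\n", bytes_disp);

	return 0;
}

The same rule applies to printk-style helpers such as throtl_log_tg():
a u64 argument wants %llu.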
> +}
> +
> +static void tg_switch_active_dispatch(struct throtl_data *td,
> +			struct throtl_grp *tg, bool rw)
> +{
> +	unsigned int nr_tasks = tg->nr_queued_tsk[rw];
> +	unsigned int nr_bios = tg->nr_queued_bio[rw];
> +	enum dispatch_type curr_dispatch = tg->active_dispatch[rw];
> +
> +	/* Nothing queued. Whoever gets queued next first, sets dispatch type */
> +	if (!nr_bios && !nr_tasks)
> +		return;
> +
> +	if (curr_dispatch == DISPATCH_BIO && nr_tasks) {
> +		tg->active_dispatch[rw] = DISPATCH_TASK;
> +		return;
> +	}
> +
> +	if (curr_dispatch == DISPATCH_TASK && nr_bios)
> +		tg->active_dispatch[rw] = DISPATCH_BIO;
> +}
> +
> +static void tg_update_active_dispatch(struct throtl_data *td,
> +			struct throtl_grp *tg, bool rw)
> +{
> +	unsigned int nr_tasks = tg->nr_queued_tsk[rw];
> +	unsigned int nr_bios = tg->nr_queued_bio[rw];
> +	enum dispatch_type curr_dispatch = tg->active_dispatch[rw];
> +
> +	BUG_ON(nr_bios < 0 || nr_tasks < 0);
> +
> +	if (curr_dispatch == DISPATCH_BIO && !nr_bios) {
> +		tg->active_dispatch[rw] = DISPATCH_TASK;
> +		return;
>  	}
> 
> +	if (curr_dispatch == DISPATCH_TASK && !nr_tasks)
> +		tg->active_dispatch[rw] = DISPATCH_BIO;
> +}
> +
> +static int throtl_dispatch_tg_rw(struct throtl_data *td, struct throtl_grp *tg,
> +			bool rw, struct bio_list *bl, unsigned int max)
> +{
> +	unsigned int nr_disp = 0;
> +
> +	if (tg->active_dispatch[rw] == DISPATCH_BIO)
> +		nr_disp = throtl_dispatch_tg_bio(td, tg, rw, bl, max);
> +	else
> +		/* Only number of bios dispatched is kept track of here */
> +		throtl_dispatch_tg_task(td, tg, rw, max);
> +
> +	tg_switch_active_dispatch(td, tg, rw);
> +	return nr_disp;
> +}
> +
> +static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
> +			struct bio_list *bl)
> +{
> +	/* Try to dispatch 75% READS and 25% WRITES */
> +	unsigned int nr_reads = 0, nr_writes = 0;
> +	unsigned int max_nr_reads = throtl_grp_quantum*3/4;
> +	unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
> +
> +	nr_reads = throtl_dispatch_tg_rw(td, tg, READ, bl, max_nr_reads);
> +	nr_writes = throtl_dispatch_tg_rw(td, tg, WRITE, bl, max_nr_writes);
> +
>  	return nr_reads + nr_writes;
>  }
> 
> +static bool tg_should_requeue(struct throtl_grp *tg)
> +{
> +	/* If there are queued bios, requeue */
> +	if (tg->nr_queued_bio[0] || tg->nr_queued_bio[1])
> +		return 1;
> +
> +	/* If there are queued tasks, requeue */
> +	if (tg->nr_queued_tsk[0] || tg->nr_queued_tsk[1])
> +		return 1;
> +
> +	return 0;
> +}
> +
>  static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl)
>  {
>  	unsigned int nr_disp = 0;
> @@ -832,7 +1016,7 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl)
> 
>  		nr_disp += throtl_dispatch_tg(td, tg, bl);
> 
> -		if (tg->nr_queued[0] || tg->nr_queued[1]) {
> +		if (tg_should_requeue(tg)) {
>  			tg_update_disptime(td, tg);
>  			throtl_enqueue_tg(td, tg);
>  		}
> @@ -899,9 +1083,9 @@ static int throtl_dispatch(struct request_queue *q)
> 
>  	bio_list_init(&bio_list_on_stack);
> 
> -	throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
> -			total_nr_queued(td), td->nr_queued[READ],
> -			td->nr_queued[WRITE]);
> +	throtl_log(td, "dispatch bioq=%u/%u tskq=%u/%u",
> +			td->nr_queued_bio[READ], td->nr_queued_bio[WRITE],
> +			td->nr_queued_tsk[READ], td->nr_queued_tsk[WRITE]);
> 
>  	nr_disp = throtl_select_dispatch(td, &bio_list_on_stack);
> 
> @@ -1122,7 +1306,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
> 
>  	if (tg_no_rule_group(tg, rw)) {
>  		blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
> -			rw, bio->bi_rw & REQ_SYNC);
> +			1, rw, bio->bi_rw & REQ_SYNC);
>  		rcu_read_unlock();
>  		return 0;
>  	}
> @@ -1146,14 +1330,14 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
>  		}
>  	}
> 
> -	if (tg->nr_queued[rw]) {
> +	/* If there are already queued bio or task in same dir, queue bio */
> +	if (tg->nr_queued_bio[rw] || tg->nr_queued_tsk[rw]) {
>  		/*
> -		 * There is already another bio queued in same dir. No
> +		 * There is already another bio/task queued in same dir. No
>  		 * need to update dispatch time.
>  		 */
>  		update_disptime = false;
>  		goto queue_bio;
> -
>  	}
> 
>  	/* Bio is with-in rate limit of group */
> @@ -1178,16 +1362,18 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
> 
>  queue_bio:
>  	throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
> -			" iodisp=%u iops=%u queued=%d/%d",
> +			" iodisp=%u iops=%u bioq=%u/%u taskq=%u/%u",
>  			rw == READ ? 'R' : 'W',
>  			tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
>  			tg->io_disp[rw], tg->iops[rw],
> -			tg->nr_queued[READ], tg->nr_queued[WRITE]);
> +			tg->nr_queued_bio[READ], tg->nr_queued_bio[WRITE],
> +			tg->nr_queued_tsk[READ], tg->nr_queued_tsk[WRITE]);
> 
>  	throtl_add_bio_tg(q->td, tg, bio);
>  	*biop = NULL;
> 
>  	if (update_disptime) {
> +		tg_update_active_dispatch(td, tg, rw);
>  		tg_update_disptime(td, tg);
>  		throtl_schedule_next_dispatch(td);
>  	}
> @@ -1197,6 +1383,137 @@ out:
>  	return 0;
>  }
>
> +static void
> +__blk_throtl_dirty_pages(struct request_queue *q, unsigned long nr_dirty)
> +{
> +	struct throtl_data *td = q->td;
> +	struct throtl_grp *tg;
> +	struct blkio_cgroup *blkcg;
> +	bool rw = WRITE, update_disptime = true, first_task = false;
> +	unsigned int sz = nr_dirty << PAGE_SHIFT;
> +	DEFINE_WAIT(wait);
> +
> +	/*
> +	 * A throtl_grp pointer retrieved under rcu can be used to access
> +	 * basic fields like stats and io rates. If a group has no rules,
> +	 * just update the dispatch stats in lockless manner and return.
> +	 */
> +
> +	rcu_read_lock();
> +	blkcg = task_blkio_cgroup(current);
> +	tg = throtl_find_tg(td, blkcg);
> +	if (tg) {
> +		throtl_tg_fill_dev_details(td, tg);
> +
> +		if (tg_no_rule_group(tg, rw)) {
> +			blkiocg_update_dispatch_stats(&tg->blkg, sz, nr_dirty,
> +					rw, 0);
> +			rcu_read_unlock();
> +			return;
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	spin_lock_irq(q->queue_lock);
> +
> +	tg = throtl_get_tg(td);
> +
> +	/* Queue is gone. No queue lock held here. */
> +	if (IS_ERR(tg))
> +		return;
> +
> +	tg->unaccounted_dirty += nr_dirty;
> +
> +	/* If there are already queued task, put this task also on waitq */
> +	if (tg->nr_queued_tsk[rw]) {
> +		update_disptime = false;
> +		goto queue_task;
> +	} else
> +		first_task = true;
> +
> +	/* If there are bios already throttled in same dir, queue task */
> +	if (!bio_list_empty(&tg->bio_lists[rw])) {
> +		update_disptime = false;
> +		goto queue_task;
> +	}
> +
> +	/*
> +	 * Task is with-in rate limit of group.
> +	 *
> +	 * Note: How many IOPS we should charge for this operation. For
> +	 * the time being I am sticking to number of pages as number of
> +	 * IOPS.
> +	 */
> +	if (!tg_wait_dispatch(td, tg, rw, sz, nr_dirty)) {
> +		throtl_charge_dirty_io(tg, rw, sz, nr_dirty, 0);
> +		throtl_trim_slice(td, tg, rw);
> +		goto out;
> +	}
> +
> +queue_task:
> +	throtl_log_tg(td, tg, "[%c] task. bdisp=%u sz=%u bps=%llu"
> +			" iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
> +			rw == READ ? 'R' : 'W',
> +			tg->bytes_disp[rw], sz, tg->bps[rw],
> +			tg->io_disp[rw], tg->iops[rw],
> +			tg->nr_queued_bio[READ], tg->nr_queued_bio[WRITE],
> +			tg->nr_queued_tsk[READ], tg->nr_queued_tsk[WRITE]);
Ditto.
See the fix below.
Thanks,
-Andrea
---
block/blk-throttle.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 623cc05..f94a2a8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -909,7 +909,7 @@ static void throtl_dispatch_tg_task(struct throtl_data *td,
 	 * if any.
 	 */
 	tg_set_active_task_charge(tg, rw);
-	throtl_log_tg(td, tg, "[%c] Wake up a task. bdisp=%u sz=%u bps=%llu"
+	throtl_log_tg(td, tg, "[%c] Wake up a task. bdisp=%llu sz=%u bps=%llu"
 			" iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
 			rw == READ ? 'R' : 'W', tg->bytes_disp[rw], sz,
 			tg->bps[rw], tg->io_disp[rw], tg->iops[rw],
@@ -1459,7 +1459,7 @@ __blk_throtl_dirty_pages(struct request_queue *q, unsigned long nr_dirty)
 	}
 
 queue_task:
-	throtl_log_tg(td, tg, "[%c] task. bdisp=%u sz=%u bps=%llu"
+	throtl_log_tg(td, tg, "[%c] task. bdisp=%llu sz=%u bps=%llu"
 			" iodisp=%u iops=%u bioq=%d/%d taskq=%d/%d",
 			rw == READ ? 'R' : 'W',
 			tg->bytes_disp[rw], sz, tg->bps[rw],