From: Vivek Goyal <vgoyal@redhat.com>
To: linux-kernel@vger.kernel.org, jens.axboe@oracle.com
Cc: nauman@google.com, dpshah@google.com, lizf@cn.fujitsu.com,
ryov@valinux.co.jp, fernando@oss.ntt.co.jp,
s-uchida@ap.jp.nec.com, taka@valinux.co.jp,
guijianfeng@cn.fujitsu.com, jmoyer@redhat.com,
righi.andrea@gmail.com, m-ikeda@ds.jp.nec.com,
czoccolo@gmail.com, Alan.Brunelle@hp.com
Subject: Re: [PATCH 05/21] blkio: Introduce the root service tree for cfq groups
Date: Wed, 2 Dec 2009 10:49:29 -0500 [thread overview]
Message-ID: <20091202154929.GF31715@redhat.com> (raw)
In-Reply-To: <1259549968-10369-6-git-send-email-vgoyal@redhat.com>
On Sun, Nov 29, 2009 at 09:59:12PM -0500, Vivek Goyal wrote:
> o So far we just had one cfq_group in cfq_data. To create space for more than
> one cfq_group, we need to have a service tree of groups where all the groups
> can be queued if they have active cfq queues backlogged in these.
>
Reposting this patch. Introduced a new macro rb_entry_cfqg() along the
lines of rb_entry_rq().
o So far we just had one cfq_group in cfq_data. To create space for more than
one cfq_group, we need to have a service tree of groups where all the groups
can be queued if they have active cfq queues backlogged in these.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
block/cfq-iosched.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 134 insertions(+), 3 deletions(-)
Index: linux10/block/cfq-iosched.c
===================================================================
--- linux10.orig/block/cfq-iosched.c 2009-11-30 17:22:57.000000000 -0500
+++ linux10/block/cfq-iosched.c 2009-12-02 10:47:17.000000000 -0500
@@ -66,6 +66,7 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
#define sample_valid(samples) ((samples) > 80)
+#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
/*
* Most of our rbtree usage is for sorting with min extraction, so
@@ -77,8 +78,9 @@ struct cfq_rb_root {
struct rb_root rb;
struct rb_node *left;
unsigned count;
+ u64 min_vdisktime;
};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, }
+#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
/*
* Per process-grouping structure
@@ -156,6 +158,16 @@ enum wl_type_t {
/* This is per cgroup per device grouping structure */
struct cfq_group {
+ /* group service_tree member */
+ struct rb_node rb_node;
+
+ /* group service_tree key */
+ u64 vdisktime;
+ bool on_st;
+
+ /* number of cfqq currently on this group */
+ int nr_cfqq;
+
/*
* rr lists of queues with requests, onle rr for each priority class.
* Counts are embedded in the cfq_rb_root
@@ -169,6 +181,8 @@ struct cfq_group {
*/
struct cfq_data {
struct request_queue *queue;
+ /* Root service tree for cfq_groups */
+ struct cfq_rb_root grp_service_tree;
struct cfq_group root_group;
/*
@@ -251,6 +265,9 @@ static struct cfq_rb_root *service_tree_
enum wl_type_t type,
struct cfq_data *cfqd)
{
+ if (!cfqg)
+ return NULL;
+
if (prio == IDLE_WORKLOAD)
return &cfqg->service_tree_idle;
@@ -589,6 +606,17 @@ static struct cfq_queue *cfq_rb_first(st
return NULL;
}
+static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
+{
+ if (!root->left)
+ root->left = rb_first(&root->rb);
+
+ if (root->left)
+ return rb_entry_cfqg(root->left);
+
+ return NULL;
+}
+
static void rb_erase_init(struct rb_node *n, struct rb_root *root)
{
rb_erase(n, root);
@@ -645,6 +673,83 @@ static unsigned long cfq_slice_offset(st
cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
}
+static inline s64
+cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+ return cfqg->vdisktime - st->min_vdisktime;
+}
+
+static void
+__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+ struct rb_node **node = &st->rb.rb_node;
+ struct rb_node *parent = NULL;
+ struct cfq_group *__cfqg;
+ s64 key = cfqg_key(st, cfqg);
+ int left = 1;
+
+ while (*node != NULL) {
+ parent = *node;
+ __cfqg = rb_entry_cfqg(parent);
+
+ if (key < cfqg_key(st, __cfqg))
+ node = &parent->rb_left;
+ else {
+ node = &parent->rb_right;
+ left = 0;
+ }
+ }
+
+ if (left)
+ st->left = &cfqg->rb_node;
+
+ rb_link_node(&cfqg->rb_node, parent, node);
+ rb_insert_color(&cfqg->rb_node, &st->rb);
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+ struct cfq_rb_root *st = &cfqd->grp_service_tree;
+ struct cfq_group *__cfqg;
+ struct rb_node *n;
+
+ cfqg->nr_cfqq++;
+ if (cfqg->on_st)
+ return;
+
+ /*
+ * Currently put the group at the end. Later implement something
+ * so that groups get lesser vtime based on their weights, so that
+ * if group does not loose all if it was not continously backlogged.
+ */
+ n = rb_last(&st->rb);
+ if (n) {
+ __cfqg = rb_entry_cfqg(n);
+ cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
+ } else
+ cfqg->vdisktime = st->min_vdisktime;
+
+ __cfq_group_service_tree_add(st, cfqg);
+ cfqg->on_st = true;
+}
+
+static void
+cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+ struct cfq_rb_root *st = &cfqd->grp_service_tree;
+
+ BUG_ON(cfqg->nr_cfqq < 1);
+ cfqg->nr_cfqq--;
+ /* If there are other cfq queues under this group, don't delete it */
+ if (cfqg->nr_cfqq)
+ return;
+
+ cfqg->on_st = false;
+ if (!RB_EMPTY_NODE(&cfqg->rb_node))
+ cfq_rb_erase(&cfqg->rb_node, st);
+}
+
/*
* The cfqd->service_trees holds all pending cfq_queue's that have
* requests waiting to be processed. It is sorted in the order that
@@ -727,6 +832,7 @@ static void cfq_service_tree_add(struct
rb_link_node(&cfqq->rb_node, parent, p);
rb_insert_color(&cfqq->rb_node, &service_tree->rb);
service_tree->count++;
+ cfq_group_service_tree_add(cfqd, cfqq->cfqg);
}
static struct cfq_queue *
@@ -837,6 +943,7 @@ static void cfq_del_cfqq_rr(struct cfq_d
cfqq->p_root = NULL;
}
+ cfq_group_service_tree_del(cfqd, cfqq->cfqg);
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
}
@@ -1113,6 +1220,9 @@ static struct cfq_queue *cfq_get_next_qu
if (!cfqd->rq_queued)
return NULL;
+ /* There is nothing to dispatch */
+ if (!service_tree)
+ return NULL;
if (RB_EMPTY_ROOT(&service_tree->rb))
return NULL;
return cfq_rb_first(service_tree);
@@ -1482,6 +1592,12 @@ static void choose_service_tree(struct c
unsigned count;
struct cfq_rb_root *st;
+ if (!cfqg) {
+ cfqd->serving_prio = IDLE_WORKLOAD;
+ cfqd->workload_expires = jiffies + 1;
+ return;
+ }
+
/* Choose next priority. RT > BE > IDLE */
if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
cfqd->serving_prio = RT_WORKLOAD;
@@ -1540,10 +1656,21 @@ static void choose_service_tree(struct c
cfqd->noidle_tree_requires_idle = false;
}
+static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
+{
+ struct cfq_rb_root *st = &cfqd->grp_service_tree;
+
+ if (RB_EMPTY_ROOT(&st->rb))
+ return NULL;
+ return cfq_rb_first_group(st);
+}
+
static void cfq_choose_cfqg(struct cfq_data *cfqd)
{
- cfqd->serving_group = &cfqd->root_group;
- choose_service_tree(cfqd, &cfqd->root_group);
+ struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
+
+ cfqd->serving_group = cfqg;
+ choose_service_tree(cfqd, cfqg);
}
/*
@@ -3019,10 +3146,14 @@ static void *cfq_init_queue(struct reque
if (!cfqd)
return NULL;
+ /* Init root service tree */
+ cfqd->grp_service_tree = CFQ_RB_ROOT;
+
/* Init root group */
cfqg = &cfqd->root_group;
for_each_cfqg_st(cfqg, i, j, st)
*st = CFQ_RB_ROOT;
+ RB_CLEAR_NODE(&cfqg->rb_node);
/*
* Not strictly needed (since RB_ROOT just clears the node and we
next prev parent reply other threads:[~2009-12-02 15:51 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-11-30 2:59 Block IO Controller V4 Vivek Goyal
2009-11-30 2:59 ` [PATCH 01/21] blkio: Set must_dispatch only if we decided to not dispatch the request Vivek Goyal
2009-12-02 14:06 ` Jeff Moyer
2009-11-30 2:59 ` [PATCH 02/21] blkio: Introduce the notion of cfq groups Vivek Goyal
2009-11-30 2:59 ` [PATCH 03/21] blkio: Implement macro to traverse each idle tree in group Vivek Goyal
2009-11-30 20:13 ` Divyesh Shah
2009-11-30 22:24 ` Vivek Goyal
2009-11-30 2:59 ` [PATCH 04/21] blkio: Keep queue on service tree until we expire it Vivek Goyal
2009-11-30 2:59 ` [PATCH 05/21] blkio: Introduce the root service tree for cfq groups Vivek Goyal
2009-11-30 23:55 ` Divyesh Shah
2009-12-02 15:42 ` Vivek Goyal
2009-12-02 15:49 ` Vivek Goyal [this message]
2009-11-30 2:59 ` [PATCH 06/21] blkio: Introduce blkio controller cgroup interface Vivek Goyal
2009-12-01 0:04 ` Divyesh Shah
2009-12-02 15:27 ` Vivek Goyal
2009-11-30 2:59 ` [PATCH 07/21] blkio: Introduce per cfq group weights and vdisktime calculations Vivek Goyal
2009-12-02 15:50 ` Vivek Goyal
2009-11-30 2:59 ` [PATCH 08/21] blkio: Implement per cfq group latency target and busy queue avg Vivek Goyal
2009-11-30 2:59 ` [PATCH 09/21] blkio: Group time used accounting and workload context save restore Vivek Goyal
2009-11-30 2:59 ` [PATCH 10/21] blkio: Dynamic cfq group creation based on cgroup tasks belongs to Vivek Goyal
2009-11-30 2:59 ` [PATCH 11/21] blkio: Take care of cgroup deletion and cfq group reference counting Vivek Goyal
2009-11-30 2:59 ` [PATCH 12/21] blkio: Some debugging aids for CFQ Vivek Goyal
2009-11-30 2:59 ` [PATCH 13/21] blkio: Export disk time and sectors used by a group to user space Vivek Goyal
2009-11-30 2:59 ` [PATCH 14/21] blkio: Provide some isolation between groups Vivek Goyal
2009-11-30 2:59 ` [PATCH 15/21] blkio: Drop the reference to queue once the task changes cgroup Vivek Goyal
2009-11-30 2:59 ` [PATCH 16/21] blkio: Propagate cgroup weight updation to cfq groups Vivek Goyal
2009-11-30 2:59 ` [PATCH 17/21] blkio: Wait for cfq queue to get backlogged if group is empty Vivek Goyal
2009-11-30 2:59 ` [PATCH 18/21] blkio: Determine async workload length based on total number of queues Vivek Goyal
2009-11-30 2:59 ` [PATCH 19/21] blkio: Implement group_isolation tunable Vivek Goyal
2009-11-30 2:59 ` [PATCH 20/21] blkio: Wait on sync-noidle queue even if rq_noidle = 1 Vivek Goyal
2009-11-30 2:59 ` [PATCH 21/21] blkio: Documentation Vivek Goyal
2009-11-30 15:34 ` Block IO Controller V4 Corrado Zoccolo
2009-11-30 16:00 ` Vivek Goyal
2009-11-30 21:34 ` Corrado Zoccolo
2009-11-30 21:58 ` Vivek Goyal
2009-11-30 22:00 ` Alan D. Brunelle
2009-11-30 22:56 ` Vivek Goyal
2009-11-30 23:50 ` Alan D. Brunelle
2009-12-02 19:12 ` Vivek Goyal
2009-12-08 15:17 ` Alan D. Brunelle
2009-12-08 16:32 ` Vivek Goyal
2009-12-08 18:05 ` Alan D. Brunelle
2009-12-10 3:44 ` Vivek Goyal
2009-12-01 22:27 ` Vivek Goyal
2009-12-02 1:51 ` Gui Jianfeng
2009-12-02 14:25 ` Vivek Goyal
2009-12-03 8:41 ` Gui Jianfeng
2009-12-03 14:36 ` Vivek Goyal
2009-12-03 18:10 ` Vivek Goyal
2009-12-03 23:51 ` Vivek Goyal
2009-12-07 8:45 ` Gui Jianfeng
2009-12-07 15:25 ` Vivek Goyal
2009-12-07 1:35 ` Gui Jianfeng
2009-12-07 8:41 ` Gui Jianfeng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091202154929.GF31715@redhat.com \
--to=vgoyal@redhat.com \
--cc=Alan.Brunelle@hp.com \
--cc=czoccolo@gmail.com \
--cc=dpshah@google.com \
--cc=fernando@oss.ntt.co.jp \
--cc=guijianfeng@cn.fujitsu.com \
--cc=jens.axboe@oracle.com \
--cc=jmoyer@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lizf@cn.fujitsu.com \
--cc=m-ikeda@ds.jp.nec.com \
--cc=nauman@google.com \
--cc=righi.andrea@gmail.com \
--cc=ryov@valinux.co.jp \
--cc=s-uchida@ap.jp.nec.com \
--cc=taka@valinux.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.