public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Vivek Goyal <vgoyal@redhat.com>
To: linux-kernel@vger.kernel.org, jens.axboe@oracle.com
Cc: nauman@google.com, dpshah@google.com, lizf@cn.fujitsu.com,
	ryov@valinux.co.jp, fernando@oss.ntt.co.jp,
	s-uchida@ap.jp.nec.com, taka@valinux.co.jp,
	guijianfeng@cn.fujitsu.com, jmoyer@redhat.com,
	balbir@linux.vnet.ibm.com, righi.andrea@gmail.com,
	m-ikeda@ds.jp.nec.com, vgoyal@redhat.com,
	akpm@linux-foundation.org, riel@redhat.com,
	kamezawa.hiroyu@jp.fujitsu.com
Subject: [PATCH 07/16] blkio: Introduce per cfq group weights and vdisktime calculations
Date: Thu, 12 Nov 2009 18:32:27 -0500	[thread overview]
Message-ID: <1258068756-10766-8-git-send-email-vgoyal@redhat.com> (raw)
In-Reply-To: <1258068756-10766-1-git-send-email-vgoyal@redhat.com>

o Bring in the per cfq group weight and the way vdisktime is calculated for
  the group. Also add the code that updates the min_vdisktime of the group
  service tree.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 block/Kconfig.iosched |    9 ++++++-
 block/cfq-iosched.c   |   62 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index be0280d..fa95fa7 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,7 +23,6 @@ config IOSCHED_DEADLINE
 
 config IOSCHED_CFQ
 	tristate "CFQ I/O scheduler"
-	select BLK_CGROUP
 	default y
 	---help---
 	  The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -33,6 +32,14 @@ config IOSCHED_CFQ
 
 	  This is the default I/O scheduler.
 
+config CFQ_GROUP_IOSCHED
+	bool "CFQ Group Scheduling support"
+	depends on IOSCHED_CFQ && CGROUPS
+	select BLK_CGROUP
+	default n
+	---help---
+	  Enable group IO scheduling in CFQ.
+
 choice
 	prompt "Default I/O scheduler"
 	default DEFAULT_CFQ
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f8688b4..1354c6b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -13,6 +13,7 @@
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/blktrace_api.h>
+#include "blk-cgroup.h"
 
 /*
  * tunables
@@ -49,6 +50,7 @@ static const int cfq_hist_divisor = 4;
 
 #define CFQ_SLICE_SCALE		(5)
 #define CFQ_HW_QUEUE_MIN	(5)
+#define CFQ_SERVICE_SHIFT       12
 
 #define RQ_CIC(rq)		\
 	((struct cfq_io_context *) (rq)->elevator_private)
@@ -78,6 +80,7 @@ struct cfq_rb_root {
 	struct rb_node *left;
 	unsigned count;
 	u64 min_vdisktime;
+	struct rb_node *active;
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
 
@@ -162,6 +165,7 @@ struct cfq_group {
 
 	/* group service_tree key */
 	u64 vdisktime;
+	unsigned int weight;
 	bool on_st;
 
 	/* number of cfqq currently on this group */
@@ -417,6 +421,51 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
+static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
+{
+	u64 d = delta << CFQ_SERVICE_SHIFT;
+
+	d = d * BLKIO_WEIGHT_DEFAULT;
+	do_div(d, cfqg->weight);
+	return d;
+}
+
+static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
+{
+	s64 delta = (s64)(vdisktime - min_vdisktime);
+	if (delta > 0)
+		min_vdisktime = vdisktime;
+
+	return min_vdisktime;
+}
+
+static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
+{
+	s64 delta = (s64)(vdisktime - min_vdisktime);
+	if (delta < 0)
+		min_vdisktime = vdisktime;
+
+	return min_vdisktime;
+}
+
+static void update_min_vdisktime(struct cfq_rb_root *st)
+{
+	u64 vdisktime = st->min_vdisktime;
+	struct cfq_group *cfqg;
+
+	if (st->active) {
+		cfqg = rb_entry(st->active, struct cfq_group, rb_node);
+		vdisktime = cfqg->vdisktime;
+	}
+
+	if (st->left) {
+		cfqg = rb_entry(st->left, struct cfq_group, rb_node);
+		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
+	}
+
+	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
+}
+
 /*
  * get averaged number of queues of RT/BE priority.
  * average is updated, with a formula that gives more weight to higher numbers,
@@ -718,8 +767,12 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
+	if (st->active == &cfqg->rb_node)
+		st->active = NULL;
+
 	BUG_ON(cfqg->nr_cfqq < 1);
 	cfqg->nr_cfqq--;
+
 	/* If there are other cfq queues under this group, don't delete it */
 	if (cfqg->nr_cfqq)
 		return;
@@ -1667,10 +1720,14 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
+	struct cfq_group *cfqg;
 
 	if (RB_EMPTY_ROOT(&st->rb))
 		return NULL;
-	return cfq_rb_first_group(st);
+	cfqg = cfq_rb_first_group(st);
+	st->active = &cfqg->rb_node;
+	update_min_vdisktime(st);
+	return cfqg;
 }
 
 static void cfq_choose_cfqg(struct cfq_data *cfqd)
@@ -3146,6 +3203,9 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqg->service_tree_idle = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
 
+	/* Give preference to root group over other groups */
+	cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
+
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
 	 * zeroed cfqd on alloc), but better be safe in case someone decides
-- 
1.6.2.5


  parent reply	other threads:[~2009-11-12 23:45 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-12 23:32 [RFC] Block IO Controller V2 Vivek Goyal
2009-11-12 23:32 ` [PATCH 01/16] blkio: Documentation Vivek Goyal
2009-11-13 10:48   ` Jens Axboe
2009-11-13 15:18     ` Vivek Goyal
2009-11-12 23:32 ` [PATCH 02/16] blkio: Introduce the notion of cfq groups Vivek Goyal
2009-11-12 23:32 ` [PATCH 03/16] blkio: Keep queue on service tree until we expire it Vivek Goyal
2009-11-13 10:39   ` Corrado Zoccolo
2009-11-13 10:48     ` Jens Axboe
2009-11-13 15:05       ` Vivek Goyal
2009-11-13 18:44         ` Jens Axboe
2009-11-12 23:32 ` [PATCH 04/16] blkio: Introduce the root service tree for cfq groups Vivek Goyal
2009-11-12 23:32 ` [PATCH 05/16] blkio: Implement per cfq group latency target and busy queue avg Vivek Goyal
2009-11-13 10:46   ` Corrado Zoccolo
2009-11-13 15:18     ` Vivek Goyal
2009-11-13 16:15       ` Vivek Goyal
2009-11-13 18:40         ` Corrado Zoccolo
2009-11-13 19:26           ` Vivek Goyal
2009-11-13 19:38             ` Corrado Zoccolo
2009-11-12 23:32 ` [PATCH 06/16] blkio: Introduce blkio controller cgroup interface Vivek Goyal
2009-11-12 23:32 ` Vivek Goyal [this message]
2009-11-12 23:32 ` [PATCH 08/16] blkio: Group time used accounting and workload context save restore Vivek Goyal
2009-11-12 23:32 ` [PATCH 09/16] blkio: Dynamic cfq group creation based on cgroup tasks belongs to Vivek Goyal
2009-11-12 23:32 ` [PATCH 10/16] blkio: Take care of cgroup deletion and cfq group reference counting Vivek Goyal
2009-11-12 23:32 ` [PATCH 11/16] blkio: Some debugging aids for CFQ Vivek Goyal
2009-11-12 23:32 ` [PATCH 12/16] blkio: Export disk time and sectors used by a group to user space Vivek Goyal
2009-11-12 23:32 ` [PATCH 13/16] blkio: Provide some isolation between groups Vivek Goyal
2009-11-12 23:32 ` [PATCH 14/16] blkio: Idle on a group for some time on rotational media Vivek Goyal
2009-11-13 10:58   ` Corrado Zoccolo
2009-11-13 15:37     ` Vivek Goyal
2009-11-12 23:32 ` [PATCH 15/16] blkio: Drop the reference to queue once the task changes cgroup Vivek Goyal
2009-11-12 23:32 ` [PATCH 16/16] blkio: Propagate cgroup weight updation to cfq groups Vivek Goyal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1258068756-10766-8-git-send-email-vgoyal@redhat.com \
    --to=vgoyal@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=dpshah@google.com \
    --cc=fernando@oss.ntt.co.jp \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=jens.axboe@oracle.com \
    --cc=jmoyer@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=m-ikeda@ds.jp.nec.com \
    --cc=nauman@google.com \
    --cc=riel@redhat.com \
    --cc=righi.andrea@gmail.com \
    --cc=ryov@valinux.co.jp \
    --cc=s-uchida@ap.jp.nec.com \
    --cc=taka@valinux.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox