The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: "Satoshi UCHIDA" <s-uchida@ap.jp.nec.com>
To: <linux-kernel@vger.kernel.org>,
	<containers@lists.linux-foundation.org>,
	<virtualization@lists.linux-foundation.org>,
	<jens.axboe@oracle.com>, "'Ryo Tsuruta'" <ryov@valinux.co.jp>,
	"'Andrea Righi'" <righi.andrea@gmail.com>, <ngupta@google.com>,
	<fernando@oss.ntt.co.jp>, <vtaras@openvz.org>
Cc: "'Andrew Morton'" <akpm@linux-foundation.org>,
	"'SUGAWARA Tomoyoshi'" <tom-sugawara@ap.jp.nec.com>,
	<menage@google.com>, <balbir@linux.vnet.ibm.com>
Subject: [PATCH][cfq-cgroups][Option 2] Introduce ioprio class for top layer.
Date: Wed, 12 Nov 2008 17:37:59 +0900	[thread overview]
Message-ID: <002701c944a1$f7954c70$e6bfe550$@jp.nec.com> (raw)
In-Reply-To: <000c01c9449e$c5bcdc20$51369460$@jp.nec.com>

>From c13547c5758479116b6dcf10c58d0ef4f058351e Mon Sep 17 00:00:00 2001
From: Satoshi UCHIDA <s-uchida@ap.jp.nec.com>
Date: Fri, 7 Nov 2008 19:21:19 +0900
Subject: [PATCH][cfq-cgroups] Introduce ioprio class for top layer.

  This patch introduces an ioprio class for the cfq data control layer.
  By applying this patch, the controller can also handle the RT/IDLE
  properties among groups.

    Signed-off-by: Satoshi UCHIDA <s-uchida@ap.jp.nec.com>

---
 block/cfq-cgroup.c          |  344 +++++++++++++++++++++++++------------------
 include/linux/cfq-iosched.h |    1 +
 2 files changed, 203 insertions(+), 142 deletions(-)

diff --git a/block/cfq-cgroup.c b/block/cfq-cgroup.c
index bb8cb6f..993a3b6 100644
--- a/block/cfq-cgroup.c
+++ b/block/cfq-cgroup.c
@@ -20,11 +20,24 @@
 
 static const int cfq_cgroup_slice = HZ / 10;
 
+/*
+ * offset from end of service tree
+ */
+#define CFQ_CGROUP_IDLE_DELAY		(HZ / 5)
+
+#define cfq_data_class_idle(cfqd)				\
+		((cfqd)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_data_class_rt(cfqd)					\
+		((cfqd)->ioprio_class == IOPRIO_CLASS_RT)
+
+
+
 static struct cfq_ops cfq_cgroup_op;
 
 struct cfq_cgroup {
 	struct cgroup_subsys_state css;
 	unsigned int ioprio;
+	unsigned short ioprio_class;
 
 	struct rb_root sibling_tree;
 	unsigned int siblings;
@@ -161,6 +174,7 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
 		cfqc = cgroup_to_cfq_cgroup(get_root_subsys(&cfq_subsys));
 		cfq_cgroup_sibling_tree_add(cfqc, cfqd);
 		cfqd->ioprio = cfqc->ioprio;
+		cfqd->ioprio_class = cfqc->ioprio_class;
 	} else {
 		struct cfq_data *__cfqd;
 		__cfqd = __cfq_cgroup_init_queue(cfqd->cfqdd->queue,
@@ -168,7 +182,7 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
 		if (!__cfqd)
 			return NULL;
 		cfq_cgroup_sibling_tree_add(cfqc, __cfqd);
-		__cfqd->ioprio = cfqc->ioprio;
+		__cfqd->ioprio_class = cfqc->ioprio_class;
 	}
 
 	/* check and create cfq_data for children */
@@ -250,6 +264,7 @@ cfq_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 		return ERR_PTR(-ENOMEM);
 
 	cfqc->ioprio = 3;
+	cfqc->ioprio = IOPRIO_CLASS_BE;
 
 	cfqc->sibling_tree = RB_ROOT;
 	cfqc->siblings = 0;
@@ -378,7 +393,15 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
 	unsigned long rb_key;
 	int left;
 
-	if (!add_front) {
+	if (cfq_data_class_idle(cfqd)) {
+		rb_key = CFQ_CGROUP_IDLE_DELAY;
+		parent = rb_last(&cfqdd->service_tree.rb);
+		if (parent && parent != &cfqd->rb_node) {
+			__cfqd = rb_entry(parent, struct cfq_data, rb_node);
+			rb_key += __cfqd->rb_key;
+		} else
+			rb_key += jiffies;
+	} else if (!add_front) {
 		rb_key = cfq_cgroup_slice_offset(cfqd) + jiffies;
 		rb_key += cfqd->slice_resid;
 		cfqd->slice_resid = 0;
@@ -400,7 +423,23 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
 		parent = *p;
 		__cfqd = rb_entry(parent, struct cfq_data, rb_node);
 
-		if (rb_key < __cfqd->rb_key)
+
+		/*
+		 * sort RT cfq_data first, we always want to give
+		 * preference to them. IDLE cfq_data goes to the back.
+		 * after that, sort on the next service time.
+		 */
+		if (cfq_data_class_rt(cfqd) > cfq_data_class_rt(__cfqd))
+			n = &(*p)->rb_left;
+		else if (cfq_data_class_rt(cfqd) < cfq_data_class_rt(__cfqd))
+			n = &(*p)->rb_right;
+		else if (cfq_data_class_idle(cfqd) <
+					cfq_data_class_idle(__cfqd))
+			n = &(*p)->rb_left;
+		else if (cfq_data_class_idle(cfqd) >
+					cfq_data_class_idle(__cfqd))
+			n = &(*p)->rb_right;
+		else if (rb_key < __cfqd->rb_key)
 			n = &(*p)->rb_left;
 		else
 			n = &(*p)->rb_right;
@@ -542,6 +579,14 @@ int cfq_cgroup_dispatch_requests(struct request_queue *q, int force)
 	if (cfqd)
 		dispatched = cfq_queue_dispatch_requests(cfqd, force);
 
+	/*
+	 * idle cfq_data always expire after 1 dispatch round.
+	 */
+	if (cfqdd->busy_data > 1 && cfq_data_class_idle(cfqd)) {
+		cfqd->slice_end = jiffies + 1;
+		cfq_cgroup_slice_expired(cfqdd, 0);
+	}
+
 	return dispatched;
 }
 
@@ -699,149 +744,164 @@ param_separate(const char *master, char *valbuf, char *pathbuf,  int size)
 	*pc2 = '\0';
 }
 
-static ssize_t cfq_cgroup_read(struct cgroup *cont, struct cftype *cft,
-			       struct file *file, char __user *userbuf,
-			       size_t nbytes, loff_t *ppos)
-{
-	struct cfq_cgroup *cfqc;
-	char *page;
-	ssize_t ret;
-	struct rb_node *p;
-
-	page = (char *)__get_free_page(GFP_TEMPORARY);
-	if (!page)
-		return -ENOMEM;
-
-	cgroup_lock();
-	if (cgroup_is_removed(cont)) {
-		cgroup_unlock();
-		ret = -ENODEV;
-		goto out;
-	}
-
-	cfqc = cgroup_to_cfq_cgroup(cont);
-
-	cgroup_unlock();
-
-	/* print priority */
-	ret = snprintf(page, PAGE_SIZE, "default priority: %d\n", cfqc->ioprio);
-
-	p = rb_first(&cfqc->sibling_tree);
-	while (p) {
-		struct cfq_data *__cfqd;
-
-		__cfqd = rb_entry(p, struct cfq_data, group_node);
-
-		ret += snprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",
-			       __cfqd->cfqdd->queue->kobj.parent->name,
-			       __cfqd->ioprio);
-
-		p = rb_next(p);
-	}
 
-	ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);
-
-out:
-	free_page((unsigned long)page);
-	return ret;
+#define READ_FUNCTION(__FUNC, __VAR, __DEF_MSG)				\
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft,		\
+			struct file *file, char __user *userbuf,	\
+			size_t nbytes, loff_t *ppos)			\
+{									\
+	struct cfq_cgroup *cfqc;					\
+	char *page;							\
+	ssize_t ret;							\
+	struct rb_node *p;						\
+									\
+	page = (char *)__get_free_page(GFP_TEMPORARY);			\
+	if (!page)							\
+		return -ENOMEM;						\
+									\
+	cgroup_lock();							\
+	if (cgroup_is_removed(cont)) {					\
+		cgroup_unlock();					\
+		ret = -ENODEV;						\
+		goto out;						\
+	}								\
+									\
+	cfqc = cgroup_to_cfq_cgroup(cont);				\
+									\
+	cgroup_unlock();						\
+									\
+	/* print */							\
+	ret = snprintf(page, PAGE_SIZE, "default " __DEF_MSG ": %d\n",	\
+			cfqc->__VAR);					\
+									\
+	p = rb_first(&cfqc->sibling_tree);				\
+	while (p) {							\
+		struct cfq_data *__cfqd;				\
+									\
+		__cfqd = rb_entry(p, struct cfq_data, group_node);	\
+									\
+		ret += snprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",\
+			       __cfqd->cfqdd->queue->kobj.parent->name,	\
+			       __cfqd->__VAR);				\
+									\
+		p = rb_next(p);						\
+	}								\
+									\
+	ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);\
+									\
+out:									\
+	free_page((unsigned long)page);					\
+	return ret;							\
 }
-
-static ssize_t cfq_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
-{
-	struct cfq_cgroup *cfqc;
-	ssize_t ret;
-	long new_prio;
-	int err, sn;
-	char *buffer = NULL;
-	char *valbuf = NULL, *pathbuf = NULL;
-	struct rb_node *p;
-
-	cgroup_lock();
-	if (cgroup_is_removed(cont)) {
-		cgroup_unlock();
-		ret = -ENODEV;
-		goto out;
-	}
-
-	cfqc = cgroup_to_cfq_cgroup(cont);
-	cgroup_unlock();
-
-	/* set priority */
-	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (buffer == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		ret = -EFAULT;
-		goto free_buf;
-	}
-	buffer[nbytes] = 0;
-
-	valbuf = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (!valbuf) {
-		ret = -ENOMEM;
-		goto free_buf;
-	}
-
-	pathbuf = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (!pathbuf) {
-		ret = -ENOMEM;
-		goto free_val;
-	}
-
-	param_separate(buffer, valbuf, pathbuf, nbytes);
-
-	err = strict_strtoul(valbuf, 10, &new_prio);
-	if ((err) || ((new_prio < 0) || (new_prio > CFQ_CGROUP_MAX_IOPRIO))) {
-		ret = -EINVAL;
-		goto free_path;
-	}
-
-	sn = strlen(pathbuf);
-
-	p = rb_first(&cfqc->sibling_tree);
-	while (p) {
-		struct cfq_data *__cfqd;
-		const char *namep;
-
-		__cfqd = rb_entry(p, struct cfq_data, group_node);
-		namep = __cfqd->cfqdd->queue->kobj.parent->name;
-
-		if (sn == 0) {
-			__cfqd->ioprio = new_prio;
-		} else if ((sn == strlen(namep)) &&
-			 (strncmp(pathbuf, namep, sn) == 0)) {
-			__cfqd->ioprio = new_prio;
-			break;
-		}
-
-		p = rb_next(p);
-	}
-
-	if ((sn == 0) ||
-	    ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0)))
-		cfqc->ioprio = new_prio;
-
-	ret = nbytes;
-
-free_path:
-	kfree(pathbuf);
-free_val:
-	kfree(valbuf);
-free_buf:
-	kfree(buffer);
-out:
-	return ret;
+READ_FUNCTION(cfq_cgroup_ioprio_read, ioprio, "priority");
+READ_FUNCTION(cfq_cgroup_ioprio_class_read, ioprio_class, "priority class");
+#undef READ_FUNCTION
+
+#define WRITE_FUNCTION(__FUNC, __VAR, MIN, MAX)				\
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft,		\
+			struct file *file, const char __user *userbuf,	\
+			size_t nbytes, loff_t *ppos)			\
+{									\
+	struct cfq_cgroup *cfqc;					\
+	ssize_t ret;							\
+	long new_val;							\
+	int err, sn;							\
+	char *buffer = NULL;						\
+	char *valbuf = NULL, *pathbuf = NULL;				\
+	struct rb_node *p;						\
+									\
+	cgroup_lock();							\
+	if (cgroup_is_removed(cont)) {					\
+		cgroup_unlock();					\
+		ret = -ENODEV;						\
+		goto out;						\
+	}								\
+									\
+	cfqc = cgroup_to_cfq_cgroup(cont);				\
+	cgroup_unlock();						\
+									\
+	/* set */							\
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);			\
+	if (buffer == NULL)						\
+		return -ENOMEM;						\
+									\
+	if (copy_from_user(buffer, userbuf, nbytes)) {			\
+		ret = -EFAULT;						\
+		goto free_buf;						\
+	}								\
+	buffer[nbytes] = 0;						\
+									\
+	valbuf = kmalloc(nbytes + 1, GFP_KERNEL);			\
+	if (!valbuf) {							\
+		ret = -ENOMEM;						\
+		goto free_buf;						\
+	}								\
+									\
+	pathbuf = kmalloc(nbytes + 1, GFP_KERNEL);			\
+	if (!pathbuf) {							\
+		ret = -ENOMEM;						\
+		goto free_val;						\
+	}								\
+									\
+	param_separate(buffer, valbuf, pathbuf, nbytes);		\
+									\
+	err = strict_strtoul(valbuf, 10, &new_val);			\
+	if ((err) || ((new_val < (MIN)) || (new_val > (MAX)))) {	\
+		ret = -EINVAL;						\
+		goto free_path;						\
+	}								\
+									\
+	sn = strlen(pathbuf);						\
+									\
+	p = rb_first(&cfqc->sibling_tree);				\
+	while (p) {							\
+		struct cfq_data *__cfqd;				\
+		const char *namep;					\
+									\
+		__cfqd = rb_entry(p, struct cfq_data, group_node);	\
+		namep = __cfqd->cfqdd->queue->kobj.parent->name;	\
+									\
+		if (sn == 0) {						\
+			__cfqd->__VAR = new_val;			\
+		} else if ((sn == strlen(namep)) &&			\
+			 (strncmp(pathbuf, namep, sn) == 0)) {		\
+			__cfqd->__VAR = new_val;			\
+			break;						\
+		}							\
+									\
+		p = rb_next(p);						\
+	}								\
+									\
+	if ((sn == 0) ||						\
+	    ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0)))	\
+		cfqc->__VAR = new_val;					\
+									\
+	ret = nbytes;							\
+									\
+free_path:								\
+	kfree(pathbuf);							\
+free_val:								\
+	kfree(valbuf);							\
+free_buf:								\
+	kfree(buffer);							\
+out:									\
+	return ret;							\
 }
+WRITE_FUNCTION(cfq_cgroup_ioprio_write, ioprio, 0, CFQ_CGROUP_MAX_IOPRIO);
+WRITE_FUNCTION(cfq_cgroup_ioprio_class_write, ioprio_class, 0,
+							IOPRIO_CLASS_IDLE);
+#undef WRITE_FUNCTION
+
+#define CFQ_CGROUP_CTYPE_ATTR(_name)			\
+	{						\
+		.name = (__stringify(_name)),		\
+		.read = cfq_cgroup_##_name##_read,	\
+		.write = cfq_cgroup_##_name##_write,	\
+	}
 
 static struct cftype files[] = {
-	{
-		.name = "ioprio",
-		.read = cfq_cgroup_read,
-		.write = cfq_cgroup_write,
-	},
+	CFQ_CGROUP_CTYPE_ATTR(ioprio),
+	CFQ_CGROUP_CTYPE_ATTR(ioprio_class),
 };
 
 static int cfq_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/include/linux/cfq-iosched.h b/include/linux/cfq-iosched.h
index 920bcb5..ca04ebd 100644
--- a/include/linux/cfq-iosched.h
+++ b/include/linux/cfq-iosched.h
@@ -102,6 +102,7 @@ struct cfq_data {
 
 #ifdef CONFIG_IOSCHED_CFQ_CGROUP
 	unsigned int ioprio;
+	unsigned short ioprio_class;
 
 	/* sibling_tree member for cfq_meta_data */
 	struct rb_node sib_node;
-- 
1.5.6.5



  parent reply	other threads:[~2008-11-12  8:43 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-12  8:15 [PATCH][RFC][12+2][v3] A expanded CFQ scheduler for cgroups Satoshi UCHIDA
2008-11-12  8:23 ` [PATCH][cfq-cgroups][01/12] Move basic strcture variable to header file Satoshi UCHIDA
2008-11-12  8:24 ` [PATCH][cfq-cgroups][02/12] Introduce "cfq_driver_data" structure Satoshi UCHIDA
2008-11-12  8:25 ` [PATCH][cfq-cgroups][03/12] Add cgroup file and modify configure files Satoshi UCHIDA
2008-11-12  8:26 ` [PATCH][cfq-cgroups][04/12] Register or unregister "cfq-cgroups" module Satoshi UCHIDA
2008-11-12  8:26 ` [PATCH][cfq-cgroups][05/12] Introduce cgroups structure with ioprio entry Satoshi UCHIDA
2008-11-12  8:27 ` [PATCH][cfq-cgroups][06/12] Add siblings tree control for driver data(cfq_driver_data) Satoshi UCHIDA
2008-11-12  8:28 ` [PATCH][cfq-cgroups][07/12] Add sibling tree control for group data(cfq_cgroup) Satoshi UCHIDA
2008-11-12  8:29 ` [PATCH][cfq-cgroups][08/12] Interface to new cfq data structure in cfq_cgroup module Satoshi UCHIDA
2008-11-12  8:29 ` [PATCH][cfq-cgroups][09/12] Develop service tree control Satoshi UCHIDA
2008-11-12  8:30 ` [PATCH][cfq-cgroups][10/12] Introduce request control for two layer Satoshi UCHIDA
2008-11-12  8:31 ` [PATCH][cfq-cgroups][11/12] Expand idle slice timer function Satoshi UCHIDA
2008-11-12  8:31 ` [PATCH][cfq-cgroups][12/12] Interface for parameter of cfq driver data Satoshi UCHIDA
2008-11-12  8:37 ` [PATCH][cfq-cgroups][Option 1] Introduce a think time valid entry Satoshi UCHIDA
2008-11-12  8:37 ` Satoshi UCHIDA [this message]
2008-11-12  8:57 ` [PATCH][RFC][12+2][v3] A expanded CFQ scheduler for cgroups Peter Zijlstra
2008-11-12  9:22   ` Satoshi UCHIDA

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='002701c944a1$f7954c70$e6bfe550$@jp.nec.com' \
    --to=s-uchida@ap.jp.nec.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=fernando@oss.ntt.co.jp \
    --cc=jens.axboe@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=ngupta@google.com \
    --cc=righi.andrea@gmail.com \
    --cc=ryov@valinux.co.jp \
    --cc=tom-sugawara@ap.jp.nec.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=vtaras@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox