From: "Satoshi UCHIDA" <s-uchida@ap.jp.nec.com>
To: <linux-kernel@vger.kernel.org>,
<containers@lists.linux-foundation.org>,
<virtualization@lists.linux-foundation.org>,
<jens.axboe@oracle.com>, "'Ryo Tsuruta'" <ryov@valinux.co.jp>,
"'Andrea Righi'" <righi.andrea@gmail.com>, <ngupta@google.com>,
<fernando@oss.ntt.co.jp>, <vtaras@openvz.org>
Cc: "'Andrew Morton'" <akpm@linux-foundation.org>,
"'SUGAWARA Tomoyoshi'" <tom-sugawara@ap.jp.nec.com>,
<menage@google.com>, <balbir@linux.vnet.ibm.com>
Subject: [PATCH][cfq-cgroups][Option 2] Introduce ioprio class for top layer.
Date: Wed, 12 Nov 2008 17:37:59 +0900 [thread overview]
Message-ID: <002701c944a1$f7954c70$e6bfe550$@jp.nec.com> (raw)
In-Reply-To: <000c01c9449e$c5bcdc20$51369460$@jp.nec.com>
>From c13547c5758479116b6dcf10c58d0ef4f058351e Mon Sep 17 00:00:00 2001
From: Satoshi UCHIDA <s-uchida@ap.jp.nec.com>
Date: Fri, 7 Nov 2008 19:21:19 +0900
Subject: [PATCH][cfq-cgroups] Introduce ioprio class for top layer.
This patch introduces an ioprio class for the cfq data control layer.
By applying this patch, the controller can also handle the RT/IDLE classes
among groups.
Signed-off-by: Satoshi UCHIDA <s-uchida@ap.jp.nec.com>
---
block/cfq-cgroup.c | 344 +++++++++++++++++++++++++------------------
include/linux/cfq-iosched.h | 1 +
2 files changed, 203 insertions(+), 142 deletions(-)
diff --git a/block/cfq-cgroup.c b/block/cfq-cgroup.c
index bb8cb6f..993a3b6 100644
--- a/block/cfq-cgroup.c
+++ b/block/cfq-cgroup.c
@@ -20,11 +20,24 @@
static const int cfq_cgroup_slice = HZ / 10;
+/*
+ * offset from end of service tree
+ */
+#define CFQ_CGROUP_IDLE_DELAY (HZ / 5)
+
+#define cfq_data_class_idle(cfqd) \
+ ((cfqd)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_data_class_rt(cfqd) \
+ ((cfqd)->ioprio_class == IOPRIO_CLASS_RT)
+
+
+
static struct cfq_ops cfq_cgroup_op;
struct cfq_cgroup {
struct cgroup_subsys_state css;
unsigned int ioprio;
+ unsigned short ioprio_class;
struct rb_root sibling_tree;
unsigned int siblings;
@@ -161,6 +174,7 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
cfqc = cgroup_to_cfq_cgroup(get_root_subsys(&cfq_subsys));
cfq_cgroup_sibling_tree_add(cfqc, cfqd);
cfqd->ioprio = cfqc->ioprio;
+ cfqd->ioprio_class = cfqc->ioprio_class;
} else {
struct cfq_data *__cfqd;
__cfqd = __cfq_cgroup_init_queue(cfqd->cfqdd->queue,
@@ -168,7 +182,8 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
if (!__cfqd)
return NULL;
cfq_cgroup_sibling_tree_add(cfqc, __cfqd);
__cfqd->ioprio = cfqc->ioprio;
+ __cfqd->ioprio_class = cfqc->ioprio_class;
}
/* check and create cfq_data for children */
@@ -250,6 +264,7 @@ cfq_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
return ERR_PTR(-ENOMEM);
cfqc->ioprio = 3;
+ cfqc->ioprio_class = IOPRIO_CLASS_BE;
cfqc->sibling_tree = RB_ROOT;
cfqc->siblings = 0;
@@ -378,7 +393,15 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
unsigned long rb_key;
int left;
- if (!add_front) {
+ if (cfq_data_class_idle(cfqd)) {
+ rb_key = CFQ_CGROUP_IDLE_DELAY;
+ parent = rb_last(&cfqdd->service_tree.rb);
+ if (parent && parent != &cfqd->rb_node) {
+ __cfqd = rb_entry(parent, struct cfq_data, rb_node);
+ rb_key += __cfqd->rb_key;
+ } else
+ rb_key += jiffies;
+ } else if (!add_front) {
rb_key = cfq_cgroup_slice_offset(cfqd) + jiffies;
rb_key += cfqd->slice_resid;
cfqd->slice_resid = 0;
@@ -400,7 +423,23 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
parent = *p;
__cfqd = rb_entry(parent, struct cfq_data, rb_node);
- if (rb_key < __cfqd->rb_key)
+
+ /*
+ * sort RT cfq_data first, we always want to give
+ * preference to them. IDLE cfq_data goes to the back.
+ * after that, sort on the next service time.
+ */
+ if (cfq_data_class_rt(cfqd) > cfq_data_class_rt(__cfqd))
+ n = &(*p)->rb_left;
+ else if (cfq_data_class_rt(cfqd) < cfq_data_class_rt(__cfqd))
+ n = &(*p)->rb_right;
+ else if (cfq_data_class_idle(cfqd) <
+ cfq_data_class_idle(__cfqd))
+ n = &(*p)->rb_left;
+ else if (cfq_data_class_idle(cfqd) >
+ cfq_data_class_idle(__cfqd))
+ n = &(*p)->rb_right;
+ else if (rb_key < __cfqd->rb_key)
n = &(*p)->rb_left;
else
n = &(*p)->rb_right;
@@ -542,6 +579,14 @@ int cfq_cgroup_dispatch_requests(struct request_queue *q, int force)
if (cfqd)
dispatched = cfq_queue_dispatch_requests(cfqd, force);
+ /*
+ * idle cfq_data always expires after one dispatch round (cfqd may be NULL).
+ */
+ if (cfqd && cfqdd->busy_data > 1 && cfq_data_class_idle(cfqd)) {
+ cfqd->slice_end = jiffies + 1;
+ cfq_cgroup_slice_expired(cfqdd, 0);
+ }
+
return dispatched;
}
@@ -699,149 +744,164 @@ param_separate(const char *master, char *valbuf, char *pathbuf, int size)
*pc2 = '\0';
}
-static ssize_t cfq_cgroup_read(struct cgroup *cont, struct cftype *cft,
- struct file *file, char __user *userbuf,
- size_t nbytes, loff_t *ppos)
-{
- struct cfq_cgroup *cfqc;
- char *page;
- ssize_t ret;
- struct rb_node *p;
-
- page = (char *)__get_free_page(GFP_TEMPORARY);
- if (!page)
- return -ENOMEM;
-
- cgroup_lock();
- if (cgroup_is_removed(cont)) {
- cgroup_unlock();
- ret = -ENODEV;
- goto out;
- }
-
- cfqc = cgroup_to_cfq_cgroup(cont);
-
- cgroup_unlock();
-
- /* print priority */
- ret = snprintf(page, PAGE_SIZE, "default priority: %d\n", cfqc->ioprio);
-
- p = rb_first(&cfqc->sibling_tree);
- while (p) {
- struct cfq_data *__cfqd;
-
- __cfqd = rb_entry(p, struct cfq_data, group_node);
-
- ret += snprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",
- __cfqd->cfqdd->queue->kobj.parent->name,
- __cfqd->ioprio);
-
- p = rb_next(p);
- }
- ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);
-
-out:
- free_page((unsigned long)page);
- return ret;
+#define READ_FUNCTION(__FUNC, __VAR, __DEF_MSG) \
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft, \
+ struct file *file, char __user *userbuf, \
+ size_t nbytes, loff_t *ppos) \
+{ \
+ struct cfq_cgroup *cfqc; \
+ char *page; \
+ ssize_t ret; \
+ struct rb_node *p; \
+ \
+ page = (char *)__get_free_page(GFP_TEMPORARY); \
+ if (!page) \
+ return -ENOMEM; \
+ \
+ cgroup_lock(); \
+ if (cgroup_is_removed(cont)) { \
+ cgroup_unlock(); \
+ ret = -ENODEV; \
+ goto out; \
+ } \
+ \
+ cfqc = cgroup_to_cfq_cgroup(cont); \
+ \
+ cgroup_unlock(); \
+ \
+ /* print; scnprintf() keeps ret below PAGE_SIZE even on truncation */ \
+ ret = scnprintf(page, PAGE_SIZE, "default " __DEF_MSG ": %d\n", \
+ cfqc->__VAR); \
+ \
+ p = rb_first(&cfqc->sibling_tree); \
+ while (p) { \
+ struct cfq_data *__cfqd; \
+ \
+ __cfqd = rb_entry(p, struct cfq_data, group_node); \
+ \
+ ret += scnprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",\
+ __cfqd->cfqdd->queue->kobj.parent->name, \
+ __cfqd->__VAR); \
+ \
+ p = rb_next(p); \
+ } \
+ \
+ ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);\
+ \
+out: \
+ free_page((unsigned long)page); \
+ return ret; \
}
-
-static ssize_t cfq_cgroup_write(struct cgroup *cont, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
-{
- struct cfq_cgroup *cfqc;
- ssize_t ret;
- long new_prio;
- int err, sn;
- char *buffer = NULL;
- char *valbuf = NULL, *pathbuf = NULL;
- struct rb_node *p;
-
- cgroup_lock();
- if (cgroup_is_removed(cont)) {
- cgroup_unlock();
- ret = -ENODEV;
- goto out;
- }
-
- cfqc = cgroup_to_cfq_cgroup(cont);
- cgroup_unlock();
-
- /* set priority */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (buffer == NULL)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- ret = -EFAULT;
- goto free_buf;
- }
- buffer[nbytes] = 0;
-
- valbuf = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!valbuf) {
- ret = -ENOMEM;
- goto free_buf;
- }
-
- pathbuf = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto free_val;
- }
-
- param_separate(buffer, valbuf, pathbuf, nbytes);
-
- err = strict_strtoul(valbuf, 10, &new_prio);
- if ((err) || ((new_prio < 0) || (new_prio > CFQ_CGROUP_MAX_IOPRIO))) {
- ret = -EINVAL;
- goto free_path;
- }
-
- sn = strlen(pathbuf);
-
- p = rb_first(&cfqc->sibling_tree);
- while (p) {
- struct cfq_data *__cfqd;
- const char *namep;
-
- __cfqd = rb_entry(p, struct cfq_data, group_node);
- namep = __cfqd->cfqdd->queue->kobj.parent->name;
-
- if (sn == 0) {
- __cfqd->ioprio = new_prio;
- } else if ((sn == strlen(namep)) &&
- (strncmp(pathbuf, namep, sn) == 0)) {
- __cfqd->ioprio = new_prio;
- break;
- }
-
- p = rb_next(p);
- }
-
- if ((sn == 0) ||
- ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0)))
- cfqc->ioprio = new_prio;
-
- ret = nbytes;
-
-free_path:
- kfree(pathbuf);
-free_val:
- kfree(valbuf);
-free_buf:
- kfree(buffer);
-out:
- return ret;
+READ_FUNCTION(cfq_cgroup_ioprio_read, ioprio, "priority");
+READ_FUNCTION(cfq_cgroup_ioprio_class_read, ioprio_class, "priority class");
+#undef READ_FUNCTION
+
+#define WRITE_FUNCTION(__FUNC, __VAR, MIN, MAX) \
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft, \
+ struct file *file, const char __user *userbuf, \
+ size_t nbytes, loff_t *ppos) \
+{ \
+ struct cfq_cgroup *cfqc; \
+ ssize_t ret; \
+ long new_val; \
+ int err, sn; \
+ char *buffer = NULL; \
+ char *valbuf = NULL, *pathbuf = NULL; \
+ struct rb_node *p; \
+ \
+ cgroup_lock(); \
+ if (cgroup_is_removed(cont)) { \
+ cgroup_unlock(); \
+ ret = -ENODEV; \
+ goto out; \
+ } \
+ \
+ cfqc = cgroup_to_cfq_cgroup(cont); \
+ cgroup_unlock(); \
+ \
+ /* set: copy the user string, then split it into value and path */ \
+ buffer = kmalloc(nbytes + 1, GFP_KERNEL); \
+ if (buffer == NULL) \
+ return -ENOMEM; \
+ \
+ if (copy_from_user(buffer, userbuf, nbytes)) { \
+ ret = -EFAULT; \
+ goto free_buf; \
+ } \
+ buffer[nbytes] = 0; \
+ \
+ valbuf = kmalloc(nbytes + 1, GFP_KERNEL); \
+ if (!valbuf) { \
+ ret = -ENOMEM; \
+ goto free_buf; \
+ } \
+ \
+ pathbuf = kmalloc(nbytes + 1, GFP_KERNEL); \
+ if (!pathbuf) { \
+ ret = -ENOMEM; \
+ goto free_val; \
+ } \
+ \
+ param_separate(buffer, valbuf, pathbuf, nbytes); \
+ \
+ err = strict_strtol(valbuf, 10, &new_val); \
+ if ((err) || ((new_val < (MIN)) || (new_val > (MAX)))) { \
+ ret = -EINVAL; \
+ goto free_path; \
+ } \
+ \
+ sn = strlen(pathbuf); \
+ \
+ p = rb_first(&cfqc->sibling_tree); \
+ while (p) { \
+ struct cfq_data *__cfqd; \
+ const char *namep; \
+ \
+ __cfqd = rb_entry(p, struct cfq_data, group_node); \
+ namep = __cfqd->cfqdd->queue->kobj.parent->name; \
+ \
+ if (sn == 0) { \
+ __cfqd->__VAR = new_val; \
+ } else if ((sn == strlen(namep)) && \
+ (strncmp(pathbuf, namep, sn) == 0)) { \
+ __cfqd->__VAR = new_val; \
+ break; \
+ } \
+ \
+ p = rb_next(p); \
+ } \
+ \
+ if ((sn == 0) || \
+ ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0))) \
+ cfqc->__VAR = new_val; \
+ \
+ ret = nbytes; \
+ \
+free_path: \
+ kfree(pathbuf); \
+free_val: \
+ kfree(valbuf); \
+free_buf: \
+ kfree(buffer); \
+out: \
+ return ret; \
}
+WRITE_FUNCTION(cfq_cgroup_ioprio_write, ioprio, 0, CFQ_CGROUP_MAX_IOPRIO);
+WRITE_FUNCTION(cfq_cgroup_ioprio_class_write, ioprio_class, 0,
+ IOPRIO_CLASS_IDLE);
+#undef WRITE_FUNCTION
+
+#define CFQ_CGROUP_CTYPE_ATTR(_name) \
+ { \
+ .name = (__stringify(_name)), \
+ .read = cfq_cgroup_##_name##_read, \
+ .write = cfq_cgroup_##_name##_write, \
+ }
static struct cftype files[] = {
- {
- .name = "ioprio",
- .read = cfq_cgroup_read,
- .write = cfq_cgroup_write,
- },
+ CFQ_CGROUP_CTYPE_ATTR(ioprio),
+ CFQ_CGROUP_CTYPE_ATTR(ioprio_class),
};
static int cfq_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/include/linux/cfq-iosched.h b/include/linux/cfq-iosched.h
index 920bcb5..ca04ebd 100644
--- a/include/linux/cfq-iosched.h
+++ b/include/linux/cfq-iosched.h
@@ -102,6 +102,7 @@ struct cfq_data {
#ifdef CONFIG_IOSCHED_CFQ_CGROUP
unsigned int ioprio;
+ unsigned short ioprio_class;
/* sibling_tree member for cfq_meta_data */
struct rb_node sib_node;
--
1.5.6.5
next prev parent reply other threads:[~2008-11-12 8:43 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-12 8:15 [PATCH][RFC][12+2][v3] A expanded CFQ scheduler for cgroups Satoshi UCHIDA
2008-11-12 8:15 ` Satoshi UCHIDA
2008-11-12 8:23 ` [PATCH][cfq-cgroups][01/12] Move basic strcture variable to header file Satoshi UCHIDA
2008-11-12 8:23 ` Satoshi UCHIDA
2008-11-12 8:23 ` Satoshi UCHIDA
2008-11-12 8:24 ` [PATCH][cfq-cgroups][02/12] Introduce "cfq_driver_data" structure Satoshi UCHIDA
2008-11-12 8:24 ` Satoshi UCHIDA
2008-11-12 8:24 ` Satoshi UCHIDA
2008-11-12 8:25 ` [PATCH][cfq-cgroups][03/12] Add cgroup file and modify configure files Satoshi UCHIDA
2008-11-12 8:25 ` Satoshi UCHIDA
2008-11-12 8:25 ` Satoshi UCHIDA
2008-11-12 8:26 ` [PATCH][cfq-cgroups][04/12] Register or unregister "cfq-cgroups" module Satoshi UCHIDA
2008-11-12 8:26 ` Satoshi UCHIDA
2008-11-12 8:26 ` Satoshi UCHIDA
2008-11-12 8:26 ` [PATCH][cfq-cgroups][05/12] Introduce cgroups structure with ioprio entry Satoshi UCHIDA
2008-11-12 8:26 ` Satoshi UCHIDA
2008-11-12 8:26 ` Satoshi UCHIDA
2008-11-12 8:27 ` [PATCH][cfq-cgroups][06/12] Add siblings tree control for driver data(cfq_driver_data) Satoshi UCHIDA
2008-11-12 8:27 ` Satoshi UCHIDA
2008-11-12 8:27 ` Satoshi UCHIDA
2008-11-12 8:28 ` [PATCH][cfq-cgroups][07/12] Add sibling tree control for group data(cfq_cgroup) Satoshi UCHIDA
2008-11-12 8:28 ` Satoshi UCHIDA
2008-11-12 8:28 ` Satoshi UCHIDA
2008-11-12 8:29 ` [PATCH][cfq-cgroups][08/12] Interface to new cfq data structure in cfq_cgroup module Satoshi UCHIDA
2008-11-12 8:29 ` Satoshi UCHIDA
2008-11-12 8:29 ` Satoshi UCHIDA
2008-11-12 8:29 ` [PATCH][cfq-cgroups][09/12] Develop service tree control Satoshi UCHIDA
2008-11-12 8:29 ` Satoshi UCHIDA
2008-11-12 8:29 ` Satoshi UCHIDA
2008-11-12 8:30 ` [PATCH][cfq-cgroups][10/12] Introduce request control for two layer Satoshi UCHIDA
2008-11-12 8:30 ` Satoshi UCHIDA
2008-11-12 8:30 ` Satoshi UCHIDA
2008-11-12 8:31 ` [PATCH][cfq-cgroups][11/12] Expand idle slice timer function Satoshi UCHIDA
2008-11-12 8:31 ` Satoshi UCHIDA
2008-11-12 8:31 ` Satoshi UCHIDA
2008-11-12 8:31 ` [PATCH][cfq-cgroups][12/12] Interface for parameter of cfq driver data Satoshi UCHIDA
2008-11-12 8:31 ` Satoshi UCHIDA
2008-11-12 8:31 ` Satoshi UCHIDA
2008-11-12 8:37 ` [PATCH][cfq-cgroups][Option 1] Introduce a think time valid entry Satoshi UCHIDA
2008-11-12 8:37 ` Satoshi UCHIDA
2008-11-12 8:37 ` Satoshi UCHIDA
2008-11-12 8:37 ` [PATCH][cfq-cgroups][Option 2] Introduce ioprio class for top layer Satoshi UCHIDA
2008-11-12 8:37 ` Satoshi UCHIDA
2008-11-12 8:37 ` Satoshi UCHIDA [this message]
2008-11-12 8:57 ` [PATCH][RFC][12+2][v3] A expanded CFQ scheduler for cgroups Peter Zijlstra
2008-11-12 8:57 ` Peter Zijlstra
2008-11-12 9:22 ` Satoshi UCHIDA
2008-11-12 9:22 ` Satoshi UCHIDA
2008-11-12 9:22 ` Satoshi UCHIDA
2008-11-12 8:57 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='002701c944a1$f7954c70$e6bfe550$@jp.nec.com' \
--to=s-uchida@ap.jp.nec.com \
--cc=akpm@linux-foundation.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=containers@lists.linux-foundation.org \
--cc=fernando@oss.ntt.co.jp \
--cc=jens.axboe@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=menage@google.com \
--cc=ngupta@google.com \
--cc=righi.andrea@gmail.com \
--cc=ryov@valinux.co.jp \
--cc=tom-sugawara@ap.jp.nec.com \
--cc=virtualization@lists.linux-foundation.org \
--cc=vtaras@openvz.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.