From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
To: Andrea Righi <righi.andrea@gmail.com>,
Ryo Tsuruta <ryov@valinux.co.jp>,
Hirokazu Takahashi <taka@valinux.co.jp>
Cc: menage@google.com, containers@lists.linux-foundation.org,
linux-kernel@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [PATCH 7/7] let io-throttle support using bio-cgroup id
Date: Thu, 20 Nov 2008 19:15:40 +0800 [thread overview]
Message-ID: <492546DC.20505@cn.fujitsu.com> (raw)
In-Reply-To: <4925445C.10302@cn.fujitsu.com>
This patch makes io-throttle support the bio-cgroup id.
With this patch, you don't have to mount io-throttle and
bio-cgroup together. This is gentler to other subsystems
that also want to use bio-cgroup.
Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
block/blk-core.c | 4 +-
block/blk-io-throttle.c | 324 ++++++++++++++++++++++++++++++++++++++-
include/linux/biotrack.h | 2 +
include/linux/blk-io-throttle.h | 5 +-
mm/biotrack.c | 11 ++
5 files changed, 339 insertions(+), 7 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index e187476..da3c8af 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1537,8 +1537,8 @@ void submit_bio(int rw, struct bio *bio)
if (bio_has_data(bio)) {
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
- cgroup_io_throttle(bio_iovec_idx(bio, 0)->bv_page,
- bio->bi_bdev, bio->bi_size, 0);
+ cgroup_io_throttle(bio, bio->bi_bdev,
+ bio->bi_size, 0);
} else {
task_io_account_read(bio->bi_size);
count_vm_events(PGPGIN, count);
diff --git a/block/blk-io-throttle.c b/block/blk-io-throttle.c
index e6a0a03..77f58a6 100644
--- a/block/blk-io-throttle.c
+++ b/block/blk-io-throttle.c
@@ -32,6 +32,9 @@
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/blk-io-throttle.h>
+#include <linux/biotrack.h>
+#include <linux/sched.h>
+#include <linux/bio.h>
/*
* Statistics for I/O bandwidth controller.
@@ -126,6 +129,13 @@ struct iothrottle_node {
struct iothrottle_stat stat;
};
+/* A list of iothrottle which associate with a bio_cgroup */
+static LIST_HEAD(bio_group_list);
+static DECLARE_MUTEX(bio_group_list_sem);
+
+enum {
+ MOVING_FORBIDDEN,
+};
/**
* struct iothrottle - throttling rules for a cgroup
* @css: pointer to the cgroup state
@@ -139,9 +149,125 @@ struct iothrottle_node {
struct iothrottle {
struct cgroup_subsys_state css;
struct list_head list;
+ struct list_head bio_node;
+ int bio_id;
+ unsigned long flags;
};
static struct iothrottle init_iothrottle;
+static inline int is_bind_biocgroup(void)
+{
+ if (init_iothrottle.css.cgroup->subsys[bio_cgroup_subsys_id])
+ return 1;
+
+ return 0;
+}
+
+static inline int is_moving_forbidden(const struct iothrottle *iot)
+{
+ return test_bit(MOVING_FORBIDDEN, &iot->flags);
+}
+
+
+static struct iothrottle *bioid_to_iothrottle(int id)
+{
+ struct iothrottle *iot;
+
+ down(&bio_group_list_sem);
+ list_for_each_entry(iot, &bio_group_list, bio_node) {
+ if (iot->bio_id == id) {
+ up(&bio_group_list_sem);
+ return iot;
+ }
+ }
+ up(&bio_group_list_sem);
+ return NULL;
+}
+
+static int is_bio_group(struct iothrottle *iot)
+{
+ if (iot && iot->bio_id > 0)
+ return 0;
+
+ return -1;
+}
+
+static int synchronize_bio_cgroup(int old_id, int new_id,
+ struct task_struct *tsk)
+{
+ struct iothrottle *old_group, *new_group;
+ int ret = 0;
+
+ old_group = bioid_to_iothrottle(old_id);
+ new_group = bioid_to_iothrottle(new_id);
+
+ /* no need hold cgroup_lock(), for bio_cgroup holding it already*/
+ get_task_struct(tsk);
+
+ /* This has nothing to do with us! */
+ if (is_bio_group(old_group) && is_bio_group(new_group)) {
+ goto out;
+ }
+
+ /* if moving from an associated one to an unassociated one,
+ just moving it to root
+ */
+ if (!is_bio_group(old_group) && is_bio_group(new_group)) {
+ BUG_ON(is_moving_forbidden(&init_iothrottle));
+ clear_bit(MOVING_FORBIDDEN, &old_group->flags);
+ ret = cgroup_attach_task(init_iothrottle.css.cgroup, tsk);
+ set_bit(MOVING_FORBIDDEN, &old_group->flags);
+ goto out;
+ }
+
+ if (!is_bio_group(new_group) && is_bio_group(old_group)) {
+ BUG_ON(!is_moving_forbidden(new_group));
+ clear_bit(MOVING_FORBIDDEN, &new_group->flags);
+ ret = cgroup_attach_task(new_group->css.cgroup, tsk);
+ set_bit(MOVING_FORBIDDEN, &new_group->flags);
+ goto out;
+ }
+
+ if (!is_bio_group(new_group) && !is_bio_group(old_group)) {
+ BUG_ON(!is_moving_forbidden(new_group));
+ clear_bit(MOVING_FORBIDDEN, &new_group->flags);
+ clear_bit(MOVING_FORBIDDEN, &old_group->flags);
+ ret = cgroup_attach_task(new_group->css.cgroup, tsk);
+ set_bit(MOVING_FORBIDDEN, &old_group->flags);
+ set_bit(MOVING_FORBIDDEN, &new_group->flags);
+ goto out;
+ }
+
+
+ out:
+ put_task_struct(tsk);
+ return ret;
+}
+
+static int iothrottle_notifier_call(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct tsk_move_msg *tmm;
+ int old_id, new_id;
+ struct task_struct *tsk;
+
+ if (is_bind_biocgroup())
+ return NOTIFY_OK;
+
+ tmm = (struct tsk_move_msg *)ptr;
+ old_id = tmm->old_id;
+ new_id = tmm->new_id;
+ tsk = tmm->tsk;
+ synchronize_bio_cgroup(old_id, new_id, tsk);
+
+ return NOTIFY_OK;
+}
+
+
+static struct notifier_block iothrottle_notifier = {
+ .notifier_call = iothrottle_notifier_call,
+};
+
static inline struct iothrottle *cgroup_to_iothrottle(struct cgroup *cgrp)
{
return container_of(cgroup_subsys_state(cgrp, iothrottle_subsys_id),
@@ -209,14 +335,20 @@ iothrottle_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct iothrottle *iot;
- if (unlikely((cgrp->parent) == NULL))
+ if (unlikely((cgrp->parent) == NULL)) {
iot = &init_iothrottle;
+ /* where should we release?*/
+ register_biocgroup_notifier(&iothrottle_notifier);
+ }
else {
iot = kmalloc(sizeof(*iot), GFP_KERNEL);
if (unlikely(!iot))
return ERR_PTR(-ENOMEM);
}
INIT_LIST_HEAD(&iot->list);
+ INIT_LIST_HEAD(&iot->bio_node);
+ iot->bio_id = -1;
+ clear_bit(MOVING_FORBIDDEN, &iot->flags);
return &iot->css;
}
@@ -229,6 +361,9 @@ static void iothrottle_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
struct iothrottle_node *n, *p;
struct iothrottle *iot = cgroup_to_iothrottle(cgrp);
+ if (unlikely((cgrp->parent) == NULL))
+ unregister_biocgroup_notifier(&iothrottle_notifier);
+
/*
* don't worry about locking here, at this point there must be not any
* reference to the list.
@@ -523,6 +658,138 @@ out1:
return ret;
}
+s64 read_bio_id(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct iothrottle *iot;
+
+ iot = cgroup_to_iothrottle(cgrp);
+ return iot->bio_id;
+}
+
+int write_bio_id(struct cgroup *cgrp, struct cftype *cft, s64 val)
+{
+ int id, i, count;
+ struct cgroup *bio_cgroup;
+ struct cgroup_iter it;
+ struct iothrottle *iot, *pos;
+ struct task_struct **tasks;
+
+ if (is_bind_biocgroup())
+ return -EPERM;
+
+ iot = cgroup_to_iothrottle(cgrp);
+
+ /* no more operation if it's a root */
+ if (!cgrp->parent)
+ return 0;
+
+ id = val;
+
+ /* de-associate from a bio-cgroup*/
+ if (id < 0) {
+ if (is_bio_group(iot)) {
+ return 0;
+ }
+
+ read_lock(&tasklist_lock);
+ count = cgroup_task_count(cgrp);
+ if (!count) {
+ ;
+ } else {
+ tasks = (struct task_struct **)kmalloc(count * sizeof(*tasks),
+ GFP_KERNEL);
+ if (unlikely(!tasks)) {
+ read_unlock(&tasklist_lock);
+ return -ENOMEM;
+ }
+ i = 0;
+ cgroup_iter_start(cgrp, &it);
+ while ((tasks[i] = cgroup_iter_next(cgrp, &it))) {
+ get_task_struct(tasks[i]);
+ i++;
+ }
+ cgroup_iter_end(cgrp, &it);
+
+ clear_bit(MOVING_FORBIDDEN, &iot->flags);
+ cgroup_lock();
+ for (i = 0; i < count; i++) {
+ cgroup_attach_task(init_iothrottle.css.cgroup, tasks[i]);
+ put_task_struct(tasks[i]);
+ }
+ cgroup_unlock();
+ kfree(tasks);
+ }
+
+ read_unlock(&tasklist_lock);
+ down(&bio_group_list_sem);
+ list_del_init(&iot->bio_node);
+ up(&bio_group_list_sem);
+
+ iot->bio_id = -1;
+ return 0;
+ }
+
+ if (cgroup_task_count(cgrp))
+ return -EPERM;
+
+ bio_cgroup = bio_id_to_cgroup(id);
+ if (bio_cgroup) {
+ /*
+ Go through the bio_group_list, if don't exist, put it
+ into this list.
+ */
+ down(&bio_group_list_sem);
+ list_for_each_entry(pos, &bio_group_list, bio_node) {
+ if (pos->bio_id == id) {
+ up(&bio_group_list_sem);
+ return -EEXIST;
+ }
+ }
+ up(&bio_group_list_sem);
+
+ read_lock(&tasklist_lock);
+ count = cgroup_task_count(bio_cgroup);
+ if (count) {
+ tasks = (struct task_struct **)kmalloc(count * sizeof(*tasks),
+ GFP_KERNEL);
+ if (unlikely(!tasks)) {
+ read_unlock(&tasklist_lock);
+ return -ENOMEM;
+ }
+ } else
+ goto no_tasks;
+
+ i = 0;
+
+ /* synchronize tasks with bio_cgroup */
+ cgroup_iter_start(bio_cgroup, &it);
+ while ((tasks[i] = cgroup_iter_next(bio_cgroup, &it))) {
+ get_task_struct(tasks[i]);
+ i++;
+ }
+ cgroup_iter_end(bio_cgroup, &it);
+
+ cgroup_lock();
+ for (i = 0; i < count; i++) {
+ cgroup_attach_task(cgrp, tasks[i]);
+ put_task_struct(tasks[i]);
+ }
+ cgroup_unlock();
+
+ kfree(tasks);
+ no_tasks:
+ read_unlock(&tasklist_lock);
+ down(&bio_group_list_sem);
+ list_add(&iot->bio_node, &bio_group_list);
+ up(&bio_group_list_sem);
+
+ iot->bio_id = id;
+ set_bit(MOVING_FORBIDDEN, &iot->flags);
+ }
+
+ return 0;
+}
+
static struct cftype files[] = {
{
.name = "bandwidth-max",
@@ -548,6 +815,11 @@ static struct cftype files[] = {
.read_seq_string = iothrottle_read,
.private = IOTHROTTLE_STAT,
},
+ {
+ .name = "bio_id",
+ .write_s64 = write_bio_id,
+ .read_s64 = read_bio_id,
+ }
};
static int iothrottle_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -555,11 +827,41 @@ static int iothrottle_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
}
+static int iothrottle_can_attach(struct cgroup_subsys *ss,
+ struct cgroup *cont, struct task_struct *tsk)
+{
+ struct iothrottle *new_iot, *old_iot;
+
+ new_iot = cgroup_to_iothrottle(cont);
+ old_iot = task_to_iothrottle(tsk);
+
+ if (!is_moving_forbidden(new_iot) && !is_moving_forbidden(old_iot))
+ return 0;
+ else
+ return -EPERM;
+}
+
+static int iothrottle_subsys_depend(struct cgroup_subsys *ss,
+ unsigned long subsys_bits)
+{
+ unsigned long allow_subsys_bits;
+
+ allow_subsys_bits = 0;
+ allow_subsys_bits |= 1ul << bio_cgroup_subsys_id;
+ allow_subsys_bits |= 1ul << iothrottle_subsys_id;
+
+ if (subsys_bits & ~allow_subsys_bits)
+ return -1;
+ return 0;
+}
+
struct cgroup_subsys iothrottle_subsys = {
.name = "blockio",
.create = iothrottle_create,
.destroy = iothrottle_destroy,
.populate = iothrottle_populate,
+ .can_attach = iothrottle_can_attach,
+ .subsys_depend = iothrottle_subsys_depend,
.subsys_id = iothrottle_subsys_id,
.early_init = 1,
};
@@ -681,13 +983,15 @@ static inline int is_kthread_io(void)
* timeout.
**/
unsigned long long
-cgroup_io_throttle(struct page *page, struct block_device *bdev,
+cgroup_io_throttle(struct bio *bio, struct block_device *bdev,
ssize_t bytes, int can_sleep)
{
struct iothrottle *iot;
struct iothrottle_sleep s = {};
unsigned long long sleep;
+ struct page *page;
+ iot = NULL;
if (unlikely(!bdev))
return 0;
BUG_ON(!bdev->bd_inode || !bdev->bd_disk);
@@ -710,7 +1014,21 @@ cgroup_io_throttle(struct page *page, struct block_device *bdev,
(irqs_disabled() || in_interrupt() || in_atomic()));
/* check if we need to throttle */
- iot = get_iothrottle_from_page(page);
+
+ if (bio) {
+ page = bio_iovec_idx(bio, 0)->bv_page;
+ iot = get_iothrottle_from_page(page);
+ }
+ if (!iot) {
+ int id;
+
+ if (bio) {
+ id = get_bio_cgroup_id(bio);
+ iot = bioid_to_iothrottle(id);
+ }
+ if (iot)
+ css_get(&iot->css);
+ }
rcu_read_lock();
if (!iot) {
iot = task_to_iothrottle(current);
diff --git a/include/linux/biotrack.h b/include/linux/biotrack.h
index 546017c..e3957af 100644
--- a/include/linux/biotrack.h
+++ b/include/linux/biotrack.h
@@ -26,12 +26,14 @@ struct bio_cgroup {
/* struct radix_tree_root io_context_root; per device io_context */
};
+
static inline void __init_bio_page_cgroup(struct page_cgroup *pc)
{
pc->bio_cgroup_id = 0;
}
extern struct cgroup *get_cgroup_from_page(struct page *page);
extern void put_cgroup_from_page(struct page *page);
+extern struct cgroup *bio_id_to_cgroup(int id);
static inline int bio_cgroup_disabled(void)
{
diff --git a/include/linux/blk-io-throttle.h b/include/linux/blk-io-throttle.h
index a241758..9ef414e 100644
--- a/include/linux/blk-io-throttle.h
+++ b/include/linux/blk-io-throttle.h
@@ -14,8 +14,9 @@
#define IOTHROTTLE_STAT 3
#ifdef CONFIG_CGROUP_IO_THROTTLE
+
extern unsigned long long
-cgroup_io_throttle(struct page *page, struct block_device *bdev,
+cgroup_io_throttle(struct bio *bio, struct block_device *bdev,
ssize_t bytes, int can_sleep);
static inline void set_in_aio(void)
@@ -58,7 +59,7 @@ get_io_throttle_sleep(struct task_struct *t, int type)
}
#else
static inline unsigned long long
-cgroup_io_throttle(struct page *page, struct block_device *bdev,
+cgroup_io_throttle(struct bio *bio, struct block_device *bdev,
ssize_t bytes, int can_sleep)
{
return 0;
diff --git a/mm/biotrack.c b/mm/biotrack.c
index 979efcd..e3d9ad7 100644
--- a/mm/biotrack.c
+++ b/mm/biotrack.c
@@ -229,6 +229,17 @@ static struct bio_cgroup *find_bio_cgroup(int id)
return biog;
}
+struct cgroup *bio_id_to_cgroup(int id)
+{
+ struct bio_cgroup *biog;
+
+ biog = find_bio_cgroup(id);
+ if (biog)
+ return biog->css.cgroup;
+
+ return NULL;
+}
+
struct cgroup *get_cgroup_from_page(struct page *page)
{
struct page_cgroup *pc;
-- 1.5.4.rc3
prev parent reply other threads:[~2008-11-20 11:18 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-20 11:05 [PATCH 0/7] introduce bio-cgroup into io-throttle Gui Jianfeng
2008-11-20 11:08 ` [PATCH 1/7] porting bio-cgroup to 2.6.28-rc2-mm1 Gui Jianfeng
2008-11-20 11:09 ` [PATCH 2/7] Porting io-throttle v11 " Gui Jianfeng
2008-11-20 11:11 ` [PATCH 3/7] Introduction for new feature Gui Jianfeng
2008-11-20 11:12 ` [PATCH 4/7] enables bio-cgroup in io-throttle, have to mount together Gui Jianfeng
2008-11-20 11:14 ` [PATCH 5/7] announce tasks moving in bio-cgroup Gui Jianfeng
2008-11-20 11:14 ` [PATCH 6/7] support checking of subsystem dependencies Gui Jianfeng
2008-11-20 11:15 ` Gui Jianfeng [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=492546DC.20505@cn.fujitsu.com \
--to=guijianfeng@cn.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=containers@lists.linux-foundation.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=menage@google.com \
--cc=righi.andrea@gmail.com \
--cc=ryov@valinux.co.jp \
--cc=taka@valinux.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox