From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk, vgoyal@redhat.com
Cc: ctalbott@google.com, rni@google.com,
linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>,
Kay Sievers <kay.sievers@vrfy.org>
Subject: [PATCH 15/36] blkcg: don't allow or retain configuration of missing devices
Date: Tue, 21 Feb 2012 17:46:42 -0800 [thread overview]
Message-ID: <1329875223-5102-16-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1329875223-5102-1-git-send-email-tj@kernel.org>
blkcg is very peculiar in that it allows setting and remembering
configurations for non-existent devices by maintaining separate data
structures for configuration.
This behavior is completely out of the usual norms and outright
confusing; furthermore, it uses dev_t number to match the
configuration to devices, which is unpredictable to begin with and
becomes completely unusable if EXT_DEVT is fully used.
It is wholly unnecessary - we already have a fully functional userland
mechanism to program devices being hotplugged which has full access to
device identification, connection topology and filesystem information.
Add a new struct blkio_group_conf which contains all blkcg
configurations to blkio_group and let blkio_group, which can be
created iff the associated device exists and is removed when the
associated device goes away, carry all configurations.
Note that, after this patch, all newly created blkg's will always have
the default configuration (unlimited for throttling and blkcg's weight
for propio).
This patch makes blkio_policy_node meaningless but doesn't remove it.
The next patch will.
-v2: Updated to retry after a short sleep if blkg lookup/creation failed
due to the queue being temporarily bypassed, as indicated by an
-EBUSY return. Pointed out by Vivek.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
---
block/blk-cgroup.c | 94 ++++++++++++++++++++++++++++++++++++++-----------
block/blk-cgroup.h | 9 +++++
block/blk-throttle.c | 8 ++--
block/cfq-iosched.c | 2 +-
4 files changed, 87 insertions(+), 26 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bc98914..fe8ce14 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -855,9 +855,12 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
}
static int blkio_policy_parse_and_set(char *buf,
- struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
+ struct blkio_policy_node *newpn,
+ enum blkio_policy_id plid, int fileid,
+ struct blkio_cgroup *blkcg)
{
struct gendisk *disk = NULL;
+ struct blkio_group *blkg = NULL;
char *s[4], *p, *major_s = NULL, *minor_s = NULL;
unsigned long major, minor;
int i = 0, ret = -EINVAL;
@@ -903,11 +906,25 @@ static int blkio_policy_parse_and_set(char *buf,
goto out;
/* For rule removal, do not check for device presence. */
- if (temp) {
- disk = get_gendisk(dev, &part);
- if (!disk || part) {
- ret = -ENODEV;
- goto out;
+ disk = get_gendisk(dev, &part);
+
+ if ((!disk || part) && temp) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ rcu_read_lock();
+
+ if (disk && !part) {
+ spin_lock_irq(disk->queue->queue_lock);
+ blkg = blkg_lookup_create(blkcg, disk->queue, plid, false);
+ spin_unlock_irq(disk->queue->queue_lock);
+
+ if (IS_ERR(blkg)) {
+ ret = PTR_ERR(blkg);
+ if (ret == -EBUSY)
+ goto out_unlock;
+ blkg = NULL;
}
}
@@ -917,25 +934,46 @@ static int blkio_policy_parse_and_set(char *buf,
case BLKIO_POLICY_PROP:
if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
temp > BLKIO_WEIGHT_MAX)
- goto out;
+ goto out_unlock;
newpn->plid = plid;
newpn->fileid = fileid;
newpn->val.weight = temp;
+ if (blkg)
+ blkg->conf.weight = temp;
break;
case BLKIO_POLICY_THROTL:
switch(fileid) {
case BLKIO_THROTL_read_bps_device:
+ if (blkg)
+ blkg->conf.bps[READ] = temp;
+ newpn->plid = plid;
+ newpn->fileid = fileid;
+ newpn->val.bps = temp;
+ break;
case BLKIO_THROTL_write_bps_device:
+ if (blkg)
+ blkg->conf.bps[WRITE] = temp;
newpn->plid = plid;
newpn->fileid = fileid;
newpn->val.bps = temp;
break;
case BLKIO_THROTL_read_iops_device:
+ if (temp > THROTL_IOPS_MAX)
+ goto out_unlock;
+
+ if (blkg)
+ blkg->conf.iops[READ] = temp;
+ newpn->plid = plid;
+ newpn->fileid = fileid;
+ newpn->val.iops = (unsigned int)temp;
+ break;
case BLKIO_THROTL_write_iops_device:
if (temp > THROTL_IOPS_MAX)
- goto out;
+ goto out_unlock;
+ if (blkg)
+ blkg->conf.iops[WRITE] = temp;
newpn->plid = plid;
newpn->fileid = fileid;
newpn->val.iops = (unsigned int)temp;
@@ -946,8 +984,21 @@ static int blkio_policy_parse_and_set(char *buf,
BUG();
}
ret = 0;
+out_unlock:
+ rcu_read_unlock();
out:
put_disk(disk);
+
+ /*
+ * If queue was bypassing, we should retry. Do so after a short
+ * msleep(). It isn't strictly necessary but queue can be
+ * bypassing for some time and it's always nice to avoid busy
+ * looping.
+ */
+ if (ret == -EBUSY) {
+ msleep(10);
+ return restart_syscall();
+ }
return ret;
}
@@ -1095,26 +1146,29 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, struct blkio_policy_node *pn)
{
- unsigned int weight, iops;
- u64 bps;
+ struct blkio_group_conf *conf = &blkg->conf;
switch(pn->plid) {
case BLKIO_POLICY_PROP:
- weight = pn->val.weight ? pn->val.weight :
- blkcg->weight;
- blkio_update_group_weight(blkg, weight);
+ blkio_update_group_weight(blkg, conf->weight ?: blkcg->weight);
break;
case BLKIO_POLICY_THROTL:
switch(pn->fileid) {
case BLKIO_THROTL_read_bps_device:
+ blkio_update_group_bps(blkg, conf->bps[READ] ?: -1,
+ pn->fileid);
+ break;
case BLKIO_THROTL_write_bps_device:
- bps = pn->val.bps ? pn->val.bps : (-1);
- blkio_update_group_bps(blkg, bps, pn->fileid);
+ blkio_update_group_bps(blkg, conf->bps[WRITE] ?: -1,
+ pn->fileid);
break;
case BLKIO_THROTL_read_iops_device:
+ blkio_update_group_iops(blkg, conf->iops[READ] ?: -1,
+ pn->fileid);
+ break;
case BLKIO_THROTL_write_iops_device:
- iops = pn->val.iops ? pn->val.iops : (-1);
- blkio_update_group_iops(blkg, iops, pn->fileid);
+ blkio_update_group_iops(blkg, conf->iops[WRITE] ?: -1,
+ pn->fileid);
break;
}
break;
@@ -1152,7 +1206,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
int ret = 0;
char *buf;
struct blkio_policy_node *newpn, *pn;
- struct blkio_cgroup *blkcg;
+ struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
int keep_newpn = 0;
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
int fileid = BLKIOFILE_ATTR(cft->private);
@@ -1167,12 +1221,10 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
goto free_buf;
}
- ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
+ ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid, blkcg);
if (ret)
goto free_newpn;
- blkcg = cgroup_to_blkio_cgroup(cgrp);
-
spin_lock_irq(&blkcg->lock);
pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2600ae7..81efe71 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -154,6 +154,12 @@ struct blkio_group_stats_cpu {
struct u64_stats_sync syncp;
};
+struct blkio_group_conf {
+ unsigned int weight;
+ unsigned int iops[2];
+ u64 bps[2];
+};
+
struct blkio_group {
/* Pointer to the associated request_queue, RCU protected */
struct request_queue __rcu *q;
@@ -166,6 +172,9 @@ struct blkio_group {
/* policy which owns this blk group */
enum blkio_policy_id plid;
+ /* Configuration */
+ struct blkio_group_conf conf;
+
/* Need to serialize the stats in the case of reset/update */
spinlock_t stats_lock;
struct blkio_group_stats stats;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 2ae637b..791b107 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -196,10 +196,10 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
bio_list_init(&tg->bio_lists[1]);
tg->limits_changed = false;
- tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
- tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
- tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
- tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
+ tg->bps[READ] = -1;
+ tg->bps[WRITE] = -1;
+ tg->iops[READ] = -1;
+ tg->iops[WRITE] = -1;
/*
* Take the initial reference that will be released on destroy
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index acef564..08d4fdd 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1083,7 +1083,7 @@ static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
return NULL;
cfq_init_cfqg_base(cfqg);
- cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+ cfqg->weight = blkcg->weight;
/*
* Take the initial reference that will be released on destroy
--
1.7.7.3
next prev parent reply other threads:[~2012-02-22 1:47 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-22 1:46 [PATCHSET] blkcg: accumulated blkcg updates Tejun Heo
2012-02-22 1:46 ` [PATCH 01/36] block: blk-throttle should be drained regardless of q->elevator Tejun Heo
2012-02-22 1:46 ` [PATCH 02/36] blkcg: make CONFIG_BLK_CGROUP bool Tejun Heo
2012-02-22 1:46 ` [PATCH 03/36] cfq: don't register propio policy if !CONFIG_CFQ_GROUP_IOSCHED Tejun Heo
2012-02-22 1:46 ` [PATCH 04/36] elevator: clear auxiliary data earlier during elevator switch Tejun Heo
2012-02-22 1:46 ` [PATCH 05/36] elevator: make elevator_init_fn() return 0/-errno Tejun Heo
2012-02-22 1:46 ` [PATCH 06/36] block: implement blk_queue_bypass_start/end() Tejun Heo
2012-02-22 1:46 ` [PATCH 07/36] block: extend queue bypassing to cover blkcg policies Tejun Heo
2012-02-22 1:46 ` [PATCH 08/36] blkcg: shoot down blkio_groups on elevator switch Tejun Heo
2012-02-22 1:46 ` [PATCH 09/36] blkcg: move rcu_read_lock() outside of blkio_group get functions Tejun Heo
2012-02-22 1:46 ` [PATCH 10/36] blkcg: update blkg get functions take blkio_cgroup as parameter Tejun Heo
2012-02-22 1:46 ` [PATCH 11/36] blkcg: use q and plid instead of opaque void * for blkio_group association Tejun Heo
2012-02-22 1:46 ` [PATCH 12/36] blkcg: add blkio_policy[] array and allow one policy per policy ID Tejun Heo
2012-02-22 1:46 ` [PATCH 13/36] blkcg: use the usual get blkg path for root blkio_group Tejun Heo
2012-02-22 1:46 ` [PATCH 14/36] blkcg: factor out blkio_group creation Tejun Heo
2012-02-22 1:46 ` Tejun Heo [this message]
2012-02-22 1:46 ` [PATCH 16/36] blkcg: kill blkio_policy_node Tejun Heo
2012-02-22 1:46 ` [PATCH 17/36] blkcg: kill the mind-bending blkg->dev Tejun Heo
2012-02-22 1:46 ` [PATCH 18/36] blkcg: let blkio_group point to blkio_cgroup directly Tejun Heo
2012-02-22 1:46 ` [PATCH 19/36] blkcg: add blkcg_{init|drain|exit}_queue() Tejun Heo
2012-02-22 1:46 ` [PATCH 20/36] blkcg: clear all request_queues on blkcg policy [un]registrations Tejun Heo
2012-02-22 1:46 ` [PATCH 21/36] blkcg: let blkcg core handle policy private data allocation Tejun Heo
2012-02-22 1:46 ` [PATCH 22/36] blkcg: move refcnt to blkcg core Tejun Heo
2012-02-22 1:46 ` [PATCH 23/36] blkcg: make blkg->pd an array and move configuration and stats into it Tejun Heo
2012-02-22 1:46 ` [PATCH 24/36] blkcg: don't use blkg->plid in stat related functions Tejun Heo
2012-02-22 1:46 ` [PATCH 25/36] blkcg: move per-queue blkg list heads and counters to queue and blkg Tejun Heo
2012-02-22 1:46 ` [PATCH 26/36] blkcg: let blkcg core manage per-queue blkg list and counter Tejun Heo
2012-02-22 1:46 ` [PATCH 27/36] blkcg: unify blkg's for blkcg policies Tejun Heo
2012-03-05 21:01 ` [PATCH UPDATED " Tejun Heo
2012-02-22 1:46 ` [PATCH 28/36] blkcg: use double locking instead of RCU for blkg synchronization Tejun Heo
2012-02-22 1:46 ` [PATCH 29/36] blkcg: drop unnecessary RCU locking Tejun Heo
2012-02-23 18:51 ` [PATCH UPDATED " Tejun Heo
2012-02-22 1:46 ` [PATCH 30/36] block: restructure get_request() Tejun Heo
2012-02-22 1:46 ` [PATCH 31/36] block: interface update for ioc/icq creation functions Tejun Heo
2012-02-22 1:46 ` [PATCH 32/36] block: ioc_task_link() can't fail Tejun Heo
2012-02-22 1:47 ` [PATCH 33/36] block: add io_context->active_ref Tejun Heo
2012-02-22 18:47 ` Vivek Goyal
2012-02-22 19:13 ` Tejun Heo
2012-02-23 18:20 ` Vivek Goyal
2012-02-22 1:47 ` [PATCH 34/36] block: implement bio_associate_current() Tejun Heo
2012-02-22 13:45 ` Jeff Moyer
2012-02-22 19:07 ` Tejun Heo
2012-02-22 19:33 ` Jeff Moyer
2012-02-22 19:37 ` Vivek Goyal
2012-02-22 19:41 ` Jeff Moyer
2012-02-22 1:47 ` [PATCH 35/36] block: make block cgroup policies follow bio task association Tejun Heo
2012-02-22 1:47 ` [PATCH 36/36] block: make blk-throttle preserve the issuing task on delayed bios Tejun Heo
2012-02-22 19:34 ` [PATCHSET] blkcg: accumulated blkcg updates Vivek Goyal
2012-02-22 22:04 ` Tejun Heo
2012-03-05 20:59 ` [PATCH 17.5] blkcg: skip blkg printing if q isn't associated with disk Tejun Heo
2012-03-05 21:07 ` [PATCHSET] blkcg: accumulated blkcg updates Tejun Heo
2012-03-05 21:08 ` Tejun Heo
2012-03-06 15:07 ` Vivek Goyal
2012-03-06 16:24 ` Vivek Goyal
2012-03-06 18:39 ` Vivek Goyal
2012-03-06 19:02 ` Vivek Goyal
2012-03-08 0:06 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1329875223-5102-16-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=axboe@kernel.dk \
--cc=ctalbott@google.com \
--cc=kay.sievers@vrfy.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rni@google.com \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).