From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk, vgoyal@redhat.com
Cc: ctalbott@google.com, rni@google.com,
linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 10/11] blkcg: let blkcg core manage per-queue blkg list and counter
Date: Wed, 1 Feb 2012 13:19:15 -0800 [thread overview]
Message-ID: <1328131156-13290-11-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1328131156-13290-1-git-send-email-tj@kernel.org>
With the previous patch to move blkg list heads and counters to
request_queue and blkg, logic to manage them in both policies are
almost identical and can be moved to blkcg core.
This patch moves blkg link logic into blkg_lookup_create(), implements
common blkg unlink code in blkg_destroy(), and updates
blkg_destory_all() so that it's policy specific and can skip root
group. The updated blkg_destroy_all() is now used to both clear queue
for bypassing and elv switching, and release all blkgs on q exit.
This patch introduces a race window where policy [de]registration may
race against queue blkg clearing. This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists). Future patches will
remove these unlikely races.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
---
block/blk-cgroup.c | 72 ++++++++++++++++++++++++++++--------
block/blk-cgroup.h | 15 +++-----
block/blk-throttle.c | 99 +-------------------------------------------------
block/cfq-iosched.c | 98 +++-----------------------------------------------
block/elevator.c | 5 ++-
5 files changed, 71 insertions(+), 218 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0513ed5..a7f9363 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -596,8 +596,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
/* insert */
spin_lock(&blkcg->lock);
swap(blkg, new_blkg);
+
hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
- pol->ops.blkio_link_group_fn(q, blkg);
+ list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
+ q->nr_blkgs[plid]++;
+
spin_unlock(&blkcg->lock);
out:
blkg_free(new_blkg);
@@ -646,36 +649,69 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
}
EXPORT_SYMBOL_GPL(blkg_lookup);
-void blkg_destroy_all(struct request_queue *q)
+static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
+{
+ struct request_queue *q = blkg->q;
+
+ lockdep_assert_held(q->queue_lock);
+
+ /* Something wrong if we are trying to remove same group twice */
+ WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
+ list_del_init(&blkg->q_node[plid]);
+
+ WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
+ q->nr_blkgs[plid]--;
+
+ /*
+ * Put the reference taken at the time of creation so that when all
+ * queues are gone, group can be destroyed.
+ */
+ blkg_put(blkg);
+}
+
+void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
+ bool destroy_root)
{
- struct blkio_policy_type *pol;
+ struct blkio_group *blkg, *n;
while (true) {
bool done = true;
- spin_lock(&blkio_list_lock);
spin_lock_irq(q->queue_lock);
- /*
- * clear_queue_fn() might return with non-empty group list
- * if it raced cgroup removal and lost. cgroup removal is
- * guaranteed to make forward progress and retrying after a
- * while is enough. This ugliness is scheduled to be
- * removed after locking update.
- */
- list_for_each_entry(pol, &blkio_list, list)
- if (!pol->ops.blkio_clear_queue_fn(q))
+ list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
+ q_node[plid]) {
+ /* skip root? */
+ if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
+ continue;
+
+ /*
+ * If cgroup removal path got to blk_group first
+ * and removed it from cgroup list, then it will
+ * take care of destroying cfqg also.
+ */
+ if (!blkiocg_del_blkio_group(blkg))
+ blkg_destroy(blkg, plid);
+ else
done = false;
+ }
spin_unlock_irq(q->queue_lock);
- spin_unlock(&blkio_list_lock);
+ /*
+ * Group list may not be empty if we raced cgroup removal
+ * and lost. cgroup removal is guaranteed to make forward
+ * progress and retrying after a while is enough. This
+ * ugliness is scheduled to be removed after locking
+ * update.
+ */
if (done)
break;
msleep(10); /* just some random duration I like */
}
}
+EXPORT_SYMBOL_GPL(blkg_destroy_all);
static void blkg_rcu_free(struct rcu_head *rcu_head)
{
@@ -1538,11 +1574,13 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
* this event.
*/
spin_lock(&blkio_list_lock);
+ spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry(blkiop, &blkio_list, list) {
if (blkiop->plid != blkg->plid)
continue;
- blkiop->ops.blkio_unlink_group_fn(q, blkg);
+ blkg_destroy(blkg, blkiop->plid);
}
+ spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock(&blkio_list_lock);
} while (1);
@@ -1684,12 +1722,14 @@ static void blkcg_bypass_start(void)
__acquires(&all_q_mutex)
{
struct request_queue *q;
+ int i;
mutex_lock(&all_q_mutex);
list_for_each_entry(q, &all_q_list, all_q_node) {
blk_queue_bypass_start(q);
- blkg_destroy_all(q);
+ for (i = 0; i < BLKIO_NR_POLICIES; i++)
+ blkg_destroy_all(q, i, false);
}
}
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ae96f19..83ce5fa 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -196,11 +196,6 @@ struct blkio_group {
};
typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
-typedef void (blkio_link_group_fn)(struct request_queue *q,
- struct blkio_group *blkg);
-typedef void (blkio_unlink_group_fn)(struct request_queue *q,
- struct blkio_group *blkg);
-typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
struct blkio_group *blkg, unsigned int weight);
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
@@ -214,9 +209,6 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
struct blkio_policy_ops {
blkio_init_group_fn *blkio_init_group_fn;
- blkio_link_group_fn *blkio_link_group_fn;
- blkio_unlink_group_fn *blkio_unlink_group_fn;
- blkio_clear_queue_fn *blkio_clear_queue_fn;
blkio_update_group_weight_fn *blkio_update_group_weight_fn;
blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
@@ -238,7 +230,8 @@ extern void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
-extern void blkg_destroy_all(struct request_queue *q);
+extern void blkg_destroy_all(struct request_queue *q,
+ enum blkio_policy_id plid, bool destroy_root);
/**
* blkg_to_pdata - get policy private data
@@ -319,7 +312,9 @@ static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
-static inline void blkg_destroy_all(struct request_queue *q) { }
+static inline void blkg_destroy_all(struct request_queue *q,
+ enum blkio_policy_id plid,
+ bool destory_root) { }
static inline void *blkg_to_pdata(struct blkio_group *blkg,
struct blkio_policy_type *pol) { return NULL; }
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c15d383..1329412 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -157,14 +157,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg)
tg->iops[WRITE] = -1;
}
-static void throtl_link_blkio_group(struct request_queue *q,
- struct blkio_group *blkg)
-{
- list_add(&blkg->q_node[BLKIO_POLICY_THROTL],
- &q->blkg_list[BLKIO_POLICY_THROTL]);
- q->nr_blkgs[BLKIO_POLICY_THROTL]++;
-}
-
static struct
throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
{
@@ -813,89 +805,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
}
}
-static void
-throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
-{
- struct blkio_group *blkg = tg_to_blkg(tg);
-
- /* Something wrong if we are trying to remove same group twice */
- WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL]));
-
- list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]);
-
- /*
- * Put the reference taken at the time of creation so that when all
- * queues are gone, group can be destroyed.
- */
- blkg_put(tg_to_blkg(tg));
- td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--;
-}
-
-static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
-{
- struct request_queue *q = td->queue;
- struct blkio_group *blkg, *n;
- bool empty = true;
-
- list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
- q_node[BLKIO_POLICY_THROTL]) {
- struct throtl_grp *tg = blkg_to_tg(blkg);
-
- /* skip root? */
- if (!release_root && tg == td->root_tg)
- continue;
-
- /*
- * If cgroup removal path got to blk_group first and removed
- * it from cgroup list, then it will take care of destroying
- * cfqg also.
- */
- if (!blkiocg_del_blkio_group(blkg))
- throtl_destroy_tg(td, tg);
- else
- empty = false;
- }
- return empty;
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). @q is the rcu protected
- * pointer. That means @q is a valid request_queue pointer as long as we
- * are rcu read lock.
- *
- * @q was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if queue was going away, cgroup deltion
- * path got to it first.
- */
-void throtl_unlink_blkio_group(struct request_queue *q,
- struct blkio_group *blkg)
-{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- throtl_destroy_tg(q->td, blkg_to_tg(blkg));
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static bool throtl_clear_queue(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
-
- /*
- * Clear tgs but leave the root one alone. This is necessary
- * because root_tg is expected to be persistent and safe because
- * blk-throtl can never be disabled while @q is alive. This is a
- * kludge to prepare for unified blkg. This whole function will be
- * removed soon.
- */
- return throtl_release_tgs(q->td, false);
-}
-
static void throtl_update_blkio_group_common(struct throtl_data *td,
struct throtl_grp *tg)
{
@@ -960,9 +869,6 @@ static void throtl_shutdown_wq(struct request_queue *q)
static struct blkio_policy_type blkio_policy_throtl = {
.ops = {
.blkio_init_group_fn = throtl_init_blkio_group,
- .blkio_link_group_fn = throtl_link_blkio_group,
- .blkio_unlink_group_fn = throtl_unlink_blkio_group,
- .blkio_clear_queue_fn = throtl_clear_queue,
.blkio_update_group_read_bps_fn =
throtl_update_blkio_group_read_bps,
.blkio_update_group_write_bps_fn =
@@ -1148,12 +1054,11 @@ void blk_throtl_exit(struct request_queue *q)
throtl_shutdown_wq(q);
- spin_lock_irq(q->queue_lock);
- throtl_release_tgs(td, true);
+ blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);
/* If there are other groups */
+ spin_lock_irq(q->queue_lock);
wait = q->nr_blkgs[BLKIO_POLICY_THROTL];
-
spin_unlock_irq(q->queue_lock);
/*
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c13e376..7093309 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1045,14 +1045,6 @@ static void cfq_update_blkio_group_weight(struct request_queue *q,
cfqg->needs_update = true;
}
-static void cfq_link_blkio_group(struct request_queue *q,
- struct blkio_group *blkg)
-{
- list_add(&blkg->q_node[BLKIO_POLICY_PROP],
- &q->blkg_list[BLKIO_POLICY_PROP]);
- q->nr_blkgs[BLKIO_POLICY_PROP]++;
-}
-
static void cfq_init_blkio_group(struct blkio_group *blkg)
{
struct cfq_group *cfqg = blkg_to_cfqg(blkg);
@@ -1096,84 +1088,6 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
blkg_get(cfqg_to_blkg(cfqg));
}
-static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
- struct blkio_group *blkg = cfqg_to_blkg(cfqg);
-
- /* Something wrong if we are trying to remove same group twice */
- BUG_ON(list_empty(&blkg->q_node[BLKIO_POLICY_PROP]));
-
- list_del_init(&blkg->q_node[BLKIO_POLICY_PROP]);
-
- BUG_ON(cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP] <= 0);
- cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP]--;
-
- /*
- * Put the reference taken at the time of creation so that when all
- * queues are gone, group can be destroyed.
- */
- blkg_put(cfqg_to_blkg(cfqg));
-}
-
-static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
-{
- struct request_queue *q = cfqd->queue;
- struct blkio_group *blkg, *n;
- bool empty = true;
-
- list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_PROP],
- q_node[BLKIO_POLICY_PROP]) {
- /*
- * If cgroup removal path got to blk_group first and removed
- * it from cgroup list, then it will take care of destroying
- * cfqg also.
- */
- if (!cfq_blkiocg_del_blkio_group(blkg))
- cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
- else
- empty = false;
- }
- return empty;
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). key is the rcu protected
- * pointer. That means @q is a valid request_queue pointer as long as we
- * are rcu read lock.
- *
- * @q was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if elevator was exiting, cgroup deltion
- * path got to it first.
- */
-static void cfq_unlink_blkio_group(struct request_queue *q,
- struct blkio_group *blkg)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static struct elevator_type iosched_cfq;
-
-static bool cfq_clear_queue(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
-
- /* shoot down blkgs iff the current elevator is cfq */
- if (!q->elevator || q->elevator->type != &iosched_cfq)
- return true;
-
- return cfq_release_cfq_groups(q->elevator->elevator_data);
-}
-
#else /* GROUP_IOSCHED */
static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
struct blkio_cgroup *blkcg)
@@ -1186,8 +1100,6 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
cfqq->cfqg = cfqg;
}
-static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-
#endif /* GROUP_IOSCHED */
/*
@@ -3553,14 +3465,17 @@ static void cfq_exit_queue(struct elevator_queue *e)
__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
cfq_put_async_queues(cfqd);
- cfq_release_cfq_groups(cfqd);
+
+ spin_unlock_irq(q->queue_lock);
+
+ blkg_destroy_all(q, BLKIO_POLICY_PROP, true);
/*
* If there are groups which we could not unlink from blkcg list,
* wait for a rcu period for them to be freed.
*/
+ spin_lock_irq(q->queue_lock);
wait = q->nr_blkgs[BLKIO_POLICY_PROP];
-
spin_unlock_irq(q->queue_lock);
cfq_shutdown_timer_wq(cfqd);
@@ -3799,9 +3714,6 @@ static struct elevator_type iosched_cfq = {
static struct blkio_policy_type blkio_policy_cfq = {
.ops = {
.blkio_init_group_fn = cfq_init_blkio_group,
- .blkio_link_group_fn = cfq_link_blkio_group,
- .blkio_unlink_group_fn = cfq_unlink_blkio_group,
- .blkio_clear_queue_fn = cfq_clear_queue,
.blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
},
.plid = BLKIO_POLICY_PROP,
diff --git a/block/elevator.c b/block/elevator.c
index d6116af..4599615 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -923,7 +923,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
struct elevator_queue *old = q->elevator;
bool registered = old->registered;
- int err;
+ int i, err;
/*
* Turn on BYPASS and drain all requests w/ elevator private data.
@@ -942,7 +942,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
ioc_clear_queue(q);
spin_unlock_irq(q->queue_lock);
- blkg_destroy_all(q);
+ for (i = 0; i < BLKIO_NR_POLICIES; i++)
+ blkg_destroy_all(q, i, false);
/* allocate, init and register new elevator */
err = -ENOMEM;
--
1.7.7.3
next prev parent reply other threads:[~2012-02-01 21:21 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-01 21:19 [PATCHSET] blkcg: unify blkgs for different policies Tejun Heo
2012-02-01 21:19 ` [PATCH 01/11] blkcg: let blkio_group point to blkio_cgroup directly Tejun Heo
2012-02-02 20:03 ` Vivek Goyal
2012-02-02 20:33 ` Tejun Heo
2012-02-02 20:55 ` Vivek Goyal
2012-02-01 21:19 ` [PATCH 02/11] block: relocate elevator initialized test from blk_cleanup_queue() to blk_drain_queue() Tejun Heo
2012-02-02 20:20 ` Vivek Goyal
2012-02-02 20:35 ` Tejun Heo
2012-02-02 20:37 ` Vivek Goyal
2012-02-02 20:38 ` Tejun Heo
2012-02-01 21:19 ` [PATCH 03/11] blkcg: add blkcg_{init|drain|exit}_queue() Tejun Heo
2012-02-01 21:19 ` [PATCH 04/11] blkcg: clear all request_queues on blkcg policy [un]registrations Tejun Heo
2012-02-01 21:19 ` [PATCH 05/11] blkcg: let blkcg core handle policy private data allocation Tejun Heo
2012-02-01 21:19 ` [PATCH 06/11] blkcg: move refcnt to blkcg core Tejun Heo
2012-02-02 22:07 ` Vivek Goyal
2012-02-02 22:11 ` Tejun Heo
2012-02-01 21:19 ` [PATCH 07/11] blkcg: make blkg->pd an array and move configuration and stats into it Tejun Heo
2012-02-01 21:19 ` [PATCH 08/11] blkcg: don't use blkg->plid in stat related functions Tejun Heo
2012-02-01 21:19 ` [PATCH 09/11] blkcg: move per-queue blkg list heads and counters to queue and blkg Tejun Heo
2012-02-02 22:47 ` Vivek Goyal
2012-02-02 22:47 ` Tejun Heo
2012-02-01 21:19 ` Tejun Heo [this message]
2012-02-01 21:19 ` [PATCH 11/11] blkcg: unify blkg's for blkcg policies Tejun Heo
2012-02-02 0:37 ` [PATCH UPDATED " Tejun Heo
2012-02-03 19:41 ` Vivek Goyal
2012-02-03 20:59 ` Tejun Heo
2012-02-03 21:44 ` Vivek Goyal
2012-02-03 21:47 ` Tejun Heo
2012-02-03 21:53 ` Vivek Goyal
2012-02-03 22:14 ` Tejun Heo
2012-02-03 22:23 ` Vivek Goyal
2012-02-03 22:28 ` Tejun Heo
2012-02-03 21:06 ` Vivek Goyal
2012-02-03 21:09 ` Tejun Heo
2012-02-03 21:10 ` Tejun Heo
2012-02-14 1:33 ` [PATCH UPDATED2 " Tejun Heo
2012-02-15 17:02 ` Vivek Goyal
2012-02-16 22:42 ` Tejun Heo
2012-02-02 19:29 ` [PATCHSET] blkcg: unify blkgs for different policies Vivek Goyal
2012-02-02 20:36 ` Tejun Heo
2012-02-02 20:43 ` Vivek Goyal
2012-02-02 20:59 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1328131156-13290-11-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=axboe@kernel.dk \
--cc=ctalbott@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=rni@google.com \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.