From: Dennis Zhou <dennisszhou@gmail.com>
To: Jens Axboe <axboe@kernel.dk>, Tejun Heo <tj@kernel.org>,
Johannes Weiner <hannes@cmpxchg.org>,
Josef Bacik <josef@toxicpanda.com>
Cc: kernel-team@fb.com, linux-block@vger.kernel.org,
cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
"Dennis Zhou (Facebook)" <dennisszhou@gmail.com>,
Jiufei Xue <jiufei.xue@linux.alibaba.com>,
Joseph Qi <joseph.qi@linux.alibaba.com>
Subject: [PATCH 01/15] Revert "blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()"
Date: Thu, 30 Aug 2018 21:53:42 -0400 [thread overview]
Message-ID: <20180831015356.69796-2-dennisszhou@gmail.com> (raw)
In-Reply-To: <20180831015356.69796-1-dennisszhou@gmail.com>
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
This reverts commit 4c6994806f708559c2812b73501406e21ae5dcd0.
Destroying blkgs is tricky because of the nature of the relationship. A
blkg should go away when either a blkcg or a request_queue goes away.
However, blkg's pin the blkcg to ensure they remain valid. To break this
cycle, when a blkcg is offlined, blkgs put back their css ref. This
eventually lets css_free() get called which frees the blkcg.
The above commit (4c6994806f70) breaks this order of events by trying to
destroy blkgs in css_free(). As the blkgs still hold references to the
blkcg, css_free() is never called.
The race between blkcg_bio_issue_check() and cgroup_rmdir() will be
addressed in the following patch by delaying destruction of a blkg until
all writeback associated with the blkcg has been finished.
Fixes: 4c6994806f70 ("blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()")
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
---
block/blk-cgroup.c | 78 ++++++++------------------------------
include/linux/blk-cgroup.h | 1 -
2 files changed, 16 insertions(+), 63 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 694595b29b8f..2998e4f095d1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
}
}
-static void blkg_pd_offline(struct blkcg_gq *blkg)
-{
- int i;
-
- lockdep_assert_held(blkg->q->queue_lock);
- lockdep_assert_held(&blkg->blkcg->lock);
-
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
-
- if (blkg->pd[i] && !blkg->pd[i]->offline &&
- pol->pd_offline_fn) {
- pol->pd_offline_fn(blkg->pd[i]);
- blkg->pd[i]->offline = true;
- }
- }
-}
-
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
struct blkcg_gq *parent = blkg->parent;
+ int i;
lockdep_assert_held(blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
@@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
WARN_ON_ONCE(list_empty(&blkg->q_node));
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+ if (blkg->pd[i] && pol->pd_offline_fn)
+ pol->pd_offline_fn(blkg->pd[i]);
+ }
+
if (parent) {
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
struct blkcg *blkcg = blkg->blkcg;
spin_lock(&blkcg->lock);
- blkg_pd_offline(blkg);
blkg_destroy(blkg);
spin_unlock(&blkcg->lock);
}
@@ -1058,54 +1047,21 @@ static struct cftype blkcg_legacy_files[] = {
* @css: css of interest
*
* This function is called when @css is about to go away and responsible
- * for offlining all blkgs pd and killing all wbs associated with @css.
- * blkgs pd offline should be done while holding both q and blkcg locks.
- * As blkcg lock is nested inside q lock, this function performs reverse
- * double lock dancing.
+ * for shooting down all blkgs associated with @css. blkgs should be
+ * removed while holding both q and blkcg locks. As blkcg lock is nested
+ * inside q lock, this function performs reverse double lock dancing.
*
* This is the blkcg counterpart of ioc_release_fn().
*/
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg_gq *blkg;
spin_lock_irq(&blkcg->lock);
- hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
- struct request_queue *q = blkg->q;
-
- if (spin_trylock(q->queue_lock)) {
- blkg_pd_offline(blkg);
- spin_unlock(q->queue_lock);
- } else {
- spin_unlock_irq(&blkcg->lock);
- cpu_relax();
- spin_lock_irq(&blkcg->lock);
- }
- }
-
- spin_unlock_irq(&blkcg->lock);
-
- wb_blkcg_offline(blkcg);
-}
-
-/**
- * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
- * @blkcg: blkcg of interest
- *
- * This function is called when blkcg css is about to free and responsible for
- * destroying all blkgs associated with @blkcg.
- * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
- * is nested inside q lock, this function performs reverse double lock dancing.
- */
-static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
-{
- spin_lock_irq(&blkcg->lock);
while (!hlist_empty(&blkcg->blkg_list)) {
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
- struct blkcg_gq,
- blkcg_node);
+ struct blkcg_gq, blkcg_node);
struct request_queue *q = blkg->q;
if (spin_trylock(q->queue_lock)) {
@@ -1117,7 +1073,10 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
spin_lock_irq(&blkcg->lock);
}
}
+
spin_unlock_irq(&blkcg->lock);
+
+ wb_blkcg_offline(blkcg);
}
static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1125,8 +1084,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
struct blkcg *blkcg = css_to_blkcg(css);
int i;
- blkcg_destroy_all_blkgs(blkcg);
-
mutex_lock(&blkcg_pol_mutex);
list_del(&blkcg->all_blkcgs_node);
@@ -1480,11 +1437,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
list_for_each_entry(blkg, &q->blkg_list, q_node) {
if (blkg->pd[pol->plid]) {
- if (!blkg->pd[pol->plid]->offline &&
- pol->pd_offline_fn) {
+ if (pol->pd_offline_fn)
pol->pd_offline_fn(blkg->pd[pol->plid]);
- blkg->pd[pol->plid]->offline = true;
- }
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 34aec30e06c7..1615cdd4c797 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -89,7 +89,6 @@ struct blkg_policy_data {
/* the blkg and policy id this per-policy data belongs to */
struct blkcg_gq *blkg;
int plid;
- bool offline;
};
/*
--
2.17.1
next prev parent reply other threads:[~2018-08-31 1:53 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-31 1:53 [PATCH 00/15] blkcg ref count refactor/cleanup + blkcg avg_lat Dennis Zhou
2018-08-31 1:53 ` Dennis Zhou [this message]
2018-08-31 1:53 ` [PATCH 02/15] blkcg: delay blkg destruction until after writeback has finished Dennis Zhou
2018-08-31 11:09 ` kbuild test robot
2018-08-31 15:27 ` Josef Bacik
2018-08-31 20:19 ` Dennis Zhou
2018-08-31 1:53 ` [PATCH 03/15] blkcg: use tryget logic when associating a blkg with a bio Dennis Zhou
2018-08-31 15:30 ` Josef Bacik
2018-08-31 20:20 ` Dennis Zhou
2018-08-31 1:53 ` [PATCH 04/15] blkcg: fix ref count issue with bio_blkcg using task_css Dennis Zhou
2018-08-31 15:35 ` Josef Bacik
2018-08-31 23:04 ` Tejun Heo
2018-09-06 15:21 ` Dennis Zhou
2018-08-31 1:53 ` [PATCH 05/15] blkcg: update blkg_lookup_create to do locking Dennis Zhou
2018-08-31 15:37 ` Josef Bacik
2018-08-31 23:09 ` Tejun Heo
2018-08-31 1:53 ` [PATCH 06/15] blkcg: always associate a bio with a blkg Dennis Zhou
2018-08-31 9:01 ` kbuild test robot
2018-08-31 10:02 ` kbuild test robot
2018-08-31 23:16 ` Tejun Heo
2018-09-06 20:41 ` Dennis Zhou
2018-09-07 3:03 ` [LKP] [blkcg] c02c58dab2: WARNING:at_block/blk-throttle.c:#blk_throtl_bio kernel test robot
2018-09-07 3:03 ` kernel test robot
2018-08-31 1:53 ` [PATCH 07/15] blkcg: consolidate bio_issue_init and blkg association Dennis Zhou
2018-08-31 9:19 ` kbuild test robot
2018-08-31 11:11 ` kbuild test robot
2018-08-31 15:42 ` Josef Bacik
2018-09-06 20:43 ` Dennis Zhou
2018-08-31 23:45 ` Tejun Heo
2018-08-31 1:53 ` [PATCH 08/15] blkcg: associate a blkg for pages being evicted by swap Dennis Zhou
2018-08-31 15:44 ` Josef Bacik
2018-08-31 23:47 ` Tejun Heo
2018-08-31 1:53 ` [PATCH 09/15] blkcg: associate writeback bios with a blkg Dennis Zhou
2018-08-31 15:45 ` Josef Bacik
2018-08-31 23:53 ` Tejun Heo
2018-08-31 1:53 ` [PATCH 10/15] blkcg: remove bio->bi_css and instead use bio->bi_blkg Dennis Zhou
2018-08-31 15:46 ` Josef Bacik
2018-09-01 0:13 ` Tejun Heo
2018-08-31 1:53 ` [PATCH 11/15] blkcg: remove additional reference to the css Dennis Zhou
2018-09-01 0:26 ` Tejun Heo
2018-09-06 20:45 ` Dennis Zhou
2018-08-31 1:53 ` [PATCH 12/15] blkcg: cleanup and make blk_get_rl use blkg_lookup_create Dennis Zhou
2018-09-01 0:29 ` Tejun Heo
2018-09-11 2:37 ` [LKP] [blkcg] 22f657e287: general_protection_fault:#[##] kernel test robot
2018-09-11 2:37 ` kernel test robot
2018-08-31 1:53 ` [PATCH 13/15] blkcg: change blkg reference counting to use percpu_ref Dennis Zhou
2018-08-31 15:49 ` Josef Bacik
2018-09-01 0:31 ` Tejun Heo
2018-09-06 20:46 ` Dennis Zhou
2018-09-07 3:08 ` [LKP] [blkcg] 6ef69a3a0b: WARNING:suspicious_RCU_usage kernel test robot
2018-09-07 3:08 ` kernel test robot
2018-08-31 1:53 ` [PATCH 14/15] blkcg: rename blkg_try_get to blkg_tryget Dennis Zhou
2018-08-31 15:50 ` Josef Bacik
2018-09-01 0:32 ` Tejun Heo
2018-08-31 1:53 ` [PATCH 15/15] blkcg: add average latency tracking to blk-cgroup Dennis Zhou
2018-08-31 10:22 ` kbuild test robot
2018-08-31 11:38 ` kbuild test robot
2018-09-01 0:35 ` [PATCH 00/15] blkcg ref count refactor/cleanup + blkcg avg_lat Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180831015356.69796-2-dennisszhou@gmail.com \
--to=dennisszhou@gmail.com \
--cc=axboe@kernel.dk \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=jiufei.xue@linux.alibaba.com \
--cc=josef@toxicpanda.com \
--cc=joseph.qi@linux.alibaba.com \
--cc=kernel-team@fb.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.