From: Luis Chamberlain <mcgrof@kernel.org>
To: axboe@kernel.dk, viro@zeniv.linux.org.uk, bvanassche@acm.org,
gregkh@linuxfoundation.org, rostedt@goodmis.org,
mingo@redhat.com, jack@suse.cz, ming.lei@redhat.com,
nstange@suse.de, akpm@linux-foundation.org
Cc: mhocko@suse.com, yukuai3@huawei.com, martin.petersen@oracle.com,
jejb@linux.ibm.com, linux-block@vger.kernel.org,
linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Luis Chamberlain <mcgrof@kernel.org>
Subject: [PATCH v7 8/8] block: create the request_queue debugfs_dir on registration
Date: Fri, 19 Jun 2020 20:47:30 +0000 [thread overview]
Message-ID: <20200619204730.26124-9-mcgrof@kernel.org> (raw)
In-Reply-To: <20200619204730.26124-1-mcgrof@kernel.org>
We were only creating the request_queue debugfs_dir only
for make_request block drivers (multiqueue), but never for
request-based block drivers. We did this as we were only
creating non-blktrace additional debugfs files on that directory
for make_request drivers. However, since blktrace *always* creates
that directory anyway, we special-case the use of that directory
on blktrace. Other than this being an eye-sore, this exposes
request-based block drivers to the same debugfs fragile
race that used to exist with make_request block drivers
where if we start adding files onto that directory we can later
run a race with a double removal of dentries on the directory
if we don't deal with this carefully on blktrace.
Instead, just simplify things by always creating the request_queue
debugfs_dir on request_queue registration. Rename the mutex also to
reflect the fact that this is used outside of the blktrace context.
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
block/blk-core.c | 8 +-----
block/blk-mq-debugfs.c | 5 ----
block/blk-sysfs.c | 9 +++++++
block/blk.h | 2 --
include/linux/blkdev.h | 5 ++--
kernel/trace/blktrace.c | 58 ++++++++++++++++++-----------------------
6 files changed, 39 insertions(+), 48 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index a5126c0be777..72b102a389a5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -51,9 +51,7 @@
#include "blk-pm.h"
#include "blk-rq-qos.h"
-#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
-#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -555,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
kobject_init(&q->kobj, &blk_queue_ktype);
-#ifdef CONFIG_BLK_DEV_IO_TRACE
- mutex_init(&q->blk_trace_mutex);
-#endif
+ mutex_init(&q->debugfs_mutex);
mutex_init(&q->sysfs_lock);
mutex_init(&q->sysfs_dir_lock);
spin_lock_init(&q->queue_lock);
@@ -1937,9 +1933,7 @@ int __init blk_dev_init(void)
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
-#ifdef CONFIG_DEBUG_FS
blk_debugfs_root = debugfs_create_dir("block", NULL);
-#endif
return 0;
}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 15df3a36e9fa..a2800bc56fb4 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -824,9 +824,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
int i;
- q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
- blk_debugfs_root);
-
debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
/*
@@ -857,9 +854,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
void blk_mq_debugfs_unregister(struct request_queue *q)
{
- debugfs_remove_recursive(q->debugfs_dir);
q->sched_debugfs_dir = NULL;
- q->debugfs_dir = NULL;
}
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 561624d4cc4e..be67952e7be2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
#include <linux/blktrace_api.h>
#include <linux/blk-mq.h>
#include <linux/blk-cgroup.h>
+#include <linux/debugfs.h>
#include "blk.h"
#include "blk-mq.h"
@@ -917,6 +918,9 @@ static void blk_release_queue(struct kobject *kobj)
blk_mq_release(q);
blk_trace_shutdown(q);
+ mutex_lock(&q->debugfs_mutex);
+ debugfs_remove_recursive(q->debugfs_dir);
+ mutex_unlock(&q->debugfs_mutex);
if (queue_is_mq(q))
blk_mq_debugfs_unregister(q);
@@ -989,6 +993,11 @@ int blk_register_queue(struct gendisk *disk)
goto unlock;
}
+ mutex_lock(&q->debugfs_mutex);
+ q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+ blk_debugfs_root);
+ mutex_unlock(&q->debugfs_mutex);
+
if (queue_is_mq(q)) {
__blk_mq_register_dev(dev, q);
blk_mq_debugfs_register(q);
diff --git a/block/blk.h b/block/blk.h
index b5d1f0fc6547..499308c6ab3b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,9 +14,7 @@
/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT (5 * HZ)
-#ifdef CONFIG_DEBUG_FS
extern struct dentry *blk_debugfs_root;
-#endif
struct blk_flush_queue {
unsigned int flush_pending_idx:1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 780d6fa94746..e70e03d2cd8c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -528,9 +528,9 @@ struct request_queue {
unsigned int sg_timeout;
unsigned int sg_reserved_size;
int node;
+ struct mutex debugfs_mutex;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace __rcu *blk_trace;
- struct mutex blk_trace_mutex;
#endif
/*
* for flush operations
@@ -574,8 +574,9 @@ struct request_queue {
struct list_head tag_set_list;
struct bio_set bio_split;
-#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *debugfs_dir;
+
+#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *sched_debugfs_dir;
struct dentry *rqos_debugfs_dir;
#endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 098780ec018f..12e1d52f1d17 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -348,7 +348,7 @@ static int __blk_trace_remove(struct request_queue *q)
struct blk_trace *bt;
bt = rcu_replace_pointer(q->blk_trace, NULL,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (!bt)
return -EINVAL;
@@ -362,9 +362,9 @@ int blk_trace_remove(struct request_queue *q)
{
int ret;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_remove(q);
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -483,14 +483,11 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct dentry *dir = NULL;
int ret;
- lockdep_assert_held(&q->blk_trace_mutex);
+ lockdep_assert_held(&q->debugfs_mutex);
if (!buts->buf_size || !buts->buf_nr)
return -EINVAL;
- if (!blk_debugfs_root)
- return -ENOENT;
-
strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
@@ -505,7 +502,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
* we can be.
*/
if (rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex))) {
+ lockdep_is_held(&q->debugfs_mutex))) {
pr_warn("Concurrent blktraces are not allowed on %s\n",
buts->name);
return -EBUSY;
@@ -524,18 +521,15 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->msg_data)
goto err;
-#ifdef CONFIG_BLK_DEBUG_FS
/*
- * When tracing whole make_request drivers (multiqueue) block devices,
- * reuse the existing debugfs directory created by the block layer on
- * init. For request-based block devices, all partitions block devices,
+ * When tracing the whole disk reuse the existing debugfs directory
+ * created by the block layer on init. For partitions block devices,
* and scsi-generic block devices we create a temporary new debugfs
* directory that will be removed once the trace ends.
*/
- if (queue_is_mq(q) && bdev && bdev == bdev->bd_contains)
+ if (bdev && bdev == bdev->bd_contains)
dir = q->debugfs_dir;
else
-#endif
bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
/*
@@ -617,9 +611,9 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
{
int ret;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_setup(q, name, dev, bdev, arg);
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -665,7 +659,7 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (bt == NULL)
return -EINVAL;
@@ -705,9 +699,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_startstop(q, start);
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -736,7 +730,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
if (!q)
return -ENXIO;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
switch (cmd) {
case BLKTRACESETUP:
@@ -763,7 +757,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
break;
}
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -774,14 +768,14 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
**/
void blk_trace_shutdown(struct request_queue *q)
{
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
if (rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex))) {
+ lockdep_is_held(&q->debugfs_mutex))) {
__blk_trace_startstop(q, 0);
__blk_trace_remove(q);
}
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
}
#ifdef CONFIG_BLK_CGROUP
@@ -1662,7 +1656,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
struct blk_trace *bt;
bt = rcu_replace_pointer(q->blk_trace, NULL,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (bt == NULL)
return -EINVAL;
@@ -1837,10 +1831,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (q == NULL)
goto out_bdput;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (attr == &dev_attr_enable) {
ret = sprintf(buf, "%u\n", !!bt);
goto out_unlock_bdev;
@@ -1858,7 +1852,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", bt->end_lba);
out_unlock_bdev:
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
out_bdput:
bdput(bdev);
out:
@@ -1901,10 +1895,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (q == NULL)
goto out_bdput;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (attr == &dev_attr_enable) {
if (!!value == !!bt) {
ret = 0;
@@ -1921,7 +1915,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (bt == NULL) {
ret = blk_trace_setup_queue(q, bdev);
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
}
if (ret == 0) {
@@ -1936,7 +1930,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
}
out_unlock_bdev:
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
out_bdput:
bdput(bdev);
out:
--
2.26.2
next prev parent reply other threads:[~2020-06-19 20:47 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-19 20:47 [PATCH v7 0/8] blktrace: fix debugfs use after free Luis Chamberlain
2020-06-19 20:47 ` [PATCH v7 1/8] block: add docs for gendisk / request_queue refcount helpers Luis Chamberlain
2020-06-19 20:47 ` [PATCH v7 2/8] block: clarify context for refcount increment helpers Luis Chamberlain
2020-06-19 20:47 ` [PATCH v7 3/8] block: revert back to synchronous request_queue removal Luis Chamberlain
2020-06-19 20:47 ` [PATCH v7 4/8] blktrace: annotate required lock on do_blk_trace_setup() Luis Chamberlain
2020-06-20 17:02 ` Bart Van Assche
2020-06-19 20:47 ` [PATCH v7 5/8] loop: be paranoid on exit and prevent new additions / removals Luis Chamberlain
2020-06-20 17:11 ` Bart Van Assche
2020-06-22 12:27 ` Luis Chamberlain
2020-06-23 2:16 ` Bart Van Assche
2020-06-23 16:56 ` Luis Chamberlain
2020-06-23 17:05 ` Johannes Thumshirn
2020-06-24 12:08 ` Luis Chamberlain
2020-06-19 20:47 ` [PATCH v7 6/8] blktrace: fix debugfs use after free Luis Chamberlain
2020-06-20 7:29 ` Christoph Hellwig
2020-06-20 17:31 ` Bart Van Assche
2020-06-22 12:36 ` Luis Chamberlain
2020-06-19 20:47 ` [PATCH v7 7/8] blktrace: ensure our debugfs dir exists Luis Chamberlain
2020-06-20 7:29 ` Christoph Hellwig
2020-06-20 17:33 ` Bart Van Assche
2020-06-19 20:47 ` Luis Chamberlain [this message]
2020-06-20 7:30 ` [PATCH v7 8/8] block: create the request_queue debugfs_dir on registration Christoph Hellwig
2020-06-20 18:07 ` Bart Van Assche
2020-06-22 12:42 ` Luis Chamberlain
2020-06-23 3:18 ` Bart Van Assche
2020-06-20 21:18 ` [PATCH v7 0/8] blktrace: fix debugfs use after free Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200619204730.26124-9-mcgrof@kernel.org \
--to=mcgrof@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=axboe@kernel.dk \
--cc=bvanassche@acm.org \
--cc=gregkh@linuxfoundation.org \
--cc=jack@suse.cz \
--cc=jejb@linux.ibm.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=martin.petersen@oracle.com \
--cc=mhocko@suse.com \
--cc=ming.lei@redhat.com \
--cc=mingo@redhat.com \
--cc=nstange@suse.de \
--cc=rostedt@goodmis.org \
--cc=viro@zeniv.linux.org.uk \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.