From: Xiao Ni <xni@redhat.com>
To: linux-raid@vger.kernel.org
Cc: yukuai1@huaweicloud.com, ncroxon@redhat.com, song@kernel.org
Subject: [PATCH 2/2] md: call del_gendisk in control path
Date: Thu, 15 May 2025 17:08:47 +0800 [thread overview]
Message-ID: <20250515090847.2356-3-xni@redhat.com> (raw)
In-Reply-To: <20250515090847.2356-1-xni@redhat.com>
Currently del_gendisk and put_disk are called asynchronously from workqueue
work. The asynchronous approach has a problem: the device node can still
exist for a short window after the mdadm --stop command returns. So a udev
rule can open this device node and create the struct mddev in the kernel again.
So call del_gendisk in the control path and still leave put_disk in
md_kobj_release to avoid a use-after-free (UAF).
But there is a window in which sysfs can be accessed between mddev_unlock and
del_gendisk. So some actions (e.g. adding a disk or changing the level) can
happen, which leads to unexpected results. And if we delete MD_DELETED and only
use MD_CLOSING in the stop control path, the sysfs files can't be accessed if
do_md_stop is stuck because I/O hangs. So we keep MD_DELETED here and set
MD_DELETED before mddev_unlock.
Signed-off-by: Xiao Ni <xni@redhat.com>
---
drivers/md/md.c | 53 ++++++++++++++++++++++++++++++++++++++++++-------
drivers/md/md.h | 16 ++++++++++++++-
2 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9b9950ed6ee9..a62867f34aa8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -606,15 +606,13 @@ static inline struct mddev *mddev_get(struct mddev *mddev)
}
static void mddev_delayed_delete(struct work_struct *ws);
+static bool can_delete_gendisk(struct mddev *mddev);
static void __mddev_put(struct mddev *mddev)
{
- if (mddev->raid_disks || !list_empty(&mddev->disks) ||
- mddev->ctime || mddev->hold_active)
- return;
- /* Array is not configured at all, and not held active, so destroy it */
- set_bit(MD_DELETED, &mddev->flags);
+ if (can_delete_gendisk(mddev) == false)
+ return;
/*
* Call queue_work inside the spinlock so that flush_workqueue() after
@@ -4400,6 +4398,7 @@ array_state_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", array_states[st]);
}
+static void delete_gendisk(struct mddev *mddev);
static int do_md_stop(struct mddev *mddev, int ro);
static int md_set_readonly(struct mddev *mddev);
static int restart_array(struct mddev *mddev);
@@ -4533,6 +4532,9 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
(err && st == clear))
clear_bit(MD_CLOSING, &mddev->flags);
+ if ((st == clear || st == inactive) && !err)
+ delete_gendisk(mddev);
+
return err ?: len;
}
static struct md_sysfs_entry md_array_state =
@@ -5721,19 +5723,30 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
struct mddev *mddev = container_of(kobj, struct mddev, kobj);
ssize_t rv;
+ struct kernfs_node *kn = NULL;
if (!entry->store)
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
+
+ if (entry->store == array_state_store && cmd_match(page, "clear"))
+ kn = sysfs_break_active_protection(kobj, attr);
+
spin_lock(&all_mddevs_lock);
if (!mddev_get(mddev)) {
spin_unlock(&all_mddevs_lock);
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
return -EBUSY;
}
spin_unlock(&all_mddevs_lock);
rv = entry->store(mddev, page, length);
mddev_put(mddev);
+
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
+
return rv;
}
@@ -5746,7 +5759,6 @@ static void md_kobj_release(struct kobject *ko)
if (mddev->sysfs_level)
sysfs_put(mddev->sysfs_level);
- del_gendisk(mddev->gendisk);
put_disk(mddev->gendisk);
}
@@ -6526,6 +6538,28 @@ static int md_set_readonly(struct mddev *mddev)
return err;
}
+static bool can_delete_gendisk(struct mddev *mddev)
+{
+ if (mddev->raid_disks || !list_empty(&mddev->disks) ||
+ mddev->ctime || mddev->hold_active)
+ return false;
+
+ return true;
+}
+
+/* Call this function after do_md_stop with mode 0.
+ * And it can't call this function under reconfig_mutex to
+ * avoid deadlock(e.g. call del_gendisk under the lock and
+ * an access to sysfs files waits the lock)
+ */
+static void delete_gendisk(struct mddev *mddev)
+{
+ if (can_delete_gendisk(mddev) == false)
+ return;
+
+ del_gendisk(mddev->gendisk);
+}
+
/* mode:
* 0 - completely stop and dis-assemble array
* 2 - stop but do not disassemble array
@@ -6588,8 +6622,8 @@ static int do_md_stop(struct mddev *mddev, int mode)
mddev->bitmap_info.offset = 0;
export_array(mddev);
-
md_clean(mddev);
+ set_bit(MD_DELETED, &mddev->flags);
}
md_new_event();
sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -6616,6 +6650,7 @@ static void autorun_array(struct mddev *mddev)
if (err) {
pr_warn("md: do_md_run() returned %d\n", err);
do_md_stop(mddev, 0);
+ delete_gendisk(mddev);
}
}
@@ -7886,6 +7921,10 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
out:
if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY))
clear_bit(MD_CLOSING, &mddev->flags);
+
+ if (cmd == STOP_ARRAY && err == 0)
+ delete_gendisk(mddev);
+
return err;
}
#ifdef CONFIG_COMPAT
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1cf00a04bcdd..45f1027986e4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -697,11 +697,25 @@ static inline bool reshape_interrupted(struct mddev *mddev)
static inline int __must_check mddev_lock(struct mddev *mddev)
{
- return mutex_lock_interruptible(&mddev->reconfig_mutex);
+ int ret = 0;
+
+ ret = mutex_lock_interruptible(&mddev->reconfig_mutex);
+
+ /* MD_DELETED is set in do_md_stop with reconfig_mutex
+ * So check it here also.
+ */
+ if (!ret && test_bit(MD_DELETED, &mddev->flags)) {
+ ret = -EBUSY;
+ mutex_unlock(&mddev->reconfig_mutex);
+ }
+
+ return ret;
}
/* Sometimes we need to take the lock in a situation where
* failure due to interrupts is not acceptable.
+ * It doesn't need to check MD_DELETED here, the owner which
+ * holds the lock here can't be stopped.
*/
static inline void mddev_lock_nointr(struct mddev *mddev)
{
--
2.32.0 (Apple Git-132)
next prev parent reply other threads:[~2025-05-15 9:09 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-15 9:08 [PATCH V2 0/2] md: call del_gendisk in sync way Xiao Ni
2025-05-15 9:08 ` [PATCH 1/2] md: Don't clear MD_CLOSING until mddev is freed Xiao Ni
2025-05-27 2:01 ` Yu Kuai
2025-05-30 6:48 ` Yu Kuai
2025-05-30 6:58 ` Xiao Ni
2025-05-30 7:57 ` Xiao Ni
2025-05-30 8:11 ` Yu Kuai
2025-05-15 9:08 ` Xiao Ni [this message]
2025-05-27 2:11 ` [PATCH 2/2] md: call del_gendisk in control path Yu Kuai
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250515090847.2356-3-xni@redhat.com \
--to=xni@redhat.com \
--cc=linux-raid@vger.kernel.org \
--cc=ncroxon@redhat.com \
--cc=song@kernel.org \
--cc=yukuai1@huaweicloud.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).