From: NeilBrown <neilb@suse.com>
To: Shaohua Li <shli@kernel.org>
Cc: Linux Raid <linux-raid@vger.kernel.org>
Subject: [PATCH] md: create new workqueue for object destruction
Date: Tue, 17 Oct 2017 16:04:52 +1100 [thread overview]
Message-ID: <87mv4qjrez.fsf@notabene.neil.brown.name> (raw)
[-- Attachment #1: Type: text/plain, Size: 6965 bytes --]
lockdep currently complains about a potential deadlock
with sysfs access taking reconfig_mutex, and that
waiting for a work queue to complete.
The cause is inappropriate overloading of work-items
on work-queues.
We currently have two work-queues: md_wq and md_misc_wq.
They service 5 different tasks:
mddev->flush_work md_wq
mddev->event_work (for dm-raid) md_misc_wq
mddev->del_work (mddev_delayed_delete) md_misc_wq
mddev->del_work (md_start_sync) md_misc_wq
rdev->del_work md_misc_wq
We need to call flush_workqueue() for md_start_sync and ->event_work
while holding reconfig_mutex, but mustn't hold it when
flushing mddev_delayed_delete or rdev->del_work.
md_wq is a bit special as it has WQ_MEM_RECLAIM so it is
best to leave that alone.
So create a new workqueue, md_del_wq, and a new work_struct,
mddev->sync_work, so we can keep two classes of work separate.
md_del_wq and ->del_work are used only for destroying rdev
and mddev.
md_misc_wq is used for event_work and sync_work.
Also document the purpose of each flush_workqueue() call.
This removes the lockdep warning.
Signed-off-by: NeilBrown <neilb@suse.com>
---
drivers/md/md.c | 51 ++++++++++++++++++++++++++++-----------------------
drivers/md/md.h | 1 +
2 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index bf06ff017eda..a9f1352b3849 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -91,6 +91,7 @@ EXPORT_SYMBOL(md_cluster_mod);
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
+static struct workqueue_struct *md_del_wq;
static struct workqueue_struct *md_misc_wq;
static int remove_and_add_spares(struct mddev *mddev,
@@ -529,7 +530,7 @@ static void mddev_put(struct mddev *mddev)
* succeed in waiting for the work to be done.
*/
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
- queue_work(md_misc_wq, &mddev->del_work);
+ queue_work(md_del_wq, &mddev->del_work);
} else
kfree(mddev);
}
@@ -2264,7 +2265,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
synchronize_rcu();
INIT_WORK(&rdev->del_work, md_delayed_delete);
kobject_get(&rdev->kobj);
- queue_work(md_misc_wq, &rdev->del_work);
+ queue_work(md_del_wq, &rdev->del_work);
}
/*
@@ -4298,7 +4299,8 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
minor != MINOR(dev))
return -EOVERFLOW;
- flush_workqueue(md_misc_wq);
+ /* Ensure old devices are fully deleted (rdev->del_work) */
+ flush_workqueue(md_del_wq);
err = mddev_lock(mddev);
if (err)
@@ -4537,6 +4539,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
mddev_lock(mddev) == 0) {
+ /* Ensure sync/recovery has fully started (mddev->sync_work) */
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -5280,9 +5283,9 @@ static int md_alloc(dev_t dev, char *name)
unit = MINOR(mddev->unit) >> shift;
/* wait for any previous instance of this device to be
- * completely removed (mddev_delayed_delete).
+ * completely removed (mddev_delayed_delete, mddev->del_work).
*/
- flush_workqueue(md_misc_wq);
+ flush_workqueue(md_del_wq);
mutex_lock(&disks_mutex);
error = -EEXIST;
@@ -5788,6 +5791,7 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ /* Ensure any sync/recovery has fully started (mddev->sync_work) */
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -7128,8 +7132,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
if (cmd == ADD_NEW_DISK)
- /* need to ensure md_delayed_delete() has completed */
- flush_workqueue(md_misc_wq);
+ /* need to ensure md_delayed_delete() has completed (rdev->del_work) */
+ flush_workqueue(md_del_wq);
if (cmd == HOT_REMOVE_DISK)
/* need to ensure recovery thread has run */
@@ -7383,8 +7387,8 @@ static int md_open(struct block_device *bdev, fmode_t mode)
* bd_disk.
*/
mddev_put(mddev);
- /* Wait until bdev->bd_disk is definitely gone */
- flush_workqueue(md_misc_wq);
+ /* Wait until bdev->bd_disk is definitely gone (mddev->del_work) */
+ flush_workqueue(md_del_wq);
/* Then retry the open from the top */
return -ERESTARTSYS;
}
@@ -8631,7 +8635,7 @@ static int remove_and_add_spares(struct mddev *mddev,
static void md_start_sync(struct work_struct *ws)
{
- struct mddev *mddev = container_of(ws, struct mddev, del_work);
+ struct mddev *mddev = container_of(ws, struct mddev, sync_work);
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
@@ -8823,8 +8827,8 @@ void md_check_recovery(struct mddev *mddev)
*/
bitmap_write_all(mddev->bitmap);
}
- INIT_WORK(&mddev->del_work, md_start_sync);
- queue_work(md_misc_wq, &mddev->del_work);
+ INIT_WORK(&mddev->sync_work, md_start_sync);
+ queue_work(md_misc_wq, &mddev->sync_work);
goto unlock;
}
not_running:
@@ -9018,15 +9022,13 @@ static int __init md_init(void)
int ret = -ENOMEM;
md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
- if (!md_wq)
- goto err_wq;
-
+ md_del_wq = alloc_workqueue("md_del", 0, 0);
md_misc_wq = alloc_workqueue("md_misc", 0, 0);
- if (!md_misc_wq)
- goto err_misc_wq;
+ if (!md_wq || !md_del_wq || !md_misc_wq)
+ goto err;
if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
- goto err_md;
+ goto err;
if ((ret = register_blkdev(0, "mdp")) < 0)
goto err_mdp;
@@ -9045,11 +9047,13 @@ static int __init md_init(void)
err_mdp:
unregister_blkdev(MD_MAJOR, "md");
-err_md:
- destroy_workqueue(md_misc_wq);
-err_misc_wq:
- destroy_workqueue(md_wq);
-err_wq:
+err:
+ if (md_wq)
+ destroy_workqueue(md_wq);
+ if (md_del_wq)
+ destroy_workqueue(md_del_wq);
+ if (md_misc_wq)
+ destroy_workqueue(md_misc_wq);
return ret;
}
@@ -9304,6 +9308,7 @@ static __exit void md_exit(void)
*/
}
destroy_workqueue(md_misc_wq);
+ destroy_workqueue(md_del_wq);
destroy_workqueue(md_wq);
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 03fc641e5da1..8c2158f3bd59 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -397,6 +397,7 @@ struct mddev {
struct kernfs_node *sysfs_action; /* handle for 'sync_action' */
struct work_struct del_work; /* used for delayed sysfs removal */
+ struct work_struct sync_work; /* used for async starting of md_do_sync */
/* "lock" protects:
* flush_bio transition from NULL to !NULL
--
2.14.0.rc0.dirty
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]
next reply other threads:[~2017-10-17 5:04 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-17 5:04 NeilBrown [this message]
2017-10-18 6:21 ` [PATCH] md: create new workqueue for object destruction Shaohua Li
2017-10-18 7:29 ` NeilBrown
2017-10-18 11:21 ` Artur Paszkiewicz
2017-10-18 22:36 ` NeilBrown
2017-10-19 8:27 ` Artur Paszkiewicz
2017-10-19 22:28 ` NeilBrown
2017-10-20 14:00 ` Artur Paszkiewicz
2017-10-22 23:31 ` NeilBrown
2017-10-27 10:44 ` Artur Paszkiewicz
2017-10-29 22:18 ` NeilBrown
2017-10-30 13:02 ` Artur Paszkiewicz
2017-11-01 3:57 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87mv4qjrez.fsf@notabene.neil.brown.name \
--to=neilb@suse.com \
--cc=linux-raid@vger.kernel.org \
--cc=shli@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).