* [PATCH 1/9] md: fix error handling in md_alloc
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-13 15:26 ` Logan Gunthorpe
2022-07-13 11:31 ` [PATCH 2/9] md: implement ->free_disk Christoph Hellwig
` (7 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
Error handling in md_alloc is a mess. Untangle it so that failures which
happen before add_disk is called, i.e. before the gendisk becomes globally
visible, just free the mddev directly. For failures after that point, clear
the hold flag and let mddev_put take care of cleaning up the mddev through
the usual mechanisms.
Fixes: 5e55e2f5fc95 ("[PATCH] md: convert compile time warnings into runtime warnings")
Fixes: 9be68dd7ac0e ("md: add error handling support for add_disk()")
Fixes: 7ad1069166c0 ("md: properly unwind when failing to add the kobject in md_alloc")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
Note: the put_disk here is not fully correct on md-next, but will
do the right thing once merged with the block tree.
drivers/md/md.c | 45 ++++++++++++++++++++++++++++++++-------------
1 file changed, 32 insertions(+), 13 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b64de313838f2..7affddade8b6b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -791,6 +791,15 @@ static struct mddev *mddev_alloc(dev_t unit)
return ERR_PTR(error);
}
+static void mddev_free(struct mddev *mddev)
+{
+ spin_lock(&all_mddevs_lock);
+ list_del(&mddev->all_mddevs);
+ spin_unlock(&all_mddevs_lock);
+
+ kfree(mddev);
+}
+
static const struct attribute_group md_redundancy_group;
void mddev_unlock(struct mddev *mddev)
@@ -5664,8 +5673,8 @@ int md_alloc(dev_t dev, char *name)
mutex_lock(&disks_mutex);
mddev = mddev_alloc(dev);
if (IS_ERR(mddev)) {
- mutex_unlock(&disks_mutex);
- return PTR_ERR(mddev);
+ error = PTR_ERR(mddev);
+ goto out_unlock;
}
partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
@@ -5683,7 +5692,7 @@ int md_alloc(dev_t dev, char *name)
strcmp(mddev2->gendisk->disk_name, name) == 0) {
spin_unlock(&all_mddevs_lock);
error = -EEXIST;
- goto out_unlock_disks_mutex;
+ goto out_free_mddev;
}
spin_unlock(&all_mddevs_lock);
}
@@ -5696,7 +5705,7 @@ int md_alloc(dev_t dev, char *name)
error = -ENOMEM;
disk = blk_alloc_disk(NUMA_NO_NODE);
if (!disk)
- goto out_unlock_disks_mutex;
+ goto out_free_mddev;
disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift;
@@ -5717,25 +5726,35 @@ int md_alloc(dev_t dev, char *name)
mddev->gendisk = disk;
error = add_disk(disk);
if (error)
- goto out_cleanup_disk;
+ goto out_put_disk;
error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
- if (error)
- goto out_del_gendisk;
+ if (error) {
+ /*
+ * The disk is already live at this point. Clear the hold flag
+ * and let mddev_put take care of the deletion, as it isn't any
+ * different from a normal close on last release now.
+ */
+ mddev->hold_active = 0;
+ goto done;
+ }
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
- goto out_unlock_disks_mutex;
-out_del_gendisk:
- del_gendisk(disk);
-out_cleanup_disk:
- blk_cleanup_disk(disk);
-out_unlock_disks_mutex:
+done:
mutex_unlock(&disks_mutex);
mddev_put(mddev);
return error;
+
+out_put_disk:
+ put_disk(disk);
+out_free_mddev:
+ mddev_free(mddev);
+out_unlock:
+ mutex_unlock(&disks_mutex);
+ return error;
}
static void md_probe(dev_t dev)
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 1/9] md: fix error handling in md_alloc
2022-07-13 11:31 ` [PATCH 1/9] md: fix error handling in md_alloc Christoph Hellwig
@ 2022-07-13 15:26 ` Logan Gunthorpe
2022-07-13 16:31 ` Logan Gunthorpe
0 siblings, 1 reply; 16+ messages in thread
From: Logan Gunthorpe @ 2022-07-13 15:26 UTC (permalink / raw)
To: Christoph Hellwig, Song Liu; +Cc: linux-raid, linux-block
On 2022-07-13 05:31, Christoph Hellwig wrote:
> Error handling in md_alloc is a mess. Untangle it to just free the mddev
> directly before add_disk is called and thus the gendisk is globally
> visible. After that clear the hold flag and let the mddev_put take care
> of cleaning up the mddev through the usual mechanisms.
>
> Fixes: 5e55e2f5fc95 ("[PATCH] md: convert compile time warnings into runtime warnings")
> Fixes: 9be68dd7ac0e ("md: add error handling support for add_disk()")
> Fixes: 7ad1069166c0 ("md: properly unwind when failing to add the kobject in md_alloc")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>
> Note: the put_disk here is not fully correct on md-next, but will
> do the right thing once merged with the block tree.
>
> drivers/md/md.c | 45 ++++++++++++++++++++++++++++++++-------------
> 1 file changed, 32 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index b64de313838f2..7affddade8b6b 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -791,6 +791,15 @@ static struct mddev *mddev_alloc(dev_t unit)
> return ERR_PTR(error);
> }
>
> +static void mddev_free(struct mddev *mddev)
> +{
> + spin_lock(&all_mddevs_lock);
> + list_del(&mddev->all_mddevs);
> + spin_unlock(&all_mddevs_lock);
> +
> + kfree(mddev);
> +}
Sadly, I still don't think this is correct. mddev_init() has already
called kobject_init() and according to the documentation it *must* be
cleaned up with a call to kobject_put(). That doesn't happen in this
path -- so I believe this will cause memory leaks.
I have also noticed this code base already makes that same mistake in
mddev_alloc(): freeing the mddev on the error path after kobject_init().
But that would be easy to fix up.
Logan
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/9] md: fix error handling in md_alloc
2022-07-13 15:26 ` Logan Gunthorpe
@ 2022-07-13 16:31 ` Logan Gunthorpe
0 siblings, 0 replies; 16+ messages in thread
From: Logan Gunthorpe @ 2022-07-13 16:31 UTC (permalink / raw)
To: Christoph Hellwig, Song Liu; +Cc: linux-raid, linux-block
On 2022-07-13 09:26, Logan Gunthorpe wrote:
>
>
>
> On 2022-07-13 05:31, Christoph Hellwig wrote:
>> Error handling in md_alloc is a mess. Untangle it to just free the mddev
>> directly before add_disk is called and thus the gendisk is globally
>> visible. After that clear the hold flag and let the mddev_put take care
>> of cleaning up the mddev through the usual mechanisms.
>>
>> Fixes: 5e55e2f5fc95 ("[PATCH] md: convert compile time warnings into runtime warnings")
>> Fixes: 9be68dd7ac0e ("md: add error handling support for add_disk()")
>> Fixes: 7ad1069166c0 ("md: properly unwind when failing to add the kobject in md_alloc")
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>
>> Note: the put_disk here is not fully correct on md-next, but will
>> do the right thing once merged with the block tree.
>>
>> drivers/md/md.c | 45 ++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 32 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index b64de313838f2..7affddade8b6b 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -791,6 +791,15 @@ static struct mddev *mddev_alloc(dev_t unit)
>> return ERR_PTR(error);
>> }
>>
>> +static void mddev_free(struct mddev *mddev)
>> +{
>> + spin_lock(&all_mddevs_lock);
>> + list_del(&mddev->all_mddevs);
>> + spin_unlock(&all_mddevs_lock);
>> +
>> + kfree(mddev);
>> +}
>
> Sadly, I still don't think this is correct. mddev_init() has already
> called kobject_init() and according to the documentation it *must* be
> cleaned up with a call to kobject_put(). That doesn't happen in this
> path -- so I believe this will cause memory leaks.
What about something along these lines:
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78e2588ed43e..1d13840cec6d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5581,6 +5581,15 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
return rv;
}
+static void mddev_free(struct mddev *mddev)
+{
+ percpu_ref_exit(&mddev->writes_pending);
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
+
+ kfree(mddev);
+}
+
static void md_free(struct kobject *ko)
{
struct mddev *mddev = container_of(ko, struct mddev, kobj);
@@ -5593,12 +5602,9 @@ static void md_free(struct kobject *ko)
if (mddev->gendisk) {
del_gendisk(mddev->gendisk);
blk_cleanup_disk(mddev->gendisk);
+ } else {
+ mddev_free(mddev);
}
- percpu_ref_exit(&mddev->writes_pending);
-
- bioset_exit(&mddev->bio_set);
- bioset_exit(&mddev->sync_set);
- kfree(mddev);
}
static const struct sysfs_ops md_sysfs_ops = {
@@ -7858,6 +7864,13 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
return ret;
}
+static void md_free_disk(struct gendisk *disk)
+{
+ struct mddev *mddev = disk->private_data;
+
+ mddev_free(mddev);
+}
+
const struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
@@ -7871,6 +7884,7 @@ const struct block_device_operations md_fops =
.getgeo = md_getgeo,
.check_events = md_check_events,
.set_read_only = md_set_read_only,
+ .free_disk = md_free_disk,
};
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/9] md: implement ->free_disk
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
2022-07-13 11:31 ` [PATCH 1/9] md: fix error handling in md_alloc Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-13 11:31 ` [PATCH 3/9] md: rename md_free to md_kobj_release Christoph Hellwig
` (6 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
Ensure that all private data is only freed once all accesses are done.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7affddade8b6b..2beaadd202c4e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5603,11 +5603,6 @@ static void md_free(struct kobject *ko)
del_gendisk(mddev->gendisk);
blk_cleanup_disk(mddev->gendisk);
}
- percpu_ref_exit(&mddev->writes_pending);
-
- bioset_exit(&mddev->bio_set);
- bioset_exit(&mddev->sync_set);
- kfree(mddev);
}
static const struct sysfs_ops md_sysfs_ops = {
@@ -7877,6 +7872,17 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
return ret;
}
+static void md_free_disk(struct gendisk *disk)
+{
+ struct mddev *mddev = disk->private_data;
+
+ percpu_ref_exit(&mddev->writes_pending);
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
+
+ kfree(mddev);
+}
+
const struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
@@ -7890,6 +7896,7 @@ const struct block_device_operations md_fops =
.getgeo = md_getgeo,
.check_events = md_check_events,
.set_read_only = md_set_read_only,
+ .free_disk = md_free_disk,
};
static int md_thread(void *arg)
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 3/9] md: rename md_free to md_kobj_release
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
2022-07-13 11:31 ` [PATCH 1/9] md: fix error handling in md_alloc Christoph Hellwig
2022-07-13 11:31 ` [PATCH 2/9] md: implement ->free_disk Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-15 9:00 ` Guoqing Jiang
2022-07-13 11:31 ` [PATCH 4/9] md: factor out the rdev overlaps check from rdev_size_store Christoph Hellwig
` (5 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
The md_free name is rather misleading, so pick a better one.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2beaadd202c4e..3127dcb8102ce 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5590,7 +5590,7 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
return rv;
}
-static void md_free(struct kobject *ko)
+static void md_kobj_release(struct kobject *ko)
{
struct mddev *mddev = container_of(ko, struct mddev, kobj);
@@ -5610,7 +5610,7 @@ static const struct sysfs_ops md_sysfs_ops = {
.store = md_attr_store,
};
static struct kobj_type md_ktype = {
- .release = md_free,
+ .release = md_kobj_release,
.sysfs_ops = &md_sysfs_ops,
.default_groups = md_attr_groups,
};
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/9] md: rename md_free to md_kobj_release
2022-07-13 11:31 ` [PATCH 3/9] md: rename md_free to md_kobj_release Christoph Hellwig
@ 2022-07-15 9:00 ` Guoqing Jiang
0 siblings, 0 replies; 16+ messages in thread
From: Guoqing Jiang @ 2022-07-15 9:00 UTC (permalink / raw)
To: Christoph Hellwig, Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
On 7/13/22 7:31 PM, Christoph Hellwig wrote:
> The md_free name is rather misleading, so pick a better one.
>
> Signed-off-by: Christoph Hellwig<hch@lst.de>
> ---
> drivers/md/md.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 2beaadd202c4e..3127dcb8102ce 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -5590,7 +5590,7 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
> return rv;
> }
>
> -static void md_free(struct kobject *ko)
> +static void md_kobj_release(struct kobject *ko)
While at it, how about renaming the "ko" parameter to "kobj"?
> {
> struct mddev *mddev = container_of(ko, struct mddev, kobj);
>
> @@ -5610,7 +5610,7 @@ static const struct sysfs_ops md_sysfs_ops = {
> .store = md_attr_store,
> };
> static struct kobj_type md_ktype = {
> - .release = md_free,
> + .release = md_kobj_release,
> .sysfs_ops = &md_sysfs_ops,
> .default_groups = md_attr_groups,
> };
Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Thanks,
Guoqing
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 4/9] md: factor out the rdev overlaps check from rdev_size_store
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
` (2 preceding siblings ...)
2022-07-13 11:31 ` [PATCH 3/9] md: rename md_free to md_kobj_release Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-15 9:01 ` Guoqing Jiang
2022-07-13 11:31 ` [PATCH 5/9] md: stop using for_each_mddev in md_do_sync Christoph Hellwig
` (4 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
This splits the code into nicely readable chunks and also avoids
the refcount inc/dec manipulations.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 84 +++++++++++++++++++++++--------------------------
1 file changed, 39 insertions(+), 45 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3127dcb8102ce..5346135ab51c8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3344,14 +3344,33 @@ rdev_size_show(struct md_rdev *rdev, char *page)
return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
}
-static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+static int md_rdevs_overlap(struct md_rdev *a, struct md_rdev *b)
{
/* check if two start/length pairs overlap */
- if (s1+l1 <= s2)
- return 0;
- if (s2+l2 <= s1)
- return 0;
- return 1;
+ if (a->data_offset + a->sectors <= b->data_offset)
+ return false;
+ if (b->data_offset + b->sectors <= a->data_offset)
+ return false;
+ return true;
+}
+
+static bool md_rdev_overlaps(struct md_rdev *rdev)
+{
+ struct mddev *mddev;
+ struct md_rdev *rdev2;
+
+ spin_lock(&all_mddevs_lock);
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
+ rdev_for_each(rdev2, mddev) {
+ if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
+ md_rdevs_overlap(rdev, rdev2)) {
+ spin_unlock(&all_mddevs_lock);
+ return true;
+ }
+ }
+ }
+ spin_unlock(&all_mddevs_lock);
+ return false;
}
static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
@@ -3403,46 +3422,21 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
return -EINVAL; /* component must fit device */
rdev->sectors = sectors;
- if (sectors > oldsectors && my_mddev->external) {
- /* Need to check that all other rdevs with the same
- * ->bdev do not overlap. 'rcu' is sufficient to walk
- * the rdev lists safely.
- * This check does not provide a hard guarantee, it
- * just helps avoid dangerous mistakes.
- */
- struct mddev *mddev;
- int overlap = 0;
- struct list_head *tmp;
- rcu_read_lock();
- for_each_mddev(mddev, tmp) {
- struct md_rdev *rdev2;
-
- rdev_for_each(rdev2, mddev)
- if (rdev->bdev == rdev2->bdev &&
- rdev != rdev2 &&
- overlaps(rdev->data_offset, rdev->sectors,
- rdev2->data_offset,
- rdev2->sectors)) {
- overlap = 1;
- break;
- }
- if (overlap) {
- mddev_put(mddev);
- break;
- }
- }
- rcu_read_unlock();
- if (overlap) {
- /* Someone else could have slipped in a size
- * change here, but doing so is just silly.
- * We put oldsectors back because we *know* it is
- * safe, and trust userspace not to race with
- * itself
- */
- rdev->sectors = oldsectors;
- return -EBUSY;
- }
+ /*
+ * Check that all other rdevs with the same bdev do not overlap. This
+ * check does not provide a hard guarantee, it just helps avoid
+ * dangerous mistakes.
+ */
+ if (sectors > oldsectors && my_mddev->external &&
+ md_rdev_overlaps(rdev)) {
+ /*
+ * Someone else could have slipped in a size change here, but
+ * doing so is just silly. We put oldsectors back because we
+ * know it is safe, and trust userspace not to race with itself.
+ */
+ rdev->sectors = oldsectors;
+ return -EBUSY;
}
return len;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 4/9] md: factor out the rdev overlaps check from rdev_size_store
2022-07-13 11:31 ` [PATCH 4/9] md: factor out the rdev overlaps check from rdev_size_store Christoph Hellwig
@ 2022-07-15 9:01 ` Guoqing Jiang
0 siblings, 0 replies; 16+ messages in thread
From: Guoqing Jiang @ 2022-07-15 9:01 UTC (permalink / raw)
To: Christoph Hellwig, Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
On 7/13/22 7:31 PM, Christoph Hellwig wrote:
> This splits the code into nicely readable chunks and also avoids
> the refcount inc/dec manipulations.
>
> Signed-off-by: Christoph Hellwig<hch@lst.de>
> ---
> drivers/md/md.c | 84 +++++++++++++++++++++++--------------------------
> 1 file changed, 39 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 3127dcb8102ce..5346135ab51c8 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -3344,14 +3344,33 @@ rdev_size_show(struct md_rdev *rdev, char *page)
> return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
> }
>
> -static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
> +static int md_rdevs_overlap(struct md_rdev *a, struct md_rdev *b)
> {
> /* check if two start/length pairs overlap */
> - if (s1+l1 <= s2)
> - return 0;
> - if (s2+l2 <= s1)
> - return 0;
> - return 1;
> + if (a->data_offset + a->sectors <= b->data_offset)
> + return false;
> + if (b->data_offset + b->sectors <= a->data_offset)
> + return false;
> + return true;
> +}
Given it returns true or false, better to change the return type to bool.
> +
> +static bool md_rdev_overlaps(struct md_rdev *rdev)
> +{
The two names (md_rdevs_overlap/md_rdev_overlaps) are quite similar ...
Thanks,
Guoqing
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 5/9] md: stop using for_each_mddev in md_do_sync
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
` (3 preceding siblings ...)
2022-07-13 11:31 ` [PATCH 4/9] md: factor out the rdev overlaps check from rdev_size_store Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-13 11:31 ` [PATCH 6/9] md: stop using for_each_mddev in md_notify_reboot Christoph Hellwig
` (3 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
Just do a plain list_for_each that only grabs a mddev reference in
the case where the thread sleeps and restarts the list iteration.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5346135ab51c8..a2c2e45cb4c18 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8726,7 +8726,6 @@ void md_do_sync(struct md_thread *thread)
unsigned long update_time;
sector_t mark_cnt[SYNC_MARKS];
int last_mark,m;
- struct list_head *tmp;
sector_t last_check;
int skipped = 0;
struct md_rdev *rdev;
@@ -8790,7 +8789,8 @@ void md_do_sync(struct md_thread *thread)
try_again:
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto skip;
- for_each_mddev(mddev2, tmp) {
+ spin_lock(&all_mddevs_lock);
+ list_for_each_entry(mddev2, &all_mddevs, all_mddevs) {
if (mddev2 == mddev)
continue;
if (!mddev->parallel_resync
@@ -8822,7 +8822,8 @@ void md_do_sync(struct md_thread *thread)
desc, mdname(mddev),
mdname(mddev2));
}
- mddev_put(mddev2);
+ spin_unlock(&all_mddevs_lock);
+
if (signal_pending(current))
flush_signals(current);
schedule();
@@ -8832,6 +8833,7 @@ void md_do_sync(struct md_thread *thread)
finish_wait(&resync_wait, &wq);
}
}
+ spin_unlock(&all_mddevs_lock);
} while (mddev->curr_resync < MD_RESYNC_DELAYED);
j = 0;
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 6/9] md: stop using for_each_mddev in md_notify_reboot
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
` (4 preceding siblings ...)
2022-07-13 11:31 ` [PATCH 5/9] md: stop using for_each_mddev in md_do_sync Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-15 9:02 ` Guoqing Jiang
2022-07-13 11:31 ` [PATCH 7/9] md: stop using for_each_mddev in md_exit Christoph Hellwig
` (2 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
Just do a simple list_for_each_entry_safe on all_mddevs, and only grab a
reference when we drop the lock.
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a2c2e45cb4c18..d3e587271ef3f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9588,11 +9588,13 @@ EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
- struct list_head *tmp;
- struct mddev *mddev;
+ struct mddev *mddev, *n;
int need_delay = 0;
- for_each_mddev(mddev, tmp) {
+ spin_lock(&all_mddevs_lock);
+ list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
if (mddev_trylock(mddev)) {
if (mddev->pers)
__md_stop_writes(mddev);
@@ -9601,7 +9603,11 @@ static int md_notify_reboot(struct notifier_block *this,
mddev_unlock(mddev);
}
need_delay = 1;
+ mddev_put(mddev);
+ spin_lock(&all_mddevs_lock);
}
+ spin_unlock(&all_mddevs_lock);
+
/*
* certain more exotic SCSI devices are known to be
* volatile wrt too early system reboots. While the
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 7/9] md: stop using for_each_mddev in md_exit
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
` (5 preceding siblings ...)
2022-07-13 11:31 ` [PATCH 6/9] md: stop using for_each_mddev in md_notify_reboot Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-13 11:31 ` [PATCH 8/9] md: only delete entries from all_mddevs when the disk is freed Christoph Hellwig
2022-07-13 11:31 ` [PATCH 9/9] md: simplify md_open Christoph Hellwig
8 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
Just do a simple list_for_each_entry_safe on all_mddevs, and only grab a
reference when we drop the lock. Also delete the now unused for_each_mddev
macro.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 39 +++++++++++----------------------------
1 file changed, 11 insertions(+), 28 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d3e587271ef3f..df99a16892bce 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -368,28 +368,6 @@ EXPORT_SYMBOL_GPL(md_new_event);
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
-/*
- * iterates through all used mddevs in the system.
- * We take care to grab the all_mddevs_lock whenever navigating
- * the list, and to always hold a refcount when unlocked.
- * Any code which breaks out of this loop while own
- * a reference to the current mddev and must mddev_put it.
- */
-#define for_each_mddev(_mddev,_tmp) \
- \
- for (({ spin_lock(&all_mddevs_lock); \
- _tmp = all_mddevs.next; \
- _mddev = NULL;}); \
- ({ if (_tmp != &all_mddevs) \
- mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
- spin_unlock(&all_mddevs_lock); \
- if (_mddev) mddev_put(_mddev); \
- _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
- _tmp != &all_mddevs;}); \
- ({ spin_lock(&all_mddevs_lock); \
- _tmp = _tmp->next;}) \
- )
-
/* Rather than calling directly into the personality make_request function,
* IO requests come here first so that we can check if the device is
* being suspended pending a reconfiguration.
@@ -9926,8 +9904,7 @@ void md_autostart_arrays(int part)
static __exit void md_exit(void)
{
- struct mddev *mddev;
- struct list_head *tmp;
+ struct mddev *mddev, *n;
int delay = 1;
unregister_blkdev(MD_MAJOR,"md");
@@ -9947,17 +9924,23 @@ static __exit void md_exit(void)
}
remove_proc_entry("mdstat", NULL);
- for_each_mddev(mddev, tmp) {
+ spin_lock(&all_mddevs_lock);
+ list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
export_array(mddev);
mddev->ctime = 0;
mddev->hold_active = 0;
/*
- * for_each_mddev() will call mddev_put() at the end of each
- * iteration. As the mddev is now fully clear, this will
- * schedule the mddev for destruction by a workqueue, and the
+ * As the mddev is now fully clear, mddev_put will schedule
+ * the mddev for destruction by a workqueue, and the
* destroy_workqueue() below will wait for that to complete.
*/
+ mddev_put(mddev);
+ spin_lock(&all_mddevs_lock);
}
+ spin_unlock(&all_mddevs_lock);
+
destroy_workqueue(md_rdev_misc_wq);
destroy_workqueue(md_misc_wq);
destroy_workqueue(md_wq);
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 8/9] md: only delete entries from all_mddevs when the disk is freed
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
` (6 preceding siblings ...)
2022-07-13 11:31 ` [PATCH 7/9] md: stop using for_each_mddev in md_exit Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
2022-07-15 9:03 ` Guoqing Jiang
2022-07-13 11:31 ` [PATCH 9/9] md: simplify md_open Christoph Hellwig
8 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
This ensures device names don't get prematurely reused. Instead of
unlinking the mddev from all_mddevs in mddev_put, add a deleted flag to
skip already deleted devices in mddev_get and other places that only want
to see live mddevs.
Reported-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 56 +++++++++++++++++++++++++++++++++----------------
drivers/md/md.h | 1 +
2 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index df99a16892bce..f3ff61e540ee0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -625,6 +625,10 @@ EXPORT_SYMBOL(md_flush_request);
static inline struct mddev *mddev_get(struct mddev *mddev)
{
+ lockdep_assert_held(&all_mddevs_lock);
+
+ if (mddev->deleted)
+ return NULL;
atomic_inc(&mddev->active);
return mddev;
}
@@ -639,7 +643,7 @@ static void mddev_put(struct mddev *mddev)
mddev->ctime == 0 && !mddev->hold_active) {
/* Array is not configured at all, and not held active,
* so destroy it */
- list_del_init(&mddev->all_mddevs);
+ mddev->deleted = true;
/*
* Call queue_work inside the spinlock so that
@@ -720,8 +724,8 @@ static struct mddev *mddev_find(dev_t unit)
spin_lock(&all_mddevs_lock);
mddev = mddev_find_locked(unit);
- if (mddev)
- mddev_get(mddev);
+ if (mddev && !mddev_get(mddev))
+ mddev = NULL;
spin_unlock(&all_mddevs_lock);
return mddev;
@@ -3339,6 +3343,8 @@ static bool md_rdev_overlaps(struct md_rdev *rdev)
spin_lock(&all_mddevs_lock);
list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
+ if (mddev->deleted)
+ continue;
rdev_for_each(rdev2, mddev) {
if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
md_rdevs_overlap(rdev, rdev2)) {
@@ -5526,11 +5532,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
spin_lock(&all_mddevs_lock);
- if (list_empty(&mddev->all_mddevs)) {
+ if (!mddev_get(mddev)) {
spin_unlock(&all_mddevs_lock);
return -EBUSY;
}
- mddev_get(mddev);
spin_unlock(&all_mddevs_lock);
rv = entry->show(mddev, page);
@@ -5551,11 +5556,10 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
spin_lock(&all_mddevs_lock);
- if (list_empty(&mddev->all_mddevs)) {
+ if (!mddev_get(mddev)) {
spin_unlock(&all_mddevs_lock);
return -EBUSY;
}
- mddev_get(mddev);
spin_unlock(&all_mddevs_lock);
rv = entry->store(mddev, page, length);
mddev_put(mddev);
@@ -7852,7 +7856,7 @@ static void md_free_disk(struct gendisk *disk)
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
- kfree(mddev);
+ mddev_free(mddev);
}
const struct block_device_operations md_fops =
@@ -8174,6 +8178,8 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
if (!l--) {
mddev = list_entry(tmp, struct mddev, all_mddevs);
mddev_get(mddev);
+ if (!mddev_get(mddev))
+ continue;
spin_unlock(&all_mddevs_lock);
return mddev;
}
@@ -8187,25 +8193,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct list_head *tmp;
struct mddev *next_mddev, *mddev = v;
+ struct mddev *to_put = NULL;
++*pos;
if (v == (void*)2)
return NULL;
spin_lock(&all_mddevs_lock);
- if (v == (void*)1)
+ if (v == (void*)1) {
tmp = all_mddevs.next;
- else
+ } else {
+ to_put = mddev;
tmp = mddev->all_mddevs.next;
- if (tmp != &all_mddevs)
- next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
- else {
- next_mddev = (void*)2;
- *pos = 0x10000;
}
+
+ for (;;) {
+ if (tmp == &all_mddevs) {
+ next_mddev = (void*)2;
+ *pos = 0x10000;
+ break;
+ }
+ next_mddev = list_entry(tmp, struct mddev, all_mddevs);
+ if (mddev_get(next_mddev))
+ break;
+ mddev = next_mddev;
+ tmp = mddev->all_mddevs.next;
+ };
spin_unlock(&all_mddevs_lock);
- if (v != (void*)1)
+ if (to_put)
mddev_put(mddev);
return next_mddev;
@@ -8769,6 +8785,8 @@ void md_do_sync(struct md_thread *thread)
goto skip;
spin_lock(&all_mddevs_lock);
list_for_each_entry(mddev2, &all_mddevs, all_mddevs) {
+ if (mddev2->deleted)
+ continue;
if (mddev2 == mddev)
continue;
if (!mddev->parallel_resync
@@ -9571,7 +9589,8 @@ static int md_notify_reboot(struct notifier_block *this,
spin_lock(&all_mddevs_lock);
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
- mddev_get(mddev);
+ if (!mddev_get(mddev))
+ continue;
spin_unlock(&all_mddevs_lock);
if (mddev_trylock(mddev)) {
if (mddev->pers)
@@ -9926,7 +9945,8 @@ static __exit void md_exit(void)
spin_lock(&all_mddevs_lock);
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
- mddev_get(mddev);
+ if (!mddev_get(mddev))
+ continue;
spin_unlock(&all_mddevs_lock);
export_array(mddev);
mddev->ctime = 0;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1a85dbe78a71c..bc870e1f1e8c2 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -503,6 +503,7 @@ struct mddev {
atomic_t max_corr_read_errors; /* max read retries */
struct list_head all_mddevs;
+ bool deleted;
const struct attribute_group *to_remove;
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 8/9] md: only delete entries from all_mddevs when the disk is freed
2022-07-13 11:31 ` [PATCH 8/9] md: only delete entries from all_mddevs when the disk is freed Christoph Hellwig
@ 2022-07-15 9:03 ` Guoqing Jiang
0 siblings, 0 replies; 16+ messages in thread
From: Guoqing Jiang @ 2022-07-15 9:03 UTC (permalink / raw)
To: Christoph Hellwig, Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
On 7/13/22 7:31 PM, Christoph Hellwig wrote:
> This ensures device names don't get prematurely reused. *Instead* add a
> deleted flag to skip already deleted devices in mddev_get and other
> places that only want to see live mddevs.
The patch actually adds a deleted member to struct mddev, so the "Instead"
here is at least confusing to me.
> Reported-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> drivers/md/md.c | 56 +++++++++++++++++++++++++++++++++----------------
> drivers/md/md.h | 1 +
> 2 files changed, 39 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index df99a16892bce..f3ff61e540ee0 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -625,6 +625,10 @@ EXPORT_SYMBOL(md_flush_request);
>
> static inline struct mddev *mddev_get(struct mddev *mddev)
> {
> + lockdep_assert_held(&all_mddevs_lock);
> +
> + if (mddev->deleted)
> + return NULL;
> atomic_inc(&mddev->active);
...
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 1a85dbe78a71c..bc870e1f1e8c2 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -503,6 +503,7 @@ struct mddev {
>
> atomic_t max_corr_read_errors; /* max read retries */
> struct list_head all_mddevs;
> + bool deleted;
>
> const struct attribute_group *to_remove;
Thanks,
Guoqing
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 9/9] md: simplify md_open
2022-07-13 11:31 fix md disk_name lifetime problems v2 Christoph Hellwig
` (7 preceding siblings ...)
2022-07-13 11:31 ` [PATCH 8/9] md: only delete entries from all_mddevs when the disk is freed Christoph Hellwig
@ 2022-07-13 11:31 ` Christoph Hellwig
8 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-07-13 11:31 UTC (permalink / raw)
To: Song Liu; +Cc: Logan Gunthorpe, linux-raid, linux-block
Now that devices are on the all_mddevs list until the gendisk is freed,
there can't be any duplicates. Remove the global list lookup and just
grab a reference.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/md.c | 42 +++++++++++++++---------------------------
1 file changed, 15 insertions(+), 27 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f3ff61e540ee0..41e752c97a196 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7786,45 +7786,33 @@ static int md_set_read_only(struct block_device *bdev, bool ro)
static int md_open(struct block_device *bdev, fmode_t mode)
{
- /*
- * Succeed if we can lock the mddev, which confirms that
- * it isn't being stopped right now.
- */
- struct mddev *mddev = mddev_find(bdev->bd_dev);
+ struct mddev *mddev;
int err;
+ spin_lock(&all_mddevs_lock);
+ mddev = mddev_get(bdev->bd_disk->private_data);
+ spin_unlock(&all_mddevs_lock);
if (!mddev)
return -ENODEV;
- if (mddev->gendisk != bdev->bd_disk) {
- /* we are racing with mddev_put which is discarding this
- * bd_disk.
- */
- mddev_put(mddev);
- /* Wait until bdev->bd_disk is definitely gone */
- if (work_pending(&mddev->del_work))
- flush_workqueue(md_misc_wq);
- return -EBUSY;
- }
- BUG_ON(mddev != bdev->bd_disk->private_data);
-
- if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
+ err = mutex_lock_interruptible(&mddev->open_mutex);
+ if (err)
goto out;
- if (test_bit(MD_CLOSING, &mddev->flags)) {
- mutex_unlock(&mddev->open_mutex);
- err = -ENODEV;
- goto out;
- }
+ err = -ENODEV;
+ if (test_bit(MD_CLOSING, &mddev->flags))
+ goto out_unlock;
- err = 0;
atomic_inc(&mddev->openers);
mutex_unlock(&mddev->open_mutex);
bdev_check_media_change(bdev);
- out:
- if (err)
- mddev_put(mddev);
+ return 0;
+
+out_unlock:
+ mutex_unlock(&mddev->open_mutex);
+out:
+ mddev_put(mddev);
return err;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread