* [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
@ 2022-10-17 2:11 Xiao Ni
2022-10-20 19:49 ` Song Liu
0 siblings, 1 reply; 7+ messages in thread
From: Xiao Ni @ 2022-10-17 2:11 UTC (permalink / raw)
To: song; +Cc: guoqing.jiang, linux-raid, ffan
io_acct_set was added for raid0/raid5 io accounting, and an md_io_acct
needs to be allocated in the i/o path. These are freed when the bios come
back from the member disks. Right now there is no way to know whether all
of those bios have come back. In the takeover process, the raid0 memory
resources need to be freed, including the memory pool for md_io_acct, but
some bios may not have returned yet. When those bios do return, they can
cause a panic because of a NULL pointer dereference or an invalid address.
This patch adds io_acct_cnt, so when stopping raid0 it can be used to wait
until all bios have come back.
Reported-by: Fine Fan <ffan@redhat.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
---
drivers/md/md.c | 13 ++++++++++++-
drivers/md/md.h | 11 ++++++++---
drivers/md/raid0.c | 6 ++++++
3 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9dc0175280b4..57dc2ddf1e11 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -673,6 +673,7 @@ void mddev_init(struct mddev *mddev)
atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
+ init_waitqueue_head(&mddev->wait_io_acct);
mddev->reshape_position = MaxSector;
mddev->reshape_backwards = 0;
mddev->last_sync_action = "none";
@@ -8600,15 +8601,18 @@ int acct_bioset_init(struct mddev *mddev)
{
int err = 0;
- if (!bioset_initialized(&mddev->io_acct_set))
+ if (!bioset_initialized(&mddev->io_acct_set)) {
+ atomic_set(&mddev->io_acct_cnt, 0);
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
offsetof(struct md_io_acct, bio_clone), 0);
+ }
return err;
}
EXPORT_SYMBOL_GPL(acct_bioset_init);
void acct_bioset_exit(struct mddev *mddev)
{
+ WARN_ON(atomic_read(&mddev->io_acct_cnt) != 0);
bioset_exit(&mddev->io_acct_set);
}
EXPORT_SYMBOL_GPL(acct_bioset_exit);
@@ -8617,12 +8621,17 @@ static void md_end_io_acct(struct bio *bio)
{
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
+ struct mddev *mddev = md_io_acct->mddev;
orig_bio->bi_status = bio->bi_status;
bio_end_io_acct(orig_bio, md_io_acct->start_time);
bio_put(bio);
bio_endio(orig_bio);
+
+ if (atomic_dec_and_test(&mddev->io_acct_cnt))
+ if (unlikely(test_bit(MD_QUIESCE, &mddev->flags)))
+ wake_up(&mddev->wait_io_acct);
}
/*
@@ -8642,6 +8651,8 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
+ md_io_acct->mddev = mddev;
+ atomic_inc(&mddev->io_acct_cnt);
clone->bi_end_io = md_end_io_acct;
clone->bi_private = md_io_acct;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b4e2d8b87b61..061176ff325f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -255,6 +255,7 @@ struct md_cluster_info;
* array is ready yet.
* @MD_BROKEN: This is used to stop writes and mark array as failed.
* @MD_DELETED: This device is being deleted
+ * @MD_QUIESCE: This device is being quiesced. Currently only raid0 uses this flag
*
* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
*/
@@ -272,6 +273,7 @@ enum mddev_flags {
MD_NOT_READY,
MD_BROKEN,
MD_DELETED,
+ MD_QUIESCE,
};
enum mddev_sb_flags {
@@ -513,6 +515,8 @@ struct mddev {
* metadata and bitmap writes
*/
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
+ atomic_t io_acct_cnt;
+ wait_queue_head_t wait_io_acct;
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
@@ -710,9 +714,10 @@ struct md_thread {
};
struct md_io_acct {
- struct bio *orig_bio;
- unsigned long start_time;
- struct bio bio_clone;
+ struct bio *orig_bio;
+ unsigned long start_time;
+ struct bio bio_clone;
+ struct mddev *mddev;
};
#define THREAD_WAKEUP 0
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 857c49399c28..aced0ad8cdab 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -754,6 +754,12 @@ static void *raid0_takeover(struct mddev *mddev)
static void raid0_quiesce(struct mddev *mddev, int quiesce)
{
+	/* Don't use a separate struct to count how many bios are submitted
+	 * to member disks; this avoids a memory allocation and a performance
+	 * hit in the i/o path
+	 */
+ set_bit(MD_QUIESCE, &mddev->flags);
+ wait_event(mddev->wait_io_acct, !atomic_read(&mddev->io_acct_cnt));
+ clear_bit(MD_QUIESCE, &mddev->flags);
}
static struct md_personality raid0_personality=
--
2.32.0 (Apple Git-132)
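The scheme the patch implements boils down to an in-flight counter plus a
wait queue: the submit path increments, the completion path decrements and
wakes any quiescer, and raid0_quiesce() sleeps until the counter drains.
Below is a minimal userspace analogue of that pattern, with C11 atomics and
a pthread condition variable standing in for atomic_t and
wait_event()/wake_up(); the names are illustrative, not kernel API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int io_acct_cnt;     /* stands in for mddev->io_acct_cnt */
static atomic_bool quiescing;      /* stands in for the MD_QUIESCE bit */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_io_acct = PTHREAD_COND_INITIALIZER;

/* submit path, cf. md_account_bio(): count the clone before it is issued */
static void account_io(void)
{
        atomic_fetch_add(&io_acct_cnt, 1);
}

/* completion path, cf. md_end_io_acct(): drop the count and wake the
 * quiescer when the last accounted bio returns during a quiesce */
static void end_io(void)
{
        if (atomic_fetch_sub(&io_acct_cnt, 1) == 1 &&
            atomic_load(&quiescing)) {
                pthread_mutex_lock(&lock);
                pthread_cond_broadcast(&wait_io_acct);
                pthread_mutex_unlock(&lock);
        }
}

/* cf. raid0_quiesce(): block until every accounted bio has completed */
static void quiesce(void)
{
        atomic_store(&quiescing, true);
        pthread_mutex_lock(&lock);
        while (atomic_load(&io_acct_cnt) != 0)
                pthread_cond_wait(&wait_io_acct, &lock);
        pthread_mutex_unlock(&lock);
        atomic_store(&quiescing, false);
}

The mutex-protected re-check of the counter before sleeping closes the
lost-wakeup window; in the kernel, wait_event() performs the same re-check
internally.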
* Re: [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
2022-10-17 2:11 [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce Xiao Ni
@ 2022-10-20 19:49 ` Song Liu
2022-10-21 10:07 ` Xiao Ni
0 siblings, 1 reply; 7+ messages in thread
From: Song Liu @ 2022-10-20 19:49 UTC (permalink / raw)
To: Xiao Ni; +Cc: guoqing.jiang, linux-raid, ffan
On Sun, Oct 16, 2022 at 7:11 PM Xiao Ni <xni@redhat.com> wrote:
>
> io_acct_set was added for raid0/raid5 io accounting, and an md_io_acct
> needs to be allocated in the i/o path. These are freed when the bios come
> back from the member disks. Right now there is no way to know whether all
> of those bios have come back. In the takeover process, the raid0 memory
> resources need to be freed, including the memory pool for md_io_acct, but
> some bios may not have returned yet. When those bios do return, they can
> cause a panic because of a NULL pointer dereference or an invalid address.
>
> This patch adds io_acct_cnt, so when stopping raid0 it can be used to wait
> until all bios have come back.
>
> Reported-by: Fine Fan <ffan@redhat.com>
> Signed-off-by: Xiao Ni <xni@redhat.com>
I have seen a lot of warnings and errors in dmesg with this patch. For example:
[ 402.116463] =============================================================================
[ 402.117176] BUG bio-144 (Tainted: G B W ): Right
Redzone overwritten
[ 402.117837] -----------------------------------------------------------------------------
[ 402.117837]
[ 402.118713] 0xffff88816f683cd0-0xffff88816f683cd7 @offset=15568.
First byte 0x0 instead of 0xcc
[ 402.119505] Allocated in mempool_alloc+0x79/0x1a0 age=1038 cpu=19 pid=1130
[ 402.120133] kmem_cache_alloc+0x2dc/0x3c0
[ 402.120510] mempool_alloc+0x79/0x1a0
[ 402.120840] bio_alloc_bioset+0xcb/0x530
[ 402.121205] bio_alloc_clone+0x20/0x60
[ 402.121560] md_account_bio+0x41/0x80
[ 402.121890] raid5_make_request+0x1cf/0x1450
[ 402.122327] md_handle_request+0x26c/0x3f0
[ 402.122700] __submit_bio+0x53/0x180
[ 402.123030] submit_bio_noacct_nocheck+0xe8/0x2b0
[ 402.123453] __blkdev_direct_IO_async+0x109/0x1d0
[ 402.123897] generic_file_direct_write+0x9c/0x1e0
[ 402.124332] __generic_file_write_iter+0x95/0x170
[ 402.124771] blkdev_write_iter+0xe9/0x180
[ 402.125162] aio_write+0x11a/0x2e0
[ 402.125503] io_submit_one+0x627/0xd20
[ 402.125844] __x64_sys_io_submit+0x88/0x250
[ 402.126223] Slab 0xffffea0005bda000 objects=51 used=51
fp=0x0000000000000000 flags=0x200000000010200(slab|head|node=0|zone=2)
[ 402.127227] Object 0xffff88816f683c40 @offset=15424 fp=0x0000000000000000
[ 402.127227]
[ 402.127960] Redzone ffff88816f683c00: cc cc cc cc cc cc cc cc cc
cc cc cc cc cc cc cc ................
[ 402.128797] Redzone ffff88816f683c10: cc cc cc cc cc cc cc cc cc
cc cc cc cc cc cc cc ................
[ 402.129665] Redzone ffff88816f683c20: cc cc cc cc cc cc cc cc cc
cc cc cc cc cc cc cc ................
[ 402.130503] Redzone ffff88816f683c30: cc cc cc cc cc cc cc cc cc
cc cc cc cc cc cc cc ................
[ 402.131336] Object ffff88816f683c40: 80 a3 68 6f 81 88 ff ff af
21 00 00 01 00 00 00 ..ho.....!......
[ 402.132166] Object ffff88816f683c50: 00 00 00 00 00 00 00 00 80
23 09 0b 81 88 ff ff .........#......
[ 402.132996] Object ffff88816f683c60: 01 88 00 00 02 00 04 40 00
5a 5a 5a 00 00 00 00 .......@.ZZZ....
[ 402.133822] Object ffff88816f683c70: 88 86 1c 00 00 00 00 00 00
10 00 00 00 00 00 00 ................
[ 402.134647] Object ffff88816f683c80: 00 00 00 00 ff ff ff ff e0
a9 a8 81 ff ff ff ff ................
[ 402.135501] Object ffff88816f683c90: 40 3c 68 6f 81 88 ff ff 00
00 00 00 00 00 00 00 @<ho............
[ 402.136354] Object ffff88816f683ca0: 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 ................
[ 402.137174] Object ffff88816f683cb0: 00 00 00 00 00 00 00 00 00
00 00 00 01 00 00 00 ................
[ 402.138027] Object ffff88816f683cc0: 00 a4 68 6f 81 88 ff ff 40
2f c4 73 81 88 ff ff ..ho....@/.s....
[ 402.138857] Redzone ffff88816f683cd0: 00 20 c4 73 81 88 ff ff
. .s....
[ 402.139657] Padding ffff88816f683d20: 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
[ 402.140510] Padding ffff88816f683d30: 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
[ 402.141345] CPU: 29 PID: 1092 Comm: md0_raid5 Tainted: G B W
6.1.0-rc1+ #145
[ 402.142083] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
[ 402.143127] Call Trace:
[ 402.143365] <TASK>
[ 402.143563] dump_stack_lvl+0x45/0x5d
[ 402.143899] check_bytes_and_report.cold+0x6d/0x85
[ 402.144343] check_object+0x1fa/0x2d0
[ 402.144675] free_debug_processing+0x1bc/0x660
[ 402.145091] ? md_end_io_acct+0x3c/0x80
[ 402.145464] ? md_end_io_acct+0x3c/0x80
[ 402.145812] kmem_cache_free+0x55f/0x5b0
[ 402.146164] md_end_io_acct+0x3c/0x80
[ 402.146498] handle_stripe+0x11a5/0x1d70
[ 402.146849] handle_active_stripes.constprop.0+0x487/0x5e0
[ 402.147353] raid5d+0x40d/0x680
[ 402.147640] ? lock_acquire+0x1ad/0x310
[ 402.147989] md_thread+0xc2/0x170
[ 402.148319] ? prepare_to_wait_exclusive+0xe0/0xe0
[ 402.148749] ? register_md_personality+0x90/0x90
[ 402.149162] kthread+0xf2/0x120
[ 402.149455] ? kthread_complete_and_exit+0x20/0x20
[ 402.149884] ret_from_fork+0x22/0x30
[ 402.150211] </TASK>
[ 402.150431] FIX bio-144: Restoring Right Redzone
0xffff88816f683cd0-0xffff88816f683cd7=0xcc
[ 402.151196] FIX bio-144: Object at 0xffff88816f683c40 not freed
Please fix them and resend.
Thanks,
Song
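The "Right Redzone overwritten" report points at a layout problem rather
than at the counting logic: acct_bioset_init() passes
offsetof(struct md_io_acct, bio_clone) as the bioset front pad, which
assumes bio_clone is the last member of struct md_io_acct. The patch adds
the mddev pointer after bio_clone, so the write to md_io_acct->mddev in
md_account_bio() lands just past the end of the mempool object, which is
exactly where the right redzone sits. A sketch of a layout that keeps the
front-pad assumption intact (one plausible shape for the fix, not
necessarily what the eventual v2 did):

struct md_io_acct {
        struct mddev    *mddev;      /* new members belong before bio_clone */
        struct bio      *orig_bio;
        unsigned long   start_time;
        struct bio      bio_clone;   /* must stay last: the bioset front pad
                                      * is offsetof(struct md_io_acct,
                                      * bio_clone), so any field placed after
                                      * it falls outside the allocation */
};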
* Re: [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
2022-10-20 19:49 ` Song Liu
@ 2022-10-21 10:07 ` Xiao Ni
2022-10-21 21:10 ` Song Liu
0 siblings, 1 reply; 7+ messages in thread
From: Xiao Ni @ 2022-10-21 10:07 UTC (permalink / raw)
To: Song Liu; +Cc: guoqing.jiang, linux-raid, ffan
On Fri, Oct 21, 2022 at 3:50 AM Song Liu <song@kernel.org> wrote:
>
> I have seen a lot of warnings and errors in dmesg with this patch.
>
> Please fix them and resend.
>
> Thanks,
> Song
Hi Song
What commands do you run? I've run some tests and didn't see the messages.
By the way, what disks do you use?
Regards
Xiao
* Re: [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
2022-10-21 10:07 ` Xiao Ni
@ 2022-10-21 21:10 ` Song Liu
2022-10-23 9:15 ` Xiao Ni
0 siblings, 1 reply; 7+ messages in thread
From: Song Liu @ 2022-10-21 21:10 UTC (permalink / raw)
To: Xiao Ni; +Cc: guoqing.jiang, linux-raid, ffan
On Fri, Oct 21, 2022 at 3:07 AM Xiao Ni <xni@redhat.com> wrote:
>
> What commands do you run? I've run some tests and didn't see the messages.
> By the way, what disks do you use?
I see these with regular IO. Some fio-libaio-direct workload should trigger it.
This is running in Qemu on virtual nvme devices, and with some debug
options enabled (KASAN, LOCKDEP, etc.).
Thanks,
Song
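No job file is given in the thread, so the exact parameters below are an
assumption, but a libaio + O_DIRECT workload of roughly this shape against
the array should match Song's description:

fio --name=md-direct --filename=/dev/md0 --ioengine=libaio --direct=1 \
    --rw=randwrite --bs=4k --iodepth=32 --numjobs=4 --time_based --runtime=60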
* Re: [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
2022-10-21 21:10 ` Song Liu
@ 2022-10-23 9:15 ` Xiao Ni
0 siblings, 0 replies; 7+ messages in thread
From: Xiao Ni @ 2022-10-23 9:15 UTC (permalink / raw)
To: Song Liu; +Cc: guoqing.jiang, linux-raid, ffan
On Sat, Oct 22, 2022 at 5:10 AM Song Liu <song@kernel.org> wrote:
>
> I see these with regular IO. Some fio-libaio-direct workload should trigger it.
> This is running in Qemu on virtual nvme devices, and with some debug
> options enabled (KASAN, LOCKDEP, etc.).
Hi Song
I have reproduced this. Thanks for pointing this out. I'll fix this and re-send v2.
Regards
Xiao
* [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
@ 2022-10-12 9:11 Xiao Ni
2022-10-13 0:45 ` Xiao Ni
0 siblings, 1 reply; 7+ messages in thread
From: Xiao Ni @ 2022-10-12 9:11 UTC (permalink / raw)
To: song; +Cc: ffan, guoqing.jiang, linux-raid
io_acct_set was added for raid0/raid5 io accounting, and an md_io_acct
needs to be allocated in the i/o path. These are freed when the bios come
back from the member disks. Right now there is no way to know whether all
of those bios have come back. In the takeover process, the raid0 memory
resources need to be freed, including the memory pool for md_io_acct, but
some bios may not have returned yet. When those bios do return, they can
cause a panic because of a NULL pointer dereference or an invalid address.
This patch adds io_acct_cnt, so when stopping raid0 it can be used to wait
until all bios have come back.
Reported-by: Fine Fan <ffan@redhat.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
---
drivers/md/md.c | 10 +++++++++-
drivers/md/md.h | 8 +++++---
drivers/md/raid0.c | 8 ++++++++
drivers/md/raid0.h | 1 +
4 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9dc0175280b4..d6e9fa914087 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8600,15 +8600,18 @@ int acct_bioset_init(struct mddev *mddev)
{
int err = 0;
- if (!bioset_initialized(&mddev->io_acct_set))
+ if (!bioset_initialized(&mddev->io_acct_set)) {
+ atomic_set(&mddev->io_acct_cnt, 0);
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
offsetof(struct md_io_acct, bio_clone), 0);
+ }
return err;
}
EXPORT_SYMBOL_GPL(acct_bioset_init);
void acct_bioset_exit(struct mddev *mddev)
{
+ WARN_ON(atomic_read(&mddev->io_acct_cnt) != 0);
bioset_exit(&mddev->io_acct_set);
}
EXPORT_SYMBOL_GPL(acct_bioset_exit);
@@ -8617,12 +8620,15 @@ static void md_end_io_acct(struct bio *bio)
{
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
+ struct mddev *mddev = md_io_acct->mddev;
orig_bio->bi_status = bio->bi_status;
bio_end_io_acct(orig_bio, md_io_acct->start_time);
bio_put(bio);
bio_endio(orig_bio);
+
+ atomic_dec(&mddev->io_acct_cnt);
}
/*
@@ -8642,6 +8648,8 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
+ md_io_acct->mddev = mddev;
+ atomic_inc(&mddev->io_acct_cnt);
clone->bi_end_io = md_end_io_acct;
clone->bi_private = md_io_acct;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b4e2d8b87b61..29d30642e13f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -513,6 +513,7 @@ struct mddev {
* metadata and bitmap writes
*/
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
+ atomic_t io_acct_cnt;
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
@@ -710,9 +711,10 @@ struct md_thread {
};
struct md_io_acct {
- struct bio *orig_bio;
- unsigned long start_time;
- struct bio bio_clone;
+ struct bio *orig_bio;
+ unsigned long start_time;
+ struct bio bio_clone;
+ struct mddev *mddev;
};
#define THREAD_WAKEUP 0
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 857c49399c28..1d2e098e0d52 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -73,6 +73,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
*private_conf = ERR_PTR(-ENOMEM);
if (!conf)
return -ENOMEM;
+
+ init_waitqueue_head(&conf->wait_quiesce);
rdev_for_each(rdev1, mddev) {
pr_debug("md/raid0:%s: looking at %pg\n",
mdname(mddev),
@@ -754,6 +756,12 @@ static void *raid0_takeover(struct mddev *mddev)
static void raid0_quiesce(struct mddev *mddev, int quiesce)
{
+ struct r0conf *conf = mddev->private;
+
+	/* Don't use a separate struct to count how many bios are submitted
+	 * to member disks; this avoids a memory allocation and a performance
+	 * hit in the i/o path
+	 */
+ wait_event(conf->wait_quiesce, atomic_read(&mddev->io_acct_cnt) == 0);
}
static struct md_personality raid0_personality=
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
index 3816e5477db1..560dec93d459 100644
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -27,6 +27,7 @@ struct r0conf {
* by strip_zone->dev */
int nr_strip_zones;
enum r0layout layout;
+ wait_queue_head_t wait_quiesce;
};
#endif
--
2.32.0 (Apple Git-132)
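Relative to the 2022-10-17 posting at the top of this page, this version
only decrements the counter in md_end_io_acct() and never wakes
conf->wait_quiesce, which is the gap Xiao points out in the follow-up
below. The later revision closes it by adding a wake-up to the completion
path:

        /* from the later md_end_io_acct() */
        if (atomic_dec_and_test(&mddev->io_acct_cnt))
                if (unlikely(test_bit(MD_QUIESCE, &mddev->flags)))
                        wake_up(&mddev->wait_io_acct);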
* Re: [PATCH 1/1] Add mddev->io_acct_cnt for raid0_quiesce
2022-10-12 9:11 Xiao Ni
@ 2022-10-13 0:45 ` Xiao Ni
0 siblings, 0 replies; 7+ messages in thread
From: Xiao Ni @ 2022-10-13 0:45 UTC (permalink / raw)
To: song; +Cc: ffan, guoqing.jiang, linux-raid
Please ignore this patch. As written, there is no place that wakes up
raid0_quiesce. I'll send a new one.
Regards
Xiao