Linux RAID subsystem development
 help / color / mirror / Atom feed
From: Yu Kuai <yukuai1@huaweicloud.com>
To: Christian Theune <ct@flyingcircus.io>, Yu Kuai <yukuai1@huaweicloud.com>
Cc: "John Stoffel" <john@stoffel.org>,
	"linux-raid@vger.kernel.org" <linux-raid@vger.kernel.org>,
	dm-devel@lists.linux.dev,
	"Dragan Milivojević" <galileo@pkm-inc.com>,
	"yukuai (C)" <yukuai3@huawei.com>
Subject: Re: PROBLEM: repeatable lockup on RAID-6 with LUKS dm-crypt on NVMe devices when rsyncing many files
Date: Sat, 26 Oct 2024 17:07:34 +0800	[thread overview]
Message-ID: <2d85e9ab-1d0f-70a1-fab2-1e469764ef28@huaweicloud.com> (raw)
In-Reply-To: <A74EC4F5-2FF8-4274-A1EB-28D527F143F1@flyingcircus.io>

Hi,

在 2024/10/26 13:37, Christian Theune 写道:
> 
>> On 25. Oct 2024, at 16:02, Christian Theune <ct@flyingcircus.io> wrote:
>>
>> Yeah, this was more directed towards the question whether Yu needs me to run the patch that he posted earlier.
>>
>> So. The current status is: previously this crashed within 2-3 hours. Both machines are now running with the bitmap turned off as described above and have been syncing data for about 7 hours. This seems to indicate that the bitmap is involved here.
> 
> Update: both machines have been able to finish their multi-TiB rsync job that previously caused reliable lockups. So: the bitmap code seems to be the culprit here …
> 
> Christian
> 

Then, could you enable the bitmap and test the following debug patch:

Thanks,
Kuai

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 58f71c3e1368..b2a75a904209 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2369,6 +2369,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
                 atomic_set(&sh->count, 1);
                 sh->raid_conf = conf;
                 sh->log_start = MaxSector;
+               atomic_set(&sh->bitmap_counts, 0);

                 if (raid5_has_ppl(conf)) {
                         sh->ppl_page = alloc_page(gfp);
@@ -3565,6 +3566,7 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
                 spin_unlock_irq(&sh->stripe_lock);
                 conf->mddev->bitmap_ops->startwrite(conf->mddev, sh->sector,
                                         RAID5_STRIPE_SECTORS(conf), false);
+               printk("%s: %s: start %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_inc_return(&sh->bitmap_counts));
                 spin_lock_irq(&sh->stripe_lock);
                 clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
                 if (!sh->batch_head) {
@@ -3662,10 +3664,12 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                         bio_io_error(bi);
                         bi = nextbi;
                 }
-               if (bitmap_end)
+               if (bitmap_end) {
                         conf->mddev->bitmap_ops->endwrite(conf->mddev,
                                         sh->sector, RAID5_STRIPE_SECTORS(conf),
                                         false, false);
+                       printk("%s: %s: end %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_dec_return(&sh->bitmap_counts));
+               }
                 bitmap_end = 0;
                 /* and fail all 'written' */
                 bi = sh->dev[i].written;
@@ -3709,10 +3713,12 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                 bi = nextbi;
                         }
                 }
-               if (bitmap_end)
+               if (bitmap_end) {
                         conf->mddev->bitmap_ops->endwrite(conf->mddev,
                                         sh->sector, RAID5_STRIPE_SECTORS(conf),
                                         false, false);
+                       printk("%s: %s: end %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_dec_return(&sh->bitmap_counts));
+               }
                 /* If we were in the middle of a write the parity block 
might
                  * still be locked - so just clear all R5_LOCKED flags
                  */
@@ -4065,6 +4071,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                         sh->sector, RAID5_STRIPE_SECTORS(conf),
                                         !test_bit(STRIPE_DEGRADED, &sh->state),
                                         false);
+                               printk("%s: %s: end %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_dec_return(&sh->bitmap_counts));
                                 if (head_sh->batch_head) {
                                         sh = list_first_entry(&sh->batch_list,
                                                               struct stripe_head,
@@ -5785,9 +5792,11 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                 spin_unlock_irq(&sh->stripe_lock);
                 if (conf->mddev->bitmap) {
                         for (d = 0; d < conf->raid_disks - conf->max_degraded;
-                            d++)
+                            d++) {
                                 mddev->bitmap_ops->startwrite(mddev, sh->sector,
                                         RAID5_STRIPE_SECTORS(conf), false);
+                               printk("%s: %s: start %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_inc_return(&sh->bitmap_counts));
+                       }
                         sh->bm_seq = conf->seq_flush + 1;
                         set_bit(STRIPE_BIT_DELAY, &sh->state);
                 }
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 896ecfc4afa6..12024249245e 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -255,6 +255,7 @@ struct stripe_head {
         int     nr_pages;       /* page array size */
         int     stripes_per_page;
  #endif
+       atomic_t bitmap_counts;
         struct r5dev {
                 /* rreq and rvec are used for the replacement device when
                  * writing data to both devices.


  reply	other threads:[~2024-10-26  9:07 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-06 14:10 PROBLEM: repeatable lockup on RAID-6 with LUKS dm-crypt on NVMe devices when rsyncing many files Christian Theune
2024-08-06 14:10 ` Christian Theune
2024-08-07  2:55 ` Yu Kuai
2024-08-07  5:31   ` Christian Theune
2024-08-07  6:46     ` Christian Theune
2024-08-07  8:59       ` Christian Theune
2024-08-07 21:05         ` John Stoffel
2024-08-08  1:33           ` Yu Kuai
2024-08-08  6:02           ` Christian Theune
2024-08-08  6:55             ` Yu Kuai
2024-08-08  7:06               ` Christian Theune
2024-08-08  8:53                 ` Christian Theune
2024-08-09  1:13                   ` Yu Kuai
2024-08-09  6:10                     ` Christian Theune
2024-08-09 22:51                       ` John Stoffel
2024-08-12  6:58                         ` Christian Theune
2024-08-12 18:37                           ` John Stoffel
2024-08-14  8:53                             ` Christian Theune
2024-08-15  6:19                               ` Christian Theune
2024-08-15 10:03                                 ` Christian Theune
2024-08-15 11:14                                   ` Yu Kuai
2024-08-15 11:24                                     ` Christian Theune
2024-08-15 11:49                                       ` Yu Kuai
2024-10-22 15:02                                     ` Christian Theune
2024-10-23  1:13                                       ` Yu Kuai
2024-10-23  6:03                                         ` Christian Theune
2024-10-23 17:50                                           ` Christian Theune
2024-10-25  8:39                                         ` Christian Theune
2024-10-25 13:31                                           ` Dragan Milivojević
2024-10-25 14:02                                             ` Christian Theune
2024-10-26  5:37                                               ` Christian Theune
2024-10-26  9:07                                                 ` Yu Kuai [this message]
2024-10-26 11:51                                                   ` Christian Theune
2024-10-26 12:07                                                   ` Christian Theune
2024-10-26 12:11                                                     ` Christian Theune
2024-10-30  1:25                                                       ` Yu Kuai
2024-10-30  6:29                                                         ` Christian Theune
2024-10-31  7:48                                                           ` Yu Kuai
2024-10-31  8:04                                                             ` Christian Theune
2024-10-31 15:07                                                               ` Christian Theune
2024-10-31 19:46                                                                 ` Christian Theune
2024-10-31 20:33                                                                   ` John Stoffel
2024-11-01  2:02                                                                     ` Yu Kuai
2024-11-01  7:56                                                                       ` Christian Theune
2024-11-01  8:33                                                                         ` Christian Theune
2024-11-03 15:54                                                                           ` Christian Theune
2024-11-03 16:16                                                                             ` Dragan Milivojević
2024-11-04 11:29                                                                           ` Yu Kuai
2024-11-04 11:51                                                                             ` Christian Theune
2024-11-04 12:30                                                                               ` Yu Kuai
2024-11-04 11:40                                                                           ` Yu Kuai
2024-11-04 12:18                                                                             ` Yu Kuai
2024-11-04 14:45                                                                               ` Christian Theune
2024-11-04 20:04                                                                                 ` Christian Theune
2024-11-05  1:20                                                                                   ` Yu Kuai
2024-11-05  6:23                                                                                     ` Christian Theune
2024-11-05 10:15                                                                                       ` Christian Theune
2024-11-06  6:35                                                                                         ` Yu Kuai
2024-11-06  6:40                                                                                           ` Christian Theune
2024-11-07  7:55                                                                                             ` Yu Kuai
2024-11-07  8:01                                                                                               ` Yu Kuai
2024-11-09 11:35                                                                                               ` Xiao Ni
2024-11-11  2:25                                                                                                 ` Yu Kuai
2024-11-11  8:00                                                                                                 ` Christian Theune
2024-11-11 14:34                                                                                                   ` Christian Theune
2024-11-12  6:57                                                                                                     ` Christian Theune
2024-11-14 15:07                                                                                                       ` Christian Theune
2024-11-15  8:07                                                                                                         ` Xiao Ni
2024-11-15  8:44                                                                                                           ` Christian Theune
2024-11-15 10:11                                                                                                             ` Xiao Ni
2024-11-15 11:06                                                                                                               ` Christian Theune
2024-12-10  8:33                                                                                                                 ` Christian Theune
2024-12-16 13:25                                                                                                                   ` Christian Theune
2024-12-16 13:36                                                                                                                     ` Yu Kuai
2024-12-16 14:18                                                                                                                       ` Christian Theune
2025-01-20  9:19                                                                                                                         ` Christian Theune
2025-01-24  6:22                                                                                                                           ` Christian Theune
2025-01-24  6:35                                                                                                                             ` Yu Kuai
2025-01-24  6:38                                                                                                                               ` Christian Theune
2024-08-15 15:53                                 ` John Stoffel
2024-08-15 19:13                                   ` Christian Theune
2024-08-26 14:38                                     ` Christian Theune
2024-08-08 14:23             ` John Stoffel
2024-08-19 19:12               ` tihmstar
2024-08-19 21:05                 ` John Stoffel
2024-08-24 16:56                   ` tihmstar
2024-08-24 18:12                   ` Dragan Milivojević
2024-08-27  1:27                     ` John Stoffel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2d85e9ab-1d0f-70a1-fab2-1e469764ef28@huaweicloud.com \
    --to=yukuai1@huaweicloud.com \
    --cc=ct@flyingcircus.io \
    --cc=dm-devel@lists.linux.dev \
    --cc=galileo@pkm-inc.com \
    --cc=john@stoffel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=yukuai3@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox