From: Jinpu Wang <jinpu.wang@profitbricks.com>
To: NeilBrown <neilb@suse.com>
Cc: linux-raid@vger.kernel.org, Shaohua Li <shli@fb.com>,
Nate Dailey <nate.dailey@stratus.com>
Subject: Re: [BUG] MD/RAID1 hung forever on freeze_array
Date: Tue, 13 Dec 2016 16:08:40 +0100
Message-ID: <CAMGffEm4BDhKJSiDt=2WmxrpRutg862at7JP221gMi6_SpPomQ@mail.gmail.com>
In-Reply-To: <87oa0gzuej.fsf@notabene.neil.brown.name>
Hi Neil,
On Mon, Dec 12, 2016 at 10:53 PM, NeilBrown <neilb@suse.com> wrote:
> On Tue, Dec 13 2016, Jinpu Wang wrote:
>
>> On Mon, Dec 12, 2016 at 1:59 AM, NeilBrown <neilb@suse.com> wrote:
>>> On Sat, Nov 26 2016, Jinpu Wang wrote:
>>>> [ 810.270860] [<ffffffff813fc851>] blk_prologue_bio+0x91/0xc0
>>>
>>> What is this? I cannot find that function in the upstream kernel.
>>>
>>> NeilBrown
>>
>> Hi Neil,
>>
>> blk_prologue_bio is our internal extension to gather some stats; sorry,
>> I hadn't mentioned it before.
>
> Ahhh.
>
> ....
>> + return q->custom_make_request_fn(q, clone);
>
> I haven't heard of custom_make_request_fn before either.
>
>> +}
>>
>> IMHO, it seems unrelated, but I will rerun my test without this change.
>
> Yes, please re-test with an unmodified upstream kernel (and always
> report *exactly* what kernel you are running. I cannot analyse code
> that I cannot see).
>
> NeilBrown
As you suggested, I re-ran the same test on 4.4.36 without any of our own
patches on MD. I can still reproduce the same bug; nr_pending on the
healthy leg (loop1) is still 1.
From the 4.4.36 kernel:
crash> bt 4069
PID: 4069 TASK: ffff88022b4f8d00 CPU: 3 COMMAND: "md2_raid1"
#0 [ffff8800b77d3bf8] __schedule at ffffffff81811453
#1 [ffff8800b77d3c50] schedule at ffffffff81811c30
#2 [ffff8800b77d3c68] freeze_array at ffffffffa07ee17e [raid1]
#3 [ffff8800b77d3cc0] handle_read_error at ffffffffa07f093b [raid1]
#4 [ffff8800b77d3d68] raid1d at ffffffffa07f10a6 [raid1]
#5 [ffff8800b77d3e60] md_thread at ffffffffa04dee80 [md_mod]
#6 [ffff8800b77d3ed0] kthread at ffffffff81075fb6
#7 [ffff8800b77d3f50] ret_from_fork at ffffffff818157df
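
The md2_raid1 thread is stuck in freeze_array(). For reference, this is
roughly what freeze_array() does in 4.4 (quoted from memory of
drivers/md/raid1.c, so details may differ slightly):

    static void freeze_array(struct r1conf *conf, int extra)
    {
            /* Stop normal I/O and wait until everything in flight has
             * either completed or been queued for retry; the array only
             * counts as quiesced once nr_pending == nr_queued + extra.
             */
            spin_lock_irq(&conf->resync_lock);
            conf->array_frozen = 1;
            wait_event_lock_irq_cmd(conf->wait_barrier,
                                    conf->nr_pending == conf->nr_queued + extra,
                                    conf->resync_lock,
                                    flush_pending_writes(conf));
            spin_unlock_irq(&conf->resync_lock);
    }

handle_read_error() calls this as freeze_array(conf, 1).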
crash> bt 4558
PID: 4558 TASK: ffff88022b550d00 CPU: 3 COMMAND: "fio"
#0 [ffff88022c287710] __schedule at ffffffff81811453
#1 [ffff88022c287768] schedule at ffffffff81811c30
#2 [ffff88022c287780] wait_barrier at ffffffffa07ee044 [raid1]
#3 [ffff88022c2877e8] make_request at ffffffffa07efc65 [raid1]
#4 [ffff88022c2878d0] md_make_request at ffffffffa04df609 [md_mod]
#5 [ffff88022c287928] generic_make_request at ffffffff813fd3de
#6 [ffff88022c287970] submit_bio at ffffffff813fd522
#7 [ffff88022c2879b8] do_blockdev_direct_IO at ffffffff811d32a7
#8 [ffff88022c287be8] __blockdev_direct_IO at ffffffff811d3b6e
#9 [ffff88022c287c10] blkdev_direct_IO at ffffffff811ce2d7
#10 [ffff88022c287c38] generic_file_direct_write at ffffffff81132c90
#11 [ffff88022c287cb0] __generic_file_write_iter at ffffffff81132e1d
#12 [ffff88022c287d08] blkdev_write_iter at ffffffff811ce597
#13 [ffff88022c287d68] aio_run_iocb at ffffffff811deca6
#14 [ffff88022c287e68] do_io_submit at ffffffff811dfbaa
#15 [ffff88022c287f40] sys_io_submit at ffffffff811dfe4b
#16 [ffff88022c287f50] entry_SYSCALL_64_fastpath at ffffffff81815497
RIP: 00007f63b1362737 RSP: 00007ffff7eb17f8 RFLAGS: 00000206
RAX: ffffffffffffffda RBX: 00007f63a142a000 RCX: 00007f63b1362737
RDX: 0000000001179b58 RSI: 0000000000000001 RDI: 00007f63b1f4a000
RBP: 0000000000000512 R8: 0000000000000001 R9: 0000000001171fa0
R10: 00007f639ef84000 R11: 0000000000000206 R12: 0000000000000001
R13: 0000000000000200 R14: 000000003a2d3000 R15: 0000000000000001
ORIG_RAX: 00000000000000d1 CS: 0033 SS: 002b
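
The fio submitters are all parked in wait_barrier(), which in 4.4 refuses
to admit new I/O while array_frozen is set. An abridged sketch (the
resync-window checks are elided):

    static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
    {
            sector_t sector = 0;

            spin_lock_irq(&conf->resync_lock);
            if (need_to_wait_for_sync(conf, bio)) {
                    conf->nr_waiting++;
                    /* blocks here for as long as array_frozen == 1
                     * (window conditions elided) */
                    wait_event_lock_irq(conf->wait_barrier,
                                        !conf->array_frozen && !conf->barrier,
                                        conf->resync_lock);
                    conf->nr_waiting--;
            }
            /* nr_pending++ and window bookkeeping elided */
            spin_unlock_irq(&conf->resync_lock);
            return sector;
    }

That matches nr_waiting = 97 in the r1conf dump below: 97 submitters are
all waiting for the freeze to end.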
crash> struct r1conf 0xffff880037362100
struct r1conf {
mddev = 0xffff880037352800,
mirrors = 0xffff88022c209c00,
raid_disks = 2,
next_resync = 18446744073709527039,
start_next_window = 18446744073709551615,
current_window_requests = 0,
next_window_requests = 0,
device_lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
},
retry_list = {
next = 0xffff8801ce757740,
prev = 0xffff8801b1b79140
},
bio_end_io_list = {
next = 0xffff8801ce7d9ac0,
prev = 0xffff88022838f4c0
},
pending_bio_list = {
head = 0x0,
tail = 0x0
},
pending_count = 0,
wait_barrier = {
lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
},
task_list = {
next = 0xffff8801f6d87818,
prev = 0xffff88022c2877a8
}
},
resync_lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
},
nr_pending = 2086,
nr_waiting = 97,
nr_queued = 2084,
barrier = 0,
array_frozen = 1,
fullsync = 0,
recovery_disabled = 1,
poolinfo = 0xffff8802330be390,
r1bio_pool = 0xffff88022bdf54e0,
r1buf_pool = 0x0,
tmppage = 0xffffea0000dcee40,
thread = 0x0,
cluster_sync_low = 0,
cluster_sync_high = 0
}
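
Reading those counters against the freeze_array() condition above (my
arithmetic, assuming extra == 1 from handle_read_error):

    nr_pending == nr_queued + extra
          2086 ==      2084 + 1     ->  2086 != 2085

so freeze_array() can never make progress, and with array_frozen = 1
every new submitter stays blocked in wait_barrier(). One request is
accounted in nr_pending but apparently never reaches nr_queued.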
crash> struct raid1_info 0xffff88022c209c00
struct raid1_info {
rdev = 0xffff880231635800,
head_position = 1318965,
next_seq_sect = 252597,
seq_start = 252342
}
crash> struct raid1_info 0xffff88022c209c20
struct raid1_info {
rdev = 0xffff88023166ce00,
head_position = 1585216,
next_seq_sect = 839992,
seq_start = 839977
}
crash> struct md_rdev 0xffff880231635800
struct md_rdev {
same_set = {
next = 0xffff880037352818,
prev = 0xffff88023166ce00
},
sectors = 2095104,
mddev = 0xffff880037352800,
last_events = 41325652,
meta_bdev = 0x0,
bdev = 0xffff880235c2aa40,
sb_page = 0xffffea0002dd98c0,
bb_page = 0xffffea0002e48f80,
sb_loaded = 1,
sb_events = 205,
data_offset = 2048,
new_data_offset = 2048,
sb_start = 8,
sb_size = 512,
preferred_minor = 65535,
kobj = {
name = 0xffff8802341cdef0 "dev-loop1",
entry = {
next = 0xffff880231635880,
prev = 0xffff880231635880
},
parent = 0xffff880037352850,
kset = 0x0,
ktype = 0xffffffffa04f3020 <rdev_ktype>,
sd = 0xffff880233e3b8e8,
kref = {
refcount = {
counter = 1
}
},
state_initialized = 1,
state_in_sysfs = 1,
state_add_uevent_sent = 0,
state_remove_uevent_sent = 0,
uevent_suppress = 0
},
flags = 2,
blocked_wait = {
lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
},
task_list = {
next = 0xffff8802316358c8,
prev = 0xffff8802316358c8
}
},
desc_nr = 0,
raid_disk = 0,
new_raid_disk = 0,
saved_raid_disk = -1,
{
recovery_offset = 0,
journal_tail = 0
},
nr_pending = {
counter = 1
},
read_errors = {
counter = 0
},
last_read_error = {
tv_sec = 0,
tv_nsec = 0
},
corrected_errors = {
counter = 0
},
del_work = {
data = {
counter = 0
},
entry = {
next = 0x0,
prev = 0x0
},
func = 0x0
},
sysfs_state = 0xffff880233e3b960,
badblocks = {
count = 0,
unacked_exist = 0,
shift = 0,
page = 0xffff88022c0d6000,
changed = 0,
lock = {
seqcount = {
sequence = 264
},
lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
}
},
sector = 0,
size = 0
}
}
crash> struct md_rdev 0xffff88023166ce00
struct md_rdev {
same_set = {
next = 0xffff880231635800,
prev = 0xffff880037352818
},
sectors = 2095104,
mddev = 0xffff880037352800,
last_events = 10875407,
meta_bdev = 0x0,
bdev = 0xffff880234a86a40,
sb_page = 0xffffea00089e0ac0,
bb_page = 0xffffea0007db4980,
sb_loaded = 1,
sb_events = 204,
data_offset = 2048,
new_data_offset = 2048,
sb_start = 8,
sb_size = 512,
preferred_minor = 65535,
kobj = {
name = 0xffff88022c100e30 "dev-ibnbd0",
entry = {
next = 0xffff88023166ce80,
prev = 0xffff88023166ce80
},
parent = 0xffff880037352850,
kset = 0x0,
ktype = 0xffffffffa04f3020 <rdev_ktype>,
sd = 0xffff8800b6539e10,
kref = {
refcount = {
counter = 1
}
},
state_initialized = 1,
state_in_sysfs = 1,
state_add_uevent_sent = 0,
state_remove_uevent_sent = 0,
uevent_suppress = 0
},
flags = 581,
blocked_wait = {
lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
},
task_list = {
next = 0xffff88023166cec8,
prev = 0xffff88023166cec8
}
},
desc_nr = 1,
raid_disk = 1,
new_raid_disk = 0,
saved_raid_disk = -1,
{
recovery_offset = 18446744073709551615,
journal_tail = 18446744073709551615
},
nr_pending = {
counter = 2073
},
read_errors = {
counter = 0
},
last_read_error = {
tv_sec = 0,
tv_nsec = 0
},
corrected_errors = {
counter = 0
},
del_work = {
data = {
counter = 0
},
entry = {
next = 0x0,
prev = 0x0
},
func = 0x0
},
sysfs_state = 0xffff8800b6539e88,
badblocks = {
count = 1,
unacked_exist = 0,
shift = 0,
page = 0xffff880099ced000,
changed = 0,
lock = {
seqcount = {
sequence = 4
},
lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0
}
}
}
}
}
},
sector = 80,
size = 8
}
}
--
Jinpu Wang
Linux Kernel Developer
ProfitBricks GmbH
Greifswalder Str. 207
D - 10405 Berlin
Tel: +49 30 577 008 042
Fax: +49 30 577 008 299
Email: jinpu.wang@profitbricks.com
URL: https://www.profitbricks.de
Sitz der Gesellschaft: Berlin
Registergericht: Amtsgericht Charlottenburg, HRB 125506 B
Geschäftsführer: Achim Weiss