From: Jinpu Wang <jinpu.wang@profitbricks.com>
To: NeilBrown <neilb@suse.com>
Cc: linux-raid@vger.kernel.org, Shaohua Li <shli@fb.com>,
	Nate Dailey <nate.dailey@stratus.com>
Subject: Re: [BUG] MD/RAID1 hung forever on freeze_array
Date: Tue, 13 Dec 2016 16:08:40 +0100
Message-ID: <CAMGffEm4BDhKJSiDt=2WmxrpRutg862at7JP221gMi6_SpPomQ@mail.gmail.com>
In-Reply-To: <87oa0gzuej.fsf@notabene.neil.brown.name>

Hi Neil,

On Mon, Dec 12, 2016 at 10:53 PM, NeilBrown <neilb@suse.com> wrote:
> On Tue, Dec 13 2016, Jinpu Wang wrote:
>
>> On Mon, Dec 12, 2016 at 1:59 AM, NeilBrown <neilb@suse.com> wrote:
>>> On Sat, Nov 26 2016, Jinpu Wang wrote:
>>>> [  810.270860]  [<ffffffff813fc851>] blk_prologue_bio+0x91/0xc0
>>>
>>> What is this?  I cannot find that function in the upstream kernel.
>>>
>>> NeilBrown
>>
>> Hi Neil,
>>
>> blk_prologue_bio is our internal extension to gather some stats; sorry
>> for not mentioning it before.
>
> Ahhh.
>
> ....
>> +       return q->custom_make_request_fn(q, clone);
>
> I haven't heard of custom_make_request_fn before either.
>
>> +}
>>
>> IMHO, it seems unrelated, but I will rerun my test without this change.
>
> Yes, please re-test with an unmodified upstream kernel (and always
> report *exactly* what kernel you are running.  I cannot analyse code
> that I cannot see).
>
> NeilBrown

As you suggested, I re-ran the same test on a vanilla 4.4.36 kernel with none of our own MD patches.
I can still reproduce the same bug: nr_pending on the healthy leg (loop1) is still 1.

4.4.36 kernel:
crash> bt 4069
PID: 4069   TASK: ffff88022b4f8d00  CPU: 3   COMMAND: "md2_raid1"
 #0 [ffff8800b77d3bf8] __schedule at ffffffff81811453
 #1 [ffff8800b77d3c50] schedule at ffffffff81811c30
 #2 [ffff8800b77d3c68] freeze_array at ffffffffa07ee17e [raid1]
 #3 [ffff8800b77d3cc0] handle_read_error at ffffffffa07f093b [raid1]
 #4 [ffff8800b77d3d68] raid1d at ffffffffa07f10a6 [raid1]
 #5 [ffff8800b77d3e60] md_thread at ffffffffa04dee80 [md_mod]
 #6 [ffff8800b77d3ed0] kthread at ffffffff81075fb6
 #7 [ffff8800b77d3f50] ret_from_fork at ffffffff818157df
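
The md2_raid1 thread is parked on the wait in freeze_array(). For
reference, this is the v4.4 code from drivers/md/raid1.c (abridged,
comments mine); handle_read_error() calls it with extra == 1:

static void freeze_array(struct r1conf *conf, int extra)
{
        spin_lock_irq(&conf->resync_lock);
        conf->array_frozen = 1;
        /* Block until every in-flight request is accounted for as
         * queued for retry: nr_pending must drop to nr_queued + extra.
         * Pending writes are flushed while we wait so they can
         * complete and be counted.
         */
        wait_event_lock_irq_cmd(conf->wait_barrier,
                                conf->nr_pending == conf->nr_queued + extra,
                                conf->resync_lock,
                                flush_pending_writes(conf));
        spin_unlock_irq(&conf->resync_lock);
}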
crash> bt 2558
bt: invalid task or pid value: 2558
crash> bt 4558
PID: 4558   TASK: ffff88022b550d00  CPU: 3   COMMAND: "fio"
 #0 [ffff88022c287710] __schedule at ffffffff81811453
 #1 [ffff88022c287768] schedule at ffffffff81811c30
 #2 [ffff88022c287780] wait_barrier at ffffffffa07ee044 [raid1]
 #3 [ffff88022c2877e8] make_request at ffffffffa07efc65 [raid1]
 #4 [ffff88022c2878d0] md_make_request at ffffffffa04df609 [md_mod]
 #5 [ffff88022c287928] generic_make_request at ffffffff813fd3de
 #6 [ffff88022c287970] submit_bio at ffffffff813fd522
 #7 [ffff88022c2879b8] do_blockdev_direct_IO at ffffffff811d32a7
 #8 [ffff88022c287be8] __blockdev_direct_IO at ffffffff811d3b6e
 #9 [ffff88022c287c10] blkdev_direct_IO at ffffffff811ce2d7
#10 [ffff88022c287c38] generic_file_direct_write at ffffffff81132c90
#11 [ffff88022c287cb0] __generic_file_write_iter at ffffffff81132e1d
#12 [ffff88022c287d08] blkdev_write_iter at ffffffff811ce597
#13 [ffff88022c287d68] aio_run_iocb at ffffffff811deca6
#14 [ffff88022c287e68] do_io_submit at ffffffff811dfbaa
#15 [ffff88022c287f40] sys_io_submit at ffffffff811dfe4b
#16 [ffff88022c287f50] entry_SYSCALL_64_fastpath at ffffffff81815497
    RIP: 00007f63b1362737  RSP: 00007ffff7eb17f8  RFLAGS: 00000206
    RAX: ffffffffffffffda  RBX: 00007f63a142a000  RCX: 00007f63b1362737
    RDX: 0000000001179b58  RSI: 0000000000000001  RDI: 00007f63b1f4a000
    RBP: 0000000000000512   R8: 0000000000000001   R9: 0000000001171fa0
    R10: 00007f639ef84000  R11: 0000000000000206  R12: 0000000000000001
    R13: 0000000000000200  R14: 000000003a2d3000  R15: 0000000000000001
    ORIG_RAX: 00000000000000d1  CS: 0033  SS: 002b
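
All the blocked fio submitters look like this one: asleep in
wait_barrier(), which in v4.4 admits no new I/O while the array is
frozen. Roughly (abridged):

static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
{
        ...
        spin_lock_irq(&conf->resync_lock);
        if (need_to_wait_for_sync(conf, bio)) {
                conf->nr_waiting++;
                /* held here until array_frozen clears (and any
                 * resync barrier drops) */
                wait_event_lock_irq(conf->wait_barrier,
                                    !conf->array_frozen &&
                                    (!conf->barrier || ...),
                                    conf->resync_lock);
                conf->nr_waiting--;
        }
        ...
        conf->nr_pending++;
        spin_unlock_irq(&conf->resync_lock);
        return sector;
}

So new I/O backs up in nr_waiting while raid1d sits in freeze_array()
waiting for nr_pending to drain.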

crash> struct r1conf 0xffff880037362100
struct r1conf {
  mddev = 0xffff880037352800,
  mirrors = 0xffff88022c209c00,
  raid_disks = 2,
  next_resync = 18446744073709527039,
  start_next_window = 18446744073709551615,
  current_window_requests = 0,
  next_window_requests = 0,
  device_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  retry_list = {
    next = 0xffff8801ce757740,
    prev = 0xffff8801b1b79140
  },
  bio_end_io_list = {
    next = 0xffff8801ce7d9ac0,
    prev = 0xffff88022838f4c0
  },
  pending_bio_list = {
    head = 0x0,
    tail = 0x0
  },
  pending_count = 0,
  wait_barrier = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff8801f6d87818,
      prev = 0xffff88022c2877a8
    }
  },
  resync_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  nr_pending = 2086,
  nr_waiting = 97,
  nr_queued = 2084,
  barrier = 0,
  array_frozen = 1,
  fullsync = 0,
  recovery_disabled = 1,
  poolinfo = 0xffff8802330be390,
  r1bio_pool = 0xffff88022bdf54e0,
  r1buf_pool = 0x0,
  tmppage = 0xffffea0000dcee40,
  thread = 0x0,
  cluster_sync_low = 0,
  cluster_sync_high = 0
}
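
If I read these counters right, the wait in freeze_array() can never
be satisfied: with extra == 1 from handle_read_error() it needs
nr_pending == nr_queued + 1 == 2085, but nr_pending sits at 2086. So
exactly one in-flight request is neither completing nor being queued
for retry. The 97 nr_waiting tasks are the submitters asleep in
wait_barrier(); barrier itself is 0, so it is array_frozen alone
holding them. To cross-check nr_queued against the actual lists,
something like the following should work in crash (untested sketch;
the second command takes the list_head address that the first one
prints, and the same can be done for bio_end_io_list):

crash> struct -o r1conf.retry_list 0xffff880037362100
crash> list -H <address-of-retry_list> | wc -l

I would expect the retry_list and bio_end_io_list counts together to
add up to nr_queued = 2084.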
crash> struct raid1_info 0xffff88022c209c00
struct raid1_info {
  rdev = 0xffff880231635800,
  head_position = 1318965,
  next_seq_sect = 252597,
  seq_start = 252342
}
crash> struct raid1_info 0xffff88022c209c20
struct raid1_info {
  rdev = 0xffff88023166ce00,
  head_position = 1585216,
  next_seq_sect = 839992,
  seq_start = 839977
}
crash> struct md_rdev 0xffff880231635800
struct md_rdev {
  same_set = {
    next = 0xffff880037352818,
    prev = 0xffff88023166ce00
  },
  sectors = 2095104,
  mddev = 0xffff880037352800,
  last_events = 41325652,
  meta_bdev = 0x0,
  bdev = 0xffff880235c2aa40,
  sb_page = 0xffffea0002dd98c0,
  bb_page = 0xffffea0002e48f80,
  sb_loaded = 1,
  sb_events = 205,
  data_offset = 2048,
  new_data_offset = 2048,
  sb_start = 8,
  sb_size = 512,
  preferred_minor = 65535,
  kobj = {
    name = 0xffff8802341cdef0 "dev-loop1",
    entry = {
      next = 0xffff880231635880,
      prev = 0xffff880231635880
    },
    parent = 0xffff880037352850,
    kset = 0x0,
    ktype = 0xffffffffa04f3020 <rdev_ktype>,
    sd = 0xffff880233e3b8e8,
    kref = {
      refcount = {
        counter = 1
      }
    },
    state_initialized = 1,
    state_in_sysfs = 1,
    state_add_uevent_sent = 0,
    state_remove_uevent_sent = 0,
    uevent_suppress = 0
  },
  flags = 2,
  blocked_wait = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff8802316358c8,
      prev = 0xffff8802316358c8
    }
  },
  desc_nr = 0,
  raid_disk = 0,
  new_raid_disk = 0,
  saved_raid_disk = -1,
  {
    recovery_offset = 0,
    journal_tail = 0
  },
  nr_pending = {
    counter = 1
  },
  read_errors = {
    counter = 0
  },
  last_read_error = {
    tv_sec = 0,
    tv_nsec = 0
  },
  corrected_errors = {
    counter = 0
  },
  del_work = {
    data = {
      counter = 0
    },
    entry = {
      next = 0x0,
      prev = 0x0
    },
    func = 0x0
  },
  sysfs_state = 0xffff880233e3b960,
  badblocks = {
    count = 0,
    unacked_exist = 0,
    shift = 0,
    page = 0xffff88022c0d6000,
    changed = 0,
    lock = {
      seqcount = {
        sequence = 264
      },
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      }
    },
    sector = 0,
    size = 0
  }
}
crash> struct md_rdev 0xffff88023166ce00
struct md_rdev {
  same_set = {
    next = 0xffff880231635800,
    prev = 0xffff880037352818
  },
  sectors = 2095104,
  mddev = 0xffff880037352800,
  last_events = 10875407,
  meta_bdev = 0x0,
  bdev = 0xffff880234a86a40,
  sb_page = 0xffffea00089e0ac0,
  bb_page = 0xffffea0007db4980,
  sb_loaded = 1,
  sb_events = 204,
  data_offset = 2048,
  new_data_offset = 2048,
  sb_start = 8,
  sb_size = 512,
  preferred_minor = 65535,
  kobj = {
    name = 0xffff88022c100e30 "dev-ibnbd0",
    entry = {
      next = 0xffff88023166ce80,
      prev = 0xffff88023166ce80
    },
    parent = 0xffff880037352850,
    kset = 0x0,
    ktype = 0xffffffffa04f3020 <rdev_ktype>,
    sd = 0xffff8800b6539e10,
    kref = {
      refcount = {
        counter = 1
      }
    },
    state_initialized = 1,
    state_in_sysfs = 1,
    state_add_uevent_sent = 0,
    state_remove_uevent_sent = 0,
    uevent_suppress = 0
  },
  flags = 581,
  blocked_wait = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff88023166cec8,
      prev = 0xffff88023166cec8
    }
  },
  desc_nr = 1,
  raid_disk = 1,
  new_raid_disk = 0,
  saved_raid_disk = -1,
  {
    recovery_offset = 18446744073709551615,
    journal_tail = 18446744073709551615
  },
  nr_pending = {
    counter = 2073
  },
  read_errors = {
    counter = 0
  },
  last_read_error = {
    tv_sec = 0,
    tv_nsec = 0
  },
  corrected_errors = {
    counter = 0
  },
  del_work = {
    data = {
      counter = 0
    },
    entry = {
      next = 0x0,
      prev = 0x0
    },
    func = 0x0
  },
  sysfs_state = 0xffff8800b6539e88,
  badblocks = {
    count = 1,
    unacked_exist = 0,
    shift = 0,
    page = 0xffff880099ced000,
    changed = 0,
    lock = {
      seqcount = {
        sequence = 4
      },
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      }
    },
    sector = 80,
    size = 8
  }
}
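
To summarize the two legs: dev-ibnbd0 has the Faulty bit set (bit 0 of
flags = 581), one recorded bad block (sector 80, size 8), and still
accounts for nr_pending = 2073, while the healthy dev-loop1 (flags = 2,
i.e. In_sync) holds exactly one pending request; presumably that is the
stuck one that keeps conf->nr_pending one above what freeze_array() is
waiting for.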


-- 
Jinpu Wang
Linux Kernel Developer

ProfitBricks GmbH
Greifswalder Str. 207
D - 10405 Berlin

Tel:       +49 30 577 008  042
Fax:      +49 30 577 008 299
Email:    jinpu.wang@profitbricks.com
URL:      https://www.profitbricks.de

Sitz der Gesellschaft: Berlin
Registergericht: Amtsgericht Charlottenburg, HRB 125506 B
Geschäftsführer: Achim Weiss
