All of lore.kernel.org
 help / color / mirror / Atom feed
From: bugzilla-daemon@bugzilla.kernel.org
To: linux-xfs@vger.kernel.org
Subject: [Bug 212495] New: xfs: system crash caused by null bp->b_pages
Date: Wed, 31 Mar 2021 01:16:29 +0000	[thread overview]
Message-ID: <bug-212495-201763@https.bugzilla.kernel.org/> (raw)

https://bugzilla.kernel.org/show_bug.cgi?id=212495

            Bug ID: 212495
           Summary: xfs: system crash caused by null bp->b_pages
           Product: File System
           Version: 2.5
    Kernel Version: 3.10
          Hardware: Intel
                OS: Linux
              Tree: Mainline
            Status: NEW
          Severity: high
          Priority: P1
         Component: XFS
          Assignee: filesystem_xfs@kernel-bugs.kernel.org
          Reporter: zp_8483@163.com
        Regression: No

We have encountered the following problem several times:
    1. A hardware problem causes the block device to be lost.
    2. IO requests continue to be sent to the block device.
    3. The system may crash after a few hours.




[15205901.386974] RIP: 0010:xfs_buf_offset+0x32/0x60 [xfs]
[15205901.388044] RSP: 0018:ffffb28ba9b3bc68 EFLAGS: 00010246
[15205901.389021] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
000000000000000b
[15205901.390016] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
ffff88627bebf000
[15205901.391075] RBP: ffffb28ba9b3bc98 R08: ffff88627bebf000 R09:
00000001802a000d
[15205901.392031] R10: ffff88521f3a0240 R11: ffff88627bebf000 R12:
ffff88521041e000
[15205901.392950] R13: 0000000000000020 R14: ffff88627bebf000 R15:
0000000000000000
[15205901.393858] FS: 0000000000000000(0000) GS:ffff88521f380000(0000)
knlGS:0000000000000000
[15205901.394774] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[15205901.395756] CR2: 0000000000000000 CR3: 000000099bc09001 CR4:
00000000007606e0
[15205901.396904] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[15205901.397869] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[15205901.398836] PKRU: 55555554
[15205901.400111] Call Trace:
[15205901.401058] ? xfs_inode_buf_verify+0x8e/0xf0 [xfs]
[15205901.402069] ? xfs_buf_delwri_submit_buffers+0x16d/0x2b0 [xfs]
[15205901.403060] xfs_inode_buf_write_verify+0x10/0x20 [xfs]
[15205901.404017] _xfs_buf_ioapply+0x88/0x410 [xfs]
[15205901.404990] ? xfs_buf_delwri_submit_buffers+0x16d/0x2b0 [xfs]
[15205901.405929] xfs_buf_submit+0x63/0x200 [xfs]
[15205901.406801] xfs_buf_delwri_submit_buffers+0x16d/0x2b0 [xfs]
[15205901.407675] ? xfs_buf_delwri_submit_nowait+0x10/0x20 [xfs]
[15205901.408540] ? xfs_inode_item_push+0xb7/0x190 [xfs]
[15205901.409395] xfs_buf_delwri_submit_nowait+0x10/0x20 [xfs]
[15205901.410249] xfsaild+0x29a/0x780 [xfs]
[15205901.411121] kthread+0x109/0x140
[15205901.411981] ? xfs_trans_ail_cursor_first+0x90/0x90 [xfs]
[15205901.412785] ? kthread_park+0x60/0x60
[15205901.413578] ret_from_fork+0x2a/0x40

==============================================================================
From vmcore, we found that b_pages=NULL but b_page_count=16.

crash> struct xfs_buf 0xffff9005c51fb300
struct xfs_buf {
  b_rhash_head = {
    next = 0x0
  },
  b_bn = 481790464,
  b_length = 128,
  b_hold = {
    counter = 2
  },
  b_lru_ref = {
    counter = 0
  },
  b_flags = 1048626,
  b_sema = {
    lock = {
      raw_lock = {
        val = {
          counter = 0
        }
      }
    },
    count = 0,
    wait_list = {
      next = 0xffff9005c51fb328,
      prev = 0xffff9005c51fb328
    }
  },
  b_lru = {
    next = 0xffff9005c51fb338,
    prev = 0xffff9005c51fb338
  },
  b_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  b_state = 3,
  b_io_error = 0,
  b_waiters = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff9005c51fb360,
      prev = 0xffff9005c51fb360
    }
  },
  b_list = {
    next = 0xffff9005c51fb370,
    prev = 0xffff9005c51fb370
  },
  b_pag = 0xffff9005de557400,
  b_target = 0xffff9005ddff0d80,
  b_addr = 0x0,
  b_ioend_work = {
    data = {
      counter = 1920
    },
    entry = {
      next = 0xffff9005c51fb3a0,
      prev = 0xffff9005c51fb3a0
    },
    func = 0xffffffffc081ce80 <xfs_buf_ioend_work>
  },
  b_ioend_wq = 0xffff9035d433bc00,
  b_iodone = 0xffffffffc0843220 <xfs_buf_iodone_callbacks>,
  b_iowait = {
    done = 0,
    wait = {
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      },
      task_list = {
        next = 0xffff9005c51fb3d8,
        prev = 0xffff9005c51fb3d8
      }
    }
  },
  b_fspriv = 0xffff9005c5b9c690,
  b_transp = 0x0,
  b_pages = 0x0,
  b_page_array = {0x0, 0x0},
  b_maps = 0xffff9005c51fb418,
  __b_map = {
    bm_bn = 481790464,
    bm_len = 128
  },
  b_map_count = 1,
  b_io_length = 128,
  b_pin_count = {
    counter = 0
  },
  b_io_remaining = {
    counter = 1
  },
  b_page_count = 16,
  b_offset = 0,
  b_error = 0,
  b_retries = 0,
  b_first_retry_time = 0,
  b_last_error = -5,
  b_ops = 0xffffffffc085cf00 <xfs_inode_buf_ops>

===============================================================================
File: xfs_log.c, function: xlog_sync. It seems the current code does not handle
memory allocation failure.

if (split) {
                bp = iclog->ic_log->l_xbuf;
                XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
                xfs_buf_associate_memory(bp,
                                (char *)&iclog->ic_header + count, split);
                bp->b_fspriv = iclog;
                bp->b_flags &= ~XBF_FLUSH;
                bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);

                ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
                ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);

                /* account for internal log which doesn't start at block #0 */
                XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
                error = xlog_bdstrat(bp);
                if (error) {
                        xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
                        return error;
                }
        }

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

             reply	other threads:[~2021-03-31  1:17 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-31  1:16 bugzilla-daemon [this message]
2021-03-31  1:38 ` [Bug 212495] xfs: system crash caused by null bp->b_pages bugzilla-daemon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-212495-201763@https.bugzilla.kernel.org/ \
    --to=bugzilla-daemon@bugzilla.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.