From: Chandan Rajendra <chandan@linux.ibm.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 04/21] xfs: refactor log recovery buffer item dispatch for pass2 commit functions
Date: Fri, 01 May 2020 19:13:50 +0530 [thread overview]
Message-ID: <5110385.3Rn1M7ztmW@localhost.localdomain> (raw)
In-Reply-To: <158820768108.467894.15795515034447474347.stgit@magnolia>
On Thursday, April 30, 2020 6:18 AM Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Move the log buffer item pass2 commit code into the per-item source code
> files and use the dispatch function to call it. We do these one at a
> time because there's a lot of code to move. No functional changes.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
The changes look good to me.
Reviewed-by: Chandan Rajendra <chandanrlinux@gmail.com>
> ---
> fs/xfs/libxfs/xfs_log_recover.h | 23 +
> fs/xfs/xfs_buf_item_recover.c | 790 +++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_log_recover.c | 798 ---------------------------------------
> 3 files changed, 820 insertions(+), 791 deletions(-)
>
>
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index b933dc8bb8a3..5017d80c0f4b 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -36,6 +36,26 @@ struct xlog_recover_item_type {
> /* Do whatever work we need to do for pass1, if provided. */
> int (*commit_pass1_fn)(struct xlog *log,
> struct xlog_recover_item *item);
> +
> + /*
> + * This function should do whatever work is needed for pass2 of log
> + * recovery, if provided.
> + *
> + * If the recovered item is an intent item, this function should parse
> + * the recovered item to construct an in-core log intent item and
> + * insert it into the AIL. The in-core log intent item should have 1
> + * refcount so that the item is freed either (a) when we commit the
> + * recovered log item for the intent-done item; (b) replay the work and
> + * log a new intent-done item; or (c) recovery fails and we have to
> + * abort.
> + *
> + * If the recovered item is an intent-done item, this function should
> + * parse the recovered item to find the id of the corresponding intent
> + * log item. Next, it should find the in-core log intent item in the
> + * AIL and release it.
> + */
> + int (*commit_pass2_fn)(struct xlog *log, struct list_head *buffer_list,
> + struct xlog_recover_item *item, xfs_lsn_t lsn);
> };
>
> extern const struct xlog_recover_item_type xlog_icreate_item_type;
> @@ -100,5 +120,8 @@ struct xlog_recover {
> void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
> const struct xfs_buf_ops *ops);
> bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> +bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> +bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> +void xlog_recover_iodone(struct xfs_buf *bp);
>
> #endif /* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> index deda3ad32d95..d324f810819d 100644
> --- a/fs/xfs/xfs_buf_item_recover.c
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -18,6 +18,10 @@
> #include "xfs_log.h"
> #include "xfs_log_priv.h"
> #include "xfs_log_recover.h"
> +#include "xfs_error.h"
> +#include "xfs_inode.h"
> +#include "xfs_dir2.h"
> +#include "xfs_quota.h"
>
> STATIC enum xlog_recover_reorder
> xlog_buf_reorder_fn(
> @@ -68,8 +72,794 @@ xlog_recover_buffer_commit_pass1(
> return 0;
> }
>
> +/*
> + * Validate the recovered buffer is of the correct type and attach the
> + * appropriate buffer operations to them for writeback. Magic numbers are in a
> + * few places:
> + * the first 16 bits of the buffer (inode buffer, dquot buffer),
> + * the first 32 bits of the buffer (most blocks),
> + * inside a struct xfs_da_blkinfo at the start of the buffer.
> + */
> +static void
> +xlog_recover_validate_buf_type(
> + struct xfs_mount *mp,
> + struct xfs_buf *bp,
> + struct xfs_buf_log_format *buf_f,
> + xfs_lsn_t current_lsn)
> +{
> + struct xfs_da_blkinfo *info = bp->b_addr;
> + uint32_t magic32;
> + uint16_t magic16;
> + uint16_t magicda;
> + char *warnmsg = NULL;
> +
> + /*
> + * We can only do post recovery validation on items on CRC enabled
> + * filesystems as we need to know when the buffer was written to be able
> + * to determine if we should have replayed the item. If we replay old
> + * metadata over a newer buffer, then it will enter a temporarily
> + * inconsistent state resulting in verification failures. Hence for now
> + * just avoid the verification stage for non-crc filesystems
> + */
> + if (!xfs_sb_version_hascrc(&mp->m_sb))
> + return;
> +
> + magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
> + magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
> + magicda = be16_to_cpu(info->magic);
> + switch (xfs_blft_from_flags(buf_f)) {
> + case XFS_BLFT_BTREE_BUF:
> + switch (magic32) {
> + case XFS_ABTB_CRC_MAGIC:
> + case XFS_ABTB_MAGIC:
> + bp->b_ops = &xfs_bnobt_buf_ops;
> + break;
> + case XFS_ABTC_CRC_MAGIC:
> + case XFS_ABTC_MAGIC:
> + bp->b_ops = &xfs_cntbt_buf_ops;
> + break;
> + case XFS_IBT_CRC_MAGIC:
> + case XFS_IBT_MAGIC:
> + bp->b_ops = &xfs_inobt_buf_ops;
> + break;
> + case XFS_FIBT_CRC_MAGIC:
> + case XFS_FIBT_MAGIC:
> + bp->b_ops = &xfs_finobt_buf_ops;
> + break;
> + case XFS_BMAP_CRC_MAGIC:
> + case XFS_BMAP_MAGIC:
> + bp->b_ops = &xfs_bmbt_buf_ops;
> + break;
> + case XFS_RMAP_CRC_MAGIC:
> + bp->b_ops = &xfs_rmapbt_buf_ops;
> + break;
> + case XFS_REFC_CRC_MAGIC:
> + bp->b_ops = &xfs_refcountbt_buf_ops;
> + break;
> + default:
> + warnmsg = "Bad btree block magic!";
> + break;
> + }
> + break;
> + case XFS_BLFT_AGF_BUF:
> + if (magic32 != XFS_AGF_MAGIC) {
> + warnmsg = "Bad AGF block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_agf_buf_ops;
> + break;
> + case XFS_BLFT_AGFL_BUF:
> + if (magic32 != XFS_AGFL_MAGIC) {
> + warnmsg = "Bad AGFL block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_agfl_buf_ops;
> + break;
> + case XFS_BLFT_AGI_BUF:
> + if (magic32 != XFS_AGI_MAGIC) {
> + warnmsg = "Bad AGI block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_agi_buf_ops;
> + break;
> + case XFS_BLFT_UDQUOT_BUF:
> + case XFS_BLFT_PDQUOT_BUF:
> + case XFS_BLFT_GDQUOT_BUF:
> +#ifdef CONFIG_XFS_QUOTA
> + if (magic16 != XFS_DQUOT_MAGIC) {
> + warnmsg = "Bad DQUOT block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_dquot_buf_ops;
> +#else
> + xfs_alert(mp,
> + "Trying to recover dquots without QUOTA support built in!");
> + ASSERT(0);
> +#endif
> + break;
> + case XFS_BLFT_DINO_BUF:
> + if (magic16 != XFS_DINODE_MAGIC) {
> + warnmsg = "Bad INODE block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_inode_buf_ops;
> + break;
> + case XFS_BLFT_SYMLINK_BUF:
> + if (magic32 != XFS_SYMLINK_MAGIC) {
> + warnmsg = "Bad symlink block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_symlink_buf_ops;
> + break;
> + case XFS_BLFT_DIR_BLOCK_BUF:
> + if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
> + magic32 != XFS_DIR3_BLOCK_MAGIC) {
> + warnmsg = "Bad dir block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_dir3_block_buf_ops;
> + break;
> + case XFS_BLFT_DIR_DATA_BUF:
> + if (magic32 != XFS_DIR2_DATA_MAGIC &&
> + magic32 != XFS_DIR3_DATA_MAGIC) {
> + warnmsg = "Bad dir data magic!";
> + break;
> + }
> + bp->b_ops = &xfs_dir3_data_buf_ops;
> + break;
> + case XFS_BLFT_DIR_FREE_BUF:
> + if (magic32 != XFS_DIR2_FREE_MAGIC &&
> + magic32 != XFS_DIR3_FREE_MAGIC) {
> + warnmsg = "Bad dir3 free magic!";
> + break;
> + }
> + bp->b_ops = &xfs_dir3_free_buf_ops;
> + break;
> + case XFS_BLFT_DIR_LEAF1_BUF:
> + if (magicda != XFS_DIR2_LEAF1_MAGIC &&
> + magicda != XFS_DIR3_LEAF1_MAGIC) {
> + warnmsg = "Bad dir leaf1 magic!";
> + break;
> + }
> + bp->b_ops = &xfs_dir3_leaf1_buf_ops;
> + break;
> + case XFS_BLFT_DIR_LEAFN_BUF:
> + if (magicda != XFS_DIR2_LEAFN_MAGIC &&
> + magicda != XFS_DIR3_LEAFN_MAGIC) {
> + warnmsg = "Bad dir leafn magic!";
> + break;
> + }
> + bp->b_ops = &xfs_dir3_leafn_buf_ops;
> + break;
> + case XFS_BLFT_DA_NODE_BUF:
> + if (magicda != XFS_DA_NODE_MAGIC &&
> + magicda != XFS_DA3_NODE_MAGIC) {
> + warnmsg = "Bad da node magic!";
> + break;
> + }
> + bp->b_ops = &xfs_da3_node_buf_ops;
> + break;
> + case XFS_BLFT_ATTR_LEAF_BUF:
> + if (magicda != XFS_ATTR_LEAF_MAGIC &&
> + magicda != XFS_ATTR3_LEAF_MAGIC) {
> + warnmsg = "Bad attr leaf magic!";
> + break;
> + }
> + bp->b_ops = &xfs_attr3_leaf_buf_ops;
> + break;
> + case XFS_BLFT_ATTR_RMT_BUF:
> + if (magic32 != XFS_ATTR3_RMT_MAGIC) {
> + warnmsg = "Bad attr remote magic!";
> + break;
> + }
> + bp->b_ops = &xfs_attr3_rmt_buf_ops;
> + break;
> + case XFS_BLFT_SB_BUF:
> + if (magic32 != XFS_SB_MAGIC) {
> + warnmsg = "Bad SB block magic!";
> + break;
> + }
> + bp->b_ops = &xfs_sb_buf_ops;
> + break;
> +#ifdef CONFIG_XFS_RT
> + case XFS_BLFT_RTBITMAP_BUF:
> + case XFS_BLFT_RTSUMMARY_BUF:
> + /* no magic numbers for verification of RT buffers */
> + bp->b_ops = &xfs_rtbuf_ops;
> + break;
> +#endif /* CONFIG_XFS_RT */
> + default:
> + xfs_warn(mp, "Unknown buffer type %d!",
> + xfs_blft_from_flags(buf_f));
> + break;
> + }
> +
> + /*
> + * Nothing else to do in the case of a NULL current LSN as this means
> + * the buffer is more recent than the change in the log and will be
> + * skipped.
> + */
> + if (current_lsn == NULLCOMMITLSN)
> + return;
> +
> + if (warnmsg) {
> + xfs_warn(mp, warnmsg);
> + ASSERT(0);
> + }
> +
> + /*
> + * We must update the metadata LSN of the buffer as it is written out to
> + * ensure that older transactions never replay over this one and corrupt
> + * the buffer. This can occur if log recovery is interrupted at some
> + * point after the current transaction completes, at which point a
> + * subsequent mount starts recovery from the beginning.
> + *
> + * Write verifiers update the metadata LSN from log items attached to
> + * the buffer. Therefore, initialize a bli purely to carry the LSN to
> + * the verifier. We'll clean it up in our ->iodone() callback.
> + */
> + if (bp->b_ops) {
> + struct xfs_buf_log_item *bip;
> +
> + ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
> + bp->b_iodone = xlog_recover_iodone;
> + xfs_buf_item_init(bp, mp);
> + bip = bp->b_log_item;
> + bip->bli_item.li_lsn = current_lsn;
> + }
> +}
> +
> +/*
> + * Perform a 'normal' buffer recovery. Each logged region of the
> + * buffer should be copied over the corresponding region in the
> + * given buffer. The bitmap in the buf log format structure indicates
> + * where to place the logged data.
> + */
> +STATIC void
> +xlog_recover_do_reg_buffer(
> + struct xfs_mount *mp,
> + struct xlog_recover_item *item,
> + struct xfs_buf *bp,
> + struct xfs_buf_log_format *buf_f,
> + xfs_lsn_t current_lsn)
> +{
> + int i;
> + int bit;
> + int nbits;
> + xfs_failaddr_t fa;
> + const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot);
> +
> + trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
> +
> + bit = 0;
> + i = 1; /* 0 is the buf format structure */
> + while (1) {
> + bit = xfs_next_bit(buf_f->blf_data_map,
> + buf_f->blf_map_size, bit);
> + if (bit == -1)
> + break;
> + nbits = xfs_contig_bits(buf_f->blf_data_map,
> + buf_f->blf_map_size, bit);
> + ASSERT(nbits > 0);
> + ASSERT(item->ri_buf[i].i_addr != NULL);
> + ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
> + ASSERT(BBTOB(bp->b_length) >=
> + ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
> +
> + /*
> + * The dirty regions logged in the buffer, even though
> + * contiguous, may span multiple chunks. This is because the
> + * dirty region may span a physical page boundary in a buffer
> + * and hence be split into two separate vectors for writing into
> + * the log. Hence we need to trim nbits back to the length of
> + * the current region being copied out of the log.
> + */
> + if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
> + nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
> +
> + /*
> + * Do a sanity check if this is a dquot buffer. Just checking
> + * the first dquot in the buffer should do. XXXThis is
> + * probably a good thing to do for other buf types also.
> + */
> + fa = NULL;
> + if (buf_f->blf_flags &
> + (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> + if (item->ri_buf[i].i_addr == NULL) {
> + xfs_alert(mp,
> + "XFS: NULL dquot in %s.", __func__);
> + goto next;
> + }
> + if (item->ri_buf[i].i_len < size_disk_dquot) {
> + xfs_alert(mp,
> + "XFS: dquot too small (%d) in %s.",
> + item->ri_buf[i].i_len, __func__);
> + goto next;
> + }
> + fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
> + -1, 0);
> + if (fa) {
> + xfs_alert(mp,
> + "dquot corrupt at %pS trying to replay into block 0x%llx",
> + fa, bp->b_bn);
> + goto next;
> + }
> + }
> +
> + memcpy(xfs_buf_offset(bp,
> + (uint)bit << XFS_BLF_SHIFT), /* dest */
> + item->ri_buf[i].i_addr, /* source */
> + nbits<<XFS_BLF_SHIFT); /* length */
> + next:
> + i++;
> + bit += nbits;
> + }
> +
> + /* Shouldn't be any more regions */
> + ASSERT(i == item->ri_total);
> +
> + xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
> +}
> +
> +/*
> + * Perform a dquot buffer recovery.
> + * Simple algorithm: if we have found a QUOTAOFF log item of the same type
> + * (ie. USR or GRP), then just toss this buffer away; don't recover it.
> + * Else, treat it as a regular buffer and do recovery.
> + *
> + * Return false if the buffer was tossed and true if we recovered the buffer to
> + * indicate to the caller if the buffer needs writing.
> + */
> +STATIC bool
> +xlog_recover_do_dquot_buffer(
> + struct xfs_mount *mp,
> + struct xlog *log,
> + struct xlog_recover_item *item,
> + struct xfs_buf *bp,
> + struct xfs_buf_log_format *buf_f)
> +{
> + uint type;
> +
> + trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
> +
> + /*
> + * Filesystems are required to send in quota flags at mount time.
> + */
> + if (!mp->m_qflags)
> + return false;
> +
> + type = 0;
> + if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
> + type |= XFS_DQ_USER;
> + if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
> + type |= XFS_DQ_PROJ;
> + if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
> + type |= XFS_DQ_GROUP;
> + /*
> + * This type of quotas was turned off, so ignore this buffer
> + */
> + if (log->l_quotaoffs_flag & type)
> + return false;
> +
> + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
> + return true;
> +}
> +
> +/*
> + * Perform recovery for a buffer full of inodes. In these buffers, the only
> + * data which should be recovered is that which corresponds to the
> + * di_next_unlinked pointers in the on disk inode structures. The rest of the
> + * data for the inodes is always logged through the inodes themselves rather
> + * than the inode buffer and is recovered in xlog_recover_inode_pass2().
> + *
> + * The only time when buffers full of inodes are fully recovered is when the
> + * buffer is full of newly allocated inodes. In this case the buffer will
> + * not be marked as an inode buffer and so will be sent to
> + * xlog_recover_do_reg_buffer() below during recovery.
> + */
> +STATIC int
> +xlog_recover_do_inode_buffer(
> + struct xfs_mount *mp,
> + struct xlog_recover_item *item,
> + struct xfs_buf *bp,
> + struct xfs_buf_log_format *buf_f)
> +{
> + int i;
> + int item_index = 0;
> + int bit = 0;
> + int nbits = 0;
> + int reg_buf_offset = 0;
> + int reg_buf_bytes = 0;
> + int next_unlinked_offset;
> + int inodes_per_buf;
> + xfs_agino_t *logged_nextp;
> + xfs_agino_t *buffer_nextp;
> +
> + trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
> +
> + /*
> + * Post recovery validation only works properly on CRC enabled
> + * filesystems.
> + */
> + if (xfs_sb_version_hascrc(&mp->m_sb))
> + bp->b_ops = &xfs_inode_buf_ops;
> +
> + inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
> + for (i = 0; i < inodes_per_buf; i++) {
> + next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
> + offsetof(xfs_dinode_t, di_next_unlinked);
> +
> + while (next_unlinked_offset >=
> + (reg_buf_offset + reg_buf_bytes)) {
> + /*
> + * The next di_next_unlinked field is beyond
> + * the current logged region. Find the next
> + * logged region that contains or is beyond
> + * the current di_next_unlinked field.
> + */
> + bit += nbits;
> + bit = xfs_next_bit(buf_f->blf_data_map,
> + buf_f->blf_map_size, bit);
> +
> + /*
> + * If there are no more logged regions in the
> + * buffer, then we're done.
> + */
> + if (bit == -1)
> + return 0;
> +
> + nbits = xfs_contig_bits(buf_f->blf_data_map,
> + buf_f->blf_map_size, bit);
> + ASSERT(nbits > 0);
> + reg_buf_offset = bit << XFS_BLF_SHIFT;
> + reg_buf_bytes = nbits << XFS_BLF_SHIFT;
> + item_index++;
> + }
> +
> + /*
> + * If the current logged region starts after the current
> + * di_next_unlinked field, then move on to the next
> + * di_next_unlinked field.
> + */
> + if (next_unlinked_offset < reg_buf_offset)
> + continue;
> +
> + ASSERT(item->ri_buf[item_index].i_addr != NULL);
> + ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
> + ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
> +
> + /*
> + * The current logged region contains a copy of the
> + * current di_next_unlinked field. Extract its value
> + * and copy it to the buffer copy.
> + */
> + logged_nextp = item->ri_buf[item_index].i_addr +
> + next_unlinked_offset - reg_buf_offset;
> + if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
> + xfs_alert(mp,
> + "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
> + "Trying to replay bad (0) inode di_next_unlinked field.",
> + item, bp);
> + return -EFSCORRUPTED;
> + }
> +
> + buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
> + *buffer_nextp = *logged_nextp;
> +
> + /*
> + * If necessary, recalculate the CRC in the on-disk inode. We
> + * have to leave the inode in a consistent state for whoever
> + * reads it next....
> + */
> + xfs_dinode_calc_crc(mp,
> + xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
> +
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * V5 filesystems know the age of the buffer on disk being recovered. We can
> + * have newer objects on disk than we are replaying, and so for these cases we
> + * don't want to replay the current change as that will make the buffer contents
> + * temporarily invalid on disk.
> + *
> + * The magic number might not match the buffer type we are going to recover
> + * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
> + * extract the LSN of the existing object in the buffer based on its current
> + * magic number. If we don't recognise the magic number in the buffer, then
> + * return a LSN of -1 so that the caller knows it was an unrecognised block and
> + * so can recover the buffer.
> + *
> + * Note: we cannot rely solely on magic number matches to determine that the
> + * buffer has a valid LSN - we also need to verify that it belongs to this
> + * filesystem, so we need to extract the object's LSN and compare it to that
> + * which we read from the superblock. If the UUIDs don't match, then we've got a
> + * stale metadata block from an old filesystem instance that we need to recover
> + * over the top of.
> + */
> +static xfs_lsn_t
> +xlog_recover_get_buf_lsn(
> + struct xfs_mount *mp,
> + struct xfs_buf *bp)
> +{
> + uint32_t magic32;
> + uint16_t magic16;
> + uint16_t magicda;
> + void *blk = bp->b_addr;
> + uuid_t *uuid;
> + xfs_lsn_t lsn = -1;
> +
> + /* v4 filesystems always recover immediately */
> + if (!xfs_sb_version_hascrc(&mp->m_sb))
> + goto recover_immediately;
> +
> + magic32 = be32_to_cpu(*(__be32 *)blk);
> + switch (magic32) {
> + case XFS_ABTB_CRC_MAGIC:
> + case XFS_ABTC_CRC_MAGIC:
> + case XFS_ABTB_MAGIC:
> + case XFS_ABTC_MAGIC:
> + case XFS_RMAP_CRC_MAGIC:
> + case XFS_REFC_CRC_MAGIC:
> + case XFS_IBT_CRC_MAGIC:
> + case XFS_IBT_MAGIC: {
> + struct xfs_btree_block *btb = blk;
> +
> + lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
> + uuid = &btb->bb_u.s.bb_uuid;
> + break;
> + }
> + case XFS_BMAP_CRC_MAGIC:
> + case XFS_BMAP_MAGIC: {
> + struct xfs_btree_block *btb = blk;
> +
> + lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
> + uuid = &btb->bb_u.l.bb_uuid;
> + break;
> + }
> + case XFS_AGF_MAGIC:
> + lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
> + uuid = &((struct xfs_agf *)blk)->agf_uuid;
> + break;
> + case XFS_AGFL_MAGIC:
> + lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
> + uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
> + break;
> + case XFS_AGI_MAGIC:
> + lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
> + uuid = &((struct xfs_agi *)blk)->agi_uuid;
> + break;
> + case XFS_SYMLINK_MAGIC:
> + lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
> + uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
> + break;
> + case XFS_DIR3_BLOCK_MAGIC:
> + case XFS_DIR3_DATA_MAGIC:
> + case XFS_DIR3_FREE_MAGIC:
> + lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
> + uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
> + break;
> + case XFS_ATTR3_RMT_MAGIC:
> + /*
> + * Remote attr blocks are written synchronously, rather than
> + * being logged. That means they do not contain a valid LSN
> + * (i.e. transactionally ordered) in them, and hence any time we
> + * see a buffer to replay over the top of a remote attribute
> + * block we should simply do so.
> + */
> + goto recover_immediately;
> + case XFS_SB_MAGIC:
> + /*
> + * superblock uuids are magic. We may or may not have a
> + * sb_meta_uuid on disk, but it will be set in the in-core
> + * superblock. We set the uuid pointer for verification
> + * according to the superblock feature mask to ensure we check
> + * the relevant UUID in the superblock.
> + */
> + lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
> + if (xfs_sb_version_hasmetauuid(&mp->m_sb))
> + uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
> + else
> + uuid = &((struct xfs_dsb *)blk)->sb_uuid;
> + break;
> + default:
> + break;
> + }
> +
> + if (lsn != (xfs_lsn_t)-1) {
> + if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
> + goto recover_immediately;
> + return lsn;
> + }
> +
> + magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
> + switch (magicda) {
> + case XFS_DIR3_LEAF1_MAGIC:
> + case XFS_DIR3_LEAFN_MAGIC:
> + case XFS_DA3_NODE_MAGIC:
> + lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
> + uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
> + break;
> + default:
> + break;
> + }
> +
> + if (lsn != (xfs_lsn_t)-1) {
> + if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
> + goto recover_immediately;
> + return lsn;
> + }
> +
> + /*
> + * We do individual object checks on dquot and inode buffers as they
> + * have their own individual LSN records. Also, we could have a stale
> + * buffer here, so we have to at least recognise these buffer types.
> + *
> + * A noted complexity here is inode unlinked list processing - it logs
> + * the inode directly in the buffer, but we don't know which inodes have
> + * been modified, and there is no global buffer LSN. Hence we need to
> + * recover all inode buffer types immediately. This problem will be
> + * fixed by logical logging of the unlinked list modifications.
> + */
> + magic16 = be16_to_cpu(*(__be16 *)blk);
> + switch (magic16) {
> + case XFS_DQUOT_MAGIC:
> + case XFS_DINODE_MAGIC:
> + goto recover_immediately;
> + default:
> + break;
> + }
> +
> + /* unknown buffer contents, recover immediately */
> +
> +recover_immediately:
> + return (xfs_lsn_t)-1;
> +
> +}
> +
> +/*
> + * This routine replays a modification made to a buffer at runtime.
> + * There are actually two types of buffer, regular and inode, which
> + * are handled differently. Inode buffers are handled differently
> + * in that we only recover a specific set of data from them, namely
> + * the inode di_next_unlinked fields. This is because all other inode
> + * data is actually logged via inode records and any data we replay
> + * here which overlaps that may be stale.
> + *
> + * When meta-data buffers are freed at run time we log a buffer item
> + * with the XFS_BLF_CANCEL bit set to indicate that previous copies
> + * of the buffer in the log should not be replayed at recovery time.
> + * This is so that if the blocks covered by the buffer are reused for
> + * file data before we crash we don't end up replaying old, freed
> + * meta-data into a user's file.
> + *
> + * To handle the cancellation of buffer log items, we make two passes
> + * over the log during recovery. During the first we build a table of
> + * those buffers which have been cancelled, and during the second we
> + * only replay those buffers which do not have corresponding cancel
> + * records in the table. See xlog_recover_buffer_pass[1,2] above
> + * for more details on the implementation of the table of cancel records.
> + */
> +STATIC int
> +xlog_recover_buffer_commit_pass2(
> + struct xlog *log,
> + struct list_head *buffer_list,
> + struct xlog_recover_item *item,
> + xfs_lsn_t current_lsn)
> +{
> + struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
> + struct xfs_mount *mp = log->l_mp;
> + struct xfs_buf *bp;
> + int error;
> + uint buf_flags;
> + xfs_lsn_t lsn;
> +
> + /*
> + * In this pass we only want to recover all the buffers which have
> + * not been cancelled and are not cancellation buffers themselves.
> + */
> + if (buf_f->blf_flags & XFS_BLF_CANCEL) {
> + if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
> + buf_f->blf_len))
> + goto cancelled;
> + } else {
> +
> + if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
> + buf_f->blf_len))
> + goto cancelled;
> + }
> +
> + trace_xfs_log_recover_buf_recover(log, buf_f);
> +
> + buf_flags = 0;
> + if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> + buf_flags |= XBF_UNMAPPED;
> +
> + error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
> + buf_flags, &bp, NULL);
> + if (error)
> + return error;
> +
> + /*
> + * Recover the buffer only if we get an LSN from it and it's less than
> + * the lsn of the transaction we are replaying.
> + *
> + * Note that we have to be extremely careful of readahead here.
> + * Readahead does not attach verifiers to the buffers so if we don't
> + * actually do any replay after readahead because the LSN we found
> + * in the buffer is more recent than the current transaction, then we
> + * need to attach the verifier directly. Failure to do so means that
> + * future recovery actions (e.g. EFI and unlinked list recovery) can
> + * operate on the buffers and they won't get the verifier attached. This
> + * can lead to blocks on disk having the correct content but a stale
> + * CRC.
> + *
> + * It is safe to assume these clean buffers are currently up to date.
> + * If the buffer is dirtied by a later transaction being replayed, then
> + * the verifier will be reset to match whatever recover turns that
> + * buffer into.
> + */
> + lsn = xlog_recover_get_buf_lsn(mp, bp);
> + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> + trace_xfs_log_recover_buf_skip(log, buf_f);
> + xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
> + goto out_release;
> + }
> +
> + if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
> + error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
> + if (error)
> + goto out_release;
> + } else if (buf_f->blf_flags &
> + (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> + bool dirty;
> +
> + dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
> + if (!dirty)
> + goto out_release;
> + } else {
> + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
> + }
> +
> + /*
> + * Perform delayed write on the buffer. Asynchronous writes will be
> + * slower when taking into account all the buffers to be flushed.
> + *
> + * Also make sure that only inode buffers with good sizes stay in
> + * the buffer cache. The kernel moves inodes in buffers of 1 block
> + * or inode_cluster_size bytes, whichever is bigger. The inode
> + * buffers in the log can be a different size if the log was generated
> + * by an older kernel using unclustered inode buffers or a newer kernel
> + * running with a different inode cluster size. Regardless, if the
> + * inode buffer size isn't max(blocksize, inode_cluster_size)
> + * for *our* value of inode_cluster_size, then we need to keep
> + * the buffer out of the buffer cache so that the buffer won't
> + * overlap with future reads of those inodes.
> + */
> + if (XFS_DINODE_MAGIC ==
> + be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
> + (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
> + xfs_buf_stale(bp);
> + error = xfs_bwrite(bp);
> + } else {
> + ASSERT(bp->b_mount == mp);
> + bp->b_iodone = xlog_recover_iodone;
> + xfs_buf_delwri_queue(bp, buffer_list);
> + }
> +
> +out_release:
> + xfs_buf_relse(bp);
> + return error;
> +cancelled:
> + trace_xfs_log_recover_buf_cancel(log, buf_f);
> + return 0;
> +}
> +
> const struct xlog_recover_item_type xlog_buf_item_type = {
> .reorder_fn = xlog_buf_reorder_fn,
> .ra_pass2_fn = xlog_recover_buffer_ra_pass2,
> .commit_pass1_fn = xlog_recover_buffer_commit_pass1,
> + .commit_pass2_fn = xlog_recover_buffer_commit_pass2,
> };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index fbd1f7d6f1c9..0a241f1c371a 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -284,7 +284,7 @@ xlog_header_check_mount(
> return 0;
> }
>
> -STATIC void
> +void
> xlog_recover_iodone(
> struct xfs_buf *bp)
> {
> @@ -2007,7 +2007,7 @@ xlog_add_buffer_cancelled(
> /*
> * Check if there is and entry for blkno, len in the buffer cancel record table.
> */
> -static bool
> +bool
> xlog_is_buffer_cancelled(
> struct xlog *log,
> xfs_daddr_t blkno,
> @@ -2024,7 +2024,7 @@ xlog_is_buffer_cancelled(
> * buffer is re-used again after its last cancellation we actually replay the
> * changes made at that point.
> */
> -static bool
> +bool
> xlog_put_buffer_cancelled(
> struct xlog *log,
> xfs_daddr_t blkno,
> @@ -2056,791 +2056,6 @@ xlog_buf_readahead(
> xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
> }
>
> -/*
> - * Perform recovery for a buffer full of inodes. In these buffers, the only
> - * data which should be recovered is that which corresponds to the
> - * di_next_unlinked pointers in the on disk inode structures. The rest of the
> - * data for the inodes is always logged through the inodes themselves rather
> - * than the inode buffer and is recovered in xlog_recover_inode_pass2().
> - *
> - * The only time when buffers full of inodes are fully recovered is when the
> - * buffer is full of newly allocated inodes. In this case the buffer will
> - * not be marked as an inode buffer and so will be sent to
> - * xlog_recover_do_reg_buffer() below during recovery.
> - */
> -STATIC int
> -xlog_recover_do_inode_buffer(
> - struct xfs_mount *mp,
> - xlog_recover_item_t *item,
> - struct xfs_buf *bp,
> - xfs_buf_log_format_t *buf_f)
> -{
> - int i;
> - int item_index = 0;
> - int bit = 0;
> - int nbits = 0;
> - int reg_buf_offset = 0;
> - int reg_buf_bytes = 0;
> - int next_unlinked_offset;
> - int inodes_per_buf;
> - xfs_agino_t *logged_nextp;
> - xfs_agino_t *buffer_nextp;
> -
> - trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
> -
> - /*
> - * Post recovery validation only works properly on CRC enabled
> - * filesystems.
> - */
> - if (xfs_sb_version_hascrc(&mp->m_sb))
> - bp->b_ops = &xfs_inode_buf_ops;
> -
> - inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
> - for (i = 0; i < inodes_per_buf; i++) {
> - next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
> - offsetof(xfs_dinode_t, di_next_unlinked);
> -
> - while (next_unlinked_offset >=
> - (reg_buf_offset + reg_buf_bytes)) {
> - /*
> - * The next di_next_unlinked field is beyond
> - * the current logged region. Find the next
> - * logged region that contains or is beyond
> - * the current di_next_unlinked field.
> - */
> - bit += nbits;
> - bit = xfs_next_bit(buf_f->blf_data_map,
> - buf_f->blf_map_size, bit);
> -
> - /*
> - * If there are no more logged regions in the
> - * buffer, then we're done.
> - */
> - if (bit == -1)
> - return 0;
> -
> - nbits = xfs_contig_bits(buf_f->blf_data_map,
> - buf_f->blf_map_size, bit);
> - ASSERT(nbits > 0);
> - reg_buf_offset = bit << XFS_BLF_SHIFT;
> - reg_buf_bytes = nbits << XFS_BLF_SHIFT;
> - item_index++;
> - }
> -
> - /*
> - * If the current logged region starts after the current
> - * di_next_unlinked field, then move on to the next
> - * di_next_unlinked field.
> - */
> - if (next_unlinked_offset < reg_buf_offset)
> - continue;
> -
> - ASSERT(item->ri_buf[item_index].i_addr != NULL);
> - ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
> - ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
> -
> - /*
> - * The current logged region contains a copy of the
> - * current di_next_unlinked field. Extract its value
> - * and copy it to the buffer copy.
> - */
> - logged_nextp = item->ri_buf[item_index].i_addr +
> - next_unlinked_offset - reg_buf_offset;
> - if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
> - xfs_alert(mp,
> - "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
> - "Trying to replay bad (0) inode di_next_unlinked field.",
> - item, bp);
> - return -EFSCORRUPTED;
> - }
> -
> - buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
> - *buffer_nextp = *logged_nextp;
> -
> - /*
> - * If necessary, recalculate the CRC in the on-disk inode. We
> - * have to leave the inode in a consistent state for whoever
> - * reads it next....
> - */
> - xfs_dinode_calc_crc(mp,
> - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
> -
> - }
> -
> - return 0;
> -}
> -
> -/*
> - * V5 filesystems know the age of the buffer on disk being recovered. We can
> - * have newer objects on disk than we are replaying, and so for these cases we
> - * don't want to replay the current change as that will make the buffer contents
> - * temporarily invalid on disk.
> - *
> - * The magic number might not match the buffer type we are going to recover
> - * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
> - * extract the LSN of the existing object in the buffer based on its current
> - * magic number. If we don't recognise the magic number in the buffer, then
> - * return a LSN of -1 so that the caller knows it was an unrecognised block and
> - * so can recover the buffer.
> - *
> - * Note: we cannot rely solely on magic number matches to determine that the
> - * buffer has a valid LSN - we also need to verify that it belongs to this
> - * filesystem, so we need to extract the object's LSN and compare it to that
> - * which we read from the superblock. If the UUIDs don't match, then we've got a
> - * stale metadata block from an old filesystem instance that we need to recover
> - * over the top of.
> - */
> -static xfs_lsn_t
> -xlog_recover_get_buf_lsn(
> - struct xfs_mount *mp,
> - struct xfs_buf *bp)
> -{
> - uint32_t magic32;
> - uint16_t magic16;
> - uint16_t magicda;
> - void *blk = bp->b_addr;
> - uuid_t *uuid;
> - xfs_lsn_t lsn = -1;
> -
> - /* v4 filesystems always recover immediately */
> - if (!xfs_sb_version_hascrc(&mp->m_sb))
> - goto recover_immediately;
> -
> - magic32 = be32_to_cpu(*(__be32 *)blk);
> - switch (magic32) {
> - case XFS_ABTB_CRC_MAGIC:
> - case XFS_ABTC_CRC_MAGIC:
> - case XFS_ABTB_MAGIC:
> - case XFS_ABTC_MAGIC:
> - case XFS_RMAP_CRC_MAGIC:
> - case XFS_REFC_CRC_MAGIC:
> - case XFS_IBT_CRC_MAGIC:
> - case XFS_IBT_MAGIC: {
> - struct xfs_btree_block *btb = blk;
> -
> - lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
> - uuid = &btb->bb_u.s.bb_uuid;
> - break;
> - }
> - case XFS_BMAP_CRC_MAGIC:
> - case XFS_BMAP_MAGIC: {
> - struct xfs_btree_block *btb = blk;
> -
> - lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
> - uuid = &btb->bb_u.l.bb_uuid;
> - break;
> - }
> - case XFS_AGF_MAGIC:
> - lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
> - uuid = &((struct xfs_agf *)blk)->agf_uuid;
> - break;
> - case XFS_AGFL_MAGIC:
> - lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
> - uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
> - break;
> - case XFS_AGI_MAGIC:
> - lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
> - uuid = &((struct xfs_agi *)blk)->agi_uuid;
> - break;
> - case XFS_SYMLINK_MAGIC:
> - lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
> - uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
> - break;
> - case XFS_DIR3_BLOCK_MAGIC:
> - case XFS_DIR3_DATA_MAGIC:
> - case XFS_DIR3_FREE_MAGIC:
> - lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
> - uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
> - break;
> - case XFS_ATTR3_RMT_MAGIC:
> - /*
> - * Remote attr blocks are written synchronously, rather than
> - * being logged. That means they do not contain a valid LSN
> - * (i.e. transactionally ordered) in them, and hence any time we
> - * see a buffer to replay over the top of a remote attribute
> - * block we should simply do so.
> - */
> - goto recover_immediately;
> - case XFS_SB_MAGIC:
> - /*
> - * superblock uuids are magic. We may or may not have a
> - * sb_meta_uuid on disk, but it will be set in the in-core
> - * superblock. We set the uuid pointer for verification
> - * according to the superblock feature mask to ensure we check
> - * the relevant UUID in the superblock.
> - */
> - lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
> - if (xfs_sb_version_hasmetauuid(&mp->m_sb))
> - uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
> - else
> - uuid = &((struct xfs_dsb *)blk)->sb_uuid;
> - break;
> - default:
> - break;
> - }
> -
> - if (lsn != (xfs_lsn_t)-1) {
> - if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
> - goto recover_immediately;
> - return lsn;
> - }
> -
> - magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
> - switch (magicda) {
> - case XFS_DIR3_LEAF1_MAGIC:
> - case XFS_DIR3_LEAFN_MAGIC:
> - case XFS_DA3_NODE_MAGIC:
> - lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
> - uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
> - break;
> - default:
> - break;
> - }
> -
> - if (lsn != (xfs_lsn_t)-1) {
> - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
> - goto recover_immediately;
> - return lsn;
> - }
> -
> - /*
> - * We do individual object checks on dquot and inode buffers as they
> - * have their own individual LSN records. Also, we could have a stale
> - * buffer here, so we have to at least recognise these buffer types.
> - *
> - * A noted complexity here is inode unlinked list processing - it logs
> - * the inode directly in the buffer, but we don't know which inodes have
> - * been modified, and there is no global buffer LSN. Hence we need to
> - * recover all inode buffer types immediately. This problem will be
> - * fixed by logical logging of the unlinked list modifications.
> - */
> - magic16 = be16_to_cpu(*(__be16 *)blk);
> - switch (magic16) {
> - case XFS_DQUOT_MAGIC:
> - case XFS_DINODE_MAGIC:
> - goto recover_immediately;
> - default:
> - break;
> - }
> -
> - /* unknown buffer contents, recover immediately */
> -
> -recover_immediately:
> - return (xfs_lsn_t)-1;
> -
> -}
> -
> -/*
> - * Validate the recovered buffer is of the correct type and attach the
> - * appropriate buffer operations to them for writeback. Magic numbers are in a
> - * few places:
> - * the first 16 bits of the buffer (inode buffer, dquot buffer),
> - * the first 32 bits of the buffer (most blocks),
> - * inside a struct xfs_da_blkinfo at the start of the buffer.
> - */
> -static void
> -xlog_recover_validate_buf_type(
> - struct xfs_mount *mp,
> - struct xfs_buf *bp,
> - xfs_buf_log_format_t *buf_f,
> - xfs_lsn_t current_lsn)
> -{
> - struct xfs_da_blkinfo *info = bp->b_addr;
> - uint32_t magic32;
> - uint16_t magic16;
> - uint16_t magicda;
> - char *warnmsg = NULL;
> -
> - /*
> - * We can only do post recovery validation on items on CRC enabled
> - * filesystems as we need to know when the buffer was written to be able
> - * to determine if we should have replayed the item. If we replay old
> - * metadata over a newer buffer, then it will enter a temporarily
> - * inconsistent state resulting in verification failures. Hence for now
> - * just avoid the verification stage for non-crc filesystems
> - */
> - if (!xfs_sb_version_hascrc(&mp->m_sb))
> - return;
> -
> - magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
> - magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
> - magicda = be16_to_cpu(info->magic);
> - switch (xfs_blft_from_flags(buf_f)) {
> - case XFS_BLFT_BTREE_BUF:
> - switch (magic32) {
> - case XFS_ABTB_CRC_MAGIC:
> - case XFS_ABTB_MAGIC:
> - bp->b_ops = &xfs_bnobt_buf_ops;
> - break;
> - case XFS_ABTC_CRC_MAGIC:
> - case XFS_ABTC_MAGIC:
> - bp->b_ops = &xfs_cntbt_buf_ops;
> - break;
> - case XFS_IBT_CRC_MAGIC:
> - case XFS_IBT_MAGIC:
> - bp->b_ops = &xfs_inobt_buf_ops;
> - break;
> - case XFS_FIBT_CRC_MAGIC:
> - case XFS_FIBT_MAGIC:
> - bp->b_ops = &xfs_finobt_buf_ops;
> - break;
> - case XFS_BMAP_CRC_MAGIC:
> - case XFS_BMAP_MAGIC:
> - bp->b_ops = &xfs_bmbt_buf_ops;
> - break;
> - case XFS_RMAP_CRC_MAGIC:
> - bp->b_ops = &xfs_rmapbt_buf_ops;
> - break;
> - case XFS_REFC_CRC_MAGIC:
> - bp->b_ops = &xfs_refcountbt_buf_ops;
> - break;
> - default:
> - warnmsg = "Bad btree block magic!";
> - break;
> - }
> - break;
> - case XFS_BLFT_AGF_BUF:
> - if (magic32 != XFS_AGF_MAGIC) {
> - warnmsg = "Bad AGF block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_agf_buf_ops;
> - break;
> - case XFS_BLFT_AGFL_BUF:
> - if (magic32 != XFS_AGFL_MAGIC) {
> - warnmsg = "Bad AGFL block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_agfl_buf_ops;
> - break;
> - case XFS_BLFT_AGI_BUF:
> - if (magic32 != XFS_AGI_MAGIC) {
> - warnmsg = "Bad AGI block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_agi_buf_ops;
> - break;
> - case XFS_BLFT_UDQUOT_BUF:
> - case XFS_BLFT_PDQUOT_BUF:
> - case XFS_BLFT_GDQUOT_BUF:
> -#ifdef CONFIG_XFS_QUOTA
> - if (magic16 != XFS_DQUOT_MAGIC) {
> - warnmsg = "Bad DQUOT block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_dquot_buf_ops;
> -#else
> - xfs_alert(mp,
> - "Trying to recover dquots without QUOTA support built in!");
> - ASSERT(0);
> -#endif
> - break;
> - case XFS_BLFT_DINO_BUF:
> - if (magic16 != XFS_DINODE_MAGIC) {
> - warnmsg = "Bad INODE block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_inode_buf_ops;
> - break;
> - case XFS_BLFT_SYMLINK_BUF:
> - if (magic32 != XFS_SYMLINK_MAGIC) {
> - warnmsg = "Bad symlink block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_symlink_buf_ops;
> - break;
> - case XFS_BLFT_DIR_BLOCK_BUF:
> - if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
> - magic32 != XFS_DIR3_BLOCK_MAGIC) {
> - warnmsg = "Bad dir block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_dir3_block_buf_ops;
> - break;
> - case XFS_BLFT_DIR_DATA_BUF:
> - if (magic32 != XFS_DIR2_DATA_MAGIC &&
> - magic32 != XFS_DIR3_DATA_MAGIC) {
> - warnmsg = "Bad dir data magic!";
> - break;
> - }
> - bp->b_ops = &xfs_dir3_data_buf_ops;
> - break;
> - case XFS_BLFT_DIR_FREE_BUF:
> - if (magic32 != XFS_DIR2_FREE_MAGIC &&
> - magic32 != XFS_DIR3_FREE_MAGIC) {
> - warnmsg = "Bad dir3 free magic!";
> - break;
> - }
> - bp->b_ops = &xfs_dir3_free_buf_ops;
> - break;
> - case XFS_BLFT_DIR_LEAF1_BUF:
> - if (magicda != XFS_DIR2_LEAF1_MAGIC &&
> - magicda != XFS_DIR3_LEAF1_MAGIC) {
> - warnmsg = "Bad dir leaf1 magic!";
> - break;
> - }
> - bp->b_ops = &xfs_dir3_leaf1_buf_ops;
> - break;
> - case XFS_BLFT_DIR_LEAFN_BUF:
> - if (magicda != XFS_DIR2_LEAFN_MAGIC &&
> - magicda != XFS_DIR3_LEAFN_MAGIC) {
> - warnmsg = "Bad dir leafn magic!";
> - break;
> - }
> - bp->b_ops = &xfs_dir3_leafn_buf_ops;
> - break;
> - case XFS_BLFT_DA_NODE_BUF:
> - if (magicda != XFS_DA_NODE_MAGIC &&
> - magicda != XFS_DA3_NODE_MAGIC) {
> - warnmsg = "Bad da node magic!";
> - break;
> - }
> - bp->b_ops = &xfs_da3_node_buf_ops;
> - break;
> - case XFS_BLFT_ATTR_LEAF_BUF:
> - if (magicda != XFS_ATTR_LEAF_MAGIC &&
> - magicda != XFS_ATTR3_LEAF_MAGIC) {
> - warnmsg = "Bad attr leaf magic!";
> - break;
> - }
> - bp->b_ops = &xfs_attr3_leaf_buf_ops;
> - break;
> - case XFS_BLFT_ATTR_RMT_BUF:
> - if (magic32 != XFS_ATTR3_RMT_MAGIC) {
> - warnmsg = "Bad attr remote magic!";
> - break;
> - }
> - bp->b_ops = &xfs_attr3_rmt_buf_ops;
> - break;
> - case XFS_BLFT_SB_BUF:
> - if (magic32 != XFS_SB_MAGIC) {
> - warnmsg = "Bad SB block magic!";
> - break;
> - }
> - bp->b_ops = &xfs_sb_buf_ops;
> - break;
> -#ifdef CONFIG_XFS_RT
> - case XFS_BLFT_RTBITMAP_BUF:
> - case XFS_BLFT_RTSUMMARY_BUF:
> - /* no magic numbers for verification of RT buffers */
> - bp->b_ops = &xfs_rtbuf_ops;
> - break;
> -#endif /* CONFIG_XFS_RT */
> - default:
> - xfs_warn(mp, "Unknown buffer type %d!",
> - xfs_blft_from_flags(buf_f));
> - break;
> - }
> -
> - /*
> - * Nothing else to do in the case of a NULL current LSN as this means
> - * the buffer is more recent than the change in the log and will be
> - * skipped.
> - */
> - if (current_lsn == NULLCOMMITLSN)
> - return;
> -
> - if (warnmsg) {
> - xfs_warn(mp, warnmsg);
> - ASSERT(0);
> - }
> -
> - /*
> - * We must update the metadata LSN of the buffer as it is written out to
> - * ensure that older transactions never replay over this one and corrupt
> - * the buffer. This can occur if log recovery is interrupted at some
> - * point after the current transaction completes, at which point a
> - * subsequent mount starts recovery from the beginning.
> - *
> - * Write verifiers update the metadata LSN from log items attached to
> - * the buffer. Therefore, initialize a bli purely to carry the LSN to
> - * the verifier. We'll clean it up in our ->iodone() callback.
> - */
> - if (bp->b_ops) {
> - struct xfs_buf_log_item *bip;
> -
> - ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
> - bp->b_iodone = xlog_recover_iodone;
> - xfs_buf_item_init(bp, mp);
> - bip = bp->b_log_item;
> - bip->bli_item.li_lsn = current_lsn;
> - }
> -}
> -
> -/*
> - * Perform a 'normal' buffer recovery. Each logged region of the
> - * buffer should be copied over the corresponding region in the
> - * given buffer. The bitmap in the buf log format structure indicates
> - * where to place the logged data.
> - */
> -STATIC void
> -xlog_recover_do_reg_buffer(
> - struct xfs_mount *mp,
> - xlog_recover_item_t *item,
> - struct xfs_buf *bp,
> - xfs_buf_log_format_t *buf_f,
> - xfs_lsn_t current_lsn)
> -{
> - int i;
> - int bit;
> - int nbits;
> - xfs_failaddr_t fa;
> - const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot);
> -
> - trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
> -
> - bit = 0;
> - i = 1; /* 0 is the buf format structure */
> - while (1) {
> - bit = xfs_next_bit(buf_f->blf_data_map,
> - buf_f->blf_map_size, bit);
> - if (bit == -1)
> - break;
> - nbits = xfs_contig_bits(buf_f->blf_data_map,
> - buf_f->blf_map_size, bit);
> - ASSERT(nbits > 0);
> - ASSERT(item->ri_buf[i].i_addr != NULL);
> - ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
> - ASSERT(BBTOB(bp->b_length) >=
> - ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
> -
> - /*
> - * The dirty regions logged in the buffer, even though
> - * contiguous, may span multiple chunks. This is because the
> - * dirty region may span a physical page boundary in a buffer
> - * and hence be split into two separate vectors for writing into
> - * the log. Hence we need to trim nbits back to the length of
> - * the current region being copied out of the log.
> - */
> - if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
> - nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
> -
> - /*
> - * Do a sanity check if this is a dquot buffer. Just checking
> - * the first dquot in the buffer should do. XXXThis is
> - * probably a good thing to do for other buf types also.
> - */
> - fa = NULL;
> - if (buf_f->blf_flags &
> - (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> - if (item->ri_buf[i].i_addr == NULL) {
> - xfs_alert(mp,
> - "XFS: NULL dquot in %s.", __func__);
> - goto next;
> - }
> - if (item->ri_buf[i].i_len < size_disk_dquot) {
> - xfs_alert(mp,
> - "XFS: dquot too small (%d) in %s.",
> - item->ri_buf[i].i_len, __func__);
> - goto next;
> - }
> - fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
> - -1, 0);
> - if (fa) {
> - xfs_alert(mp,
> - "dquot corrupt at %pS trying to replay into block 0x%llx",
> - fa, bp->b_bn);
> - goto next;
> - }
> - }
> -
> - memcpy(xfs_buf_offset(bp,
> - (uint)bit << XFS_BLF_SHIFT), /* dest */
> - item->ri_buf[i].i_addr, /* source */
> - nbits<<XFS_BLF_SHIFT); /* length */
> - next:
> - i++;
> - bit += nbits;
> - }
> -
> - /* Shouldn't be any more regions */
> - ASSERT(i == item->ri_total);
> -
> - xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
> -}
> -
> -/*
> - * Perform a dquot buffer recovery.
> - * Simple algorithm: if we have found a QUOTAOFF log item of the same type
> - * (ie. USR or GRP), then just toss this buffer away; don't recover it.
> - * Else, treat it as a regular buffer and do recovery.
> - *
> - * Return false if the buffer was tossed and true if we recovered the buffer to
> - * indicate to the caller if the buffer needs writing.
> - */
> -STATIC bool
> -xlog_recover_do_dquot_buffer(
> - struct xfs_mount *mp,
> - struct xlog *log,
> - struct xlog_recover_item *item,
> - struct xfs_buf *bp,
> - struct xfs_buf_log_format *buf_f)
> -{
> - uint type;
> -
> - trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
> -
> - /*
> - * Filesystems are required to send in quota flags at mount time.
> - */
> - if (!mp->m_qflags)
> - return false;
> -
> - type = 0;
> - if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
> - type |= XFS_DQ_USER;
> - if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
> - type |= XFS_DQ_PROJ;
> - if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
> - type |= XFS_DQ_GROUP;
> - /*
> - * This type of quotas was turned off, so ignore this buffer
> - */
> - if (log->l_quotaoffs_flag & type)
> - return false;
> -
> - xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
> - return true;
> -}
> -
> -/*
> - * This routine replays a modification made to a buffer at runtime.
> - * There are actually two types of buffer, regular and inode, which
> - * are handled differently. Inode buffers are handled differently
> - * in that we only recover a specific set of data from them, namely
> - * the inode di_next_unlinked fields. This is because all other inode
> - * data is actually logged via inode records and any data we replay
> - * here which overlaps that may be stale.
> - *
> - * When meta-data buffers are freed at run time we log a buffer item
> - * with the XFS_BLF_CANCEL bit set to indicate that previous copies
> - * of the buffer in the log should not be replayed at recovery time.
> - * This is so that if the blocks covered by the buffer are reused for
> - * file data before we crash we don't end up replaying old, freed
> - * meta-data into a user's file.
> - *
> - * To handle the cancellation of buffer log items, we make two passes
> - * over the log during recovery. During the first we build a table of
> - * those buffers which have been cancelled, and during the second we
> - * only replay those buffers which do not have corresponding cancel
> - * records in the table. See xlog_recover_buffer_pass[1,2] above
> - * for more details on the implementation of the table of cancel records.
> - */
> -STATIC int
> -xlog_recover_buffer_pass2(
> - struct xlog *log,
> - struct list_head *buffer_list,
> - struct xlog_recover_item *item,
> - xfs_lsn_t current_lsn)
> -{
> - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
> - xfs_mount_t *mp = log->l_mp;
> - xfs_buf_t *bp;
> - int error;
> - uint buf_flags;
> - xfs_lsn_t lsn;
> -
> - /*
> - * In this pass we only want to recover all the buffers which have
> - * not been cancelled and are not cancellation buffers themselves.
> - */
> - if (buf_f->blf_flags & XFS_BLF_CANCEL) {
> - if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
> - buf_f->blf_len))
> - goto cancelled;
> - } else {
> -
> - if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
> - buf_f->blf_len))
> - goto cancelled;
> - }
> -
> - trace_xfs_log_recover_buf_recover(log, buf_f);
> -
> - buf_flags = 0;
> - if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> - buf_flags |= XBF_UNMAPPED;
> -
> - error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
> - buf_flags, &bp, NULL);
> - if (error)
> - return error;
> -
> - /*
> - * Recover the buffer only if we get an LSN from it and it's less than
> - * the lsn of the transaction we are replaying.
> - *
> - * Note that we have to be extremely careful of readahead here.
> - * Readahead does not attach verifiers to the buffers so if we don't
> - * actually do any replay after readahead because the LSN we found
> - * in the buffer is more recent than the current transaction then we
> - * need to attach the verifier directly. Failure to do so means that
> - * future recovery actions (e.g. EFI and unlinked list recovery) can
> - * operate on the buffers and they won't get the verifier attached. This
> - * can lead to blocks on disk having the correct content but a stale
> - * CRC.
> - *
> - * It is safe to assume these clean buffers are currently up to date.
> - * If the buffer is dirtied by a later transaction being replayed, then
> - * the verifier will be reset to match whatever recover turns that
> - * buffer into.
> - */
> - lsn = xlog_recover_get_buf_lsn(mp, bp);
> - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> - trace_xfs_log_recover_buf_skip(log, buf_f);
> - xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
> - goto out_release;
> - }
> -
> - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
> - error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
> - if (error)
> - goto out_release;
> - } else if (buf_f->blf_flags &
> - (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> - bool dirty;
> -
> - dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
> - if (!dirty)
> - goto out_release;
> - } else {
> - xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
> - }
> -
> - /*
> - * Perform delayed write on the buffer. Asynchronous writes will be
> - * slower when taking into account all the buffers to be flushed.
> - *
> - * Also make sure that only inode buffers with good sizes stay in
> - * the buffer cache. The kernel moves inodes in buffers of 1 block
> - * or inode_cluster_size bytes, whichever is bigger. The inode
> - * buffers in the log can be a different size if the log was generated
> - * by an older kernel using unclustered inode buffers or a newer kernel
> - * running with a different inode cluster size. Regardless, if the
> - * inode buffer size isn't max(blocksize, inode_cluster_size)
> - * for *our* value of inode_cluster_size, then we need to keep
> - * the buffer out of the buffer cache so that the buffer won't
> - * overlap with future reads of those inodes.
> - */
> - if (XFS_DINODE_MAGIC ==
> - be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
> - (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
> - xfs_buf_stale(bp);
> - error = xfs_bwrite(bp);
> - } else {
> - ASSERT(bp->b_mount == mp);
> - bp->b_iodone = xlog_recover_iodone;
> - xfs_buf_delwri_queue(bp, buffer_list);
> - }
> -
> -out_release:
> - xfs_buf_relse(bp);
> - return error;
> -cancelled:
> - trace_xfs_log_recover_buf_cancel(log, buf_f);
> - return 0;
> -}
> -
> /*
> * Inode fork owner changes
> *
> @@ -3887,10 +3102,11 @@ xlog_recover_commit_pass2(
> {
> trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
>
> + if (item->ri_type && item->ri_type->commit_pass2_fn)
> + return item->ri_type->commit_pass2_fn(log, buffer_list, item,
> + trans->r_lsn);
> +
> switch (ITEM_TYPE(item)) {
> - case XFS_LI_BUF:
> - return xlog_recover_buffer_pass2(log, buffer_list, item,
> - trans->r_lsn);
> case XFS_LI_INODE:
> return xlog_recover_inode_pass2(log, buffer_list, item,
> trans->r_lsn);
>
>
--
chandan
next prev parent reply other threads:[~2020-05-01 13:49 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-30 0:47 [PATCH v2 00/21] xfs: refactor log recovery Darrick J. Wong
2020-04-30 0:47 ` [PATCH 01/21] xfs: refactor log recovery item sorting into a generic dispatch structure Darrick J. Wong
2020-04-30 5:53 ` Christoph Hellwig
2020-04-30 15:08 ` Darrick J. Wong
2020-04-30 18:16 ` Darrick J. Wong
2020-05-01 8:08 ` Christoph Hellwig
2020-05-01 10:40 ` Chandan Rajendra
2020-04-30 0:47 ` [PATCH 02/21] xfs: refactor log recovery item dispatch for pass2 readhead functions Darrick J. Wong
2020-05-01 12:10 ` Chandan Rajendra
2020-04-30 0:47 ` [PATCH 03/21] xfs: refactor log recovery item dispatch for pass1 commit functions Darrick J. Wong
2020-04-30 0:48 ` [PATCH 04/21] xfs: refactor log recovery buffer item dispatch for pass2 " Darrick J. Wong
2020-05-01 13:43 ` Chandan Rajendra [this message]
2020-04-30 0:48 ` [PATCH 05/21] xfs: refactor log recovery inode " Darrick J. Wong
2020-05-01 14:03 ` Chandan Rajendra
2020-04-30 0:48 ` [PATCH 06/21] xfs: refactor log recovery dquot " Darrick J. Wong
2020-05-01 14:14 ` Chandan Rajendra
2020-04-30 0:48 ` [PATCH 07/21] xfs: refactor log recovery icreate " Darrick J. Wong
2020-05-01 14:18 ` Chandan Rajendra
2020-04-30 0:48 ` [PATCH 08/21] xfs: remove log recovery quotaoff " Darrick J. Wong
2020-05-01 15:09 ` Chandan Rajendra
2020-05-01 17:41 ` Darrick J. Wong
2020-04-30 0:48 ` [PATCH 09/21] xfs: refactor log recovery EFI " Darrick J. Wong
2020-05-01 10:28 ` Christoph Hellwig
2020-05-01 17:56 ` Darrick J. Wong
2020-04-30 0:48 ` [PATCH 10/21] xfs: refactor log recovery RUI " Darrick J. Wong
2020-04-30 0:48 ` [PATCH 11/21] xfs: refactor log recovery CUI " Darrick J. Wong
2020-04-30 0:48 ` [PATCH 12/21] xfs: refactor log recovery BUI " Darrick J. Wong
2020-04-30 0:48 ` [PATCH 13/21] xfs: refactor recovered EFI log item playback Darrick J. Wong
2020-05-01 10:19 ` Christoph Hellwig
2020-05-01 17:58 ` Darrick J. Wong
2020-04-30 0:49 ` [PATCH 14/21] xfs: refactor recovered RUI " Darrick J. Wong
2020-04-30 0:49 ` [PATCH 15/21] xfs: refactor recovered CUI " Darrick J. Wong
2020-04-30 0:49 ` [PATCH 16/21] xfs: refactor recovered BUI " Darrick J. Wong
2020-04-30 0:49 ` [PATCH 17/21] xfs: refactor releasing finished intents during log recovery Darrick J. Wong
2020-04-30 0:49 ` [PATCH 18/21] xfs: refactor adding recovered intent items to the log Darrick J. Wong
2020-04-30 0:49 ` [PATCH 19/21] xfs: refactor intent item RECOVERED flag into the log item Darrick J. Wong
2020-04-30 0:49 ` [PATCH 20/21] xfs: refactor intent item iop_recover calls Darrick J. Wong
2020-04-30 0:49 ` [PATCH 21/21] xfs: remove unnecessary includes from xfs_log_recover.c Darrick J. Wong
2020-05-01 10:15 ` [PATCH v2 00/21] xfs: refactor log recovery Christoph Hellwig
2020-05-01 16:53 ` Darrick J. Wong
2020-05-01 17:03 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5110385.3Rn1M7ztmW@localhost.localdomain \
--to=chandan@linux.ibm.com \
--cc=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).