From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 7/7] xfs: Inode create item recovery
Date: Tue, 7 May 2013 18:04:36 +1000 [thread overview]
Message-ID: <1367913876-30086-8-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1367913876-30086-1-git-send-email-david@fromorbit.com>
When we find a icreate transaction, we need to get and initialise
the buffers in the range that has been passed. Extract and verify
the information in the item record, then loop over the range
initialising and issuing the buffer writes delayed.
Support an arbitrary size range to initialise so that in
future when we allocate inodes in much larger chunks all kernels
that understand this transaction can still recover them.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_ialloc.c | 24 ++++++++----
fs/xfs/xfs_ialloc.h | 5 ++-
fs/xfs/xfs_log_recover.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 113 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9126b59..51473ee 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -151,12 +151,16 @@ xfs_check_agi_freecount(
#endif
/*
- * Initialise a new set of inodes.
+ * Initialise a new set of inodes. When called without a transaction context
+ * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
+ * than logging them (which in a transaction context puts them into the AIL
+ * for writeback rather than the xfsbufd queue).
*/
int
xfs_ialloc_inode_init(
struct xfs_mount *mp,
struct xfs_trans *tp,
+ struct list_head *buffer_list,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_agblock_t length,
@@ -218,8 +222,9 @@ xfs_ialloc_inode_init(
* Hence we only need to log the buffers as "ordered" buffers so
* they track in the AIL as if they were physically logged.
*/
- xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp),
- mp->m_sb.sb_inodesize, length, gen);
+ if (tp)
+ xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp),
+ mp->m_sb.sb_inodesize, length, gen);
} else if (xfs_sb_version_hasnlink(&mp->m_sb))
version = 2;
else
@@ -254,14 +259,14 @@ xfs_ialloc_inode_init(
ino++;
uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
xfs_dinode_calc_crc(mp, free);
- } else {
+ } else if (tp) {
/* just log the inode core */
xfs_trans_log_buf(tp, fbuf, ioffset,
ioffset + isize - 1);
}
}
- if (version == 3) {
+ if (version == 3 && tp) {
/*
* Mark the buffer as ordered so that it is not logged
* in the transaction but still tracked in the AIL as
@@ -279,7 +284,10 @@ xfs_ialloc_inode_init(
* past this transaction (i.e. by preventing relogging from
* moving it forward in the log).
*/
- xfs_trans_inode_alloc_buf(tp, fbuf);
+ if (tp)
+ xfs_trans_inode_alloc_buf(tp, fbuf);
+ else
+ xfs_buf_delwri_queue(fbuf, buffer_list);
}
return 0;
}
@@ -324,7 +332,7 @@ xfs_ialloc_ag_alloc(
* First try to allocate inodes contiguous with the last-allocated
* chunk of inodes. If the filesystem is striped, this will fill
* an entire stripe unit with inodes.
- */
+ */
agi = XFS_BUF_TO_AGI(agbp);
newino = be32_to_cpu(agi->agi_newino);
agno = be32_to_cpu(agi->agi_seqno);
@@ -423,7 +431,7 @@ xfs_ialloc_ag_alloc(
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
args.len, prandom_u32());
if (error)
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index ec1cc48..ce6980b 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -156,7 +156,8 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
* Inode buffer initialisation routine
*/
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_agblock_t length, unsigned int gen);
+ struct list_head *buffer_list,
+ xfs_agnumber_t agno, xfs_agblock_t agbno,
+ xfs_agblock_t length, unsigned int gen);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec..6d608ce 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -45,6 +45,7 @@
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_icreate_item.h"
/* Need all the magic numbers and buffer ops structures from these headers */
#include "xfs_symlink.h"
@@ -1618,6 +1619,9 @@ xlog_recover_reorder_trans(
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
switch (ITEM_TYPE(item)) {
+ case XFS_LI_ICREATE:
+ list_move(&item->ri_list, &trans->r_itemq);
+ break;
case XFS_LI_BUF:
if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
trace_xfs_log_recover_item_reorder_head(log,
@@ -2886,6 +2890,93 @@ xlog_recover_efd_pass2(
}
/*
+ * This routine is called when an inode create format structure is found in a
+ * committed transaction in the log. It's purpose is to initialise the inodes
+ * being allocated on disk. This requires us to get inode cluster buffers that
+ * match the range to be intialised, stamped with inode templates and written
+ * by delayed write so that subsequent modifications will hit the cached buffer
+ * and only need writing out at the end of recovery.
+ */
+STATIC int
+xlog_recover_do_icreate_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ xlog_recover_item_t *item)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_icreate_log *icl;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ unsigned int count;
+ unsigned int isize;
+ xfs_agblock_t length;
+
+ icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
+ if (icl->icl_type != XFS_LI_ICREATE) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
+ return EINVAL;
+ }
+
+ if (icl->icl_size != 1) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
+ return EINVAL;
+ }
+
+ agno = be32_to_cpu(icl->icl_ag);
+ if (agno >= mp->m_sb.sb_agcount) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
+ return EINVAL;
+ }
+ agbno = be32_to_cpu(icl->icl_agbno);
+ if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
+ return EINVAL;
+ }
+ isize = be32_to_cpu(icl->icl_isize);
+ if (isize != mp->m_sb.sb_inodesize) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
+ return EINVAL;
+ }
+ count = be32_to_cpu(icl->icl_count);
+ if (!count) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
+ return EINVAL;
+ }
+ length = be32_to_cpu(icl->icl_length);
+ if (!length || length >= mp->m_sb.sb_agblocks) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
+ return EINVAL;
+ }
+
+ /* existing allocation is fixed value */
+ ASSERT(count == XFS_IALLOC_INODES(mp));
+ ASSERT(length == XFS_IALLOC_BLOCKS(mp));
+ if (count != XFS_IALLOC_INODES(mp) ||
+ length != XFS_IALLOC_BLOCKS(mp)) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
+ return EINVAL;
+ }
+
+ /*
+ * Inode buffers can be freed. Do not replay the inode initialisation as
+ * we could be overwriting something written after this inode buffer was
+ * cancelled.
+ *
+ * XXX: we need to iterate all buffers and only init those that are not
+ * cancelled. I think that a more fine grained factoring of
+ * xfs_ialloc_inode_init may be appropriate here to enable this to be
+ * done easily.
+ */
+ if (xlog_check_buffer_cancelled(log,
+ XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
+ return 0;
+
+ xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
+ be32_to_cpu(icl->icl_gen));
+ return 0;
+}
+
+/*
* Free up any resources allocated by the transaction
*
* Remember that EFIs, EFDs, and IUNLINKs are handled later.
@@ -2927,6 +3018,7 @@ xlog_recover_commit_pass1(
case XFS_LI_EFI:
case XFS_LI_EFD:
case XFS_LI_DQUOT:
+ case XFS_LI_ICREATE:
/* nothing to do in pass 1 */
return 0;
default:
@@ -2957,6 +3049,8 @@ xlog_recover_commit_pass2(
return xlog_recover_efd_pass2(log, item);
case XFS_LI_DQUOT:
return xlog_recover_dquot_pass2(log, buffer_list, item);
+ case XFS_LI_ICREATE:
+ return xlog_recover_do_icreate_pass2(log, buffer_list, item);
case XFS_LI_QUOTAOFF:
/* nothing to do in pass2 */
return 0;
--
1.7.10.4
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
prev parent reply other threads:[~2013-05-07 8:04 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-07 8:04 [RFC, PATCH 0/7] xfs: new icreate transaction Dave Chinner
2013-05-07 8:04 ` [PATCH 1/7] xfs: don't do IO when creating an new inode Dave Chinner
2013-05-07 8:04 ` [PATCH 2/7] xfs: xfs_ifree doesn't need to modify the inode buffer Dave Chinner
2013-05-07 8:04 ` [PATCH 3/7] xfs: Introduce an ordered buffer item Dave Chinner
2013-05-07 8:04 ` [PATCH 4/7] xfs: Inode create log items Dave Chinner
2013-05-07 8:04 ` [PATCH 5/7] xfs: Inode create transaction reservations Dave Chinner
2013-05-07 8:04 ` [PATCH 6/7] xfs: Use inode create transaction Dave Chinner
2013-05-07 8:04 ` Dave Chinner [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1367913876-30086-8-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox