From: "Darrick J. Wong" <djwong@kernel.org>
To: Allison Henderson <allison.henderson@oracle.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH v25 05/12] xfs: Implement attr logging and replay
Date: Tue, 23 Nov 2021 16:11:29 -0800 [thread overview]
Message-ID: <20211124001129.GA266024@magnolia> (raw)
In-Reply-To: <20211117041343.3050202-6-allison.henderson@oracle.com>
On Tue, Nov 16, 2021 at 09:13:36PM -0700, Allison Henderson wrote:
> This patch adds the needed routines to create, log and recover logged
> extended attribute intents.
>
> Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
> Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
> ---
> fs/xfs/libxfs/xfs_defer.c | 1 +
> fs/xfs/libxfs/xfs_defer.h | 1 +
> fs/xfs/libxfs/xfs_format.h | 9 +-
> fs/xfs/xfs_attr_item.c | 362 +++++++++++++++++++++++++++++++++++++
> 4 files changed, 372 insertions(+), 1 deletion(-)
>
> diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
> index 51574f0371b5..877c0f7ee557 100644
> --- a/fs/xfs/libxfs/xfs_defer.c
> +++ b/fs/xfs/libxfs/xfs_defer.c
> @@ -185,6 +185,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
> [XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type,
> [XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
> [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
> + [XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type,
> };
>
> static bool
> diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
> index fcd23e5cf1ee..114a3a4930a3 100644
> --- a/fs/xfs/libxfs/xfs_defer.h
> +++ b/fs/xfs/libxfs/xfs_defer.h
> @@ -19,6 +19,7 @@ enum xfs_defer_ops_type {
> XFS_DEFER_OPS_TYPE_RMAP,
> XFS_DEFER_OPS_TYPE_FREE,
> XFS_DEFER_OPS_TYPE_AGFL_FREE,
> + XFS_DEFER_OPS_TYPE_ATTR,
> XFS_DEFER_OPS_TYPE_MAX,
> };
>
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index d665c04e69dd..302b50bc5830 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -388,7 +388,9 @@ xfs_sb_has_incompat_feature(
> return (sbp->sb_features_incompat & feature) != 0;
> }
>
> -#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
> +#define XFS_SB_FEAT_INCOMPAT_LOG_XATTRS (1 << 0) /* Delayed Attributes */
> +#define XFS_SB_FEAT_INCOMPAT_LOG_ALL \
> + (XFS_SB_FEAT_INCOMPAT_LOG_XATTRS)
> #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
> static inline bool
> xfs_sb_has_incompat_log_feature(
> @@ -413,6 +415,11 @@ xfs_sb_add_incompat_log_features(
> sbp->sb_features_log_incompat |= features;
> }
>
> +static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp)
> +{
> + return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat &
> + XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
> +}
>
> static inline bool
> xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
> diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
> index 3c0dfb32f2eb..5d0ab9a8504e 100644
> --- a/fs/xfs/xfs_attr_item.c
> +++ b/fs/xfs/xfs_attr_item.c
> @@ -13,6 +13,7 @@
> #include "xfs_defer.h"
> #include "xfs_log_format.h"
> #include "xfs_trans.h"
> +#include "xfs_bmap_btree.h"
> #include "xfs_trans_priv.h"
> #include "xfs_log.h"
> #include "xfs_inode.h"
> @@ -30,6 +31,8 @@
>
> static const struct xfs_item_ops xfs_attri_item_ops;
> static const struct xfs_item_ops xfs_attrd_item_ops;
> +static struct xfs_attrd_log_item *xfs_trans_get_attrd(struct xfs_trans *tp,
> + struct xfs_attri_log_item *attrip);
>
> static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip)
> {
> @@ -254,6 +257,163 @@ xfs_attrd_item_release(
> xfs_attrd_item_free(attrdp);
> }
>
> +/*
> + * Performs one step of an attribute update intent and marks the attrd item
> + * dirty.. An attr operation may be a set or a remove. Note that the
> + * transaction is marked dirty regardless of whether the operation succeeds or
> + * fails to support the ATTRI/ATTRD lifecycle rules.
> + */
> +STATIC int
> +xfs_trans_attr_finish_update(
You could shorten the name of this to xfs_xattri_finish_update.
> + struct xfs_delattr_context *dac,
> + struct xfs_attrd_log_item *attrdp,
> + struct xfs_buf **leaf_bp,
> + uint32_t op_flags)
> +{
> + struct xfs_da_args *args = dac->da_args;
> + unsigned int op = op_flags &
> + XFS_ATTR_OP_FLAGS_TYPE_MASK;
> + int error;
> +
> + switch (op) {
> + case XFS_ATTR_OP_FLAGS_SET:
> + error = xfs_attr_set_iter(dac, leaf_bp);
> + break;
> + case XFS_ATTR_OP_FLAGS_REMOVE:
> + ASSERT(XFS_IFORK_Q(args->dp));
> + error = xfs_attr_remove_iter(dac);
> + break;
> + default:
> + error = -EFSCORRUPTED;
> + break;
> + }
> +
> + /*
> + * Mark the transaction dirty, even on error. This ensures the
> + * transaction is aborted, which:
> + *
> + * 1.) releases the ATTRI and frees the ATTRD
> + * 2.) shuts down the filesystem
> + */
> + args->trans->t_flags |= XFS_TRANS_DIRTY;
> +
> + /*
> + * attr intent/done items are null when delayed attributes are disabled
logged attributes?
> + */
> + if (attrdp)
> + set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
> +
> + return error;
> +}
> +
> +/* Log an attr to the intent item. */
> +STATIC void
> +xfs_attr_log_item(
> + struct xfs_trans *tp,
> + struct xfs_attri_log_item *attrip,
> + struct xfs_attr_item *attr)
> +{
> + struct xfs_attri_log_format *attrp;
> +
> + tp->t_flags |= XFS_TRANS_DIRTY;
> + set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
> +
> + /*
> + * At this point the xfs_attr_item has been constructed, and we've
> + * created the log intent. Fill in the attri log item and log format
> + * structure with fields from this xfs_attr_item
> + */
> + attrp = &attrip->attri_format;
> + attrp->alfi_ino = attr->xattri_dac.da_args->dp->i_ino;
> + attrp->alfi_op_flags = attr->xattri_op_flags;
> + attrp->alfi_value_len = attr->xattri_dac.da_args->valuelen;
> + attrp->alfi_name_len = attr->xattri_dac.da_args->namelen;
> + attrp->alfi_attr_flags = attr->xattri_dac.da_args->attr_filter;
> +
> + attrip->attri_name = (void *)attr->xattri_dac.da_args->name;
> + attrip->attri_value = attr->xattri_dac.da_args->value;
> + attrip->attri_name_len = attr->xattri_dac.da_args->namelen;
> + attrip->attri_value_len = attr->xattri_dac.da_args->valuelen;
> +}
> +
> +/* Get an ATTRI. */
> +static struct xfs_log_item *
> +xfs_attr_create_intent(
> + struct xfs_trans *tp,
> + struct list_head *items,
> + unsigned int count,
> + bool sort)
> +{
> + struct xfs_mount *mp = tp->t_mountp;
> + struct xfs_attri_log_item *attrip;
> + struct xfs_attr_item *attr;
> +
> + ASSERT(count == 1);
> +
> + if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
> + return NULL;
> +
> + attrip = xfs_attri_init(mp, 0);
> + if (attrip == NULL)
> + return NULL;
Hm. To answer my question from before -- if we can't allocate memory
for the xfs_attri_log_item, we silently fall back to not logging xattr
updates. That's not good, especially since the fs is configured for
LARP mode. At least for this callsite, the allocation should be
GFP_NOFAIL to avoid this.
(For recovery, which AFAICT is the only caller to make large
allocations, I think it's ok to omit GFP_NOFAIL since that shouldn't
happen, and having to rerun log recovery isn't the end of the world...)
> +
> + xfs_trans_add_item(tp, &attrip->attri_item);
> + list_for_each_entry(attr, items, xattri_list)
> + xfs_attr_log_item(tp, attrip, attr);
> + return &attrip->attri_item;
> +}
> +
> +/* Process an attr. */
> +STATIC int
> +xfs_attr_finish_item(
> + struct xfs_trans *tp,
> + struct xfs_log_item *done,
> + struct list_head *item,
> + struct xfs_btree_cur **state)
> +{
> + struct xfs_attr_item *attr;
> + struct xfs_attrd_log_item *done_item = NULL;
> + int error;
> + struct xfs_delattr_context *dac;
> +
> + attr = container_of(item, struct xfs_attr_item, xattri_list);
> + dac = &attr->xattri_dac;
> + if (done)
> + done_item = ATTRD_ITEM(done);
> +
> + /*
> + * Always reset trans after EAGAIN cycle
> + * since the transaction is new
> + */
> + dac->da_args->trans = tp;
> +
> + error = xfs_trans_attr_finish_update(dac, done_item, &dac->leaf_bp,
> + attr->xattri_op_flags);
> + if (error != -EAGAIN)
> + kmem_free(attr);
> +
> + return error;
> +}
> +
> +/* Abort all pending ATTRs. */
> +STATIC void
> +xfs_attr_abort_intent(
> + struct xfs_log_item *intent)
> +{
> + xfs_attri_release(ATTRI_ITEM(intent));
> +}
> +
> +/* Cancel an attr */
> +STATIC void
> +xfs_attr_cancel_item(
> + struct list_head *item)
> +{
> + struct xfs_attr_item *attr;
> +
> + attr = container_of(item, struct xfs_attr_item, xattri_list);
> + kmem_free(attr);
> +}
> +
> STATIC xfs_lsn_t
> xfs_attri_item_committed(
> struct xfs_log_item *lip,
> @@ -311,6 +471,160 @@ xfs_attri_validate(
> return xfs_verify_ino(mp, attrp->alfi_ino);
> }
>
> +/*
> + * Process an attr intent item that was recovered from the log. We need to
> + * delete the attr that it describes.
> + */
> +STATIC int
> +xfs_attri_item_recover(
> + struct xfs_log_item *lip,
> + struct list_head *capture_list)
> +{
> + struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
> + struct xfs_attr_item *attr;
> + struct xfs_mount *mp = lip->li_mountp;
> + struct xfs_inode *ip;
> + struct xfs_da_args *args;
> + struct xfs_trans *tp;
> + struct xfs_trans_res tres;
> + struct xfs_attri_log_format *attrp;
> + int error, ret = 0;
> + int total;
> + int local;
> + struct xfs_attrd_log_item *done_item = NULL;
> +
> + /*
> + * First check the validity of the attr described by the ATTRI. If any
> + * are bad, then assume that all are bad and just toss the ATTRI.
> + */
> + attrp = &attrip->attri_format;
> + if (!xfs_attri_validate(mp, attrp))
> + return -EFSCORRUPTED;
> +
> + error = xlog_recover_iget(mp, attrp->alfi_ino, &ip);
> + if (error)
> + return error;
> +
> + attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
> + sizeof(struct xfs_da_args), KM_NOFS);
> + args = (struct xfs_da_args *)(attr + 1);
> +
> + attr->xattri_dac.da_args = args;
> + attr->xattri_op_flags = attrp->alfi_op_flags;
> +
> + args->dp = ip;
> + args->geo = mp->m_attr_geo;
> + args->op_flags = attrp->alfi_op_flags;
> + args->whichfork = XFS_ATTR_FORK;
> + args->name = attrip->attri_name;
> + args->namelen = attrp->alfi_name_len;
> + args->hashval = xfs_da_hashname(args->name, args->namelen);
> + args->attr_filter = attrp->alfi_attr_flags;
> +
> + if (attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET) {
> + args->value = attrip->attri_value;
> + args->valuelen = attrp->alfi_value_len;
> + args->total = xfs_attr_calc_size(args, &local);
> +
> + tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
> + M_RES(mp)->tr_attrsetrt.tr_logres *
> + args->total;
> + tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
> + tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
> + total = args->total;
> + } else {
> + tres = M_RES(mp)->tr_attrrm;
> + total = XFS_ATTRRM_SPACE_RES(mp);
> + }
> + error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp);
> + if (error)
> + goto out;
> +
> + args->trans = tp;
> + done_item = xfs_trans_get_attrd(tp, attrip);
> +
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + xfs_trans_ijoin(tp, ip, 0);
> +
> + ret = xfs_trans_attr_finish_update(&attr->xattri_dac, done_item,
> + &attr->xattri_dac.leaf_bp,
> + attrp->alfi_op_flags);
> + if (ret == -EAGAIN) {
> + /* There's more work to do, so add it to this transaction */
> + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list);
Set attr = NULL here so that we don't free it at the end of the
function. Just in case some day one of these other functions also
starts returning -EAGAIN.
> + } else
> + error = ret;
> +
> + if (error) {
> + xfs_trans_cancel(tp);
> + goto out_unlock;
> + }
> +
> + error = xfs_defer_ops_capture_and_commit(tp, capture_list);
> +
> +out_unlock:
> + if (attr->xattri_dac.leaf_bp)
> + xfs_buf_relse(attr->xattri_dac.leaf_bp);
> +
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + xfs_irele(ip);
> +out:
> + if (ret != -EAGAIN)
> + kmem_free(attr);
> + return error;
> +}
> +
> +/* Re-log an intent item to push the log tail forward. */
> +static struct xfs_log_item *
> +xfs_attri_item_relog(
> + struct xfs_log_item *intent,
> + struct xfs_trans *tp)
> +{
> + struct xfs_attrd_log_item *attrdp;
> + struct xfs_attri_log_item *old_attrip;
> + struct xfs_attri_log_item *new_attrip;
> + struct xfs_attri_log_format *new_attrp;
> + struct xfs_attri_log_format *old_attrp;
> + int buffer_size;
> +
> + old_attrip = ATTRI_ITEM(intent);
> + old_attrp = &old_attrip->attri_format;
> + buffer_size = old_attrp->alfi_value_len + old_attrp->alfi_name_len;
> +
> + tp->t_flags |= XFS_TRANS_DIRTY;
> + attrdp = xfs_trans_get_attrd(tp, old_attrip);
> + set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
> +
> + new_attrip = xfs_attri_init(tp->t_mountp, buffer_size);
> + new_attrp = &new_attrip->attri_format;
> +
> + new_attrp->alfi_ino = old_attrp->alfi_ino;
> + new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
> + new_attrp->alfi_value_len = old_attrp->alfi_value_len;
> + new_attrp->alfi_name_len = old_attrp->alfi_name_len;
> + new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags;
> +
> + new_attrip->attri_name_len = old_attrip->attri_name_len;
> + new_attrip->attri_name = ((char *)new_attrip) +
> + sizeof(struct xfs_attri_log_item);
> + memcpy(new_attrip->attri_name, old_attrip->attri_name,
> + new_attrip->attri_name_len);
> +
> + new_attrip->attri_value_len = old_attrip->attri_value_len;
> + if (new_attrip->attri_value_len > 0) {
> + new_attrip->attri_value = new_attrip->attri_name +
> + new_attrip->attri_name_len;
> +
> + memcpy(new_attrip->attri_value, old_attrip->attri_value,
> + new_attrip->attri_value_len);
> + }
> +
> + xfs_trans_add_item(tp, &new_attrip->attri_item);
> + set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags);
> +
> + return &new_attrip->attri_item;
> +}
> +
> STATIC int
> xlog_recover_attri_commit_pass2(
> struct xlog *log,
> @@ -377,6 +691,52 @@ xlog_recover_attri_commit_pass2(
> return 0;
> }
>
> +/*
> + * This routine is called to allocate an "attr free done" log item.
> + */
> +static struct xfs_attrd_log_item *
> +xfs_trans_get_attrd(struct xfs_trans *tp,
> + struct xfs_attri_log_item *attrip)
> +{
> + struct xfs_attrd_log_item *attrdp;
> + uint size;
> +
> + ASSERT(tp != NULL);
> +
> + size = sizeof(struct xfs_attrd_log_item);
> + attrdp = kmem_zalloc(size, 0);
The xattrd items really should get their own slab cache.
> +
> + xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD,
> + &xfs_attrd_item_ops);
> + attrdp->attrd_attrip = attrip;
> + attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
> +
> + xfs_trans_add_item(tp, &attrdp->attrd_item);
> + return attrdp;
> +}
> +
> +/* Get an ATTRD so we can process all the attrs. */
> +static struct xfs_log_item *
> +xfs_attr_create_done(
> + struct xfs_trans *tp,
> + struct xfs_log_item *intent,
> + unsigned int count)
> +{
> + if (!intent)
> + return NULL;
> +
> + return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item;
> +}
xfs_trans_get_attrd could be combined into this function.
This series is coming along nicely! I think with these last few
things fixed, it's ready to go.
--D
> +
> +const struct xfs_defer_op_type xfs_attr_defer_type = {
> + .max_items = 1,
> + .create_intent = xfs_attr_create_intent,
> + .abort_intent = xfs_attr_abort_intent,
> + .create_done = xfs_attr_create_done,
> + .finish_item = xfs_attr_finish_item,
> + .cancel_item = xfs_attr_cancel_item,
> +};
> +
> /*
> * This routine is called when an ATTRD format structure is found in a committed
> * transaction in the log. Its purpose is to cancel the corresponding ATTRI if
> @@ -410,7 +770,9 @@ static const struct xfs_item_ops xfs_attri_item_ops = {
> .iop_unpin = xfs_attri_item_unpin,
> .iop_committed = xfs_attri_item_committed,
> .iop_release = xfs_attri_item_release,
> + .iop_recover = xfs_attri_item_recover,
> .iop_match = xfs_attri_item_match,
> + .iop_relog = xfs_attri_item_relog,
> };
>
> const struct xlog_recover_item_ops xlog_attri_item_ops = {
> --
> 2.25.1
>
next prev parent reply other threads:[~2021-11-24 0:11 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-17 4:13 [PATCH v25 00/12] Log Attribute Replay Allison Henderson
2021-11-17 4:13 ` [PATCH v25 01/12] xfs: Fix double unlock in defer capture code Allison Henderson
2021-11-17 4:13 ` [PATCH v25 02/12] xfs: don't commit the first deferred transaction without intents Allison Henderson
2021-11-23 23:28 ` Darrick J. Wong
2021-12-01 7:34 ` Allison Henderson
2021-11-17 4:13 ` [PATCH v25 03/12] xfs: Return from xfs_attr_set_iter if there are no more rmtblks to process Allison Henderson
2021-11-17 4:13 ` [PATCH v25 04/12] xfs: Set up infrastructure for log attribute replay Allison Henderson
2021-11-23 23:50 ` Darrick J. Wong
2021-12-01 7:35 ` Allison Henderson
2021-11-17 4:13 ` [PATCH v25 05/12] xfs: Implement attr logging and replay Allison Henderson
2021-11-24 0:11 ` Darrick J. Wong [this message]
2021-12-01 7:34 ` Allison Henderson
2021-12-01 17:52 ` Darrick J. Wong
2021-11-17 4:13 ` [PATCH v25 06/12] xfs: Skip flip flags for delayed attrs Allison Henderson
2021-11-17 4:13 ` [PATCH v25 07/12] xfs: Add xfs_attr_set_deferred and xfs_attr_remove_deferred Allison Henderson
2021-11-17 4:13 ` [PATCH v25 08/12] xfs: Remove unused xfs_attr_*_args Allison Henderson
2021-11-17 4:13 ` [PATCH v25 09/12] xfs: Add log attribute error tag Allison Henderson
2021-11-17 4:13 ` [PATCH v25 10/12] xfs: Add larp debug option Allison Henderson
2021-11-23 23:29 ` Darrick J. Wong
2021-12-01 7:34 ` Allison Henderson
2021-11-17 4:13 ` [PATCH v25 11/12] xfs: Merge xfs_delattr_context into xfs_attr_item Allison Henderson
2021-11-17 4:13 ` [PATCH v25 12/12] xfs: Add helper function xfs_attr_leaf_addname Allison Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211124001129.GA266024@magnolia \
--to=djwong@kernel.org \
--cc=allison.henderson@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).