* [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
@ 2025-09-02 18:30 Haihua Yang
2025-09-02 21:09 ` Trond Myklebust
0 siblings, 1 reply; 5+ messages in thread
From: Haihua Yang @ 2025-09-02 18:30 UTC (permalink / raw)
To: linux-nfs, Trond Myklebust; +Cc: Haihua Yang
1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
2, update layout stateid when layoutcommit receiving NFS4ERR_OLD_STATEID
---
fs/nfs/callback_proc.c | 2 +-
fs/nfs/nfs4proc.c | 10 +++++++++-
fs/nfs/pnfs.c | 28 +++++++++++++++++-----------
fs/nfs/pnfs.h | 2 +-
4 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 8397c43358bd..1e6e4a7a3f15 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
&args->cbl_range,
- be32_to_cpu(args->cbl_stateid.seqid))) {
+ be32_to_cpu(args->cbl_stateid.seqid), true)) {
case 0:
case -EBUSY:
/* There are layout segments that need to be returned */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..46a1bc1f31ee 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
struct nfs_server *server = NFS_SERVER(data->args.inode);
+ struct pnfs_layout_range dst_range;
if (!nfs41_sequence_done(task, &data->res.seq_res))
return;
@@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
break;
case 0:
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (data->inode) {
+ nfs4_layout_refresh_old_stateid(&data->args.stateid,
+ &dst_range,
+ data->inode);
+ }
+ fallthrough;
default:
if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
data->args.lastbytewritten,
data->args.inode->i_ino);
+ data->inode = nfs_igrab_and_active(data->args.inode);
if (!sync) {
- data->inode = nfs_igrab_and_active(data->args.inode);
if (data->inode == NULL) {
nfs4_layoutcommit_release(data);
return -EAGAIN;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3135b5af7ee..aaa3719b1957 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
goto out;
}
/* Try to update the seqid to the most recent */
- err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
+ err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, true);
if (err != -EBUSY) {
dst->seqid = lo->plh_stateid.seqid;
*dst_range = range;
@@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
.length = NFS4_MAX_UINT64,
};
- return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq);
+ return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq, false);
}
static int
@@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
spin_lock(&inode->i_lock);
pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
- pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
@@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino)
}
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list);
- pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0, false);
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
@@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
.iomode = IOMODE_ANY,
.length = NFS4_MAX_UINT64,
};
- pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0, false);
goto out_forget;
} else {
/* We have a completely new layout */
@@ -2628,7 +2628,7 @@ int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
const struct pnfs_layout_range *return_range,
- u32 seq)
+ u32 seq, bool committing)
{
struct pnfs_layout_segment *lseg, *next;
struct nfs_server *server = NFS_SERVER(lo->plh_inode);
@@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
}
if (remaining) {
- pnfs_set_plh_return_info(lo, return_range->iomode, seq);
- return -EBUSY;
+ if (!committing) {
+ pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+ return -EBUSY;
+ } else {
+ return 0;
+ }
}
if (!list_empty(&lo->plh_return_segs)) {
- pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+ if (!committing) {
+ pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+ }
return 0;
}
@@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode *inode,
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works.
*/
- if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0, false) != -EBUSY) {
const struct cred *cred;
nfs4_stateid stateid;
enum pnfs_iomode iomode;
@@ -2804,7 +2810,7 @@ static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
pnfs_get_layout_hdr(lo);
pnfs_set_plh_return_info(lo, range->iomode, 0);
if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- range, 0) != 0 ||
+ range, 0, false) != 0 ||
!pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
spin_unlock(&inode->i_lock);
rcu_read_unlock();
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 91ff877185c8..33a7a09477b2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
const struct pnfs_layout_range *recall_range,
- u32 seq);
+ u32 seq, bool committing);
int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
struct list_head *lseg_list);
bool pnfs_roc(struct inode *ino,
--
2.51.0.87.g1fa68948c3.dirty
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
2025-09-02 18:30 [PATCH] draft patches to fixes LAYOUTCOMMIT related issues Haihua Yang
@ 2025-09-02 21:09 ` Trond Myklebust
2025-09-08 4:52 ` Haihua Yang
2025-09-08 20:33 ` Haihua Yang
0 siblings, 2 replies; 5+ messages in thread
From: Trond Myklebust @ 2025-09-02 21:09 UTC (permalink / raw)
To: Haihua Yang, linux-nfs
On Tue, 2025-09-02 at 18:30 +0000, Haihua Yang wrote:
> [You don't often get email from yanghh@gmail.com. Learn why this is
> important at https://aka.ms/LearnAboutSenderIdentification ]
>
> 1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
> 2, update layout stateid when layoutcommit receiving
> NFS4ERR_OLD_STATEID
Please read Documentation/process/submitting-patches.rst
Specifically, please read the section "Describe your changes"
carefully.
> ---
> fs/nfs/callback_proc.c | 2 +-
> fs/nfs/nfs4proc.c | 10 +++++++++-
> fs/nfs/pnfs.c | 28 +++++++++++++++++-----------
> fs/nfs/pnfs.h | 2 +-
> 4 files changed, 28 insertions(+), 14 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 8397c43358bd..1e6e4a7a3f15 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct
> nfs_client *clp,
> pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
> switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
> &args->cbl_range,
> - be32_to_cpu(args-
> >cbl_stateid.seqid))) {
> + be32_to_cpu(args->cbl_stateid.seqid),
> true)) {
> case 0:
> case -EBUSY:
> /* There are layout segments that need to be returned
> */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 7d2b67e06cc3..46a1bc1f31ee 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task,
> void *calldata)
> {
> struct nfs4_layoutcommit_data *data = calldata;
> struct nfs_server *server = NFS_SERVER(data->args.inode);
> + struct pnfs_layout_range dst_range;
>
> if (!nfs41_sequence_done(task, &data->res.seq_res))
> return;
> @@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task
> *task, void *calldata)
> break;
> case 0:
> break;
> + case -NFS4ERR_OLD_STATEID:
> + if (data->inode) {
> + nfs4_layout_refresh_old_stateid(&data-
> >args.stateid,
> + &dst_range,
> + data->inode);
> + }
> + fallthrough;
> default:
> if (nfs4_async_handle_error(task, server, NULL, NULL)
> == -EAGAIN) {
> rpc_restart_call_prepare(task);
> @@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct
> nfs4_layoutcommit_data *data, bool sync)
> data->args.lastbytewritten,
> data->args.inode->i_ino);
>
> + data->inode = nfs_igrab_and_active(data->args.inode);
> if (!sync) {
> - data->inode = nfs_igrab_and_active(data->args.inode);
> if (data->inode == NULL) {
> nfs4_layoutcommit_release(data);
> return -EAGAIN;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index a3135b5af7ee..aaa3719b1957 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid
> *dst,
> goto out;
> }
> /* Try to update the seqid to the most recent */
> - err = pnfs_mark_matching_lsegs_return(lo, &head,
> &range, 0);
> + err = pnfs_mark_matching_lsegs_return(lo, &head,
> &range, 0, true);
> if (err != -EBUSY) {
> dst->seqid = lo->plh_stateid.seqid;
> *dst_range = range;
> @@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct
> pnfs_layout_hdr *lo,
> .length = NFS4_MAX_UINT64,
> };
>
> - return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> seq);
> + return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> seq, false);
> }
>
> static int
> @@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr
> *lo, u32 iomode)
>
> spin_lock(&inode->i_lock);
> pnfs_layout_set_fail_bit(lo,
> pnfs_iomode_to_fail_bit(iomode));
> - pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
> + pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false);
> spin_unlock(&inode->i_lock);
> pnfs_free_lseg_list(&head);
> dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
> @@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino)
> }
> valid_layout = pnfs_layout_is_valid(lo);
> pnfs_clear_layoutcommit(ino, &tmp_list);
> - pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
> + pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0,
> false);
>
> if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
> NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
> &range);
> @@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
> .iomode = IOMODE_ANY,
> .length = NFS4_MAX_UINT64,
> };
> - pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> 0);
> + pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> 0, false);
> goto out_forget;
> } else {
> /* We have a completely new layout */
> @@ -2628,7 +2628,7 @@ int
> pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
> struct list_head *tmp_list,
> const struct pnfs_layout_range
> *return_range,
> - u32 seq)
> + u32 seq, bool committing)
> {
> struct pnfs_layout_segment *lseg, *next;
> struct nfs_server *server = NFS_SERVER(lo->plh_inode);
> @@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct
> pnfs_layout_hdr *lo,
> }
>
> if (remaining) {
> - pnfs_set_plh_return_info(lo, return_range->iomode,
> seq);
> - return -EBUSY;
> + if (!committing) {
> + pnfs_set_plh_return_info(lo, return_range-
> >iomode, seq);
> + return -EBUSY;
> + } else {
> + return 0;
> + }
NACK.
This change would mean that we can have layout segments on the list lo-
>plh_return_segs without even the NFS_LAYOUT_RETURN_REQUESTED flag
being set.
It also badly breaks pnfs_layout_need_return().
I see no reason why we should need to add this 'committing' flag
argument to pnfs_mark_matching_lsegs_return(). I suggest rather
modifying pnfs_prepare_layoutreturn() to check for
pnfs_layoutcommit_outstanding().
> }
>
> if (!list_empty(&lo->plh_return_segs)) {
> - pnfs_set_plh_return_info(lo, return_range->iomode,
> seq);
> + if (!committing) {
> + pnfs_set_plh_return_info(lo, return_range-
> >iomode, seq);
> + }
> return 0;
> }
>
> @@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode
> *inode,
> * segments at hand when sending layoutreturn. See
> pnfs_put_lseg()
> * for how it works.
> */
> - if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> range, 0) != -EBUSY) {
> + if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> range, 0, false) != -EBUSY) {
> const struct cred *cred;
> nfs4_stateid stateid;
> enum pnfs_iomode iomode;
> @@ -2804,7 +2810,7 @@ static int
> pnfs_layout_return_unused_byserver(struct nfs_server *server,
> pnfs_get_layout_hdr(lo);
> pnfs_set_plh_return_info(lo, range->iomode, 0);
> if (pnfs_mark_matching_lsegs_return(lo, &lo-
> >plh_return_segs,
> - range, 0) != 0 ||
> + range, 0, false)
> != 0 ||
> !pnfs_prepare_layoutreturn(lo, &stateid, &cred,
> &iomode)) {
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 91ff877185c8..33a7a09477b2 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct
> pnfs_layout_hdr *lo,
> int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
> struct list_head *tmp_list,
> const struct pnfs_layout_range
> *recall_range,
> - u32 seq);
> + u32 seq, bool committing);
> int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
> struct list_head *lseg_list);
> bool pnfs_roc(struct inode *ino,
> --
> 2.51.0.87.g1fa68948c3.dirty
>
A.
--
Trond Myklebust Linux NFS client maintainer, Hammerspace
trondmy@kernel.org, trond.myklebust@hammerspace.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
2025-09-02 21:09 ` Trond Myklebust
@ 2025-09-08 4:52 ` Haihua Yang
2025-09-08 20:33 ` Haihua Yang
1 sibling, 0 replies; 5+ messages in thread
From: Haihua Yang @ 2025-09-08 4:52 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs
Hi Trond,
Thanks for your comments. In this change, I also tried to make
LAYOUTCOMMIT call nfs4_layout_refresh_old_stateid to update the
stateid when receiving NFS4ERR_OLD_STATEID. However, the update is
skipped because pnfs_mark_matching_lsegs_return returns -EBUSY. I’m
wondering why the stateid update is skipped in this case, and what the
impact would be if we ignored the -EBUSY error and proceeded with the
update.
Thanks,
On Tue, Sep 2, 2025 at 2:09 PM Trond Myklebust <trondmy@kernel.org> wrote:
>
> On Tue, 2025-09-02 at 18:30 +0000, Haihua Yang wrote:
> > [You don't often get email from yanghh@gmail.com. Learn why this is
> > important at https://aka.ms/LearnAboutSenderIdentification ]
> >
> > 1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
> > 2, update layout stateid when layoutcommit receiving
> > NFS4ERR_OLD_STATEID
>
> Please read Documentation/process/submitting-patches.rst
> Specifically, please read the section "Describe your changes"
> carefully.
>
> > ---
> > fs/nfs/callback_proc.c | 2 +-
> > fs/nfs/nfs4proc.c | 10 +++++++++-
> > fs/nfs/pnfs.c | 28 +++++++++++++++++-----------
> > fs/nfs/pnfs.h | 2 +-
> > 4 files changed, 28 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> > index 8397c43358bd..1e6e4a7a3f15 100644
> > --- a/fs/nfs/callback_proc.c
> > +++ b/fs/nfs/callback_proc.c
> > @@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct
> > nfs_client *clp,
> > pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
> > switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
> > &args->cbl_range,
> > - be32_to_cpu(args-
> > >cbl_stateid.seqid))) {
> > + be32_to_cpu(args->cbl_stateid.seqid),
> > true)) {
> > case 0:
> > case -EBUSY:
> > /* There are layout segments that need to be returned
> > */
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 7d2b67e06cc3..46a1bc1f31ee 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task,
> > void *calldata)
> > {
> > struct nfs4_layoutcommit_data *data = calldata;
> > struct nfs_server *server = NFS_SERVER(data->args.inode);
> > + struct pnfs_layout_range dst_range;
> >
> > if (!nfs41_sequence_done(task, &data->res.seq_res))
> > return;
> > @@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task
> > *task, void *calldata)
> > break;
> > case 0:
> > break;
> > + case -NFS4ERR_OLD_STATEID:
> > + if (data->inode) {
> > + nfs4_layout_refresh_old_stateid(&data-
> > >args.stateid,
> > + &dst_range,
> > + data->inode);
> > + }
> > + fallthrough;
> > default:
> > if (nfs4_async_handle_error(task, server, NULL, NULL)
> > == -EAGAIN) {
> > rpc_restart_call_prepare(task);
> > @@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct
> > nfs4_layoutcommit_data *data, bool sync)
> > data->args.lastbytewritten,
> > data->args.inode->i_ino);
> >
> > + data->inode = nfs_igrab_and_active(data->args.inode);
> > if (!sync) {
> > - data->inode = nfs_igrab_and_active(data->args.inode);
> > if (data->inode == NULL) {
> > nfs4_layoutcommit_release(data);
> > return -EAGAIN;
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index a3135b5af7ee..aaa3719b1957 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid
> > *dst,
> > goto out;
> > }
> > /* Try to update the seqid to the most recent */
> > - err = pnfs_mark_matching_lsegs_return(lo, &head,
> > &range, 0);
> > + err = pnfs_mark_matching_lsegs_return(lo, &head,
> > &range, 0, true);
> > if (err != -EBUSY) {
> > dst->seqid = lo->plh_stateid.seqid;
> > *dst_range = range;
> > @@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct
> > pnfs_layout_hdr *lo,
> > .length = NFS4_MAX_UINT64,
> > };
> >
> > - return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> > seq);
> > + return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> > seq, false);
> > }
> >
> > static int
> > @@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr
> > *lo, u32 iomode)
> >
> > spin_lock(&inode->i_lock);
> > pnfs_layout_set_fail_bit(lo,
> > pnfs_iomode_to_fail_bit(iomode));
> > - pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
> > + pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false);
> > spin_unlock(&inode->i_lock);
> > pnfs_free_lseg_list(&head);
> > dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
> > @@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino)
> > }
> > valid_layout = pnfs_layout_is_valid(lo);
> > pnfs_clear_layoutcommit(ino, &tmp_list);
> > - pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
> > + pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0,
> > false);
> >
> > if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
> > NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
> > &range);
> > @@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
> > .iomode = IOMODE_ANY,
> > .length = NFS4_MAX_UINT64,
> > };
> > - pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> > 0);
> > + pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> > 0, false);
> > goto out_forget;
> > } else {
> > /* We have a completely new layout */
> > @@ -2628,7 +2628,7 @@ int
> > pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
> > struct list_head *tmp_list,
> > const struct pnfs_layout_range
> > *return_range,
> > - u32 seq)
> > + u32 seq, bool committing)
> > {
> > struct pnfs_layout_segment *lseg, *next;
> > struct nfs_server *server = NFS_SERVER(lo->plh_inode);
> > @@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct
> > pnfs_layout_hdr *lo,
> > }
> >
> > if (remaining) {
> > - pnfs_set_plh_return_info(lo, return_range->iomode,
> > seq);
> > - return -EBUSY;
> > + if (!committing) {
> > + pnfs_set_plh_return_info(lo, return_range-
> > >iomode, seq);
> > + return -EBUSY;
> > + } else {
> > + return 0;
> > + }
>
> NACK.
>
> This change would mean that we can have layout segments on the list lo-
> >plh_return_segs without even the NFS_LAYOUT_RETURN_REQUESTED flag
> being set.
>
> It also badly breaks pnfs_layout_need_return().
>
> I see no reason why we should need to add this 'committing' flag
> argument to pnfs_mark_matching_lsegs_return(). I suggest rather
> modifying pnfs_prepare_layoutreturn() to check for
> pnfs_layoutcommit_outstanding().
>
> > }
> >
> > if (!list_empty(&lo->plh_return_segs)) {
> > - pnfs_set_plh_return_info(lo, return_range->iomode,
> > seq);
> > + if (!committing) {
> > + pnfs_set_plh_return_info(lo, return_range-
> > >iomode, seq);
> > + }
> > return 0;
> > }
> >
> > @@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode
> > *inode,
> > * segments at hand when sending layoutreturn. See
> > pnfs_put_lseg()
> > * for how it works.
> > */
> > - if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> > range, 0) != -EBUSY) {
> > + if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> > range, 0, false) != -EBUSY) {
> > const struct cred *cred;
> > nfs4_stateid stateid;
> > enum pnfs_iomode iomode;
> > @@ -2804,7 +2810,7 @@ static int
> > pnfs_layout_return_unused_byserver(struct nfs_server *server,
> > pnfs_get_layout_hdr(lo);
> > pnfs_set_plh_return_info(lo, range->iomode, 0);
> > if (pnfs_mark_matching_lsegs_return(lo, &lo-
> > >plh_return_segs,
> > - range, 0) != 0 ||
> > + range, 0, false)
> > != 0 ||
> > !pnfs_prepare_layoutreturn(lo, &stateid, &cred,
> > &iomode)) {
> > spin_unlock(&inode->i_lock);
> > rcu_read_unlock();
> > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> > index 91ff877185c8..33a7a09477b2 100644
> > --- a/fs/nfs/pnfs.h
> > +++ b/fs/nfs/pnfs.h
> > @@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct
> > pnfs_layout_hdr *lo,
> > int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
> > struct list_head *tmp_list,
> > const struct pnfs_layout_range
> > *recall_range,
> > - u32 seq);
> > + u32 seq, bool committing);
> > int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
> > struct list_head *lseg_list);
> > bool pnfs_roc(struct inode *ino,
> > --
> > 2.51.0.87.g1fa68948c3.dirty
> >
> A.
> --
> Trond Myklebust Linux NFS client maintainer, Hammerspace
> trondmy@kernel.org, trond.myklebust@hammerspace.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
2025-09-02 21:09 ` Trond Myklebust
2025-09-08 4:52 ` Haihua Yang
@ 2025-09-08 20:33 ` Haihua Yang
2025-09-29 20:08 ` Haihua Yang
1 sibling, 1 reply; 5+ messages in thread
From: Haihua Yang @ 2025-09-08 20:33 UTC (permalink / raw)
To: trondmy; +Cc: linux-nfs, yanghh
1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
2, update layout stateid when layoutcommit receiving NFS4ERR_OLD_STATEID
---
fs/nfs/nfs4proc.c | 19 ++++++++++++++++++-
fs/nfs/pnfs.c | 15 ++++++++++++---
fs/nfs/pnfs.h | 6 ++++--
3 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..448a13f2537b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10081,7 +10081,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_OLD_STATEID:
if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
&lrp->args.range,
- lrp->args.inode))
+ lrp->args.inode,
+ false))
goto out_restart;
fallthrough;
default:
@@ -10255,6 +10256,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
struct nfs_server *server = NFS_SERVER(data->args.inode);
+ struct pnfs_layout_range dst_range;
if (!nfs41_sequence_done(task, &data->res.seq_res))
return;
@@ -10268,6 +10270,14 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
break;
case 0:
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (data->inode) {
+ nfs4_layout_refresh_old_stateid(&data->args.stateid,
+ &dst_range,
+ data->args.inode,
+ true);
+ }
+ fallthrough;
default:
if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -10279,10 +10289,17 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
+ struct inode *inode = data->args.inode;
pnfs_cleanup_layoutcommit(data);
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
+ struct pnfs_layout_hdr *lo;
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ spin_unlock(&inode->i_lock);
+ pnfs_put_layout_hdr(lo);
+
put_cred(data->cred);
nfs_iput_and_deactive(data->inode);
kfree(data);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3135b5af7ee..defc0a84e6c7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -409,7 +409,8 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
*/
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
- struct inode *inode)
+ struct inode *inode,
+ bool force_update)
{
struct pnfs_layout_hdr *lo;
struct pnfs_layout_range range = {
@@ -433,7 +434,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
}
/* Try to update the seqid to the most recent */
err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
- if (err != -EBUSY) {
+ if (force_update || err != -EBUSY) {
dst->seqid = lo->plh_stateid.seqid;
*dst_range = range;
ret = true;
@@ -1306,6 +1307,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
/* Serialise LAYOUTGET/LAYOUTRETURN */
if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
return false;
+ /* Serialise LAYOUTCOMMIT/LAYOUTRETURN */
+ if (pnfs_layoutcommit_outstanding(lo->plh_inode))
+ return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
return false;
set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
@@ -1686,7 +1690,8 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
return 0;
case -NFS4ERR_OLD_STATEID:
if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
- &arg->range, arg->inode))
+ &arg->range, arg->inode,
+ false))
break;
*ret = -NFS4ERR_NOMATCHING_LAYOUT;
return -EAGAIN;
@@ -3397,6 +3402,10 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
}
}
+ /* the ref will be free on nfs4_layoutcommit_release, and trigger
+ * LAYOUTRETURN
+ */
+ pnfs_get_layout_hdr(nfsi->layout);
status = nfs4_proc_layoutcommit(data, sync);
out:
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 91ff877185c8..c6788f1423a3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -287,7 +287,8 @@ int pnfs_layout_destroy_byclid(struct nfs_client *clp,
enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
- struct inode *inode);
+ struct inode *inode,
+ bool force_update);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
@@ -891,7 +892,8 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
- struct inode *inode)
+ struct inode *inode,
+ bool force_update)
{
return false;
}
--
2.51.0.87.g1fa68948c3.dirty
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
2025-09-08 20:33 ` Haihua Yang
@ 2025-09-29 20:08 ` Haihua Yang
0 siblings, 0 replies; 5+ messages in thread
From: Haihua Yang @ 2025-09-29 20:08 UTC (permalink / raw)
To: yanghh; +Cc: linux-nfs, trondmy
1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
2, update layout stateid when layoutcommit receiving NFS4ERR_OLD_STATEID
---
fs/nfs/nfs4proc.c | 20 +++++++++++++++++++-
fs/nfs/pnfs.c | 15 ++++++++++++---
fs/nfs/pnfs.h | 6 ++++--
3 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..d1607b4606c7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10081,7 +10081,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_OLD_STATEID:
if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
&lrp->args.range,
- lrp->args.inode))
+ lrp->args.inode,
+ false))
goto out_restart;
fallthrough;
default:
@@ -10255,6 +10256,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
struct nfs_server *server = NFS_SERVER(data->args.inode);
+ struct pnfs_layout_range dst_range;
if (!nfs41_sequence_done(task, &data->res.seq_res))
return;
@@ -10263,11 +10265,20 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */
case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
case -NFS4ERR_BADLAYOUT: /* no layout */
+ case -NFS4ERR_STALE_STATEID: /* stale layout */
case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
break;
case 0:
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (data->inode) {
+ nfs4_layout_refresh_old_stateid(&data->args.stateid,
+ &dst_range,
+ data->args.inode,
+ true);
+ }
+ fallthrough;
default:
if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -10279,10 +10290,17 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
+ struct inode *inode = data->args.inode;
pnfs_cleanup_layoutcommit(data);
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
+ struct pnfs_layout_hdr *lo;
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ spin_unlock(&inode->i_lock);
+ pnfs_put_layout_hdr(lo);
+
put_cred(data->cred);
nfs_iput_and_deactive(data->inode);
kfree(data);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3135b5af7ee..defc0a84e6c7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -409,7 +409,8 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
*/
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
- struct inode *inode)
+ struct inode *inode,
+ bool force_update)
{
struct pnfs_layout_hdr *lo;
struct pnfs_layout_range range = {
@@ -433,7 +434,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
}
/* Try to update the seqid to the most recent */
err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
- if (err != -EBUSY) {
+ if (force_update || err != -EBUSY) {
dst->seqid = lo->plh_stateid.seqid;
*dst_range = range;
ret = true;
@@ -1306,6 +1307,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
/* Serialise LAYOUTGET/LAYOUTRETURN */
if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
return false;
+ /* Serialise LAYOUTCOMMIT/LAYOUTRETURN */
+ if (pnfs_layoutcommit_outstanding(lo->plh_inode))
+ return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
return false;
set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
@@ -1686,7 +1690,8 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
return 0;
case -NFS4ERR_OLD_STATEID:
if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
- &arg->range, arg->inode))
+ &arg->range, arg->inode,
+ false))
break;
*ret = -NFS4ERR_NOMATCHING_LAYOUT;
return -EAGAIN;
@@ -3397,6 +3402,10 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
}
}
+ /* the ref will be free on nfs4_layoutcommit_release, and trigger
+ * LAYOUTRETURN
+ */
+ pnfs_get_layout_hdr(nfsi->layout);
status = nfs4_proc_layoutcommit(data, sync);
out:
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 91ff877185c8..c6788f1423a3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -287,7 +287,8 @@ int pnfs_layout_destroy_byclid(struct nfs_client *clp,
enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
- struct inode *inode);
+ struct inode *inode,
+ bool force_update);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
@@ -891,7 +892,8 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
- struct inode *inode)
+ struct inode *inode,
+ bool force_update)
{
return false;
}
--
2.51.0.87.g1fa68948c3.dirty
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-09-29 20:08 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-02 18:30 [PATCH] draft patches to fixes LAYOUTCOMMIT related issues Haihua Yang
2025-09-02 21:09 ` Trond Myklebust
2025-09-08 4:52 ` Haihua Yang
2025-09-08 20:33 ` Haihua Yang
2025-09-29 20:08 ` Haihua Yang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox