public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
@ 2025-09-02 18:30 Haihua Yang
  2025-09-02 21:09 ` Trond Myklebust
  0 siblings, 1 reply; 5+ messages in thread
From: Haihua Yang @ 2025-09-02 18:30 UTC (permalink / raw)
  To: linux-nfs, Trond Myklebust; +Cc: Haihua Yang

1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
2, update layout stateid when layoutcommit receiving NFS4ERR_OLD_STATEID
---
 fs/nfs/callback_proc.c |  2 +-
 fs/nfs/nfs4proc.c      | 10 +++++++++-
 fs/nfs/pnfs.c          | 28 +++++++++++++++++-----------
 fs/nfs/pnfs.h          |  2 +-
 4 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 8397c43358bd..1e6e4a7a3f15 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 	pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
 	switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
 				&args->cbl_range,
-				be32_to_cpu(args->cbl_stateid.seqid))) {
+				be32_to_cpu(args->cbl_stateid.seqid), true)) {
 	case 0:
 	case -EBUSY:
 		/* There are layout segments that need to be returned */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..46a1bc1f31ee 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
 	struct nfs_server *server = NFS_SERVER(data->args.inode);
+	struct pnfs_layout_range dst_range;
 
 	if (!nfs41_sequence_done(task, &data->res.seq_res))
 		return;
@@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 		break;
 	case 0:
 		break;
+	case -NFS4ERR_OLD_STATEID:
+		if (data->inode) {
+			nfs4_layout_refresh_old_stateid(&data->args.stateid,
+					&dst_range,
+					data->inode);
+		}
+		fallthrough;
 	default:
 		if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
 			rpc_restart_call_prepare(task);
@@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
 		data->args.lastbytewritten,
 		data->args.inode->i_ino);
 
+	data->inode = nfs_igrab_and_active(data->args.inode);
 	if (!sync) {
-		data->inode = nfs_igrab_and_active(data->args.inode);
 		if (data->inode == NULL) {
 			nfs4_layoutcommit_release(data);
 			return -EAGAIN;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3135b5af7ee..aaa3719b1957 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 			goto out;
 		}
 		/* Try to update the seqid to the most recent */
-		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
+		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, true);
 		if (err != -EBUSY) {
 			dst->seqid = lo->plh_stateid.seqid;
 			*dst_range = range;
@@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
 		.length = NFS4_MAX_UINT64,
 	};
 
-	return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq);
+	return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq, false);
 }
 
 static int
@@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
 
 	spin_lock(&inode->i_lock);
 	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
-	pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
+	pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false);
 	spin_unlock(&inode->i_lock);
 	pnfs_free_lseg_list(&head);
 	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
@@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino)
 	}
 	valid_layout = pnfs_layout_is_valid(lo);
 	pnfs_clear_layoutcommit(ino, &tmp_list);
-	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
+	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0, false);
 
 	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
 		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
@@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 			.iomode = IOMODE_ANY,
 			.length = NFS4_MAX_UINT64,
 		};
-		pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
+		pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0, false);
 		goto out_forget;
 	} else {
 		/* We have a completely new layout */
@@ -2628,7 +2628,7 @@ int
 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
 				const struct pnfs_layout_range *return_range,
-				u32 seq)
+				u32 seq, bool committing)
 {
 	struct pnfs_layout_segment *lseg, *next;
 	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
@@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
 		}
 
 	if (remaining) {
-		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
-		return -EBUSY;
+		if (!committing) {
+			pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+			return -EBUSY;
+		} else {
+			return 0;
+		}
 	}
 
 	if (!list_empty(&lo->plh_return_segs)) {
-		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+		if (!committing) {
+			pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+		}
 		return 0;
 	}
 
@@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode *inode,
 	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
 	 * for how it works.
 	 */
-	if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
+	if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0, false) != -EBUSY) {
 		const struct cred *cred;
 		nfs4_stateid stateid;
 		enum pnfs_iomode iomode;
@@ -2804,7 +2810,7 @@ static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
 		pnfs_get_layout_hdr(lo);
 		pnfs_set_plh_return_info(lo, range->iomode, 0);
 		if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
-						    range, 0) != 0 ||
+						    range, 0, false) != 0 ||
 		    !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
 			spin_unlock(&inode->i_lock);
 			rcu_read_unlock();
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 91ff877185c8..33a7a09477b2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
 				const struct pnfs_layout_range *recall_range,
-				u32 seq);
+				u32 seq, bool committing);
 int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
 		struct list_head *lseg_list);
 bool pnfs_roc(struct inode *ino,
-- 
2.51.0.87.g1fa68948c3.dirty


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
  2025-09-02 18:30 [PATCH] draft patches to fixes LAYOUTCOMMIT related issues Haihua Yang
@ 2025-09-02 21:09 ` Trond Myklebust
  2025-09-08  4:52   ` Haihua Yang
  2025-09-08 20:33   ` Haihua Yang
  0 siblings, 2 replies; 5+ messages in thread
From: Trond Myklebust @ 2025-09-02 21:09 UTC (permalink / raw)
  To: Haihua Yang, linux-nfs

On Tue, 2025-09-02 at 18:30 +0000, Haihua Yang wrote:
> [You don't often get email from yanghh@gmail.com. Learn why this is
> important at https://aka.ms/LearnAboutSenderIdentification ]
> 
> 1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
> 2, update layout stateid when layoutcommit receiving
> NFS4ERR_OLD_STATEID

Please read Documentation/process/submitting-patches.rst
Specifically, please read the section "Describe your changes"
carefully.

> ---
>  fs/nfs/callback_proc.c |  2 +-
>  fs/nfs/nfs4proc.c      | 10 +++++++++-
>  fs/nfs/pnfs.c          | 28 +++++++++++++++++-----------
>  fs/nfs/pnfs.h          |  2 +-
>  4 files changed, 28 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 8397c43358bd..1e6e4a7a3f15 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct
> nfs_client *clp,
>         pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
>         switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
>                                 &args->cbl_range,
> -                               be32_to_cpu(args-
> >cbl_stateid.seqid))) {
> +                               be32_to_cpu(args->cbl_stateid.seqid),
> true)) {
>         case 0:
>         case -EBUSY:
>                 /* There are layout segments that need to be returned
> */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 7d2b67e06cc3..46a1bc1f31ee 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task,
> void *calldata)
>  {
>         struct nfs4_layoutcommit_data *data = calldata;
>         struct nfs_server *server = NFS_SERVER(data->args.inode);
> +       struct pnfs_layout_range dst_range;
> 
>         if (!nfs41_sequence_done(task, &data->res.seq_res))
>                 return;
> @@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task
> *task, void *calldata)
>                 break;
>         case 0:
>                 break;
> +       case -NFS4ERR_OLD_STATEID:
> +               if (data->inode) {
> +                       nfs4_layout_refresh_old_stateid(&data-
> >args.stateid,
> +                                       &dst_range,
> +                                       data->inode);
> +               }
> +               fallthrough;
>         default:
>                 if (nfs4_async_handle_error(task, server, NULL, NULL)
> == -EAGAIN) {
>                         rpc_restart_call_prepare(task);
> @@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct
> nfs4_layoutcommit_data *data, bool sync)
>                 data->args.lastbytewritten,
>                 data->args.inode->i_ino);
> 
> +       data->inode = nfs_igrab_and_active(data->args.inode);
>         if (!sync) {
> -               data->inode = nfs_igrab_and_active(data->args.inode);
>                 if (data->inode == NULL) {
>                         nfs4_layoutcommit_release(data);
>                         return -EAGAIN;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index a3135b5af7ee..aaa3719b1957 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid
> *dst,
>                         goto out;
>                 }
>                 /* Try to update the seqid to the most recent */
> -               err = pnfs_mark_matching_lsegs_return(lo, &head,
> &range, 0);
> +               err = pnfs_mark_matching_lsegs_return(lo, &head,
> &range, 0, true);
>                 if (err != -EBUSY) {
>                         dst->seqid = lo->plh_stateid.seqid;
>                         *dst_range = range;
> @@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct
> pnfs_layout_hdr *lo,
>                 .length = NFS4_MAX_UINT64,
>         };
> 
> -       return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> seq);
> +       return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> seq, false);
>  }
> 
>  static int
> @@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr
> *lo, u32 iomode)
> 
>         spin_lock(&inode->i_lock);
>         pnfs_layout_set_fail_bit(lo,
> pnfs_iomode_to_fail_bit(iomode));
> -       pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
> +       pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false);
>         spin_unlock(&inode->i_lock);
>         pnfs_free_lseg_list(&head);
>         dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
> @@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino)
>         }
>         valid_layout = pnfs_layout_is_valid(lo);
>         pnfs_clear_layoutcommit(ino, &tmp_list);
> -       pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
> +       pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0,
> false);
> 
>         if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
>                 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
> &range);
> @@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
>                         .iomode = IOMODE_ANY,
>                         .length = NFS4_MAX_UINT64,
>                 };
> -               pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> 0);
> +               pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> 0, false);
>                 goto out_forget;
>         } else {
>                 /* We have a completely new layout */
> @@ -2628,7 +2628,7 @@ int
>  pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
>                                 struct list_head *tmp_list,
>                                 const struct pnfs_layout_range
> *return_range,
> -                               u32 seq)
> +                               u32 seq, bool committing)
>  {
>         struct pnfs_layout_segment *lseg, *next;
>         struct nfs_server *server = NFS_SERVER(lo->plh_inode);
> @@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct
> pnfs_layout_hdr *lo,
>                 }
> 
>         if (remaining) {
> -               pnfs_set_plh_return_info(lo, return_range->iomode,
> seq);
> -               return -EBUSY;
> +               if (!committing) {
> +                       pnfs_set_plh_return_info(lo, return_range-
> >iomode, seq);
> +                       return -EBUSY;
> +               } else {
> +                       return 0;
> +               }

NACK.

This change would mean that we can have layout segments on the list lo-
>plh_return_segs without even the NFS_LAYOUT_RETURN_REQUESTED flag
being set.

It also badly breaks pnfs_layout_need_return().

I see no reason why we should need to add this 'committing' flag
argument to pnfs_mark_matching_lsegs_return(). I suggest rather
modifying pnfs_prepare_layoutreturn() to check for
pnfs_layoutcommit_outstanding().

>         }
> 
>         if (!list_empty(&lo->plh_return_segs)) {
> -               pnfs_set_plh_return_info(lo, return_range->iomode,
> seq);
> +               if (!committing) {
> +                       pnfs_set_plh_return_info(lo, return_range-
> >iomode, seq);
> +               }
>                 return 0;
>         }
> 
> @@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode
> *inode,
>          * segments at hand when sending layoutreturn. See
> pnfs_put_lseg()
>          * for how it works.
>          */
> -       if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> range, 0) != -EBUSY) {
> +       if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> range, 0, false) != -EBUSY) {
>                 const struct cred *cred;
>                 nfs4_stateid stateid;
>                 enum pnfs_iomode iomode;
> @@ -2804,7 +2810,7 @@ static int
> pnfs_layout_return_unused_byserver(struct nfs_server *server,
>                 pnfs_get_layout_hdr(lo);
>                 pnfs_set_plh_return_info(lo, range->iomode, 0);
>                 if (pnfs_mark_matching_lsegs_return(lo, &lo-
> >plh_return_segs,
> -                                                   range, 0) != 0 ||
> +                                                   range, 0, false)
> != 0 ||
>                     !pnfs_prepare_layoutreturn(lo, &stateid, &cred,
> &iomode)) {
>                         spin_unlock(&inode->i_lock);
>                         rcu_read_unlock();
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 91ff877185c8..33a7a09477b2 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct
> pnfs_layout_hdr *lo,
>  int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
>                                 struct list_head *tmp_list,
>                                 const struct pnfs_layout_range
> *recall_range,
> -                               u32 seq);
> +                               u32 seq, bool committing);
>  int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
>                 struct list_head *lseg_list);
>  bool pnfs_roc(struct inode *ino,
> --
> 2.51.0.87.g1fa68948c3.dirty
> 
   A. 
-- 
Trond Myklebust Linux NFS client maintainer, Hammerspace
trondmy@kernel.org, trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
  2025-09-02 21:09 ` Trond Myklebust
@ 2025-09-08  4:52   ` Haihua Yang
  2025-09-08 20:33   ` Haihua Yang
  1 sibling, 0 replies; 5+ messages in thread
From: Haihua Yang @ 2025-09-08  4:52 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

Hi Trond,
Thanks for your comments. In this change, I also tried to make
LAYOUTCOMMIT call nfs4_layout_refresh_old_stateid to update the
stateid when receiving NFS4ERR_OLD_STATEID. However, the update is
skipped because pnfs_mark_matching_lsegs_return returns -EBUSY. I’m
wondering why the stateid update is skipped in this case, and what the
impact would be if we ignored the -EBUSY error and proceeded with the
update.
Thanks,

On Tue, Sep 2, 2025 at 2:09 PM Trond Myklebust <trondmy@kernel.org> wrote:
>
> On Tue, 2025-09-02 at 18:30 +0000, Haihua Yang wrote:
> > [You don't often get email from yanghh@gmail.com. Learn why this is
> > important at https://aka.ms/LearnAboutSenderIdentification ]
> >
> > 1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
> > 2, update layout stateid when layoutcommit receiving
> > NFS4ERR_OLD_STATEID
>
> Please read Documentation/process/submitting-patches.rst
> Specifically, please read the section "Describe your changes"
> carefully.
>
> > ---
> >  fs/nfs/callback_proc.c |  2 +-
> >  fs/nfs/nfs4proc.c      | 10 +++++++++-
> >  fs/nfs/pnfs.c          | 28 +++++++++++++++++-----------
> >  fs/nfs/pnfs.h          |  2 +-
> >  4 files changed, 28 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> > index 8397c43358bd..1e6e4a7a3f15 100644
> > --- a/fs/nfs/callback_proc.c
> > +++ b/fs/nfs/callback_proc.c
> > @@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct
> > nfs_client *clp,
> >         pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
> >         switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
> >                                 &args->cbl_range,
> > -                               be32_to_cpu(args-
> > >cbl_stateid.seqid))) {
> > +                               be32_to_cpu(args->cbl_stateid.seqid),
> > true)) {
> >         case 0:
> >         case -EBUSY:
> >                 /* There are layout segments that need to be returned
> > */
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 7d2b67e06cc3..46a1bc1f31ee 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task,
> > void *calldata)
> >  {
> >         struct nfs4_layoutcommit_data *data = calldata;
> >         struct nfs_server *server = NFS_SERVER(data->args.inode);
> > +       struct pnfs_layout_range dst_range;
> >
> >         if (!nfs41_sequence_done(task, &data->res.seq_res))
> >                 return;
> > @@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task
> > *task, void *calldata)
> >                 break;
> >         case 0:
> >                 break;
> > +       case -NFS4ERR_OLD_STATEID:
> > +               if (data->inode) {
> > +                       nfs4_layout_refresh_old_stateid(&data-
> > >args.stateid,
> > +                                       &dst_range,
> > +                                       data->inode);
> > +               }
> > +               fallthrough;
> >         default:
> >                 if (nfs4_async_handle_error(task, server, NULL, NULL)
> > == -EAGAIN) {
> >                         rpc_restart_call_prepare(task);
> > @@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct
> > nfs4_layoutcommit_data *data, bool sync)
> >                 data->args.lastbytewritten,
> >                 data->args.inode->i_ino);
> >
> > +       data->inode = nfs_igrab_and_active(data->args.inode);
> >         if (!sync) {
> > -               data->inode = nfs_igrab_and_active(data->args.inode);
> >                 if (data->inode == NULL) {
> >                         nfs4_layoutcommit_release(data);
> >                         return -EAGAIN;
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index a3135b5af7ee..aaa3719b1957 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid
> > *dst,
> >                         goto out;
> >                 }
> >                 /* Try to update the seqid to the most recent */
> > -               err = pnfs_mark_matching_lsegs_return(lo, &head,
> > &range, 0);
> > +               err = pnfs_mark_matching_lsegs_return(lo, &head,
> > &range, 0, true);
> >                 if (err != -EBUSY) {
> >                         dst->seqid = lo->plh_stateid.seqid;
> >                         *dst_range = range;
> > @@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct
> > pnfs_layout_hdr *lo,
> >                 .length = NFS4_MAX_UINT64,
> >         };
> >
> > -       return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> > seq);
> > +       return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range,
> > seq, false);
> >  }
> >
> >  static int
> > @@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr
> > *lo, u32 iomode)
> >
> >         spin_lock(&inode->i_lock);
> >         pnfs_layout_set_fail_bit(lo,
> > pnfs_iomode_to_fail_bit(iomode));
> > -       pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
> > +       pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false);
> >         spin_unlock(&inode->i_lock);
> >         pnfs_free_lseg_list(&head);
> >         dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
> > @@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino)
> >         }
> >         valid_layout = pnfs_layout_is_valid(lo);
> >         pnfs_clear_layoutcommit(ino, &tmp_list);
> > -       pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
> > +       pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0,
> > false);
> >
> >         if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
> >                 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
> > &range);
> > @@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
> >                         .iomode = IOMODE_ANY,
> >                         .length = NFS4_MAX_UINT64,
> >                 };
> > -               pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> > 0);
> > +               pnfs_mark_matching_lsegs_return(lo, &free_me, &range,
> > 0, false);
> >                 goto out_forget;
> >         } else {
> >                 /* We have a completely new layout */
> > @@ -2628,7 +2628,7 @@ int
> >  pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
> >                                 struct list_head *tmp_list,
> >                                 const struct pnfs_layout_range
> > *return_range,
> > -                               u32 seq)
> > +                               u32 seq, bool committing)
> >  {
> >         struct pnfs_layout_segment *lseg, *next;
> >         struct nfs_server *server = NFS_SERVER(lo->plh_inode);
> > @@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct
> > pnfs_layout_hdr *lo,
> >                 }
> >
> >         if (remaining) {
> > -               pnfs_set_plh_return_info(lo, return_range->iomode,
> > seq);
> > -               return -EBUSY;
> > +               if (!committing) {
> > +                       pnfs_set_plh_return_info(lo, return_range-
> > >iomode, seq);
> > +                       return -EBUSY;
> > +               } else {
> > +                       return 0;
> > +               }
>
> NACK.
>
> This change would mean that we can have layout segments on the list lo-
> >plh_return_segs without even the NFS_LAYOUT_RETURN_REQUESTED flag
> being set.
>
> It also badly breaks pnfs_layout_need_return().
>
> I see no reason why we should need to add this 'committing' flag
> argument to pnfs_mark_matching_lsegs_return(). I suggest rather
> modifying pnfs_prepare_layoutreturn() to check for
> pnfs_layoutcommit_outstanding().
>
> >         }
> >
> >         if (!list_empty(&lo->plh_return_segs)) {
> > -               pnfs_set_plh_return_info(lo, return_range->iomode,
> > seq);
> > +               if (!committing) {
> > +                       pnfs_set_plh_return_info(lo, return_range-
> > >iomode, seq);
> > +               }
> >                 return 0;
> >         }
> >
> > @@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode
> > *inode,
> >          * segments at hand when sending layoutreturn. See
> > pnfs_put_lseg()
> >          * for how it works.
> >          */
> > -       if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> > range, 0) != -EBUSY) {
> > +       if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
> > range, 0, false) != -EBUSY) {
> >                 const struct cred *cred;
> >                 nfs4_stateid stateid;
> >                 enum pnfs_iomode iomode;
> > @@ -2804,7 +2810,7 @@ static int
> > pnfs_layout_return_unused_byserver(struct nfs_server *server,
> >                 pnfs_get_layout_hdr(lo);
> >                 pnfs_set_plh_return_info(lo, range->iomode, 0);
> >                 if (pnfs_mark_matching_lsegs_return(lo, &lo-
> > >plh_return_segs,
> > -                                                   range, 0) != 0 ||
> > +                                                   range, 0, false)
> > != 0 ||
> >                     !pnfs_prepare_layoutreturn(lo, &stateid, &cred,
> > &iomode)) {
> >                         spin_unlock(&inode->i_lock);
> >                         rcu_read_unlock();
> > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> > index 91ff877185c8..33a7a09477b2 100644
> > --- a/fs/nfs/pnfs.h
> > +++ b/fs/nfs/pnfs.h
> > @@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct
> > pnfs_layout_hdr *lo,
> >  int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
> >                                 struct list_head *tmp_list,
> >                                 const struct pnfs_layout_range
> > *recall_range,
> > -                               u32 seq);
> > +                               u32 seq, bool committing);
> >  int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
> >                 struct list_head *lseg_list);
> >  bool pnfs_roc(struct inode *ino,
> > --
> > 2.51.0.87.g1fa68948c3.dirty
> >
>    A.
> --
> Trond Myklebust Linux NFS client maintainer, Hammerspace
> trondmy@kernel.org, trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
  2025-09-02 21:09 ` Trond Myklebust
  2025-09-08  4:52   ` Haihua Yang
@ 2025-09-08 20:33   ` Haihua Yang
  2025-09-29 20:08     ` Haihua Yang
  1 sibling, 1 reply; 5+ messages in thread
From: Haihua Yang @ 2025-09-08 20:33 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs, yanghh

1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
2, update layout stateid when layoutcommit receiving NFS4ERR_OLD_STATEID
---
 fs/nfs/nfs4proc.c | 19 ++++++++++++++++++-
 fs/nfs/pnfs.c     | 15 ++++++++++++---
 fs/nfs/pnfs.h     |  6 ++++--
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..448a13f2537b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10081,7 +10081,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 	case -NFS4ERR_OLD_STATEID:
 		if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
 					&lrp->args.range,
-					lrp->args.inode))
+					lrp->args.inode,
+					false))
 			goto out_restart;
 		fallthrough;
 	default:
@@ -10255,6 +10256,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
 	struct nfs_server *server = NFS_SERVER(data->args.inode);
+	struct pnfs_layout_range dst_range;
 
 	if (!nfs41_sequence_done(task, &data->res.seq_res))
 		return;
@@ -10268,6 +10270,14 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 		break;
 	case 0:
 		break;
+	case -NFS4ERR_OLD_STATEID:
+		if (data->inode) {
+			nfs4_layout_refresh_old_stateid(&data->args.stateid,
+					&dst_range,
+					data->args.inode,
+					true);
+		}
+		fallthrough;
 	default:
 		if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
 			rpc_restart_call_prepare(task);
@@ -10279,10 +10289,17 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 static void nfs4_layoutcommit_release(void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
+	struct inode *inode = data->args.inode;
 
 	pnfs_cleanup_layoutcommit(data);
 	nfs_post_op_update_inode_force_wcc(data->args.inode,
 					   data->res.fattr);
+	struct pnfs_layout_hdr *lo;
+	spin_lock(&inode->i_lock);
+	lo = NFS_I(inode)->layout;
+	spin_unlock(&inode->i_lock);
+	pnfs_put_layout_hdr(lo);
+
 	put_cred(data->cred);
 	nfs_iput_and_deactive(data->inode);
 	kfree(data);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3135b5af7ee..defc0a84e6c7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -409,7 +409,8 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
  */
 bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
-		struct inode *inode)
+		struct inode *inode,
+		bool force_update)
 {
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_range range = {
@@ -433,7 +434,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		}
 		/* Try to update the seqid to the most recent */
 		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
-		if (err != -EBUSY) {
+		if (force_update || err != -EBUSY) {
 			dst->seqid = lo->plh_stateid.seqid;
 			*dst_range = range;
 			ret = true;
@@ -1306,6 +1307,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
 	/* Serialise LAYOUTGET/LAYOUTRETURN */
 	if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
 		return false;
+	/* Serialise LAYOUTCOMMIT/LAYOUTRETURN */
+	if (pnfs_layoutcommit_outstanding(lo->plh_inode))
+		return false;
 	if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
 		return false;
 	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
@@ -1686,7 +1690,8 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
 		return 0;
 	case -NFS4ERR_OLD_STATEID:
 		if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
-						     &arg->range, arg->inode))
+						     &arg->range, arg->inode,
+						     false))
 			break;
 		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
 		return -EAGAIN;
@@ -3397,6 +3402,10 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 		}
 	}
 
+	/* the ref will be free on nfs4_layoutcommit_release, and trigger
+	 * LAYOUTRETURN
+	 */
+	pnfs_get_layout_hdr(nfsi->layout);
 
 	status = nfs4_proc_layoutcommit(data, sync);
 out:
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 91ff877185c8..c6788f1423a3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -287,7 +287,8 @@ int pnfs_layout_destroy_byclid(struct nfs_client *clp,
 			       enum pnfs_layout_destroy_mode mode);
 bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
-		struct inode *inode);
+		struct inode *inode,
+		bool force_update);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
 			     const nfs4_stateid *new,
@@ -891,7 +892,8 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 
 static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
-		struct inode *inode)
+		struct inode *inode,
+		bool force_update)
 {
 	return false;
 }
-- 
2.51.0.87.g1fa68948c3.dirty


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH] draft patches to fixes LAYOUTCOMMIT related issues
  2025-09-08 20:33   ` Haihua Yang
@ 2025-09-29 20:08     ` Haihua Yang
  0 siblings, 0 replies; 5+ messages in thread
From: Haihua Yang @ 2025-09-29 20:08 UTC (permalink / raw)
  To: yanghh; +Cc: linux-nfs, trondmy

1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT
2, update layout stateid when layoutcommit receiving NFS4ERR_OLD_STATEID
---
 fs/nfs/nfs4proc.c | 20 +++++++++++++++++++-
 fs/nfs/pnfs.c     | 15 ++++++++++++---
 fs/nfs/pnfs.h     |  6 ++++--
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..d1607b4606c7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10081,7 +10081,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 	case -NFS4ERR_OLD_STATEID:
 		if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
 					&lrp->args.range,
-					lrp->args.inode))
+					lrp->args.inode,
+					false))
 			goto out_restart;
 		fallthrough;
 	default:
@@ -10255,6 +10256,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
 	struct nfs_server *server = NFS_SERVER(data->args.inode);
+	struct pnfs_layout_range dst_range;
 
 	if (!nfs41_sequence_done(task, &data->res.seq_res))
 		return;
@@ -10263,11 +10265,20 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 	case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */
 	case -NFS4ERR_BADIOMODE:     /* no IOMODE_RW layout for range */
 	case -NFS4ERR_BADLAYOUT:     /* no layout */
+	case -NFS4ERR_STALE_STATEID: /* stale layout */
 	case -NFS4ERR_GRACE:	    /* loca_recalim always false */
 		task->tk_status = 0;
 		break;
 	case 0:
 		break;
+	case -NFS4ERR_OLD_STATEID:
+		if (data->inode) {
+			nfs4_layout_refresh_old_stateid(&data->args.stateid,
+					&dst_range,
+					data->args.inode,
+					true);
+		}
+		fallthrough;
 	default:
 		if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
 			rpc_restart_call_prepare(task);
@@ -10279,10 +10290,17 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 static void nfs4_layoutcommit_release(void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
+	struct inode *inode = data->args.inode;
 
 	pnfs_cleanup_layoutcommit(data);
 	nfs_post_op_update_inode_force_wcc(data->args.inode,
 					   data->res.fattr);
+	struct pnfs_layout_hdr *lo;
+	spin_lock(&inode->i_lock);
+	lo = NFS_I(inode)->layout;
+	spin_unlock(&inode->i_lock);
+	pnfs_put_layout_hdr(lo);
+
 	put_cred(data->cred);
 	nfs_iput_and_deactive(data->inode);
 	kfree(data);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3135b5af7ee..defc0a84e6c7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -409,7 +409,8 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
  */
 bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
-		struct inode *inode)
+		struct inode *inode,
+		bool force_update)
 {
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_range range = {
@@ -433,7 +434,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		}
 		/* Try to update the seqid to the most recent */
 		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
-		if (err != -EBUSY) {
+		if (force_update || err != -EBUSY) {
 			dst->seqid = lo->plh_stateid.seqid;
 			*dst_range = range;
 			ret = true;
@@ -1306,6 +1307,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
 	/* Serialise LAYOUTGET/LAYOUTRETURN */
 	if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
 		return false;
+	/* Serialise LAYOUTCOMMIT/LAYOUTRETURN */
+	if (pnfs_layoutcommit_outstanding(lo->plh_inode))
+		return false;
 	if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
 		return false;
 	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
@@ -1686,7 +1690,8 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
 		return 0;
 	case -NFS4ERR_OLD_STATEID:
 		if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
-						     &arg->range, arg->inode))
+						     &arg->range, arg->inode,
+						     false))
 			break;
 		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
 		return -EAGAIN;
@@ -3397,6 +3402,10 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 		}
 	}
 
+	/* the ref will be free on nfs4_layoutcommit_release, and trigger
+	 * LAYOUTRETURN
+	 */
+	pnfs_get_layout_hdr(nfsi->layout);
 
 	status = nfs4_proc_layoutcommit(data, sync);
 out:
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 91ff877185c8..c6788f1423a3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -287,7 +287,8 @@ int pnfs_layout_destroy_byclid(struct nfs_client *clp,
 			       enum pnfs_layout_destroy_mode mode);
 bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
-		struct inode *inode);
+		struct inode *inode,
+		bool force_update);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
 			     const nfs4_stateid *new,
@@ -891,7 +892,8 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 
 static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
-		struct inode *inode)
+		struct inode *inode,
+		bool force_update)
 {
 	return false;
 }
-- 
2.51.0.87.g1fa68948c3.dirty


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-09-29 20:08 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-02 18:30 [PATCH] draft patches to fixes LAYOUTCOMMIT related issues Haihua Yang
2025-09-02 21:09 ` Trond Myklebust
2025-09-08  4:52   ` Haihua Yang
2025-09-08 20:33   ` Haihua Yang
2025-09-29 20:08     ` Haihua Yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox