Linux NFS development
 help / color / mirror / Atom feed
* [PATCH v2] NFSD: Disallow layoutget during grace period
@ 2025-09-03 19:34 Sergey Bashirov
  2025-09-04  5:26 ` Christoph Hellwig
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Sergey Bashirov @ 2025-09-03 19:34 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo,
	Tom Talpey
  Cc: linux-nfs, linux-kernel, Sergey Bashirov, Konstantin Evtushenko

When the block/scsi layout server is recovering from a reboot and is in a
grace period, any operation that may result in deletion or reallocation of
block extents should not be allowed. See RFC 8881, section 18.43.3.

If multiple clients write data to the same file, rebooting the server
during writing can result in file corruption. We observed this behavior
while testing a pNFS block volume setup.

Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
---
Changes in v2:
 - Push down the check to layout driver level

 fs/nfsd/blocklayout.c    | 8 +++++++-
 fs/nfsd/flexfilelayout.c | 2 +-
 fs/nfsd/nfs4proc.c       | 3 ++-
 fs/nfsd/pnfs.h           | 2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 0822d8a119c6..1fbc5bbde07f 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -19,7 +19,7 @@
 
 static __be32
 nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
-		struct nfsd4_layoutget *args)
+		struct nfsd4_layoutget *args, bool in_grace)
 {
 	struct nfsd4_layout_seg *seg = &args->lg_seg;
 	struct super_block *sb = inode->i_sb;
@@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 		goto out_layoutunavailable;
 	}
 
+	if (in_grace)
+		goto out_grace;
+
 	/*
 	 * Some clients barf on non-zero block numbers for NONE or INVALID
 	 * layouts, so make sure to zero the whole structure.
@@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 out_layoutunavailable:
 	seg->length = 0;
 	return nfserr_layoutunavailable;
+out_grace:
+	seg->length = 0;
+	return nfserr_grace;
 }
 
 static __be32
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index 3ca5304440ff..274a1e9bb596 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -21,7 +21,7 @@
 
 static __be32
 nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
-		struct nfsd4_layoutget *args)
+		struct nfsd4_layoutget *args, bool in_grace)
 {
 	struct nfsd4_layout_seg *seg = &args->lg_seg;
 	u32 device_generation = 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d7c58aa64f06..5d1d343a4e23 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2435,6 +2435,7 @@ static __be32
 nfsd4_layoutget(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct net *net = SVC_NET(rqstp);
 	struct nfsd4_layoutget *lgp = &u->layoutget;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
@@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
 		goto out_put_stid;
 
 	nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
-				     current_fh, lgp);
+				     current_fh, lgp, locks_in_grace(net));
 	if (nfserr)
 		goto out_put_stid;
 
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index dfd411d1f363..61c2528ef077 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
 			const struct nfsd4_getdeviceinfo *gdevp);
 
 	__be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
-			struct nfsd4_layoutget *lgp);
+			struct nfsd4_layoutget *lgp, bool in_grace);
 	__be32 (*encode_layoutget)(struct xdr_stream *xdr,
 			const struct nfsd4_layoutget *lgp);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
  2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
@ 2025-09-04  5:26 ` Christoph Hellwig
  2025-09-04 10:14 ` Jeff Layton
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2025-09-04  5:26 UTC (permalink / raw)
  To: Sergey Bashirov
  Cc: Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo,
	Tom Talpey, linux-nfs, linux-kernel, Konstantin Evtushenko

On Wed, Sep 03, 2025 at 10:34:24PM +0300, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
> 
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
  2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
  2025-09-04  5:26 ` Christoph Hellwig
@ 2025-09-04 10:14 ` Jeff Layton
  2025-09-04 15:54 ` Chuck Lever
  2025-09-04 15:57 ` Chuck Lever
  3 siblings, 0 replies; 6+ messages in thread
From: Jeff Layton @ 2025-09-04 10:14 UTC (permalink / raw)
  To: Sergey Bashirov, Chuck Lever, NeilBrown, Olga Kornievskaia,
	Dai Ngo, Tom Talpey
  Cc: linux-nfs, linux-kernel, Konstantin Evtushenko

On Wed, 2025-09-03 at 22:34 +0300, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
> 
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
> 
> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
> ---
> Changes in v2:
>  - Push down the check to layout driver level
> 
>  fs/nfsd/blocklayout.c    | 8 +++++++-
>  fs/nfsd/flexfilelayout.c | 2 +-
>  fs/nfsd/nfs4proc.c       | 3 ++-
>  fs/nfsd/pnfs.h           | 2 +-
>  4 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 0822d8a119c6..1fbc5bbde07f 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -19,7 +19,7 @@
>  
>  static __be32
>  nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> -		struct nfsd4_layoutget *args)
> +		struct nfsd4_layoutget *args, bool in_grace)
>  {
>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>  	struct super_block *sb = inode->i_sb;
> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  		goto out_layoutunavailable;
>  	}
>  
> +	if (in_grace)
> +		goto out_grace;
> +
>  	/*
>  	 * Some clients barf on non-zero block numbers for NONE or INVALID
>  	 * layouts, so make sure to zero the whole structure.
> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  out_layoutunavailable:
>  	seg->length = 0;
>  	return nfserr_layoutunavailable;
> +out_grace:
> +	seg->length = 0;
> +	return nfserr_grace;
>  }
>  
>  static __be32
> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
> index 3ca5304440ff..274a1e9bb596 100644
> --- a/fs/nfsd/flexfilelayout.c
> +++ b/fs/nfsd/flexfilelayout.c
> @@ -21,7 +21,7 @@
>  
>  static __be32
>  nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> -		struct nfsd4_layoutget *args)
> +		struct nfsd4_layoutget *args, bool in_grace)
>  {
>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>  	u32 device_generation = 0;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d7c58aa64f06..5d1d343a4e23 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2435,6 +2435,7 @@ static __be32
>  nfsd4_layoutget(struct svc_rqst *rqstp,
>  		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
>  {
> +	struct net *net = SVC_NET(rqstp);
>  	struct nfsd4_layoutget *lgp = &u->layoutget;
>  	struct svc_fh *current_fh = &cstate->current_fh;
>  	const struct nfsd4_layout_ops *ops;
> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
>  		goto out_put_stid;
>  
>  	nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
> -				     current_fh, lgp);
> +				     current_fh, lgp, locks_in_grace(net));
>  	if (nfserr)
>  		goto out_put_stid;
>  
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index dfd411d1f363..61c2528ef077 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
>  			const struct nfsd4_getdeviceinfo *gdevp);
>  
>  	__be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
> -			struct nfsd4_layoutget *lgp);
> +			struct nfsd4_layoutget *lgp, bool in_grace);
>  	__be32 (*encode_layoutget)(struct xdr_stream *xdr,
>  			const struct nfsd4_layoutget *lgp);
>  

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
  2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
  2025-09-04  5:26 ` Christoph Hellwig
  2025-09-04 10:14 ` Jeff Layton
@ 2025-09-04 15:54 ` Chuck Lever
  2025-09-05 13:41   ` Chuck Lever
  2025-09-04 15:57 ` Chuck Lever
  3 siblings, 1 reply; 6+ messages in thread
From: Chuck Lever @ 2025-09-04 15:54 UTC (permalink / raw)
  To: Sergey Bashirov, Jeff Layton, NeilBrown, Olga Kornievskaia,
	Dai Ngo, Tom Talpey
  Cc: linux-nfs, linux-kernel, Konstantin Evtushenko

On 9/3/25 3:34 PM, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
> 
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
> 
> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
> ---
> Changes in v2:
>  - Push down the check to layout driver level
> 
>  fs/nfsd/blocklayout.c    | 8 +++++++-
>  fs/nfsd/flexfilelayout.c | 2 +-
>  fs/nfsd/nfs4proc.c       | 3 ++-
>  fs/nfsd/pnfs.h           | 2 +-
>  4 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 0822d8a119c6..1fbc5bbde07f 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -19,7 +19,7 @@
>  
>  static __be32
>  nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> -		struct nfsd4_layoutget *args)
> +		struct nfsd4_layoutget *args, bool in_grace)
>  {
>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>  	struct super_block *sb = inode->i_sb;
> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  		goto out_layoutunavailable;
>  	}
>  
> +	if (in_grace)
> +		goto out_grace;

Taste/style nit:

I prefer that the controlling svc_rqst is passed to ->proc_layoutget,
rather than passing a boolean. The ff layout can just ignore that
new parameter, and the block layout can deref the network namespace and
do the locks_in_grace check.


> +
>  	/*
>  	 * Some clients barf on non-zero block numbers for NONE or INVALID
>  	 * layouts, so make sure to zero the whole structure.
> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  out_layoutunavailable:
>  	seg->length = 0;
>  	return nfserr_layoutunavailable;
> +out_grace:
> +	seg->length = 0;
> +	return nfserr_grace;
>  }
>  
>  static __be32
> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
> index 3ca5304440ff..274a1e9bb596 100644
> --- a/fs/nfsd/flexfilelayout.c
> +++ b/fs/nfsd/flexfilelayout.c
> @@ -21,7 +21,7 @@
>  
>  static __be32
>  nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> -		struct nfsd4_layoutget *args)
> +		struct nfsd4_layoutget *args, bool in_grace)
>  {
>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>  	u32 device_generation = 0;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d7c58aa64f06..5d1d343a4e23 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2435,6 +2435,7 @@ static __be32
>  nfsd4_layoutget(struct svc_rqst *rqstp,
>  		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
>  {
> +	struct net *net = SVC_NET(rqstp);
>  	struct nfsd4_layoutget *lgp = &u->layoutget;
>  	struct svc_fh *current_fh = &cstate->current_fh;
>  	const struct nfsd4_layout_ops *ops;
> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
>  		goto out_put_stid;
>  
>  	nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
> -				     current_fh, lgp);
> +				     current_fh, lgp, locks_in_grace(net));
>  	if (nfserr)
>  		goto out_put_stid;
>  
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index dfd411d1f363..61c2528ef077 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
>  			const struct nfsd4_getdeviceinfo *gdevp);
>  
>  	__be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
> -			struct nfsd4_layoutget *lgp);
> +			struct nfsd4_layoutget *lgp, bool in_grace);
>  	__be32 (*encode_layoutget)(struct xdr_stream *xdr,
>  			const struct nfsd4_layoutget *lgp);
>  


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
  2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
                   ` (2 preceding siblings ...)
  2025-09-04 15:54 ` Chuck Lever
@ 2025-09-04 15:57 ` Chuck Lever
  3 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2025-09-04 15:57 UTC (permalink / raw)
  To: Sergey Bashirov, Jeff Layton, NeilBrown, Olga Kornievskaia,
	Dai Ngo, Tom Talpey
  Cc: linux-nfs, linux-kernel, Konstantin Evtushenko

On 9/3/25 3:34 PM, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
> 
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
> 
> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
> ---
> Changes in v2:
>  - Push down the check to layout driver level
> 
>  fs/nfsd/blocklayout.c    | 8 +++++++-
>  fs/nfsd/flexfilelayout.c | 2 +-
>  fs/nfsd/nfs4proc.c       | 3 ++-
>  fs/nfsd/pnfs.h           | 2 +-
>  4 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 0822d8a119c6..1fbc5bbde07f 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -19,7 +19,7 @@
>  
>  static __be32
>  nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> -		struct nfsd4_layoutget *args)
> +		struct nfsd4_layoutget *args, bool in_grace)
>  {
>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>  	struct super_block *sb = inode->i_sb;
> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  		goto out_layoutunavailable;
>  	}
>  
> +	if (in_grace)
> +		goto out_grace;
> +
>  	/*
>  	 * Some clients barf on non-zero block numbers for NONE or INVALID
>  	 * layouts, so make sure to zero the whole structure.
> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  out_layoutunavailable:
>  	seg->length = 0;
>  	return nfserr_layoutunavailable;
> +out_grace:
> +	seg->length = 0;
> +	return nfserr_grace;

Also, setting seg->length to zero is probably unnecessary:

union LAYOUTGET4res switch (nfsstat4 logr_status) {
case NFS4_OK:
        LAYOUTGET4resok     logr_resok4;
case NFS4ERR_LAYOUTTRYLATER:
        bool                logr_will_signal_layout_avail;
default:
        void;
};

Is the segment length value used at all if ->proc_layoutget returns
NFS4ERR_GRACE ?


>  }
>  
>  static __be32
> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
> index 3ca5304440ff..274a1e9bb596 100644
> --- a/fs/nfsd/flexfilelayout.c
> +++ b/fs/nfsd/flexfilelayout.c
> @@ -21,7 +21,7 @@
>  
>  static __be32
>  nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> -		struct nfsd4_layoutget *args)
> +		struct nfsd4_layoutget *args, bool in_grace)
>  {
>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>  	u32 device_generation = 0;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d7c58aa64f06..5d1d343a4e23 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2435,6 +2435,7 @@ static __be32
>  nfsd4_layoutget(struct svc_rqst *rqstp,
>  		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
>  {
> +	struct net *net = SVC_NET(rqstp);
>  	struct nfsd4_layoutget *lgp = &u->layoutget;
>  	struct svc_fh *current_fh = &cstate->current_fh;
>  	const struct nfsd4_layout_ops *ops;
> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
>  		goto out_put_stid;
>  
>  	nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
> -				     current_fh, lgp);
> +				     current_fh, lgp, locks_in_grace(net));
>  	if (nfserr)
>  		goto out_put_stid;
>  
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index dfd411d1f363..61c2528ef077 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
>  			const struct nfsd4_getdeviceinfo *gdevp);
>  
>  	__be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
> -			struct nfsd4_layoutget *lgp);
> +			struct nfsd4_layoutget *lgp, bool in_grace);
>  	__be32 (*encode_layoutget)(struct xdr_stream *xdr,
>  			const struct nfsd4_layoutget *lgp);
>  


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
  2025-09-04 15:54 ` Chuck Lever
@ 2025-09-05 13:41   ` Chuck Lever
  0 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2025-09-05 13:41 UTC (permalink / raw)
  To: Sergey Bashirov, Jeff Layton, NeilBrown, Olga Kornievskaia,
	Dai Ngo, Tom Talpey
  Cc: linux-nfs, linux-kernel, Konstantin Evtushenko

On 9/4/25 11:54 AM, Chuck Lever wrote:
> On 9/3/25 3:34 PM, Sergey Bashirov wrote:
>> When the block/scsi layout server is recovering from a reboot and is in a
>> grace period, any operation that may result in deletion or reallocation of
>> block extents should not be allowed. See RFC 8881, section 18.43.3.
>>
>> If multiple clients write data to the same file, rebooting the server
>> during writing can result in the file corruption. Observed this behavior
>> while testing pNFS block volume setup.
>>
>> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
>> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
>> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
>> ---
>> Changes in v2:
>>  - Push down the check to layout driver level
>>
>>  fs/nfsd/blocklayout.c    | 8 +++++++-
>>  fs/nfsd/flexfilelayout.c | 2 +-
>>  fs/nfsd/nfs4proc.c       | 3 ++-
>>  fs/nfsd/pnfs.h           | 2 +-
>>  4 files changed, 11 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
>> index 0822d8a119c6..1fbc5bbde07f 100644
>> --- a/fs/nfsd/blocklayout.c
>> +++ b/fs/nfsd/blocklayout.c
>> @@ -19,7 +19,7 @@
>>  
>>  static __be32
>>  nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>> -		struct nfsd4_layoutget *args)
>> +		struct nfsd4_layoutget *args, bool in_grace)
>>  {
>>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>>  	struct super_block *sb = inode->i_sb;
>> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>>  		goto out_layoutunavailable;
>>  	}
>>  
>> +	if (in_grace)
>> +		goto out_grace;
> 
> Taste/style nit:
> 
> I prefer that the controlling svc_rqst is passed to ->proc_layoutget,
> rather than passing a boolean. The ff layout can just ignore that
> new parameter, and the block layout can deref the network namespace and
> do the locks_in_grace check.

Never mind. I will take v2 as is and fix this up myself.


>> +
>>  	/*
>>  	 * Some clients barf on non-zero block numbers for NONE or INVALID
>>  	 * layouts, so make sure to zero the whole structure.
>> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>>  out_layoutunavailable:
>>  	seg->length = 0;
>>  	return nfserr_layoutunavailable;
>> +out_grace:
>> +	seg->length = 0;
>> +	return nfserr_grace;
>>  }
>>  
>>  static __be32
>> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
>> index 3ca5304440ff..274a1e9bb596 100644
>> --- a/fs/nfsd/flexfilelayout.c
>> +++ b/fs/nfsd/flexfilelayout.c
>> @@ -21,7 +21,7 @@
>>  
>>  static __be32
>>  nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>> -		struct nfsd4_layoutget *args)
>> +		struct nfsd4_layoutget *args, bool in_grace)
>>  {
>>  	struct nfsd4_layout_seg *seg = &args->lg_seg;
>>  	u32 device_generation = 0;
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index d7c58aa64f06..5d1d343a4e23 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -2435,6 +2435,7 @@ static __be32
>>  nfsd4_layoutget(struct svc_rqst *rqstp,
>>  		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
>>  {
>> +	struct net *net = SVC_NET(rqstp);
>>  	struct nfsd4_layoutget *lgp = &u->layoutget;
>>  	struct svc_fh *current_fh = &cstate->current_fh;
>>  	const struct nfsd4_layout_ops *ops;
>> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
>>  		goto out_put_stid;
>>  
>>  	nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
>> -				     current_fh, lgp);
>> +				     current_fh, lgp, locks_in_grace(net));
>>  	if (nfserr)
>>  		goto out_put_stid;
>>  
>> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
>> index dfd411d1f363..61c2528ef077 100644
>> --- a/fs/nfsd/pnfs.h
>> +++ b/fs/nfsd/pnfs.h
>> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
>>  			const struct nfsd4_getdeviceinfo *gdevp);
>>  
>>  	__be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
>> -			struct nfsd4_layoutget *lgp);
>> +			struct nfsd4_layoutget *lgp, bool in_grace);
>>  	__be32 (*encode_layoutget)(struct xdr_stream *xdr,
>>  			const struct nfsd4_layoutget *lgp);
>>  
> 
> 


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-09-05 13:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
2025-09-04  5:26 ` Christoph Hellwig
2025-09-04 10:14 ` Jeff Layton
2025-09-04 15:54 ` Chuck Lever
2025-09-05 13:41   ` Chuck Lever
2025-09-04 15:57 ` Chuck Lever

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.