* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
@ 2025-09-04 5:26 ` Christoph Hellwig
2025-09-04 10:14 ` Jeff Layton
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2025-09-04 5:26 UTC (permalink / raw)
To: Sergey Bashirov
Cc: Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo,
Tom Talpey, linux-nfs, linux-kernel, Konstantin Evtushenko
On Wed, Sep 03, 2025 at 10:34:24PM +0300, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
>
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
2025-09-04 5:26 ` Christoph Hellwig
@ 2025-09-04 10:14 ` Jeff Layton
2025-09-04 15:54 ` Chuck Lever
2025-09-04 15:57 ` Chuck Lever
3 siblings, 0 replies; 6+ messages in thread
From: Jeff Layton @ 2025-09-04 10:14 UTC (permalink / raw)
To: Sergey Bashirov, Chuck Lever, NeilBrown, Olga Kornievskaia,
Dai Ngo, Tom Talpey
Cc: linux-nfs, linux-kernel, Konstantin Evtushenko
On Wed, 2025-09-03 at 22:34 +0300, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
>
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
>
> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
> ---
> Changes in v2:
> - Push down the check to layout driver level
>
> fs/nfsd/blocklayout.c | 8 +++++++-
> fs/nfsd/flexfilelayout.c | 2 +-
> fs/nfsd/nfs4proc.c | 3 ++-
> fs/nfsd/pnfs.h | 2 +-
> 4 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 0822d8a119c6..1fbc5bbde07f 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -19,7 +19,7 @@
>
> static __be32
> nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *args)
> + struct nfsd4_layoutget *args, bool in_grace)
> {
> struct nfsd4_layout_seg *seg = &args->lg_seg;
> struct super_block *sb = inode->i_sb;
> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> goto out_layoutunavailable;
> }
>
> + if (in_grace)
> + goto out_grace;
> +
> /*
> * Some clients barf on non-zero block numbers for NONE or INVALID
> * layouts, so make sure to zero the whole structure.
> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> out_layoutunavailable:
> seg->length = 0;
> return nfserr_layoutunavailable;
> +out_grace:
> + seg->length = 0;
> + return nfserr_grace;
> }
>
> static __be32
> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
> index 3ca5304440ff..274a1e9bb596 100644
> --- a/fs/nfsd/flexfilelayout.c
> +++ b/fs/nfsd/flexfilelayout.c
> @@ -21,7 +21,7 @@
>
> static __be32
> nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *args)
> + struct nfsd4_layoutget *args, bool in_grace)
> {
> struct nfsd4_layout_seg *seg = &args->lg_seg;
> u32 device_generation = 0;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d7c58aa64f06..5d1d343a4e23 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2435,6 +2435,7 @@ static __be32
> nfsd4_layoutget(struct svc_rqst *rqstp,
> struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
> {
> + struct net *net = SVC_NET(rqstp);
> struct nfsd4_layoutget *lgp = &u->layoutget;
> struct svc_fh *current_fh = &cstate->current_fh;
> const struct nfsd4_layout_ops *ops;
> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
> goto out_put_stid;
>
> nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
> - current_fh, lgp);
> + current_fh, lgp, locks_in_grace(net));
> if (nfserr)
> goto out_put_stid;
>
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index dfd411d1f363..61c2528ef077 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
> const struct nfsd4_getdeviceinfo *gdevp);
>
> __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *lgp);
> + struct nfsd4_layoutget *lgp, bool in_grace);
> __be32 (*encode_layoutget)(struct xdr_stream *xdr,
> const struct nfsd4_layoutget *lgp);
>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
2025-09-04 5:26 ` Christoph Hellwig
2025-09-04 10:14 ` Jeff Layton
@ 2025-09-04 15:54 ` Chuck Lever
2025-09-05 13:41 ` Chuck Lever
2025-09-04 15:57 ` Chuck Lever
3 siblings, 1 reply; 6+ messages in thread
From: Chuck Lever @ 2025-09-04 15:54 UTC (permalink / raw)
To: Sergey Bashirov, Jeff Layton, NeilBrown, Olga Kornievskaia,
Dai Ngo, Tom Talpey
Cc: linux-nfs, linux-kernel, Konstantin Evtushenko
On 9/3/25 3:34 PM, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
>
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
>
> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
> ---
> Changes in v2:
> - Push down the check to layout driver level
>
> fs/nfsd/blocklayout.c | 8 +++++++-
> fs/nfsd/flexfilelayout.c | 2 +-
> fs/nfsd/nfs4proc.c | 3 ++-
> fs/nfsd/pnfs.h | 2 +-
> 4 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 0822d8a119c6..1fbc5bbde07f 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -19,7 +19,7 @@
>
> static __be32
> nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *args)
> + struct nfsd4_layoutget *args, bool in_grace)
> {
> struct nfsd4_layout_seg *seg = &args->lg_seg;
> struct super_block *sb = inode->i_sb;
> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> goto out_layoutunavailable;
> }
>
> + if (in_grace)
> + goto out_grace;
Taste/style nit:
I prefer that the controlling svc_rqst is passed to ->proc_layoutget,
rather than passing a boolean. The ff layout can just ignore that
new parameter, and the block layout can deref the network namespace and
do the locks_in_grace check.
> +
> /*
> * Some clients barf on non-zero block numbers for NONE or INVALID
> * layouts, so make sure to zero the whole structure.
> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> out_layoutunavailable:
> seg->length = 0;
> return nfserr_layoutunavailable;
> +out_grace:
> + seg->length = 0;
> + return nfserr_grace;
> }
>
> static __be32
> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
> index 3ca5304440ff..274a1e9bb596 100644
> --- a/fs/nfsd/flexfilelayout.c
> +++ b/fs/nfsd/flexfilelayout.c
> @@ -21,7 +21,7 @@
>
> static __be32
> nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *args)
> + struct nfsd4_layoutget *args, bool in_grace)
> {
> struct nfsd4_layout_seg *seg = &args->lg_seg;
> u32 device_generation = 0;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d7c58aa64f06..5d1d343a4e23 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2435,6 +2435,7 @@ static __be32
> nfsd4_layoutget(struct svc_rqst *rqstp,
> struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
> {
> + struct net *net = SVC_NET(rqstp);
> struct nfsd4_layoutget *lgp = &u->layoutget;
> struct svc_fh *current_fh = &cstate->current_fh;
> const struct nfsd4_layout_ops *ops;
> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
> goto out_put_stid;
>
> nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
> - current_fh, lgp);
> + current_fh, lgp, locks_in_grace(net));
> if (nfserr)
> goto out_put_stid;
>
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index dfd411d1f363..61c2528ef077 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
> const struct nfsd4_getdeviceinfo *gdevp);
>
> __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *lgp);
> + struct nfsd4_layoutget *lgp, bool in_grace);
> __be32 (*encode_layoutget)(struct xdr_stream *xdr,
> const struct nfsd4_layoutget *lgp);
>
--
Chuck Lever
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
2025-09-04 15:54 ` Chuck Lever
@ 2025-09-05 13:41 ` Chuck Lever
0 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2025-09-05 13:41 UTC (permalink / raw)
To: Sergey Bashirov, Jeff Layton, NeilBrown, Olga Kornievskaia,
Dai Ngo, Tom Talpey
Cc: linux-nfs, linux-kernel, Konstantin Evtushenko
On 9/4/25 11:54 AM, Chuck Lever wrote:
> On 9/3/25 3:34 PM, Sergey Bashirov wrote:
>> When the block/scsi layout server is recovering from a reboot and is in a
>> grace period, any operation that may result in deletion or reallocation of
>> block extents should not be allowed. See RFC 8881, section 18.43.3.
>>
>> If multiple clients write data to the same file, rebooting the server
>> during writing can result in the file corruption. Observed this behavior
>> while testing pNFS block volume setup.
>>
>> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
>> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
>> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
>> ---
>> Changes in v2:
>> - Push down the check to layout driver level
>>
>> fs/nfsd/blocklayout.c | 8 +++++++-
>> fs/nfsd/flexfilelayout.c | 2 +-
>> fs/nfsd/nfs4proc.c | 3 ++-
>> fs/nfsd/pnfs.h | 2 +-
>> 4 files changed, 11 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
>> index 0822d8a119c6..1fbc5bbde07f 100644
>> --- a/fs/nfsd/blocklayout.c
>> +++ b/fs/nfsd/blocklayout.c
>> @@ -19,7 +19,7 @@
>>
>> static __be32
>> nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>> - struct nfsd4_layoutget *args)
>> + struct nfsd4_layoutget *args, bool in_grace)
>> {
>> struct nfsd4_layout_seg *seg = &args->lg_seg;
>> struct super_block *sb = inode->i_sb;
>> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>> goto out_layoutunavailable;
>> }
>>
>> + if (in_grace)
>> + goto out_grace;
>
> Taste/style nit:
>
> I prefer that the controlling svc_rqst is passed to ->proc_layoutget,
> rather than passing a boolean. The ff layout can just ignore that
> new parameter, and the block layout can deref the network namespace and
> do the locks_in_grace check.
Never mind. I will take v2 as is and fix this up myself.
>> +
>> /*
>> * Some clients barf on non-zero block numbers for NONE or INVALID
>> * layouts, so make sure to zero the whole structure.
>> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>> out_layoutunavailable:
>> seg->length = 0;
>> return nfserr_layoutunavailable;
>> +out_grace:
>> + seg->length = 0;
>> + return nfserr_grace;
>> }
>>
>> static __be32
>> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
>> index 3ca5304440ff..274a1e9bb596 100644
>> --- a/fs/nfsd/flexfilelayout.c
>> +++ b/fs/nfsd/flexfilelayout.c
>> @@ -21,7 +21,7 @@
>>
>> static __be32
>> nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>> - struct nfsd4_layoutget *args)
>> + struct nfsd4_layoutget *args, bool in_grace)
>> {
>> struct nfsd4_layout_seg *seg = &args->lg_seg;
>> u32 device_generation = 0;
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index d7c58aa64f06..5d1d343a4e23 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -2435,6 +2435,7 @@ static __be32
>> nfsd4_layoutget(struct svc_rqst *rqstp,
>> struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
>> {
>> + struct net *net = SVC_NET(rqstp);
>> struct nfsd4_layoutget *lgp = &u->layoutget;
>> struct svc_fh *current_fh = &cstate->current_fh;
>> const struct nfsd4_layout_ops *ops;
>> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
>> goto out_put_stid;
>>
>> nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
>> - current_fh, lgp);
>> + current_fh, lgp, locks_in_grace(net));
>> if (nfserr)
>> goto out_put_stid;
>>
>> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
>> index dfd411d1f363..61c2528ef077 100644
>> --- a/fs/nfsd/pnfs.h
>> +++ b/fs/nfsd/pnfs.h
>> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
>> const struct nfsd4_getdeviceinfo *gdevp);
>>
>> __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
>> - struct nfsd4_layoutget *lgp);
>> + struct nfsd4_layoutget *lgp, bool in_grace);
>> __be32 (*encode_layoutget)(struct xdr_stream *xdr,
>> const struct nfsd4_layoutget *lgp);
>>
>
>
--
Chuck Lever
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2] NFSD: Disallow layoutget during grace period
2025-09-03 19:34 [PATCH v2] NFSD: Disallow layoutget during grace period Sergey Bashirov
` (2 preceding siblings ...)
2025-09-04 15:54 ` Chuck Lever
@ 2025-09-04 15:57 ` Chuck Lever
3 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2025-09-04 15:57 UTC (permalink / raw)
To: Sergey Bashirov, Jeff Layton, NeilBrown, Olga Kornievskaia,
Dai Ngo, Tom Talpey
Cc: linux-nfs, linux-kernel, Konstantin Evtushenko
On 9/3/25 3:34 PM, Sergey Bashirov wrote:
> When the block/scsi layout server is recovering from a reboot and is in a
> grace period, any operation that may result in deletion or reallocation of
> block extents should not be allowed. See RFC 8881, section 18.43.3.
>
> If multiple clients write data to the same file, rebooting the server
> during writing can result in the file corruption. Observed this behavior
> while testing pNFS block volume setup.
>
> Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
> Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
> ---
> Changes in v2:
> - Push down the check to layout driver level
>
> fs/nfsd/blocklayout.c | 8 +++++++-
> fs/nfsd/flexfilelayout.c | 2 +-
> fs/nfsd/nfs4proc.c | 3 ++-
> fs/nfsd/pnfs.h | 2 +-
> 4 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 0822d8a119c6..1fbc5bbde07f 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -19,7 +19,7 @@
>
> static __be32
> nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *args)
> + struct nfsd4_layoutget *args, bool in_grace)
> {
> struct nfsd4_layout_seg *seg = &args->lg_seg;
> struct super_block *sb = inode->i_sb;
> @@ -34,6 +34,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> goto out_layoutunavailable;
> }
>
> + if (in_grace)
> + goto out_grace;
> +
> /*
> * Some clients barf on non-zero block numbers for NONE or INVALID
> * layouts, so make sure to zero the whole structure.
> @@ -111,6 +114,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> out_layoutunavailable:
> seg->length = 0;
> return nfserr_layoutunavailable;
> +out_grace:
> + seg->length = 0;
> + return nfserr_grace;
Also, setting seg->length to zero is probably unnecessary:

   union LAYOUTGET4res switch (nfsstat4 logr_status) {
   case NFS4_OK:
           LAYOUTGET4resok     logr_resok4;
   case NFS4ERR_LAYOUTTRYLATER:
           bool                logr_will_signal_layout_avail;
   default:
           void;
   };

Is the segment length value used at all if ->proc_layoutget returns
NFS4ERR_GRACE?
> }
>
> static __be32
> diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
> index 3ca5304440ff..274a1e9bb596 100644
> --- a/fs/nfsd/flexfilelayout.c
> +++ b/fs/nfsd/flexfilelayout.c
> @@ -21,7 +21,7 @@
>
> static __be32
> nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *args)
> + struct nfsd4_layoutget *args, bool in_grace)
> {
> struct nfsd4_layout_seg *seg = &args->lg_seg;
> u32 device_generation = 0;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d7c58aa64f06..5d1d343a4e23 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2435,6 +2435,7 @@ static __be32
> nfsd4_layoutget(struct svc_rqst *rqstp,
> struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
> {
> + struct net *net = SVC_NET(rqstp);
> struct nfsd4_layoutget *lgp = &u->layoutget;
> struct svc_fh *current_fh = &cstate->current_fh;
> const struct nfsd4_layout_ops *ops;
> @@ -2498,7 +2499,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
> goto out_put_stid;
>
> nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
> - current_fh, lgp);
> + current_fh, lgp, locks_in_grace(net));
> if (nfserr)
> goto out_put_stid;
>
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index dfd411d1f363..61c2528ef077 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -30,7 +30,7 @@ struct nfsd4_layout_ops {
> const struct nfsd4_getdeviceinfo *gdevp);
>
> __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
> - struct nfsd4_layoutget *lgp);
> + struct nfsd4_layoutget *lgp, bool in_grace);
> __be32 (*encode_layoutget)(struct xdr_stream *xdr,
> const struct nfsd4_layoutget *lgp);
>
--
Chuck Lever
^ permalink raw reply [flat|nested] 6+ messages in thread