linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] fuse: fix race in fuse_notify_store()
@ 2025-01-29 18:44 Luis Henriques
  2025-01-29 19:30 ` Bernd Schubert
  0 siblings, 1 reply; 3+ messages in thread
From: Luis Henriques @ 2025-01-29 18:44 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel, Luis Henriques, Teng Qin

Userspace filesystems can push data for a specific inode without it being
explicitly requested.  This can be accomplished by using NOTIFY_STORE.  However,
this may race against another process performing different operations on the
same inode.

If, for example, there is a process reading from the inode, that process may
block waiting for data to become available while holding the folio lock, while
the FUSE server also blocks trying to lock the same folio to update it with the
inode data.

The easiest solution, as suggested by Miklos, is to allow the userspace
filesystem to skip locked folios.

Link: https://lore.kernel.org/CH2PR14MB41040692ABC50334F500789ED6C89@CH2PR14MB4104.namprd14.prod.outlook.com
Reported-by: Teng Qin <tqin@jumptrading.com>
Originally-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Luis Henriques <luis@igalia.com>
---
Hi!

Instead of sending the usual 'ping' to the original thread, I've decided to
resend the patch as an RFC.

As I mentioned before, this is an attempt to forward port the original patch
from Miklos to the folios world.  Also, the same question:

if we fail to get a folio and need to skip it, 'this_num' needs to be
updated; but I'm not 100% sure if it's OK to use PAGE_SIZE in that case.

Obviously, libfuse will need to support this new NOWAIT flag (I can look at
that, of course).  But I was wondering if the NOTIFY_STORE behaviour
shouldn't *always* skip locked folios instead of doing it only when the flag
is set.

(By the way, I'm not sure if I'm using the 'Originally-by:' tag correctly;
I just want to make sure the authorship is preserved.  Please let me know if
that's not correct.)

Cheers,
-- 
Luis

 fs/fuse/dev.c             | 29 ++++++++++++++++++++++-------
 include/uapi/linux/fuse.h |  8 +++++++-
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 27ccae63495d..9a0cd88a9bb9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1630,6 +1630,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 	unsigned int num;
 	loff_t file_size;
 	loff_t end;
+	int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
 
 	err = -EINVAL;
 	if (size < sizeof(outarg))
@@ -1645,6 +1646,9 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 
 	nodeid = outarg.nodeid;
 
+	if (outarg.flags & FUSE_NOTIFY_STORE_NOWAIT)
+		fgp_flags |= FGP_NOWAIT;
+
 	down_read(&fc->killsb);
 
 	err = -ENOENT;
@@ -1668,14 +1672,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 		struct page *page;
 		unsigned int this_num;
 
-		folio = filemap_grab_folio(mapping, index);
-		err = PTR_ERR(folio);
-		if (IS_ERR(folio))
-			goto out_iput;
+		folio = __filemap_get_folio(mapping, index, fgp_flags,
+					    mapping_gfp_mask(mapping));
+		err = PTR_ERR_OR_ZERO(folio);
+		if (err) {
+			if (!(outarg.flags & FUSE_NOTIFY_STORE_NOWAIT))
+				goto out_iput;
+			page = NULL;
+			/* XXX */
+			this_num = min_t(unsigned int, num, PAGE_SIZE - offset);
+		} else {
+			page = &folio->page;
+			this_num = min_t(unsigned int, num,
+					 folio_size(folio) - offset);
+		}
 
-		page = &folio->page;
-		this_num = min_t(unsigned, num, folio_size(folio) - offset);
 		err = fuse_copy_page(cs, &page, offset, this_num, 0);
+		if (!page)
+			goto skip;
+
 		if (!folio_test_uptodate(folio) && !err && offset == 0 &&
 		    (this_num == folio_size(folio) || file_size == end)) {
 			folio_zero_segment(folio, this_num, folio_size(folio));
@@ -1683,7 +1698,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 		}
 		folio_unlock(folio);
 		folio_put(folio);
-
+skip:
 		if (err)
 			goto out_iput;
 
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index e9e78292d107..59725f89340e 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -576,6 +576,12 @@ struct fuse_file_lock {
  */
 #define FUSE_EXPIRE_ONLY		(1 << 0)
 
+/**
+ * notify_store flags
+ * FUSE_NOTIFY_STORE_NOWAIT: skip locked pages
+ */
+#define FUSE_NOTIFY_STORE_NOWAIT	(1 << 0)
+
 /**
  * extension type
  * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx
@@ -1075,7 +1081,7 @@ struct fuse_notify_store_out {
 	uint64_t	nodeid;
 	uint64_t	offset;
 	uint32_t	size;
-	uint32_t	padding;
+	uint32_t	flags;
 };
 
 struct fuse_notify_retrieve_out {

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [RFC PATCH] fuse: fix race in fuse_notify_store()
  2025-01-29 18:44 [RFC PATCH] fuse: fix race in fuse_notify_store() Luis Henriques
@ 2025-01-29 19:30 ` Bernd Schubert
  2025-01-29 21:26   ` Luis Henriques
  0 siblings, 1 reply; 3+ messages in thread
From: Bernd Schubert @ 2025-01-29 19:30 UTC (permalink / raw)
  To: Luis Henriques, Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel, Teng Qin

Hi Luis,

On 1/29/25 19:44, Luis Henriques wrote:
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 27ccae63495d..9a0cd88a9bb9 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -1630,6 +1630,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>  	unsigned int num;
>  	loff_t file_size;
>  	loff_t end;
> +	int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
>  
>  	err = -EINVAL;
>  	if (size < sizeof(outarg))
> @@ -1645,6 +1646,9 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>  
>  	nodeid = outarg.nodeid;
>  
> +	if (outarg.flags & FUSE_NOTIFY_STORE_NOWAIT)
> +		fgp_flags |= FGP_NOWAIT;
> +
>  	down_read(&fc->killsb);
>  
>  	err = -ENOENT;
> @@ -1668,14 +1672,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>  		struct page *page;
>  		unsigned int this_num;
>  
> -		folio = filemap_grab_folio(mapping, index);
> -		err = PTR_ERR(folio);
> -		if (IS_ERR(folio))
> -			goto out_iput;
> +		folio = __filemap_get_folio(mapping, index, fgp_flags,
> +					    mapping_gfp_mask(mapping));
> +		err = PTR_ERR_OR_ZERO(folio);
> +		if (err) {
> +			if (!(outarg.flags & FUSE_NOTIFY_STORE_NOWAIT))
> +				goto out_iput;
> +			page = NULL;
> +			/* XXX */

What is the XXX for? 
Also, I think you want to go to "skip" only on -EAGAIN? And if so, need
to unset err? 


> +			this_num = min_t(unsigned int, num, PAGE_SIZE - offset);
> +		} else {
> +			page = &folio->page;
> +			this_num = min_t(unsigned int, num,
> +					 folio_size(folio) - offset);
> +		}
>  
> -		page = &folio->page;
> -		this_num = min_t(unsigned, num, folio_size(folio) - offset);
>  		err = fuse_copy_page(cs, &page, offset, this_num, 0);
> +		if (!page)
> +			goto skip;
> +
>  		if (!folio_test_uptodate(folio) && !err && offset == 0 &&
>  		    (this_num == folio_size(folio) || file_size == end)) {
>  			folio_zero_segment(folio, this_num, folio_size(folio));
> @@ -1683,7 +1698,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>  		}
>  		folio_unlock(folio);
>  		folio_put(folio);
> -
> +skip:
>  		if (err)
>  			goto out_iput;
>  
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index e9e78292d107..59725f89340e 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -576,6 +576,12 @@ struct fuse_file_lock {
>   */
>  #define FUSE_EXPIRE_ONLY		(1 << 0)
>  
> +/**
> + * notify_store flags
> + * FUSE_NOTIFY_STORE_NOWAIT: skip locked pages
> + */
> +#define FUSE_NOTIFY_STORE_NOWAIT	(1 << 0)
> +
>  /**
>   * extension type
>   * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx
> @@ -1075,7 +1081,7 @@ struct fuse_notify_store_out {
>  	uint64_t	nodeid;
>  	uint64_t	offset;
>  	uint32_t	size;
> -	uint32_t	padding;
> +	uint32_t	flags;
>  };
>  
>  struct fuse_notify_retrieve_out {
> 

Thanks,
Bernd


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC PATCH] fuse: fix race in fuse_notify_store()
  2025-01-29 19:30 ` Bernd Schubert
@ 2025-01-29 21:26   ` Luis Henriques
  0 siblings, 0 replies; 3+ messages in thread
From: Luis Henriques @ 2025-01-29 21:26 UTC (permalink / raw)
  To: Bernd Schubert; +Cc: Miklos Szeredi, linux-fsdevel, linux-kernel, Teng Qin

Hi Bernd,

On Wed, Jan 29 2025, Bernd Schubert wrote:

> Hi Luis,
>
> On 1/29/25 19:44, Luis Henriques wrote:
>> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
>> index 27ccae63495d..9a0cd88a9bb9 100644
>> --- a/fs/fuse/dev.c
>> +++ b/fs/fuse/dev.c
>> @@ -1630,6 +1630,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>>  	unsigned int num;
>>  	loff_t file_size;
>>  	loff_t end;
>> +	int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
>>  
>>  	err = -EINVAL;
>>  	if (size < sizeof(outarg))
>> @@ -1645,6 +1646,9 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>>  
>>  	nodeid = outarg.nodeid;
>>  
>> +	if (outarg.flags & FUSE_NOTIFY_STORE_NOWAIT)
>> +		fgp_flags |= FGP_NOWAIT;
>> +
>>  	down_read(&fc->killsb);
>>  
>>  	err = -ENOENT;
>> @@ -1668,14 +1672,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>>  		struct page *page;
>>  		unsigned int this_num;
>>  
>> -		folio = filemap_grab_folio(mapping, index);
>> -		err = PTR_ERR(folio);
>> -		if (IS_ERR(folio))
>> -			goto out_iput;
>> +		folio = __filemap_get_folio(mapping, index, fgp_flags,
>> +					    mapping_gfp_mask(mapping));
>> +		err = PTR_ERR_OR_ZERO(folio);
>> +		if (err) {
>> +			if (!(outarg.flags & FUSE_NOTIFY_STORE_NOWAIT))
>> +				goto out_iput;
>> +			page = NULL;
>> +			/* XXX */
>
> What is the XXX for? 

Right, I guess I should have added extra info there.  I just wanted to
point out that I'm not sure about the value to use for the min_t():

  - If we have a folio, I believe it's clear that we should use
    'folio_size(folio) - offset' (offset will always be '0' except for the
    first iteration);
  - If we don't have a folio, I'm using PAGE_SIZE

I *think* that's the correct value.  But I may be wrong.

> Also, I think you want to go to "skip" only on -EAGAIN? And if so, need
> to unset err? 

Ah, good point!  Thanks!  So, something like this should fix it:

		if (!(outarg.flags & FUSE_NOTIFY_STORE_NOWAIT) || err != -EAGAIN)
			goto out_iput;

I'll wait until tomorrow before sending v2 because I was hoping to also
have some feedback on the idea of completely dropping the use of the
NOWAIT flag.  Not sure you have some opinion about it, or maybe Miklos, as
this patch was originally from him.

Cheers,
-- 
Luís

>> +			this_num = min_t(unsigned int, num, PAGE_SIZE - offset);
>> +		} else {
>> +			page = &folio->page;
>> +			this_num = min_t(unsigned int, num,
>> +					 folio_size(folio) - offset);
>> +		}
>>  
>> -		page = &folio->page;
>> -		this_num = min_t(unsigned, num, folio_size(folio) - offset);
>>  		err = fuse_copy_page(cs, &page, offset, this_num, 0);
>> +		if (!page)
>> +			goto skip;
>> +
>>  		if (!folio_test_uptodate(folio) && !err && offset == 0 &&
>>  		    (this_num == folio_size(folio) || file_size == end)) {
>>  			folio_zero_segment(folio, this_num, folio_size(folio));
>> @@ -1683,7 +1698,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>>  		}
>>  		folio_unlock(folio);
>>  		folio_put(folio);
>> -
>> +skip:
>>  		if (err)
>>  			goto out_iput;
>>  
>> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
>> index e9e78292d107..59725f89340e 100644
>> --- a/include/uapi/linux/fuse.h
>> +++ b/include/uapi/linux/fuse.h
>> @@ -576,6 +576,12 @@ struct fuse_file_lock {
>>   */
>>  #define FUSE_EXPIRE_ONLY		(1 << 0)
>>  
>> +/**
>> + * notify_store flags
>> + * FUSE_NOTIFY_STORE_NOWAIT: skip locked pages
>> + */
>> +#define FUSE_NOTIFY_STORE_NOWAIT	(1 << 0)
>> +
>>  /**
>>   * extension type
>>   * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx
>> @@ -1075,7 +1081,7 @@ struct fuse_notify_store_out {
>>  	uint64_t	nodeid;
>>  	uint64_t	offset;
>>  	uint32_t	size;
>> -	uint32_t	padding;
>> +	uint32_t	flags;
>>  };
>>  
>>  struct fuse_notify_retrieve_out {
>> 
>
> Thanks,
> Bernd
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-01-29 21:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-29 18:44 [RFC PATCH] fuse: fix race in fuse_notify_store() Luis Henriques
2025-01-29 19:30 ` Bernd Schubert
2025-01-29 21:26   ` Luis Henriques

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).