linux-fsdevel.vger.kernel.org archive mirror
* [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag
@ 2024-07-29  9:15 Max Kellermann
  2024-07-29 12:56 ` Jeff Layton
  2024-07-30 16:01 ` [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" David Howells
  0 siblings, 2 replies; 17+ messages in thread
From: Max Kellermann @ 2024-07-29  9:15 UTC (permalink / raw)
  To: dhowells, jlayton
  Cc: willy, linux-cachefs, linux-fsdevel, ceph-devel, linux-kernel,
	stable, Max Kellermann

This fixes a crash bug caused by commit ae678317b95e ("netfs: Remove
deprecated use of PG_private_2 as a second writeback flag") by
removing a leftover folio_end_private_2() call after all calls to
folio_start_private_2() had been removed by the commit.

Calling folio_end_private_2() without a preceding folio_start_private_2()
underflows the folio reference count and causes trouble like RCU stalls
and general protection faults.
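
For reference, a paraphrased sketch of the two primitives (adapted from
include/linux/pagemap.h and mm/filemap.c, not verbatim) shows why an
unpaired folio_end_private_2() underflows the folio reference count:

	static inline void folio_start_private_2(struct folio *folio)
	{
		VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
		folio_get(folio);		/* ref pins the folio while marked */
		folio_set_private_2(folio);
	}

	void folio_end_private_2(struct folio *folio)
	{
		VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio);
		clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
		folio_wake_bit(folio, PG_private_2);
		folio_put(folio);		/* drops the ref taken in start;
						 * without a matching start call
						 * the count underflows */
	}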

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
Link: https://lore.kernel.org/ceph-devel/CAKPOu+_DA8XiMAA2ApMj7Pyshve_YWknw8Hdt1=zCy9Y87R1qw@mail.gmail.com/
---
 fs/ceph/addr.c          |  2 +-
 fs/netfs/fscache_io.c   | 29 +----------------------------
 include/linux/fscache.h | 30 ++++--------------------------
 3 files changed, 6 insertions(+), 55 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef..485cbd1730d1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -512,7 +512,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
 	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
 
 	fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
-			       ceph_fscache_write_terminated, inode, true, caching);
+			       ceph_fscache_write_terminated, inode, caching);
 }
 #else
 static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..0d8f3f646598 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -166,30 +166,10 @@ struct fscache_write_request {
 	loff_t			start;
 	size_t			len;
 	bool			set_bits;
-	bool			using_pgpriv2;
 	netfs_io_terminated_t	term_func;
 	void			*term_func_priv;
 };
 
-void __fscache_clear_page_bits(struct address_space *mapping,
-			       loff_t start, size_t len)
-{
-	pgoff_t first = start / PAGE_SIZE;
-	pgoff_t last = (start + len - 1) / PAGE_SIZE;
-	struct page *page;
-
-	if (len) {
-		XA_STATE(xas, &mapping->i_pages, first);
-
-		rcu_read_lock();
-		xas_for_each(&xas, page, last) {
-			folio_end_private_2(page_folio(page));
-		}
-		rcu_read_unlock();
-	}
-}
-EXPORT_SYMBOL(__fscache_clear_page_bits);
-
 /*
  * Deal with the completion of writing the data to the cache.
  */
@@ -198,10 +178,6 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
 {
 	struct fscache_write_request *wreq = priv;
 
-	if (wreq->using_pgpriv2)
-		fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
-					wreq->set_bits);
-
 	if (wreq->term_func)
 		wreq->term_func(wreq->term_func_priv, transferred_or_error,
 				was_async);
@@ -214,7 +190,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
 			      loff_t start, size_t len, loff_t i_size,
 			      netfs_io_terminated_t term_func,
 			      void *term_func_priv,
-			      bool using_pgpriv2, bool cond)
+			      bool cond)
 {
 	struct fscache_write_request *wreq;
 	struct netfs_cache_resources *cres;
@@ -232,7 +208,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
 	wreq->mapping		= mapping;
 	wreq->start		= start;
 	wreq->len		= len;
-	wreq->using_pgpriv2	= using_pgpriv2;
 	wreq->set_bits		= cond;
 	wreq->term_func		= term_func;
 	wreq->term_func_priv	= term_func_priv;
@@ -260,8 +235,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
 abandon_free:
 	kfree(wreq);
 abandon:
-	if (using_pgpriv2)
-		fscache_clear_page_bits(mapping, start, len, cond);
 	if (term_func)
 		term_func(term_func_priv, ret, false);
 }
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9de27643607f..f8c52bddaa15 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -177,8 +177,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
 			      loff_t start, size_t len, loff_t i_size,
 			      netfs_io_terminated_t term_func,
 			      void *term_func_priv,
-			      bool using_pgpriv2, bool cond);
-extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t);
+			      bool cond);
 
 /**
  * fscache_acquire_volume - Register a volume as desiring caching services
@@ -573,24 +572,6 @@ int fscache_write(struct netfs_cache_resources *cres,
 	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
 }
 
-/**
- * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages
- * @mapping: The netfs inode to use as the source
- * @start: The start position in @mapping
- * @len: The amount of data to unlock
- * @caching: If PG_fscache has been set
- *
- * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's
- * waiting.
- */
-static inline void fscache_clear_page_bits(struct address_space *mapping,
-					   loff_t start, size_t len,
-					   bool caching)
-{
-	if (caching)
-		__fscache_clear_page_bits(mapping, start, len);
-}
-
 /**
  * fscache_write_to_cache - Save a write to the cache and clear PG_fscache
  * @cookie: The cookie representing the cache object
@@ -600,7 +581,6 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
  * @i_size: The new size of the inode
  * @term_func: The function to call upon completion
  * @term_func_priv: The private data for @term_func
- * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write
  * @caching: If we actually want to do the caching
  *
  * Helper function for a netfs to write dirty data from an inode into the cache
@@ -612,21 +592,19 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
  * marked with PG_fscache.
  *
  * If given, @term_func will be called upon completion and supplied with
- * @term_func_priv.  Note that if @using_pgpriv2 is set, the PG_private_2 flags
- * will have been cleared by this point, so the netfs must retain its own pin
- * on the mapping.
+ * @term_func_priv.
  */
 static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
 					  struct address_space *mapping,
 					  loff_t start, size_t len, loff_t i_size,
 					  netfs_io_terminated_t term_func,
 					  void *term_func_priv,
-					  bool using_pgpriv2, bool caching)
+					  bool caching)
 {
 	if (caching)
 		__fscache_write_to_cache(cookie, mapping, start, len, i_size,
 					 term_func, term_func_priv,
-					 using_pgpriv2, caching);
+					 caching);
 	else if (term_func)
 		term_func(term_func_priv, -ENOBUFS, false);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag
  2024-07-29  9:15 [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag Max Kellermann
@ 2024-07-29 12:56 ` Jeff Layton
  2024-07-29 13:04   ` Max Kellermann
  2024-07-29 15:35   ` Max Kellermann
  2024-07-30 16:01 ` [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" David Howells
  1 sibling, 2 replies; 17+ messages in thread
From: Jeff Layton @ 2024-07-29 12:56 UTC (permalink / raw)
  To: Max Kellermann, dhowells
  Cc: willy, linux-cachefs, linux-fsdevel, ceph-devel, linux-kernel,
	stable, xiubli, Ilya Dryomov

On Mon, 2024-07-29 at 11:15 +0200, Max Kellermann wrote:
> This fixes a crash bug caused by commit ae678317b95e ("netfs: Remove
> deprecated use of PG_private_2 as a second writeback flag") by
> removing a leftover folio_end_private_2() call after all calls to
> folio_start_private_2() had been removed by the commit.
> 
> By calling folio_end_private_2() without folio_start_private_2(), the
> folio refcounter breaks and causes trouble like RCU stalls and general
> protection faults.
> 
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
> Link: https://lore.kernel.org/ceph-devel/CAKPOu+_DA8XiMAA2ApMj7Pyshve_YWknw8Hdt1=zCy9Y87R1qw@mail.gmail.com/
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
>  fs/ceph/addr.c          |  2 +-
>  fs/netfs/fscache_io.c   | 29 +----------------------------
>  include/linux/fscache.h | 30 ++++--------------------------
>  3 files changed, 6 insertions(+), 55 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 8c16bc5250ef..485cbd1730d1 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -512,7 +512,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
>  	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
>  
>  	fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
> -			       ceph_fscache_write_terminated, inode, true, caching);
> +			       ceph_fscache_write_terminated, inode, caching);
>  }
>  #else
>  static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
> diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
> index 38637e5c9b57..0d8f3f646598 100644
> --- a/fs/netfs/fscache_io.c
> +++ b/fs/netfs/fscache_io.c
> @@ -166,30 +166,10 @@ struct fscache_write_request {
>  	loff_t			start;
>  	size_t			len;
>  	bool			set_bits;
> -	bool			using_pgpriv2;
>  	netfs_io_terminated_t	term_func;
>  	void			*term_func_priv;
>  };
>  
> -void __fscache_clear_page_bits(struct address_space *mapping,
> -			       loff_t start, size_t len)
> -{
> -	pgoff_t first = start / PAGE_SIZE;
> -	pgoff_t last = (start + len - 1) / PAGE_SIZE;
> -	struct page *page;
> -
> -	if (len) {
> -		XA_STATE(xas, &mapping->i_pages, first);
> -
> -		rcu_read_lock();
> -		xas_for_each(&xas, page, last) {
> -			folio_end_private_2(page_folio(page));
> -		}
> -		rcu_read_unlock();
> -	}
> -}
> -EXPORT_SYMBOL(__fscache_clear_page_bits);
> -
>  /*
>   * Deal with the completion of writing the data to the cache.
>   */
> @@ -198,10 +178,6 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
>  {
>  	struct fscache_write_request *wreq = priv;
>  
> -	if (wreq->using_pgpriv2)
> -		fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
> -					wreq->set_bits);
> -
>  	if (wreq->term_func)
>  		wreq->term_func(wreq->term_func_priv, transferred_or_error,
>  				was_async);
> @@ -214,7 +190,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  			      loff_t start, size_t len, loff_t i_size,
>  			      netfs_io_terminated_t term_func,
>  			      void *term_func_priv,
> -			      bool using_pgpriv2, bool cond)
> +			      bool cond)
>  {
>  	struct fscache_write_request *wreq;
>  	struct netfs_cache_resources *cres;
> @@ -232,7 +208,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  	wreq->mapping		= mapping;
>  	wreq->start		= start;
>  	wreq->len		= len;
> -	wreq->using_pgpriv2	= using_pgpriv2;
>  	wreq->set_bits		= cond;
>  	wreq->term_func		= term_func;
>  	wreq->term_func_priv	= term_func_priv;
> @@ -260,8 +235,6 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  abandon_free:
>  	kfree(wreq);
>  abandon:
> -	if (using_pgpriv2)
> -		fscache_clear_page_bits(mapping, start, len, cond);
>  	if (term_func)
>  		term_func(term_func_priv, ret, false);
>  }
> diff --git a/include/linux/fscache.h b/include/linux/fscache.h
> index 9de27643607f..f8c52bddaa15 100644
> --- a/include/linux/fscache.h
> +++ b/include/linux/fscache.h
> @@ -177,8 +177,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
>  			      loff_t start, size_t len, loff_t i_size,
>  			      netfs_io_terminated_t term_func,
>  			      void *term_func_priv,
> -			      bool using_pgpriv2, bool cond);
> -extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t);
> +			      bool cond);
>  
>  /**
>   * fscache_acquire_volume - Register a volume as desiring caching services
> @@ -573,24 +572,6 @@ int fscache_write(struct netfs_cache_resources *cres,
>  	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
>  }
>  
> -/**
> - * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages
> - * @mapping: The netfs inode to use as the source
> - * @start: The start position in @mapping
> - * @len: The amount of data to unlock
> - * @caching: If PG_fscache has been set
> - *
> - * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's
> - * waiting.
> - */
> -static inline void fscache_clear_page_bits(struct address_space *mapping,
> -					   loff_t start, size_t len,
> -					   bool caching)
> -{
> -	if (caching)
> -		__fscache_clear_page_bits(mapping, start, len);
> -}
> -
>  /**
>   * fscache_write_to_cache - Save a write to the cache and clear PG_fscache
>   * @cookie: The cookie representing the cache object
> @@ -600,7 +581,6 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
>   * @i_size: The new size of the inode
>   * @term_func: The function to call upon completion
>   * @term_func_priv: The private data for @term_func
> - * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write
>   * @caching: If we actually want to do the caching
>   *
>   * Helper function for a netfs to write dirty data from an inode into the cache
> @@ -612,21 +592,19 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
>   * marked with PG_fscache.
>   *
>   * If given, @term_func will be called upon completion and supplied with
> - * @term_func_priv.  Note that if @using_pgpriv2 is set, the PG_private_2 flags
> - * will have been cleared by this point, so the netfs must retain its own pin
> - * on the mapping.
> + * @term_func_priv.
>   */
>  static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
>  					  struct address_space *mapping,
>  					  loff_t start, size_t len, loff_t i_size,
>  					  netfs_io_terminated_t term_func,
>  					  void *term_func_priv,
> -					  bool using_pgpriv2, bool caching)
> +					  bool caching)
>  {
>  	if (caching)
>  		__fscache_write_to_cache(cookie, mapping, start, len, i_size,
>  					 term_func, term_func_priv,
> -					 using_pgpriv2, caching);
> +					 caching);
>  	else if (term_func)
>  		term_func(term_func_priv, -ENOBUFS, false);
>  


(cc'ing the cephfs maintainers too)

Nice work! I'd prefer this patch over the first one. It looks like the
Fixes: commit went into v6.10. Did it go into earlier kernels too?

If so, what might be best is to take both of your patches. Have the
simple one first that just flips the flag, and mark that one for
stable. Then we can add the second patch on top to remove all of this
stuff for mainline.

Either way, you can add this to both patches:

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag
  2024-07-29 12:56 ` Jeff Layton
@ 2024-07-29 13:04   ` Max Kellermann
  2024-07-29 15:35   ` Max Kellermann
  1 sibling, 0 replies; 17+ messages in thread
From: Max Kellermann @ 2024-07-29 13:04 UTC (permalink / raw)
  To: Jeff Layton
  Cc: dhowells, willy, linux-cachefs, linux-fsdevel, ceph-devel,
	linux-kernel, stable, xiubli, Ilya Dryomov

On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> Nice work! I'd prefer this patch over the first one. It looks like the
> Fixes: commit went into v6.10. Did it go into earlier kernels too?

No, it's 6.10 only.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag
  2024-07-29 12:56 ` Jeff Layton
  2024-07-29 13:04   ` Max Kellermann
@ 2024-07-29 15:35   ` Max Kellermann
  2024-07-29 15:51     ` Jeff Layton
  1 sibling, 1 reply; 17+ messages in thread
From: Max Kellermann @ 2024-07-29 15:35 UTC (permalink / raw)
  To: Jeff Layton
  Cc: dhowells, willy, linux-cachefs, linux-fsdevel, ceph-devel,
	linux-kernel, stable, xiubli, Ilya Dryomov

On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org> wrote:
> Either way, you can add this to both patches:
>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Stop the merge :-)

I just found that my patch introduces another lockup; copy_file_range
locks up this way:

 [<0>] folio_wait_private_2+0xd9/0x140
 [<0>] ceph_write_begin+0x56/0x90
 [<0>] generic_perform_write+0xc0/0x210
 [<0>] ceph_write_iter+0x4e2/0x650
 [<0>] iter_file_splice_write+0x30d/0x550
 [<0>] splice_file_range_actor+0x2c/0x40
 [<0>] splice_direct_to_actor+0xee/0x270
 [<0>] splice_file_range+0x80/0xc0
 [<0>] ceph_copy_file_range+0xbb/0x5b0
 [<0>] vfs_copy_file_range+0x33e/0x5d0
 [<0>] __x64_sys_copy_file_range+0xf7/0x200
 [<0>] do_syscall_64+0x64/0x100
 [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e

Turns out that there are still private_2 users left in both fs/ceph
and fs/netfs. My patches fix one problem, but cause another problem.
Too bad!

This leaves me confused again: how shall I fix this? Can all
folio_wait_private_2() calls simply be removed?
This looks like some refactoring gone wrong, and some parts don't make
sense (for example, both netfs and ceph claim ownership of the
folio->private pointer). I could try to clean up the mess, but I need to
know how this is meant to work. David, can you enlighten me?

Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag
  2024-07-29 15:35   ` Max Kellermann
@ 2024-07-29 15:51     ` Jeff Layton
  0 siblings, 0 replies; 17+ messages in thread
From: Jeff Layton @ 2024-07-29 15:51 UTC (permalink / raw)
  To: Max Kellermann, dhowells
  Cc: willy, linux-cachefs, linux-fsdevel, ceph-devel, linux-kernel,
	stable, xiubli, Ilya Dryomov

On Mon, 2024-07-29 at 17:35 +0200, Max Kellermann wrote:
> On Mon, Jul 29, 2024 at 2:56 PM Jeff Layton <jlayton@kernel.org>
> wrote:
> > Either way, you can add this to both patches:
> > 
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
> 
> Stop the merge :-)
> 
> I just found that my patch introduces another lockup; copy_file_range
> locks up this way:
> 
>  [<0>] folio_wait_private_2+0xd9/0x140
>  [<0>] ceph_write_begin+0x56/0x90
>  [<0>] generic_perform_write+0xc0/0x210
>  [<0>] ceph_write_iter+0x4e2/0x650
>  [<0>] iter_file_splice_write+0x30d/0x550
>  [<0>] splice_file_range_actor+0x2c/0x40
>  [<0>] splice_direct_to_actor+0xee/0x270
>  [<0>] splice_file_range+0x80/0xc0
>  [<0>] ceph_copy_file_range+0xbb/0x5b0
>  [<0>] vfs_copy_file_range+0x33e/0x5d0
>  [<0>] __x64_sys_copy_file_range+0xf7/0x200
>  [<0>] do_syscall_64+0x64/0x100
>  [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Turns out that there are still private_2 users left in both fs/ceph
> and fs/netfs. My patches fix one problem, but cause another problem.
> Too bad!
> 
> This leaves me confused again: how shall I fix this? Can all
> folio_wait_private_2() calls simply be removed?
> This looks like some refactoring gone wrong, and some parts don't
> make
> sense (like netfs and ceph claim ownership of the folio_private
> pointer). I could try to fix the mess, but I need to know how this is
> meant to be. David, can you enlighten me?
> 
> Max

I suspect the folio_wait_private_2 call in ceph_write_begin should have
also been removed in ae678317b95e, and it just got missed somehow in the
original patch. All of the other callsites that did anything with
private_2 were removed in that patch.
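
For context, a paraphrased sketch of the call site in question
(fs/ceph/addr.c around v6.10; the names are real, the exact body may
differ slightly from the actual source):

	static int ceph_write_begin(struct file *file, struct address_space *mapping,
				    loff_t pos, unsigned len,
				    struct page **pagep, void **fsdata)
	{
		struct inode *inode = file_inode(file);
		struct folio *folio = NULL;
		int r;

		r = netfs_write_begin(&ceph_inode(inode)->netfs, file, mapping,
				      pos, len, &folio, NULL);
		if (r < 0)
			return r;

		folio_wait_private_2(folio); /* [DEPRECATED] waits for PG_private_2
					      * to clear; if the bit is still being
					      * set somewhere but nothing clears it
					      * any more, this blocks forever (see
					      * Max's trace above) */
		*pagep = &folio->page;
		return 0;
	}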

David, can you confirm that?
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
  2024-07-29  9:15 [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag Max Kellermann
  2024-07-29 12:56 ` Jeff Layton
@ 2024-07-30 16:01 ` David Howells
  2024-07-30 16:28   ` Max Kellermann
  1 sibling, 1 reply; 17+ messages in thread
From: David Howells @ 2024-07-30 16:01 UTC (permalink / raw)
  To: Max Kellermann
  Cc: dhowells, Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel,
	netfs, linux-fsdevel, linux-kernel, stable

Hi Max,

Can you try this patch instead of either of yours?

David
---

This reverts commit ae678317b95e760607c7b20b97c9cd4ca9ed6e1a.

Revert the patch that removes the deprecated use of PG_private_2 in
netfslib for the moment as Ceph is actually still using this to track
data copied to the cache.

Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
Reported-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---
 fs/ceph/addr.c               |   19 +++++
 fs/netfs/buffered_read.c     |    8 ++
 fs/netfs/io.c                |  144 +++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/netfs.h |    1 
 4 files changed, 170 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef..73b5a07bf94d 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -498,6 +498,11 @@ const struct netfs_request_ops ceph_netfs_ops = {
 };
 
 #ifdef CONFIG_CEPH_FSCACHE
+static void ceph_set_page_fscache(struct page *page)
+{
+	folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
+}
+
 static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
 {
 	struct inode *inode = priv;
@@ -515,6 +520,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
 			       ceph_fscache_write_terminated, inode, true, caching);
 }
 #else
+static inline void ceph_set_page_fscache(struct page *page)
+{
+}
+
 static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
 {
 }
@@ -706,6 +715,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		len = wlen;
 
 	set_page_writeback(page);
+	if (caching)
+		ceph_set_page_fscache(page);
 	ceph_fscache_write_to_cache(inode, page_off, len, caching);
 
 	if (IS_ENCRYPTED(inode)) {
@@ -789,6 +800,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
 		return AOP_WRITEPAGE_ACTIVATE;
 	}
 
+	folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
+
 	err = writepage_nounlock(page, wbc);
 	if (err == -ERESTARTSYS) {
 		/* direct memory reclaimer was killed by SIGKILL. return 0
@@ -1062,7 +1075,8 @@ static int ceph_writepages_start(struct address_space *mapping,
 				unlock_page(page);
 				break;
 			}
-			if (PageWriteback(page)) {
+			if (PageWriteback(page) ||
+			    PagePrivate2(page) /* [DEPRECATED] */) {
 				if (wbc->sync_mode == WB_SYNC_NONE) {
 					doutc(cl, "%p under writeback\n", page);
 					unlock_page(page);
@@ -1070,6 +1084,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 				}
 				doutc(cl, "waiting on writeback %p\n", page);
 				wait_on_page_writeback(page);
+				folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
 			}
 
 			if (!clear_page_dirty_for_io(page)) {
@@ -1254,6 +1269,8 @@ static int ceph_writepages_start(struct address_space *mapping,
 			}
 
 			set_page_writeback(page);
+			if (caching)
+				ceph_set_page_fscache(page);
 			len += thp_size(page);
 		}
 		ceph_fscache_write_to_cache(inode, offset, len, caching);
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a688d4c75d99..424048f9ed1f 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -466,7 +466,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	if (!netfs_is_cache_enabled(ctx) &&
 	    netfs_skip_folio_read(folio, pos, len, false)) {
 		netfs_stat(&netfs_n_rh_write_zskip);
-		goto have_folio;
+		goto have_folio_no_wait;
 	}
 
 	rreq = netfs_alloc_request(mapping, file,
@@ -507,6 +507,12 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
+	if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
+		ret = folio_wait_private_2_killable(folio);
+		if (ret < 0)
+			goto error;
+	}
+have_folio_no_wait:
 	*_folio = folio;
 	_leave(" = 0");
 	return 0;
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..c179a1c73fa7 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -98,6 +98,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
 	netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete);
 }
 
+/*
+ * [DEPRECATED] Deal with the completion of writing the data to the cache.  We
+ * have to clear the PG_fscache bits on the folios involved and release the
+ * caller's ref.
+ *
+ * May be called in softirq mode and we inherit a ref from the caller.
+ */
+static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
+					  bool was_async)
+{
+	struct netfs_io_subrequest *subreq;
+	struct folio *folio;
+	pgoff_t unlocked = 0;
+	bool have_unlocked = false;
+
+	rcu_read_lock();
+
+	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
+
+		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+			if (xas_retry(&xas, folio))
+				continue;
+
+			/* We might have multiple writes from the same huge
+			 * folio, but we mustn't unlock a folio more than once.
+			 */
+			if (have_unlocked && folio->index <= unlocked)
+				continue;
+			unlocked = folio_next_index(folio) - 1;
+			trace_netfs_folio(folio, netfs_folio_trace_end_copy);
+			folio_end_private_2(folio);
+			have_unlocked = true;
+		}
+	}
+
+	rcu_read_unlock();
+	netfs_rreq_completed(rreq, was_async);
+}
+
+static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
+				       bool was_async) /* [DEPRECATED] */
+{
+	struct netfs_io_subrequest *subreq = priv;
+	struct netfs_io_request *rreq = subreq->rreq;
+
+	if (IS_ERR_VALUE(transferred_or_error)) {
+		netfs_stat(&netfs_n_rh_write_failed);
+		trace_netfs_failure(rreq, subreq, transferred_or_error,
+				    netfs_fail_copy_to_cache);
+	} else {
+		netfs_stat(&netfs_n_rh_write_done);
+	}
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);
+
+	/* If we decrement nr_copy_ops to 0, the ref belongs to us. */
+	if (atomic_dec_and_test(&rreq->nr_copy_ops))
+		netfs_rreq_unmark_after_write(rreq, was_async);
+
+	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+}
+
+/*
+ * [DEPRECATED] Perform any outstanding writes to the cache.  We inherit a ref
+ * from the caller.
+ */
+static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
+{
+	struct netfs_cache_resources *cres = &rreq->cache_resources;
+	struct netfs_io_subrequest *subreq, *next, *p;
+	struct iov_iter iter;
+	int ret;
+
+	trace_netfs_rreq(rreq, netfs_rreq_trace_copy);
+
+	/* We don't want terminating writes trying to wake us up whilst we're
+	 * still going through the list.
+	 */
+	atomic_inc(&rreq->nr_copy_ops);
+
+	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
+		if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+			list_del_init(&subreq->rreq_link);
+			netfs_put_subrequest(subreq, false,
+					     netfs_sreq_trace_put_no_copy);
+		}
+	}
+
+	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+		/* Amalgamate adjacent writes */
+		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+			next = list_next_entry(subreq, rreq_link);
+			if (next->start != subreq->start + subreq->len)
+				break;
+			subreq->len += next->len;
+			list_del_init(&next->rreq_link);
+			netfs_put_subrequest(next, false,
+					     netfs_sreq_trace_put_merged);
+		}
+
+		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
+					       subreq->len, rreq->i_size, true);
+		if (ret < 0) {
+			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
+			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
+			continue;
+		}
+
+		iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
+				subreq->start, subreq->len);
+
+		atomic_inc(&rreq->nr_copy_ops);
+		netfs_stat(&netfs_n_rh_write);
+		netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
+		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
+		cres->ops->write(cres, subreq->start, &iter,
+				 netfs_rreq_copy_terminated, subreq);
+	}
+
+	/* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
+	if (atomic_dec_and_test(&rreq->nr_copy_ops))
+		netfs_rreq_unmark_after_write(rreq, false);
+}
+
+static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */
+{
+	struct netfs_io_request *rreq =
+		container_of(work, struct netfs_io_request, work);
+
+	netfs_rreq_do_write_to_cache(rreq);
+}
+
+static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */
+{
+	rreq->work.func = netfs_rreq_write_to_cache_work;
+	if (!queue_work(system_unbound_wq, &rreq->work))
+		BUG();
+}
+
 /*
  * Handle a short read.
  */
@@ -275,6 +415,10 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
 	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
 	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
 
+	if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) &&
+	    test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags))
+		return netfs_rreq_write_to_cache(rreq);
+
 	netfs_rreq_completed(rreq, was_async);
 }
 
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index da23484268df..24ec3434d32e 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -145,6 +145,7 @@
 	EM(netfs_folio_trace_clear_g,		"clear-g")	\
 	EM(netfs_folio_trace_clear_s,		"clear-s")	\
 	EM(netfs_folio_trace_copy_to_cache,	"mark-copy")	\
+	EM(netfs_folio_trace_end_copy,		"end-copy")	\
 	EM(netfs_folio_trace_filled_gaps,	"filled-gaps")	\
 	EM(netfs_folio_trace_kill,		"kill")		\
 	EM(netfs_folio_trace_kill_cc,		"kill-cc")	\


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
  2024-07-30 16:01 ` [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" David Howells
@ 2024-07-30 16:28   ` Max Kellermann
  2024-07-30 20:00     ` Max Kellermann
                       ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Max Kellermann @ 2024-07-30 16:28 UTC (permalink / raw)
  To: David Howells
  Cc: Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel, netfs,
	linux-fsdevel, linux-kernel, stable

On Tue, Jul 30, 2024 at 6:01 PM David Howells <dhowells@redhat.com> wrote:
> Can you try this patch instead of either of yours?

I booted it on one of the servers, and no problem so far. All tests
complete successfully, even the one with copy_file_range that crashed
with my patch. I'll let you know when problems occur later, but until
then, I agree with merging your revert instead of my patches.

If I understand this correctly, my other problem (the
folio_attach_private conflict between netfs and ceph) that I posted in
https://lore.kernel.org/ceph-devel/CAKPOu+8q_1rCnQndOj3KAitNY2scPQFuSS-AxeGru02nP9ZO0w@mail.gmail.com/
was caused by my (bad) patch after all, wasn't it?

> For the moment, ceph has to continue using PG_private_2.  It doesn't use
> netfs_writepages().  I have mostly complete patches to fix that, but they got
> popped onto the back burner for a bit.

When you're done with those patches, Cc me on those if you want me to
help test them.

Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
  2024-07-30 16:28   ` Max Kellermann
@ 2024-07-30 20:00     ` Max Kellermann
  2024-07-31  8:16     ` Max Kellermann
  2024-07-31 10:41     ` David Howells
  2 siblings, 0 replies; 17+ messages in thread
From: Max Kellermann @ 2024-07-30 20:00 UTC (permalink / raw)
  To: David Howells
  Cc: Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel, netfs,
	linux-fsdevel, linux-kernel, stable

On Tue, Jul 30, 2024 at 6:28 PM Max Kellermann <max.kellermann@ionos.com> wrote:
> I'll let you know when problems occur later, but until
> then, I agree with merging your revert instead of my patches.

Not sure if that's the same bug/cause (looks different), but 6.10.2
with your patch is still unstable:

 rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: {
9-.... 15-.... } 521399 jiffies s: 2085 root: 0x1/.
 rcu: blocking rcu_node structures (internal RCU debug): l=1:0-15:0x8200/.
 Sending NMI from CPU 3 to CPUs 9:
 NMI backtrace for cpu 9
 CPU: 9 PID: 2756 Comm: kworker/9:2 Tainted: G      D
6.10.2-cm4all2-vm+ #171
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
 Workqueue: ceph-msgr ceph_con_workfn
 RIP: 0010:native_queued_spin_lock_slowpath+0x80/0x260
 Code: 57 85 c0 74 10 0f b6 03 84 c0 74 09 f3 90 0f b6 03 84 c0 75 f7
b8 01 00 00 00 66 89 03 5b 5d 41 5c 41 5d c3 cc cc cc cc f3 90 <eb> 93
8b 37 b8 00 02 00 00 81 fe 00 01 00 00 74 07 eb a1 83 e8 01
 RSP: 0018:ffffaf5880c03bb8 EFLAGS: 00000202
 RAX: 0000000000000001 RBX: ffffa02bc37c9e98 RCX: ffffaf5880c03c90
 RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffffa02bc37c9e98
 RBP: ffffa02bc2f94000 R08: ffffaf5880c03c90 R09: 0000000000000010
 R10: 0000000000000514 R11: 0000000000000000 R12: ffffaf5880c03c90
 R13: ffffffffb4bcb2f0 R14: ffffa036c9e7e8e8 R15: ffffa02bc37c9e98
 FS:  0000000000000000(0000) GS:ffffa036cf040000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000055fecac48568 CR3: 000000030d82c002 CR4: 00000000001706b0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  <NMI>
  ? nmi_cpu_backtrace+0x83/0xf0
  ? nmi_cpu_backtrace_handler+0xd/0x20
  ? nmi_handle+0x56/0x120
  ? default_do_nmi+0x40/0x100
  ? exc_nmi+0xdc/0x100
  ? end_repeat_nmi+0xf/0x53
  ? __pfx_ceph_ino_compare+0x10/0x10
  ? native_queued_spin_lock_slowpath+0x80/0x260
  ? native_queued_spin_lock_slowpath+0x80/0x260
  ? native_queued_spin_lock_slowpath+0x80/0x260
  </NMI>
  <TASK>
  ? __pfx_ceph_ino_compare+0x10/0x10
  _raw_spin_lock+0x1e/0x30
  find_inode+0x6e/0xc0
  ? __pfx_ceph_ino_compare+0x10/0x10
  ? __pfx_ceph_set_ino_cb+0x10/0x10
  ilookup5_nowait+0x6d/0xa0
  ? __pfx_ceph_ino_compare+0x10/0x10
  iget5_locked+0x33/0xe0
  ceph_get_inode+0xb8/0xf0
  mds_dispatch+0xfe8/0x1ff0
  ? inet_recvmsg+0x4d/0xf0
  ceph_con_process_message+0x66/0x80
  ceph_con_v1_try_read+0xcfc/0x17c0
  ? __switch_to_asm+0x39/0x70
  ? finish_task_switch.isra.0+0x78/0x240
  ? __schedule+0x32a/0x1440
  ceph_con_workfn+0x339/0x4f0
  process_one_work+0x138/0x2e0
  worker_thread+0x2b9/0x3d0
  ? __pfx_worker_thread+0x10/0x10
  kthread+0xba/0xe0
  ? __pfx_kthread+0x10/0x10
  ret_from_fork+0x30/0x50
  ? __pfx_kthread+0x10/0x10
  ret_from_fork_asm+0x1a/0x30
 </TASK>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
  2024-07-30 16:28   ` Max Kellermann
  2024-07-30 20:00     ` Max Kellermann
@ 2024-07-31  8:16     ` Max Kellermann
  2024-07-31 10:41     ` David Howells
  2 siblings, 0 replies; 17+ messages in thread
From: Max Kellermann @ 2024-07-31  8:16 UTC (permalink / raw)
  To: David Howells
  Cc: Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel, netfs,
	linux-fsdevel, linux-kernel, stable

On Tue, Jul 30, 2024 at 6:28 PM Max Kellermann <max.kellermann@ionos.com> wrote:
> If I understand this correctly, my other problem (the
> folio_attach_private conflict between netfs and ceph) I posted in
> https://lore.kernel.org/ceph-devel/CAKPOu+8q_1rCnQndOj3KAitNY2scPQFuSS-AxeGru02nP9ZO0w@mail.gmail.com/
> was caused by my (bad) patch after all, wasn't it?

It was not caused by my bad patch. Without my patch, but with your
revert applied instead, I just got a crash (this time I enabled lots of
debugging options in the kernel, including KASAN) - it's the same crash
as in the post I linked in my previous email:

 ------------[ cut here ]------------
 WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386
ceph_put_wrbuffer_cap_refs+0x416/0x500
 Modules linked in:
 CPU: 13 PID: 3621 Comm: rsync Not tainted 6.10.2-cm4all2-vm+ #176
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
 RIP: 0010:ceph_put_wrbuffer_cap_refs+0x416/0x500
 Code: e8 af 7f 50 01 45 84 ed 75 27 45 8d 74 24 ff e9 cf fd ff ff e8
ab ea 64 ff e9 4c fc ff ff 31 f6 48 89 df e8 3c 86 ff ff eb b5 <0f> 0b
e9 7a ff ff ff 31 f6 48 89 df e8 29 86 ff ff eb cd 0f 0b 48
 RSP: 0018:ffff88813c57f868 EFLAGS: 00010286
 RAX: dffffc0000000000 RBX: ffff88823dc66588 RCX: 0000000000000000
 RDX: 1ffff11047b8cda7 RSI: ffff88823dc66df0 RDI: ffff88823dc66d38
 RBP: 0000000000000001 R08: 0000000000000000 R09: fffffbfff5f9a8cd
 R10: ffffffffafcd466f R11: 0000000000000001 R12: 0000000000000000
 R13: ffffea000947af00 R14: 00000000ffffffff R15: 0000000000000356
 FS:  00007f1e82957b80(0000) GS:ffff888a73400000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000559037dacea8 CR3: 000000013f1b2002 CR4: 00000000001706b0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  <TASK>
  ? __warn+0xc8/0x2c0
  ? ceph_put_wrbuffer_cap_refs+0x416/0x500
  ? report_bug+0x257/0x2b0
  ? handle_bug+0x3c/0x70
  ? exc_invalid_op+0x13/0x40
  ? asm_exc_invalid_op+0x16/0x20
  ? ceph_put_wrbuffer_cap_refs+0x416/0x500
  ? ceph_put_wrbuffer_cap_refs+0x2e/0x500
  ceph_invalidate_folio+0x241/0x310
  truncate_cleanup_folio+0x277/0x330
  truncate_inode_pages_range+0x1b4/0x940
  ? __pfx_truncate_inode_pages_range+0x10/0x10
  ? __lock_acquire+0x19f3/0x5c10
  ? __lock_acquire+0x19f3/0x5c10
  ? __pfx___lock_acquire+0x10/0x10
  ? __pfx___lock_acquire+0x10/0x10
  ? srso_alias_untrain_ret+0x1/0x10
  ? lock_acquire+0x186/0x490
  ? find_held_lock+0x2d/0x110
  ? kvm_sched_clock_read+0xd/0x20
  ? local_clock_noinstr+0x9/0xb0
  ? __pfx_lock_release+0x10/0x10
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ceph_evict_inode+0xd5/0x530
  evict+0x251/0x560
  __dentry_kill+0x17b/0x500
  dput+0x393/0x690
  __fput+0x40e/0xa60
  __x64_sys_close+0x78/0xd0
  do_syscall_64+0x82/0x130
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ? syscall_exit_to_user_mode+0x9f/0x190
  ? do_syscall_64+0x8e/0x130
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ? syscall_exit_to_user_mode+0x9f/0x190
  ? do_syscall_64+0x8e/0x130
  ? do_syscall_64+0x8e/0x130
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  entry_SYSCALL_64_after_hwframe+0x76/0x7e
 RIP: 0033:0x7f1e823178e0
 Code: 0d 00 00 00 eb b2 e8 ff f7 01 00 66 2e 0f 1f 84 00 00 00 00 00
0f 1f 44 00 00 80 3d 01 1d 0e 00 00 74 17 b8 03 00 00 00 0f 05 <48> 3d
00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c
 RSP: 002b:00007ffe16c2e108 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
 RAX: ffffffffffffffda RBX: 000000000000001e RCX: 00007f1e823178e0
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001
 RBP: 00007f1e8219bc08 R08: 0000000000000000 R09: 0000559037df64b0
 R10: fe04b91e88691591 R11: 0000000000000202 R12: 0000000000000001
 R13: 0000000000000000 R14: 00007ffe16c2e220 R15: 0000000000000001
  </TASK>
 irq event stamp: 26945
 hardirqs last  enabled at (26951): [<ffffffffaaac5a99>]
console_unlock+0x189/0x1b0
 hardirqs last disabled at (26956): [<ffffffffaaac5a7e>]
console_unlock+0x16e/0x1b0
 softirqs last  enabled at (26518): [<ffffffffaa962375>] irq_exit_rcu+0x95/0xc0
 softirqs last disabled at (26513): [<ffffffffaa962375>] irq_exit_rcu+0x95/0xc0
 ---[ end trace 0000000000000000 ]---
 ==================================================================
 BUG: KASAN: null-ptr-deref in ceph_put_snap_context+0x18/0x50
 Write of size 4 at addr 0000000000000356 by task rsync/3621

 CPU: 13 PID: 3621 Comm: rsync Tainted: G        W
6.10.2-cm4all2-vm+ #176
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
 Call Trace:
  <TASK>
  dump_stack_lvl+0x74/0xd0
  kasan_report+0xb9/0xf0
  ? ceph_put_snap_context+0x18/0x50
  kasan_check_range+0xeb/0x1a0
  ceph_put_snap_context+0x18/0x50
  ceph_invalidate_folio+0x249/0x310
  truncate_cleanup_folio+0x277/0x330
  truncate_inode_pages_range+0x1b4/0x940
  ? __pfx_truncate_inode_pages_range+0x10/0x10
  ? __lock_acquire+0x19f3/0x5c10
  ? __lock_acquire+0x19f3/0x5c10
  ? __pfx___lock_acquire+0x10/0x10
  ? __pfx___lock_acquire+0x10/0x10
  ? srso_alias_untrain_ret+0x1/0x10
  ? lock_acquire+0x186/0x490
  ? find_held_lock+0x2d/0x110
  ? kvm_sched_clock_read+0xd/0x20
  ? local_clock_noinstr+0x9/0xb0
  ? __pfx_lock_release+0x10/0x10
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ceph_evict_inode+0xd5/0x530
  evict+0x251/0x560
  __dentry_kill+0x17b/0x500
  dput+0x393/0x690
  __fput+0x40e/0xa60
  __x64_sys_close+0x78/0xd0
  do_syscall_64+0x82/0x130
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ? syscall_exit_to_user_mode+0x9f/0x190
  ? do_syscall_64+0x8e/0x130
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  ? syscall_exit_to_user_mode+0x9f/0x190
  ? do_syscall_64+0x8e/0x130
  ? do_syscall_64+0x8e/0x130
  ? lockdep_hardirqs_on_prepare+0x275/0x3e0
  entry_SYSCALL_64_after_hwframe+0x76/0x7e
 RIP: 0033:0x7f1e823178e0
 Code: 0d 00 00 00 eb b2 e8 ff f7 01 00 66 2e 0f 1f 84 00 00 00 00 00
0f 1f 44 00 00 80 3d 01 1d 0e 00 00 74 17 b8 03 00 00 00 0f 05 <48> 3d
00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c
 RSP: 002b:00007ffe16c2e108 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
 RAX: ffffffffffffffda RBX: 000000000000001e RCX: 00007f1e823178e0
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001
 RBP: 00007f1e8219bc08 R08: 0000000000000000 R09: 0000559037df64b0
 R10: fe04b91e88691591 R11: 0000000000000202 R12: 0000000000000001
 R13: 0000000000000000 R14: 00007ffe16c2e220 R15: 0000000000000001
  </TASK>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
  2024-07-30 16:28   ` Max Kellermann
  2024-07-30 20:00     ` Max Kellermann
  2024-07-31  8:16     ` Max Kellermann
@ 2024-07-31 10:41     ` David Howells
  2024-07-31 11:37       ` Max Kellermann
                         ` (3 more replies)
  2 siblings, 4 replies; 17+ messages in thread
From: David Howells @ 2024-07-31 10:41 UTC (permalink / raw)
  To: Max Kellermann
  Cc: dhowells, Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel,
	netfs, linux-fsdevel, linux-kernel, stable

Max Kellermann <max.kellermann@ionos.com> wrote:

> It was not caused by my bad patch. Without my patch, but with your
> revert instead I just got a crash (this time, I enabled lots of
> debugging options in the kernel, including KASAN) - it's the same
> crash as in the post I linked in my previous email:
> 
>  ------------[ cut here ]------------
>  WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386
> ceph_put_wrbuffer_cap_refs+0x416/0x500

Is that "WARN_ON_ONCE(ci->i_auth_cap);" for you?

David


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
  2024-07-31 10:41     ` David Howells
@ 2024-07-31 11:37       ` Max Kellermann
  2024-08-04 13:57         ` [PATCH] netfs: Set NETFS_RREQ_WRITE_TO_CACHE when caching is possible Hristo Venev
  2024-08-07 20:17       ` [RFC][PATCH] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags David Howells
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 17+ messages in thread
From: Max Kellermann @ 2024-07-31 11:37 UTC (permalink / raw)
  To: David Howells
  Cc: Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel, netfs,
	linux-fsdevel, linux-kernel, stable

On Wed, Jul 31, 2024 at 12:41 PM David Howells <dhowells@redhat.com> wrote:

> >  ------------[ cut here ]------------
> >  WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386
> > ceph_put_wrbuffer_cap_refs+0x416/0x500
>
> Is that "WARN_ON_ONCE(ci->i_auth_cap);" for you?

Yes, and it happens because no "capsnap" was found: the "snapc"
parameter is 0x356 (NETFS_FOLIO_COPY_TO_CACHE), and of course no
snap_context with the address 0x356 exists.
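
For anyone following along, a paraphrased sketch of the collision (not
verbatim kernel code): netfs marks a dirty folio that only needs copying
to the cache by storing a sentinel value in folio->private, while ceph
expects folio->private to hold a snap context pointer:

	/* netfs side: sentinel stored in folio->private for "copy to cache" */
	#define NETFS_FOLIO_COPY_TO_CACHE ((struct netfs_group *)0x356)

	/* ceph side, on folio invalidation (paraphrased): */
	snapc = folio_detach_private(folio);
	ceph_put_wrbuffer_cap_refs(ci, 1, snapc); /* no capsnap matches 0x356 -> WARN */
	ceph_put_snap_context(snapc);             /* dereferences 0x356 -> KASAN splat */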

Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] netfs: Set NETFS_RREQ_WRITE_TO_CACHE when caching is possible
  2024-07-31 11:37       ` Max Kellermann
@ 2024-08-04 13:57         ` Hristo Venev
  2024-08-04 23:22           ` Trond Myklebust
  0 siblings, 1 reply; 17+ messages in thread
From: Hristo Venev @ 2024-08-04 13:57 UTC (permalink / raw)
  To: Max Kellermann, David Howells
  Cc: Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel, netfs,
	linux-fsdevel, linux-kernel, linux-nfs, blokos, Trond Myklebust,
	dan.aloni@vastdata.com


In addition to Ceph, NFS also sees some crashes related to the use of
0x356 as a pointer.

`netfs_is_cache_enabled()` only returns true once the fscache cookie is
fully initialized, which may not happen until after the request has been
created, so check for the cookie's existence instead.
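
(Roughly, and paraphrased rather than quoted: fscache_cookie_valid() is
merely a non-NULL test,

	static inline bool fscache_cookie_valid(struct fscache_cookie *cookie)
	{
		return cookie;
	}

so it is already stable when the request is allocated, whereas
netfs_is_cache_enabled() additionally requires the cookie to have been
brought up to its enabled/active state, which the cache may only reach
later.)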

Link: https://lore.kernel.org/linux-nfs/b78c88db-8b3a-4008-94cb-82ae08f0e37b@free.fr/T/
Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty")
Cc: linux-nfs@vger.kernel.org <linux-nfs@vger.kernel.org>
Cc: blokos <blokos@free.fr>
Cc: Trond Myklebust <trondmy@hammerspace.com>
Cc: dan.aloni@vastdata.com <dan.aloni@vastdata.com>
Signed-off-by: Hristo Venev <hristo@venev.name>
---
 fs/netfs/objects.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index f4a6427274792..a74ca90c86c9b 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -27,7 +27,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
 			      origin == NETFS_DIO_READ ||
 			      origin == NETFS_DIO_WRITE);
-	bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
 	int ret;
 
 	for (;;) {
@@ -56,8 +55,9 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	refcount_set(&rreq->ref, 1);
 
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
-	if (cached) {
-		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
+	if (!is_unbuffered && fscache_cookie_valid(netfs_i_cookie(ctx))) {
+		if(netfs_is_cache_enabled(ctx))
+			__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
 		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
 			/* Filesystem uses deprecated PG_private_2 marking. */
 			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs: Set NETFS_RREQ_WRITE_TO_CACHE when caching is possible
  2024-08-04 13:57         ` [PATCH] netfs: Set NETFS_RREQ_WRITE_TO_CACHE when caching is possible Hristo Venev
@ 2024-08-04 23:22           ` Trond Myklebust
  2024-08-05 15:34             ` Hristo Venev
  0 siblings, 1 reply; 17+ messages in thread
From: Trond Myklebust @ 2024-08-04 23:22 UTC (permalink / raw)
  To: max.kellermann@ionos.com, hristo@venev.name, dhowells@redhat.com
  Cc: dan.aloni@vastdata.com, xiubli@redhat.com,
	linux-fsdevel@vger.kernel.org, ceph-devel@vger.kernel.org,
	linux-kernel@vger.kernel.org, netfs@lists.linux.dev,
	jlayton@kernel.org, idryomov@gmail.com, willy@infradead.org,
	blokos@free.fr, linux-nfs@vger.kernel.org

On Sun, 2024-08-04 at 16:57 +0300, Hristo Venev wrote:
> In addition to Ceph, in NFS there are also some crashes related to
> the
> use of 0x356 as a pointer.
> 
> `netfs_is_cache_enabled()` only returns true when the fscache cookie
> is
> fully initialized. This may happen after the request has been
> created,
> so check for the cookie's existence instead.
> 
> Link:
> https://lore.kernel.org/linux-nfs/b78c88db-8b3a-4008-94cb-82ae08f0e37b@free.fr/T/
> Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio-
> >private and marking dirty")
> Cc: linux-nfs@vger.kernel.org <linux-nfs@vger.kernel.org>
> Cc: blokos <blokos@free.fr>
> Cc: Trond Myklebust <trondmy@hammerspace.com>
> Cc: dan.aloni@vastdata.com <dan.aloni@vastdata.com>
> Signed-off-by: Hristo Venev <hristo@venev.name>
> ---
>  fs/netfs/objects.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
> index f4a6427274792..a74ca90c86c9b 100644
> --- a/fs/netfs/objects.c
> +++ b/fs/netfs/objects.c
> @@ -27,7 +27,6 @@ struct netfs_io_request *netfs_alloc_request(struct
> address_space *mapping,
>  	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
>  			      origin == NETFS_DIO_READ ||
>  			      origin == NETFS_DIO_WRITE);
> -	bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
>  	int ret;
>  
>  	for (;;) {
> @@ -56,8 +55,9 @@ struct netfs_io_request *netfs_alloc_request(struct
> address_space *mapping,
>  	refcount_set(&rreq->ref, 1);
>  
>  	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
> -	if (cached) {
> -		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
> +	if (!is_unbuffered &&
> fscache_cookie_valid(netfs_i_cookie(ctx))) {
> +		if(netfs_is_cache_enabled(ctx))
> +			__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq-
> >flags);
>  		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
>  			/* Filesystem uses deprecated PG_private_2
> marking. */
>  			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq-
> >flags);

Does this mean that netfs could still end up setting a value for
folio->private in NFS given some other set of circumstances?


-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] netfs: Set NETFS_RREQ_WRITE_TO_CACHE when caching is possible
  2024-08-04 23:22           ` Trond Myklebust
@ 2024-08-05 15:34             ` Hristo Venev
  0 siblings, 0 replies; 17+ messages in thread
From: Hristo Venev @ 2024-08-05 15:34 UTC (permalink / raw)
  To: Trond Myklebust, max.kellermann@ionos.com, dhowells@redhat.com
  Cc: dan.aloni@vastdata.com, xiubli@redhat.com,
	linux-fsdevel@vger.kernel.org, ceph-devel@vger.kernel.org,
	linux-kernel@vger.kernel.org, netfs@lists.linux.dev,
	jlayton@kernel.org, idryomov@gmail.com, willy@infradead.org,
	blokos@free.fr, linux-nfs@vger.kernel.org


On Sun, 2024-08-04 at 23:22 +0000, Trond Myklebust wrote:
> On Sun, 2024-08-04 at 16:57 +0300, Hristo Venev wrote:
> > In addition to Ceph, in NFS there are also some crashes related to
> > the
> > use of 0x356 as a pointer.
> > 
> > `netfs_is_cache_enabled()` only returns true when the fscache
> > cookie
> > is
> > fully initialized. This may happen after the request has been
> > created,
> > so check for the cookie's existence instead.
> > 
> > Link:
> > https://lore.kernel.org/linux-nfs/b78c88db-8b3a-4008-94cb-82ae08f0e37b@free.fr/T/
> > Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio-
> > > private and marking dirty")
> > Cc: linux-nfs@vger.kernel.org <linux-nfs@vger.kernel.org>
> > Cc: blokos <blokos@free.fr>
> > Cc: Trond Myklebust <trondmy@hammerspace.com>
> > Cc: dan.aloni@vastdata.com <dan.aloni@vastdata.com>
> > Signed-off-by: Hristo Venev <hristo@venev.name>
> > ---
> >  fs/netfs/objects.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
> > index f4a6427274792..a74ca90c86c9b 100644
> > --- a/fs/netfs/objects.c
> > +++ b/fs/netfs/objects.c
> > @@ -27,7 +27,6 @@ struct netfs_io_request
> > *netfs_alloc_request(struct
> > address_space *mapping,
> >  	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
> >  			      origin == NETFS_DIO_READ ||
> >  			      origin == NETFS_DIO_WRITE);
> > -	bool cached = !is_unbuffered &&
> > netfs_is_cache_enabled(ctx);
> >  	int ret;
> >  
> >  	for (;;) {
> > @@ -56,8 +55,9 @@ struct netfs_io_request
> > *netfs_alloc_request(struct
> > address_space *mapping,
> >  	refcount_set(&rreq->ref, 1);
> >  
> >  	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
> > -	if (cached) {
> > -		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq-
> > >flags);
> > +	if (!is_unbuffered &&
> > fscache_cookie_valid(netfs_i_cookie(ctx))) {
> > +		if(netfs_is_cache_enabled(ctx))
> > +			__set_bit(NETFS_RREQ_WRITE_TO_CACHE,
> > &rreq-
> > > flags);
> >  		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
> >  			/* Filesystem uses deprecated PG_private_2
> > marking. */
> >  			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq-
> > > flags);
> 
> Does this mean that netfs could still end up setting a value for
> folio-
> > private in NFS given some other set of circumstances?

Hopefully not? For NFS the cookie should be allocated in
`nfs_fscache_init_inode`, and for Ceph I think `ceph_fill_inode` (which
calls `ceph_fscache_register_inode_cookie`) should also be called early
enough.

> -- 
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
> 
> 



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC][PATCH] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags
  2024-07-31 10:41     ` David Howells
  2024-07-31 11:37       ` Max Kellermann
@ 2024-08-07 20:17       ` David Howells
  2024-08-08 11:46       ` [PATCH v2] " David Howells
  2024-08-08 21:31       ` [PATCH v3] " David Howells
  3 siblings, 0 replies; 17+ messages in thread
From: David Howells @ 2024-08-07 20:17 UTC (permalink / raw)
  To: Max Kellermann, Hristo Venev
  Cc: dhowells, Ilya Dryomov, Xiubo Li, Jeff Layton, willy, ceph-devel,
	netfs, linux-fsdevel, linux-kernel, stable

The attached patch gets me most of the way there, applied on top of the
reversion patch.  See:

	https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/log/?h=netfs-fixes

There's still an occasional slab-use-after-free that pops up:

	BUG: KASAN: slab-use-after-free in xa_head+0xe/0x70
	Read of size 8 at addr ffff8881b2cf6df8 by task kworker/0:1/9
	...
	 xa_head+0xe/0x70
	 xas_start+0xca/0x140
	 xas_load+0x16/0x110
	 xas_find+0x84/0x1f0
	 __fscache_clear_page_bits+0x136/0x340
	...

where the thing being allocated is a ceph inode.

Note that Hristo's patch is not sufficient.

David
---
    netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags
    
    The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used
    correctly.  The problem is that we try to set them up in the request
    initialisation, but we the cache may be in the process of setting up still,
    and so the state may not be correct.  Further, we secondarily sample the
    cache state and make contradictory decisions later.
    
    The issue arises because we set up the cache resources, which allows the
    cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which
    triggers cache writing even if we didn't set the flags when allocating.
    
    Fix this in the following way:
    
     (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in
         ->init_request() rather than trying to juggle that in
         netfs_alloc_request().
    
     (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is
         to be done, then PG_private_2 is to be used rather than only setting
         it if we decide to cache and then having netfs_rreq_unlock_folios()
         set the non-PG_private_2 writeback-to-cache if it wasn't set.
    
     (3) Split netfs_rreq_unlock_folios() into two functions, one of which
         contains the deprecated code for using PG_private_2 to avoid
         accidentally doing the writeback path - and always use it if
         USE_PGPRIV2 is set.
    
     (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always
         wait for PG_private_2.  This function is deprecated and only used by
         ceph anyway, and so label it so.
    
     (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use
         fscache_operation_valid() on the cache_resources instead.  This has
         the advantage of picking up the result of netfs_begin_cache_read() and
         fscache_begin_write_operation() - which are called after the object is
         initialised and will wait for the cache to come to a usable state.
    
    Just reverting ae678317b95e[1] isn't a sufficient fix, so this needs to be
    applied on top of that.  Without this as well, things like:
    
     rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: {
    
    and:
    
     WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386
    
    may happen, along with some UAFs due to PG_private_2 not getting used to
    wait on writeback completion.
    
    Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty")
    Reported-by: Max Kellermann <max.kellermann@ionos.com>
    Signed-off-by: David Howells <dhowells@redhat.com>
    cc: Ilya Dryomov <idryomov@gmail.com>
    cc: Xiubo Li <xiubli@redhat.com>
    cc: Hristo Venev <hristo@venev.name>
    cc: Jeff Layton <jlayton@kernel.org>
    cc: Matthew Wilcox <willy@infradead.org>
    cc: ceph-devel@vger.kernel.org
    cc: netfs@lists.linux.dev
    cc: linux-fsdevel@vger.kernel.org
    cc: linux-mm@kvack.org
    Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1]

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 73b5a07bf94d..cc0a2240de98 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -424,6 +424,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
 	struct ceph_netfs_request_data *priv;
 	int ret = 0;
 
+	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+	__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+
 	if (rreq->origin != NETFS_READAHEAD)
 		return 0;
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8f8de8f33abb..71cd70514efa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	/* Set parameters for the netfs library */
 	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
-	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-	__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
 
 	spin_lock_init(&ci->i_ceph_lock);
 
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 424048f9ed1f..79d83abb655b 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -9,6 +9,97 @@
 #include <linux/task_io_accounting_ops.h>
 #include "internal.h"
 
+/*
+ * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
+ * is using PG_private_2 and direct writing to the cache from here rather than
+ * marking the page for writeback.
+ *
+ * Note that we don't touch folio->private in this code.
+ */
+static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq)
+{
+	struct netfs_io_subrequest *subreq;
+	struct folio *folio;
+	pgoff_t start_page = rreq->start / PAGE_SIZE;
+	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+	size_t account = 0;
+	bool subreq_failed = false;
+
+	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+	/* Walk through the pagecache and the I/O request lists simultaneously.
+	 * We may have a mixture of cached and uncached sections and we only
+	 * really want to write out the uncached sections.  This is slightly
+	 * complicated by the possibility that we might have huge pages with a
+	 * mixture inside.
+	 */
+	subreq = list_first_entry(&rreq->subrequests,
+				  struct netfs_io_subrequest, rreq_link);
+	subreq_failed = (subreq->error < 0);
+
+	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, last_page) {
+		loff_t pg_end;
+		bool pg_failed = false;
+		bool folio_started = false;
+
+		if (xas_retry(&xas, folio))
+			continue;
+
+		pg_end = folio_pos(folio) + folio_size(folio) - 1;
+
+		for (;;) {
+			loff_t sreq_end;
+
+			if (!subreq) {
+				pg_failed = true;
+				break;
+			}
+
+			if (!folio_started &&
+			    test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
+			    fscache_operation_valid(&rreq->cache_resources)) {
+				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+				folio_start_private_2(folio);
+				folio_started = true;
+			}
+
+			pg_failed |= subreq_failed;
+			sreq_end = subreq->start + subreq->len - 1;
+			if (pg_end < sreq_end)
+				break;
+
+			account += subreq->transferred;
+			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+				subreq = list_next_entry(subreq, rreq_link);
+				subreq_failed = (subreq->error < 0);
+			} else {
+				subreq = NULL;
+				subreq_failed = false;
+			}
+
+			if (pg_end == sreq_end)
+				break;
+		}
+
+		if (!pg_failed) {
+			flush_dcache_folio(folio);
+			folio_mark_uptodate(folio);
+		}
+
+		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+			if (folio->index == rreq->no_unlock_folio &&
+			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
+				kdebug("no unlock");
+			else
+				folio_unlock(folio);
+		}
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Unlock the folios in a read operation.  We need to set PG_writeback on any
  * folios we're going to write back before we unlock them.
@@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 		}
 	}
 
+	/* Handle deprecated PG_private_2 case. */
+	if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
+		netfs_rreq_unlock_folios_pgpriv2(rreq);
+		goto out;
+	}
+
 	/* Walk through the pagecache and the I/O request lists simultaneously.
 	 * We may have a mixture of cached and uncached sections and we only
 	 * really want to write out the uncached sections.  This is slightly
@@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 		loff_t pg_end;
 		bool pg_failed = false;
 		bool wback_to_cache = false;
-		bool folio_started = false;
 
 		if (xas_retry(&xas, folio))
 			continue;
@@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 				pg_failed = true;
 				break;
 			}
-			if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
-				if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
-							       &subreq->flags)) {
-					trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
-					folio_start_private_2(folio);
-					folio_started = true;
-				}
-			} else {
-				wback_to_cache |=
-					test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
-			}
+
+			wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
 			pg_failed |= subreq_failed;
 			sreq_end = subreq->start + subreq->len - 1;
 			if (pg_end < sreq_end)
@@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 	}
 	rcu_read_unlock();
 
+out:
 	task_io_account_read(account);
 	if (rreq->netfs_ops->done)
 		rreq->netfs_ops->done(rreq);
@@ -395,7 +483,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
 }
 
 /**
- * netfs_write_begin - Helper to prepare for writing
+ * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
  * @ctx: The netfs context
  * @file: The file to read from
  * @mapping: The mapping to read from
@@ -426,6 +514,9 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
  * inode before calling this.
  *
  * This is usable whether or not caching is enabled.
+ *
+ * Note that this should be considered deprecated and netfs_perform_write()
+ * used instead.
  */
 int netfs_write_begin(struct netfs_inode *ctx,
 		      struct file *file, struct address_space *mapping,
@@ -507,11 +598,9 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
-	if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
-		ret = folio_wait_private_2_killable(folio);
-		if (ret < 0)
-			goto error;
-	}
+	ret = folio_wait_private_2_killable(folio);
+	if (ret < 0)
+		goto error;
 have_folio_no_wait:
 	*_folio = folio;
 	_leave(" = 0");
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index f4a642727479..0faea0cee179 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -57,10 +57,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
 	if (cached) {
-		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
-		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
-			/* Filesystem uses deprecated PG_private_2 marking. */
-			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
 	}
 	if (file && file->f_flags & O_NONBLOCK)
 		__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 9258d30cffe3..d35bb0f25d69 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -102,7 +102,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
 	_enter("R=%x", wreq->debug_id);
 
 	ictx = netfs_inode(wreq->inode);
-	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+	if (fscache_operation_valid(&wreq->cache_resources))
 		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
 
 	wreq->contiguity = wreq->start;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7202ce84d0eb..bf29a65c5027 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
 {
 	rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
 	rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+	__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
 
 	return 0;
 }
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index fbed0027996f..e8adae1bc260 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
 {
 	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
-	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-	__set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags);
 }
 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d0288938cc2..983816608f15 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -73,8 +73,6 @@ struct netfs_inode {
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
 #define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
-#define NETFS_ICTX_USE_PGPRIV2	31		/* [DEPRECATED] Use PG_private_2 to mark
-						 * write to cache on read */
 };
 
 /*
@@ -269,7 +267,6 @@ struct netfs_io_request {
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
-#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
 #define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
 #define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
 #define NETFS_RREQ_BLOCKED		10	/* We blocked */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 24ec3434d32e..606b4a0f92da 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -51,6 +51,7 @@
 	EM(netfs_rreq_trace_resubmit,		"RESUBMT")	\
 	EM(netfs_rreq_trace_set_pause,		"PAUSE  ")	\
 	EM(netfs_rreq_trace_unlock,		"UNLOCK ")	\
+	EM(netfs_rreq_trace_unlock_pgpriv2,	"UNLCK-2")	\
 	EM(netfs_rreq_trace_unmark,		"UNMARK ")	\
 	EM(netfs_rreq_trace_wait_ip,		"WAIT-IP")	\
 	EM(netfs_rreq_trace_wait_pause,		"WT-PAUS")	\


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags
  2024-07-31 10:41     ` David Howells
  2024-07-31 11:37       ` Max Kellermann
  2024-08-07 20:17       ` [RFC][PATCH] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags David Howells
@ 2024-08-08 11:46       ` David Howells
  2024-08-08 21:31       ` [PATCH v3] " David Howells
  3 siblings, 0 replies; 17+ messages in thread
From: David Howells @ 2024-08-08 11:46 UTC (permalink / raw)
  To: Max Kellermann
  Cc: dhowells, Hristo Venev, Ilya Dryomov, Xiubo Li, Jeff Layton,
	willy, ceph-devel, netfs, linux-fsdevel, linux-kernel, stable

Okay, I updated to -rc2 and now the apparent UAF of a ceph inode doesn't
seem to happen.  I've fixed a couple of minor issues in the patch:

 - Switched a kdebug() to a _debug().

 - netfs_rreq_unlock_folios_pgpriv2() needed to update the 'account'
   variable in the caller, not do its own thing (sketched below).
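
As an aside, a standalone sketch of that accumulation pattern in plain C
(all names here are hypothetical, not the kernel code): the helper adds
what it saw into a total owned by its caller, via a pointer, rather than
keeping a private counter the caller would never see.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-in for a subrequest; only the transferred size matters. */
struct sub {
	size_t transferred;
};

/* Like the v2 helper: accumulate into the caller's total through a pointer. */
static void walk_subrequests(const struct sub *subs, size_t n, size_t *account)
{
	for (size_t i = 0; i < n; i++)
		*account += subs[i].transferred;
}

int main(void)
{
	struct sub subs[] = { { 4096 }, { 8192 } };
	size_t account = 0;	/* owned by the caller, as in netfs_rreq_unlock_folios() */

	walk_subrequests(subs, 2, &account);
	printf("accounted %zu bytes\n", account);	/* prints "accounted 12288 bytes" */
	return 0;
}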

David
---
netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags

The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used
correctly.  The problem is that we try to set them up in the request
initialisation, but the cache may still be in the process of setting up,
and so the state may not be correct.  Further, we sample the cache state
a second time later on and make contradictory decisions.

The issue arises because we set up the cache resources, which allows the
cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which
triggers cache writing even if we didn't set the flags when allocating.

Fix this in the following way:

 (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in
     ->init_request() rather than trying to juggle that in
     netfs_alloc_request().

 (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is
     to be done, then PG_private_2 is to be used rather than only setting
     it if we decide to cache and then having netfs_rreq_unlock_folios()
     set the non-PG_private_2 writeback-to-cache if it wasn't set.

 (3) Split netfs_rreq_unlock_folios() into two functions, one of which
     contains the deprecated code for using PG_private_2 to avoid
     accidentally doing the writeback path - and always use it if
     USE_PGPRIV2 is set.

 (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always
     wait for PG_private_2.  This function is deprecated and only used by
     ceph anyway, and so label it so.

 (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use
     fscache_operation_valid() on the cache_resources instead.  This has
     the advantage of picking up the result of netfs_begin_cache_read() and
     fscache_begin_write_operation() - which are called after the object is
     initialised and will wait for the cache to come to a usable state.

Just reverting ae678317b95e[1] isn't a sufficient fix, so this needs to be
applied on top of that.  Without this as well, things like:

 rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: {

and:

 WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386

may happen, along with some UAFs due to PG_private_2 not getting used to
wait on writeback completion.

Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty")
Reported-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Hristo Venev <hristo@venev.name>
cc: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1]
---
 fs/ceph/addr.c               |    3 +
 fs/ceph/inode.c              |    2 
 fs/netfs/buffered_read.c     |  125 ++++++++++++++++++++++++++++++++++++-------
 fs/netfs/objects.c           |    4 -
 fs/netfs/write_issue.c       |    2 
 fs/nfs/fscache.c             |    2 
 fs/nfs/fscache.h             |    2 
 include/linux/netfs.h        |    3 -
 include/trace/events/netfs.h |    1 
 9 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 73b5a07bf94d..cc0a2240de98 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -424,6 +424,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
 	struct ceph_netfs_request_data *priv;
 	int ret = 0;
 
+	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+	__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+
 	if (rreq->origin != NETFS_READAHEAD)
 		return 0;
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8f8de8f33abb..71cd70514efa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	/* Set parameters for the netfs library */
 	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
-	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-	__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
 
 	spin_lock_init(&ci->i_ceph_lock);
 
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 424048f9ed1f..27c750d39476 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -9,6 +9,97 @@
 #include <linux/task_io_accounting_ops.h>
 #include "internal.h"
 
+/*
+ * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
+ * is using PG_private_2 and direct writing to the cache from here rather than
+ * marking the page for writeback.
+ *
+ * Note that we don't touch folio->private in this code.
+ */
+static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
+					     size_t *account)
+{
+	struct netfs_io_subrequest *subreq;
+	struct folio *folio;
+	pgoff_t start_page = rreq->start / PAGE_SIZE;
+	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+	bool subreq_failed = false;
+
+	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+	/* Walk through the pagecache and the I/O request lists simultaneously.
+	 * We may have a mixture of cached and uncached sections and we only
+	 * really want to write out the uncached sections.  This is slightly
+	 * complicated by the possibility that we might have huge pages with a
+	 * mixture inside.
+	 */
+	subreq = list_first_entry(&rreq->subrequests,
+				  struct netfs_io_subrequest, rreq_link);
+	subreq_failed = (subreq->error < 0);
+
+	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, last_page) {
+		loff_t pg_end;
+		bool pg_failed = false;
+		bool folio_started = false;
+
+		if (xas_retry(&xas, folio))
+			continue;
+
+		pg_end = folio_pos(folio) + folio_size(folio) - 1;
+
+		for (;;) {
+			loff_t sreq_end;
+
+			if (!subreq) {
+				pg_failed = true;
+				break;
+			}
+
+			if (!folio_started &&
+			    test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
+			    fscache_operation_valid(&rreq->cache_resources)) {
+				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+				folio_start_private_2(folio);
+				folio_started = true;
+			}
+
+			pg_failed |= subreq_failed;
+			sreq_end = subreq->start + subreq->len - 1;
+			if (pg_end < sreq_end)
+				break;
+
+			*account += subreq->transferred;
+			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+				subreq = list_next_entry(subreq, rreq_link);
+				subreq_failed = (subreq->error < 0);
+			} else {
+				subreq = NULL;
+				subreq_failed = false;
+			}
+
+			if (pg_end == sreq_end)
+				break;
+		}
+
+		if (!pg_failed) {
+			flush_dcache_folio(folio);
+			folio_mark_uptodate(folio);
+		}
+
+		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+			if (folio->index == rreq->no_unlock_folio &&
+			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
+				_debug("no unlock");
+			else
+				folio_unlock(folio);
+		}
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Unlock the folios in a read operation.  We need to set PG_writeback on any
  * folios we're going to write back before we unlock them.
@@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 		}
 	}
 
+	/* Handle deprecated PG_private_2 case. */
+	if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
+		netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
+		goto out;
+	}
+
 	/* Walk through the pagecache and the I/O request lists simultaneously.
 	 * We may have a mixture of cached and uncached sections and we only
 	 * really want to write out the uncached sections.  This is slightly
@@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 		loff_t pg_end;
 		bool pg_failed = false;
 		bool wback_to_cache = false;
-		bool folio_started = false;
 
 		if (xas_retry(&xas, folio))
 			continue;
@@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 				pg_failed = true;
 				break;
 			}
-			if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
-				if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
-							       &subreq->flags)) {
-					trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
-					folio_start_private_2(folio);
-					folio_started = true;
-				}
-			} else {
-				wback_to_cache |=
-					test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
-			}
+
+			wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
 			pg_failed |= subreq_failed;
 			sreq_end = subreq->start + subreq->len - 1;
 			if (pg_end < sreq_end)
@@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 	}
 	rcu_read_unlock();
 
+out:
 	task_io_account_read(account);
 	if (rreq->netfs_ops->done)
 		rreq->netfs_ops->done(rreq);
@@ -395,7 +483,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
 }
 
 /**
- * netfs_write_begin - Helper to prepare for writing
+ * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
  * @ctx: The netfs context
  * @file: The file to read from
  * @mapping: The mapping to read from
@@ -426,6 +514,9 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
  * inode before calling this.
  *
  * This is usable whether or not caching is enabled.
+ *
+ * Note that this should be considered deprecated and netfs_perform_write()
+ * used instead.
  */
 int netfs_write_begin(struct netfs_inode *ctx,
 		      struct file *file, struct address_space *mapping,
@@ -507,11 +598,9 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
-	if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
-		ret = folio_wait_private_2_killable(folio);
-		if (ret < 0)
-			goto error;
-	}
+	ret = folio_wait_private_2_killable(folio);
+	if (ret < 0)
+		goto error;
 have_folio_no_wait:
 	*_folio = folio;
 	_leave(" = 0");
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index f4a642727479..0faea0cee179 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -57,10 +57,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
 	if (cached) {
-		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
-		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
-			/* Filesystem uses deprecated PG_private_2 marking. */
-			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
 	}
 	if (file && file->f_flags & O_NONBLOCK)
 		__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 9258d30cffe3..d35bb0f25d69 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -102,7 +102,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
 	_enter("R=%x", wreq->debug_id);
 
 	ictx = netfs_inode(wreq->inode);
-	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+	if (fscache_operation_valid(&wreq->cache_resources))
 		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
 
 	wreq->contiguity = wreq->start;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7202ce84d0eb..bf29a65c5027 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
 {
 	rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
 	rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+	__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
 
 	return 0;
 }
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index fbed0027996f..e8adae1bc260 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
 {
 	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
-	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-	__set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags);
 }
 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d0288938cc2..983816608f15 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -73,8 +73,6 @@ struct netfs_inode {
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
 #define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
-#define NETFS_ICTX_USE_PGPRIV2	31		/* [DEPRECATED] Use PG_private_2 to mark
-						 * write to cache on read */
 };
 
 /*
@@ -269,7 +267,6 @@ struct netfs_io_request {
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
-#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
 #define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
 #define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
 #define NETFS_RREQ_BLOCKED		10	/* We blocked */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 24ec3434d32e..606b4a0f92da 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -51,6 +51,7 @@
 	EM(netfs_rreq_trace_resubmit,		"RESUBMT")	\
 	EM(netfs_rreq_trace_set_pause,		"PAUSE  ")	\
 	EM(netfs_rreq_trace_unlock,		"UNLOCK ")	\
+	EM(netfs_rreq_trace_unlock_pgpriv2,	"UNLCK-2")	\
 	EM(netfs_rreq_trace_unmark,		"UNMARK ")	\
 	EM(netfs_rreq_trace_wait_ip,		"WAIT-IP")	\
 	EM(netfs_rreq_trace_wait_pause,		"WT-PAUS")	\


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags
  2024-07-31 10:41     ` David Howells
                         ` (2 preceding siblings ...)
  2024-08-08 11:46       ` [PATCH v2] " David Howells
@ 2024-08-08 21:31       ` David Howells
  3 siblings, 0 replies; 17+ messages in thread
From: David Howells @ 2024-08-08 21:31 UTC (permalink / raw)
  Cc: dhowells, Max Kellermann, Hristo Venev, Ilya Dryomov, Xiubo Li,
	Jeff Layton, willy, Christian Brauner, ceph-devel, netfs,
	linux-fsdevel, linux-kernel, stable

It turned out that I had accidentally disabled caching for 9p, afs and cifs,
so here's a v3 that fixes that.

David
---
netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags

The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used
correctly.  The problem is that we try to set them up in the request
initialisation, but the cache may still be in the process of setting up,
and so the state may not be correct.  Further, we sample the cache state
a second time later on and make contradictory decisions.

The issue arises because we set up the cache resources, which allows the
cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which
triggers cache writing even if we didn't set the flags when allocating.

Fix this in the following way:

 (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in
     ->init_request() rather than trying to juggle that in
     netfs_alloc_request().

 (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is
     to be done, then PG_private_2 is to be used rather than only setting
     it if we decide to cache and then having netfs_rreq_unlock_folios()
     set the non-PG_private_2 writeback-to-cache if it wasn't set.

 (3) Split netfs_rreq_unlock_folios() into two functions, one of which
     contains the deprecated code for using PG_private_2 to avoid
     accidentally doing the writeback path - and always use it if
     USE_PGPRIV2 is set.

 (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always
     wait for PG_private_2.  This function is deprecated and only used by
     ceph anyway, and so label it so.

 (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use
     fscache_operation_valid() on the cache_resources instead.  This has
     the advantage of picking up the result of netfs_begin_cache_read() and
     fscache_begin_write_operation() - which are called after the object is
     initialised and will wait for the cache to come to a usable state.

Just reverting ae678317b95e[1] isn't a sufficient fix, so this needs to be
applied on top of that.  Without this as well, things like:

 rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: {

and:

 WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386

may happen, along with some UAFs due to PG_private_2 not getting used to
wait on writeback completion.

Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty")
Reported-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Hristo Venev <hristo@venev.name>
cc: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1]
---
 fs/ceph/addr.c               |    3 +
 fs/ceph/inode.c              |    2 
 fs/netfs/buffered_read.c     |  125 ++++++++++++++++++++++++++++++++++++-------
 fs/netfs/objects.c           |   10 ---
 fs/netfs/write_issue.c       |    4 +
 fs/nfs/fscache.c             |    2 
 fs/nfs/fscache.h             |    2 
 include/linux/netfs.h        |    3 -
 include/trace/events/netfs.h |    1 
 9 files changed, 116 insertions(+), 36 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 73b5a07bf94d..cc0a2240de98 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -424,6 +424,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
 	struct ceph_netfs_request_data *priv;
 	int ret = 0;
 
+	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+	__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+
 	if (rreq->origin != NETFS_READAHEAD)
 		return 0;
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8f8de8f33abb..71cd70514efa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	/* Set parameters for the netfs library */
 	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
-	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-	__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
 
 	spin_lock_init(&ci->i_ceph_lock);
 
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 424048f9ed1f..27c750d39476 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -9,6 +9,97 @@
 #include <linux/task_io_accounting_ops.h>
 #include "internal.h"
 
+/*
+ * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
+ * is using PG_private_2 and direct writing to the cache from here rather than
+ * marking the page for writeback.
+ *
+ * Note that we don't touch folio->private in this code.
+ */
+static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
+					     size_t *account)
+{
+	struct netfs_io_subrequest *subreq;
+	struct folio *folio;
+	pgoff_t start_page = rreq->start / PAGE_SIZE;
+	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+	bool subreq_failed = false;
+
+	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+	/* Walk through the pagecache and the I/O request lists simultaneously.
+	 * We may have a mixture of cached and uncached sections and we only
+	 * really want to write out the uncached sections.  This is slightly
+	 * complicated by the possibility that we might have huge pages with a
+	 * mixture inside.
+	 */
+	subreq = list_first_entry(&rreq->subrequests,
+				  struct netfs_io_subrequest, rreq_link);
+	subreq_failed = (subreq->error < 0);
+
+	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, last_page) {
+		loff_t pg_end;
+		bool pg_failed = false;
+		bool folio_started = false;
+
+		if (xas_retry(&xas, folio))
+			continue;
+
+		pg_end = folio_pos(folio) + folio_size(folio) - 1;
+
+		for (;;) {
+			loff_t sreq_end;
+
+			if (!subreq) {
+				pg_failed = true;
+				break;
+			}
+
+			if (!folio_started &&
+			    test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
+			    fscache_operation_valid(&rreq->cache_resources)) {
+				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+				folio_start_private_2(folio);
+				folio_started = true;
+			}
+
+			pg_failed |= subreq_failed;
+			sreq_end = subreq->start + subreq->len - 1;
+			if (pg_end < sreq_end)
+				break;
+
+			*account += subreq->transferred;
+			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+				subreq = list_next_entry(subreq, rreq_link);
+				subreq_failed = (subreq->error < 0);
+			} else {
+				subreq = NULL;
+				subreq_failed = false;
+			}
+
+			if (pg_end == sreq_end)
+				break;
+		}
+
+		if (!pg_failed) {
+			flush_dcache_folio(folio);
+			folio_mark_uptodate(folio);
+		}
+
+		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+			if (folio->index == rreq->no_unlock_folio &&
+			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
+				_debug("no unlock");
+			else
+				folio_unlock(folio);
+		}
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Unlock the folios in a read operation.  We need to set PG_writeback on any
  * folios we're going to write back before we unlock them.
@@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 		}
 	}
 
+	/* Handle deprecated PG_private_2 case. */
+	if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
+		netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
+		goto out;
+	}
+
 	/* Walk through the pagecache and the I/O request lists simultaneously.
 	 * We may have a mixture of cached and uncached sections and we only
 	 * really want to write out the uncached sections.  This is slightly
@@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 		loff_t pg_end;
 		bool pg_failed = false;
 		bool wback_to_cache = false;
-		bool folio_started = false;
 
 		if (xas_retry(&xas, folio))
 			continue;
@@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 				pg_failed = true;
 				break;
 			}
-			if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
-				if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
-							       &subreq->flags)) {
-					trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
-					folio_start_private_2(folio);
-					folio_started = true;
-				}
-			} else {
-				wback_to_cache |=
-					test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
-			}
+
+			wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
 			pg_failed |= subreq_failed;
 			sreq_end = subreq->start + subreq->len - 1;
 			if (pg_end < sreq_end)
@@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 	}
 	rcu_read_unlock();
 
+out:
 	task_io_account_read(account);
 	if (rreq->netfs_ops->done)
 		rreq->netfs_ops->done(rreq);
@@ -395,7 +483,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
 }
 
 /**
- * netfs_write_begin - Helper to prepare for writing
+ * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
  * @ctx: The netfs context
  * @file: The file to read from
  * @mapping: The mapping to read from
@@ -426,6 +514,9 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
  * inode before calling this.
  *
  * This is usable whether or not caching is enabled.
+ *
+ * Note that this should be considered deprecated and netfs_perform_write()
+ * used instead.
  */
 int netfs_write_begin(struct netfs_inode *ctx,
 		      struct file *file, struct address_space *mapping,
@@ -507,11 +598,9 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
-	if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
-		ret = folio_wait_private_2_killable(folio);
-		if (ret < 0)
-			goto error;
-	}
+	ret = folio_wait_private_2_killable(folio);
+	if (ret < 0)
+		goto error;
 have_folio_no_wait:
 	*_folio = folio;
 	_leave(" = 0");
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index f4a642727479..0294df70c3ff 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	struct netfs_io_request *rreq;
 	mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool;
 	struct kmem_cache *cache = mempool->pool_data;
-	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
-			      origin == NETFS_DIO_READ ||
-			      origin == NETFS_DIO_WRITE);
-	bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
 	int ret;
 
 	for (;;) {
@@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	refcount_set(&rreq->ref, 1);
 
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
-	if (cached) {
-		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
-		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
-			/* Filesystem uses deprecated PG_private_2 marking. */
-			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
-	}
 	if (file && file->f_flags & O_NONBLOCK)
 		__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
 	if (rreq->netfs_ops->init_request) {
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 9258d30cffe3..3f7e37e50c7d 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
 {
 	struct netfs_io_request *wreq;
 	struct netfs_inode *ictx;
+	bool is_buffered = (origin == NETFS_WRITEBACK ||
+			    origin == NETFS_WRITETHROUGH);
 
 	wreq = netfs_alloc_request(mapping, file, start, 0, origin);
 	if (IS_ERR(wreq))
@@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
 	_enter("R=%x", wreq->debug_id);
 
 	ictx = netfs_inode(wreq->inode);
-	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+	if (is_buffered && netfs_is_cache_enabled(ictx))
 		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
 
 	wreq->contiguity = wreq->start;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7202ce84d0eb..bf29a65c5027 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
 {
 	rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
 	rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+	__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
 
 	return 0;
 }
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index fbed0027996f..e8adae1bc260 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
 {
 	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
-	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-	__set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags);
 }
 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d0288938cc2..983816608f15 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -73,8 +73,6 @@ struct netfs_inode {
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
 #define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
-#define NETFS_ICTX_USE_PGPRIV2	31		/* [DEPRECATED] Use PG_private_2 to mark
-						 * write to cache on read */
 };
 
 /*
@@ -269,7 +267,6 @@ struct netfs_io_request {
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
-#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
 #define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
 #define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
 #define NETFS_RREQ_BLOCKED		10	/* We blocked */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 24ec3434d32e..606b4a0f92da 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -51,6 +51,7 @@
 	EM(netfs_rreq_trace_resubmit,		"RESUBMT")	\
 	EM(netfs_rreq_trace_set_pause,		"PAUSE  ")	\
 	EM(netfs_rreq_trace_unlock,		"UNLOCK ")	\
+	EM(netfs_rreq_trace_unlock_pgpriv2,	"UNLCK-2")	\
 	EM(netfs_rreq_trace_unmark,		"UNMARK ")	\
 	EM(netfs_rreq_trace_wait_ip,		"WAIT-IP")	\
 	EM(netfs_rreq_trace_wait_pause,		"WT-PAUS")	\


^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2024-08-08 21:31 UTC | newest]

Thread overview: 17+ messages
2024-07-29  9:15 [PATCH] fs/netfs/fscache_io: remove the obsolete "using_pgpriv2" flag Max Kellermann
2024-07-29 12:56 ` Jeff Layton
2024-07-29 13:04   ` Max Kellermann
2024-07-29 15:35   ` Max Kellermann
2024-07-29 15:51     ` Jeff Layton
2024-07-30 16:01 ` [PATCH] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" David Howells
2024-07-30 16:28   ` Max Kellermann
2024-07-30 20:00     ` Max Kellermann
2024-07-31  8:16     ` Max Kellermann
2024-07-31 10:41     ` David Howells
2024-07-31 11:37       ` Max Kellermann
2024-08-04 13:57         ` [PATCH] netfs: Set NETFS_RREQ_WRITE_TO_CACHE when caching is possible Hristo Venev
2024-08-04 23:22           ` Trond Myklebust
2024-08-05 15:34             ` Hristo Venev
2024-08-07 20:17       ` [RFC][PATCH] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags David Howells
2024-08-08 11:46       ` [PATCH v2] " David Howells
2024-08-08 21:31       ` [PATCH v3] " David Howells
