* [PATCH v2 0/1] fuse: fix readahead reclaim deadlock
@ 2025-10-10 22:07 Joanne Koong
2025-10-10 22:07 ` [PATCH v2 1/1] " Joanne Koong
0 siblings, 1 reply; 2+ messages in thread
From: Joanne Koong @ 2025-10-10 22:07 UTC (permalink / raw)
To: miklos; +Cc: linux-fsdevel, amir73il, osandov, hsiangkao, kernel-team
This is v2 for fixing the readahead reclaim deadlock we ran into at Meta on
fuse servers that don't implement open (fc->no_open == 1). v1 is here:
https://lore.kernel.org/linux-fsdevel/20250925224404.2058035-1-joannelkoong@gmail.com/
This approach in v2 fixes the deadlock by allocating ff->release_args and
grabbing the reference on the inode in fuse_prepare_release() even if the
server doesn't implement open. This is an alternative approach to v1, which
explicitly grabbed the inode reference before sending readahead requests and
dropped the reference after the requests completed.
Thanks,
Joanne
Joanne Koong (1):
fuse: fix readahead reclaim deadlock
fs/fuse/file.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)
--
2.47.3
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH v2 1/1] fuse: fix readahead reclaim deadlock
2025-10-10 22:07 [PATCH v2 0/1] fuse: fix readahead reclaim deadlock Joanne Koong
@ 2025-10-10 22:07 ` Joanne Koong
0 siblings, 0 replies; 2+ messages in thread
From: Joanne Koong @ 2025-10-10 22:07 UTC (permalink / raw)
To: miklos; +Cc: linux-fsdevel, amir73il, osandov, hsiangkao, kernel-team, stable
Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is
needed") skips allocating ff->release_args if the server does not
implement open. However in doing so, fuse_prepare_release() now skips
grabbing the reference on the inode, which makes it possible for an
inode to be evicted from the dcache while there are inflight readahead
requests. This causes a deadlock if the server triggers reclaim while
servicing the readahead request and reclaim attempts to evict the inode
of the file being read ahead. Since the folio is locked during
readahead, when reclaim evicts the fuse inode and fuse_evict_inode()
attempts to remove all folios associated with the inode from the page
cache (truncate_inode_pages_range()), reclaim will block forever waiting
for the lock since readahead cannot relinquish the lock because it is
itself blocked in reclaim:
>>> stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4)
folio_lock (./include/linux/pagemap.h:1052:3)
truncate_inode_pages_range (mm/truncate.c:336:10)
fuse_evict_inode (fs/fuse/inode.c:161:2)
evict (fs/inode.c:704:3)
dentry_unlink_inode (fs/dcache.c:412:3)
__dentry_kill (fs/dcache.c:615:3)
shrink_kill (fs/dcache.c:1060:12)
shrink_dentry_list (fs/dcache.c:1087:3)
prune_dcache_sb (fs/dcache.c:1168:2)
super_cache_scan (fs/super.c:221:10)
do_shrink_slab (mm/shrinker.c:435:9)
shrink_slab (mm/shrinker.c:626:10)
shrink_node (mm/vmscan.c:5951:2)
shrink_zones (mm/vmscan.c:6195:3)
do_try_to_free_pages (mm/vmscan.c:6257:3)
do_swap_page (mm/memory.c:4136:11)
handle_pte_fault (mm/memory.c:5562:10)
handle_mm_fault (mm/memory.c:5870:9)
do_user_addr_fault (arch/x86/mm/fault.c:1338:10)
handle_page_fault (arch/x86/mm/fault.c:1481:3)
exc_page_fault (arch/x86/mm/fault.c:1539:2)
asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the
reference on the inode when preparing the file for release even if the
server does not implement open. The inode reference will be dropped when
the last reference on the fuse file is dropped (see fuse_file_put() ->
fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed")
Cc: stable@vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Reported-by: Omar Sandoval <osandov@fb.com>
---
fs/fuse/file.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f1ef77a0be05..654e21ee93fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -100,7 +100,7 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
kfree(ra);
}
-static void fuse_file_put(struct fuse_file *ff, bool sync)
+static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
if (refcount_dec_and_test(&ff->count)) {
struct fuse_release_args *ra = &ff->args->release_args;
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
fuse_file_io_release(ff, ra->inode);
if (!args) {
- /* Do nothing when server does not implement 'open' */
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (!isdir && ff->fm->fc->no_open) {
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- /* No release needed */
- kfree(ff->args);
- ff->args = NULL;
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
@@ -363,7 +375,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* own ref to the file, the IO completion has to drop the ref, which is
* how the fuse server can end up closing its clients' files.
*/
- fuse_file_put(ff, false);
+ fuse_file_put(ff, false, isdir);
}
void fuse_release_common(struct file *file, bool isdir)
@@ -394,7 +406,7 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
{
WARN_ON(refcount_read(&ff->count) > 1);
fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true);
- fuse_file_put(ff, true);
+ fuse_file_put(ff, true, false);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
@@ -891,7 +903,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
folio_put(ap->folios[i]);
}
if (ia->ff)
- fuse_file_put(ia->ff, false);
+ fuse_file_put(ia->ff, false, false);
fuse_io_free(ia);
}
@@ -1815,7 +1827,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
- fuse_file_put(wpa->ia.ff, false);
+ fuse_file_put(wpa->ia.ff, false, false);
kfree(ap->folios);
kfree(wpa);
@@ -1968,7 +1980,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
ff = __fuse_write_file_get(fi);
err = fuse_flush_times(inode, ff);
if (ff)
- fuse_file_put(ff, false);
+ fuse_file_put(ff, false, false);
return err;
}
@@ -2186,7 +2198,7 @@ static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc,
}
if (data->ff)
- fuse_file_put(data->ff, false);
+ fuse_file_put(data->ff, false, false);
return error;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2025-10-10 22:08 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-10 22:07 [PATCH v2 0/1] fuse: fix readahead reclaim deadlock Joanne Koong
2025-10-10 22:07 ` [PATCH v2 1/1] " Joanne Koong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).