From: Al Viro <viro@zeniv.linux.org.uk>
To: Max Kellermann <max.kellermann@ionos.com>
Cc: linux-fsdevel@vger.kernel.org,
Christian Brauner <brauner@kernel.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 20/21] __dentry_kill(): new locking scheme
Date: Thu, 22 Jan 2026 06:24:58 +0000 [thread overview]
Message-ID: <20260122062458.GA3330204@ZenIV> (raw)
In-Reply-To: <20260121215550.GD3183987@ZenIV>
On Wed, Jan 21, 2026 at 09:55:50PM +0000, Al Viro wrote:
> It's not so much an intention as having nothing good to wait on.
>
> Theoretically, there's a way to deal with that - dentry in the middle
> of stuck iput() from dentry_unlink_inode() from __dentry_kill() is
> guaranteed to be
> * negative
> * unhashed
> * not in-lookup
>
> What we could do is adding an hlist_head aliased with ->d_alias, ->d_rcu
> and ->d_in_lookup_hash. Then select_collect2() running into a dentry
> with negative refcount would set _that_ as victim and bugger off, same
> as we do for ones on shrink lists.
No need to bother with hlist - all we need is a single-linked list that
does not need to be generic. Variant below seems to survive the local
beating...
From 4d1f759a6338703f07a27a4d8e5814604968fb5b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 21 Jan 2026 18:17:12 -0500
Subject: [PATCH] get rid of busy-waiting in shrink_dcache_parent()
There's a case in which shrink_dcache_parent() ends up busy-waiting:
if some dentry in the subtree in question is found to be in process
of being evicted by another thread. We need to wait for that to
finish so that parent would no longer be pinned; otherwise we'd end
up with ridiculous situation - nothing in a subtree is busy, and call
of shrink_dcache_parent() fails to evict some directory only because
memory pressure initiated eviction of some of its children before we
got to those.
The reason why we busy-wait is the lack of anything convenient we could
wait on. That can be fixed, though, and without growing struct dentry.
Dentries in question are
* already marked dead (negative ->d_count)
* already negative
* already unhashed
* already not in in-lookup hash
* yet to get removed from ->d_sib and get DCACHE_DENTRY_KILLED in
flags.
Neither ->d_alias nor the fields overlapping it (->d_rcu and ->d_in_lookup_hash)
are going to be accessed for these dentries until after dentry_unlist(). What's
more, ->d_alias.next is guaranteed to be NULL.
So we can embed struct completion into struct select_data and (ab)use
->d_alias.next for linked list of struct select_data instances.
If dentry_unlist() finds ->d_alias.next non-NULL, it carefully goes over
the list and calls complete() for each of those.
That way select_collect2() can treat negative ->d_count the same way it
deals with dentries on other thread's shrink list - grab rcu_read_lock(),
stash dentry into data.victim and tell d_walk() to stop.
Should shrink_dcache_parent() run into that case, it would attach its
select_data to victim dentry, drop whatever normal eviction candidates
it has gathered and wait for completion. Voila...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/dcache.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 71 insertions(+), 13 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index cf865c12cdf9..283c8ab767e1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -605,6 +605,54 @@ void d_drop(struct dentry *dentry)
}
EXPORT_SYMBOL(d_drop);
+struct select_data {
+ struct dentry *start;
+ union {
+ long found;
+ struct {
+ struct dentry *victim;
+ struct select_data *next;
+ struct completion completion;
+ };
+ };
+ struct list_head dispose;
+};
+
+/*
+ * shrink_dcache_parent() needs to be notified when dentry in process of
+ * being evicted finally gets unlisted. Such dentries are
+ * already with negative ->d_count
+ * already negative
+ * already not in in-lookup hash
+ * reachable only via ->d_sib.
+ *
+ * Neither ->d_alias, nor ->d_rcu, nor ->d_in_lookup_hash are going to be
+ * accessed for those, so we can (ab)use ->d_alias.next for list of
+ * select_data of waiters. Initially it's going to be NULL and as long
+ * as dentry_unlist() returns it to that state we are fine.
+ */
+static inline void d_add_waiter(struct dentry *dentry, struct select_data *p)
+{
+ struct select_data *v = (void *)dentry->d_u.d_alias.next;
+ init_completion(&p->completion);
+ p->next = v;
+ dentry->d_u.d_alias.next = (void *)p;
+}
+
+static inline void d_complete_waiters(struct dentry *dentry)
+{
+ struct select_data *v = (void *)dentry->d_u.d_alias.next;
+ if (unlikely(v)) {
+ /* some shrink_dcache_tree() instances are waiting */
+ dentry->d_u.d_alias.next = NULL;
+ while (v) {
+ struct completion *r = &v->completion;
+ v = v->next;
+ complete(r);
+ }
+ }
+}
+
static inline void dentry_unlist(struct dentry *dentry)
{
struct dentry *next;
@@ -613,6 +661,7 @@ static inline void dentry_unlist(struct dentry *dentry)
* attached to the dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ d_complete_waiters(dentry);
if (unlikely(hlist_unhashed(&dentry->d_sib)))
return;
__hlist_del(&dentry->d_sib);
@@ -1499,15 +1548,6 @@ int d_set_mounted(struct dentry *dentry)
* constraints.
*/
-struct select_data {
- struct dentry *start;
- union {
- long found;
- struct dentry *victim;
- };
- struct list_head dispose;
-};
-
static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
{
struct select_data *data = _data;
@@ -1559,6 +1599,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
return D_WALK_QUIT;
}
to_shrink_list(dentry, &data->dispose);
+ } else if (dentry->d_lockref.count < 0) {
+ rcu_read_lock();
+ data->victim = dentry;
+ return D_WALK_QUIT;
}
/*
* We can return to the caller if we have found some (this
@@ -1598,12 +1642,26 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
data.victim = NULL;
d_walk(parent, &data, select_collect2);
if (data.victim) {
- spin_lock(&data.victim->d_lock);
- if (!lock_for_kill(data.victim)) {
- spin_unlock(&data.victim->d_lock);
+ struct dentry *v = data.victim;
+
+ spin_lock(&v->d_lock);
+ if (v->d_lockref.count < 0 &&
+ !(v->d_flags & DCACHE_DENTRY_KILLED)) {
+ // It's busy dying; have it notify us once
+ // it becomes invisible to d_walk().
+ d_add_waiter(v, &data);
+ spin_unlock(&v->d_lock);
+ rcu_read_unlock();
+ if (!list_empty(&data.dispose))
+ shrink_dentry_list(&data.dispose);
+ wait_for_completion(&data.completion);
+ continue;
+ }
+ if (!lock_for_kill(v)) {
+ spin_unlock(&v->d_lock);
rcu_read_unlock();
} else {
- shrink_kill(data.victim);
+ shrink_kill(v);
}
}
if (!list_empty(&data.dispose))
--
2.47.3
next prev parent reply other threads:[~2026-01-22 6:23 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-24 6:02 [RFC][PATCHSET v3] simplifying fast_dput(), dentry_kill() et.al Al Viro
2023-11-24 6:04 ` [PATCH v3 01/21] switch nfsd_client_rmdir() to use of simple_recursive_removal() Al Viro
2023-11-24 6:04 ` [PATCH v3 02/21] coda_flag_children(): cope with dentries turning negative Al Viro
2023-11-24 21:22 ` Linus Torvalds
2023-11-24 22:58 ` Paul E. McKenney
2023-11-24 6:04 ` [PATCH v3 03/21] dentry: switch the lists of children to hlist Al Viro
2023-11-24 7:44 ` Amir Goldstein
2023-11-24 7:55 ` Al Viro
2023-11-24 8:02 ` Amir Goldstein
2023-11-24 6:04 ` [PATCH v3 04/21] centralize killing dentry from shrink list Al Viro
2023-11-24 6:04 ` [PATCH v3 05/21] shrink_dentry_list(): no need to check that dentry refcount is marked dead Al Viro
2023-11-24 6:04 ` [PATCH v3 06/21] fast_dput(): having ->d_delete() is not reason to delay refcount decrement Al Viro
2023-11-24 6:04 ` [PATCH v3 07/21] fast_dput(): handle underflows gracefully Al Viro
2023-11-24 6:04 ` [PATCH v3 08/21] fast_dput(): new rules for refcount Al Viro
2023-11-24 6:04 ` [PATCH v3 09/21] __dput_to_list(): do decrement of refcount in the callers Al Viro
2023-11-24 6:04 ` [PATCH v3 10/21] make retain_dentry() neutral with respect to refcounting Al Viro
2023-11-24 6:04 ` [PATCH v3 11/21] __dentry_kill(): get consistent rules for victim's refcount Al Viro
2023-11-24 6:04 ` [PATCH v3 12/21] dentry_kill(): don't bother with retain_dentry() on slow path Al Viro
2023-11-24 6:04 ` [PATCH v3 13/21] Call retain_dentry() with refcount 0 Al Viro
2023-11-24 6:04 ` [PATCH v3 14/21] fold the call of retain_dentry() into fast_dput() Al Viro
2023-11-24 6:04 ` [PATCH v3 15/21] don't try to cut corners in shrink_lock_dentry() Al Viro
2023-11-24 6:04 ` [PATCH v3 16/21] fold dentry_kill() into dput() Al Viro
2023-11-24 6:04 ` [PATCH v3 17/21] to_shrink_list(): call only if refcount is 0 Al Viro
2023-11-24 6:04 ` [PATCH v3 18/21] switch select_collect{,2}() to use of to_shrink_list() Al Viro
2023-11-24 6:04 ` [PATCH v3 19/21] d_prune_aliases(): use a shrink list Al Viro
2023-11-24 6:04 ` [PATCH v3 20/21] __dentry_kill(): new locking scheme Al Viro
2025-07-07 17:20 ` Max Kellermann
2025-07-07 17:29 ` Al Viro
2025-07-07 17:43 ` Max Kellermann
2025-07-07 18:00 ` Al Viro
2025-07-07 18:11 ` Max Kellermann
2025-07-07 19:31 ` Al Viro
2025-07-07 20:00 ` Max Kellermann
2025-07-07 20:31 ` Al Viro
2025-07-07 20:39 ` Max Kellermann
2025-07-07 20:49 ` Al Viro
2025-07-07 20:52 ` Max Kellermann
2025-07-07 20:59 ` Al Viro
2025-07-07 21:06 ` Max Kellermann
2025-07-07 21:32 ` Al Viro
2025-07-07 21:47 ` Max Kellermann
2025-07-07 22:19 ` Al Viro
2025-07-07 22:37 ` Al Viro
2025-07-08 4:45 ` Max Kellermann
2026-01-21 21:55 ` Al Viro
2026-01-22 6:24 ` Al Viro [this message]
2025-07-07 22:26 ` Al Viro
2023-11-24 6:04 ` [PATCH v3 21/21] retain_dentry(): introduce a trimmed-down lockless variant Al Viro
2023-11-24 21:28 ` [RFC][PATCHSET v3] simplifying fast_dput(), dentry_kill() et.al Linus Torvalds
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260122062458.GA3330204@ZenIV \
--to=viro@zeniv.linux.org.uk \
--cc=brauner@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=max.kellermann@ionos.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox