* [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static
@ 2026-06-12 17:05 Max Kellermann
2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
2026-06-12 17:05 ` [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM Max Kellermann
0 siblings, 2 replies; 4+ messages in thread
From: Max Kellermann @ 2026-06-12 17:05 UTC (permalink / raw)
To: idryomov, amarkuze, ceph-devel, linux-kernel; +Cc: Max Kellermann
It's only used from within caps.c.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
fs/ceph/caps.c | 2 +-
fs/ceph/super.h | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d51454e995a8..efa6a15c470b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1119,7 +1119,7 @@ int ceph_is_any_caps(struct inode *inode)
* caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
-void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_client *cl = session->s_mdsc->fsc->client;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index afc89ce91804..76b946116613 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1261,7 +1261,6 @@ extern void ceph_add_cap(struct inode *inode,
unsigned issued, unsigned wanted,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap **new_cap);
-extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
bool queue_release);
extern void __ceph_remove_caps(struct ceph_inode_info *ci);
--
2.47.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL
2026-06-12 17:05 [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static Max Kellermann
@ 2026-06-12 17:05 ` Max Kellermann
2026-06-16 16:03 ` Max Kellermann
2026-06-12 17:05 ` [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM Max Kellermann
1 sibling, 1 reply; 4+ messages in thread
From: Max Kellermann @ 2026-06-12 17:05 UTC (permalink / raw)
To: idryomov, amarkuze, ceph-devel, linux-kernel; +Cc: Max Kellermann
__ceph_remove_cap() erases the ceph_cap object from the RB tree, thus
it seems natural to use RB_CLEAR_NODE() / RB_EMPTY_NODE() for the
removal check.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
fs/ceph/caps.c | 11 +++++------
fs/ceph/mds_client.c | 2 +-
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index efa6a15c470b..9e7c76a66624 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1128,8 +1128,8 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
struct ceph_mds_client *mdsc;
int removed = 0;
- /* 'ci' being NULL means the remove have already occurred */
- if (!ci) {
+ if (RB_EMPTY_NODE(&cap->ci_node)) {
+ /* this means the remove has already occurred */
doutc(cl, "inode is NULL\n");
return;
}
@@ -1142,6 +1142,7 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
+ RB_CLEAR_NODE(&cap->ci_node);
if (ci->i_auth_cap == cap)
ci->i_auth_cap = NULL;
@@ -1158,8 +1159,6 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
cap->session = NULL;
removed = 1;
}
- /* protect backpointer with s_cap_lock: see iterate_session_caps */
- cap->ci = NULL;
/*
* s_cap_reconnect is protected by s_cap_lock. no one changes
@@ -1201,8 +1200,8 @@ void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
struct ceph_inode_info *ci = cap->ci;
struct ceph_fs_client *fsc;
- /* 'ci' being NULL means the remove have already occurred */
- if (!ci) {
+ if (RB_EMPTY_NODE(&cap->ci_node)) {
+ /* this means the remove has already occurred */
doutc(mdsc->fsc->client, "inode is NULL\n");
return;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ed17e0023705..f092960953a8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1900,7 +1900,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
spin_lock(&session->s_cap_lock);
p = p->next;
- if (!cap->ci) {
+ if (RB_EMPTY_NODE(&cap->ci_node)) {
doutc(cl, "finishing cap %p removal\n", cap);
BUG_ON(cap->session != session);
cap->session = NULL;
--
2.47.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM
2026-06-12 17:05 [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static Max Kellermann
2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
@ 2026-06-12 17:05 ` Max Kellermann
1 sibling, 0 replies; 4+ messages in thread
From: Max Kellermann @ 2026-06-12 17:05 UTC (permalink / raw)
To: idryomov, amarkuze, ceph-devel, linux-kernel; +Cc: Max Kellermann
All these functions already have a ceph_inode_info pointer, so let's
use that instead of letting every function reload it from RAM
(i.e. `ceph_cap.ci`). This eliminates several memory accesses.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
fs/ceph/caps.c | 56 +++++++++++++++++++++-----------------------
fs/ceph/mds_client.c | 2 +-
fs/ceph/super.h | 1 +
3 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9e7c76a66624..ef167118efdf 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -785,9 +785,9 @@ void ceph_add_cap(struct inode *inode,
* generation of the MDS session (i.e. has not gone 'stale' due to
* us losing touch with the mds).
*/
-static int __cap_is_valid(struct ceph_cap *cap)
+static int __cap_is_valid(struct ceph_inode_info *ci, struct ceph_cap *cap)
{
- struct inode *inode = &cap->ci->netfs.inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_client *cl = cap->session->s_mdsc->fsc->client;
unsigned long ttl;
u32 gen;
@@ -822,7 +822,7 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
*implemented = 0;
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
- if (!__cap_is_valid(cap))
+ if (!__cap_is_valid(ci, cap))
continue;
doutc(cl, "%p %llx.%llx cap %p issued %s\n", inode,
ceph_vinop(inode), cap, ceph_cap_string(cap->issued));
@@ -855,7 +855,7 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
cap = rb_entry(p, struct ceph_cap, ci_node);
if (cap == ocap)
continue;
- if (!__cap_is_valid(cap))
+ if (!__cap_is_valid(ci, cap))
continue;
have |= cap->issued;
}
@@ -866,9 +866,9 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
* Move a cap to the end of the LRU (oldest caps at list head, newest
* at list tail).
*/
-static void __touch_cap(struct ceph_cap *cap)
+static void __touch_cap(struct ceph_inode_info *ci, struct ceph_cap *cap)
{
- struct inode *inode = &cap->ci->netfs.inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_mds_session *s = cap->session;
struct ceph_client *cl = s->s_mdsc->fsc->client;
@@ -906,7 +906,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
- if (!__cap_is_valid(cap))
+ if (!__cap_is_valid(ci, cap))
continue;
if ((cap->issued & mask) == mask) {
doutc(cl, "mask %p %llx.%llx cap %p issued %s (mask %s)\n",
@@ -914,7 +914,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch)
- __touch_cap(cap);
+ __touch_cap(ci, cap);
return 1;
}
@@ -929,15 +929,15 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
struct rb_node *q;
/* touch this + preceding caps */
- __touch_cap(cap);
+ __touch_cap(ci, cap);
for (q = rb_first(&ci->i_caps); q != p;
q = rb_next(q)) {
cap = rb_entry(q, struct ceph_cap,
ci_node);
- if (!__cap_is_valid(cap))
+ if (!__cap_is_valid(ci, cap))
continue;
if (cap->issued & mask)
- __touch_cap(cap);
+ __touch_cap(ci, cap);
}
}
return 1;
@@ -1091,7 +1091,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
- if (check && !__cap_is_valid(cap))
+ if (check && !__cap_is_valid(ci, cap))
continue;
if (cap == ci->i_auth_cap)
mds_wanted |= cap->mds_wanted;
@@ -1119,11 +1119,10 @@ int ceph_is_any_caps(struct inode *inode)
* caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
-static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+static void __ceph_remove_cap(struct ceph_inode_info *ci, struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_client *cl = session->s_mdsc->fsc->client;
- struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc;
int removed = 0;
@@ -1195,9 +1194,9 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
}
void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
+ struct ceph_inode_info *ci,
bool queue_release)
{
- struct ceph_inode_info *ci = cap->ci;
struct ceph_fs_client *fsc;
if (RB_EMPTY_NODE(&cap->ci_node)) {
@@ -1214,7 +1213,7 @@ void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
!fsc->blocklisted &&
!ceph_inode_is_shutdown(&ci->netfs.inode));
- __ceph_remove_cap(cap, queue_release);
+ __ceph_remove_cap(ci, cap, queue_release);
}
struct cap_msg_args {
@@ -1374,7 +1373,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
p = rb_next(p);
- ceph_remove_cap(mdsc, cap, true);
+ ceph_remove_cap(mdsc, cap, ci, true);
}
spin_unlock(&ci->i_ceph_lock);
}
@@ -1387,11 +1386,10 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
* Make note of max_size reported/requested from mds, revoked caps
* that have now been implemented.
*/
-static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
+static void __prep_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci, struct ceph_cap *cap,
int op, int flags, int used, int want, int retain,
int flushing, u64 flush_tid, u64 oldest_flush_tid)
{
- struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->netfs.inode;
struct ceph_client *cl = ceph_inode_to_client(inode);
int held, revoking;
@@ -2206,7 +2204,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
if (want & ~cap->mds_wanted) {
if (want & ~(cap->mds_wanted | cap->issued))
goto ack;
- if (!__cap_is_valid(cap))
+ if (!__cap_is_valid(ci, cap))
goto ack;
}
@@ -2248,7 +2246,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
mds = cap->mds; /* remember mds, so we don't repeat */
- __prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used,
+ __prep_cap(&arg, ci, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used,
want, retain, flushing, flush_tid, oldest_flush_tid);
spin_unlock(&ci->i_ceph_lock);
@@ -2310,7 +2308,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
flush_tid = __mark_caps_flushing(inode, session, true,
&oldest_flush_tid);
- __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
+ __prep_cap(&arg, ci, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
__ceph_caps_used(ci), __ceph_caps_wanted(ci),
(cap->issued | cap->implemented),
flushing, flush_tid, oldest_flush_tid);
@@ -2604,7 +2602,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
doutc(cl, "%p %llx.%llx cap %p tid %llu %s\n",
inode, ceph_vinop(inode), cap, cf->tid,
ceph_cap_string(cf->caps));
- __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH,
+ __prep_cap(&arg, ci, cap, CEPH_CAP_OP_FLUSH,
(cf->tid < last_snap_flush ?
CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
__ceph_caps_used(ci),
@@ -4095,7 +4093,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
goto out_unlock;
if (target < 0) {
- ceph_remove_cap(mdsc, cap, false);
+ ceph_remove_cap(mdsc, cap, ci, false);
goto out_unlock;
}
@@ -4132,7 +4130,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
change_auth_cap_ses(ci, tcap->session);
}
}
- ceph_remove_cap(mdsc, cap, false);
+ ceph_remove_cap(mdsc, cap, ci, false);
goto out_unlock;
} else if (tsession) {
/* add placeholder for the export target */
@@ -4149,7 +4147,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
spin_unlock(&mdsc->cap_dirty_lock);
}
- ceph_remove_cap(mdsc, cap, false);
+ ceph_remove_cap(mdsc, cap, ci, false);
goto out_unlock;
}
@@ -4265,7 +4263,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), peer,
ocap->seq, ocap->mseq, mds, piseq, pmseq);
}
- ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+ ceph_remove_cap(mdsc, ocap, ci, (ph->flags & CEPH_CAP_FLAG_RELEASE));
}
*old_issued = issued;
@@ -4862,7 +4860,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
- if (cap && __cap_is_valid(cap)) {
+ if (cap && __cap_is_valid(ci, cap)) {
unless &= cap->issued;
if (unless) {
if (unless & CEPH_CAP_AUTH_EXCL)
@@ -5021,7 +5019,7 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali
cap, ci, inode, ceph_vinop(inode));
is_auth = (cap == ci->i_auth_cap);
- __ceph_remove_cap(cap, false);
+ __ceph_remove_cap(ci, cap, false);
if (is_auth) {
struct ceph_cap_flush *cf;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f092960953a8..5b17d7412eef 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2256,7 +2256,7 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg)
if (oissued) {
/* we aren't the only cap.. just remove us */
- ceph_remove_cap(mdsc, cap, true);
+ ceph_remove_cap(mdsc, cap, ci, true);
(*remaining)--;
} else {
struct dentry *dentry;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 76b946116613..3b07b071dba4 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1262,6 +1262,7 @@ extern void ceph_add_cap(struct inode *inode,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap **new_cap);
extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
+ struct ceph_inode_info *ci,
bool queue_release);
extern void __ceph_remove_caps(struct ceph_inode_info *ci);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
--
2.47.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL
2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
@ 2026-06-16 16:03 ` Max Kellermann
0 siblings, 0 replies; 4+ messages in thread
From: Max Kellermann @ 2026-06-16 16:03 UTC (permalink / raw)
To: idryomov, amarkuze, ceph-devel, linux-kernel
On Fri, Jun 12, 2026 at 7:05 PM Max Kellermann <max.kellermann@ionos.com> wrote:
> @@ -1142,6 +1142,7 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>
> /* remove from inode's cap rbtree, and clear auth cap */
> rb_erase(&cap->ci_node, &ci->i_caps);
> + RB_CLEAR_NODE(&cap->ci_node);
> if (ci->i_auth_cap == cap)
> ci->i_auth_cap = NULL;
>
> @@ -1158,8 +1159,6 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
> cap->session = NULL;
> removed = 1;
> }
> - /* protect backpointer with s_cap_lock: see iterate_session_caps */
> - cap->ci = NULL;
Don't merge this patch; while writing follow-up optimizations, I found
out that this causes a data race because clearing cap->ci_node is no
longer protected by s_cap_lock.
Modifying this "cap removed" marker requires holding BOTH
ci->i_ceph_lock and session->s_cap_lock. The existing code comment
(which I ignored & removed, ugh!) is not sufficient.
(And setting the marker is really only necessary if
session->s_cap_iterator==cap.)
I will eventually post v2 without this bug. And with more
documentation on the locking semantics.
--
Max Kellermann
Principal Architect
Hosting Technology
cm4all | Im Mediapark 6a | 50670 Köln | Germany
General information about the company can be found here:
https://www.cm4all.com/impressum
A member of the IONOS Group
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-06-16 16:03 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-12 17:05 [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static Max Kellermann
2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
2026-06-16 16:03 ` Max Kellermann
2026-06-12 17:05 ` [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM Max Kellermann
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.