The Linux Kernel Mailing List
 help / color / mirror / Atom feed
* [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static
@ 2026-06-12 17:05 Max Kellermann
  2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
  2026-06-12 17:05 ` [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM Max Kellermann
  0 siblings, 2 replies; 4+ messages in thread
From: Max Kellermann @ 2026-06-12 17:05 UTC (permalink / raw)
  To: idryomov, amarkuze, ceph-devel, linux-kernel; +Cc: Max Kellermann

It's only used from within caps.c.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/caps.c  | 2 +-
 fs/ceph/super.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d51454e995a8..efa6a15c470b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1119,7 +1119,7 @@ int ceph_is_any_caps(struct inode *inode)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_client *cl = session->s_mdsc->fsc->client;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index afc89ce91804..76b946116613 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1261,7 +1261,6 @@ extern void ceph_add_cap(struct inode *inode,
 			 unsigned issued, unsigned wanted,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
-extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 			    bool queue_release);
 extern void __ceph_remove_caps(struct ceph_inode_info *ci);
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL
  2026-06-12 17:05 [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static Max Kellermann
@ 2026-06-12 17:05 ` Max Kellermann
  2026-06-16 16:03   ` Max Kellermann
  2026-06-12 17:05 ` [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM Max Kellermann
  1 sibling, 1 reply; 4+ messages in thread
From: Max Kellermann @ 2026-06-12 17:05 UTC (permalink / raw)
  To: idryomov, amarkuze, ceph-devel, linux-kernel; +Cc: Max Kellermann

__ceph_remove_cap() erases the ceph_cap object from the RB tree, thus
it seems natural to use RB_CLEAR_NODE() / RB_EMPTY_NODE() for the
removal check.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/caps.c       | 11 +++++------
 fs/ceph/mds_client.c |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index efa6a15c470b..9e7c76a66624 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1128,8 +1128,8 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 	struct ceph_mds_client *mdsc;
 	int removed = 0;
 
-	/* 'ci' being NULL means the remove have already occurred */
-	if (!ci) {
+	if (RB_EMPTY_NODE(&cap->ci_node)) {
+		/* this means the remove has already occurred */
 		doutc(cl, "inode is NULL\n");
 		return;
 	}
@@ -1142,6 +1142,7 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
 	/* remove from inode's cap rbtree, and clear auth cap */
 	rb_erase(&cap->ci_node, &ci->i_caps);
+	RB_CLEAR_NODE(&cap->ci_node);
 	if (ci->i_auth_cap == cap)
 		ci->i_auth_cap = NULL;
 
@@ -1158,8 +1159,6 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 		cap->session = NULL;
 		removed = 1;
 	}
-	/* protect backpointer with s_cap_lock: see iterate_session_caps */
-	cap->ci = NULL;
 
 	/*
 	 * s_cap_reconnect is protected by s_cap_lock. no one changes
@@ -1201,8 +1200,8 @@ void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_fs_client *fsc;
 
-	/* 'ci' being NULL means the remove have already occurred */
-	if (!ci) {
+	if (RB_EMPTY_NODE(&cap->ci_node)) {
+		/* this means the remove has already occurred */
 		doutc(mdsc->fsc->client, "inode is NULL\n");
 		return;
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ed17e0023705..f092960953a8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1900,7 +1900,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
 
 		spin_lock(&session->s_cap_lock);
 		p = p->next;
-		if (!cap->ci) {
+		if (RB_EMPTY_NODE(&cap->ci_node)) {
 			doutc(cl, "finishing cap %p removal\n", cap);
 			BUG_ON(cap->session != session);
 			cap->session = NULL;
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM
  2026-06-12 17:05 [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static Max Kellermann
  2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
@ 2026-06-12 17:05 ` Max Kellermann
  1 sibling, 0 replies; 4+ messages in thread
From: Max Kellermann @ 2026-06-12 17:05 UTC (permalink / raw)
  To: idryomov, amarkuze, ceph-devel, linux-kernel; +Cc: Max Kellermann

All these functions already have a ceph_inode_info pointer, so let's
use that instead of letting every function reload it from RAM
(i.e. `ceph_cap.ci`).  This eliminates several memory accesses.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/caps.c       | 56 +++++++++++++++++++++-----------------------
 fs/ceph/mds_client.c |  2 +-
 fs/ceph/super.h      |  1 +
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9e7c76a66624..ef167118efdf 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -785,9 +785,9 @@ void ceph_add_cap(struct inode *inode,
  * generation of the MDS session (i.e. has not gone 'stale' due to
  * us losing touch with the mds).
  */
-static int __cap_is_valid(struct ceph_cap *cap)
+static int __cap_is_valid(struct ceph_inode_info *ci, struct ceph_cap *cap)
 {
-	struct inode *inode = &cap->ci->netfs.inode;
+	struct inode *inode = &ci->netfs.inode;
 	struct ceph_client *cl = cap->session->s_mdsc->fsc->client;
 	unsigned long ttl;
 	u32 gen;
@@ -822,7 +822,7 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
 		*implemented = 0;
 	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
 		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (!__cap_is_valid(cap))
+		if (!__cap_is_valid(ci, cap))
 			continue;
 		doutc(cl, "%p %llx.%llx cap %p issued %s\n", inode,
 		      ceph_vinop(inode), cap, ceph_cap_string(cap->issued));
@@ -855,7 +855,7 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
 		cap = rb_entry(p, struct ceph_cap, ci_node);
 		if (cap == ocap)
 			continue;
-		if (!__cap_is_valid(cap))
+		if (!__cap_is_valid(ci, cap))
 			continue;
 		have |= cap->issued;
 	}
@@ -866,9 +866,9 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
  * Move a cap to the end of the LRU (oldest caps at list head, newest
  * at list tail).
  */
-static void __touch_cap(struct ceph_cap *cap)
+static void __touch_cap(struct ceph_inode_info *ci, struct ceph_cap *cap)
 {
-	struct inode *inode = &cap->ci->netfs.inode;
+	struct inode *inode = &ci->netfs.inode;
 	struct ceph_mds_session *s = cap->session;
 	struct ceph_client *cl = s->s_mdsc->fsc->client;
 
@@ -906,7 +906,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
 
 	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
 		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (!__cap_is_valid(cap))
+		if (!__cap_is_valid(ci, cap))
 			continue;
 		if ((cap->issued & mask) == mask) {
 			doutc(cl, "mask %p %llx.%llx cap %p issued %s (mask %s)\n",
@@ -914,7 +914,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
 			      ceph_cap_string(cap->issued),
 			      ceph_cap_string(mask));
 			if (touch)
-				__touch_cap(cap);
+				__touch_cap(ci, cap);
 			return 1;
 		}
 
@@ -929,15 +929,15 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
 				struct rb_node *q;
 
 				/* touch this + preceding caps */
-				__touch_cap(cap);
+				__touch_cap(ci, cap);
 				for (q = rb_first(&ci->i_caps); q != p;
 				     q = rb_next(q)) {
 					cap = rb_entry(q, struct ceph_cap,
 						       ci_node);
-					if (!__cap_is_valid(cap))
+					if (!__cap_is_valid(ci, cap))
 						continue;
 					if (cap->issued & mask)
-						__touch_cap(cap);
+						__touch_cap(ci, cap);
 				}
 			}
 			return 1;
@@ -1091,7 +1091,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
 
 	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
 		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (check && !__cap_is_valid(cap))
+		if (check && !__cap_is_valid(ci, cap))
 			continue;
 		if (cap == ci->i_auth_cap)
 			mds_wanted |= cap->mds_wanted;
@@ -1119,11 +1119,10 @@ int ceph_is_any_caps(struct inode *inode)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+static void __ceph_remove_cap(struct ceph_inode_info *ci, struct ceph_cap *cap, bool queue_release)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_client *cl = session->s_mdsc->fsc->client;
-	struct ceph_inode_info *ci = cap->ci;
 	struct inode *inode = &ci->netfs.inode;
 	struct ceph_mds_client *mdsc;
 	int removed = 0;
@@ -1195,9 +1194,9 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 }
 
 void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
+		     struct ceph_inode_info *ci,
 		     bool queue_release)
 {
-	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_fs_client *fsc;
 
 	if (RB_EMPTY_NODE(&cap->ci_node)) {
@@ -1214,7 +1213,7 @@ void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 		     !fsc->blocklisted &&
 		     !ceph_inode_is_shutdown(&ci->netfs.inode));
 
-	__ceph_remove_cap(cap, queue_release);
+	__ceph_remove_cap(ci, cap, queue_release);
 }
 
 struct cap_msg_args {
@@ -1374,7 +1373,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
 		p = rb_next(p);
-		ceph_remove_cap(mdsc, cap, true);
+		ceph_remove_cap(mdsc, cap, ci, true);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 }
@@ -1387,11 +1386,10 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
  * Make note of max_size reported/requested from mds, revoked caps
  * that have now been implemented.
  */
-static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
+static void __prep_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci, struct ceph_cap *cap,
 		       int op, int flags, int used, int want, int retain,
 		       int flushing, u64 flush_tid, u64 oldest_flush_tid)
 {
-	struct ceph_inode_info *ci = cap->ci;
 	struct inode *inode = &ci->netfs.inode;
 	struct ceph_client *cl = ceph_inode_to_client(inode);
 	int held, revoking;
@@ -2206,7 +2204,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 		if (want & ~cap->mds_wanted) {
 			if (want & ~(cap->mds_wanted | cap->issued))
 				goto ack;
-			if (!__cap_is_valid(cap))
+			if (!__cap_is_valid(ci, cap))
 				goto ack;
 		}
 
@@ -2248,7 +2246,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 
 		mds = cap->mds;  /* remember mds, so we don't repeat */
 
-		__prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used,
+		__prep_cap(&arg, ci, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used,
 			   want, retain, flushing, flush_tid, oldest_flush_tid);
 
 		spin_unlock(&ci->i_ceph_lock);
@@ -2310,7 +2308,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 		flush_tid = __mark_caps_flushing(inode, session, true,
 						 &oldest_flush_tid);
 
-		__prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
+		__prep_cap(&arg, ci, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
 			   __ceph_caps_used(ci), __ceph_caps_wanted(ci),
 			   (cap->issued | cap->implemented),
 			   flushing, flush_tid, oldest_flush_tid);
@@ -2604,7 +2602,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 			doutc(cl, "%p %llx.%llx cap %p tid %llu %s\n",
 			      inode, ceph_vinop(inode), cap, cf->tid,
 			      ceph_cap_string(cf->caps));
-			__prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH,
+			__prep_cap(&arg, ci, cap, CEPH_CAP_OP_FLUSH,
 					 (cf->tid < last_snap_flush ?
 					  CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
 					  __ceph_caps_used(ci),
@@ -4095,7 +4093,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		goto out_unlock;
 
 	if (target < 0) {
-		ceph_remove_cap(mdsc, cap, false);
+		ceph_remove_cap(mdsc, cap, ci, false);
 		goto out_unlock;
 	}
 
@@ -4132,7 +4130,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 				change_auth_cap_ses(ci, tcap->session);
 			}
 		}
-		ceph_remove_cap(mdsc, cap, false);
+		ceph_remove_cap(mdsc, cap, ci, false);
 		goto out_unlock;
 	} else if (tsession) {
 		/* add placeholder for the export target */
@@ -4149,7 +4147,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 
-		ceph_remove_cap(mdsc, cap, false);
+		ceph_remove_cap(mdsc, cap, ci, false);
 		goto out_unlock;
 	}
 
@@ -4265,7 +4263,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 					inode, ceph_vinop(inode), peer,
 					ocap->seq, ocap->mseq, mds, piseq, pmseq);
 		}
-		ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		ceph_remove_cap(mdsc, ocap, ci, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
 
 	*old_issued = issued;
@@ -4862,7 +4860,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 	drop &= ~(used | dirty);
 
 	cap = __get_cap_for_mds(ci, mds);
-	if (cap && __cap_is_valid(cap)) {
+	if (cap && __cap_is_valid(ci, cap)) {
 		unless &= cap->issued;
 		if (unless) {
 			if (unless & CEPH_CAP_AUTH_EXCL)
@@ -5021,7 +5019,7 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali
 	      cap, ci, inode, ceph_vinop(inode));
 
 	is_auth = (cap == ci->i_auth_cap);
-	__ceph_remove_cap(cap, false);
+	__ceph_remove_cap(ci, cap, false);
 	if (is_auth) {
 		struct ceph_cap_flush *cf;
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f092960953a8..5b17d7412eef 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2256,7 +2256,7 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg)
 
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		ceph_remove_cap(mdsc, cap, true);
+		ceph_remove_cap(mdsc, cap, ci, true);
 		(*remaining)--;
 	} else {
 		struct dentry *dentry;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 76b946116613..3b07b071dba4 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1262,6 +1262,7 @@ extern void ceph_add_cap(struct inode *inode,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
 extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
+			    struct ceph_inode_info *ci,
 			    bool queue_release);
 extern void __ceph_remove_caps(struct ceph_inode_info *ci);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL
  2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
@ 2026-06-16 16:03   ` Max Kellermann
  0 siblings, 0 replies; 4+ messages in thread
From: Max Kellermann @ 2026-06-16 16:03 UTC (permalink / raw)
  To: idryomov, amarkuze, ceph-devel, linux-kernel

On Fri, Jun 12, 2026 at 7:05 PM Max Kellermann <max.kellermann@ionos.com> wrote:
> @@ -1142,6 +1142,7 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>
>         /* remove from inode's cap rbtree, and clear auth cap */
>         rb_erase(&cap->ci_node, &ci->i_caps);
> +       RB_CLEAR_NODE(&cap->ci_node);
>         if (ci->i_auth_cap == cap)
>                 ci->i_auth_cap = NULL;
>
> @@ -1158,8 +1159,6 @@ static void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>                 cap->session = NULL;
>                 removed = 1;
>         }
> -       /* protect backpointer with s_cap_lock: see iterate_session_caps */
> -       cap->ci = NULL;

Don't merge this patch; while writing follow-up optimizations, I found
out that this causes a data race because clearing cap->ci_node is no
longer protected by s_cap_lock.

Modifying this "cap removed" marker requires holding BOTH
ci->i_ceph_lock and session->s_cap_lock. The existing code comment
(which I ignored & removed, ugh!) is not sufficient.
(And setting the marker is really only necessary if
session->s_cap_iterator==cap.)

I will eventually post v2 without this bug and with more
documentation on the locking semantics.

-- 
Max Kellermann
Principal Architect
Hosting Technology

cm4all | Im Mediapark 6a | 50670 Köln | Germany
General information about the company can be found here:
https://www.cm4all.com/impressum
A member of the IONOS Group

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-16 16:03 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-12 17:05 [PATCH 1/3] fs/ceph/caps: make __ceph_remove_cap() static Max Kellermann
2026-06-12 17:05 ` [PATCH 2/3] fs/ceph/caps: mark cap remove with RB_CLEAR_NODE() instead of ci=NULL Max Kellermann
2026-06-16 16:03   ` Max Kellermann
2026-06-12 17:05 ` [PATCH 3/3] fs/ceph/cap: pass inode pointer around instead of reloading from RAM Max Kellermann

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.