public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH][RFC] make take_dentry_name_snapshot() lockless
Date: Mon, 9 Dec 2024 22:28:54 +0000	[thread overview]
Message-ID: <20241209222854.GB3387508@ZenIV> (raw)
In-Reply-To: <20241209211708.GA3387508@ZenIV>

On Mon, Dec 09, 2024 at 09:17:08PM +0000, Al Viro wrote:

> And yes, they are aligned - d_iname follows a pointer, inline_name follows
> struct qstr, i.e. u64 + pointer.  How about we add struct inlined_name {
> unsigned char name[DNAME_INLINE_LEN];}; and turn d_iname and inline_name
> into anon unions with that?  Hell, might even make it an array of unsigned
> long and use that to deal with this
>                 } else {
>                         /*
>                          * Both are internal.
>                          */
>                         unsigned int i;
>                         BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
>                         for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
>                                 swap(((long *) &dentry->d_iname)[i],
>                                      ((long *) &target->d_iname)[i]);
>                         }
>                 }
> in swap_names().  With struct assignment in the corresponding case in
> copy_name() and in take_dentry_name_snapshot() - that does generate sane
> code...

Do you have any objections to the diff below?  Completely untested and needs
to be split in two...  It does seem to generate decent code; it obviously
will need to be profiled - copy_name() in the common (short-to-short) case
copies the entire thing, all 5 words of it on 64bit, instead of memcpy()
of just the amount that needs to be copied.  That thing may cross the
cacheline boundary, but both cachelines had been freshly brought into
cache and dirtied, so...

diff --git a/fs/dcache.c b/fs/dcache.c
index b4d5e9e1e43d..687558622acf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,10 +296,8 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
 }
 
 struct external_name {
-	union {
-		atomic_t count;
-		struct rcu_head head;
-	} u;
+	atomic_t count;		// ->count and ->head can't be combined
+	struct rcu_head head;	// see take_dentry_name_snapshot()
 	unsigned char name[];
 };
 
@@ -329,16 +327,33 @@ static inline int dname_external(const struct dentry *dentry)
 
 void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
 {
-	spin_lock(&dentry->d_lock);
-	name->name = dentry->d_name;
-	if (unlikely(dname_external(dentry))) {
-		atomic_inc(&external_name(dentry)->u.count);
-	} else {
-		memcpy(name->inline_name, dentry->d_iname,
-		       dentry->d_name.len + 1);
+	unsigned seq;
+	const unsigned char *s;
+
+	rcu_read_lock();
+retry:
+	seq = read_seqcount_begin(&dentry->d_seq);
+	s = READ_ONCE(dentry->d_name.name);
+	name->name.hash_len = dentry->d_name.hash_len;
+	if (likely(s == dentry->d_iname)) {
 		name->name.name = name->inline_name;
+		name->__inline_name = dentry->__d_iname;
+		if (read_seqcount_retry(&dentry->d_seq, seq))
+			goto retry;
+	} else {
+		struct external_name *p;
+		p = container_of(s, struct external_name, name[0]);
+		name->name.name = s;
+		// get a valid reference
+		if (unlikely(!atomic_inc_not_zero(&p->count)))
+			goto retry;
+		if (read_seqcount_retry(&dentry->d_seq, seq)) {
+			if (unlikely(atomic_dec_and_test(&p->count)))
+				kfree_rcu(p, head);
+			goto retry;
+		}
 	}
-	spin_unlock(&dentry->d_lock);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(take_dentry_name_snapshot);
 
@@ -347,8 +362,8 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
 	if (unlikely(name->name.name != name->inline_name)) {
 		struct external_name *p;
 		p = container_of(name->name.name, struct external_name, name[0]);
-		if (unlikely(atomic_dec_and_test(&p->u.count)))
-			kfree_rcu(p, u.head);
+		if (unlikely(atomic_dec_and_test(&p->count)))
+			kfree_rcu(p, head);
 	}
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -386,7 +401,7 @@ static void dentry_free(struct dentry *dentry)
 	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
 	if (unlikely(dname_external(dentry))) {
 		struct external_name *p = external_name(dentry);
-		if (likely(atomic_dec_and_test(&p->u.count))) {
+		if (likely(atomic_dec_and_test(&p->count))) {
 			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
 			return;
 		}
@@ -1667,7 +1682,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 			kmem_cache_free(dentry_cache, dentry); 
 			return NULL;
 		}
-		atomic_set(&p->u.count, 1);
+		atomic_set(&p->count, 1);
 		dname = p->name;
 	} else  {
 		dname = dentry->d_iname;
@@ -2749,11 +2764,9 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
 			 * Both are internal.
 			 */
 			unsigned int i;
-			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
-			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
-				swap(((long *) &dentry->d_iname)[i],
-				     ((long *) &target->d_iname)[i]);
-			}
+			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++)
+				swap(dentry->__d_iname.name[i],
+				     target->__d_iname.name[i]);
 		}
 	}
 	swap(dentry->d_name.hash_len, target->d_name.hash_len);
@@ -2765,16 +2778,15 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
 	if (unlikely(dname_external(dentry)))
 		old_name = external_name(dentry);
 	if (unlikely(dname_external(target))) {
-		atomic_inc(&external_name(target)->u.count);
+		atomic_inc(&external_name(target)->count);
 		dentry->d_name = target->d_name;
 	} else {
-		memcpy(dentry->d_iname, target->d_name.name,
-				target->d_name.len + 1);
+		dentry->__d_iname = target->__d_iname;
 		dentry->d_name.name = dentry->d_iname;
 		dentry->d_name.hash_len = target->d_name.hash_len;
 	}
-	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-		kfree_rcu(old_name, u.head);
+	if (old_name && likely(atomic_dec_and_test(&old_name->count)))
+		kfree_rcu(old_name, head);
 }
 
 /*
@@ -3189,6 +3201,7 @@ static void __init dcache_init_early(void)
 
 static void __init dcache_init(void)
 {
+	BUILD_BUG_ON(DNAME_INLINE_LEN != sizeof(struct short_name_store));
 	/*
 	 * A constructor could be added for stable state like the lists,
 	 * but it is probably not worth it because of the cache nature
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1f28f56fd406..d604a4826765 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -77,6 +77,10 @@ extern const struct qstr dotdot_name;
 # endif
 #endif
 
+struct short_name_store {
+	unsigned long name[DNAME_INLINE_LEN / sizeof(unsigned long)];
+};
+
 #define d_lock	d_lockref.lock
 
 struct dentry {
@@ -88,7 +92,10 @@ struct dentry {
 	struct qstr d_name;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
-	unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+	union {
+		unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+		struct short_name_store __d_iname;
+	};
 	/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
 
 	/* Ref lookup also touches following */
@@ -590,7 +597,10 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 
 struct name_snapshot {
 	struct qstr name;
-	unsigned char inline_name[DNAME_INLINE_LEN];
+	union {
+		unsigned char inline_name[DNAME_INLINE_LEN];
+		struct short_name_store __inline_name;
+	};
 };
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);

  reply	other threads:[~2024-12-09 22:28 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-09  3:52 [PATCH][RFC] make take_dentry_name_snapshot() lockless Al Viro
2024-12-09  6:33 ` Mateusz Guzik
2024-12-09  6:58   ` Al Viro
2024-12-09  7:18     ` Mateusz Guzik
2024-12-09  7:41       ` Al Viro
2024-12-09 18:27 ` Linus Torvalds
2024-12-09 21:17   ` Al Viro
2024-12-09 22:28     ` Al Viro [this message]
2024-12-09 22:49       ` Linus Torvalds
2024-12-09 22:55         ` Linus Torvalds
2024-12-09 23:12           ` Al Viro
2024-12-10  2:45             ` Al Viro
2024-12-10  2:48               ` [PATCH 1/5] make sure that DCACHE_INLINE_LEN is a multiple of word size Al Viro
2024-12-10  2:48                 ` [PATCH 2/5] dcache: back inline names with a struct-wrapped array of unsigned long Al Viro
2024-12-10  2:48                 ` [PATCH 3/5] make take_dentry_name_snapshot() lockless Al Viro
2024-12-10  2:48                 ` [PATCH 4/5] dissolve external_name.u into separate members Al Viro
2024-12-10  2:48                 ` [PATCH 5/5] ext4 fast_commit: make use of name_snapshot primitives Al Viro
2024-12-23  4:25               ` [PATCH][RFC] make take_dentry_name_snapshot() lockless Al Viro
2024-12-23  4:37                 ` Al Viro
2024-12-23 21:31                 ` Jens Axboe
2024-12-24 19:18                   ` Al Viro
2024-12-24 19:44                     ` Linus Torvalds
2024-12-24 20:24                       ` Al Viro
2024-12-09 19:06 ` Linus Torvalds
2024-12-09 20:27   ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241209222854.GB3387508@ZenIV \
    --to=viro@zeniv.linux.org.uk \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox