From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
linux-fsdevel <linux-fsdevel@vger.kernel.org>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Mikhail Efremov <sem@altlinux.org>
Subject: Re: [PATCH v2] vfs: Don't exchange "short" filenames unconditionally.
Date: Mon, 29 Sep 2014 17:27:14 +0100 [thread overview]
Message-ID: <20140929162714.GH7996@ZenIV.linux.org.uk> (raw)
In-Reply-To: <CA+55aFyveyOABqssxLinsmHkBg9=95TwEGC1ZT1POe+RmTbj+A@mail.gmail.com>
On Mon, Sep 29, 2014 at 09:07:00AM -0700, Linus Torvalds wrote:
> On Mon, Sep 29, 2014 at 8:59 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > Now RCU lookup starts. And on another CPU we move the first dentry over
> > the third one. copy_name() is called, it decrements the refcount down
> > to 1 (__d_free() hasn't happened yet) and doesn't schedule any freeing.
>
> Ahh. If we were to do *all* of the copy-name name freeing under RCU,
> we'd be ok. But we don't. We do the refcount decrement immediately
> (and have to, if we want to have the rcu/refcount union). Yeah, good
> call, although I find that doubvle rcu grace period for the common
> case to be very annoying.
See below (or in followup to what you were replying to); it *is* trivial to
avoid double grace periods there. Look: in free_dentry() we know if we want
the name freed. So let's have __d_free() for "just free dentry" and
__d_free_ext() for "free dentry and name". No looking at refcount in the
latter - just choose which one to call_rcu() when free_dentry() does decrement.
It's equivalent from the grace period POV to doing kfree_rcu(), but avoids
double call_rcu(); it just that in case we'd want kfree_rcu() we
schedule the call of a function that frees both.
What we get in free_dentry() is:
* external name not shared: refcount driven to 0, RCU-delayed
call of "free dentry, free ext name"
* external name still shared: refcount positive after decrement,
no freeing ext name
* no external name: no ext name to free
In the last two cases we do what dentry_free() used to do, except that now
__d_free() doesn't even look for ext name. Just frees the dentry. If
it never had been hashed - directly called, otherwise - via call_rcu().
Does that look OK for you?
diff --git a/fs/dcache.c b/fs/dcache.c
index cb25a1a..0cf5aff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -235,20 +235,44 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
return dentry_string_cmp(cs, ct, tcount);
}
+struct ext_name {
+ union {
+ atomic_t count;
+ struct rcu_head head;
+ };
+ unsigned char name[];
+};
+
+static inline struct ext_name *ext_name(struct dentry *dentry)
+{
+ if (likely(!dname_external(dentry)))
+ return NULL;
+ return container_of(dentry->d_name.name, struct ext_name, name[0]);
+}
+
static void __d_free(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
WARN_ON(!hlist_unhashed(&dentry->d_alias));
- if (dname_external(dentry))
- kfree(dentry->d_name.name);
+ kmem_cache_free(dentry_cache, dentry);
+}
+
+static void __d_free_ext(struct rcu_head *head)
+{
+ struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+ WARN_ON(!hlist_unhashed(&dentry->d_alias));
+ kfree(ext_name(dentry));
kmem_cache_free(dentry_cache, dentry);
}
static void dentry_free(struct dentry *dentry)
{
+ struct ext_name *p = ext_name(dentry);
+ if (unlikely(p) && likely(atomic_dec_and_test(&p->count)))
+ call_rcu(&dentry->d_u.d_rcu, __d_free_ext);
/* if dentry was never visible to RCU, immediate free is OK */
- if (!(dentry->d_flags & DCACHE_RCUACCESS))
+ else if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -1438,11 +1462,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
*/
dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
if (name->len > DNAME_INLINE_LEN-1) {
- dname = kmalloc(name->len + 1, GFP_KERNEL);
- if (!dname) {
+ size_t size = offsetof(struct ext_name, name) + name->len + 1;
+ struct ext_name *p = kmalloc(size, GFP_KERNEL);
+ if (!p) {
kmem_cache_free(dentry_cache, dentry);
return NULL;
}
+ atomic_set(&p->count, 1);
+ dname = p->name;
} else {
dname = dentry->d_iname;
}
@@ -2372,11 +2399,10 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
}
EXPORT_SYMBOL(dentry_update_name_case);
-static void switch_names(struct dentry *dentry, struct dentry *target,
- bool exchange)
+static void swap_names(struct dentry *dentry, struct dentry *target)
{
- if (dname_external(target)) {
- if (dname_external(dentry)) {
+ if (unlikely(dname_external(target))) {
+ if (unlikely(dname_external(dentry))) {
/*
* Both external: swap the pointers
*/
@@ -2392,7 +2418,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target,
target->d_name.name = target->d_iname;
}
} else {
- if (dname_external(dentry)) {
+ if (unlikely(dname_external(dentry))) {
/*
* dentry:external, target:internal. Give dentry's
* storage to target and make dentry internal
@@ -2407,12 +2433,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target,
*/
unsigned int i;
BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
- if (!exchange) {
- memcpy(dentry->d_iname, target->d_name.name,
- target->d_name.len + 1);
- dentry->d_name.hash_len = target->d_name.hash_len;
- return;
- }
for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
swap(((long *) &dentry->d_iname)[i],
((long *) &target->d_iname)[i]);
@@ -2422,6 +2442,22 @@ static void switch_names(struct dentry *dentry, struct dentry *target,
swap(dentry->d_name.hash_len, target->d_name.hash_len);
}
+static void copy_name(struct dentry *dentry, struct dentry *target)
+{
+ struct ext_name *old_name = ext_name(dentry);
+ if (unlikely(dname_external(target))) {
+ atomic_inc(&ext_name(target)->count);
+ dentry->d_name = target->d_name;
+ } else {
+ memcpy(dentry->d_iname, target->d_name.name,
+ target->d_name.len + 1);
+ dentry->d_name.name = dentry->d_iname;
+ dentry->d_name.hash_len = target->d_name.hash_len;
+ }
+ if (unlikely(old_name) && likely(atomic_dec_and_test(&old_name->count)))
+ kfree_rcu(old_name, head);
+}
+
static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
/*
@@ -2518,7 +2554,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
}
/* Switch the names.. */
- switch_names(dentry, target, exchange);
+ if (exchange)
+ swap_names(dentry, target);
+ else
+ copy_name(dentry, target);
/* ... and switch them in the tree */
if (IS_ROOT(dentry)) {
next prev parent reply other threads:[~2014-09-29 16:27 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-09-24 18:14 [PATCH v2] vfs: Don't exchange "short" filenames unconditionally Mikhail Efremov
2014-09-24 18:55 ` Al Viro
2014-09-24 19:20 ` Linus Torvalds
2014-09-24 19:27 ` Linus Torvalds
2014-09-24 20:18 ` Al Viro
2014-09-25 4:46 ` Al Viro
2014-09-26 16:44 ` Al Viro
2014-09-27 4:45 ` Al Viro
2014-09-27 17:56 ` Linus Torvalds
2014-09-27 18:31 ` Al Viro
2014-09-27 19:16 ` Al Viro
2014-09-27 19:37 ` Linus Torvalds
2014-09-27 19:39 ` Linus Torvalds
2014-09-27 19:49 ` Al Viro
2014-09-27 19:55 ` Linus Torvalds
2014-09-27 21:48 ` [git pull] vfs.git for 3.17-rc7 Al Viro
2014-09-27 19:45 ` [PATCH v2] vfs: Don't exchange "short" filenames unconditionally Al Viro
2014-09-28 7:47 ` Al Viro
2014-09-28 18:05 ` Al Viro
2014-09-28 21:51 ` Al Viro
2014-09-29 1:06 ` [PATCH] missing data dependency barrier in prepend_name() Al Viro
2014-09-29 15:15 ` [PATCH v2] vfs: Don't exchange "short" filenames unconditionally Linus Torvalds
2014-09-29 15:59 ` Al Viro
2014-09-29 16:07 ` Linus Torvalds
2014-09-29 16:27 ` Al Viro [this message]
2014-09-29 17:54 ` Linus Torvalds
2014-09-29 19:04 ` Al Viro
2014-09-29 20:45 ` Linus Torvalds
2014-09-29 18:42 ` Paul E. McKenney
2014-10-01 0:16 ` Al Viro
2014-10-02 5:38 ` Paul E. McKenney
2014-10-02 10:35 ` Chuck Ebbert
2014-10-03 2:11 ` Paul E. McKenney
2014-09-29 13:16 ` Paul E. McKenney
2014-09-29 15:04 ` Al Viro
2014-09-28 15:01 ` Mikhail Efremov
2014-09-26 20:23 ` Al Viro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140929162714.GH7996@ZenIV.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=sem@altlinux.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.