From: Pavel Emelyanov <xemul@parallels.com>
To: Hugh Dickins <hughd@google.com>, Nick Piggin <npiggin@kernel.dk>,
Andrea Arcangeli <aarcange@redhat.com>,
Rik van Riel <riel@redhat.com>,
Dave Hansen <dave@linux.vnet.ibm.com>,
Alexa
Cc: linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: [PATCH 9/13] vfs: More than one mob management
Date: Tue, 03 May 2011 16:18:58 +0400 [thread overview]
Message-ID: <4DBFF2B2.9000508@parallels.com> (raw)
In-Reply-To: <4DBFF1AD.90303@parallels.com>
This management includes
* a new dentry attached to some parent inherits the mob from that parent
* on rename from one mob to another, the subtree is reattached to
the new mob (see comment below)
* the mob root is marked with a flag and on its death the mob is
killed
Some more words about the rename. This recharge is slow, but that is OK,
since the main use case for mobs is per-container dcache management,
and moving a subdir from one container to another is actually
a rare operation which is not expected to be fast.
Moreover, in OpenVZ each container's root is a bind-mount, so if one
tries to do mv one_ct_root/x other_ct_root/ the rename check for the
vfsmnt equality will fail and the real copy will occur.
One bad thing about this approach: when we mount a new filesystem
on a dentry that belongs to a non-init mob, the new mount is attached to
the init mob. We need to do the mob change when we attach an fs to a
mountpoint.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
fs/dcache.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/dcache.h | 5 ++
2 files changed, 186 insertions(+), 1 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index bfe047d..51fb998 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -145,6 +145,8 @@ static void __d_free(struct rcu_head *head)
kmem_cache_free(dentry_cache, dentry);
}
+static void destroy_mob(struct dentry_mob *mob);
+
/*
* no locks, please.
*/
@@ -154,6 +156,8 @@ static void d_free(struct dentry *dentry)
percpu_counter_dec(&dentry->d_mob->nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
+ if (dentry->d_flags & DCACHE_MOB_ROOT)
+ destroy_mob(dentry->d_mob);
/* if dentry was never inserted into hash, immediate free is OK */
if (hlist_bl_unhashed(&dentry->d_hash))
@@ -1101,7 +1105,11 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
char *dname;
struct dentry_mob *dmob;
- dmob = &init_dentry_mob;
+ if (parent)
+ dmob = parent->d_mob;
+ else
+ dmob = &init_dentry_mob;
+
if (dcache_mem_check(dmob))
return NULL;
@@ -2039,6 +2047,23 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
spin_unlock(&target->d_parent->d_lock);
}
+static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob);
+static void dentry_move_to_mob(struct dentry *de, struct dentry_mob *dmob);
+
+/*
+ * switch_mobs - reattach a renamed subtree to the mob of its new location
+ * @dentry: the dentry that has just been moved by d_move()
+ * @target: the dentry whose place @dentry took
+ *
+ * Nothing is done when both dentries already share a mob, or when @dentry
+ * is itself a mob root (its subtree then keeps its own mob).
+ *
+ * dcache_move_to_new_mob() only visits the descendants of the dentry it is
+ * given, so @dentry itself has to be moved explicitly. Otherwise it would
+ * keep pointing at the old mob, new children would inherit that old mob,
+ * and a DCACHE_MOB_ROOT flag copied from @target would make d_free()
+ * destroy the wrong mob.
+ */
+static void switch_mobs(struct dentry *dentry, struct dentry *target)
+{
+	struct dentry_mob *dmob = target->d_mob;
+
+	if (dentry->d_mob == dmob)
+		return;
+
+	if (dentry->d_flags & DCACHE_MOB_ROOT)
+		return;
+
+	dcache_move_to_new_mob(dentry, dmob);
+
+	/* dentry_move_to_mob() is called with d_lock held, as in the tree walk */
+	spin_lock(&dentry->d_lock);
+	dentry_move_to_mob(dentry, dmob);
+	if (target->d_flags & DCACHE_MOB_ROOT)
+		dentry->d_flags |= DCACHE_MOB_ROOT;
+	spin_unlock(&dentry->d_lock);
+}
+
/*
* When switching names, the actual string doesn't strictly have to
* be preserved in the target - because we're dropping the target
@@ -2115,6 +2140,8 @@ void d_move(struct dentry * dentry, struct dentry * target)
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&dentry->d_sb->s_rename_lock);
+
+ switch_mobs(dentry, target);
}
EXPORT_SYMBOL(d_move);
@@ -2826,6 +2853,159 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
}
EXPORT_SYMBOL(find_inode_number);
+/*
+ * create_mob - allocate and initialise a new, empty dentry mob
+ * @cur: existing mob whose nr_dentry_max limit is copied into the new one
+ *
+ * The new mob starts with a zeroed dentry counter and an empty LRU list.
+ * Returns the new mob, or NULL if either the mob or its per-cpu counter
+ * could not be allocated.
+ */
+static struct dentry_mob *create_mob(struct dentry_mob *cur)
+{
+	struct dentry_mob *mob = kmalloc(sizeof(*mob), GFP_KERNEL);
+
+	if (!mob)
+		return NULL;
+
+	if (percpu_counter_init(&mob->nr_dentry, 0) < 0)
+		goto err_free;
+
+	INIT_LIST_HEAD(&mob->dentry_lru);
+	mob->nr_dentry_max = cur->nr_dentry_max;
+
+	return mob;
+
+err_free:
+	kfree(mob);
+	return NULL;
+}
+
+/*
+ * destroy_mob - release a mob that no longer accounts any dentry
+ * @mob: the mob to free
+ *
+ * The mob must be completely empty: a non-zero dentry counter or a
+ * non-empty LRU list is treated as a fatal inconsistency.
+ */
+static void destroy_mob(struct dentry_mob *mob)
+{
+	BUG_ON(percpu_counter_sum(&mob->nr_dentry) != 0);
+	BUG_ON(!list_empty(&mob->dentry_lru));
+
+	percpu_counter_destroy(&mob->nr_dentry);
+	kfree(mob);
+}
+
+/*
+ * dentry_move_to_mob - re-account a single dentry from its current mob to @dmob
+ * @de: dentry to move; the callers in this patch hold de->d_lock
+ * @dmob: destination mob
+ *
+ * Drops @de from the old mob's dentry counter and from whatever LRU list it
+ * is on, switches de->d_mob, then accounts it in @dmob and, if the dentry is
+ * unused (d_count == 0), queues it at the tail of @dmob's LRU list.
+ */
+static void dentry_move_to_mob(struct dentry *de, struct dentry_mob *dmob)
+{
+ percpu_counter_dec(&de->d_mob->nr_dentry);
+ /* unlink from the LRU it currently sits on, if any */
+ if (!list_empty(&de->d_lru)) {
+ spin_lock(&dcache_lru_lock);
+ list_del_init(&de->d_lru);
+ spin_unlock(&dcache_lru_lock);
+ }
+
+ de->d_mob = dmob;
+
+ percpu_counter_inc(&dmob->nr_dentry);
+ /* only unreferenced dentries are put on the new mob's LRU */
+ if (!de->d_count) {
+ spin_lock(&dcache_lru_lock);
+ list_add_tail(&de->d_lru, &dmob->dentry_lru);
+ spin_unlock(&dcache_lru_lock);
+ }
+}
+
+/*
+ * dcache_move_to_new_mob - move every descendant of @root into @dmob
+ * @root: top of the subtree to walk; @root itself is NOT moved, the caller
+ *        has to re-account it separately
+ * @dmob: destination mob
+ *
+ * Walks the d_subdirs lists below @root. Dentries without children are
+ * moved as soon as they are visited; dentries with children are moved on
+ * the way back up, after their own subtree has been handled.
+ *
+ * The first pass runs without holding the per-sb s_rename_lock and only
+ * samples its sequence. If a rename is detected the walk restarts from
+ * @root with the lock taken via read_seqlock() (locked == 1).
+ */
+static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+ unsigned seq;
+ int locked = 0;
+
+ seq = read_seqbegin(&root->d_sb->s_rename_lock);
+again:
+ this_parent = root;
+ spin_lock(&this_parent->d_lock);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ /*
+ * Descend a level if the d_subdirs list is non-empty.
+ */
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ /*
+ * The child's d_lock stays held and becomes the new
+ * parent-level lock; the release/acquire pair below only
+ * touches dep_map (presumably to re-annotate the lock for
+ * lockdep without the nested subclass).
+ */
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+
+ /* no children: move this dentry right away */
+ dentry_move_to_mob(dentry, dmob);
+
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ if (this_parent != root) {
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
+
+ /* the subtree below child is done, now move child itself */
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ dentry_move_to_mob(child, dmob);
+ spin_unlock(&child->d_lock);
+
+ goto resume;
+ }
+
+ spin_unlock(&this_parent->d_lock);
+ /* a rename raced with the lockless pass: redo the walk under the lock */
+ if (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ read_sequnlock(&root->d_sb->s_rename_lock);
+ return;
+
+rename_retry:
+ /*
+ * Restart from root. Dentries already moved are passed to
+ * dentry_move_to_mob() again, which decrements and re-increments the
+ * counter of the same mob.
+ */
+ locked = 1;
+ read_seqlock(&root->d_sb->s_rename_lock);
+ goto again;
+}
+
+/*
+ * dcache_new_mob - create a new mob rooted at @root
+ * @root: dentry that becomes the root of the new mob
+ *
+ * A new mob is created with the dentry limit of @root's current mob, all
+ * descendants of @root are moved into it, and finally @root itself is
+ * moved and flagged DCACHE_MOB_ROOT. If @root was already a mob root, the
+ * mob it used to own is destroyed afterwards.
+ *
+ * Returns 0 on success, -ENOMEM if the new mob could not be allocated.
+ */
+int dcache_new_mob(struct dentry *root)
+{
+	struct dentry_mob *dmob, *old = NULL;
+
+	if (root->d_flags & DCACHE_MOB_ROOT)
+		old = root->d_mob;
+
+	dmob = create_mob(root->d_mob);
+	if (dmob == NULL)
+		return -ENOMEM;
+
+	/* moves the descendants only, root is handled below */
+	dcache_move_to_new_mob(root, dmob);
+
+	spin_lock(&root->d_lock);
+	root->d_flags |= DCACHE_MOB_ROOT;
+	/*
+	 * Use the common helper so that root is also dropped from the old
+	 * mob's counter and LRU. Only incrementing the new counter would
+	 * leave the old mob with a stale reference and make destroy_mob()
+	 * below hit its BUG() checks.
+	 */
+	dentry_move_to_mob(root, dmob);
+	spin_unlock(&root->d_lock);
+
+	if (old != NULL)
+		destroy_mob(old);
+
+	return 0;
+}
+
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 80bb9e4..3681307 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -54,6 +54,9 @@ struct dentry_mob {
struct list_head dentry_lru;
};
+struct dentry;
+int dcache_new_mob(struct dentry *root);
+
/*
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
@@ -227,6 +230,8 @@ struct dentry_operations {
#define DCACHE_MANAGED_DENTRY \
(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
+#define DCACHE_MOB_ROOT 0x80000
+
static inline int dname_external(struct dentry *dentry)
{
return dentry->d_name.name != dentry->d_iname;
--
1.5.5.6
next prev parent reply other threads:[~2011-05-03 12:18 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-03 12:14 [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 1/13] vfs: Lighten r/o rename_lock lockers Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 2/13] vfs: Factor out rename_lock locking Pavel Emelyanov
2011-05-03 12:16 ` [PATCH 3/13] vfs: Make the rename_lock per-sb Pavel Emelyanov
2011-05-03 12:16 ` [PATCH 4/13] vfs: Factor out tree (of four) shrinkers code Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 5/13] vfs: Make dentry LRU list global Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 6/13] vfs: Turn the nr_dentry into percpu_counter Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 7/13] vfs: Limit the number of dentries globally Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 8/13] vfs: Introduce the dentry mobs Pavel Emelyanov
2011-06-18 13:40 ` Andrea Arcangeli
2011-05-03 12:18 ` Pavel Emelyanov [this message]
2011-05-03 12:19 ` [PATCH 10/13] vfs: Routnes for setting mob size and getting stats Pavel Emelyanov
2011-05-03 12:19 ` [PATCH 11/13] vfs: Make shrink_dcache_memory prune dcache from all mobs Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 12/13] vfs: Mobs creation and mgmt API Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 13/13] vfs: Dentry mobs listing in proc Pavel Emelyanov
2011-05-06 1:05 ` [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Dave Chinner
2011-05-06 12:15 ` Pavel Emelyanov
2011-05-07 0:01 ` Dave Chinner
2011-05-10 11:18 ` Pavel Emelyanov
2011-06-18 13:30 ` Andrea Arcangeli
2011-06-20 0:49 ` Dave Chinner
2011-07-04 5:32 ` Pavel Emelyanov
2011-05-23 6:43 ` Pavel Emelyanov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DBFF2B2.9000508@parallels.com \
--to=xemul@parallels.com \
--cc=aarcange@redhat.com \
--cc=dave@linux.vnet.ibm.com \
--cc=hughd@google.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=npiggin@kernel.dk \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).