From: Pavel Emelyanov <xemul@parallels.com>
To: Hugh Dickins <hughd@google.com>, Nick Piggin <npiggin@kernel.dk>,
Andrea Arcangeli <aarcange@redhat.com>,
Rik van Riel <riel@redhat.com>,
Dave Hansen <dave@linux.vnet.ibm.com>,
Alexa
Cc: linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: [PATCH 9/13] vfs: More than one mob management
Date: Tue, 03 May 2011 16:18:58 +0400 [thread overview]
Message-ID: <4DBFF2B2.9000508@parallels.com> (raw)
In-Reply-To: <4DBFF1AD.90303@parallels.com>
This management includes
* a new dentry attached to some parent inherits the mob from that parent
* on rename from one mob to another, the subtree is reattached to
the new mob (see comment below)
* the mob root is marked with a flag and on its death the mob is
killed
Some more words about the rename. This recharge is slow, but that is OK,
since the main use case for mobs is per-container dcache management
and the move of some subdir from one container to another is actually
a rare operation which is not expected to be fast.
Moreover, in OpenVZ each container's root is a bind-mount, so if one
tries to do mv one_ct_root/x other_ct_root/ the rename check for the
vfsmnt equality will fail and the real copy will occur.
One bad thing about this approach: when we mount some new filesystem
on a dentry that belongs to a non-init mob, the new mount will be attached
to the init mob. We need to do the mob change when we attach an fs to a
mountpoint.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
fs/dcache.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/dcache.h | 5 ++
2 files changed, 186 insertions(+), 1 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index bfe047d..51fb998 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -145,6 +145,8 @@ static void __d_free(struct rcu_head *head)
kmem_cache_free(dentry_cache, dentry);
}
+static void destroy_mob(struct dentry_mob *mob);
+
/*
* no locks, please.
*/
@@ -154,6 +156,8 @@ static void d_free(struct dentry *dentry)
percpu_counter_dec(&dentry->d_mob->nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
+ if (dentry->d_flags & DCACHE_MOB_ROOT)
+ destroy_mob(dentry->d_mob);
/* if dentry was never inserted into hash, immediate free is OK */
if (hlist_bl_unhashed(&dentry->d_hash))
@@ -1101,7 +1105,11 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
char *dname;
struct dentry_mob *dmob;
- dmob = &init_dentry_mob;
+ if (parent)
+ dmob = parent->d_mob;
+ else
+ dmob = &init_dentry_mob;
+
if (dcache_mem_check(dmob))
return NULL;
@@ -2039,6 +2047,23 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
spin_unlock(&target->d_parent->d_lock);
}
+static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob);
+static void switch_mobs(struct dentry *dentry, struct dentry *target)
+{ /* Re-home a renamed dentry's subtree; d_move() calls this after releasing dentry->d_lock and the rename lock. */
+ if (dentry->d_mob == target->d_mob)
+ return;
+ /* A dentry that is itself flagged as a mob root is left in its own mob. */
+ if (dentry->d_flags & DCACHE_MOB_ROOT)
+ return;
+ /*
+ * Re-charge everything below @dentry to target's mob.
+ * NOTE(review): dcache_move_to_new_mob() as written only visits the
+ * descendants of its root argument, so dentry->d_mob itself is not
+ * updated by this call - confirm that this is intended.
+ */
+ dcache_move_to_new_mob(dentry, target->d_mob);
+ if (target->d_flags & DCACHE_MOB_ROOT) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_MOB_ROOT;
+ spin_unlock(&dentry->d_lock);
+ }
+}
+
/*
* When switching names, the actual string doesn't strictly have to
* be preserved in the target - because we're dropping the target
@@ -2115,6 +2140,8 @@ void d_move(struct dentry * dentry, struct dentry * target)
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&dentry->d_sb->s_rename_lock);
+
+ switch_mobs(dentry, target);
}
EXPORT_SYMBOL(d_move);
@@ -2826,6 +2853,159 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
}
EXPORT_SYMBOL(find_inode_number);
+static struct dentry_mob *create_mob(struct dentry_mob *cur)
+{ /* Allocate a new, empty mob that inherits nr_dentry_max from @cur; returns NULL on failure. */
+ struct dentry_mob *dmob;
+ /* The mob structure itself is a GFP_KERNEL allocation. */
+ dmob = kmalloc(sizeof(struct dentry_mob), GFP_KERNEL);
+ if (dmob == NULL)
+ return NULL;
+ /* The dentry counter starts at zero; undo the kmalloc if it cannot be set up. */
+ if (percpu_counter_init(&dmob->nr_dentry, 0) < 0) {
+ kfree(dmob);
+ return NULL;
+ }
+ /* Copy the limit from @cur and start with an empty LRU list. */
+ dmob->nr_dentry_max = cur->nr_dentry_max;
+ INIT_LIST_HEAD(&dmob->dentry_lru);
+ /* destroy_mob() is the matching teardown for the returned object. */
+ return dmob;
+}
+/*
+ * Tear down @mob: release its per-cpu dentry counter and free the
+ * structure. The mob must already be empty - a non-zero dentry count
+ * or a non-empty LRU list triggers BUG().
+ */
+static void destroy_mob(struct dentry_mob *mob)
+{
+ if (percpu_counter_sum(&mob->nr_dentry) != 0)
+ BUG();
+ if (!list_empty(&mob->dentry_lru))
+ BUG();
+ /* Both emptiness checks passed; release the counter, then the mob. */
+ percpu_counter_destroy(&mob->nr_dentry);
+ kfree(mob);
+}
+/*
+ * Re-charge a single dentry from its current mob to @dmob: adjust both
+ * per-mob dentry counters and fix up the dentry's LRU membership.
+ * Both call sites in dcache_move_to_new_mob() hold de->d_lock.
+ */
+static void dentry_move_to_mob(struct dentry *de, struct dentry_mob *dmob)
+{
+ percpu_counter_dec(&de->d_mob->nr_dentry);
+ if (!list_empty(&de->d_lru)) {
+ spin_lock(&dcache_lru_lock);
+ list_del_init(&de->d_lru);
+ spin_unlock(&dcache_lru_lock);
+ }
+ /* From here on the dentry belongs to the new mob. */
+ de->d_mob = dmob;
+ /* Count it in @dmob; a dentry with d_count == 0 is queued at the tail of @dmob's LRU. */
+ percpu_counter_inc(&dmob->nr_dentry);
+ if (!de->d_count) {
+ spin_lock(&dcache_lru_lock);
+ list_add_tail(&de->d_lru, &dmob->dentry_lru);
+ spin_unlock(&dcache_lru_lock);
+ }
+}
+/*
+ * Walk the dentry tree below @root and re-charge every descendant to
+ * @dmob via dentry_move_to_mob(). @root itself is never passed to
+ * dentry_move_to_mob() here; callers handle it separately.
+ *
+ * The first pass only samples the per-sb s_rename_lock sequence with
+ * read_seqbegin(). If the sequence changed, or an ascent finds that a
+ * child's parent is no longer the expected one, the whole walk is
+ * restarted from @root with read_seqlock() held (locked = 1).
+ */
+static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+ unsigned seq;
+ int locked = 0;
+ /* Optimistic pass: remember the rename sequence instead of locking. */
+ seq = read_seqbegin(&root->d_sb->s_rename_lock);
+again:
+ this_parent = root;
+ spin_lock(&this_parent->d_lock);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ /*
+ * Descend a level if the d_subdirs list is non-empty.
+ * The child's d_lock stays held and becomes the lock of the new
+ * this_parent; spin_release()/spin_acquire() only touch its
+ * dep_map. The child itself is re-charged later, when the walk
+ * ascends out of it.
+ */
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+ /* No children: re-charge this dentry right away. */
+ dentry_move_to_mob(dentry, dmob);
+
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ if (this_parent != root) {
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
+ /* All of child's own children are done; now re-charge child itself. */
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ dentry_move_to_mob(child, dmob);
+ spin_unlock(&child->d_lock);
+
+ goto resume;
+ }
+ /* Back at @root: drop its lock and check that no rename raced with the walk. */
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ read_sequnlock(&root->d_sb->s_rename_lock);
+ return;
+ /* Slow path: redo the whole walk with the rename lock taken. */
+rename_retry:
+ locked = 1;
+ read_seqlock(&root->d_sb->s_rename_lock);
+ goto again;
+}
+/*
+ * Create a fresh mob headed by @root and move @root's subtree into it.
+ * The new mob inherits nr_dentry_max from @root's current mob.
+ *
+ * Returns 0 on success, or -ENOMEM if the new mob cannot be allocated.
+ */
+int dcache_new_mob(struct dentry *root)
+{
+ struct dentry_mob *dmob, *old = NULL;
+ /* If @root already heads a mob, that mob is replaced and destroyed below. */
+ if (root->d_flags & DCACHE_MOB_ROOT)
+ old = root->d_mob;
+
+ dmob = create_mob(root->d_mob);
+ if (dmob == NULL)
+ return -ENOMEM;
+ /* Re-charge the descendants first; the walk does not touch @root itself. */
+ dcache_move_to_new_mob(root, dmob);
+ /*
+ * Now switch @root: flag it as a mob root, point it at the new mob
+ * and count it there.
+ * NOTE(review): the previous mob's nr_dentry is not decremented for
+ * @root and @root's LRU linkage is left as is - confirm, since
+ * destroy_mob() below requires the old mob to be completely empty.
+ */
+ spin_lock(&root->d_lock);
+ root->d_flags |= DCACHE_MOB_ROOT;
+ root->d_mob = dmob;
+ percpu_counter_inc(&dmob->nr_dentry);
+ spin_unlock(&root->d_lock);
+ /* destroy_mob() BUG()s if the old mob still has dentries or LRU entries. */
+ if (old != NULL)
+ destroy_mob(old);
+
+ return 0;
+}
+
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 80bb9e4..3681307 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -54,6 +54,9 @@ struct dentry_mob {
struct list_head dentry_lru;
};
+struct dentry;
+int dcache_new_mob(struct dentry *root);
+
/*
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
@@ -227,6 +230,8 @@ struct dentry_operations {
#define DCACHE_MANAGED_DENTRY \
(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
+#define DCACHE_MOB_ROOT 0x80000
+
static inline int dname_external(struct dentry *dentry)
{
return dentry->d_name.name != dentry->d_iname;
--
1.5.5.6
next prev parent reply other threads:[~2011-05-03 12:18 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-03 12:14 [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 1/13] vfs: Lighten r/o rename_lock lockers Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 2/13] vfs: Factor out rename_lock locking Pavel Emelyanov
2011-05-03 12:16 ` [PATCH 3/13] vfs: Make the rename_lock per-sb Pavel Emelyanov
2011-05-03 12:16 ` [PATCH 4/13] vfs: Factor out tree (of four) shrinkers code Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 5/13] vfs: Make dentry LRU list global Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 6/13] vfs: Turn the nr_dentry into percpu_counter Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 7/13] vfs: Limit the number of dentries globally Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 8/13] vfs: Introduce the dentry mobs Pavel Emelyanov
2011-06-18 13:40 ` Andrea Arcangeli
2011-05-03 12:18 ` Pavel Emelyanov [this message]
2011-05-03 12:19 ` [PATCH 10/13] vfs: Routnes for setting mob size and getting stats Pavel Emelyanov
2011-05-03 12:19 ` [PATCH 11/13] vfs: Make shrink_dcache_memory prune dcache from all mobs Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 12/13] vfs: Mobs creation and mgmt API Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 13/13] vfs: Dentry mobs listing in proc Pavel Emelyanov
2011-05-06 1:05 ` [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Dave Chinner
2011-05-06 12:15 ` Pavel Emelyanov
2011-05-07 0:01 ` Dave Chinner
2011-05-10 11:18 ` Pavel Emelyanov
2011-06-18 13:30 ` Andrea Arcangeli
2011-06-20 0:49 ` Dave Chinner
2011-07-04 5:32 ` Pavel Emelyanov
2011-05-23 6:43 ` Pavel Emelyanov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DBFF2B2.9000508@parallels.com \
--to=xemul@parallels.com \
--cc=aarcange@redhat.com \
--cc=dave@linux.vnet.ibm.com \
--cc=hughd@google.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=npiggin@kernel.dk \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.