From: Mike Waychison <mikew@google.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH v1 1/8] Deferred batching of dput()
Date: Fri, 16 Jan 2009 18:29:42 -0800 [thread overview]
Message-ID: <20090117022942.20425.15217.stgit@crlf.corp.google.com> (raw)
In-Reply-To: <20090117022936.20425.43248.stgit@crlf.corp.google.com>
This patch adds the notion of postponed dputs to the VFS. We do this by
introducing struct postponed_dentries, a data structure that maintains a list
of dentries that are pending a final dput.
Each CPU gets an on-heap allocated postponed_dentries structure that is
protected by disabling pre-emption and ensuring that they are only ever
accessed from the respective CPU. When a queue gets full, we allocate a new
one to replace it and swap them atomically, afterwhich we release the previous
queue. In the case where we fail to allocate a new queue, we go down a slow
path and iterate processing a single dentry at a time until the queue is empty.
The structure itself has three lists embedded in it. We maintain:
- Dentries pending for dput.
- Dentries and their associated inodes pending dentry_iput.
We reuse the first list as we discover parents.
Currently, postponed dputs are still handled in a serialized fashion, but we
defer them into struct postponed_dentries. The lock consolidation will come in
a later patch.
Lastly, we introduce a way to flush any pending dput()s via dput_drain_all() to
ensure that all dentries are finalized before fs shutdown.
Signed-off-by: Mike Waychison <mikew@google.com>
---
fs/dcache.c | 289 +++++++++++++++++++++++++++++++++++++++++++-----
fs/super.c | 2
include/linux/dcache.h | 1
3 files changed, 261 insertions(+), 31 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 4547f66..ea6b8f0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include <linux/cpu.h>
#include "internal.h"
int sysctl_vfs_cache_pressure __read_mostly = 100;
@@ -182,6 +183,175 @@ static struct dentry *d_kill(struct dentry *dentry)
return parent;
}
+struct postponed_dentries {
+ unsigned size;
+ struct {
+ unsigned nr;
+ struct dentry **dentries;
+ } pending_dput;
+ struct {
+ unsigned nr;
+ struct dentry **dentries;
+ struct inode **inodes;
+ } pending_dentry_iput;
+};
+
+struct postponed_dentries_onstack {
+ struct postponed_dentries ppd;
+ struct dentry *dentry_pending_dput;
+ struct dentry *dentry_pending_dentry_iput;
+ struct inode *inode_pending_dentry_iput;
+};
+
+static struct postponed_dentries *init_ppd_onstack(
+ struct postponed_dentries_onstack *ppd_onstack)
+{
+ struct postponed_dentries *ppd;
+ ppd = &ppd_onstack->ppd;
+ ppd->size = 1;
+ ppd->pending_dput.nr = 0;
+ ppd->pending_dput.dentries = &ppd_onstack->dentry_pending_dput;
+ ppd->pending_dentry_iput.nr = 0;
+ ppd->pending_dentry_iput.dentries =
+ &ppd_onstack->dentry_pending_dentry_iput;
+ ppd->pending_dentry_iput.inodes =
+ &ppd_onstack->inode_pending_dentry_iput;
+ return ppd;
+}
+
+static unsigned postponed_dentries_per_page(void)
+{
+ return (PAGE_SIZE - sizeof(struct postponed_dentries)) /
+ (3 * sizeof(void *));
+}
+
+/* Allocate a postponed_dentries structure on the heap. */
+struct postponed_dentries *new_postponed_dentries(void)
+{
+ struct postponed_dentries *ppd;
+ struct page *page;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+
+ ppd = page_address(page);
+
+ /* Create an set of three arrays immediately after the structure. */
+ ppd->size = postponed_dentries_per_page();
+ ppd->pending_dput.nr = 0;
+ ppd->pending_dput.dentries = (struct dentry **)(ppd + 1);
+ ppd->pending_dentry_iput.nr = 0;
+ ppd->pending_dentry_iput.dentries =
+ ppd->pending_dput.dentries + ppd->size;
+ ppd->pending_dentry_iput.inodes = (struct inode **)
+ (ppd->pending_dentry_iput.dentries + ppd->size);
+
+ return ppd;
+}
+
+static int pending_dput_full(struct postponed_dentries *ppd)
+{
+ return ppd->pending_dput.nr == ppd->size;
+}
+
+static void add_pending_dput(struct postponed_dentries *ppd,
+ struct dentry *dentry)
+{
+ ppd->pending_dput.dentries[ppd->pending_dput.nr++] = dentry;
+}
+
+static DEFINE_PER_CPU(struct postponed_dentries *, postponed_dentries);
+
+static int initialize_postponed_dentries(long cpu)
+{
+ struct postponed_dentries **pppd = &per_cpu(postponed_dentries, cpu);
+ *pppd = new_postponed_dentries();
+ if (!*pppd)
+ return 1;
+ return 0;
+}
+
+static void process_postponed_dentries(struct postponed_dentries *ppd);
+static void release_postponed_dentries(struct postponed_dentries *ppd)
+{
+ process_postponed_dentries(ppd);
+ free_page((unsigned long)ppd);
+}
+
+static int __cpuinit cpuup_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (initialize_postponed_dentries(cpu))
+ return NOTIFY_STOP;
+ break;
+ case CPU_DEAD:
+ release_postponed_dentries(per_cpu(postponed_dentries, cpu));
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata dentry_put_cache_notifier = {
+ &cpuup_callback, NULL, 0
+};
+
+static void real_dput(struct dentry *dentry)
+{
+ /* Legacy: */
+repeat:
+ spin_lock(&dcache_lock);
+ if (atomic_dec_and_test(&dentry->d_count)) {
+ spin_unlock(&dcache_lock);
+ return;
+ }
+
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+ }
+
+ /*
+ * AV: ->d_delete() is _NOT_ allowed to block now.
+ */
+ if (dentry->d_op && dentry->d_op->d_delete) {
+ if (dentry->d_op->d_delete(dentry))
+ goto unhash_it;
+ }
+ /* Unreachable? Get rid of it */
+ if (d_unhashed(dentry))
+ goto kill_it;
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_flags |= DCACHE_REFERENCED;
+ dentry_lru_add(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+
+unhash_it:
+ __d_drop(dentry);
+kill_it:
+ /* if dentry was on the d_lru list delete it from there */
+ dentry_lru_del(dentry);
+ dentry = d_kill(dentry);
+ if (dentry)
+ goto repeat;
+}
+
+static void process_postponed_dentries(struct postponed_dentries *ppd)
+{
+ unsigned i;
+
+ for (i = 0; i < ppd->pending_dput.nr; i++)
+ real_dput(ppd->pending_dput.dentries[i]);
+}
/*
* This is dput
*
@@ -199,6 +369,40 @@ static struct dentry *d_kill(struct dentry *dentry)
* Real recursion would eat up our stack space.
*/
+static void postpone_dput(struct dentry *dentry)
+{
+ struct postponed_dentries *ppd, *new_ppd;
+
+again:
+ ppd = get_cpu_var(postponed_dentries);
+ if (!pending_dput_full(ppd)) {
+ add_pending_dput(ppd, dentry);
+ put_cpu_var(postponed_dentries);
+ return;
+ }
+
+ /* need to flush out existing pending dentries. */
+ put_cpu_var(postponed_dentries);
+ /* Allocate more space.. */
+ new_ppd = new_postponed_dentries();
+ if (!new_ppd) {
+ /* Take the slow path, memory is low */
+ struct postponed_dentries_onstack ppd_onstack;
+ struct postponed_dentries *ppd;
+
+ ppd = init_ppd_onstack(&ppd_onstack);
+ add_pending_dput(ppd, dentry);
+ process_postponed_dentries(ppd);
+ return;
+ }
+ ppd = get_cpu_var(postponed_dentries);
+ __get_cpu_var(postponed_dentries) = new_ppd;
+ put_cpu_var(postponed_dentries);
+ process_postponed_dentries(ppd);
+ goto again;
+}
+
+
/*
* dput - release a dentry
* @dentry: dentry to release
@@ -216,45 +420,62 @@ void dput(struct dentry *dentry)
if (!dentry)
return;
-repeat:
if (atomic_read(&dentry->d_count) == 1)
might_sleep();
- if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
+ /* Decrement the count unless we would hit zero */
+ if (atomic_add_unless(&dentry->d_count, -1, 1))
return;
+ postpone_dput(dentry);
+}
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
- return;
+/**
+ * dput_drain_slowpath - drain out the postponed dentries on this cpu
+ *
+ * Iterates through and loops until there are no dentries pending dput on the
+ * invoked CPU. Must be called with pre-emption disabled, but may re-enable
+ * pre-emption. Returns with pre-emption disabled. Caller is required to
+ * ensure that this thread will not change CPUs in the meantime.
+ */
+static void dput_drain_slowpath(void)
+{
+ struct postponed_dentries *ppd;
+
+ ppd = __get_cpu_var(postponed_dentries);
+ while (ppd->pending_dput.nr) {
+ struct postponed_dentries_onstack ppd_onstack;
+ struct postponed_dentries *tmp_ppd;
+ struct dentry *dentry;
+
+ dentry = ppd->pending_dput.dentries[--ppd->pending_dput.nr];
+
+ tmp_ppd = init_ppd_onstack(&ppd_onstack);
+ add_pending_dput(tmp_ppd, dentry);
+ put_cpu_var(postponed_dentries);
+ process_postponed_dentries(tmp_ppd);
+ ppd = get_cpu_var(postponed_dentries);
}
+}
- /*
- * AV: ->d_delete() is _NOT_ allowed to block now.
- */
- if (dentry->d_op && dentry->d_op->d_delete) {
- if (dentry->d_op->d_delete(dentry))
- goto unhash_it;
+static void dput_drain_per_cpu(struct work_struct *dummy)
+{
+ struct postponed_dentries *ppd, *new_ppd;
+
+ new_ppd = new_postponed_dentries();
+
+ ppd = get_cpu_var(postponed_dentries);
+ if (new_ppd) {
+ __get_cpu_var(postponed_dentries) = new_ppd;
+ put_cpu_var(postponed_dentries);
+ release_postponed_dentries(ppd);
+ } else {
+ dput_drain_slowpath();
+ put_cpu_var(postponed_dentries);
}
- /* Unreachable? Get rid of it */
- if (d_unhashed(dentry))
- goto kill_it;
- if (list_empty(&dentry->d_lru)) {
- dentry->d_flags |= DCACHE_REFERENCED;
- dentry_lru_add(dentry);
- }
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
- return;
+}
-unhash_it:
- __d_drop(dentry);
-kill_it:
- /* if dentry was on the d_lru list delete it from there */
- dentry_lru_del(dentry);
- dentry = d_kill(dentry);
- if (dentry)
- goto repeat;
+void dput_drain_all(void)
+{
+ schedule_on_each_cpu(dput_drain_per_cpu);
}
/**
@@ -2321,6 +2542,7 @@ void __init vfs_caches_init_early(void)
void __init vfs_caches_init(unsigned long mempages)
{
unsigned long reserve;
+ long cpu;
/* Base hash sizes on available memory, with a reserve equal to
150% of current kernel size */
@@ -2337,6 +2559,11 @@ void __init vfs_caches_init(unsigned long mempages)
mnt_init();
bdev_cache_init();
chrdev_init();
+
+ for_each_online_cpu(cpu)
+ if (initialize_postponed_dentries(cpu))
+ panic("Couldn't init postponed dentries\n");
+ register_cpu_notifier(&dentry_put_cache_notifier);
}
EXPORT_SYMBOL(d_alloc);
diff --git a/fs/super.c b/fs/super.c
index ed080c4..534840f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -292,6 +292,8 @@ void generic_shutdown_super(struct super_block *sb)
const struct super_operations *sop = sb->s_op;
+ dput_drain_all();
+
if (sb->s_root) {
shrink_dcache_for_umount(sb);
fsync_super(sb);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c66d224..c9f7c95 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -362,6 +362,7 @@ static inline struct dentry *dget_parent(struct dentry *dentry)
}
extern void dput(struct dentry *);
+void dput_drain_all(void);
static inline int d_mountpoint(struct dentry *dentry)
{
next prev parent reply other threads:[~2009-01-17 2:30 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-17 2:29 [PATCH v1 0/8] Deferred dput() and iput() -- reducing lock contention Mike Waychison
2009-01-17 2:29 ` Mike Waychison [this message]
2009-01-17 10:15 ` [PATCH v1 1/8] Deferred batching of dput() Evgeniy Polyakov
2009-01-20 20:07 ` Mike Waychison
2009-01-17 2:29 ` [PATCH v1 2/8] Parallel dput() Mike Waychison
2009-01-17 2:29 ` [PATCH v1 3/8] Deferred batching of iput() Mike Waychison
2009-01-17 10:18 ` Evgeniy Polyakov
2009-01-20 20:07 ` Mike Waychison
2009-01-17 2:29 ` [PATCH v1 4/8] Fixing iput() called from put_super path Mike Waychison
2009-01-17 2:30 ` [PATCH v1 5/8] Parallelize iput() Mike Waychison
2009-01-17 2:30 ` [PATCH v1 6/8] hugetlbfs drop_inode update Mike Waychison
2009-01-17 2:30 ` [PATCH v1 7/8] Make drop_caches flush pending dput()s and iput()s Mike Waychison
2009-01-17 2:30 ` [PATCH v1 8/8] Make the sync path drain dentries and inodes Mike Waychison
2009-01-17 7:04 ` [PATCH v1 0/8] Deferred dput() and iput() -- reducing lock contention Eric Dumazet
2009-01-20 20:00 ` Mike Waychison
2009-01-20 20:00 ` Mike Waychison
2009-01-17 8:12 ` Dave Chinner
2009-01-20 19:01 ` Mike Waychison
2009-01-29 2:09 ` Mike Waychison
2009-01-21 5:52 ` Andi Kleen
2009-01-21 6:22 ` Mike Waychison
2009-01-21 8:48 ` Andi Kleen
2009-01-21 17:28 ` Mike Waychison
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090117022942.20425.15217.stgit@crlf.corp.google.com \
--to=mikew@google.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.