From: npiggin@kernel.dk
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Subject: [patch 21/35] fs: icache per-cpu nr_inodes, non-atomic nr_unused counters
Date: Tue, 19 Oct 2010 14:42:37 +1100 [thread overview]
Message-ID: <20101019034657.693361350@kernel.dk> (raw)
In-Reply-To: 20101019034216.319085068@kernel.dk
[-- Attachment #1: fs-inode-nr_inodes.patch --]
[-- Type: text/plain, Size: 8057 bytes --]
From: Eric Dumazet <eric.dumazet@gmail.com>
[Eric Dumazet]
Make nr_inodes a per-cpu counter to avoid cache line ping-pong between CPUs.
[Nick Piggin]
Make nr_unused non-atomic and protected by wb_inode_list_lock.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/fs-writeback.c | 18 +++++++++++-----
fs/inode.c | 58 ++++++++++++++++++++++++++++++++++++++---------------
include/linux/fs.h | 15 +++++--------
kernel/sysctl.c | 4 +--
4 files changed, 63 insertions(+), 32 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-19 14:38:03.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-19 14:38:27.000000000 +1100
@@ -139,12 +139,42 @@
* Statistics gathering..
*/
struct inodes_stat_t inodes_stat = {
- .nr_inodes = ATOMIC_INIT(0),
- .nr_unused = ATOMIC_INIT(0),
+ .nr_inodes = 0,
+ .nr_unused = 0,
};
+static DEFINE_PER_CPU(unsigned int, nr_inodes);
+
static struct kmem_cache *inode_cachep __read_mostly;
+int get_nr_inodes(void)
+{
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_inodes, i);
+ return sum < 0 ? 0 : sum;
+}
+
+int get_nr_inodes_unused(void)
+{
+ return inodes_stat.nr_unused;
+}
+
+/*
+ * Handle nr_inodes sysctl
+ */
+int proc_nr_inodes(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+ inodes_stat.nr_inodes = get_nr_inodes();
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+#else
+ return -ENOSYS;
+#endif
+}
+
static void wake_up_inode(struct inode *inode)
{
/*
@@ -232,7 +262,7 @@
inode->i_fsnotify_mask = 0;
#endif
- atomic_inc(&inodes_stat.nr_inodes);
+ this_cpu_inc(nr_inodes);
return 0;
out:
@@ -280,7 +310,7 @@
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
- atomic_dec(&inodes_stat.nr_inodes);
+ this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
@@ -400,7 +430,7 @@
static int invalidate_list(struct list_head *head, struct list_head *dispose)
{
struct list_head *next;
- int busy = 0, count = 0;
+ int busy = 0;
next = head->next;
for (;;) {
@@ -420,19 +450,17 @@
if (!inode->i_count) {
spin_lock(&wb_inode_list_lock);
list_del(&inode->i_list);
+ inodes_stat.nr_unused--;
spin_unlock(&wb_inode_list_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
list_add(&inode->i_list, dispose);
- count++;
continue;
}
spin_unlock(&inode->i_lock);
busy = 1;
}
- /* only unused inodes may be cached with i_count zero */
- atomic_sub(count, &inodes_stat.nr_unused);
return busy;
}
@@ -494,7 +522,6 @@
static void prune_icache(unsigned long nr_to_scan)
{
LIST_HEAD(freeable);
- int nr_pruned = 0;
unsigned long reap = 0;
down_read(&iprune_sem);
@@ -515,7 +542,7 @@
if (inode->i_count || (inode->i_state & ~I_REFERENCED)) {
list_del_init(&inode->i_list);
spin_unlock(&inode->i_lock);
- atomic_dec(&inodes_stat.nr_unused);
+ inodes_stat.nr_unused--;
continue;
}
if (inode->i_state & I_REFERENCED) {
@@ -557,9 +584,8 @@
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- nr_pruned++;
+ inodes_stat.nr_unused--;
}
- atomic_sub(nr_pruned, &inodes_stat.nr_unused);
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
@@ -587,7 +613,7 @@
unsigned long nr;
shrinker_add_scan(&nr_to_scan, scanned, global,
- atomic_read(&inodes_stat.nr_unused),
+ inodes_stat.nr_unused,
SHRINK_DEFAULT_SEEKS * 100 / sysctl_vfs_cache_pressure);
/*
* Nasty deadlock avoidance. We may hold various FS locks,
@@ -1372,8 +1398,8 @@
list_empty(&inode->i_list)) {
spin_lock(&wb_inode_list_lock);
list_add(&inode->i_list, &inode_unused);
+ inodes_stat.nr_unused++;
spin_unlock(&wb_inode_list_lock);
- atomic_inc(&inodes_stat.nr_unused);
}
spin_unlock(&inode->i_lock);
return;
@@ -1390,9 +1416,9 @@
if (!list_empty(&inode->i_list)) {
spin_lock(&wb_inode_list_lock);
list_del_init(&inode->i_list);
- spin_unlock(&wb_inode_list_lock);
if (!inode->i_state)
- atomic_dec(&inodes_stat.nr_unused);
+ inodes_stat.nr_unused--;
+ spin_unlock(&wb_inode_list_lock);
}
spin_lock(&sb_inode_list_lock);
list_del_rcu(&inode->i_sb_list);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-10-19 14:38:03.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-10-19 14:38:05.000000000 +1100
@@ -40,14 +40,8 @@
};
struct inodes_stat_t {
- /*
- * Using atomics here is a hack which should just happen to
- * work on all architectures today. Not a big deal though,
- * because it goes away and gets fixed properly later in the
- * inode scaling series.
- */
- atomic_t nr_inodes;
- atomic_t nr_unused;
+ int nr_inodes;
+ int nr_unused;
int dummy[5]; /* padding for sysctl ABI compatibility */
};
@@ -413,6 +407,8 @@
extern int get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
+extern int get_nr_inodes(void);
+extern int get_nr_inodes_unused(void);
extern int leases_enable, lease_break_time;
struct buffer_head;
@@ -2490,7 +2486,8 @@
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_inodes(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-19 14:38:03.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-19 14:38:05.000000000 +1100
@@ -738,6 +738,7 @@
{
unsigned long expired;
long nr_pages;
+ int nr_dirty_inodes;
/*
* When set to zero, disable periodic writeback
@@ -750,11 +751,15 @@
if (time_before(jiffies, expired))
return 0;
+ /* approximate dirty inodes */
+ nr_dirty_inodes = get_nr_inodes() - get_nr_inodes_unused();
+ if (nr_dirty_inodes < 0)
+ nr_dirty_inodes = 0;
+
wb->last_old_flush = jiffies;
nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
- (atomic_read(&inodes_stat.nr_inodes) -
- atomic_read(&inodes_stat.nr_unused));
+ nr_dirty_inodes;
if (nr_pages) {
struct wb_writeback_work work = {
@@ -1120,6 +1125,7 @@
{
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ int nr_dirty_inodes;
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
@@ -1129,9 +1135,11 @@
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- work.nr_pages = nr_dirty + nr_unstable +
- (atomic_read(&inodes_stat.nr_inodes) -
- atomic_read(&inodes_stat.nr_unused));
+ nr_dirty_inodes = get_nr_inodes() - get_nr_inodes_unused();
+ if (nr_dirty_inodes < 0)
+ nr_dirty_inodes = 0;
+
+ work.nr_pages = nr_dirty + nr_unstable + nr_dirty_inodes;
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c 2010-10-19 14:19:24.000000000 +1100
+++ linux-2.6/kernel/sysctl.c 2010-10-19 14:38:05.000000000 +1100
@@ -1340,14 +1340,14 @@
.data = &inodes_stat,
.maxlen = 2*sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_nr_inodes,
},
{
.procname = "inode-state",
.data = &inodes_stat,
.maxlen = 7*sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_nr_inodes,
},
{
.procname = "file-nr",
next prev parent reply other threads:[~2010-10-19 3:56 UTC|newest]
Thread overview: 78+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-19 3:42 [patch 00/35] my inode scaling series for review npiggin
2010-10-19 3:42 ` [patch 01/35] bit_spinlock: add required includes npiggin
2010-10-19 3:42 ` [patch 02/35] kernel: add bl_list npiggin
2010-10-19 3:42 ` [patch 03/35] mm: implement per-zone shrinker npiggin
2010-10-19 3:42 ` npiggin
2010-10-19 4:49 ` KOSAKI Motohiro
2010-10-19 4:49 ` KOSAKI Motohiro
2010-10-19 5:33 ` Nick Piggin
2010-10-19 5:33 ` Nick Piggin
2010-10-19 5:40 ` KOSAKI Motohiro
2010-10-19 5:40 ` KOSAKI Motohiro
2010-10-19 3:42 ` [patch 04/35] vfs: convert inode and dentry caches to " npiggin
2010-10-19 3:42 ` npiggin
2010-10-19 3:42 ` [patch 05/35] fs: icache lock s_inodes list npiggin
2010-10-19 3:42 ` [patch 06/35] fs: icache lock inode hash npiggin
2010-10-19 3:42 ` [patch 07/35] fs: icache lock i_state npiggin
2010-10-19 10:47 ` Miklos Szeredi
2010-10-19 17:06 ` Peter Zijlstra
2010-10-19 3:42 ` [patch 08/35] fs: icache lock i_count npiggin
2010-10-19 10:16 ` Boaz Harrosh
2010-10-20 2:14 ` Nick Piggin
2010-10-19 3:42 ` [patch 09/35] fs: icache lock lru/writeback lists npiggin
2010-10-19 3:42 ` [patch 10/35] fs: icache atomic inodes_stat npiggin
2010-10-19 3:42 ` [patch 11/35] fs: icache lock inode state npiggin
2010-10-19 3:42 ` [patch 12/35] fs: inode atomic last_ino, iunique lock npiggin
2010-10-19 3:42 ` [patch 13/35] fs: icache remove inode_lock npiggin
2010-10-19 3:42 ` [patch 14/35] fs: icache factor hash lock into functions npiggin
2010-10-19 3:42 ` [patch 15/35] fs: icache per-bucket inode hash locks npiggin
2010-10-19 3:42 ` [patch 16/35] fs: icache lazy inode lru npiggin
2010-10-19 3:42 ` [patch 17/35] fs: icache RCU free inodes npiggin
2010-10-19 3:42 ` [patch 18/35] fs: avoid inode RCU freeing for pseudo fs npiggin
2010-10-19 3:42 ` [patch 19/35] fs: icache remove redundant i_sb_list umount locking npiggin
2010-10-20 12:46 ` Al Viro
2010-10-20 13:03 ` Nick Piggin
2010-10-20 13:27 ` Al Viro
2010-10-19 3:42 ` [patch 20/35] fs: icache rcu walk for i_sb_list npiggin
2010-10-19 3:42 ` npiggin [this message]
2010-10-19 3:42 ` [patch 22/35] fs: icache per-cpu last_ino allocator npiggin
2010-10-19 3:42 ` [patch 23/35] fs: icache use per-CPU lists and locks for sb inode lists npiggin
2010-10-19 15:33 ` Miklos Szeredi
2010-10-20 2:37 ` Nick Piggin
2010-10-19 3:42 ` [patch 24/35] fs: icache use RCU to avoid locking in hash lookups npiggin
2010-10-19 3:42 ` [patch 25/35] fs: icache reduce some locking overheads npiggin
2010-10-19 3:42 ` [patch 26/35] fs: icache alloc anonymous inode allocation npiggin
2010-10-19 15:50 ` Miklos Szeredi
2010-10-20 2:38 ` Nick Piggin
2010-10-19 16:33 ` Christoph Hellwig
2010-10-20 3:07 ` Nick Piggin
2010-10-19 3:42 ` [patch 27/35] fs: icache split IO and LRU lists npiggin
2010-10-19 16:12 ` Miklos Szeredi
2010-10-20 2:41 ` Nick Piggin
2010-10-19 3:42 ` [patch 28/35] fs: icache split writeback and lru locks npiggin
2010-10-19 3:42 ` [patch 29/35] fs: icache per-bdi writeback list locking npiggin
2010-10-19 3:42 ` [patch 30/35] fs: icache lazy LRU avoid LRU locking after IO operation npiggin
2010-10-19 3:42 ` [patch 31/35] fs: icache per-zone inode LRU npiggin
2010-10-19 12:38 ` Dave Chinner
2010-10-20 2:35 ` Nick Piggin
2010-10-20 3:12 ` Nick Piggin
2010-10-20 3:12 ` Nick Piggin
2010-10-20 9:43 ` Dave Chinner
2010-10-20 9:43 ` Dave Chinner
2010-10-20 10:02 ` Nick Piggin
2010-10-20 10:02 ` Nick Piggin
2010-10-20 3:14 ` KOSAKI Motohiro
2010-10-20 3:20 ` Nick Piggin
2010-10-20 3:29 ` KOSAKI Motohiro
2010-10-20 10:19 ` Dave Chinner
2010-10-20 10:41 ` Nick Piggin
2010-10-19 3:42 ` [patch 32/35] fs: icache minimise I_FREEING latency npiggin
2010-10-19 3:42 ` [patch 33/35] fs: icache introduce inode_get/inode_get_ilock npiggin
2010-10-19 10:17 ` Boaz Harrosh
2010-10-20 2:17 ` Nick Piggin
2010-10-19 3:42 ` [patch 34/35] fs: inode rename i_count to i_refs npiggin
2010-10-19 3:42 ` [patch 35/35] fs: icache document more lock orders npiggin
2010-10-19 16:22 ` [patch 00/35] my inode scaling series for review Christoph Hellwig
2010-10-20 3:05 ` Nick Piggin
2010-10-20 13:14 ` Al Viro
2010-10-20 13:59 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101019034657.693361350@kernel.dk \
--to=npiggin@kernel.dk \
--cc=eric.dumazet@gmail.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.