All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH 02/18] fs: Convert nr_inodes and nr_unused to per-cpu counters
Date: Fri,  8 Oct 2010 16:21:16 +1100	[thread overview]
Message-ID: <1286515292-15882-3-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

The number of inodes allocated does not need to be tied to the
addition or removal of an inode to/from a list. If the counters are
not protected by a list lock, we can update them when inodes are
initialised or destroyed, but to do that we need to convert the
counters to be per-cpu (i.e. independent of any lock). This gives us
the freedom to change the list/locking implementation without
needing to care about the counters.

Based on a patch originally from Eric Dumazet.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/fs-writeback.c  |    5 +--
 fs/inode.c         |   65 ++++++++++++++++++++++++++++++++++++---------------
 include/linux/fs.h |    4 ++-
 kernel/sysctl.c    |    4 +-
 4 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ab38fef..58a95b7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 	wb->last_old_flush = jiffies;
 	nr_pages = global_page_state(NR_FILE_DIRTY) +
 			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+			get_nr_dirty_inodes();
 
 	if (nr_pages) {
 		struct wb_writeback_work work = {
@@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb)
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	work.nr_pages = nr_dirty + nr_unstable +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+	work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
 
 	bdi_queue_work(sb->s_bdi, &work);
 	wait_for_completion(&done);
diff --git a/fs/inode.c b/fs/inode.c
index 8646433..f04d501 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem);
  */
 struct inodes_stat_t inodes_stat;
 
+static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
+static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+
 static struct kmem_cache *inode_cachep __read_mostly;
 
+static inline int get_nr_inodes(void)
+{
+	return percpu_counter_sum_positive(&nr_inodes);
+}
+
+static inline int get_nr_inodes_unused(void)
+{
+	return percpu_counter_sum_positive(&nr_inodes_unused);
+}
+
+int get_nr_dirty_inodes(void)
+{
+	int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+	return nr_dirty > 0 ? nr_dirty : 0;
+
+}
+
+/*
+ * Handle the inode sysctls: refresh inodes_stat from the per-cpu counters
+ */
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_inodes(ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	inodes_stat.nr_inodes = get_nr_inodes();
+	inodes_stat.nr_unused = get_nr_inodes_unused();
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
 static void wake_up_inode(struct inode *inode)
 {
 	/*
@@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_fsnotify_mask = 0;
 #endif
 
+	percpu_counter_inc(&nr_inodes);
+
 	return 0;
 out:
 	return -ENOMEM;
@@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode)
 	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
 		posix_acl_release(inode->i_default_acl);
 #endif
+	percpu_counter_dec(&nr_inodes);
 }
 EXPORT_SYMBOL(__destroy_inode);
 
@@ -286,7 +322,7 @@ void __iget(struct inode *inode)
 
 	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
 		list_move(&inode->i_list, &inode_in_use);
-	inodes_stat.nr_unused--;
+	percpu_counter_dec(&nr_inodes_unused);
 }
 
 void end_writeback(struct inode *inode)
@@ -327,8 +363,6 @@ static void evict(struct inode *inode)
  */
 static void dispose_list(struct list_head *head)
 {
-	int nr_disposed = 0;
-
 	while (!list_empty(head)) {
 		struct inode *inode;
 
@@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head)
 
 		wake_up_inode(inode);
 		destroy_inode(inode);
-		nr_disposed++;
 	}
-	spin_lock(&inode_lock);
-	inodes_stat.nr_inodes -= nr_disposed;
-	spin_unlock(&inode_lock);
 }
 
 /*
@@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head)
 static int invalidate_list(struct list_head *head, struct list_head *dispose)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0;
 
 	next = head->next;
 	for (;;) {
@@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 			list_move(&inode->i_list, dispose);
 			WARN_ON(inode->i_state & I_NEW);
 			inode->i_state |= I_FREEING;
-			count++;
+			percpu_counter_dec(&nr_inodes_unused);
 			continue;
 		}
 		busy = 1;
 	}
-	/* only unused inodes may be cached with i_count zero */
-	inodes_stat.nr_unused -= count;
 	return busy;
 }
 
@@ -448,7 +476,6 @@ static int can_unuse(struct inode *inode)
 static void prune_icache(int nr_to_scan)
 {
 	LIST_HEAD(freeable);
-	int nr_pruned = 0;
 	int nr_scanned;
 	unsigned long reap = 0;
 
@@ -484,9 +511,8 @@ static void prune_icache(int nr_to_scan)
 		list_move(&inode->i_list, &freeable);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
-		nr_pruned++;
+		percpu_counter_dec(&nr_inodes_unused);
 	}
-	inodes_stat.nr_unused -= nr_pruned;
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
@@ -518,7 +544,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 			return -1;
 		prune_icache(nr);
 	}
-	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker icache_shrinker = {
@@ -595,7 +621,6 @@ static inline void
 __inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
 			struct inode *inode)
 {
-	inodes_stat.nr_inodes++;
 	list_add(&inode->i_list, &inode_in_use);
 	list_add(&inode->i_sb_list, &sb->s_inodes);
 	if (head)
@@ -1215,7 +1240,7 @@ static void iput_final(struct inode *inode)
 	if (!drop) {
 		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
 			list_move(&inode->i_list, &inode_unused);
-		inodes_stat.nr_unused++;
+		percpu_counter_inc(&nr_inodes_unused);
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode_lock);
 			return;
@@ -1227,14 +1252,13 @@ static void iput_final(struct inode *inode)
 		spin_lock(&inode_lock);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
-		inodes_stat.nr_unused--;
+		percpu_counter_dec(&nr_inodes_unused);
 		hlist_del_init(&inode->i_hash);
 	}
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 	evict(inode);
 	spin_lock(&inode_lock);
@@ -1489,6 +1513,7 @@ void __init inode_init_early(void)
 
 	for (loop = 0; loop < (1 << i_hash_shift); loop++)
 		INIT_HLIST_HEAD(&inode_hashtable[loop]);
+
 }
 
 void __init inode_init(void)
@@ -1503,6 +1528,8 @@ void __init inode_init(void)
 					 SLAB_MEM_SPREAD),
 					 init_once);
 	register_shrinker(&icache_shrinker);
+	percpu_counter_init(&nr_inodes, 0);
+	percpu_counter_init(&nr_inodes_unused, 0);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 63d069b..1fb92f9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -407,6 +407,7 @@ extern struct files_stat_struct files_stat;
 extern int get_max_files(void);
 extern int sysctl_nr_open;
 extern struct inodes_stat_t inodes_stat;
+extern int get_nr_dirty_inodes(void);
 extern int leases_enable, lease_break_time;
 
 struct buffer_head;
@@ -2474,7 +2475,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 struct ctl_table;
 int proc_nr_files(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_inodes(struct ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos);
 int __init get_filesystem_list(char *buf);
 
 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f88552c..33d1733 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = {
 		.data		= &inodes_stat,
 		.maxlen		= 2*sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_nr_inodes,
 	},
 	{
 		.procname	= "inode-state",
 		.data		= &inodes_stat,
 		.maxlen		= 7*sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_nr_inodes,
 	},
 	{
 		.procname	= "file-nr",
-- 
1.7.1


  parent reply	other threads:[~2010-10-08  5:22 UTC|newest]

Thread overview: 168+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-08  5:21 fs: Inode cache scalability V2 Dave Chinner
2010-10-08  5:21 ` [PATCH 01/18] kernel: add bl_list Dave Chinner
2010-10-08  8:18   ` Andi Kleen
2010-10-08 10:33     ` Dave Chinner
2010-10-08  5:21 ` Dave Chinner [this message]
2010-10-08  7:01   ` [PATCH 02/18] fs: Convert nr_inodes and nr_unused to per-cpu counters Christoph Hellwig
2010-10-08  5:21 ` [PATCH 03/18] fs: keep inode with backing-dev Dave Chinner
2010-10-08  7:01   ` Christoph Hellwig
2010-10-08  7:27     ` Dave Chinner
2010-10-08  5:21 ` [PATCH 04/18] fs: Implement lazy LRU updates for inodes Dave Chinner
2010-10-08  7:08   ` Christoph Hellwig
2010-10-08  7:31     ` Dave Chinner
2010-10-08  9:08   ` Al Viro
2010-10-08  9:51     ` Dave Chinner
2010-10-08  5:21 ` [PATCH 05/18] fs: inode split IO and LRU lists Dave Chinner
2010-10-08  7:14   ` Christoph Hellwig
2010-10-08  7:38     ` Dave Chinner
2010-10-08  9:16   ` Al Viro
2010-10-08  9:58     ` Dave Chinner
2010-10-08  5:21 ` [PATCH 06/18] fs: Clean up inode reference counting Dave Chinner
2010-10-08  7:20   ` Christoph Hellwig
2010-10-08  7:46     ` Dave Chinner
2010-10-08  8:15       ` Christoph Hellwig
2010-10-08  5:21 ` [PATCH 07/18] exofs: use iput() for inode reference count decrements Dave Chinner
2010-10-08  7:21   ` Christoph Hellwig
2010-10-16  7:56   ` Nick Piggin
2010-10-16 16:29     ` Christoph Hellwig
2010-10-17 15:41       ` Boaz Harrosh
2010-10-08  5:21 ` [PATCH 08/18] fs: add inode reference coutn read accessor Dave Chinner
2010-10-08  7:24   ` Christoph Hellwig
2010-10-08  5:21 ` [PATCH 09/18] fs: rework icount to be a locked variable Dave Chinner
2010-10-08  7:27   ` Christoph Hellwig
2010-10-08  7:50     ` Dave Chinner
2010-10-08  8:17       ` Christoph Hellwig
2010-10-08 13:16         ` Chris Mason
2010-10-08  9:32   ` Al Viro
2010-10-08 10:15     ` Dave Chinner
2010-10-08 13:14       ` Chris Mason
2010-10-08 13:53       ` Christoph Hellwig
2010-10-08 14:09         ` Dave Chinner
2010-10-08  5:21 ` [PATCH 10/18] fs: Factor inode hash operations into functions Dave Chinner
2010-10-08  7:29   ` Christoph Hellwig
2010-10-08  9:41     ` Al Viro
2010-10-08  5:21 ` [PATCH 11/18] fs: Introduce per-bucket inode hash locks Dave Chinner
2010-10-08  7:33   ` Christoph Hellwig
2010-10-08  7:51     ` Dave Chinner
2010-10-08  9:49   ` Al Viro
2010-10-08  9:51     ` Christoph Hellwig
2010-10-08 13:43   ` Christoph Hellwig
2010-10-08 14:17     ` Dave Chinner
2010-10-08 18:54   ` Christoph Hellwig
2010-10-16  7:57     ` Nick Piggin
2010-10-16 16:16       ` Christoph Hellwig
2010-10-16 17:12         ` Nick Piggin
2010-10-17  0:45           ` Christoph Hellwig
2010-10-17  2:06             ` Nick Piggin
2010-10-17  0:46           ` Dave Chinner
2010-10-17  2:25             ` Nick Piggin
2010-10-18 16:16               ` Andi Kleen
2010-10-18 16:21                 ` Christoph Hellwig
2010-10-19  7:00                   ` Nick Piggin
2010-10-19 16:50                     ` Christoph Hellwig
2010-10-20  3:11                       ` Nick Piggin
2010-10-24 15:44                       ` Thomas Gleixner
2010-10-24 21:17                         ` Nick Piggin
2010-10-25  4:41                           ` Thomas Gleixner
2010-10-25  7:04                             ` Thomas Gleixner
2010-10-26  0:12                               ` Nick Piggin
2010-10-26  0:06                             ` Nick Piggin
2010-10-26  0:06                               ` Nick Piggin
2010-10-08  5:21 ` [PATCH 12/18] fs: add a per-superblock lock for the inode list Dave Chinner
2010-10-08  7:35   ` Christoph Hellwig
2010-10-08  5:21 ` [PATCH 13/18] fs: split locking of inode writeback and LRU lists Dave Chinner
2010-10-08  7:42   ` Christoph Hellwig
2010-10-08  8:00     ` Dave Chinner
2010-10-08  8:18       ` Christoph Hellwig
2010-10-16  7:57         ` Nick Piggin
2010-10-16 16:20           ` Christoph Hellwig
2010-10-16 17:19             ` Nick Piggin
2010-10-17  1:00               ` Dave Chinner
2010-10-17  2:20                 ` Nick Piggin
2010-10-08  5:21 ` [PATCH 14/18] fs: Protect inode->i_state with th einode->i_lock Dave Chinner
2010-10-08  7:49   ` Christoph Hellwig
2010-10-08  8:04     ` Dave Chinner
2010-10-08  8:18       ` Christoph Hellwig
2010-10-16  7:57         ` Nick Piggin
2010-10-16 16:19           ` Christoph Hellwig
2010-10-09  8:05       ` Christoph Hellwig
2010-10-09 14:52       ` Matthew Wilcox
2010-10-10  2:01         ` Dave Chinner
2010-10-08  5:21 ` [PATCH 15/18] fs: introduce a per-cpu last_ino allocator Dave Chinner
2010-10-08  7:53   ` Christoph Hellwig
2010-10-08  8:05     ` Dave Chinner
2010-10-08  8:22   ` Andi Kleen
2010-10-08  8:44     ` Christoph Hellwig
2010-10-08  9:58     ` Al Viro
2010-10-08 10:09       ` Andi Kleen
2010-10-08 10:19         ` Al Viro
2010-10-08 10:20           ` Eric Dumazet
2010-10-08 10:20             ` Eric Dumazet
2010-10-08  9:56   ` Al Viro
2010-10-08 10:03     ` Christoph Hellwig
2010-10-08 10:20       ` Eric Dumazet
2010-10-08 10:20         ` Eric Dumazet
2010-10-08 13:48         ` Christoph Hellwig
2010-10-08 14:06           ` Eric Dumazet
2010-10-08 14:06             ` Eric Dumazet
2010-10-08 19:10             ` Christoph Hellwig
2010-10-09 17:14             ` Matthew Wilcox
2010-10-16  7:57       ` Nick Piggin
2010-10-16 16:22         ` Christoph Hellwig
2010-10-16 17:21           ` Nick Piggin
2010-10-08  5:21 ` [PATCH 16/18] fs: Make iunique independent of inode_lock Dave Chinner
2010-10-08  7:55   ` Christoph Hellwig
2010-10-08  8:06     ` Dave Chinner
2010-10-08  8:19       ` Christoph Hellwig
2010-10-08  5:21 ` [PATCH 17/18] fs: icache remove inode_lock Dave Chinner
2010-10-08  8:03   ` Christoph Hellwig
2010-10-08  8:09     ` Dave Chinner
2010-10-13  7:20   ` Nick Piggin
2010-10-13  7:27     ` Nick Piggin
2010-10-13 11:28       ` Christoph Hellwig
2010-10-13 12:03         ` Nick Piggin
2010-10-13 12:20           ` Christoph Hellwig
2010-10-13 12:25             ` Nick Piggin
2010-10-13 10:42     ` Eric Dumazet
2010-10-13 12:07       ` Nick Piggin
2010-10-13 11:25     ` Christoph Hellwig
2010-10-13 12:30       ` Nick Piggin
2010-10-13 23:23         ` Dave Chinner
2010-10-14  9:06           ` Nick Piggin
2010-10-14  9:13             ` Nick Piggin
2010-10-14 14:41             ` Christoph Hellwig
2010-10-15  0:14               ` Nick Piggin
2010-10-15  3:13                 ` Dave Chinner
2010-10-15  3:30                   ` Nick Piggin
2010-10-15  3:44                     ` Nick Piggin
2010-10-15  6:41                       ` Nick Piggin
2010-10-15 10:59                         ` Dave Chinner
2010-10-15 13:03                           ` Nick Piggin
2010-10-15 13:29                             ` Nick Piggin
2010-10-15 17:33                               ` Nick Piggin
2010-10-15 17:52                                 ` Christoph Hellwig
2010-10-15 18:02                                   ` Nick Piggin
2010-10-15 18:14                                     ` Nick Piggin
2010-10-16  2:09                                     ` Nick Piggin
2010-10-15 14:11                             ` Nick Piggin
2010-10-15 20:50                           ` Nick Piggin
2010-10-15 20:56                             ` Nick Piggin
2010-10-15  4:04               ` Nick Piggin
2010-10-15 11:33                 ` Dave Chinner
2010-10-15 13:14                   ` Nick Piggin
2010-10-15 15:38                   ` Nick Piggin
2010-10-16  7:57   ` Nick Piggin
2010-10-08  5:21 ` [PATCH 18/18] fs: Reduce inode I_FREEING and factor inode disposal Dave Chinner
2010-10-08  8:11   ` Christoph Hellwig
2010-10-08 10:18   ` Al Viro
2010-10-08 10:52     ` Dave Chinner
2010-10-08 12:10       ` Al Viro
2010-10-08 13:55         ` Dave Chinner
2010-10-09 17:22   ` Matthew Wilcox
2010-10-09  8:08 ` [PATCH 19/18] fs: split __inode_add_to_list Christoph Hellwig
2010-10-12 10:47   ` Dave Chinner
2010-10-12 10:47     ` Dave Chinner
2010-10-12 11:31     ` Christoph Hellwig
2010-10-12 12:05       ` Dave Chinner
2010-10-09 11:18 ` [PATCH 20/18] fs: do not assign default i_ino in new_inode Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2010-10-13  0:15 fs: Inode cache scalability V3 Dave Chinner
2010-10-13  0:15 ` [PATCH 02/18] fs: Convert nr_inodes and nr_unused to per-cpu counters Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1286515292-15882-3-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.