linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH 14/17] fs: Inode counters do not need to be atomic.
Date: Wed, 29 Sep 2010 22:18:46 +1000	[thread overview]
Message-ID: <1285762729-17928-15-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1285762729-17928-1-git-send-email-david@fromorbit.com>

From: Nick Piggin <npiggin@suse.de>

Atomics for counters do not scale on large machines, so convert them
back to normal variables protected by spin locks. We can do this
because the counters are associated with specific list operations
that are protected by locks; nr_inodes can be protected by the
sb_inode_list_lock, and nr_unused can be protected by the
wb_inode_list_lock.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/fs-writeback.c  |    6 ++----
 fs/inode.c         |   30 ++++++++++++------------------
 include/linux/fs.h |   12 ++++++------
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 432a4df..8e390e8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -743,8 +743,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 	wb->last_old_flush = jiffies;
 	nr_pages = global_page_state(NR_FILE_DIRTY) +
 			global_page_state(NR_UNSTABLE_NFS) +
-			(atomic_read(&inodes_stat.nr_inodes) -
-			atomic_read(&inodes_stat.nr_unused));
+			inodes_stat.nr_inodes - inodes_stat.nr_unused;
 
 	if (nr_pages) {
 		struct wb_writeback_work work = {
@@ -1116,8 +1115,7 @@ void writeback_inodes_sb(struct super_block *sb)
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	work.nr_pages = nr_dirty + nr_unstable +
-			(atomic_read(&inodes_stat.nr_inodes) -
-			atomic_read(&inodes_stat.nr_unused));
+			inodes_stat.nr_inodes - inodes_stat.nr_unused;
 
 	bdi_queue_work(sb->s_bdi, &work);
 	wait_for_completion(&done);
diff --git a/fs/inode.c b/fs/inode.c
index 50599d7..d279517 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -139,8 +139,8 @@ static DECLARE_RWSEM(iprune_sem);
  * Statistics gathering..
  */
 struct inodes_stat_t inodes_stat = {
-	.nr_inodes = ATOMIC_INIT(0),
-	.nr_unused = ATOMIC_INIT(0),
+	.nr_inodes = 0,
+	.nr_unused = 0,
 };
 
 static struct kmem_cache *inode_cachep __read_mostly;
@@ -376,7 +376,6 @@ static void dispose_list(struct list_head *head)
 		destroy_inode(inode);
 		nr_disposed++;
 	}
-	atomic_sub(nr_disposed, &inodes_stat.nr_inodes);
 }
 
 /*
@@ -385,7 +384,7 @@ static void dispose_list(struct list_head *head)
 static int invalidate_list(struct list_head *head, struct list_head *dispose)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0;
 
 	next = head->next;
 	for (;;) {
@@ -413,19 +412,17 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 		if (!inode->i_count) {
 			spin_lock(&wb_inode_list_lock);
 			list_del(&inode->i_list);
+			inodes_stat.nr_unused--;
 			spin_unlock(&wb_inode_list_lock);
 			WARN_ON(inode->i_state & I_NEW);
 			inode->i_state |= I_FREEING;
 			spin_unlock(&inode->i_lock);
 			list_add(&inode->i_list, dispose);
-			count++;
 			continue;
 		}
 		spin_unlock(&inode->i_lock);
 		busy = 1;
 	}
-	/* only unused inodes may be cached with i_count zero */
-	atomic_sub(count, &inodes_stat.nr_unused);
 	return busy;
 }
 
@@ -471,7 +468,6 @@ EXPORT_SYMBOL(invalidate_inodes);
 static void prune_icache(int nr_to_scan)
 {
 	LIST_HEAD(freeable);
-	int nr_pruned = 0;
 	unsigned long reap = 0;
 
 	down_read(&iprune_sem);
@@ -492,7 +488,7 @@ again:
 		if (inode->i_count || (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_list);
 			spin_unlock(&inode->i_lock);
-			atomic_dec(&inodes_stat.nr_unused);
+			inodes_stat.nr_unused--;
 			continue;
 		}
 		if (inode->i_state) {
@@ -518,9 +514,8 @@ again:
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
 		spin_unlock(&inode->i_lock);
-		nr_pruned++;
+		inodes_stat.nr_unused--;
 	}
-	atomic_sub(nr_pruned, &inodes_stat.nr_unused);
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
@@ -552,8 +547,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 			return -1;
 		prune_icache(nr);
 	}
-	return (atomic_read(&inodes_stat.nr_unused) / 100) *
-					sysctl_vfs_cache_pressure;
+	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker icache_shrinker = {
@@ -649,7 +643,7 @@ static inline void
 __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
 			struct inode *inode)
 {
-	atomic_inc(&inodes_stat.nr_inodes);
+	inodes_stat.nr_inodes++;
 	list_add(&inode->i_sb_list, &sb->s_inodes);
 	spin_unlock(&sb_inode_list_lock);
 	if (b) {
@@ -1325,9 +1319,9 @@ static void iput_final(struct inode *inode)
 		if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
 			spin_lock(&wb_inode_list_lock);
 			list_move(&inode->i_list, &inode_unused);
+			inodes_stat.nr_unused++;
 			spin_unlock(&wb_inode_list_lock);
 		}
-		atomic_inc(&inodes_stat.nr_unused);
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&sb_inode_list_lock);
@@ -1347,16 +1341,16 @@ static void iput_final(struct inode *inode)
 	if (!list_empty(&inode->i_list)) {
 		spin_lock(&wb_inode_list_lock);
 		list_del_init(&inode->i_list);
-		spin_unlock(&wb_inode_list_lock);
 		if (!inode->i_state)
-			atomic_dec(&inodes_stat.nr_unused);
+			inodes_stat.nr_unused--;
+		spin_unlock(&wb_inode_list_lock);
 	}
 	list_del_init(&inode->i_sb_list);
+	inodes_stat.nr_inodes--;
 	spin_unlock(&sb_inode_list_lock);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
 	spin_unlock(&inode->i_lock);
-	atomic_dec(&inodes_stat.nr_inodes);
 	evict(inode);
 
 	/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 096a5eb..3a43313 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -32,6 +32,12 @@
 #define SEEK_END	2	/* seek relative to end of file */
 #define SEEK_MAX	SEEK_END
 
+struct inodes_stat_t {
+	int nr_inodes;
+	int nr_unused;
+	int dummy[5];		/* padding for sysctl ABI compatibility */
+};
+
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
 	int nr_files;		/* read only */
@@ -410,12 +416,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 			ssize_t bytes, void *private, int ret,
 			bool is_async);
 
-struct inodes_stat_t {
-	atomic_t nr_inodes;
-	atomic_t nr_unused;
-	int dummy[5];		/* padding for sysctl ABI compatibility */
-};
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
-- 
1.7.1

  parent reply	other threads:[~2010-09-29 12:18 UTC|newest]

Thread overview: 111+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-29 12:18 [PATCH 0/17] fs: Inode cache scalability Dave Chinner
2010-09-29 12:18 ` [PATCH 01/17] kernel: add bl_list Dave Chinner
2010-09-30  4:52   ` Andrew Morton
2010-10-16  7:55     ` Nick Piggin
2010-10-16 16:28       ` Christoph Hellwig
2010-10-01  5:48   ` Christoph Hellwig
2010-09-29 12:18 ` [PATCH 02/17] fs: icache lock s_inodes list Dave Chinner
2010-10-01  5:49   ` Christoph Hellwig
2010-10-16  7:54     ` Nick Piggin
2010-10-16 16:12       ` Christoph Hellwig
2010-10-16 17:09         ` Nick Piggin
2010-10-17  0:42           ` Christoph Hellwig
2010-10-17  2:03             ` Nick Piggin
2010-09-29 12:18 ` [PATCH 03/17] fs: icache lock inode hash Dave Chinner
2010-09-30  4:52   ` Andrew Morton
2010-09-30  6:13     ` Dave Chinner
2010-10-01  6:06   ` Christoph Hellwig
2010-10-16  7:57     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 04/17] fs: icache lock i_state Dave Chinner
2010-10-01  5:54   ` Christoph Hellwig
2010-10-16  7:54     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 05/17] fs: icache lock i_count Dave Chinner
2010-09-30  4:52   ` Andrew Morton
2010-10-01  5:55     ` Christoph Hellwig
2010-10-01  6:04       ` Andrew Morton
2010-10-01  6:16         ` Christoph Hellwig
2010-10-01  6:23           ` Andrew Morton
2010-09-29 12:18 ` [PATCH 06/17] fs: icache lock lru/writeback lists Dave Chinner
2010-09-30  4:52   ` Andrew Morton
2010-09-30  6:16     ` Dave Chinner
2010-10-16  7:55     ` Nick Piggin
2010-10-01  6:01   ` Christoph Hellwig
2010-10-05 22:30     ` Dave Chinner
2010-09-29 12:18 ` [PATCH 07/17] fs: icache atomic inodes_stat Dave Chinner
2010-09-30  4:52   ` Andrew Morton
2010-09-30  6:20     ` Dave Chinner
2010-09-30  6:37       ` Andrew Morton
2010-10-16  7:56     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 08/17] fs: icache protect inode state Dave Chinner
2010-10-01  6:02   ` Christoph Hellwig
2010-10-16  7:54     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 09/17] fs: Make last_ino, iunique independent of inode_lock Dave Chinner
2010-09-30  4:53   ` Andrew Morton
2010-10-01  6:08   ` Christoph Hellwig
2010-10-16  7:54     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 10/17] fs: icache remove inode_lock Dave Chinner
2010-09-29 12:18 ` [PATCH 11/17] fs: Factor inode hash operations into functions Dave Chinner
2010-10-01  6:06   ` Christoph Hellwig
2010-10-16  7:54     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 12/17] fs: Introduce per-bucket inode hash locks Dave Chinner
2010-09-30  1:52   ` Christoph Hellwig
2010-09-30  2:43     ` Dave Chinner
2010-10-16  7:55     ` Nick Piggin
2010-09-29 12:18 ` [PATCH 13/17] fs: Implement lazy LRU updates for inodes Dave Chinner
2010-09-30  2:05   ` Christoph Hellwig
2010-10-16  7:54     ` Nick Piggin
2010-09-29 12:18 ` Dave Chinner [this message]
2010-09-29 12:18 ` [PATCH 15/17] fs: inode per-cpu last_ino allocator Dave Chinner
2010-09-30  2:07   ` Christoph Hellwig
2010-10-06  6:29     ` Dave Chinner
2010-10-06  8:51       ` Christoph Hellwig
2010-09-30  4:53   ` Andrew Morton
2010-09-30  5:36     ` Eric Dumazet
2010-09-30  7:53       ` Eric Dumazet
2010-09-30  8:14         ` Andrew Morton
2010-09-30 10:22           ` [PATCH] " Eric Dumazet
2010-09-30 16:45             ` Andrew Morton
2010-09-30 17:28               ` Eric Dumazet
2010-09-30 17:39                 ` Andrew Morton
2010-09-30 18:05                   ` Eric Dumazet
2010-10-01  6:12                 ` Christoph Hellwig
2010-10-01  6:45                   ` Eric Dumazet
2010-10-16  6:36                 ` Nick Piggin
2010-10-16  6:40                   ` Nick Piggin
2010-09-29 12:18 ` [PATCH 16/17] fs: Convert nr_inodes to a per-cpu counter Dave Chinner
2010-09-30  2:12   ` Christoph Hellwig
2010-09-30  4:53   ` Andrew Morton
2010-09-30  6:10     ` Dave Chinner
2010-10-16  7:55       ` Nick Piggin
2010-10-16  8:29         ` Eric Dumazet
2010-10-16  9:07           ` Andrew Morton
2010-10-16  9:31             ` Eric Dumazet
2010-10-16 14:19               ` [PATCH] percpu_counter : add percpu_counter_add_fast() Eric Dumazet
2010-10-18 15:24                 ` Christoph Lameter
2010-10-18 15:39                   ` Eric Dumazet
2010-10-18 16:12                     ` Christoph Lameter
2010-10-21 22:37                 ` Andrew Morton
2010-10-21 23:10                   ` Christoph Lameter
2010-10-22  0:45                     ` Andrew Morton
2010-10-22  1:55                       ` Andrew Morton
2010-10-22  1:58                         ` Nick Piggin
2010-10-22  2:14                           ` Andrew Morton
2010-10-22  4:12                       ` Eric Dumazet
2010-10-21 22:43                 ` Andrew Morton
2010-10-21 22:58                   ` Eric Dumazet
2010-10-21 23:18                     ` Andrew Morton
2010-10-21 23:22                       ` Eric Dumazet
2010-10-21 22:31               ` [PATCH 16/17] fs: Convert nr_inodes to a per-cpu counter Andrew Morton
2010-10-21 22:58                 ` Eric Dumazet
2010-10-02 16:02     ` Christoph Hellwig
2010-09-29 12:18 ` [PATCH 17/17] fs: Clean up inode reference counting Dave Chinner
2010-09-30  2:15   ` Christoph Hellwig
2010-10-16  7:55     ` Nick Piggin
2010-10-16 16:14       ` Christoph Hellwig
2010-10-16 17:09         ` Nick Piggin
2010-09-30  4:53   ` Andrew Morton
2010-09-29 23:57 ` [PATCH 0/17] fs: Inode cache scalability Christoph Hellwig
2010-09-30  0:24   ` Dave Chinner
2010-09-30  2:21 ` Christoph Hellwig
2010-10-02 23:10 ` Carlos Carvalho
2010-10-04  7:22   ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1285762729-17928-15-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).