* [PATCH 12/18] fs: add a per-superblock lock for the inode list
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
To allow removal of the inode_lock, we first need to protect the
superblock inode list with its own lock instead of using the
inode_lock. Add a lock to the superblock to protect this list and
nest the new lock inside the inode_lock around the list operations
it needs to protect.
Based on a patch originally from Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/drop_caches.c | 4 ++++
fs/fs-writeback.c | 4 ++++
fs/inode.c | 22 +++++++++++++++++++---
fs/notify/inode_mark.c | 3 +++
fs/quota/dquot.c | 6 ++++++
fs/super.c | 1 +
include/linux/fs.h | 1 +
7 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c4f3e06..c808ca8 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -17,18 +17,22 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
struct inode *inode, *toput_inode = NULL;
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
continue;
if (inode->i_mapping->nrpages == 0)
continue;
iref_locked(inode);
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
}
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
iput(toput_inode);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d63ab47..29f8032 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1026,6 +1026,7 @@ static void wait_sb_inodes(struct super_block *sb)
WARN_ON(!rwsem_is_locked(&sb->s_umount));
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
/*
* Data integrity sync. Must wait for all pages under writeback,
@@ -1043,6 +1044,7 @@ static void wait_sb_inodes(struct super_block *sb)
if (mapping->nrpages == 0)
continue;
iref_locked(inode);
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
/*
* We hold a reference to 'inode' so it couldn't have
@@ -1060,7 +1062,9 @@ static void wait_sb_inodes(struct super_block *sb)
cond_resched();
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
}
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
iput(old_inode);
}
diff --git a/fs/inode.c b/fs/inode.c
index 3c07719..e6bb36d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,13 +33,18 @@
* i_ref
* inode_hash_bucket lock protects:
* inode hash table, i_hash
+ * sb inode lock protects:
+ * s_inodes, i_sb_list
*
* Lock orders
* inode_lock
* inode hash bucket lock
* inode->i_lock
+ *
+ * inode_lock
+ * sb inode lock
+ * inode->i_lock
*/
-
/*
* This is needed for the following functions:
* - inode_has_buffers
@@ -488,7 +493,9 @@ static void dispose_list(struct list_head *head)
spin_lock(&inode_lock);
__remove_inode_hash(inode);
+ spin_lock(&inode->i_sb->s_inodes_lock);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&inode->i_sb->s_inodes_lock);
spin_unlock(&inode_lock);
wake_up_inode(inode);
@@ -499,7 +506,8 @@ static void dispose_list(struct list_head *head)
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_list(struct super_block *sb, struct list_head *head,
+ struct list_head *dispose)
{
struct list_head *next;
int busy = 0;
@@ -516,6 +524,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
* shrink_icache_memory() away.
*/
cond_resched_lock(&inode_lock);
+ cond_resched_lock(&sb->s_inodes_lock);
next = next->next;
if (tmp == head)
@@ -555,8 +564,10 @@ int invalidate_inodes(struct super_block *sb)
down_write(&iprune_sem);
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
fsnotify_unmount_inodes(&sb->s_inodes);
- busy = invalidate_list(&sb->s_inodes, &throw_away);
+ busy = invalidate_list(sb, &sb->s_inodes, &throw_away);
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
dispose_list(&throw_away);
@@ -753,7 +764,9 @@ static inline void
__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
struct inode *inode)
{
+ spin_lock(&sb->s_inodes_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
+ spin_unlock(&sb->s_inodes_lock);
if (b) {
spin_lock_bucket(b);
hlist_bl_add_head(&inode->i_hash, &b->head);
@@ -1397,7 +1410,10 @@ static void iput_final(struct inode *inode)
percpu_counter_dec(&nr_inodes_unused);
}
+ spin_lock(&sb->s_inodes_lock);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb->s_inodes_lock);
+
spin_unlock(&inode_lock);
evict(inode);
remove_inode_hash(inode);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 2fe319b..3389ff0 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -242,6 +242,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
+ struct super_block *sb = inode->i_sb;
/*
* We cannot iref() an inode in state I_FREEING,
@@ -288,6 +289,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
if (need_iput_tmp)
@@ -301,5 +303,6 @@ void fsnotify_unmount_inodes(struct list_head *list)
iput(inode);
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
}
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5199418..b7cbc41 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -897,6 +897,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
#endif
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
continue;
@@ -910,6 +911,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
continue;
iref_locked(inode);
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
iput(old_inode);
@@ -921,7 +923,9 @@ static void add_dquot_ref(struct super_block *sb, int type)
* keep the reference and iput it later. */
old_inode = inode;
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
}
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
iput(old_inode);
@@ -1004,6 +1008,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
int reserved = 0;
spin_lock(&inode_lock);
+ spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
/*
* We have to scan also I_NEW inodes because they can already
@@ -1017,6 +1022,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
remove_inode_dquot_ref(inode, type, tofree_head);
}
}
+ spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
diff --git a/fs/super.c b/fs/super.c
index 8819e3a..d826214 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -76,6 +76,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
+ spin_lock_init(&s->s_inodes_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
/*
* The locking rules for s_lock are up to the
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 34f983f..54c4e86 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1342,6 +1342,7 @@ struct super_block {
#endif
const struct xattr_handler **s_xattr;
+ spinlock_t s_inodes_lock; /* lock for s_inodes */
struct list_head s_inodes; /* all inodes */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
--
1.7.1
^ permalink raw reply related
* [PATCH 09/18] fs: rework icount to be a locked variable
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The inode reference count is currently an atomic variable so that it can be
sampled/modified outside the inode_lock. However, the inode_lock is still
needed to synchronise the final reference count and checks against the inode
state.
To avoid needing the protection of the inode lock, protect the inode reference
count with the per-inode i_lock and convert it to a normal variable. To avoid
existing out-of-tree code accidentally compiling against the new method, rename
the i_count field to i_ref. This is relatively straightforward as there
are limited external references to the i_count field remaining.
Based on work originally from Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/btrfs/inode.c | 8 ++++-
fs/inode.c | 83 ++++++++++++++++++++++++++++++++++++-----------
fs/nfs/nfs4state.c | 2 +-
fs/nilfs2/mdt.c | 2 +-
fs/notify/inode_mark.c | 16 ++++++---
include/linux/fs.h | 2 +-
6 files changed, 84 insertions(+), 29 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2953e9f..9f04478 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1964,8 +1964,14 @@ void btrfs_add_delayed_iput(struct inode *inode)
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct delayed_iput *delayed;
- if (atomic_add_unless(&inode->i_count, -1, 1))
+ /* XXX: filesystems should not play refcount games like this */
+ spin_lock(&inode->i_lock);
+ if (inode->i_ref > 1) {
+ inode->i_ref--;
+ spin_unlock(&inode->i_lock);
return;
+ }
+ spin_unlock(&inode->i_lock);
delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
delayed->inode = inode;
diff --git a/fs/inode.c b/fs/inode.c
index b1dc6dc..5c8a3ea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,6 +26,13 @@
#include <linux/posix_acl.h>
/*
+ * Locking rules.
+ *
+ * inode->i_lock protects:
+ * i_ref
+ */
+
+/*
* This is needed for the following functions:
* - inode_has_buffers
* - invalidate_inode_buffers
@@ -64,9 +71,9 @@ static unsigned int i_hash_shift __read_mostly;
* Each inode can be on two separate lists. One is
* the hash list of the inode, used for lookups. The
* other linked list is the "type" list:
- * "in_use" - valid inode, i_count > 0, i_nlink > 0
+ * "in_use" - valid inode, i_ref > 0, i_nlink > 0
* "dirty" - as "in_use" but also dirty
- * "unused" - valid inode, i_count = 0
+ * "unused" - valid inode, i_ref = 0
*
* A "dirty" list is maintained for each super block,
* allowing for low-overhead inode sync() operations.
@@ -164,7 +171,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
+ inode->i_ref = 1;
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_nlink = 1;
@@ -313,31 +320,38 @@ static void init_once(void *foo)
inode_init_once(inode);
}
+
+/*
+ * inode_lock must be held
+ */
+void iref_locked(struct inode *inode)
+{
+ inode->i_ref++;
+}
EXPORT_SYMBOL_GPL(iref_locked);
void iref(struct inode *inode)
{
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(iref);
/*
- * inode_lock must be held
- */
-void iref_locked(struct inode *inode)
-{
- atomic_inc(&inode->i_count);
-}
-
-/*
* Nobody outside of core code should really be looking at the inode reference
* count. Please don't add new users of this function.
*/
int iref_read(struct inode *inode)
{
- return atomic_read(&inode->i_count);
+ int ref;
+
+ spin_lock(&inode->i_lock);
+ ref = inode->i_ref;
+ spin_unlock(&inode->i_lock);
+ return ref;
}
EXPORT_SYMBOL_GPL(iref_read);
@@ -425,7 +439,9 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
if (inode->i_state & I_NEW)
continue;
invalidate_inode_buffers(inode);
- if (!atomic_read(&inode->i_count)) {
+ spin_lock(&inode->i_lock);
+ if (!inode->i_ref) {
+ spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, dispose);
list_del_init(&inode->i_io);
WARN_ON(inode->i_state & I_NEW);
@@ -433,6 +449,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
percpu_counter_dec(&nr_inodes_unused);
continue;
}
+ spin_unlock(&inode->i_lock);
busy = 1;
}
return busy;
@@ -470,7 +487,7 @@ static int can_unuse(struct inode *inode)
return 0;
if (inode_has_buffers(inode))
return 0;
- if (atomic_read(&inode->i_count))
+ if (iref_read(inode))
return 0;
if (inode->i_data.nrpages)
return 0;
@@ -506,19 +523,22 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_unused.prev, struct inode, i_lru);
- if (atomic_read(&inode->i_count) ||
- (inode->i_state & ~I_REFERENCED)) {
+ spin_lock(&inode->i_lock);
+ if (inode->i_ref || (inode->i_state & ~I_REFERENCED)) {
+ spin_unlock(&inode->i_lock);
list_del_init(&inode->i_lru);
percpu_counter_dec(&nr_inodes_unused);
continue;
}
if (inode->i_state & I_REFERENCED) {
+ spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, &inode_unused);
inode->i_state &= ~I_REFERENCED;
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
@@ -535,7 +555,8 @@ static void prune_icache(int nr_to_scan)
list_move(&inode->i_lru, &inode_unused);
continue;
}
- }
+ } else
+ spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, &freeable);
list_del_init(&inode->i_io);
WARN_ON(inode->i_state & I_NEW);
@@ -788,7 +809,9 @@ static struct inode *get_new_inode(struct super_block *sb,
* us. Use the old inode instead of the one we just
* allocated.
*/
+ spin_lock(&old->i_lock);
iref_locked(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -835,7 +858,9 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
* us. Use the old inode instead of the one we just
* allocated.
*/
+ spin_lock(&old->i_lock);
iref_locked(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -887,9 +912,11 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
spin_lock(&inode_lock);
- if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+ if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
+ spin_lock(&inode->i_lock);
iref_locked(inode);
- else
+ spin_unlock(&inode->i_lock);
+ } else
/*
* Handle the case where s_op->clear_inode is not been
* called yet, and somebody is calling igrab
@@ -929,7 +956,9 @@ static struct inode *ifind(struct super_block *sb,
spin_lock(&inode_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
+ spin_lock(&inode->i_lock);
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
@@ -962,7 +991,9 @@ static struct inode *ifind_fast(struct super_block *sb,
spin_lock(&inode_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
+ spin_lock(&inode->i_lock);
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
@@ -1145,7 +1176,9 @@ int insert_inode_locked(struct inode *inode)
spin_unlock(&inode_lock);
return 0;
}
+ spin_lock(&old->i_lock);
iref_locked(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
@@ -1184,7 +1217,9 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
spin_unlock(&inode_lock);
return 0;
}
+ spin_lock(&old->i_lock);
iref_locked(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
@@ -1324,8 +1359,16 @@ void iput(struct inode *inode)
if (inode) {
BUG_ON(inode->i_state & I_CLEAR);
- if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+ spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
+ inode->i_ref--;
+ if (inode->i_ref == 0) {
+ spin_unlock(&inode->i_lock);
iput_final(inode);
+ return;
+ }
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
}
}
EXPORT_SYMBOL(iput);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3e2f19b..d7fc5d0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -506,8 +506,8 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
state->owner = owner;
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
- state->inode = igrab(inode);
spin_unlock(&inode->i_lock);
+ state->inode = igrab(inode);
/* Note: The reclaim code dictates that we add stateless
* and read-only stateids to the end of the list */
list_add_tail(&state->open_states, &owner->so_states);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 2ee524f..435ba11 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -480,7 +480,7 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
inode->i_sb = sb; /* sb may be NULL for some meta data files */
inode->i_blkbits = nilfs->ns_blocksize_bits;
inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
+ inode->i_ref = 1;
inode->i_nlink = 1;
inode->i_ino = ino;
inode->i_mode = S_IFREG;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 6c54e02..2fe319b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -257,7 +257,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
* actually evict all unreferenced inodes from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- if (!iref_read(inode))
+ spin_lock(&inode->i_lock);
+ if (!inode->i_ref) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
need_iput_tmp = need_iput;
@@ -268,12 +271,17 @@ void fsnotify_unmount_inodes(struct list_head *list)
iref_locked(inode);
else
need_iput_tmp = NULL;
+ spin_unlock(&inode->i_lock);
/* In case the dropping of a reference would nuke next_i. */
- if ((&next_i->i_sb_list != list) && iref_read(inode) &&
- !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
- iref_locked(next_i);
- need_iput = next_i;
+ if (&next_i->i_sb_list != list) {
+ spin_lock(&next_i->i_lock);
+ if (next_i->i_ref &&
+ !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
+ iref_locked(next_i);
+ need_iput = next_i;
+ }
+ spin_unlock(&next_i->i_lock);
}
/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6f0df2a..1162c10 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -730,7 +730,7 @@ struct inode {
struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
- atomic_t i_count;
+ unsigned int i_ref;
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
--
1.7.1
^ permalink raw reply related
* [PATCH 08/18] fs: add inode reference count read accessor
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
To remove most of the remaining direct references to the inode
reference count, add an iref_read() accessor function to read the
current reference count. New users of this function should be
frowned upon, as there is rarely a good reason for looking at the
current reference count.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
arch/powerpc/platforms/cell/spufs/file.c | 2 +-
drivers/staging/pohmelfs/inode.c | 10 +++++-----
fs/btrfs/inode.c | 6 +++---
fs/ceph/mds_client.c | 2 +-
fs/cifs/inode.c | 2 +-
fs/ext3/ialloc.c | 4 ++--
fs/ext4/ialloc.c | 4 ++--
fs/fs-writeback.c | 2 +-
fs/hpfs/inode.c | 2 +-
fs/inode.c | 10 ++++++++++
fs/locks.c | 2 +-
fs/logfs/readwrite.c | 2 +-
fs/nfs/inode.c | 4 ++--
fs/notify/inode_mark.c | 11 +++++------
fs/reiserfs/stree.c | 2 +-
fs/smbfs/inode.c | 2 +-
fs/ubifs/super.c | 2 +-
fs/xfs/linux-2.6/xfs_trace.h | 2 +-
fs/xfs/xfs_inode.h | 2 +-
include/linux/fs.h | 1 +
20 files changed, 42 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 1a40da9..2e4263c 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1549,7 +1549,7 @@ static int spufs_mfc_open(struct inode *inode, struct file *file)
if (ctx->owner != current->mm)
return -EINVAL;
- if (atomic_read(&inode->i_count) != 1)
+ if (iref_read(inode) != 1)
return -EBUSY;
mutex_lock(&ctx->mapping_lock);
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 97dae29..d8a308d 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1289,11 +1289,11 @@ static void pohmelfs_put_super(struct super_block *sb)
dprintk("%s: ino: %llu, pi: %p, inode: %p, count: %u.\n",
__func__, pi->ino, pi, inode, count);
- if (atomic_read(&inode->i_count) != count) {
+ if (iref_read(inode) != count) {
printk("%s: ino: %llu, pi: %p, inode: %p, count: %u, i_count: %d.\n",
__func__, pi->ino, pi, inode, count,
- atomic_read(&inode->i_count));
- count = atomic_read(&inode->i_count);
+ iref_read(inode));
+ count = iref_read(inode);
in_drop_list++;
}
@@ -1305,7 +1305,7 @@ static void pohmelfs_put_super(struct super_block *sb)
pi = POHMELFS_I(inode);
dprintk("%s: ino: %llu, pi: %p, inode: %p, i_count: %u.\n",
- __func__, pi->ino, pi, inode, atomic_read(&inode->i_count));
+ __func__, pi->ino, pi, inode, iref_read(inode));
/*
* These are special inodes, they were created during
@@ -1313,7 +1313,7 @@ static void pohmelfs_put_super(struct super_block *sb)
* so they live here with reference counter being 1 and prevent
* umount from succeed since it believes that they are busy.
*/
- count = atomic_read(&inode->i_count);
+ count = iref_read(inode);
if (count) {
list_del_init(&inode->i_sb_list);
while (count--)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0c3a35b..2953e9f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2718,10 +2718,10 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
return ERR_PTR(-ENOSPC);
/* check if there is someone else holds reference */
- if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
+ if (S_ISDIR(inode->i_mode) && iref_read(inode) > 1)
return ERR_PTR(-ENOSPC);
- if (atomic_read(&inode->i_count) > 2)
+ if (iref_read(inode) > 2)
return ERR_PTR(-ENOSPC);
if (xchg(&root->fs_info->enospc_unlink, 1))
@@ -3939,7 +3939,7 @@ again:
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
- if (atomic_read(&inode->i_count) > 1)
+ if (iref_read(inode) > 1)
d_prune_aliases(inode);
/*
* btrfs_drop_inode will have it removed from
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fad95f8..b6d0ef1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1102,7 +1102,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
spin_unlock(&inode->i_lock);
d_prune_aliases(inode);
dout("trim_caps_cb %p cap %p pruned, count now %d\n",
- inode, cap, atomic_read(&inode->i_count));
+ inode, cap, iref_read(inode));
return 0;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 63a0bdb..74cb762 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1641,7 +1641,7 @@ int cifs_revalidate_dentry(struct dentry *dentry)
}
cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
- "jiffies %ld", full_path, inode, inode->i_count.counter,
+ "jiffies %ld", full_path, inode, iref_read(inode),
dentry, dentry->d_time, jiffies);
if (CIFS_SB(sb)->tcon->unix_ext)
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4ab72db..64669aa 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -100,9 +100,9 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
struct ext3_sb_info *sbi;
int fatal = 0, err;
- if (atomic_read(&inode->i_count) > 1) {
+ if (iref_read(inode) > 1) {
printk ("ext3_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ iref_read(inode));
return;
}
if (inode->i_nlink) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0..38ac6e5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -189,9 +189,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
- if (atomic_read(&inode->i_count) > 1) {
+ if (iref_read(inode) > 1) {
printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ iref_read(inode));
return;
}
if (inode->i_nlink) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1bf8a28..ec7a689 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -315,7 +315,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
unsigned dirty;
int ret;
- if (!atomic_read(&inode->i_count))
+ if (!iref_read(inode))
WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
else
WARN_ON(inode->i_state & I_WILL_FREE);
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f0da1..05b5d79 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -183,7 +183,7 @@ void hpfs_write_inode(struct inode *i)
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
struct inode *parent;
if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return;
- if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) {
+ if (hpfs_inode->i_rddir_off && !iref_read(i)) {
if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n");
kfree(hpfs_inode->i_rddir_off);
hpfs_inode->i_rddir_off = NULL;
diff --git a/fs/inode.c b/fs/inode.c
index aa66e07..b1dc6dc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -331,6 +331,16 @@ void iref_locked(struct inode *inode)
atomic_inc(&inode->i_count);
}
+/*
+ * Nobody outside of core code should really be looking at the inode reference
+ * count. Please don't add new users of this function.
+ */
+int iref_read(struct inode *inode)
+{
+ return atomic_read(&inode->i_count);
+}
+EXPORT_SYMBOL_GPL(iref_read);
+
void end_writeback(struct inode *inode)
{
might_sleep();
diff --git a/fs/locks.c b/fs/locks.c
index ab24d49..cbf3114 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1376,7 +1376,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
goto out;
if ((arg == F_WRLCK)
&& ((atomic_read(&dentry->d_count) > 1)
- || (atomic_read(&inode->i_count) > 1)))
+ || (iref_read(inode) > 1)))
goto out;
}
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf..8beb842 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1002,7 +1002,7 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
{
struct logfs_inode *li = logfs_inode(inode);
- if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1)
+ if ((inode->i_nlink == 0) && iref_read(inode) == 1)
return 0;
if (bix < I0_BLOCKS)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 886be68..387f4dc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -385,7 +385,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
- atomic_read(&inode->i_count));
+ iref_read(inode));
out:
return inode;
@@ -1191,7 +1191,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
- atomic_read(&inode->i_count), fattr->valid);
+ iref_read(inode), fattr->valid);
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
goto out_fileid;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 8096a9e..6c54e02 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -252,12 +252,12 @@ void fsnotify_unmount_inodes(struct list_head *list)
continue;
/*
- * If i_count is zero, the inode cannot have any watches and
- * doing an iref/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
+ * If the inode is not referenced, the inode cannot have any
+ * watches and doing an iref/iput with MS_ACTIVE clear would
+ * actually evict all unreferenced inodes from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- if (!atomic_read(&inode->i_count))
+ if (!iref_read(inode))
continue;
need_iput_tmp = need_iput;
@@ -270,8 +270,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
need_iput_tmp = NULL;
/* In case the dropping of a reference would nuke next_i. */
- if ((&next_i->i_sb_list != list) &&
- atomic_read(&next_i->i_count) &&
+ if ((&next_i->i_sb_list != list) && iref_read(inode) &&
!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
iref_locked(next_i);
need_iput = next_i;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 313d39d..55c3ad3 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1477,7 +1477,7 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
** reading in the last block. The user will hit problems trying to
** read the file, but for now we just skip the indirect2direct
*/
- if (atomic_read(&inode->i_count) > 1 ||
+ if (iref_read(inode) > 1 ||
!tail_has_to_be_packed(inode) ||
!page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
/* leave tail in an unformatted node */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 450c919..792593b 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -320,7 +320,7 @@ out:
}
/*
- * This routine is called when i_nlink == 0 and i_count goes to 0.
+ * This routine is called when i_nlink == 0 and the reference count goes to 0.
* All blocking cleanup operations need to go here to avoid races.
*/
static void
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 45888fb..a1b109c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -342,7 +342,7 @@ static void ubifs_evict_inode(struct inode *inode)
goto out;
dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
- ubifs_assert(!atomic_read(&inode->i_count));
+ ubifs_assert(!iref_read(inode));
truncate_inode_pages(&inode->i_data, 0);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index be5dffd..c3940ab 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -599,7 +599,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->count = atomic_read(&VFS_I(ip)->i_count);
+ __entry->count = iref_read(VFS_I(ip));
__entry->pincount = atomic_read(&ip->i_pincount);
__entry->caller_ip = caller_ip;
),
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cbb4791..5000660 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -481,7 +481,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
#define IHOLD(ip) \
do { \
- ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
+ ASSERT(iref_read(VFS_I(ip)) > 0) ; \
iref(VFS_I(ip)); \
trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2e971f2..6f0df2a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2186,6 +2186,7 @@ extern void unlock_new_inode(struct inode *);
extern void iref(struct inode *inode);
extern void iref_locked(struct inode *inode);
+extern int iref_read(struct inode *inode);
extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
extern void destroy_inode(struct inode *);
--
1.7.1
^ permalink raw reply related
* [PATCH 13/18] fs: split locking of inode writeback and LRU lists
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Now that the inode LRU and IO lists are split apart, we can separate
the locking for them. The IO lists are only ever accessed in the
context of writeback, so a per-BDI lock for those lists separates
them out nicely.
For the inode LRU, introduce a simple global lock to protect it.
While this could be made per-sb, it is not yet clear what the next
steps for optimising/parallelising reclaim of inodes are. Rather
than optimise now, leave it as a global list and lock until further
analysis can be done.
Based on a patch originally from Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 48 +++++++++++++-------
fs/inode.c | 101 ++++++++++++++++++++++++++++++++++--------
fs/internal.h | 6 +++
fs/super.c | 2 +-
include/linux/backing-dev.h | 1 +
include/linux/writeback.h | 12 ++++-
mm/backing-dev.c | 21 +++++++++
7 files changed, 150 insertions(+), 41 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 29f8032..49d44cc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -69,16 +69,6 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return test_bit(BDI_writeback_running, &bdi->state);
}
-static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
-
- if (strcmp(sb->s_type->name, "bdev") == 0)
- return inode->i_mapping->a_bdi;
-
- return sb->s_bdi;
-}
-
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
@@ -169,6 +159,7 @@ static void redirty_tail(struct inode *inode)
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+ assert_spin_locked(&wb->b_lock);
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -186,6 +177,7 @@ static void requeue_io(struct inode *inode)
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+ assert_spin_locked(&wb->b_lock);
list_move(&inode->i_io, &wb->b_more_io);
}
@@ -268,6 +260,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
*/
static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
{
+ assert_spin_locked(&wb->b_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
}
@@ -311,6 +304,7 @@ static void inode_wait_for_writeback(struct inode *inode)
static int
writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
{
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
unsigned dirty;
int ret;
@@ -330,7 +324,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* completed a full scan of b_io.
*/
if (wbc->sync_mode != WB_SYNC_ALL) {
+ spin_lock(&bdi->wb.b_lock);
requeue_io(inode);
+ spin_unlock(&bdi->wb.b_lock);
return 0;
}
@@ -385,6 +381,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* sometimes bales out without doing anything.
*/
inode->i_state |= I_DIRTY_PAGES;
+ spin_lock(&bdi->wb.b_lock);
if (wbc->nr_to_write <= 0) {
/*
* slice used up: queue for next turn
@@ -400,6 +397,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
*/
redirty_tail(inode);
}
+ spin_unlock(&bdi->wb.b_lock);
} else if (inode->i_state & I_DIRTY) {
/*
* Filesystems can dirty the inode during writeback
@@ -407,14 +405,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* submission or metadata updates after data IO
* completion.
*/
+ spin_lock(&bdi->wb.b_lock);
redirty_tail(inode);
+ spin_unlock(&bdi->wb.b_lock);
} else {
/* The inode is clean */
+ spin_lock(&bdi->wb.b_lock);
list_del_init(&inode->i_io);
- if (list_empty(&inode->i_lru)) {
- list_add(&inode->i_lru, &inode_unused);
- percpu_counter_inc(&nr_inodes_unused);
- }
+ spin_unlock(&bdi->wb.b_lock);
+ inode_lru_list_add(inode);
}
}
inode_sync_complete(inode);
@@ -460,6 +459,7 @@ static bool pin_sb_for_writeback(struct super_block *sb)
static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
struct writeback_control *wbc, bool only_this_sb)
{
+ assert_spin_locked(&wb->b_lock);
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
@@ -475,7 +475,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
redirty_tail(inode);
continue;
}
-
/*
* The inode belongs to a different superblock.
* Bounce back to the caller to unpin this and
@@ -484,7 +483,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
return 0;
}
- if (inode->i_state & (I_NEW | I_WILL_FREE)) {
+ if (inode->i_state & (I_NEW | I_WILL_FREE | I_FREEING)) {
requeue_io(inode);
continue;
}
@@ -495,8 +494,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
if (inode_dirtied_after(inode, wbc->wb_start))
return 1;
- BUG_ON(inode->i_state & I_FREEING);
+ spin_lock(&inode->i_lock);
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&wb->b_lock);
+
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
if (wbc->pages_skipped != pages_skipped) {
@@ -504,12 +506,15 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
+ spin_lock(&wb->b_lock);
redirty_tail(inode);
+ spin_unlock(&wb->b_lock);
}
spin_unlock(&inode_lock);
iput(inode);
cond_resched();
spin_lock(&inode_lock);
+ spin_lock(&wb->b_lock);
if (wbc->nr_to_write <= 0) {
wbc->more_io = 1;
return 1;
@@ -529,6 +534,8 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
if (!wbc->wb_start)
wbc->wb_start = jiffies; /* livelock avoidance */
spin_lock(&inode_lock);
+ spin_lock(&wb->b_lock);
+
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
@@ -547,6 +554,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
if (ret)
break;
}
+ spin_unlock(&wb->b_lock);
spin_unlock(&inode_lock);
/* Leave any unwritten inodes on b_io */
}
@@ -557,9 +565,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
WARN_ON(!rwsem_is_locked(&sb->s_umount));
spin_lock(&inode_lock);
+ spin_lock(&wb->b_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&wb->b_lock);
spin_unlock(&inode_lock);
}
@@ -672,8 +682,10 @@ static long wb_writeback(struct bdi_writeback *wb,
*/
spin_lock(&inode_lock);
if (!list_empty(&wb->b_more_io)) {
+ spin_lock(&wb->b_lock);
inode = list_entry(wb->b_more_io.prev,
struct inode, i_io);
+ spin_unlock(&wb->b_lock);
trace_wbc_writeback_wait(&wbc, wb->bdi);
inode_wait_for_writeback(inode);
}
@@ -986,8 +998,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
wakeup_bdi = true;
}
+ spin_lock(&bdi->wb.b_lock);
inode->dirtied_when = jiffies;
list_move(&inode->i_io, &bdi->wb.b_dirty);
+ spin_unlock(&bdi->wb.b_lock);
}
}
out:
diff --git a/fs/inode.c b/fs/inode.c
index e6bb36d..4ad7900 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -35,6 +35,10 @@
* inode hash table, i_hash
* sb inode lock protects:
* s_inodes, i_sb_list
+ * bdi writeback lock protects:
+ * b_io, b_more_io, b_dirty, i_io
+ * inode_lru_lock protects:
+ * inode_lru, i_lru
*
* Lock orders
* inode_lock
@@ -43,7 +47,9 @@
*
* inode_lock
* sb inode lock
- * inode->i_lock
+ * inode_lru_lock
+ * wb->b_lock
+ * inode->i_lock
*/
/*
* This is needed for the following functions:
@@ -92,7 +98,8 @@ static unsigned int i_hash_shift __read_mostly;
* allowing for low-overhead inode sync() operations.
*/
-LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_lru);
+static DEFINE_SPINLOCK(inode_lru_lock);
struct inode_hash_bucket {
struct hlist_bl_head head;
@@ -383,6 +390,30 @@ int iref_read(struct inode *inode)
}
EXPORT_SYMBOL_GPL(iref_read);
+/*
+ * check against I_FREEING as inode writeback completion could race with
+ * setting the I_FREEING and removing the inode from the LRU.
+ */
+void inode_lru_list_add(struct inode *inode)
+{
+ spin_lock(&inode_lru_lock);
+ if (list_empty(&inode->i_lru) && !(inode->i_state & I_FREEING)) {
+ list_add(&inode->i_lru, &inode_lru);
+ percpu_counter_inc(&nr_inodes_unused);
+ }
+ spin_unlock(&inode_lru_lock);
+}
+
+void inode_lru_list_del(struct inode *inode)
+{
+ spin_lock(&inode_lru_lock);
+ if (!list_empty(&inode->i_lru)) {
+ list_del_init(&inode->i_lru);
+ percpu_counter_dec(&nr_inodes_unused);
+ }
+ spin_unlock(&inode_lru_lock);
+}
+
static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
unsigned long tmp;
@@ -535,11 +566,26 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
invalidate_inode_buffers(inode);
spin_lock(&inode->i_lock);
if (!inode->i_ref) {
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
+
spin_unlock(&inode->i_lock);
- list_move(&inode->i_lru, dispose);
- list_del_init(&inode->i_io);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
+
+
+ /*
+ * move the inode off the IO lists and LRU once
+ * I_FREEING is set so that it won't get moved back on
+ * there if it is dirty.
+ */
+ spin_lock(&bdi->wb.b_lock);
+ list_del_init(&inode->i_io);
+ spin_unlock(&bdi->wb.b_lock);
+
+ spin_lock(&inode_lru_lock);
+ list_move(&inode->i_lru, dispose);
+ spin_unlock(&inode_lru_lock);
+
percpu_counter_dec(&nr_inodes_unused);
continue;
}
@@ -596,7 +642,7 @@ static int can_unuse(struct inode *inode)
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
+ * the front of the inode_lru list. So look for it there and if the
* inode is still freeable, proceed. The right inode is found 99.9% of the
* time in testing on a 4-way.
*
@@ -611,13 +657,15 @@ static void prune_icache(int nr_to_scan)
down_read(&iprune_sem);
spin_lock(&inode_lock);
+ spin_lock(&inode_lru_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
+ struct backing_dev_info *bdi;
- if (list_empty(&inode_unused))
+ if (list_empty(&inode_lru))
break;
- inode = list_entry(inode_unused.prev, struct inode, i_lru);
+ inode = list_entry(inode_lru.prev, struct inode, i_lru);
spin_lock(&inode->i_lock);
if (inode->i_ref || (inode->i_state & ~I_REFERENCED)) {
@@ -628,19 +676,21 @@ static void prune_icache(int nr_to_scan)
}
if (inode->i_state & I_REFERENCED) {
spin_unlock(&inode->i_lock);
- list_move(&inode->i_lru, &inode_unused);
+ list_move(&inode->i_lru, &inode_lru);
inode->i_state &= ~I_REFERENCED;
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
iref_locked(inode);
spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lru_lock);
spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
spin_lock(&inode_lock);
+ spin_lock(&inode_lru_lock);
/*
* if we can't reclaim this inod immediately, give it
@@ -648,21 +698,32 @@ static void prune_icache(int nr_to_scan)
* on it.
*/
if (!can_unuse(inode)) {
- list_move(&inode->i_lru, &inode_unused);
+ list_move(&inode->i_lru, &inode_lru);
continue;
}
} else
spin_unlock(&inode->i_lock);
- list_move(&inode->i_lru, &freeable);
- list_del_init(&inode->i_io);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
+
+ /*
+ * move the inode off the IO lists and LRU once
+ * I_FREEING is set so that it won't get moved back on
+ * there if it is dirty.
+ */
+ bdi = inode_to_bdi(inode);
+ spin_lock(&bdi->wb.b_lock);
+ list_del_init(&inode->i_io);
+ spin_unlock(&bdi->wb.b_lock);
+
+ list_move(&inode->i_lru, &freeable);
percpu_counter_dec(&nr_inodes_unused);
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
+ spin_unlock(&inode_lru_lock);
spin_unlock(&inode_lock);
dispose_list(&freeable);
@@ -1369,6 +1430,7 @@ static void iput_final(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
const struct super_operations *op = inode->i_sb->s_op;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
int drop;
if (op && op->drop_inode)
@@ -1381,8 +1443,7 @@ static void iput_final(struct inode *inode)
inode->i_state |= I_REFERENCED;
if (!(inode->i_state & (I_DIRTY|I_SYNC)) &&
list_empty(&inode->i_lru)) {
- list_add(&inode->i_lru, &inode_unused);
- percpu_counter_inc(&nr_inodes_unused);
+ inode_lru_list_add(inode);
}
spin_unlock(&inode_lock);
return;
@@ -1396,19 +1457,19 @@ static void iput_final(struct inode *inode)
inode->i_state &= ~I_WILL_FREE;
__remove_inode_hash(inode);
}
- list_del_init(&inode->i_io);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
/*
- * We avoid moving dirty inodes back onto the LRU now because I_FREEING
- * is set and hence writeback_single_inode() won't move the inode
+ * move the inode off the IO lists and LRU once I_FREEING is set so
+ * that it won't get moved back on there if it is dirty.
* around.
*/
- if (!list_empty(&inode->i_lru)) {
- list_del_init(&inode->i_lru);
- percpu_counter_dec(&nr_inodes_unused);
- }
+ spin_lock(&bdi->wb.b_lock);
+ list_del_init(&inode->i_io);
+ spin_unlock(&bdi->wb.b_lock);
+
+ inode_lru_list_del(inode);
spin_lock(&sb->s_inodes_lock);
list_del_init(&inode->i_sb_list);
diff --git a/fs/internal.h b/fs/internal.h
index a6910e9..ece3565 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,3 +101,9 @@ extern void put_super(struct super_block *sb);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+
+/*
+ * inode.c
+ */
+extern void inode_lru_list_add(struct inode *inode);
+extern void inode_lru_list_del(struct inode *inode);
diff --git a/fs/super.c b/fs/super.c
index d826214..c5332e5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -76,7 +76,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
- spin_lock_init(&(s->s_inodes_lock);
+ spin_lock_init(&s->s_inodes_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
/*
* The locking rules for s_lock are up to the
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 31e1346..5106fc4 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -57,6 +57,7 @@ struct bdi_writeback {
struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */
+ spinlock_t b_lock; /* writeback lists lock */
};
struct backing_dev_info {
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f7ed2a0..b182ccc 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,10 +10,7 @@
struct backing_dev_info;
extern spinlock_t inode_lock;
-extern struct list_head inode_unused;
-extern struct percpu_counter nr_inodes;
-extern struct percpu_counter nr_inodes_unused;
/*
* fs/fs-writeback.c
@@ -82,6 +79,15 @@ static inline void inode_sync_wait(struct inode *inode)
TASK_UNINTERRUPTIBLE);
}
+static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (strcmp(sb->s_type->name, "bdev") == 0)
+ return inode->i_mapping->a_bdi;
+
+ return sb->s_bdi;
+}
/*
* mm/page-writeback.c
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a124991..74e8269 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -74,12 +74,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_wb = nr_dirty = nr_io = nr_more_io = 0;
spin_lock(&inode_lock);
+ spin_lock(&wb->b_lock);
list_for_each_entry(inode, &wb->b_dirty, i_io)
nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_io)
nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_io)
nr_more_io++;
+ spin_unlock(&wb->b_lock);
spin_unlock(&inode_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -634,6 +636,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
INIT_LIST_HEAD(&wb->b_dirty);
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
+ spin_lock_init(&wb->b_lock);
setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}
@@ -671,6 +674,18 @@ err:
}
EXPORT_SYMBOL(bdi_init);
+static void bdi_lock_two(struct backing_dev_info *bdi1,
+ struct backing_dev_info *bdi2)
+{
+ if (bdi1 < bdi2) {
+ spin_lock(&bdi1->wb.b_lock);
+ spin_lock_nested(&bdi2->wb.b_lock, 1);
+ } else {
+ spin_lock(&bdi2->wb.b_lock);
+ spin_lock_nested(&bdi1->wb.b_lock, 1);
+ }
+}
+
void mapping_set_bdi(struct address_space *mapping,
struct backing_dev_info *bdi)
{
@@ -681,6 +696,7 @@ void mapping_set_bdi(struct address_space *mapping,
return;
spin_lock(&inode_lock);
+ bdi_lock_two(bdi, old);
if (!list_empty(&inode->i_io)) {
struct inode *i;
@@ -709,6 +725,8 @@ void mapping_set_bdi(struct address_space *mapping,
}
found:
mapping->a_bdi = bdi;
+ spin_unlock(&bdi->wb.b_lock);
+ spin_unlock(&old->wb.b_lock);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(mapping_set_bdi);
@@ -726,6 +744,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
struct inode *i, *tmp;
spin_lock(&inode_lock);
+ bdi_lock_two(bdi, &default_backing_dev_info);
list_for_each_entry_safe(i, tmp, &bdi->wb.b_dirty, i_io) {
list_del(&i->i_io);
list_add_tail(&i->i_io, &dst->b_dirty);
@@ -741,6 +760,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
list_add_tail(&i->i_io, &dst->b_more_io);
i->i_mapping->a_bdi = bdi;
}
+ spin_unlock(&bdi->wb.b_lock);
+ spin_unlock(&dst->b_lock);
spin_unlock(&inode_lock);
}
--
1.7.1
^ permalink raw reply related
* [PATCH 03/18] fs: keep inode with backing-dev
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Nick Piggin <npiggin@suse.de>
Having an inode on the writeback lists of a different bdi than
inode->i_mapping->backing_dev_info makes it very difficult to do
per-bdi locking of the writeback lists. Add functions to move these
inodes over when the mapping backing dev is changed.
Also, rename i_mapping.backing_dev_info to i_mapping.a_bdi while we're
here. Succinct is nice, and it catches conversion errors.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
drivers/char/mem.c | 2 +-
drivers/char/raw.c | 2 +-
drivers/mtd/mtdchar.c | 2 +-
fs/afs/write.c | 6 ++--
fs/block_dev.c | 13 +++++----
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/file.c | 2 +-
fs/btrfs/inode.c | 10 +++---
fs/buffer.c | 2 +-
fs/ceph/addr.c | 2 +-
fs/ceph/inode.c | 4 +-
fs/cifs/file.c | 2 +-
fs/cifs/inode.c | 2 +-
fs/configfs/inode.c | 3 +-
fs/ext2/ialloc.c | 2 +-
fs/fs-writeback.c | 2 +-
fs/fuse/file.c | 6 ++--
fs/fuse/inode.c | 2 +-
fs/gfs2/glock.c | 3 +-
fs/hugetlbfs/inode.c | 3 +-
fs/inode.c | 6 ++--
fs/nfs/inode.c | 3 +-
fs/nfs/write.c | 7 ++---
fs/nilfs2/btnode.c | 2 +-
fs/nilfs2/mdt.c | 2 +-
fs/nilfs2/the_nilfs.c | 2 +-
fs/ntfs/file.c | 2 +-
fs/ocfs2/dlmfs/dlmfs.c | 4 +-
fs/ocfs2/file.c | 2 +-
fs/ramfs/inode.c | 2 +-
fs/romfs/super.c | 4 +-
fs/sysfs/inode.c | 2 +-
fs/ubifs/dir.c | 2 +-
fs/ubifs/super.c | 2 +-
fs/xfs/linux-2.6/xfs_buf.c | 4 +-
fs/xfs/linux-2.6/xfs_file.c | 2 +-
include/linux/backing-dev.h | 16 ++++++++---
include/linux/fs.h | 2 +-
kernel/cgroup.c | 2 +-
mm/backing-dev.c | 61 ++++++++++++++++++++++++++++++++++++++++--
mm/fadvise.c | 4 +-
mm/filemap.c | 4 +-
mm/filemap_xip.c | 2 +-
mm/page-writeback.c | 15 +++++-----
mm/readahead.c | 6 ++--
mm/shmem.c | 2 +-
mm/swap.c | 2 +-
mm/swap_state.c | 2 +-
mm/swapfile.c | 2 +-
mm/truncate.c | 3 +-
mm/vmscan.c | 2 +-
51 files changed, 155 insertions(+), 90 deletions(-)
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 1f528fa..2285c1e 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -872,7 +872,7 @@ static int memory_open(struct inode *inode, struct file *filp)
filp->f_op = dev->fops;
if (dev->dev_info)
- filp->f_mapping->backing_dev_info = dev->dev_info;
+ mapping_set_bdi(filp->f_mapping, dev->dev_info);
if (dev->fops->open)
return dev->fops->open(inode, filp);
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index b38942f..5baa83f 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -109,7 +109,7 @@ static int raw_release(struct inode *inode, struct file *filp)
if (--raw_devices[minor].inuse == 0) {
/* Here inode->i_mapping == bdev->bd_inode->i_mapping */
inode->i_mapping = &inode->i_data;
- inode->i_mapping->backing_dev_info = &default_backing_dev_info;
+ mapping_set_bdi(inode->i_mapping, &default_backing_dev_info);
}
mutex_unlock(&raw_mutex);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index a825002..26af8b1 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -113,7 +113,7 @@ static int mtd_open(struct inode *inode, struct file *file)
if (mtd_ino->i_state & I_NEW) {
mtd_ino->i_private = mtd;
mtd_ino->i_mode = S_IFCHR;
- mtd_ino->i_data.backing_dev_info = mtd->backing_dev_info;
+ mapping_new_set_bdi(&mtd_ino->i_data, mtd->backing_dev_info);
unlock_new_inode(mtd_ino);
}
file->f_mapping = mtd_ino->i_mapping;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 722743b..b321bfc 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -438,7 +438,7 @@ no_more:
*/
int afs_writepage(struct page *page, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = page->mapping->backing_dev_info;
+ struct backing_dev_info *bdi = page->mapping->a_bdi;
struct afs_writeback *wb;
int ret;
@@ -469,7 +469,7 @@ static int afs_writepages_region(struct address_space *mapping,
struct writeback_control *wbc,
pgoff_t index, pgoff_t end, pgoff_t *_next)
{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
struct afs_writeback *wb;
struct page *page;
int ret, n;
@@ -548,7 +548,7 @@ static int afs_writepages_region(struct address_space *mapping,
int afs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
pgoff_t start, end, next;
int ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 50e8c85..ac070d7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -533,7 +533,7 @@ struct block_device *bdget(dev_t dev)
inode->i_bdev = bdev;
inode->i_data.a_ops = &def_blk_aops;
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
- inode->i_data.backing_dev_info = &default_backing_dev_info;
+ mapping_new_set_bdi(&inode->i_data, &default_backing_dev_info);
spin_lock(&bdev_lock);
list_add(&bdev->bd_list, &all_bdevs);
spin_unlock(&bdev_lock);
@@ -1390,7 +1390,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
bdi = &default_backing_dev_info;
- bdev->bd_inode->i_data.backing_dev_info = bdi;
+ mapping_set_bdi(&bdev->bd_inode->i_data, bdi);
}
if (bdev->bd_invalidated)
rescan_partitions(disk, bdev);
@@ -1405,8 +1405,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (ret)
goto out_clear;
bdev->bd_contains = whole;
- bdev->bd_inode->i_data.backing_dev_info =
- whole->bd_inode->i_data.backing_dev_info;
+ mapping_set_bdi(&bdev->bd_inode->i_data,
+ whole->bd_inode->i_data.a_bdi);
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
!bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1439,7 +1439,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_disk = NULL;
bdev->bd_part = NULL;
- bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+ mapping_set_bdi(&bdev->bd_inode->i_data, &default_backing_dev_info);
if (bdev != bdev->bd_contains)
__blkdev_put(bdev->bd_contains, mode, 1);
bdev->bd_contains = NULL;
@@ -1533,7 +1533,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
- bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+ mapping_set_bdi(&bdev->bd_inode->i_data,
+ &default_backing_dev_info);
if (bdev != bdev->bd_contains)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 64f1008..05c3fc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1636,7 +1636,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
*/
fs_info->btree_inode->i_size = OFFSET_MAX;
fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
- fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
+ mapping_new_set_bdi(fs_info->btree_inode->i_mapping, &fs_info->bdi);
RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33..96e3883 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -872,7 +872,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
goto out;
count = ocount;
- current->backing_dev_info = inode->i_mapping->backing_dev_info;
+ current->backing_dev_info = inode->i_mapping->a_bdi;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c038644..c646c0c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2475,7 +2475,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &root->fs_info->bdi);
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
@@ -2490,7 +2490,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
inode->i_mapping->a_ops = &btrfs_symlink_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &root->fs_info->bdi);
break;
default:
inode->i_op = &btrfs_special_inode_operations;
@@ -4705,7 +4705,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
drop_inode = 1;
else {
inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &root->fs_info->bdi);
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
@@ -6699,7 +6699,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
drop_inode = 1;
else {
inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &root->fs_info->bdi);
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
@@ -6739,7 +6739,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &btrfs_symlink_inode_operations;
inode->i_mapping->a_ops = &btrfs_symlink_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &root->fs_info->bdi);
inode_set_bytes(inode, name_len);
btrfs_i_size_write(inode, name_len - 1);
err = btrfs_update_inode(trans, root, inode);
diff --git a/fs/buffer.c b/fs/buffer.c
index 3e7dca2..b5c4153 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3161,7 +3161,7 @@ void block_sync_page(struct page *page)
smp_mb();
mapping = page_mapping(page);
if (mapping)
- blk_run_backing_dev(mapping->backing_dev_info, page);
+ blk_run_backing_dev(mapping->a_bdi, page);
}
EXPORT_SYMBOL(block_sync_page);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index efbc604..448400a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -588,7 +588,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client;
pgoff_t index, start, end;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 62377ec..e427082 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -624,8 +624,8 @@ static int fill_inode(struct inode *inode,
}
inode->i_mapping->a_ops = &ceph_aops;
- inode->i_mapping->backing_dev_info =
- &ceph_sb_to_client(inode->i_sb)->backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping,
+ &ceph_sb_to_client(inode->i_sb)->backing_dev_info);
switch (inode->i_mode & S_IFMT) {
case S_IFIFO:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index de748c6..3673e66 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1337,7 +1337,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
static int cifs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
unsigned int bytes_to_write;
unsigned int bytes_written;
struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 53cce8c..63a0bdb 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -802,7 +802,7 @@ retry_iget5_locked:
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
if (S_ISREG(inode->i_mode))
- inode->i_data.backing_dev_info = sb->s_bdi;
+ inode->i_data.a_bdi = sb->s_bdi;
#ifdef CONFIG_CIFS_FSCACHE
/* initialize per-inode cache cookie pointer */
CIFS_I(inode)->fscache = NULL;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cf78d44..40b2bec 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -136,7 +136,8 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
struct inode * inode = new_inode(configfs_sb);
if (inode) {
inode->i_mapping->a_ops = &configfs_aops;
- inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping,
+ &configfs_backing_dev_info);
inode->i_op = &configfs_inode_operations;
if (sd->s_iattr) {
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad70479..29942f0 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -172,7 +172,7 @@ static void ext2_preread_inode(struct inode *inode)
struct ext2_group_desc * gdp;
struct backing_dev_info *bdi;
- bdi = inode->i_mapping->backing_dev_info;
+ bdi = inode->i_mapping->a_bdi;
if (bdi_read_congested(bdi))
return;
if (bdi_write_congested(bdi))
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 58a95b7..3209aff 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -74,7 +74,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
struct super_block *sb = inode->i_sb;
if (strcmp(sb->s_type->name, "bdev") == 0)
- return inode->i_mapping->backing_dev_info;
+ return inode->i_mapping->a_bdi;
return sb->s_bdi;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c822458..193a0d1 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -945,7 +945,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
+ current->backing_dev_info = mapping->a_bdi;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
@@ -1133,7 +1133,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
{
struct inode *inode = req->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+ struct backing_dev_info *bdi = inode->i_mapping->a_bdi;
list_del(&req->writepages_entry);
dec_bdi_stat(bdi, BDI_WRITEBACK);
@@ -1247,7 +1247,7 @@ static int fuse_writepage_locked(struct page *page)
req->end = fuse_writepage_end;
req->inode = inode;
- inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
+ inc_bdi_stat(mapping->a_bdi, BDI_WRITEBACK);
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
end_page_writeback(page);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e1..5cf105c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -256,7 +256,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
if ((inode->i_state & I_NEW)) {
inode->i_flags |= S_NOATIME|S_NOCMTIME;
inode->i_generation = generation;
- inode->i_data.backing_dev_info = &fc->bdi;
+ mapping_new_set_bdi(&inode->i_data, &fc->bdi);
fuse_init_inode(inode, attr);
unlock_new_inode(inode);
} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9adf8f9..c8f4c50 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -8,6 +8,7 @@
*/
#include <linux/sched.h>
+#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
@@ -797,7 +798,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->assoc_mapping = NULL;
- mapping->backing_dev_info = s->s_bdi;
+ mapping_new_set_bdi(mapping, s->s_bdi);
mapping->writeback_index = 0;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6e5bd42..a37920a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -459,7 +459,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
inode->i_uid = uid;
inode->i_gid = gid;
inode->i_mapping->a_ops = &hugetlbfs_aops;
- inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping,
+ &hugetlbfs_backing_dev_info);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
INIT_LIST_HEAD(&inode->i_mapping->private_list);
info = HUGETLBFS_I(inode);
diff --git a/fs/inode.c b/fs/inode.c
index f04d501..22ef3f1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -201,7 +201,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->assoc_mapping = NULL;
- mapping->backing_dev_info = &default_backing_dev_info;
+ mapping_new_set_bdi(mapping, &default_backing_dev_info);
mapping->writeback_index = 0;
/*
@@ -212,8 +212,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
if (sb->s_bdev) {
struct backing_dev_info *bdi;
- bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
- mapping->backing_dev_info = bdi;
+ bdi = sb->s_bdev->bd_inode->i_mapping->a_bdi;
+ mapping_new_set_bdi(mapping, bdi);
}
inode->i_private = NULL;
inode->i_mapping = mapping;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d2d6c7..886be68 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -287,7 +287,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (S_ISREG(inode->i_mode)) {
inode->i_fop = &nfs_file_operations;
inode->i_data.a_ops = &nfs_file_aops;
- inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ mapping_new_set_bdi(&inode->i_data,
+ &NFS_SB(sb)->backing_dev_info);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 874972d..a8baf4b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -455,7 +455,7 @@ nfs_mark_request_commit(struct nfs_page *req)
nfsi->ncommit++;
spin_unlock(&inode->i_lock);
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ inc_bdi_stat(req->wb_page->mapping->a_bdi, BDI_RECLAIMABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
@@ -466,7 +466,7 @@ nfs_clear_request_commit(struct nfs_page *req)
if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
dec_zone_page_state(page, NR_UNSTABLE_NFS);
- dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(page->mapping->a_bdi, BDI_RECLAIMABLE);
return 1;
}
return 0;
@@ -1321,8 +1321,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
nfs_list_remove_request(req);
nfs_mark_request_commit(req);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ dec_bdi_stat(req->wb_page->mapping->a_bdi, BDI_RECLAIMABLE);
nfs_clear_page_tag_locked(req);
}
nfs_commit_clear_lock(NFS_I(inode));
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index f78ab10..d74ed8f 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -59,7 +59,7 @@ void nilfs_btnode_cache_init(struct address_space *btnc,
btnc->flags = 0;
mapping_set_gfp_mask(btnc, GFP_NOFS);
btnc->assoc_mapping = NULL;
- btnc->backing_dev_info = bdi;
+ mapping_new_set_bdi(btnc, bdi);
btnc->a_ops = &def_btnode_aops;
}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index d01aff4..7713861 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -517,7 +517,7 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
mapping->flags = 0;
mapping_set_gfp_mask(mapping, gfp_mask);
mapping->assoc_mapping = NULL;
- mapping->backing_dev_info = nilfs->ns_bdi;
+ mapping_new_set_bdi(mapping, nilfs->ns_bdi);
inode->i_mapping = mapping;
}
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ba7c10c..cb81695 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -729,7 +729,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
nilfs->ns_mount_state = le16_to_cpu(sbp->s_state);
- bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info;
+ bdi = nilfs->ns_bdev->bd_inode->i_mapping->a_bdi;
nilfs->ns_bdi = bdi ? : &default_backing_dev_info;
err = nilfs_store_log_cursor(nilfs, sbp);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 113ebd9..19f9447 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2088,7 +2088,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
pos = *ppos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim. */
- current->backing_dev_info = mapping->backing_dev_info;
+ current->backing_dev_info = mapping->a_bdi;
written = 0;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index c2903b8..6b931db 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -403,7 +403,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping, &dlmfs_backing_dev_info);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inc_nlink(inode);
@@ -428,7 +428,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping, &dlmfs_backing_dev_info);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
ip = DLMFS_I(inode);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9a03c15..863e016 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2327,7 +2327,7 @@ relock:
goto out_dio;
}
} else {
- current->backing_dev_info = file->f_mapping->backing_dev_info;
+ current->backing_dev_info = file->f_mapping->a_bdi;
written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
ppos, count, 0);
current->backing_dev_info = NULL;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a5ebae7..02d8ffb 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -60,7 +60,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
if (inode) {
inode_init_owner(inode, dir, mode);
inode->i_mapping->a_ops = &ramfs_aops;
- inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping, &ramfs_backing_dev_info);
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
mapping_set_unevictable(inode->i_mapping);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 42d2135..bb4b195 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -356,8 +356,8 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
i->i_fop = &romfs_ro_fops;
i->i_data.a_ops = &romfs_aops;
if (i->i_sb->s_mtd)
- i->i_data.backing_dev_info =
- i->i_sb->s_mtd->backing_dev_info;
+ mapping_new_set_bdi(&i->i_data,
+ i->i_sb->s_mtd->backing_dev_info);
if (nextfh & ROMFH_EXEC)
mode |= S_IXUGO;
break;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cffb1fd..3d049e5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -251,7 +251,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
- inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping, &sysfs_backing_dev_info);
inode->i_op = &sysfs_inode_operations;
set_default_inode_attr(inode, sd->s_mode);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 87ebcce..d669260 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -109,7 +109,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
ubifs_current_time(inode);
inode->i_mapping->nrpages = 0;
/* Disable readahead */
- inode->i_mapping->backing_dev_info = &c->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &c->bdi);
switch (mode & S_IFMT) {
case S_IFREG:
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index cd5900b..45888fb 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -157,7 +157,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
goto out_invalid;
/* Disable read-ahead */
- inode->i_mapping->backing_dev_info = &c->bdi;
+ mapping_new_set_bdi(inode->i_mapping, &c->bdi);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 286e36e..7038d77 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -630,7 +630,7 @@ xfs_buf_readahead(
{
struct backing_dev_info *bdi;
- bdi = target->bt_mapping->backing_dev_info;
+ bdi = target->bt_mapping->a_bdi;
if (bdi_read_congested(bdi))
return;
@@ -1580,7 +1580,7 @@ xfs_mapping_buftarg(
bdi = &default_backing_dev_info;
mapping = &inode->i_data;
mapping->a_ops = &mapping_aops;
- mapping->backing_dev_info = bdi;
+ mapping_new_set_bdi(mapping, bdi);
mapping_set_gfp_mask(mapping, GFP_NOFS);
btp->bt_mapping = mapping;
return 0;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ba8ad42..94cf85b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -679,7 +679,7 @@ start:
goto out_unlock_internal;
/* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
+ current->backing_dev_info = mapping->a_bdi;
if ((ioflags & IO_ISDIRECT)) {
if (mapping->nrpages) {
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 35b0074..31e1346 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -314,19 +314,27 @@ static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
return bdi == &default_backing_dev_info;
}
+void mapping_set_bdi(struct address_space *mapping,
+ struct backing_dev_info *bdi);
+static inline void mapping_new_set_bdi(struct address_space *mapping,
+ struct backing_dev_info *bdi)
+{
+ mapping->a_bdi = bdi;
+}
+
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
- return bdi_cap_writeback_dirty(mapping->backing_dev_info);
+ return bdi_cap_writeback_dirty(mapping->a_bdi);
}
static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
- return bdi_cap_account_dirty(mapping->backing_dev_info);
+ return bdi_cap_account_dirty(mapping->a_bdi);
}
static inline bool mapping_cap_swap_backed(struct address_space *mapping)
{
- return bdi_cap_swap_backed(mapping->backing_dev_info);
+ return bdi_cap_swap_backed(mapping->a_bdi);
}
static inline int bdi_sched_wait(void *word)
@@ -345,7 +353,7 @@ static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
static inline void blk_run_address_space(struct address_space *mapping)
{
if (mapping)
- blk_run_backing_dev(mapping->backing_dev_info, NULL);
+ blk_run_backing_dev(mapping->a_bdi, NULL);
}
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1fb92f9..6f0b07f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -633,7 +633,7 @@ struct address_space {
pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits/gfp mask */
- struct backing_dev_info *backing_dev_info; /* device readahead, etc */
+ struct backing_dev_info *a_bdi; /* device readahead, etc */
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c9483d8..8f1952b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -782,7 +782,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping, &cgroup_backing_dev_info);
}
return inode;
}
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 65d4204..0188d99 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -671,6 +671,48 @@ err:
}
EXPORT_SYMBOL(bdi_init);
+void mapping_set_bdi(struct address_space *mapping,
+ struct backing_dev_info *bdi)
+{
+ struct inode *inode = mapping->host;
+ struct backing_dev_info *old = mapping->a_bdi;
+
+ if (unlikely(old == bdi))
+ return;
+
+ spin_lock(&inode_lock);
+ if (!list_empty(&inode->i_list)) {
+ struct inode *i;
+
+ list_for_each_entry(i, &old->wb.b_dirty, i_list) {
+ if (inode == i) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &bdi->wb.b_dirty);
+ goto found;
+ }
+ }
+ list_for_each_entry(i, &old->wb.b_io, i_list) {
+ if (inode == i) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &bdi->wb.b_io);
+ goto found;
+ }
+ }
+ list_for_each_entry(i, &old->wb.b_more_io, i_list) {
+ if (inode == i) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &bdi->wb.b_more_io);
+ goto found;
+ }
+ }
+ BUG();
+ }
+found:
+ mapping->a_bdi = bdi;
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(mapping_set_bdi);
+
void bdi_destroy(struct backing_dev_info *bdi)
{
int i;
@@ -681,11 +723,24 @@ void bdi_destroy(struct backing_dev_info *bdi)
*/
if (bdi_has_dirty_io(bdi)) {
struct bdi_writeback *dst = &default_backing_dev_info.wb;
+ struct inode *i, *tmp;
spin_lock(&inode_lock);
- list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
- list_splice(&bdi->wb.b_io, &dst->b_io);
- list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
+ list_for_each_entry_safe(i, tmp, &bdi->wb.b_dirty, i_list) {
+ list_del(&i->i_list);
+ list_add_tail(&i->i_list, &dst->b_dirty);
+ i->i_mapping->a_bdi = bdi;
+ }
+ list_for_each_entry_safe(i, tmp, &bdi->wb.b_io, i_list) {
+ list_del(&i->i_list);
+ list_add_tail(&i->i_list, &dst->b_io);
+ i->i_mapping->a_bdi = bdi;
+ }
+ list_for_each_entry_safe(i, tmp, &bdi->wb.b_more_io, i_list) {
+ list_del(&i->i_list);
+ list_add_tail(&i->i_list, &dst->b_more_io);
+ i->i_mapping->a_bdi = bdi;
+ }
spin_unlock(&inode_lock);
}
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 8d723c9..72e3ac5 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -72,7 +72,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
else
endbyte--; /* inclusive */
- bdi = mapping->backing_dev_info;
+ bdi = mapping->a_bdi;
switch (advice) {
case POSIX_FADV_NORMAL:
@@ -116,7 +116,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
case POSIX_FADV_NOREUSE:
break;
case POSIX_FADV_DONTNEED:
- if (!bdi_write_congested(mapping->backing_dev_info))
+ if (!bdi_write_congested(mapping->a_bdi))
filemap_flush(mapping);
/* First and last FULL page! */
diff --git a/mm/filemap.c b/mm/filemap.c
index 3d4df44..454d5ec 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -136,7 +136,7 @@ void __remove_from_page_cache(struct page *page)
*/
if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
- dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(mapping->a_bdi, BDI_RECLAIMABLE);
}
}
@@ -2373,7 +2373,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
+ current->backing_dev_info = mapping->a_bdi;
written = 0;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 83364df..cdca914 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -409,7 +409,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
+ current->backing_dev_info = mapping->a_bdi;
ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
if (ret)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e3bccac..e2d50b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -489,7 +489,7 @@ static void balance_dirty_pages(struct address_space *mapping,
unsigned long pages_written = 0;
unsigned long pause = 1;
bool dirty_exceeded = false;
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
for (;;) {
struct writeback_control wbc = {
@@ -633,7 +633,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long *p;
ratelimit = ratelimit_pages;
- if (mapping->backing_dev_info->dirty_exceeded)
+ if (mapping->a_bdi->dirty_exceeded)
ratelimit = 8;
/*
@@ -964,7 +964,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- trace_wbc_writepage(wbc, mapping->backing_dev_info);
+ trace_wbc_writepage(wbc, mapping->a_bdi);
ret = (*writepage)(page, wbc, data);
if (unlikely(ret)) {
if (ret == AOP_WRITEPAGE_ACTIVATE) {
@@ -1121,7 +1121,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
{
if (mapping_cap_account_dirty(mapping)) {
__inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ __inc_bdi_stat(mapping->a_bdi, BDI_RECLAIMABLE);
task_dirty_inc(current);
task_io_account_write(PAGE_CACHE_SIZE);
}
@@ -1297,8 +1297,7 @@ int clear_page_dirty_for_io(struct page *page)
*/
if (TestClearPageDirty(page)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
- dec_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ dec_bdi_stat(mapping->a_bdi, BDI_RECLAIMABLE);
return 1;
}
return 0;
@@ -1313,7 +1312,7 @@ int test_clear_page_writeback(struct page *page)
int ret;
if (mapping) {
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
@@ -1342,7 +1341,7 @@ int test_set_page_writeback(struct page *page)
int ret;
if (mapping) {
- struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct backing_dev_info *bdi = mapping->a_bdi;
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a2..831b927 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -25,7 +25,7 @@
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
- ra->ra_pages = mapping->backing_dev_info->ra_pages;
+ ra->ra_pages = mapping->a_bdi->ra_pages;
ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
@@ -549,7 +549,7 @@ page_cache_async_readahead(struct address_space *mapping,
/*
* Defer asynchronous read-ahead on IO congestion.
*/
- if (bdi_read_congested(mapping->backing_dev_info))
+ if (bdi_read_congested(mapping->a_bdi))
return;
/* do read-ahead */
@@ -564,7 +564,7 @@ page_cache_async_readahead(struct address_space *mapping,
* explicitly kick off the IO.
*/
if (PageUptodate(page))
- blk_run_backing_dev(mapping->backing_dev_info, NULL);
+ blk_run_backing_dev(mapping->a_bdi, NULL);
#endif
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 080b09a..fbee46d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1588,7 +1588,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
if (inode) {
inode_init_owner(inode, dir, mode);
inode->i_blocks = 0;
- inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
+ mapping_new_set_bdi(inode->i_mapping, &shmem_backing_dev_info);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_generation = get_seconds();
info = SHMEM_I(inode);
diff --git a/mm/swap.c b/mm/swap.c
index 3ce7bc3..9352a37 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -501,7 +501,7 @@ void __init swap_setup(void)
unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
#ifdef CONFIG_SWAP
- bdi_init(swapper_space.backing_dev_info);
+ bdi_init(swapper_space.a_bdi);
#endif
/* Use a smaller cluster for small-memory machines */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e10f583..6496074 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -45,7 +45,7 @@ struct address_space swapper_space = {
.tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
.a_ops = &swap_aops,
.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
- .backing_dev_info = &swap_backing_dev_info,
+ .a_bdi = &swap_backing_dev_info,
};
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7c703ff..c14b755 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -116,7 +116,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
*/
WARN_ON(page_count(page) <= 1);
- bdi = bdev->bd_inode->i_mapping->backing_dev_info;
+ bdi = bdev->bd_inode->i_mapping->a_bdi;
blk_run_backing_dev(bdi, page);
}
up_read(&swap_unplug_sem);
diff --git a/mm/truncate.c b/mm/truncate.c
index ba887bf..bb79cef 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -75,8 +75,7 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
struct address_space *mapping = page->mapping;
if (mapping && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
- dec_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ dec_bdi_stat(mapping->a_bdi, BDI_RECLAIMABLE);
if (account_size)
task_io_account_cancelled_write(account_size);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c5dfabf..8f58773 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -366,7 +366,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
- if (!may_write_to_queue(mapping->backing_dev_info))
+ if (!may_write_to_queue(mapping->a_bdi))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
--
1.7.1
^ permalink raw reply related
* [RFD] Device Renaming Mechanism
From: Nao Nishijima @ 2010-10-08 5:23 UTC (permalink / raw)
To: gregkh, James.Bottomley, rwheeler
Cc: linux-kernel, linux-hotplug-devel, linux-hotplug,
masami.hiramatsu.pt
Hi,
I'm trying to solve a device name (or device node) mismatch problem caused by
device configuration changes. I have an idea for solving it by renaming devices,
and would like to request comments from kernel developers.
Device Name Mismatch
==========
Device names (e.g. sda) are assigned in the order in which drivers are loaded
and devices are recognized (usually starting from the smallest bus number). This
may cause a device name mismatch between the previous and the current boot
whenever the device configuration is changed. Suppose there is an application
that opens a disk via /dev/sdb. A device configuration change (hot-plug, device
breakdown) or a system configuration change (driver loading order, changes to
modprobe.conf) can change the order in which device names are assigned, so a
given device name does not always point to the same disk.
This mismatch causes unexpected disk accesses and misconfigured redundancy (e.g.
multipath, software RAID) if you use device file names in a configuration file.
Udev Solution
======
Typically, to avoid this problem, we use the persistent device names provided
by udev.
Udev creates persistent symbolic links (by-{id, uuid, path, label}) pointing to
each device based on device information. Applications access the device via these
symbolic links. Udev thus solves the mismatch between device name and physical
disk. However, the persistent name does not match the kernel's device name.
This mismatch causes the following four issues.
Issue 1: /proc/partitions and /proc/diskstats give you kernel device names
We have to run "ls -l /dev/disk/by-*" or "udevadm" to find the corresponding
persistent symbolic links.
Issue 2: dmesg outputs device names instead of persistent symbolic links
Users might not know which disk is sdX, because they identify the disk by a
persistent symbolic link.
Issue 3: Some system commands don't accept symbolic links (e.g. df, iostat, ...)
These commands just expect an sdX device name or check their input against /proc
information. This also occurs with several GNOME/KDE/etc. GUI sysadmin tools. :(
Issue 4: Undecided symbolic link
Even if we introduced a tool that maps device names to persistent symbolic
links to solve this, we could not determine a single symbolic link for a device,
because several symbolic links point to the same device file.
Therefore, I think symbolic links are not enough to solve the problem. We need a
better solution.
Proposal
====
I'd like to propose introducing a device renaming interface to solve these issues.
I think renaming devices in the kernel is the simplest way to resolve the mismatch
in dmesg and /proc information. This can be done while the kernel is booting up
(like ifcfg). Of course, udev still needs to assign a new name to each device via
that interface.
This proposal just requests adding a simple interface to the kernel, as shown
below. We can then continue to use user programs without any modification.
int rename_device(const char *newname, const char *oldname)
Any comments or suggestions are very welcome!
Best Regards,
--
Nao NISHIJIMA
2nd Dept. Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
Email: nao.nishijima.xt@hitachi.com
^ permalink raw reply
* [PATCH 17/18] fs: icache remove inode_lock
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
All the functionality that the inode_lock protected has now been
wrapped up in new independent locks and/or functionality. Hence the
inode_lock does not serve a purpose any longer and hence can now be
removed.
Based on work originally done by Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
Documentation/filesystems/Locking | 2 +-
Documentation/filesystems/porting | 10 ++++-
Documentation/filesystems/vfs.txt | 2 +-
fs/buffer.c | 2 +-
fs/drop_caches.c | 4 --
fs/fs-writeback.c | 47 ++++-----------------
fs/inode.c | 82 ++++---------------------------------
fs/logfs/inode.c | 2 +-
fs/notify/inode_mark.c | 11 ++---
fs/notify/mark.c | 1 -
fs/notify/vfsmount_mark.c | 1 -
fs/ntfs/inode.c | 4 +-
fs/ocfs2/inode.c | 2 +-
fs/quota/dquot.c | 12 +----
include/linux/fs.h | 2 +-
include/linux/writeback.h | 3 -
mm/backing-dev.c | 6 ---
mm/filemap.c | 6 +-
mm/rmap.c | 6 +-
19 files changed, 48 insertions(+), 157 deletions(-)
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2db4283..e92dad2 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -114,7 +114,7 @@ alloc_inode:
destroy_inode:
dirty_inode: (must not sleep)
write_inode:
-drop_inode: !!!inode_lock!!!
+drop_inode: !!!i_lock, sb_inode_list_lock!!!
evict_inode:
put_super: write
write_super: read
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index b12c895..ab07213 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -299,7 +299,7 @@ be used instead. It gets called whenever the inode is evicted, whether it has
remaining links or not. Caller does *not* evict the pagecache or inode-associated
metadata buffers; getting rid of those is responsibility of method, as it had
been for ->delete_inode().
- ->drop_inode() returns int now; it's called on final iput() with inode_lock
+ ->drop_inode() returns int now; it's called on final iput() with i_lock
held and it returns true if filesystems wants the inode to be dropped. As before,
generic_drop_inode() is still the default and it's been updated appropriately.
generic_delete_inode() is also alive and it consists simply of return 1. Note that
@@ -318,3 +318,11 @@ if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput(
may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
free the on-disk inode, you may end up doing that while ->write_inode() is writing
to it.
+
+
+[mandatory]
+ inode_lock is gone, replaced by fine grained locks. See fs/inode.c
+for details of what locks to replace inode_lock with in order to protect
+particular things. Most of the time, a filesystem only needs ->i_lock, which
+protects *all* the inode state and its membership on lists that was
+previously protected with inode_lock.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index ed7e5ef..405beb2 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -246,7 +246,7 @@ or bottom half).
should be synchronous or not, not all filesystems check this flag.
drop_inode: called when the last access to the inode is dropped,
- with the inode_lock spinlock held.
+ with the i_lock and sb_inode_list_lock spinlock held.
This method should be either NULL (normal UNIX filesystem
semantics) or "generic_delete_inode" (for filesystems that do not
diff --git a/fs/buffer.c b/fs/buffer.c
index b5c4153..99a9f8d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1145,7 +1145,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * and mapping->tree_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
{
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 00180dc..2105713 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -16,7 +16,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
struct inode *inode, *toput_inode = NULL;
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
@@ -28,15 +27,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
}
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
iput(toput_inode);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 404d449..f8eb27c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -184,7 +184,7 @@ static void requeue_io(struct inode *inode)
static void inode_sync_complete(struct inode *inode)
{
/*
- * Prevent speculative execution through spin_unlock(&inode_lock);
+ * Prevent speculative execution through spin_unlock(&inode->i_lock);
*/
smp_mb();
wake_up_bit(&inode->i_state, __I_SYNC);
@@ -283,25 +283,21 @@ static void inode_wait_for_writeback(struct inode *inode)
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
while (inode->i_state & I_SYNC) {
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
}
}
/*
- * Write out an inode's dirty pages. Called under inode_lock. Either the
- * caller has ref on the inode (either via iref_locked or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages. Either the caller has ref on the inode
+ * (either via iref_locked or via syscall against an fd) or the inode has
+ * I_WILL_FREE set (via generic_forget_inode)
*
* If `wait' is set, wait on the writeout.
*
* The whole writeout design is quite complex and fragile. We want to avoid
* starvation of particular inodes when others are being redirtied, prevent
* livelocks, etc.
- *
- * Called under inode_lock.
*/
static int
writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -346,7 +342,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
inode->i_state |= I_SYNC;
inode->i_state &= ~I_DIRTY_PAGES;
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
ret = do_writepages(mapping, wbc);
@@ -366,12 +361,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* due to delalloc, clear dirty metadata flags right before
* write_inode()
*/
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
int err = write_inode(inode, wbc);
@@ -379,7 +372,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
ret = err;
}
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
@@ -527,10 +519,8 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
redirty_tail(inode);
spin_unlock(&wb->b_lock);
}
- spin_unlock(&inode_lock);
iput(inode);
cond_resched();
- spin_lock(&inode_lock);
spin_lock(&wb->b_lock);
if (wbc->nr_to_write <= 0) {
wbc->more_io = 1;
@@ -550,9 +540,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
if (!wbc->wb_start)
wbc->wb_start = jiffies; /* livelock avoidance */
- spin_lock(&inode_lock);
spin_lock(&wb->b_lock);
-
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
@@ -572,7 +560,6 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
break;
}
spin_unlock(&wb->b_lock);
- spin_unlock(&inode_lock);
/* Leave any unwritten inodes on b_io */
}
@@ -581,13 +568,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
{
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- spin_lock(&inode_lock);
spin_lock(&wb->b_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
writeback_sb_inodes(sb, wb, wbc, true);
spin_unlock(&wb->b_lock);
- spin_unlock(&inode_lock);
}
/*
@@ -697,7 +682,6 @@ static long wb_writeback(struct bdi_writeback *wb,
* become available for writeback. Otherwise
* we'll just busyloop.
*/
- spin_lock(&inode_lock);
if (!list_empty(&wb->b_more_io)) {
spin_lock(&wb->b_lock);
inode = list_entry(wb->b_more_io.prev,
@@ -708,7 +692,6 @@ static long wb_writeback(struct bdi_writeback *wb,
inode_wait_for_writeback(inode);
spin_unlock(&inode->i_lock);
}
- spin_unlock(&inode_lock);
}
return wrote;
@@ -971,7 +954,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
@@ -1029,8 +1011,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
out_unlock:
spin_unlock(&inode->i_lock);
out:
- spin_unlock(&inode_lock);
-
if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
}
@@ -1063,7 +1043,6 @@ static void wait_sb_inodes(struct super_block *sb)
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
/*
@@ -1086,14 +1065,12 @@ static void wait_sb_inodes(struct super_block *sb)
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
/*
- * We hold a reference to 'inode' so it couldn't have
- * been removed from s_inodes list while we dropped the
- * inode_lock. We cannot iput the inode now as we can
- * be holding the last reference and we cannot iput it
- * under inode_lock. So we keep the reference and iput
- * it later.
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the
+ * s_inodes_lock. We cannot iput the inode now as we can be
+ * holding the last reference and we cannot iput it under
+ * s_inodes_lock. So we keep the reference and iput it later.
*/
iput(old_inode);
old_inode = inode;
@@ -1102,11 +1079,9 @@ static void wait_sb_inodes(struct super_block *sb)
cond_resched();
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
}
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
iput(old_inode);
}
@@ -1209,9 +1184,7 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0;
might_sleep();
- spin_lock(&inode_lock);
ret = writeback_single_inode(inode, &wbc);
- spin_unlock(&inode_lock);
if (sync)
inode_sync_wait(inode);
return ret;
@@ -1233,9 +1206,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret;
- spin_lock(&inode_lock);
ret = writeback_single_inode(inode, wbc);
- spin_unlock(&inode_lock);
return ret;
}
EXPORT_SYMBOL(sync_inode);
diff --git a/fs/inode.c b/fs/inode.c
index 4ec360e..c778ec4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -41,11 +41,9 @@
* inode_lru, i_lru
*
* Lock orders
- * inode_lock
* inode hash bucket lock
* inode->i_lock
*
- * inode_lock
* sb inode lock
* inode_lru_lock
* wb->b_lock
@@ -118,14 +116,6 @@ static inline void spin_unlock_bucket(struct inode_hash_bucket *b)
static struct inode_hash_bucket *inode_hashtable __read_mostly;
/*
- * A simple spinlock to protect the list manipulations.
- *
- * NOTE! You also have to own the lock if you change
- * the i_state of an inode while it is in use..
- */
-DEFINE_SPINLOCK(inode_lock);
-
-/*
* iprune_sem provides exclusion between the kswapd or try_to_free_pages
* icache shrinking path, and the umount path. Without this exclusion,
* by the time prune_icache calls iput for the inode whose pages it has
@@ -357,7 +347,7 @@ static void init_once(void *foo)
}
/*
- * inode_lock must be held
+ * i_lock must be held
*/
void iref_locked(struct inode *inode)
{
@@ -369,11 +359,9 @@ EXPORT_SYMBOL_GPL(iref_locked);
void iref(struct inode *inode)
{
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
iref_locked(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(iref);
@@ -439,11 +427,9 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
struct inode_hash_bucket *b;
b = inode_hashtable + hash(inode->i_sb, hashval);
- spin_lock(&inode_lock);
spin_lock_bucket(b);
hlist_bl_add_head(&inode->i_hash, &b->head);
spin_unlock_bucket(b);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
@@ -472,9 +458,7 @@ static void __remove_inode_hash(struct inode *inode)
*/
void remove_inode_hash(struct inode *inode)
{
- spin_lock(&inode_lock);
__remove_inode_hash(inode);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);
@@ -526,12 +510,10 @@ static void dispose_list(struct list_head *head)
evict(inode);
- spin_lock(&inode_lock);
__remove_inode_hash(inode);
spin_lock(&inode->i_sb->s_inodes_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&inode->i_sb->s_inodes_lock);
- spin_unlock(&inode_lock);
wake_up_inode(inode);
destroy_inode(inode);
@@ -558,7 +540,6 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
* change during umount anymore, and because iprune_sem keeps
* shrink_icache_memory() away.
*/
- cond_resched_lock(&inode_lock);
cond_resched_lock(&sb->s_inodes_lock);
next = next->next;
@@ -614,12 +595,10 @@ int invalidate_inodes(struct super_block *sb)
LIST_HEAD(throw_away);
down_write(&iprune_sem);
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
fsnotify_unmount_inodes(&sb->s_inodes);
busy = invalidate_list(sb, &sb->s_inodes, &throw_away);
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
dispose_list(&throw_away);
up_write(&iprune_sem);
@@ -644,7 +623,7 @@ static int can_unuse(struct inode *inode)
/*
* Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * a temporary list and then are freed outside LRU lock by dispose_list().
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
@@ -662,7 +641,6 @@ static void prune_icache(int nr_to_scan)
unsigned long reap = 0;
down_read(&iprune_sem);
- spin_lock(&inode_lock);
spin_lock(&inode_lru_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
@@ -690,12 +668,10 @@ static void prune_icache(int nr_to_scan)
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lru_lock);
- spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
- spin_lock(&inode_lock);
spin_lock(&inode_lru_lock);
spin_lock(&inode->i_lock);
@@ -733,7 +709,6 @@ static void prune_icache(int nr_to_scan)
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lru_lock);
- spin_unlock(&inode_lock);
dispose_list(&freeable);
up_read(&iprune_sem);
@@ -854,9 +829,9 @@ __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
* @inode: inode to mark in use
*
* When an inode is allocated it needs to be accounted for, added to the in use
- * list, the owning superblock and the inode hash. This needs to be done under
- * the inode_lock, so export a function to do this rather than the inode lock
- * itself. We calculate the hash list to add to here so it is all internal
+ * list, the owning superblock and the inode hash.
+ *
+ * We calculate the hash list to add to here so it is all internal
* which requires the caller to have already set up the inode number in the
* inode to add.
*/
@@ -864,9 +839,7 @@ void inode_add_to_lists(struct super_block *sb, struct inode *inode)
{
struct inode_hash_bucket *b = inode_hashtable + hash(sb, inode->i_ino);
- spin_lock(&inode_lock);
__inode_add_to_lists(sb, b, inode);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);
@@ -923,15 +896,11 @@ struct inode *new_inode(struct super_block *sb)
{
struct inode *inode;
- spin_lock_prefetch(&inode_lock);
-
inode = alloc_inode(sb);
if (inode) {
- spin_lock(&inode_lock);
inode->i_ino = last_ino_get();
inode->i_state = 0;
__inode_add_to_lists(sb, NULL, inode);
- spin_unlock(&inode_lock);
}
return inode;
}
@@ -990,7 +959,6 @@ static struct inode *get_new_inode(struct super_block *sb,
if (inode) {
struct inode *old;
- spin_lock(&inode_lock);
/* We released the lock, so.. */
old = find_inode(sb, b, test, data);
if (!old) {
@@ -999,7 +967,6 @@ static struct inode *get_new_inode(struct super_block *sb,
inode->i_state = I_NEW;
__inode_add_to_lists(sb, b, inode);
- spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@@ -1014,7 +981,6 @@ static struct inode *get_new_inode(struct super_block *sb,
*/
iref_locked(old);
spin_unlock(&old->i_lock);
- spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
@@ -1022,7 +988,6 @@ static struct inode *get_new_inode(struct super_block *sb,
return inode;
set_failed:
- spin_unlock(&inode_lock);
destroy_inode(inode);
return NULL;
}
@@ -1040,14 +1005,12 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
if (inode) {
struct inode *old;
- spin_lock(&inode_lock);
/* We released the lock, so.. */
old = find_inode_fast(sb, b, ino);
if (!old) {
inode->i_ino = ino;
__inode_add_to_lists(sb, b, inode);
inode->i_state = I_NEW;
- spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@@ -1062,7 +1025,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
*/
iref_locked(old);
spin_unlock(&old->i_lock);
- spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
@@ -1119,7 +1081,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
static unsigned int counter;
ino_t res;
- spin_lock(&inode_lock);
spin_lock(&unique_lock);
do {
if (counter <= max_reserved)
@@ -1127,7 +1088,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
res = counter++;
} while (!test_inode_iunique(sb, res));
spin_unlock(&unique_lock);
- spin_unlock(&inode_lock);
return res;
}
@@ -1135,7 +1095,6 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
iref_locked(inode);
@@ -1149,7 +1108,6 @@ struct inode *igrab(struct inode *inode)
*/
inode = NULL;
}
- spin_unlock(&inode_lock);
return inode;
}
EXPORT_SYMBOL(igrab);
@@ -1171,7 +1129,7 @@ EXPORT_SYMBOL(igrab);
*
* Otherwise NULL is returned.
*
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
*/
static struct inode *ifind(struct super_block *sb,
struct inode_hash_bucket *b,
@@ -1180,17 +1138,14 @@ static struct inode *ifind(struct super_block *sb,
{
struct inode *inode;
- spin_lock(&inode_lock);
inode = find_inode(sb, b, test, data);
if (inode) {
iref_locked(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
return inode;
}
- spin_unlock(&inode_lock);
return NULL;
}
@@ -1215,16 +1170,13 @@ static struct inode *ifind_fast(struct super_block *sb,
{
struct inode *inode;
- spin_lock(&inode_lock);
inode = find_inode_fast(sb, b, ino);
if (inode) {
iref_locked(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
}
- spin_unlock(&inode_lock);
return NULL;
}
@@ -1247,7 +1199,7 @@ static struct inode *ifind_fast(struct super_block *sb,
*
* Otherwise NULL is returned.
*
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
*/
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
@@ -1275,7 +1227,7 @@ EXPORT_SYMBOL(ilookup5_nowait);
*
* Otherwise NULL is returned.
*
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
*/
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
@@ -1326,7 +1278,7 @@ EXPORT_SYMBOL(ilookup);
* inode and this is returned locked, hashed, and with the I_NEW flag set. The
* file system gets to fill it in before unlocking it via unlock_new_inode().
*
- * Note both @test and @set are called with the inode_lock held, so can't sleep.
+ * Note both @test and @set are called with the i_lock held, so can't sleep.
*/
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *),
@@ -1391,7 +1343,6 @@ int insert_inode_locked(struct inode *inode)
while (1) {
struct hlist_bl_node *node;
struct inode *old = NULL;
- spin_lock(&inode_lock);
spin_lock_bucket(b);
hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
if (old->i_ino != ino)
@@ -1408,13 +1359,11 @@ int insert_inode_locked(struct inode *inode)
if (likely(!node)) {
hlist_bl_add_head(&inode->i_hash, &b->head);
spin_unlock_bucket(b);
- spin_unlock(&inode_lock);
return 0;
}
iref_locked(old);
spin_unlock(&old->i_lock);
spin_unlock_bucket(b);
- spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
iput(old);
@@ -1437,7 +1386,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
struct hlist_bl_node *node;
struct inode *old = NULL;
- spin_lock(&inode_lock);
spin_lock_bucket(b);
hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
if (old->i_sb != sb)
@@ -1454,13 +1402,11 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
if (likely(!node)) {
hlist_bl_add_head(&inode->i_hash, &b->head);
spin_unlock_bucket(b);
- spin_unlock(&inode_lock);
return 0;
}
iref_locked(old);
spin_unlock(&old->i_lock);
spin_unlock_bucket(b);
- spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
iput(old);
@@ -1523,15 +1469,12 @@ static void iput_final(struct inode *inode)
return;
}
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
return;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
write_inode_now(inode, 1);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
@@ -1556,7 +1499,6 @@ static void iput_final(struct inode *inode)
list_del_init(&inode->i_sb_list);
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
evict(inode);
remove_inode_hash(inode);
wake_up_inode(inode);
@@ -1576,7 +1518,6 @@ static void iput_final(struct inode *inode)
void iput(struct inode *inode)
{
if (inode) {
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
BUG_ON(inode->i_state & I_CLEAR);
@@ -1586,7 +1527,6 @@ void iput(struct inode *inode)
return;
}
spin_unlock(&inode->i_lock);
- spin_lock(&inode_lock);
}
}
EXPORT_SYMBOL(iput);
@@ -1766,8 +1706,6 @@ EXPORT_SYMBOL(inode_wait);
* It doesn't matter if I_NEW is not set initially, a call to
* wake_up_inode() after removing from the hash list will DTRT.
*
- * This is called with inode_lock held.
- *
* Called with i_lock held and returns with it dropped.
*/
static void __wait_on_freeing_inode(struct inode *inode)
@@ -1777,10 +1715,8 @@ static void __wait_on_freeing_inode(struct inode *inode)
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
schedule();
finish_wait(wq, &wait.wait);
- spin_lock(&inode_lock);
}
static __initdata unsigned long ihash_entries;
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ec..a67b607 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -286,7 +286,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
return ret;
}
-/* called with inode_lock held */
+/* called with i_lock held */
static int logfs_drop_inode(struct inode *inode)
{
struct logfs_super *super = logfs_super(inode->i_sb);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 8a05213..57c28ae 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -22,7 +22,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
+#include <linux/writeback.h>
#include <asm/atomic.h>
@@ -232,9 +232,8 @@ out:
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @list: list of inodes being unmounted (sb->s_inodes)
*
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called with iprune_mutex held, keeping shrink_icache_memory() at bay, and
+ * with sb->s_inodes_lock held to protect the super block's list of inodes.
*/
void fsnotify_unmount_inodes(struct list_head *list)
{
@@ -288,13 +287,12 @@ void fsnotify_unmount_inodes(struct list_head *list)
}
/*
- * We can safely drop inode_lock here because we hold
+ * We can safely drop sb->s_inodes_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
@@ -306,7 +304,6 @@ void fsnotify_unmount_inodes(struct list_head *list)
iput(inode);
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
}
}
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 325185e..50c0085 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,7 +91,6 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
-#include <linux/writeback.h> /* for inode_lock */
#include <asm/atomic.h>
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 56772b5..6f8eefe 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -23,7 +23,6 @@
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
#include <asm/atomic.h>
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b1..7c530f3 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -54,7 +54,7 @@
*
* Return 1 if the attributes match and 0 if not.
*
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the i_lock spin lock held so it is not
* allowed to sleep.
*/
int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
*
* Return 0 on success and -errno on error.
*
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the i_lock spin lock held so it is not
* allowed to sleep. (Hence the GFP_ATOMIC allocation.)
*/
static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index eece3e0..65c61e2 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1195,7 +1195,7 @@ void ocfs2_evict_inode(struct inode *inode)
ocfs2_clear_inode(inode);
}
-/* Called under inode_lock, with no more references on the
+/* Called under i_lock, with no more references on the
* struct inode, so it's safe here to check the flags field
* and to manipulate i_nlink without any other locks. */
int ocfs2_drop_inode(struct inode *inode)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index c7b5fc6..533cd95 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -76,7 +76,7 @@
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include <linux/writeback.h>
#include <asm/uaccess.h>
@@ -896,7 +896,6 @@ static void add_dquot_ref(struct super_block *sb, int type)
int reserved = 0;
#endif
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
@@ -914,21 +913,18 @@ static void add_dquot_ref(struct super_block *sb, int type)
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
iput(old_inode);
__dquot_initialize(inode, type);
/* We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the inode_lock.
+ * removed from s_inodes list while we dropped the lock.
* We cannot iput the inode now as we can be holding the last
- * reference and we cannot iput it under inode_lock. So we
+ * reference and we cannot iput it under the lock. So we
* keep the reference and iput it later. */
old_inode = inode;
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
}
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
iput(old_inode);
#ifdef CONFIG_QUOTA_DEBUG
@@ -1009,7 +1005,6 @@ static void remove_dquot_ref(struct super_block *sb, int type,
struct inode *inode;
int reserved = 0;
- spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
/*
@@ -1025,7 +1020,6 @@ static void remove_dquot_ref(struct super_block *sb, int type,
}
}
spin_unlock(&sb->s_inodes_lock);
- spin_unlock(&inode_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
printk(KERN_WARNING "VFS (%s): Writes happened after quota"
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 54c4e86..453e0b4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1588,7 +1588,7 @@ struct super_operations {
};
/*
- * Inode state bits. Protected by inode_lock.
+ * Inode state bits. Protected by i_lock.
*
* Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
* I_DIRTY_DATASYNC and I_DIRTY_PAGES.
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b182ccc..67be7a2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -9,9 +9,6 @@
struct backing_dev_info;
-extern spinlock_t inode_lock;
-
-
/*
* fs/fs-writeback.c
*/
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 74e8269..0c0586b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -73,7 +73,6 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
struct inode *inode;
nr_wb = nr_dirty = nr_io = nr_more_io = 0;
- spin_lock(&inode_lock);
spin_lock(&wb->b_lock);
list_for_each_entry(inode, &wb->b_dirty, i_io)
nr_dirty++;
@@ -82,7 +81,6 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
list_for_each_entry(inode, &wb->b_more_io, i_io)
nr_more_io++;
spin_unlock(&wb->b_lock);
- spin_unlock(&inode_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
@@ -695,7 +693,6 @@ void mapping_set_bdi(struct address_space *mapping,
if (unlikely(old == bdi))
return;
- spin_lock(&inode_lock);
bdi_lock_two(bdi, old);
if (!list_empty(&inode->i_io)) {
struct inode *i;
@@ -727,7 +724,6 @@ found:
mapping->a_bdi = bdi;
spin_unlock(&bdi->wb.b_lock);
spin_unlock(&old->wb.b_lock);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(mapping_set_bdi);
@@ -743,7 +739,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
struct bdi_writeback *dst = &default_backing_dev_info.wb;
struct inode *i, *tmp;
- spin_lock(&inode_lock);
bdi_lock_two(bdi, &default_backing_dev_info);
list_for_each_entry_safe(i, tmp, &bdi->wb.b_dirty, i_io) {
list_del(&i->i_io);
@@ -762,7 +757,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
}
spin_unlock(&bdi->wb.b_lock);
spin_unlock(&dst->b_lock);
- spin_unlock(&inode_lock);
}
bdi_unregister(bdi);
diff --git a/mm/filemap.c b/mm/filemap.c
index 454d5ec..857fb34 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -80,7 +80,7 @@
* ->i_mutex
* ->i_alloc_sem (various)
*
- * ->inode_lock
+ * ->i_lock
* ->sb_lock (fs/fs-writeback.c)
* ->mapping->tree_lock (__sync_single_inode)
*
@@ -98,8 +98,8 @@
* ->zone.lru_lock (check_pte_range->isolate_lru_page)
* ->private_lock (page_remove_rmap->set_page_dirty)
* ->tree_lock (page_remove_rmap->set_page_dirty)
- * ->inode_lock (page_remove_rmap->set_page_dirty)
- * ->inode_lock (zap_pte_range->set_page_dirty)
+ * ->i_lock (page_remove_rmap->set_page_dirty)
+ * ->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
*
* ->task->proc_lock
diff --git a/mm/rmap.c b/mm/rmap.c
index 92e6757..dbfccae 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -31,11 +31,11 @@
* swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others)
* mapping->private_lock (in __set_page_dirty_buffers)
- * inode_lock (in set_page_dirty's __mark_inode_dirty)
- * sb_lock (within inode_lock in fs/fs-writeback.c)
+ * i_lock (in set_page_dirty's __mark_inode_dirty)
+ * sb_lock (within i_lock in fs/fs-writeback.c)
* mapping->tree_lock (widely used, in set_page_dirty,
* in arch-dependent flush_dcache_mmap_lock,
- * within inode_lock in __sync_single_inode)
+ * within i_lock in __sync_single_inode)
*
* (code doesn't rely on that order so it could be switched around)
* ->tasklist_lock
--
1.7.1
^ permalink raw reply related
* [PATCH 14/18] fs: Protect inode->i_state with the inode->i_lock
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
We currently protect the per-inode state flags with the inode_lock.
Using a global lock to protect per-object state is overkill when we
could use a per-inode lock to protect the state. Use the
inode->i_lock for this, and wrap all the state changes and checks
with the inode->i_lock.
Based on work originally written by Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/drop_caches.c | 9 +++--
fs/fs-writeback.c | 49 ++++++++++++++++++++++------
fs/inode.c | 83 ++++++++++++++++++++++++++++++++---------------
fs/nilfs2/gcdat.c | 1 +
fs/notify/inode_mark.c | 10 ++++--
fs/quota/dquot.c | 12 ++++---
6 files changed, 115 insertions(+), 49 deletions(-)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c808ca8..00180dc 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -19,11 +19,14 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
- continue;
- if (inode->i_mapping->nrpages == 0)
+ spin_lock(&inode->i_lock);
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ (inode->i_mapping->nrpages == 0)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49d44cc..404d449 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -281,10 +281,12 @@ static void inode_wait_for_writeback(struct inode *inode)
wait_queue_head_t *wqh;
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- while (inode->i_state & I_SYNC) {
+ while (inode->i_state & I_SYNC) {
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
}
}
@@ -309,7 +311,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
unsigned dirty;
int ret;
- if (!iref_read(inode))
+ spin_lock(&inode->i_lock);
+ if (!inode->i_ref)
WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
else
WARN_ON(inode->i_state & I_WILL_FREE);
@@ -324,6 +327,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* completed a full scan of b_io.
*/
if (wbc->sync_mode != WB_SYNC_ALL) {
+ spin_unlock(&inode->i_lock);
spin_lock(&bdi->wb.b_lock);
requeue_io(inode);
spin_unlock(&bdi->wb.b_lock);
@@ -341,6 +345,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
/* Set I_SYNC, reset I_DIRTY_PAGES */
inode->i_state |= I_SYNC;
inode->i_state &= ~I_DIRTY_PAGES;
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
ret = do_writepages(mapping, wbc);
@@ -362,8 +367,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* write_inode()
*/
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
@@ -373,6 +380,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -381,6 +389,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* sometimes bales out without doing anything.
*/
inode->i_state |= I_DIRTY_PAGES;
+ spin_unlock(&inode->i_lock);
spin_lock(&bdi->wb.b_lock);
if (wbc->nr_to_write <= 0) {
/*
@@ -405,16 +414,21 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* submission or metadata updates after data IO
* completion.
*/
+ spin_unlock(&inode->i_lock);
spin_lock(&bdi->wb.b_lock);
redirty_tail(inode);
spin_unlock(&bdi->wb.b_lock);
} else {
/* The inode is clean */
+ spin_unlock(&inode->i_lock);
spin_lock(&bdi->wb.b_lock);
list_del_init(&inode->i_io);
spin_unlock(&bdi->wb.b_lock);
inode_lru_list_add(inode);
}
+ } else {
+ /* freer will clean up */
+ spin_unlock(&inode->i_lock);
}
inode_sync_complete(inode);
return ret;
@@ -483,7 +497,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
return 0;
}
+ spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_WILL_FREE | I_FREEING)) {
+ spin_unlock(&inode->i_lock);
requeue_io(inode);
continue;
}
@@ -491,10 +507,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
* Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock.
*/
- if (inode_dirtied_after(inode, wbc->wb_start))
+ if (inode_dirtied_after(inode, wbc->wb_start)) {
+ spin_unlock(&inode->i_lock);
return 1;
+ }
- spin_lock(&inode->i_lock);
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&wb->b_lock);
@@ -687,7 +704,9 @@ static long wb_writeback(struct bdi_writeback *wb,
struct inode, i_io);
spin_unlock(&wb->b_lock);
trace_wbc_writeback_wait(&wbc, wb->bdi);
+ spin_lock(&inode->i_lock);
inode_wait_for_writeback(inode);
+ spin_unlock(&inode->i_lock);
}
spin_unlock(&inode_lock);
}
@@ -953,6 +972,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
block_dump___mark_inode_dirty(inode);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
@@ -964,7 +984,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* superblock list, based upon its state.
*/
if (inode->i_state & I_SYNC)
- goto out;
+ goto out_unlock;
/*
* Only add valid (hashed) inodes to the superblock's
@@ -972,10 +992,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
*/
if (!S_ISBLK(inode->i_mode)) {
if (hlist_bl_unhashed(&inode->i_hash))
- goto out;
+ goto out_unlock;
}
if (inode->i_state & I_FREEING)
- goto out;
+ goto out_unlock;
/*
* If the inode was already on b_dirty/b_io/b_more_io, don't
@@ -998,12 +1018,16 @@ void __mark_inode_dirty(struct inode *inode, int flags)
wakeup_bdi = true;
}
- spin_lock(&bdi->wb.b_lock);
inode->dirtied_when = jiffies;
+ spin_unlock(&inode->i_lock);
+ spin_lock(&bdi->wb.b_lock);
list_move(&inode->i_io, &bdi->wb.b_dirty);
spin_unlock(&bdi->wb.b_lock);
+ goto out;
}
}
+out_unlock:
+ spin_unlock(&inode->i_lock);
out:
spin_unlock(&inode_lock);
@@ -1052,12 +1076,15 @@ static void wait_sb_inodes(struct super_block *sb)
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
- continue;
+ spin_lock(&inode->i_lock);
mapping = inode->i_mapping;
- if (mapping->nrpages == 0)
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ (mapping->nrpages == 0)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
/*
diff --git a/fs/inode.c b/fs/inode.c
index 4ad7900..d3bd08a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -30,7 +30,7 @@
* Locking rules.
*
* inode->i_lock protects:
- * i_ref
+ * i_ref i_state
* inode_hash_bucket lock protects:
* inode hash table, i_hash
* sb inode lock protects:
@@ -182,7 +182,7 @@ int proc_nr_inodes(ctl_table *table, int write,
static void wake_up_inode(struct inode *inode)
{
/*
- * Prevent speculative execution through spin_unlock(&inode_lock);
+ * Prevent speculative execution through spin_unlock(&inode->i_lock);
*/
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
@@ -361,6 +361,8 @@ static void init_once(void *foo)
*/
void iref_locked(struct inode *inode)
{
+ assert_spin_locked(&inode->i_lock);
+
inode->i_ref++;
}
EXPORT_SYMBOL_GPL(iref_locked);
@@ -484,7 +486,9 @@ void end_writeback(struct inode *inode)
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
inode_sync_wait(inode);
+ spin_lock(&inode->i_lock);
inode->i_state = I_FREEING | I_CLEAR;
+ spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(end_writeback);
@@ -561,17 +565,18 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
if (tmp == head)
break;
inode = list_entry(tmp, struct inode, i_sb_list);
- if (inode->i_state & I_NEW)
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & I_NEW) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
invalidate_inode_buffers(inode);
- spin_lock(&inode->i_lock);
if (!inode->i_ref) {
struct backing_dev_info *bdi = inode_to_bdi(inode);
- spin_unlock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
-
+ spin_unlock(&inode->i_lock);
/*
* move the inode off the IO lists and LRU once
@@ -625,11 +630,12 @@ EXPORT_SYMBOL(invalidate_inodes);
static int can_unuse(struct inode *inode)
{
+ assert_spin_locked(&inode->i_lock);
if (inode->i_state)
return 0;
if (inode_has_buffers(inode))
return 0;
- if (iref_read(inode))
+ if (inode->i_ref)
return 0;
if (inode->i_data.nrpages)
return 0;
@@ -675,9 +681,9 @@ static void prune_icache(int nr_to_scan)
continue;
}
if (inode->i_state & I_REFERENCED) {
+ inode->i_state &= ~I_REFERENCED;
spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, &inode_lru);
- inode->i_state &= ~I_REFERENCED;
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -691,6 +697,7 @@ static void prune_icache(int nr_to_scan)
iput(inode);
spin_lock(&inode_lock);
spin_lock(&inode_lru_lock);
+ spin_lock(&inode->i_lock);
/*
* if we can't reclaim this inod immediately, give it
@@ -699,12 +706,14 @@ static void prune_icache(int nr_to_scan)
*/
if (!can_unuse(inode)) {
list_move(&inode->i_lru, &inode_lru);
+ spin_unlock(&inode->i_lock);
continue;
}
- } else
- spin_unlock(&inode->i_lock);
+ }
+
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
+ spin_unlock(&inode->i_lock);
/*
* move the inode off the IO lists and LRU once
@@ -761,7 +770,7 @@ static struct shrinker icache_shrinker = {
static void __wait_on_freeing_inode(struct inode *inode);
/*
- * Called with the inode lock held.
+ * Returns with inode->i_lock held.
* NOTE: we are not increasing the inode-refcount, you must call iref_locked()
* by hand after calling find_inode now! This simplifies iunique and won't
* add any additional branch in the common code.
@@ -779,8 +788,11 @@ repeat:
hlist_bl_for_each_entry(inode, node, &b->head, i_hash) {
if (inode->i_sb != sb)
continue;
- if (!test(inode, data))
+ spin_lock(&inode->i_lock);
+ if (!test(inode, data)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
spin_unlock_bucket(b);
__wait_on_freeing_inode(inode);
@@ -810,6 +822,7 @@ repeat:
continue;
if (inode->i_sb != sb)
continue;
+ spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
spin_unlock_bucket(b);
__wait_on_freeing_inode(inode);
@@ -884,9 +897,9 @@ struct inode *new_inode(struct super_block *sb)
inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode_lock);
- __inode_add_to_lists(sb, NULL, inode);
inode->i_ino = ++last_ino;
inode->i_state = 0;
+ __inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode_lock);
}
return inode;
@@ -953,8 +966,8 @@ static struct inode *get_new_inode(struct super_block *sb,
if (set(inode, data))
goto set_failed;
- __inode_add_to_lists(sb, b, inode);
inode->i_state = I_NEW;
+ __inode_add_to_lists(sb, b, inode);
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -968,7 +981,6 @@ static struct inode *get_new_inode(struct super_block *sb,
* us. Use the old inode instead of the one we just
* allocated.
*/
- spin_lock(&old->i_lock);
iref_locked(old);
spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
@@ -1017,7 +1029,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
* us. Use the old inode instead of the one we just
* allocated.
*/
- spin_lock(&old->i_lock);
iref_locked(old);
spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
@@ -1071,17 +1082,19 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
- spin_lock(&inode->i_lock);
iref_locked(inode);
spin_unlock(&inode->i_lock);
- } else
+ } else {
+ spin_unlock(&inode->i_lock);
/*
* Handle the case where s_op->clear_inode is not been
* called yet, and somebody is calling igrab
* while the inode is getting freed.
*/
inode = NULL;
+ }
spin_unlock(&inode_lock);
return inode;
}
@@ -1116,7 +1129,6 @@ static struct inode *ifind(struct super_block *sb,
spin_lock(&inode_lock);
inode = find_inode(sb, b, test, data);
if (inode) {
- spin_lock(&inode->i_lock);
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -1152,7 +1164,6 @@ static struct inode *ifind_fast(struct super_block *sb,
spin_lock(&inode_lock);
inode = find_inode_fast(sb, b, ino);
if (inode) {
- spin_lock(&inode->i_lock);
iref_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -1318,6 +1329,10 @@ int insert_inode_locked(struct inode *inode)
ino_t ino = inode->i_ino;
struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+ /*
+ * Nobody else can see the new inode yet, so it is safe to set flags
+ * without locking here.
+ */
inode->i_state |= I_NEW;
while (1) {
struct hlist_bl_node *node;
@@ -1329,8 +1344,11 @@ int insert_inode_locked(struct inode *inode)
continue;
if (old->i_sb != sb)
continue;
- if (old->i_state & (I_FREEING|I_WILL_FREE))
+ spin_lock(&old->i_lock);
+ if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+ spin_unlock(&old->i_lock);
continue;
+ }
break;
}
if (likely(!node)) {
@@ -1339,7 +1357,6 @@ int insert_inode_locked(struct inode *inode)
spin_unlock(&inode_lock);
return 0;
}
- spin_lock(&old->i_lock);
iref_locked(old);
spin_unlock(&old->i_lock);
spin_unlock_bucket(b);
@@ -1373,8 +1390,11 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
continue;
if (!test(old, data))
continue;
- if (old->i_state & (I_FREEING|I_WILL_FREE))
+ spin_lock(&old->i_lock);
+ if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+ spin_unlock(&old->i_lock);
continue;
+ }
break;
}
if (likely(!node)) {
@@ -1383,7 +1403,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
spin_unlock(&inode_lock);
return 0;
}
- spin_lock(&old->i_lock);
iref_locked(old);
spin_unlock(&old->i_lock);
spin_unlock_bucket(b);
@@ -1433,6 +1452,8 @@ static void iput_final(struct inode *inode)
struct backing_dev_info *bdi = inode_to_bdi(inode);
int drop;
+ assert_spin_locked(&inode->i_lock);
+
if (op && op->drop_inode)
drop = op->drop_inode(inode);
else
@@ -1443,22 +1464,28 @@ static void iput_final(struct inode *inode)
inode->i_state |= I_REFERENCED;
if (!(inode->i_state & (I_DIRTY|I_SYNC)) &&
list_empty(&inode->i_lru)) {
+ spin_unlock(&inode->i_lock);
inode_lru_list_add(inode);
+ return;
}
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
write_inode_now(inode, 1);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
__remove_inode_hash(inode);
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
+ spin_unlock(&inode->i_lock);
/*
* move the inode off the IO lists and LRU once I_FREEING is set so
@@ -1495,13 +1522,12 @@ static void iput_final(struct inode *inode)
void iput(struct inode *inode)
{
if (inode) {
- BUG_ON(inode->i_state & I_CLEAR);
-
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ BUG_ON(inode->i_state & I_CLEAR);
+
inode->i_ref--;
if (inode->i_ref == 0) {
- spin_unlock(&inode->i_lock);
iput_final(inode);
return;
}
@@ -1687,6 +1713,8 @@ EXPORT_SYMBOL(inode_wait);
* wake_up_inode() after removing from the hash list will DTRT.
*
* This is called with inode_lock held.
+ *
+ * Called with i_lock held and returns with it dropped.
*/
static void __wait_on_freeing_inode(struct inode *inode)
{
@@ -1694,6 +1722,7 @@ static void __wait_on_freeing_inode(struct inode *inode)
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
schedule();
finish_wait(wq, &wait.wait);
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index 84a45d1..c51f0e8 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -27,6 +27,7 @@
#include "page.h"
#include "mdt.h"
+/* XXX: what protects i_state? */
int nilfs_init_gcdat_inode(struct the_nilfs *nilfs)
{
struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 3389ff0..8a05213 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -249,8 +249,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
/*
* If the inode is not referenced, the inode cannot have any
@@ -258,9 +261,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
* actually evict all unreferenced inodes from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- spin_lock(&inode->i_lock);
- if (!inode->i_ref)
+ if (!inode->i_ref) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
need_iput_tmp = need_iput;
need_iput = NULL;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b7cbc41..c7b5fc6 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -899,18 +899,20 @@ static void add_dquot_ref(struct super_block *sb, int type)
spin_lock(&inode_lock);
spin_lock(&sb->s_inodes_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+ spin_lock(&inode->i_lock);
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ !atomic_read(&inode->i_writecount) ||
+ !dqinit_needed(inode, type)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
#ifdef CONFIG_QUOTA_DEBUG
if (unlikely(inode_get_rsv_space(inode) > 0))
reserved = 1;
#endif
- if (!atomic_read(&inode->i_writecount))
- continue;
- if (!dqinit_needed(inode, type))
- continue;
iref_locked(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inodes_lock);
spin_unlock(&inode_lock);
--
1.7.1
^ permalink raw reply related
* [PATCH 16/18] fs: Make iunique independent of inode_lock
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Nick Piggin <npiggin@suse.de>
Before removing the inode_lock, the iunique counter needs to be made
independent of the inode_lock. Add a new lock to protect the iunique
counter and nest it inside the inode_lock to provide the same
protection that the inode_lock currently provides.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/inode.c | 33 ++++++++++++++++++++++++++++-----
1 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 13e1325..4ec360e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1070,6 +1070,30 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
return inode;
}
+/*
+ * search the inode cache for a matching inode number.
+ * If we find one, then the inode number we are trying to
+ * allocate is not unique and so we should not use it.
+ *
+ * Returns 1 if the inode number is unique, 0 if it is not.
+ */
+static int test_inode_iunique(struct super_block * sb, unsigned long ino)
+{
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+ struct hlist_bl_node *node;
+ struct inode *inode;
+
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(inode, node, &b->head, i_hash) {
+ if (inode->i_ino == ino && inode->i_sb == sb) {
+ spin_unlock_bucket(b);
+ return 0;
+ }
+ }
+ spin_unlock_bucket(b);
+ return 1;
+}
+
/**
* iunique - get a unique inode number
* @sb: superblock
@@ -1091,19 +1115,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
* error if st_ino won't fit in target struct field. Use 32bit counter
* here to attempt to avoid that.
*/
+ static DEFINE_SPINLOCK(unique_lock);
static unsigned int counter;
- struct inode *inode;
- struct inode_hash_bucket *b;
ino_t res;
spin_lock(&inode_lock);
+ spin_lock(&unique_lock);
do {
if (counter <= max_reserved)
counter = max_reserved + 1;
res = counter++;
- b = inode_hashtable + hash(sb, res);
- inode = find_inode_fast(sb, b, res);
- } while (inode != NULL);
+ } while (!test_inode_iunique(sb, res));
+ spin_unlock(&unique_lock);
spin_unlock(&inode_lock);
return res;
--
1.7.1
^ permalink raw reply related
* [PATCH 15/18] fs: introduce a per-cpu last_ino allocator
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Eric Dumazet <eric.dumazet@gmail.com>
new_inode() dirties a contended cache line to get increasing
inode numbers. This limits performance on workloads that cause
significant parallel inode allocation.
Solve this problem by using a per_cpu variable fed by the shared
last_ino in batches of 1024 allocations. This reduces contention on
the shared last_ino, and gives the same spread of inode numbers as before
(i.e. same wraparound after 2^32 allocations).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/inode.c | 45 ++++++++++++++++++++++++++++++++++++++-------
1 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index d3bd08a..13e1325 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -870,6 +870,43 @@ void inode_add_to_lists(struct super_block *sb, struct inode *inode)
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);
+/*
+ * Each cpu owns a range of LAST_INO_BATCH numbers.
+ * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
+ * to renew the exhausted range.
+ *
+ * This does not significantly increase overflow rate because every CPU can
+ * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
+ * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
+ * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
+ * overflow rate by 2x, which does not seem too significant.
+ *
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
+ */
+#define LAST_INO_BATCH 1024
+static DEFINE_PER_CPU(unsigned int, last_ino);
+
+static unsigned int last_ino_get(void)
+{
+ unsigned int *p = &get_cpu_var(last_ino);
+ unsigned int res = *p;
+
+#ifdef CONFIG_SMP
+ if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
+ static atomic_t shared_last_ino;
+ int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
+
+ res = next - LAST_INO_BATCH;
+ }
+#endif
+
+ *p = ++res;
+ put_cpu_var(last_ino);
+ return res;
+}
+
/**
* new_inode - obtain an inode
* @sb: superblock
@@ -884,12 +921,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
*/
struct inode *new_inode(struct super_block *sb)
{
- /*
- * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
- * error if st_ino won't fit in target struct field. Use 32bit counter
- * here to attempt to avoid that.
- */
- static unsigned int last_ino;
struct inode *inode;
spin_lock_prefetch(&inode_lock);
@@ -897,7 +928,7 @@ struct inode *new_inode(struct super_block *sb)
inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode_lock);
- inode->i_ino = ++last_ino;
+ inode->i_ino = last_ino_get();
inode->i_state = 0;
__inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode_lock);
--
1.7.1
^ permalink raw reply related
* [PATCH 18/18] fs: Reduce inode I_FREEING and factor inode disposal
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Inode reclaim can push many inodes into the I_FREEING state before
it actually frees them. During the time it gathers these inodes, it
can call iput(), invalidate_mapping_pages, be preempted, etc. As a
result, holding inodes in I_FREEING can cause pauses.
After the inode scalability work, there is not a big reason to batch
up inodes to reclaim them, so we can dispose them as they are found
from the LRU. With similar reasoning, we can do the same during
unmount, completely removing the need for the dispose_list()
function.
Further, iput_final() does the same inode cleanup as reclaim and
unmount, so convert them all to use a single function for destroying
inodes. This is written such that the callers can optimise list
removals to avoid unnecessary lock round trips when removing inodes
from lists.
Based on a patch originally from Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/inode.c | 150 +++++++++++++++++++++++++-----------------------------------
1 files changed, 63 insertions(+), 87 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index c778ec4..03ddd19 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -29,6 +29,8 @@
/*
* Locking rules.
*
+ * inode->i_lock is *always* the innermost lock.
+ *
* inode->i_lock protects:
* i_ref i_state
* inode_hash_bucket lock protects:
@@ -46,8 +48,15 @@
*
* sb inode lock
* inode_lru_lock
- * wb->b_lock
- * inode->i_lock
+ * wb->b_lock
+ * inode->i_lock
+ *
+ * wb->b_lock
+ * sb_lock (pin sb for writeback)
+ * inode->i_lock
+ *
+ * inode_lru
+ * inode->i_lock
*/
/*
* This is needed for the following functions:
@@ -434,13 +443,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
EXPORT_SYMBOL(__insert_inode_hash);
/**
- * __remove_inode_hash - remove an inode from the hash
+ * remove_inode_hash - remove an inode from the hash
* @inode: inode to unhash
*
- * Remove an inode from the superblock. inode->i_lock must be
- * held.
+ * Remove an inode from the superblock.
*/
-static void __remove_inode_hash(struct inode *inode)
+void remove_inode_hash(struct inode *inode)
{
struct inode_hash_bucket *b;
@@ -449,17 +457,6 @@ static void __remove_inode_hash(struct inode *inode)
hlist_bl_del_init(&inode->i_hash);
spin_unlock_bucket(b);
}
-
-/**
- * remove_inode_hash - remove an inode from the hash
- * @inode: inode to unhash
- *
- * Remove an inode from the superblock.
- */
-void remove_inode_hash(struct inode *inode)
-{
- __remove_inode_hash(inode);
-}
EXPORT_SYMBOL(remove_inode_hash);
void end_writeback(struct inode *inode)
@@ -494,37 +491,53 @@ static void evict(struct inode *inode)
}
/*
- * dispose_list - dispose of the contents of a local list
- * @head: the head of the list to free
+ * Free the inode passed in, removing it from the lists it is still connected
+ * to but avoiding unnecessary lock round-trips for the lists it is no longer
+ * on.
*
- * Dispose-list gets a local list with local inodes in it, so it doesn't
- * need to worry about list corruption and SMP locks.
+ * An inode must already be marked I_FREEING so that we avoid the inode being
+ * moved back onto lists if we race with other code that manipulates the lists
+ * (e.g. writeback_single_inode).
*/
-static void dispose_list(struct list_head *head)
+static void dispose_one_inode(struct inode *inode)
{
- while (!list_empty(head)) {
- struct inode *inode;
+ BUG_ON(!(inode->i_state & I_FREEING));
- inode = list_first_entry(head, struct inode, i_lru);
- list_del_init(&inode->i_lru);
+ /*
+ * move the inode off the IO lists and LRU once
+ * I_FREEING is set so that it won't get moved back on
+ * there if it is dirty.
+ */
+ if (!list_empty(&inode->i_io)) {
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
- evict(inode);
+ spin_lock(&bdi->wb.b_lock);
+ list_del_init(&inode->i_io);
+ spin_unlock(&bdi->wb.b_lock);
+ }
+
+ if (!list_empty(&inode->i_lru))
+ inode_lru_list_del(inode);
- __remove_inode_hash(inode);
+ if (!list_empty(&inode->i_sb_list)) {
spin_lock(&inode->i_sb->s_inodes_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&inode->i_sb->s_inodes_lock);
-
- wake_up_inode(inode);
- destroy_inode(inode);
}
+
+ evict(inode);
+
+ remove_inode_hash(inode);
+ wake_up_inode(inode);
+ BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
+ destroy_inode(inode);
}
+
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct super_block *sb, struct list_head *head,
- struct list_head *dispose)
+static int invalidate_list(struct super_block *sb, struct list_head *head)
{
struct list_head *next;
int busy = 0;
@@ -553,30 +566,22 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
}
invalidate_inode_buffers(inode);
if (!inode->i_ref) {
- struct backing_dev_info *bdi = inode_to_bdi(inode);
-
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- /*
- * move the inode off the IO lists and LRU once
- * I_FREEING is set so that it won't get moved back on
- * there if it is dirty.
- */
- spin_lock(&bdi->wb.b_lock);
- list_del_init(&inode->i_io);
- spin_unlock(&bdi->wb.b_lock);
+ /* save a lock round trip by removing the inode here. */
+ list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb->s_inodes_lock);
- spin_lock(&inode_lru_lock);
- list_move(&inode->i_lru, dispose);
- spin_unlock(&inode_lru_lock);
+ dispose_one_inode(inode);
- percpu_counter_dec(&nr_inodes_unused);
+ spin_lock(&sb->s_inodes_lock);
continue;
}
spin_unlock(&inode->i_lock);
busy = 1;
+
}
return busy;
}
@@ -592,15 +597,12 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
int invalidate_inodes(struct super_block *sb)
{
int busy;
- LIST_HEAD(throw_away);
down_write(&iprune_sem);
spin_lock(&sb->s_inodes_lock);
fsnotify_unmount_inodes(&sb->s_inodes);
- busy = invalidate_list(sb, &sb->s_inodes, &throw_away);
+ busy = invalidate_list(sb, &sb->s_inodes);
spin_unlock(&sb->s_inodes_lock);
-
- dispose_list(&throw_away);
up_write(&iprune_sem);
return busy;
@@ -636,7 +638,6 @@ static int can_unuse(struct inode *inode)
*/
static void prune_icache(int nr_to_scan)
{
- LIST_HEAD(freeable);
int nr_scanned;
unsigned long reap = 0;
@@ -644,7 +645,6 @@ static void prune_icache(int nr_to_scan)
spin_lock(&inode_lru_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
- struct backing_dev_info *bdi;
if (list_empty(&inode_lru))
break;
@@ -691,18 +691,15 @@ static void prune_icache(int nr_to_scan)
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- /*
- * move the inode off the IO lists and LRU once
- * I_FREEING is set so that it won't get moved back on
- * there if it is dirty.
- */
- bdi = inode_to_bdi(inode);
- spin_lock(&bdi->wb.b_lock);
- list_del_init(&inode->i_io);
- spin_unlock(&bdi->wb.b_lock);
-
- list_move(&inode->i_lru, &freeable);
+ /* save a lock round trip by removing the inode here. */
+ list_del_init(&inode->i_lru);
percpu_counter_dec(&nr_inodes_unused);
+ spin_unlock(&inode_lru_lock);
+
+ dispose_one_inode(inode);
+ cond_resched();
+
+ spin_lock(&inode_lru_lock);
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -710,7 +707,6 @@ static void prune_icache(int nr_to_scan)
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lru_lock);
- dispose_list(&freeable);
up_read(&iprune_sem);
}
@@ -1449,7 +1445,6 @@ static void iput_final(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
const struct super_operations *op = inode->i_sb->s_op;
- struct backing_dev_info *bdi = inode_to_bdi(inode);
int drop;
assert_spin_locked(&inode->i_lock);
@@ -1475,35 +1470,16 @@ static void iput_final(struct inode *inode)
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode->i_lock);
write_inode_now(inode, 1);
+ remove_inode_hash(inode);
spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- __remove_inode_hash(inode);
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- /*
- * move the inode off the IO lists and LRU once I_FREEING is set so
- * that it won't get moved back on there if it is dirty.
- * around.
- */
- spin_lock(&bdi->wb.b_lock);
- list_del_init(&inode->i_io);
- spin_unlock(&bdi->wb.b_lock);
-
- inode_lru_list_del(inode);
-
- spin_lock(&sb->s_inodes_lock);
- list_del_init(&inode->i_sb_list);
- spin_unlock(&sb->s_inodes_lock);
-
- evict(inode);
- remove_inode_hash(inode);
- wake_up_inode(inode);
- BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
- destroy_inode(inode);
+ dispose_one_inode(inode);
}
/**
--
1.7.1
^ permalink raw reply related
* [PATCH 11/18] fs: Introduce per-bucket inode hash locks
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Nick Piggin <npiggin@suse.de>
Protecting the inode hash with a single lock is not scalable. Convert
the inode hash to use the new bit-locked hash list implementation
that allows per-bucket locks to be used. This allows us to replace
the global inode_lock with finer grained locking without increasing
the size of the hash table.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/btrfs/inode.c | 2 +-
fs/fs-writeback.c | 2 +-
fs/hfs/hfs_fs.h | 2 +-
fs/hfs/inode.c | 2 +-
fs/hfsplus/hfsplus_fs.h | 2 +-
fs/hfsplus/inode.c | 2 +-
fs/inode.c | 165 ++++++++++++++++++++++++++++++----------------
fs/nilfs2/gcinode.c | 22 ++++---
fs/nilfs2/segment.c | 2 +-
fs/nilfs2/the_nilfs.h | 2 +-
fs/reiserfs/xattr.c | 2 +-
include/linux/fs.h | 3 +-
mm/shmem.c | 4 +-
13 files changed, 132 insertions(+), 80 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9f04478..f908a12 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3855,7 +3855,7 @@ again:
p = &root->inode_tree.rb_node;
parent = NULL;
- if (hlist_unhashed(&inode->i_hash))
+ if (hlist_bl_unhashed(&inode->i_hash))
return;
spin_lock(&root->inode_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ec7a689..d63ab47 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -959,7 +959,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* dirty list. Add blockdev inodes as well.
*/
if (!S_ISBLK(inode->i_mode)) {
- if (hlist_unhashed(&inode->i_hash))
+ if (hlist_bl_unhashed(&inode->i_hash))
goto out;
}
if (inode->i_state & I_FREEING)
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 4f55651..24591be 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -148,7 +148,7 @@ struct hfs_sb_info {
int fs_div;
- struct hlist_head rsrc_inodes;
+ struct hlist_bl_head rsrc_inodes;
};
#define HFS_FLG_BITMAP_DIRTY 0
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 397b7ad..7778298 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
HFS_I(inode)->rsrc_inode = dir;
HFS_I(dir)->rsrc_inode = inode;
igrab(dir);
- hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes);
+ hlist_bl_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes);
mark_inode_dirty(inode);
out:
d_add(dentry, inode);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index dc856be..499f5a5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -144,7 +144,7 @@ struct hfsplus_sb_info {
unsigned long flags;
- struct hlist_head rsrc_inodes;
+ struct hlist_bl_head rsrc_inodes;
};
#define HFSPLUS_SB_WRITEBACKUP 0x0001
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index c5a979d..b755cf0 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -202,7 +202,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
HFSPLUS_I(inode).rsrc_inode = dir;
HFSPLUS_I(dir).rsrc_inode = inode;
igrab(dir);
- hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes);
+ hlist_bl_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes);
mark_inode_dirty(inode);
out:
d_add(dentry, inode);
diff --git a/fs/inode.c b/fs/inode.c
index 32da15e..3c07719 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,12 +24,20 @@
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
+#include <linux/bit_spinlock.h>
/*
* Locking rules.
*
* inode->i_lock protects:
* i_ref
+ * inode_hash_bucket lock protects:
+ * inode hash table, i_hash
+ *
+ * Lock orders
+ * inode_lock
+ * inode hash bucket lock
+ * inode->i_lock
*/
/*
@@ -80,7 +88,22 @@ static unsigned int i_hash_shift __read_mostly;
*/
LIST_HEAD(inode_unused);
-static struct hlist_head *inode_hashtable __read_mostly;
+
+struct inode_hash_bucket {
+ struct hlist_bl_head head;
+};
+
+static inline void spin_lock_bucket(struct inode_hash_bucket *b)
+{
+ bit_spin_lock(0, (unsigned long *)b);
+}
+
+static inline void spin_unlock_bucket(struct inode_hash_bucket *b)
+{
+ __bit_spin_unlock(0, (unsigned long *)b);
+}
+
+static struct inode_hash_bucket *inode_hashtable __read_mostly;
/*
* A simple spinlock to protect the list manipulations.
@@ -295,7 +318,7 @@ void destroy_inode(struct inode *inode)
void inode_init_once(struct inode *inode)
{
memset(inode, 0, sizeof(*inode));
- INIT_HLIST_NODE(&inode->i_hash);
+ init_hlist_bl_node(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
INIT_LIST_HEAD(&inode->i_io);
@@ -375,9 +398,13 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
*/
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
- struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
+ struct inode_hash_bucket *b;
+
+ b = inode_hashtable + hash(inode->i_sb, hashval);
spin_lock(&inode_lock);
- hlist_add_head(&inode->i_hash, head);
+ spin_lock_bucket(b);
+ hlist_bl_add_head(&inode->i_hash, &b->head);
+ spin_unlock_bucket(b);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
@@ -391,7 +418,12 @@ EXPORT_SYMBOL(__insert_inode_hash);
*/
static void __remove_inode_hash(struct inode *inode)
{
- hlist_del_init(&inode->i_hash);
+ struct inode_hash_bucket *b;
+
+ b = inode_hashtable + hash(inode->i_sb, inode->i_ino);
+ spin_lock_bucket(b);
+ hlist_bl_del_init(&inode->i_hash);
+ spin_unlock_bucket(b);
}
/**
@@ -403,7 +435,7 @@ static void __remove_inode_hash(struct inode *inode)
void remove_inode_hash(struct inode *inode)
{
spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
+ __remove_inode_hash(inode);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);
@@ -663,25 +695,28 @@ static void __wait_on_freeing_inode(struct inode *inode);
* add any additional branch in the common code.
*/
static struct inode *find_inode(struct super_block *sb,
- struct hlist_head *head,
+ struct inode_hash_bucket *b,
int (*test)(struct inode *, void *),
void *data)
{
- struct hlist_node *node;
+ struct hlist_bl_node *node;
struct inode *inode = NULL;
repeat:
- hlist_for_each_entry(inode, node, head, i_hash) {
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(inode, node, &b->head, i_hash) {
if (inode->i_sb != sb)
continue;
if (!test(inode, data))
continue;
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
+ spin_unlock_bucket(b);
__wait_on_freeing_inode(inode);
goto repeat;
}
break;
}
+ spin_unlock_bucket(b);
return node ? inode : NULL;
}
@@ -690,33 +725,40 @@ repeat:
* iget_locked for details.
*/
static struct inode *find_inode_fast(struct super_block *sb,
- struct hlist_head *head, unsigned long ino)
+ struct inode_hash_bucket *b,
+ unsigned long ino)
{
- struct hlist_node *node;
+ struct hlist_bl_node *node;
struct inode *inode = NULL;
repeat:
- hlist_for_each_entry(inode, node, head, i_hash) {
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(inode, node, &b->head, i_hash) {
if (inode->i_ino != ino)
continue;
if (inode->i_sb != sb)
continue;
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
+ spin_unlock_bucket(b);
__wait_on_freeing_inode(inode);
goto repeat;
}
break;
}
+ spin_unlock_bucket(b);
return node ? inode : NULL;
}
static inline void
-__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
+__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
struct inode *inode)
{
list_add(&inode->i_sb_list, &sb->s_inodes);
- if (head)
- hlist_add_head(&inode->i_hash, head);
+ if (b) {
+ spin_lock_bucket(b);
+ hlist_bl_add_head(&inode->i_hash, &b->head);
+ spin_unlock_bucket(b);
+ }
}
/**
@@ -733,10 +775,10 @@ __inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
*/
void inode_add_to_lists(struct super_block *sb, struct inode *inode)
{
- struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, inode->i_ino);
spin_lock(&inode_lock);
- __inode_add_to_lists(sb, head, inode);
+ __inode_add_to_lists(sb, b, inode);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);
@@ -819,7 +861,7 @@ EXPORT_SYMBOL(unlock_new_inode);
* -- rmk@arm.uk.linux.org
*/
static struct inode *get_new_inode(struct super_block *sb,
- struct hlist_head *head,
+ struct inode_hash_bucket *b,
int (*test)(struct inode *, void *),
int (*set)(struct inode *, void *),
void *data)
@@ -832,12 +874,12 @@ static struct inode *get_new_inode(struct super_block *sb,
spin_lock(&inode_lock);
/* We released the lock, so.. */
- old = find_inode(sb, head, test, data);
+ old = find_inode(sb, b, test, data);
if (!old) {
if (set(inode, data))
goto set_failed;
- __inode_add_to_lists(sb, head, inode);
+ __inode_add_to_lists(sb, b, inode);
inode->i_state = I_NEW;
spin_unlock(&inode_lock);
@@ -873,7 +915,7 @@ set_failed:
* comment at iget_locked for details.
*/
static struct inode *get_new_inode_fast(struct super_block *sb,
- struct hlist_head *head, unsigned long ino)
+ struct inode_hash_bucket *b, unsigned long ino)
{
struct inode *inode;
@@ -883,10 +925,10 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
spin_lock(&inode_lock);
/* We released the lock, so.. */
- old = find_inode_fast(sb, head, ino);
+ old = find_inode_fast(sb, b, ino);
if (!old) {
inode->i_ino = ino;
- __inode_add_to_lists(sb, head, inode);
+ __inode_add_to_lists(sb, b, inode);
inode->i_state = I_NEW;
spin_unlock(&inode_lock);
@@ -935,7 +977,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
*/
static unsigned int counter;
struct inode *inode;
- struct hlist_head *head;
+ struct inode_hash_bucket *b;
ino_t res;
spin_lock(&inode_lock);
@@ -943,8 +985,8 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
if (counter <= max_reserved)
counter = max_reserved + 1;
res = counter++;
- head = inode_hashtable + hash(sb, res);
- inode = find_inode_fast(sb, head, res);
+ b = inode_hashtable + hash(sb, res);
+ inode = find_inode_fast(sb, b, res);
} while (inode != NULL);
spin_unlock(&inode_lock);
@@ -991,13 +1033,14 @@ EXPORT_SYMBOL(igrab);
* Note, @test is called with the inode_lock held, so can't sleep.
*/
static struct inode *ifind(struct super_block *sb,
- struct hlist_head *head, int (*test)(struct inode *, void *),
+ struct inode_hash_bucket *b,
+ int (*test)(struct inode *, void *),
void *data, const int wait)
{
struct inode *inode;
spin_lock(&inode_lock);
- inode = find_inode(sb, head, test, data);
+ inode = find_inode(sb, b, test, data);
if (inode) {
spin_lock(&inode->i_lock);
iref_locked(inode);
@@ -1027,12 +1070,13 @@ static struct inode *ifind(struct super_block *sb,
* Otherwise NULL is returned.
*/
static struct inode *ifind_fast(struct super_block *sb,
- struct hlist_head *head, unsigned long ino)
+ struct inode_hash_bucket *b,
+ unsigned long ino)
{
struct inode *inode;
spin_lock(&inode_lock);
- inode = find_inode_fast(sb, head, ino);
+ inode = find_inode_fast(sb, b, ino);
if (inode) {
spin_lock(&inode->i_lock);
iref_locked(inode);
@@ -1069,9 +1113,9 @@ static struct inode *ifind_fast(struct super_block *sb,
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
- return ifind(sb, head, test, data, 0);
+ return ifind(sb, b, test, data, 0);
}
EXPORT_SYMBOL(ilookup5_nowait);
@@ -1097,9 +1141,9 @@ EXPORT_SYMBOL(ilookup5_nowait);
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
- return ifind(sb, head, test, data, 1);
+ return ifind(sb, b, test, data, 1);
}
EXPORT_SYMBOL(ilookup5);
@@ -1119,9 +1163,9 @@ EXPORT_SYMBOL(ilookup5);
*/
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
- struct hlist_head *head = inode_hashtable + hash(sb, ino);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
- return ifind_fast(sb, head, ino);
+ return ifind_fast(sb, b, ino);
}
EXPORT_SYMBOL(ilookup);
@@ -1149,17 +1193,17 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *),
int (*set)(struct inode *, void *), void *data)
{
- struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
struct inode *inode;
- inode = ifind(sb, head, test, data, 1);
+ inode = ifind(sb, b, test, data, 1);
if (inode)
return inode;
/*
* get_new_inode() will do the right thing, re-trying the search
* in case it had to block at any point.
*/
- return get_new_inode(sb, head, test, set, data);
+ return get_new_inode(sb, b, test, set, data);
}
EXPORT_SYMBOL(iget5_locked);
@@ -1180,17 +1224,17 @@ EXPORT_SYMBOL(iget5_locked);
*/
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
- struct hlist_head *head = inode_hashtable + hash(sb, ino);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
struct inode *inode;
- inode = ifind_fast(sb, head, ino);
+ inode = ifind_fast(sb, b, ino);
if (inode)
return inode;
/*
* get_new_inode_fast() will do the right thing, re-trying the search
* in case it had to block at any point.
*/
- return get_new_inode_fast(sb, head, ino);
+ return get_new_inode_fast(sb, b, ino);
}
EXPORT_SYMBOL(iget_locked);
@@ -1198,14 +1242,15 @@ int insert_inode_locked(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
ino_t ino = inode->i_ino;
- struct hlist_head *head = inode_hashtable + hash(sb, ino);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
inode->i_state |= I_NEW;
while (1) {
- struct hlist_node *node;
+ struct hlist_bl_node *node;
struct inode *old = NULL;
spin_lock(&inode_lock);
- hlist_for_each_entry(old, node, head, i_hash) {
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
if (old->i_ino != ino)
continue;
if (old->i_sb != sb)
@@ -1215,16 +1260,18 @@ int insert_inode_locked(struct inode *inode)
break;
}
if (likely(!node)) {
- hlist_add_head(&inode->i_hash, head);
+ hlist_bl_add_head(&inode->i_hash, &b->head);
+ spin_unlock_bucket(b);
spin_unlock(&inode_lock);
return 0;
}
spin_lock(&old->i_lock);
iref_locked(old);
spin_unlock(&old->i_lock);
+ spin_unlock_bucket(b);
spin_unlock(&inode_lock);
wait_on_inode(old);
- if (unlikely(!hlist_unhashed(&old->i_hash))) {
+ if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
iput(old);
return -EBUSY;
}
@@ -1237,16 +1284,17 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
struct super_block *sb = inode->i_sb;
- struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
inode->i_state |= I_NEW;
while (1) {
- struct hlist_node *node;
+ struct hlist_bl_node *node;
struct inode *old = NULL;
spin_lock(&inode_lock);
- hlist_for_each_entry(old, node, head, i_hash) {
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
if (old->i_sb != sb)
continue;
if (!test(old, data))
@@ -1256,16 +1304,18 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
break;
}
if (likely(!node)) {
- hlist_add_head(&inode->i_hash, head);
+ hlist_bl_add_head(&inode->i_hash, &b->head);
+ spin_unlock_bucket(b);
spin_unlock(&inode_lock);
return 0;
}
spin_lock(&old->i_lock);
iref_locked(old);
spin_unlock(&old->i_lock);
+ spin_unlock_bucket(b);
spin_unlock(&inode_lock);
wait_on_inode(old);
- if (unlikely(!hlist_unhashed(&old->i_hash))) {
+ if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
iput(old);
return -EBUSY;
}
@@ -1288,7 +1338,7 @@ EXPORT_SYMBOL(generic_delete_inode);
*/
int generic_drop_inode(struct inode *inode)
{
- return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
+ return !inode->i_nlink || hlist_bl_unhashed(&inode->i_hash);
}
EXPORT_SYMBOL_GPL(generic_drop_inode);
@@ -1331,7 +1381,6 @@ static void iput_final(struct inode *inode)
spin_lock(&inode_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- hlist_del_init(&inode->i_hash);
__remove_inode_hash(inode);
}
list_del_init(&inode->i_io);
@@ -1599,7 +1648,7 @@ void __init inode_init_early(void)
inode_hashtable =
alloc_large_system_hash("Inode-cache",
- sizeof(struct hlist_head),
+ sizeof(struct inode_hash_bucket),
ihash_entries,
14,
HASH_EARLY,
@@ -1608,7 +1657,7 @@ void __init inode_init_early(void)
0);
for (loop = 0; loop < (1 << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&inode_hashtable[loop].head);
}
@@ -1633,7 +1682,7 @@ void __init inode_init(void)
inode_hashtable =
alloc_large_system_hash("Inode-cache",
- sizeof(struct hlist_head),
+ sizeof(struct inode_hash_bucket),
ihash_entries,
14,
0,
@@ -1642,7 +1691,7 @@ void __init inode_init(void)
0);
for (loop = 0; loop < (1 << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&inode_hashtable[loop].head);
}
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index bed3a78..ce7344e 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -196,13 +196,13 @@ int nilfs_init_gccache(struct the_nilfs *nilfs)
INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
nilfs->ns_gc_inodes_h =
- kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE,
+ kmalloc(sizeof(struct hlist_bl_head) * NILFS_GCINODE_HASH_SIZE,
GFP_NOFS);
if (nilfs->ns_gc_inodes_h == NULL)
return -ENOMEM;
for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++)
- INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]);
+ INIT_HLIST_BL_HEAD(&nilfs->ns_gc_inodes_h[loop]);
return 0;
}
@@ -254,18 +254,18 @@ static unsigned long ihash(ino_t ino, __u64 cno)
*/
struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
{
- struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno);
- struct hlist_node *node;
+ struct hlist_bl_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno);
+ struct hlist_bl_node *node;
struct inode *inode;
- hlist_for_each_entry(inode, node, head, i_hash) {
+ hlist_bl_for_each_entry(inode, node, head, i_hash) {
if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno)
return inode;
}
inode = alloc_gcinode(nilfs, ino, cno);
if (likely(inode)) {
- hlist_add_head(&inode->i_hash, head);
+ hlist_bl_add_head(&inode->i_hash, head);
list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
}
return inode;
@@ -284,16 +284,18 @@ void nilfs_clear_gcinode(struct inode *inode)
*/
void nilfs_remove_all_gcinode(struct the_nilfs *nilfs)
{
- struct hlist_head *head = nilfs->ns_gc_inodes_h;
- struct hlist_node *node, *n;
+ struct hlist_bl_head *head = nilfs->ns_gc_inodes_h;
+ struct hlist_bl_node *node;
struct inode *inode;
int loop;
for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) {
- hlist_for_each_entry_safe(inode, node, n, head, i_hash) {
- hlist_del_init(&inode->i_hash);
+restart:
+ hlist_bl_for_each_entry(inode, node, head, i_hash) {
+ hlist_bl_del_init(&inode->i_hash);
list_del_init(&NILFS_I(inode)->i_dirty);
nilfs_clear_gcinode(inode); /* might sleep */
+ goto restart;
}
}
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9fd051a..038251c 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2452,7 +2452,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
list_for_each_entry_safe(ii, n, head, i_dirty) {
if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
continue;
- hlist_del_init(&ii->vfs_inode.i_hash);
+ hlist_bl_del_init(&ii->vfs_inode.i_hash);
list_del_init(&ii->i_dirty);
nilfs_clear_gcinode(&ii->vfs_inode);
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f785a7b..1ab441a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -167,7 +167,7 @@ struct the_nilfs {
/* GC inode list and hash table head */
struct list_head ns_gc_inodes;
- struct hlist_head *ns_gc_inodes_h;
+ struct hlist_bl_head *ns_gc_inodes_h;
/* Disk layout information (static) */
unsigned int ns_blocksize_bits;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c4cf27..ea2f55c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -424,7 +424,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
- if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink ||
+ if (hlist_bl_unhashed(&inode->i_hash) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1162c10..34f983f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -383,6 +383,7 @@ struct inodes_stat_t {
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
+#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
@@ -724,7 +725,7 @@ struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))
struct inode {
- struct hlist_node i_hash;
+ struct hlist_bl_node i_hash;
struct list_head i_io; /* backing dev IO list */
struct list_head i_lru; /* backing dev IO list */
struct list_head i_sb_list;
diff --git a/mm/shmem.c b/mm/shmem.c
index 4daaa24..7a2a5de 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2146,7 +2146,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
if (*len < 3)
return 255;
- if (hlist_unhashed(&inode->i_hash)) {
+ if (hlist_bl_unhashed(&inode->i_hash)) {
/* Unfortunately insert_inode_hash is not idempotent,
* so as we hash inodes here rather than at creation
* time, we need a lock to ensure we only try
@@ -2154,7 +2154,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
*/
static DEFINE_SPINLOCK(lock);
spin_lock(&lock);
- if (hlist_unhashed(&inode->i_hash))
+ if (hlist_bl_unhashed(&inode->i_hash))
__insert_inode_hash(inode,
inode->i_ino + inode->i_generation);
spin_unlock(&lock);
--
1.7.1
^ permalink raw reply related
* [PATCH 01/18] kernel: add bl_list
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Nick Piggin <npiggin@suse.de>
Introduce a type of hlist that can support the use of the lowest bit
in the hlist_head. This will be subsequently used to implement
per-bucket bit spinlock for inode hashes.
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
include/linux/list_bl.h | 127 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/poison.h | 2 +
2 files changed, 129 insertions(+), 0 deletions(-)
create mode 100644 include/linux/list_bl.h
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
new file mode 100644
index 0000000..961bc89
--- /dev/null
+++ b/include/linux/list_bl.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_LIST_BL_H
+#define _LINUX_LIST_BL_H
+
+#include <linux/list.h>
+#include <linux/bit_spinlock.h>
+
+/*
+ * Special version of lists, where head of the list has a bit spinlock
+ * in the lowest bit. This is useful for scalable hash tables without
+ * increasing memory footprint overhead.
+ *
+ * For modification operations, the 0 bit of hlist_bl_head->first
+ * pointer must be set.
+ */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#define LIST_BL_LOCKMASK 1UL
+#else
+#define LIST_BL_LOCKMASK 0UL
+#endif
+
+#ifdef CONFIG_DEBUG_LIST
+#define LIST_BL_BUG_ON(x) BUG_ON(x)
+#else
+#define LIST_BL_BUG_ON(x)
+#endif
+
+
+struct hlist_bl_head {
+ struct hlist_bl_node *first;
+};
+
+struct hlist_bl_node {
+ struct hlist_bl_node *next, **pprev;
+};
+#define INIT_HLIST_BL_HEAD(ptr) \
+ ((ptr)->first = NULL)
+
+static inline void init_hlist_bl_node(struct hlist_bl_node *h)
+{
+ h->next = NULL;
+ h->pprev = NULL;
+}
+
+#define hlist_bl_entry(ptr, type, member) container_of(ptr, type, member)
+
+static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
+{
+ return !h->pprev;
+}
+
+static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
+{
+ return (struct hlist_bl_node *)
+ ((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+}
+
+static inline void hlist_bl_set_first(struct hlist_bl_head *h,
+ struct hlist_bl_node *n)
+{
+ LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+ LIST_BL_BUG_ON(!bit_spin_is_locked(0, (unsigned long *)&h->first));
+ h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
+}
+
+static inline int hlist_bl_empty(const struct hlist_bl_head *h)
+{
+ return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+}
+
+static inline void hlist_bl_add_head(struct hlist_bl_node *n,
+ struct hlist_bl_head *h)
+{
+ struct hlist_bl_node *first = hlist_bl_first(h);
+
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ n->pprev = &h->first;
+ hlist_bl_set_first(h, n);
+}
+
+static inline void __hlist_bl_del(struct hlist_bl_node *n)
+{
+ struct hlist_bl_node *next = n->next;
+ struct hlist_bl_node **pprev = n->pprev;
+
+ LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+
+ /* pprev may be `first`, so be careful not to lose the lock bit */
+ *pprev = (struct hlist_bl_node *)
+ ((unsigned long)next |
+ ((unsigned long)*pprev & LIST_BL_LOCKMASK));
+ if (next)
+ next->pprev = pprev;
+}
+
+static inline void hlist_bl_del(struct hlist_bl_node *n)
+{
+ __hlist_bl_del(n);
+ n->next = BL_LIST_POISON1;
+ n->pprev = BL_LIST_POISON2;
+}
+
+static inline void hlist_bl_del_init(struct hlist_bl_node *n)
+{
+ if (!hlist_bl_unhashed(n)) {
+ __hlist_bl_del(n);
+ init_hlist_bl_node(n);
+ }
+}
+
+/**
+ * hlist_bl_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_bl_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_bl_node within the struct.
+ *
+ */
+#define hlist_bl_for_each_entry(tpos, pos, head, member) \
+ for (pos = hlist_bl_first(head); \
+ pos && \
+ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
+ pos = pos->next)
+
+#endif
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 2110a81..d367d39 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -22,6 +22,8 @@
#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA)
+#define BL_LIST_POISON1 ((void *) 0x00300300 + POISON_POINTER_DELTA)
+#define BL_LIST_POISON2 ((void *) 0x00400400 + POISON_POINTER_DELTA)
/********** include/linux/timer.h **********/
/*
* Magic number "tsta" to indicate a static timer initializer
--
1.7.1
^ permalink raw reply related
* [PATCH 07/18] exofs: use iput() for inode reference count decrements
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Direct modification of the inode reference count is a no-no. Convert
the exofs decrements to call iput() instead of acting directly on
i_count.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/exofs/inode.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index b631ff3..0fb4d4c 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1101,7 +1101,7 @@ static void create_done(struct exofs_io_state *ios, void *p)
set_obj_created(oi);
- atomic_dec(&inode->i_count);
+ iput(inode);
wake_up(&oi->i_wq);
}
@@ -1161,7 +1161,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
ios->cred = oi->i_cred;
ret = exofs_sbi_create(ios);
if (ret) {
- atomic_dec(&inode->i_count);
+ iput(inode);
exofs_put_io_state(ios);
return ERR_PTR(ret);
}
--
1.7.1
^ permalink raw reply related
* [PATCH 10/18] fs: Factor inode hash operations into functions
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Before replacing the inode hash locking with a more scalable
mechanism, factor the removal of the inode from the hashes rather
than open coding it in several places.
Based on a patch originally from Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/inode.c | 100 +++++++++++++++++++++++++++++++++--------------------------
1 files changed, 56 insertions(+), 44 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 5c8a3ea..32da15e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -355,6 +355,59 @@ int iref_read(struct inode *inode)
}
EXPORT_SYMBOL_GPL(iref_read);
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+ unsigned long tmp;
+
+ tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+ L1_CACHE_BYTES;
+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+ return tmp & I_HASHMASK;
+}
+
+/**
+ * __insert_inode_hash - hash an inode
+ * @inode: unhashed inode
+ * @hashval: unsigned long value used to locate this object in the
+ * inode_hashtable.
+ *
+ * Add an inode to the inode hash for this superblock.
+ */
+void __insert_inode_hash(struct inode *inode, unsigned long hashval)
+{
+ struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
+ spin_lock(&inode_lock);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(__insert_inode_hash);
+
+/**
+ * __remove_inode_hash - remove an inode from the hash
+ * @inode: inode to unhash
+ *
+ * Remove an inode from the superblock. inode->i_lock must be
+ * held.
+ */
+static void __remove_inode_hash(struct inode *inode)
+{
+ hlist_del_init(&inode->i_hash);
+}
+
+/**
+ * remove_inode_hash - remove an inode from the hash
+ * @inode: inode to unhash
+ *
+ * Remove an inode from the superblock.
+ */
+void remove_inode_hash(struct inode *inode)
+{
+ spin_lock(&inode_lock);
+ hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(remove_inode_hash);
+
void end_writeback(struct inode *inode)
{
might_sleep();
@@ -402,7 +455,7 @@ static void dispose_list(struct list_head *head)
evict(inode);
spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
+ __remove_inode_hash(inode);
list_del_init(&inode->i_sb_list);
spin_unlock(&inode_lock);
@@ -657,16 +710,6 @@ repeat:
return node ? inode : NULL;
}
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
- unsigned long tmp;
-
- tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
- L1_CACHE_BYTES;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
- return tmp & I_HASHMASK;
-}
-
static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
struct inode *inode)
@@ -1231,36 +1274,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
}
EXPORT_SYMBOL(insert_inode_locked4);
-/**
- * __insert_inode_hash - hash an inode
- * @inode: unhashed inode
- * @hashval: unsigned long value used to locate this object in the
- * inode_hashtable.
- *
- * Add an inode to the inode hash for this superblock.
- */
-void __insert_inode_hash(struct inode *inode, unsigned long hashval)
-{
- struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
- spin_lock(&inode_lock);
- hlist_add_head(&inode->i_hash, head);
- spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(__insert_inode_hash);
-
-/**
- * remove_inode_hash - remove an inode from the hash
- * @inode: inode to unhash
- *
- * Remove an inode from the superblock.
- */
-void remove_inode_hash(struct inode *inode)
-{
- spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(remove_inode_hash);
int generic_delete_inode(struct inode *inode)
{
@@ -1319,6 +1332,7 @@ static void iput_final(struct inode *inode)
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
hlist_del_init(&inode->i_hash);
+ __remove_inode_hash(inode);
}
list_del_init(&inode->i_io);
WARN_ON(inode->i_state & I_NEW);
@@ -1337,9 +1351,7 @@ static void iput_final(struct inode *inode)
list_del_init(&inode->i_sb_list);
spin_unlock(&inode_lock);
evict(inode);
- spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_lock);
+ remove_inode_hash(inode);
wake_up_inode(inode);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
destroy_inode(inode);
--
1.7.1
^ permalink raw reply related
* fs: Inode cache scalability V2
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
This patch set is derived from Nick Piggin's VFS scalability tree.
There doesn't appear to be any push to get that tree into shape for
.37, so this is an attempt to get finer grained review of the series
for upstream inclusion. I'm hitting VFS lock contention problems
with XFS on 8-16p machines now, so I need to get this stuff moving.
This patch set is just the basic inode_lock breakup patches plus a
few more simple changes to the inode code. It stops short of
introducing RCU inode freeing because those changes are not
completely baked yet.
As a result, the full inode handling improvements of Nick's patch
set are not realised with this short series. However, my own testing
indicates that the amount of lock traffic and contention is down by
an order of magnitude on an 8-way box for parallel inode create and
unlink workloads, so there are still significant improvements from
just this patch set.
Version 2 of this series is a complete rework of the original patch
series. Nick's original code nested list locks inside the
inode->i_lock, resulting in a large mess of trylock operations to
get locks out of order all over the place. In many cases, the reason
for this lock ordering is removed later on in Nick's series as
cleanups are introduced.
As a result I've pulled in several of the cleanups and re-ordered
the series such that cleanups, factoring and list splitting are done
before any of the locking changes. Instead of converting the inode
state flags first, I've converted them last, ensuring that
manipulations are kept inside other locks rather than outside them.
The series is made up of the following steps:
- inode counters are made per-cpu
- inode LRU manipulations are made lazy
- i_list is split into two lists (grows inode by 2
pointers), one for tracking lru status, one for writeback
status
- reference counting is factored, then renamed and locked
differently
- inode hash operations are factored, then locked per bucket
- superblock inode list is locked per-superblock
- inode LRU is locked via a global lock
- unclear what the best way to split this up from
here is, so no attempt is made to optimise
further.
- Currently not showing signs of contention under
any workload on an 8p machine.
- inode IO lists are locked via a per-BDI lock
- further analysis needed to determine the next step
in optimising this list. It is extremely contended
under parallel workloads because foreground
throttling (balance_dirty_pages) causes unbound
writeback parallelism and contention. Fixing the
unbound parallelism, I think, is a more important
first optimisation step than making the list
per-cpu.
- lock i_state operations with i_lock
- convert last_ino allocation to a percpu counter
- protect iunique counter with its own lock
- remove inode_lock
- kill dispose_list() and factor destroying an inode into
dispose_one_inode() which is called from reclaim, unmount
and iput_final.
None of the patches are unchanged, and several of them are new or
completely rewritten, so any previous testing is completely
invalidated. I have not tried to optimise locking by using trylock
loops - anywhere that requires out-of-order locking drops locks and
regains the locks needed for the next operation. This approach
simplified the code and led to several improvements in the patch
series (e.g. moving inode->i_lock inside writeback_single_inode(),
and the dispose_one_inode factoring) that would have gone unnoticed
if I'd gone down the same trylock loop path that Nick used.
I've done some testing so far on ext3, ext4 and XFS (mostly sanity
and lock_stat profile testing), but I have not tested any other
filesystems. IOWs, it is light on testing at this point. I'm sending
out for review now that it passes basic sanity tests so that
comments on the reworked approach can be made.
Version 2:
- complete rework of series.
--
The following changes since commit cb655d0f3d57c23db51b981648e452988c0223f9:
Linux 2.6.36-rc7 (2010-10-06 13:39:52 -0700)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/dgc/xfsdev.git inode-scale
Dave Chinner (11):
fs: Convert nr_inodes and nr_unused to per-cpu counters
fs: Clean up inode reference counting
exofs: use iput() for inode reference count decrements
fs: add inode reference coutn read accessor
fs: rework icount to be a locked variable
fs: Factor inode hash operations into functions
fs: add a per-superblock lock for the inode list
fs: split locking of inode writeback and LRU lists
fs: Protect inode->i_state with th einode->i_lock
fs: icache remove inode_lock
fs: Reduce inode I_FREEING and factor inode disposal
Eric Dumazet (1):
fs: introduce a per-cpu last_ino allocator
Nick Piggin (6):
kernel: add bl_list
fs: keep inode with backing-dev
fs: Implement lazy LRU updates for inodes.
fs: inode split IO and LRU lists
fs: Introduce per-bucket inode hash locks
fs: Make iunique independent of inode_lock
Documentation/filesystems/Locking | 2 +-
Documentation/filesystems/porting | 10 +-
Documentation/filesystems/vfs.txt | 2 +-
arch/powerpc/platforms/cell/spufs/file.c | 2 +-
drivers/char/mem.c | 2 +-
drivers/char/raw.c | 2 +-
drivers/mtd/mtdchar.c | 2 +-
drivers/staging/pohmelfs/inode.c | 10 +-
fs/9p/vfs_inode.c | 5 +-
fs/affs/inode.c | 2 +-
fs/afs/dir.c | 2 +-
fs/afs/write.c | 6 +-
fs/anon_inodes.c | 5 +-
fs/bfs/dir.c | 2 +-
fs/block_dev.c | 26 +-
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/file.c | 2 +-
fs/btrfs/inode.c | 28 +-
fs/buffer.c | 4 +-
fs/ceph/addr.c | 2 +-
fs/ceph/inode.c | 4 +-
fs/ceph/mds_client.c | 2 +-
fs/cifs/file.c | 2 +-
fs/cifs/inode.c | 4 +-
fs/coda/dir.c | 2 +-
fs/configfs/inode.c | 3 +-
fs/drop_caches.c | 19 +-
fs/exofs/inode.c | 6 +-
fs/exofs/namei.c | 2 +-
fs/ext2/ialloc.c | 2 +-
fs/ext2/namei.c | 2 +-
fs/ext3/ialloc.c | 4 +-
fs/ext3/namei.c | 2 +-
fs/ext4/ialloc.c | 4 +-
fs/ext4/namei.c | 2 +-
fs/fs-writeback.c | 184 ++++----
fs/fuse/file.c | 6 +-
fs/fuse/inode.c | 2 +-
fs/gfs2/glock.c | 3 +-
fs/gfs2/ops_inode.c | 2 +-
fs/hfs/hfs_fs.h | 2 +-
fs/hfs/inode.c | 2 +-
fs/hfsplus/dir.c | 2 +-
fs/hfsplus/hfsplus_fs.h | 2 +-
fs/hfsplus/inode.c | 2 +-
fs/hpfs/inode.c | 2 +-
fs/hugetlbfs/inode.c | 3 +-
fs/inode.c | 764 ++++++++++++++++++++----------
fs/internal.h | 6 +
fs/jffs2/dir.c | 4 +-
fs/jfs/jfs_txnmgr.c | 2 +-
fs/jfs/namei.c | 2 +-
fs/libfs.c | 2 +-
fs/locks.c | 2 +-
fs/logfs/dir.c | 2 +-
fs/logfs/inode.c | 2 +-
fs/logfs/readwrite.c | 2 +-
fs/minix/namei.c | 2 +-
fs/namei.c | 2 +-
fs/nfs/dir.c | 2 +-
fs/nfs/getroot.c | 2 +-
fs/nfs/inode.c | 7 +-
fs/nfs/nfs4state.c | 2 +-
fs/nfs/write.c | 9 +-
fs/nilfs2/btnode.c | 2 +-
fs/nilfs2/gcdat.c | 1 +
fs/nilfs2/gcinode.c | 22 +-
fs/nilfs2/mdt.c | 7 +-
fs/nilfs2/namei.c | 2 +-
fs/nilfs2/segment.c | 2 +-
fs/nilfs2/the_nilfs.c | 2 +-
fs/nilfs2/the_nilfs.h | 2 +-
fs/notify/inode_mark.c | 47 ++-
fs/notify/mark.c | 1 -
fs/notify/vfsmount_mark.c | 1 -
fs/ntfs/file.c | 2 +-
fs/ntfs/inode.c | 4 +-
fs/ntfs/super.c | 4 +-
fs/ocfs2/dlmfs/dlmfs.c | 4 +-
fs/ocfs2/file.c | 2 +-
fs/ocfs2/inode.c | 2 +-
fs/ocfs2/namei.c | 2 +-
fs/quota/dquot.c | 32 +-
fs/ramfs/inode.c | 2 +-
fs/reiserfs/namei.c | 2 +-
fs/reiserfs/stree.c | 2 +-
fs/reiserfs/xattr.c | 2 +-
fs/romfs/super.c | 4 +-
fs/smbfs/inode.c | 2 +-
fs/super.c | 1 +
fs/sysfs/inode.c | 2 +-
fs/sysv/namei.c | 2 +-
fs/ubifs/dir.c | 4 +-
fs/ubifs/super.c | 4 +-
fs/udf/namei.c | 2 +-
fs/ufs/namei.c | 2 +-
fs/xfs/linux-2.6/xfs_buf.c | 4 +-
fs/xfs/linux-2.6/xfs_file.c | 2 +-
fs/xfs/linux-2.6/xfs_iops.c | 2 +-
fs/xfs/linux-2.6/xfs_trace.h | 2 +-
fs/xfs/xfs_inode.h | 4 +-
include/linux/backing-dev.h | 17 +-
include/linux/fs.h | 34 +-
include/linux/list_bl.h | 127 +++++
include/linux/poison.h | 2 +
include/linux/writeback.h | 13 +-
ipc/mqueue.c | 2 +-
kernel/cgroup.c | 2 +-
kernel/futex.c | 2 +-
kernel/sysctl.c | 4 +-
mm/backing-dev.c | 90 ++++-
mm/fadvise.c | 4 +-
mm/filemap.c | 10 +-
mm/filemap_xip.c | 2 +-
mm/page-writeback.c | 15 +-
mm/readahead.c | 6 +-
mm/rmap.c | 6 +-
mm/shmem.c | 8 +-
mm/swap.c | 2 +-
mm/swap_state.c | 2 +-
mm/swapfile.c | 2 +-
mm/truncate.c | 3 +-
mm/vmscan.c | 2 +-
net/socket.c | 2 +-
124 files changed, 1131 insertions(+), 616 deletions(-)
create mode 100644 include/linux/list_bl.h
^ permalink raw reply
* [PATCH 02/18] fs: Convert nr_inodes and nr_unused to per-cpu counters
From: Dave Chinner @ 2010-10-08 5:21 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-kernel
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The number of inodes allocated does not need to be tied to the
addition or removal of an inode to/from a list. If we are not tied
to a list lock, we could update the counters when inodes are
initialised or destroyed, but to do that we need to convert the
counters to be per-cpu (i.e. independent of a lock). This means that
we have the freedom to change the list/locking implementation
without needing to care about the counters.
Based on a patch originally from Eric Dumazet.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 5 +--
fs/inode.c | 65 ++++++++++++++++++++++++++++++++++++---------------
include/linux/fs.h | 4 ++-
kernel/sysctl.c | 4 +-
4 files changed, 53 insertions(+), 25 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ab38fef..58a95b7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
wb->last_old_flush = jiffies;
nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ get_nr_dirty_inodes();
if (nr_pages) {
struct wb_writeback_work work = {
@@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb)
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- work.nr_pages = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
diff --git a/fs/inode.c b/fs/inode.c
index 8646433..f04d501 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem);
*/
struct inodes_stat_t inodes_stat;
+static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
+static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+
static struct kmem_cache *inode_cachep __read_mostly;
+static inline int get_nr_inodes(void)
+{
+ return percpu_counter_sum_positive(&nr_inodes);
+}
+
+static inline int get_nr_inodes_unused(void)
+{
+ return percpu_counter_sum_positive(&nr_inodes_unused);
+}
+
+int get_nr_dirty_inodes(void)
+{
+ int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+ return nr_dirty > 0 ? nr_dirty : 0;
+
+}
+
+/*
+ * Handle nr_inode sysctl
+ */
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_inodes(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ inodes_stat.nr_inodes = get_nr_inodes();
+ inodes_stat.nr_unused = get_nr_inodes_unused();
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
static void wake_up_inode(struct inode *inode)
{
/*
@@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
+ percpu_counter_inc(&nr_inodes);
+
return 0;
out:
return -ENOMEM;
@@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
+ percpu_counter_dec(&nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
@@ -286,7 +322,7 @@ void __iget(struct inode *inode)
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_in_use);
- inodes_stat.nr_unused--;
+ percpu_counter_dec(&nr_inodes_unused);
}
void end_writeback(struct inode *inode)
@@ -327,8 +363,6 @@ static void evict(struct inode *inode)
*/
static void dispose_list(struct list_head *head)
{
- int nr_disposed = 0;
-
while (!list_empty(head)) {
struct inode *inode;
@@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head)
wake_up_inode(inode);
destroy_inode(inode);
- nr_disposed++;
}
- spin_lock(&inode_lock);
- inodes_stat.nr_inodes -= nr_disposed;
- spin_unlock(&inode_lock);
}
/*
@@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head)
static int invalidate_list(struct list_head *head, struct list_head *dispose)
{
struct list_head *next;
- int busy = 0, count = 0;
+ int busy = 0;
next = head->next;
for (;;) {
@@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
list_move(&inode->i_list, dispose);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- count++;
+ percpu_counter_dec(&nr_inodes_unused);
continue;
}
busy = 1;
}
- /* only unused inodes may be cached with i_count zero */
- inodes_stat.nr_unused -= count;
return busy;
}
@@ -448,7 +476,6 @@ static int can_unuse(struct inode *inode)
static void prune_icache(int nr_to_scan)
{
LIST_HEAD(freeable);
- int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;
@@ -484,9 +511,8 @@ static void prune_icache(int nr_to_scan)
list_move(&inode->i_list, &freeable);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- nr_pruned++;
+ percpu_counter_dec(&nr_inodes_unused);
}
- inodes_stat.nr_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
@@ -518,7 +544,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
return -1;
prune_icache(nr);
}
- return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+ return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker icache_shrinker = {
@@ -595,7 +621,6 @@ static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
struct inode *inode)
{
- inodes_stat.nr_inodes++;
list_add(&inode->i_list, &inode_in_use);
list_add(&inode->i_sb_list, &sb->s_inodes);
if (head)
@@ -1215,7 +1240,7 @@ static void iput_final(struct inode *inode)
if (!drop) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_unused);
- inodes_stat.nr_unused++;
+ percpu_counter_inc(&nr_inodes_unused);
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
return;
@@ -1227,14 +1252,13 @@ static void iput_final(struct inode *inode)
spin_lock(&inode_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- inodes_stat.nr_unused--;
+ percpu_counter_dec(&nr_inodes_unused);
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
evict(inode);
spin_lock(&inode_lock);
@@ -1489,6 +1513,7 @@ void __init inode_init_early(void)
for (loop = 0; loop < (1 << i_hash_shift); loop++)
INIT_HLIST_HEAD(&inode_hashtable[loop]);
+
}
void __init inode_init(void)
@@ -1503,6 +1528,8 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
+ percpu_counter_init(&nr_inodes, 0);
+ percpu_counter_init(&nr_inodes_unused, 0);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 63d069b..1fb92f9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -407,6 +407,7 @@ extern struct files_stat_struct files_stat;
extern int get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
+extern int get_nr_dirty_inodes(void);
extern int leases_enable, lease_break_time;
struct buffer_head;
@@ -2474,7 +2475,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_inodes(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f88552c..33d1733 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = {
.data = &inodes_stat,
.maxlen = 2*sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_nr_inodes,
},
{
.procname = "inode-state",
.data = &inodes_stat,
.maxlen = 7*sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_nr_inodes,
},
{
.procname = "file-nr",
--
1.7.1
^ permalink raw reply related
* [PATCH] block: Fix double free in blk_integrity_unregister
From: Martin K. Petersen @ 2010-10-08 5:18 UTC (permalink / raw)
To: Jens Axboe; +Cc: Xiaotian Feng, linux-scsi
Commit 3839e4b introduced a kobject_put but failed to remove the
kmem_cache_free beneath it, leading to a double free.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 885cbb5..54bcba6 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -432,7 +432,6 @@ void blk_integrity_unregister(struct gendisk *disk)
kobject_uevent(&bi->kobj, KOBJ_REMOVE);
kobject_del(&bi->kobj);
kobject_put(&bi->kobj);
- kmem_cache_free(integrity_cachep, bi);
disk->integrity = NULL;
}
EXPORT_SYMBOL(blk_integrity_unregister);
^ permalink raw reply related
* [PATCH] block: Make the integrity mapped property a bio flag
From: Martin K. Petersen @ 2010-10-08 5:19 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi
Previously we tracked whether the integrity metadata had been remapped
using a request flag. This was fine for low-level retries. However, if
an I/O was redriven by upper layers we would end up remapping again,
causing the retry to fail.
Deprecate the REQ_INTEGRITY flag and introduce BIO_MAPPED_INTEGRITY
which enables filesystems to notify lower layers that the bio in
question has already been remapped.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 84be621..0cb39ff 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -375,21 +375,20 @@ int sd_dif_prepare(struct request *rq, sector_t hw_sector, unsigned int sector_s
unsigned int i, j;
u32 phys, virt;
- /* Already remapped? */
- if (rq->cmd_flags & REQ_INTEGRITY)
- return 0;
-
sdkp = rq->bio->bi_bdev->bd_disk->private_data;
if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION)
return 0;
- rq->cmd_flags |= REQ_INTEGRITY;
phys = hw_sector & 0xffffffff;
__rq_for_each_bio(bio, rq) {
struct bio_vec *iv;
+ /* Already remapped? */
+ if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
+ break;
+
virt = bio->bi_integrity->bip_sector & 0xffffffff;
bip_for_each_vec(iv, bio->bi_integrity, i) {
@@ -408,6 +407,8 @@ int sd_dif_prepare(struct request *rq, sector_t hw_sector, unsigned int sector_s
kunmap_atomic(sdt, KM_USER0);
}
+
+ bio->bi_flags |= BIO_MAPPED_INTEGRITY;
}
return 0;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index ca83a97..925b80d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -97,6 +97,7 @@ struct bio {
#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
#define BIO_QUIET 11 /* Make BIO Quiet */
+#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/*
@@ -146,7 +147,6 @@ enum rq_flag_bits {
__REQ_ORDERED_COLOR, /* is before or after barrier */
__REQ_ALLOCED, /* request came from our alloc pool */
__REQ_COPY_USER, /* contains copies of user pages */
- __REQ_INTEGRITY, /* integrity metadata has been remapped */
__REQ_FLUSH, /* request for cache flush */
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
@@ -187,7 +187,6 @@ enum rq_flag_bits {
#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
-#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
#define REQ_FLUSH (1 << __REQ_FLUSH)
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
^ permalink raw reply related
* Re: [PATCH 1/2] block: Ensure physical block size is unsigned int
From: Martin K. Petersen @ 2010-10-08 5:15 UTC (permalink / raw)
To: jaxboe, James.Bottomley; +Cc: Mike Snitzer, linux-scsi
In-Reply-To: <20100927174052.GA14180@redhat.com>
>>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:
>> Physical block size was declared unsigned int to accommodate the
>> maximum size reported by READ CAPACITY(16). Make sure we use the
>> right type in the related functions.
>>
>> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Mike> Acked-by: Mike Snitzer <snitzer@redhat.com>
Jens, ping on the block fix.
James, ping on the sd ditto.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* Re: [PATCH v2] memcg: reduce lock time at move charge (Was Re: [PATCH 04/10] memcg: disable local interrupts in lock_page_cgroup()
From: KAMEZAWA Hiroyuki @ 2010-10-08 5:12 UTC (permalink / raw)
To: Andrew Morton
Cc: Daisuke Nishimura, Minchan Kim, Greg Thelen, linux-kernel,
linux-mm, containers, Andrea Righi, Balbir Singh
In-Reply-To: <20101007215556.21412ae6.akpm@linux-foundation.org>
On Thu, 7 Oct 2010 21:55:56 -0700
Andrew Morton <akpm@linux-foundation.org> wrote:
> On Fri, 8 Oct 2010 13:37:12 +0900 KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
>
> > On Thu, 7 Oct 2010 16:14:54 -0700
> > Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > > On Thu, 7 Oct 2010 17:04:05 +0900
> > > KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > >
> > > > Now, at task migration among cgroup, memory cgroup scans page table and moving
> > > > account if flags are properly set.
> > > >
> > > > The core code, mem_cgroup_move_charge_pte_range() does
> > > >
> > > > pte_offset_map_lock();
> > > > for all ptes in a page table:
> > > > 1. look into page table, find_and_get a page
> > > > 2. remove it from LRU.
> > > > 3. move charge.
> > > > 4. putback to LRU. put_page()
> > > > pte_offset_map_unlock();
> > > >
> > > > for pte entries on a 3rd level? page table.
> > > >
> > > > This pte_offset_map_lock seems a bit long. This patch modifies a routine as
> > > >
> > > > for 32 pages: pte_offset_map_lock()
> > > > find_and_get a page
> > > > record it
> > > > pte_offset_map_unlock()
> > > > for all recorded pages
> > > > isolate it from LRU.
> > > > move charge
> > > > putback to LRU
> > > > for all recorded pages
> > > > put_page()
> > >
> > > The patch makes the code larger, more complex and slower!
> > >
> >
> > Slower ?
>
> Sure. It walks the same data three times, potentially causing
> thrashing in the L1 cache.
Hmm, make this 2 times, at least.
> It takes and releases locks at a higher frequency. It increases the text size.
>
But I don't think page_table_lock is a lock which someone can hold so long
that
1. find_get_page
2. spin_lock(zone->lock)
3. remove it from LRU
4. lock_page_cgroup()
5. move charge (This means page
6. putback to LRU
for 4096/8=1024 pages long.
will try to make the routine smarter.
But I want to get rid of page_table_lock -> lock_page_cgroup().
Thanks,
-Kame
^ permalink raw reply
* linux-next: Tree for October 8
From: Stephen Rothwell @ 2010-10-08 5:15 UTC (permalink / raw)
To: linux-next; +Cc: LKML
[-- Attachment #1: Type: text/plain, Size: 10852 bytes --]
Hi all,
Changes since 20101007:
All the trees hosted on git.infradead.org are unfetchable (and presumably
unable to be updated) due to the machine crashing.
The galak tree lost its conflict.
The xfs tree gained a build failure from a mismerge for which I have
applied a patch.
The drm tree lost its conflicts.
The alacrity tree gained a conflict against various trees.
The hwpoison tree gained a conflict against Linus' tree.
The irqflags tree lost its conflict.
----------------------------------------------------------------------------
I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/v2.6/next/ ). If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one. You should use "git fetch" as mentioned in the FAQ on the wiki
(see below).
You can see which trees have been included by looking in the Next/Trees
file in the source. There are also quilt-import.log and merge.log files
in the Next directory. Between each merge, the tree was built with
a ppc64_defconfig for powerpc and an allmodconfig for x86_64. After the
final fixups (if any), it is also built with powerpc allnoconfig (32 and
64 bit), ppc44x_defconfig and allyesconfig (minus
CONFIG_PROFILE_ALL_BRANCHES - this fails its final link) and i386, sparc
and sparc64 defconfig. These builds also have
CONFIG_ENABLE_WARN_DEPRECATED, CONFIG_ENABLE_MUST_CHECK and
CONFIG_DEBUG_INFO disabled when necessary.
Below is a summary of the state of the merge.
We are up to 176 trees (counting Linus' and 22 trees of patches pending
for Linus' tree), more are welcome (even if they are currently empty).
Thanks to those who have contributed, and to those who haven't, please do.
Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next . If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.
Thanks to Randy Dunlap for doing many randconfig builds.
There is a wiki covering stuff to do with linux-next at
http://linux.f-seidel.de/linux-next/pmwiki/ . Thanks to Frank Seidel.
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
$ git checkout master
$ git reset --hard stable
Merging origin/master
Merging fixes/fixes
Merging arm-current/master
Merging m68k-current/for-linus
Merging powerpc-merge/merge
Merging sparc-current/master
Merging scsi-rc-fixes/master
Merging net-current/master
Merging sound-current/for-linus
Merging pci-current/for-linus
Merging wireless-current/master
Merging kbuild-current/rc-fixes
Merging quilt/driver-core.current
Merging quilt/tty.current
Merging quilt/usb.current
Merging quilt/staging.current
Merging cpufreq-current/fixes
Merging input-current/for-linus
Merging md-current/for-linus
Merging audit-current/for-linus
Merging crypto-current/master
Merging ide-curent/master
Merging dwmw2/master
Merging gcl-current/merge
Merging arm/devel
Merging davinci/davinci-next
Merging i.MX/for-next
Merging msm/for-next
Merging omap/for-next
Merging pxa/for-next
Merging samsung/next-samsung
Merging s5p/for-next
Merging tegra/for-next
Merging avr32/avr32-arch
Merging blackfin/for-linus
Merging cris/for-next
Merging ia64/test
Merging m68k/for-next
Merging m68knommu/for-next
Merging microblaze/next
Merging mips/mips-for-linux-next
Merging parisc/next
Merging powerpc/next
Merging 4xx/next
Merging 52xx-and-virtex/next
Merging galak/next
Merging s390/features
Merging sh/master
Merging genesis/master
Merging sparc/master
Merging tile/master
Merging xtensa/master
CONFLICT (content): Merge conflict in arch/xtensa/configs/iss_defconfig
Merging ceph/for-next
Merging cifs/master
Merging configfs/linux-next
Merging ecryptfs/next
Merging ext3/for_next
Merging ext4/next
Merging fatfs/master
Merging fuse/for-next
Merging gfs2/master
Merging hfsplus/for-next
Merging jfs/next
Merging logfs/master
CONFLICT (content): Merge conflict in fs/logfs/logfs.h
Merging nfs/linux-next
Merging nfsd/nfsd-next
Merging nilfs2/for-next
Merging ocfs2/linux-next
Merging omfs/for-next
Merging squashfs/master
Merging udf/for_next
Merging v9fs/for-next
Merging ubifs/linux-next
Merging xfs/master
Applying: xfs: fix up mismerge of __xfs_inode_clear_reclaim_tag
Merging vfs/for-next
Merging pci/linux-next
Merging hid/for-next
Merging quilt/i2c
Merging bjdooks-i2c/next-i2c
Merging quilt/jdelvare-hwmon
Merging hwmon-staging/hwmon-next
Merging quilt/kernel-doc
Merging v4l-dvb/master
Merging kbuild/for-next
Merging kconfig/for-next
Merging ide/master
Merging libata/NEXT
Merging infiniband/for-next
Merging acpi/test
Merging idle-test/idle-test
Merging ieee1394/for-next
Merging ubi/linux-next
Merging kvm/linux-next
Merging dlm/next
Merging swiotlb/master
Merging swiotlb-xen/master
Merging ibft/master
Merging scsi/master
Merging async_tx/next
Merging wireless/master
CONFLICT (content): Merge conflict in Documentation/feature-removal-schedule.txt
CONFLICT (content): Merge conflict in arch/arm/mach-omap2/board-omap3pandora.c
CONFLICT (content): Merge conflict in arch/arm/mach-omap2/board-zoom-peripherals.c
CONFLICT (content): Merge conflict in drivers/net/wireless/libertas/if_sdio.c
Merging net/master
CONFLICT (content): Merge conflict in drivers/net/pcmcia/pcnet_cs.c
CONFLICT (content): Merge conflict in drivers/net/wireless/ipw2x00/ipw2200.c
CONFLICT (content): Merge conflict in net/caif/caif_socket.c
Merging mtd/master
Merging crypto/master
CONFLICT (content): Merge conflict in arch/arm/mach-omap2/devices.c
Merging sound-asoc/for-next
CONFLICT (content): Merge conflict in drivers/video/sh_mobile_hdmi.c
Merging sound/for-next
Merging cpufreq/next
Merging quilt/rr
Merging input/next
CONFLICT (content): Merge conflict in drivers/input/keyboard/Kconfig
Merging lsm/for-next
Merging block/for-next
CONFLICT (content): Merge conflict in fs/ext4/mballoc.c
Merging quilt/device-mapper
Merging embedded/master
Merging firmware/master
Merging pcmcia/master
CONFLICT (content): Merge conflict in drivers/net/pcmcia/smc91c92_cs.c
Merging battery/master
Merging leds/for-mm
Merging backlight/for-mm
Merging mmc/mmc-next
Merging kgdb/kgdb-next
CONFLICT (content): Merge conflict in drivers/char/sysrq.c
Merging slab/for-next
Merging uclinux/for-next
Merging md/for-next
Merging mfd/for-next
CONFLICT (content): Merge conflict in drivers/mfd/sh_mobile_sdhi.c
Merging hdlc/hdlc-next
Merging drm/drm-next
Merging viafb/viafb-next
Merging voltage/for-next
Merging security-testing/next
Merging lblnet/master
Merging agp/agp-next
Merging uwb/for-upstream
Merging watchdog/master
Merging bdev/master
Merging dwmw2-iommu/master
Merging cputime/cputime
Merging osd/linux-next
Merging jc_docs/docs-next
Merging nommu/master
Merging trivial/for-next
Merging audit/for-next
Merging quilt/aoe
Merging suspend/linux-next
Merging bluetooth/master
Merging fsnotify/for-next
Merging irda/for-next
Merging catalin/for-next
CONFLICT (content): Merge conflict in arch/arm/include/asm/smp_plat.h
CONFLICT (content): Merge conflict in arch/arm/kernel/Makefile
CONFLICT (content): Merge conflict in arch/arm/mach-vexpress/ct-ca9x4.c
CONFLICT (content): Merge conflict in arch/arm/mm/flush.c
Merging alacrity/linux-next
CONFLICT (content): Merge conflict in include/linux/Kbuild
Merging i7core_edac/linux_next
CONFLICT (content): Merge conflict in MAINTAINERS
Merging i7300_edac/linux_next
Merging devicetree/next-devicetree
Merging spi/next-spi
Merging omap_dss2/for-next
Merging xen/upstream/xen
Merging rcu/rcu/next
CONFLICT (content): Merge conflict in include/linux/rcupdate.h
Merging tip/auto-latest
CONFLICT (content): Merge conflict in arch/x86/kernel/module.c
CONFLICT (content): Merge conflict in include/linux/percpu.h
CONFLICT (content): Merge conflict in net/core/dev.c
Merging edac-amd/for-next
Merging oprofile/for-next
Merging percpu/for-next
CONFLICT (content): Merge conflict in include/linux/percpu.h
CONFLICT (content): Merge conflict in mm/percpu.c
Merging workqueues/for-next
Merging sfi/sfi-test
Merging asm-generic/next
Merging drivers-x86/linux-next
Merging hwpoison/hwpoison
CONFLICT (content): Merge conflict in mm/memory-failure.c
Merging sysctl/master
Merging quilt/driver-core
CONFLICT (content): Merge conflict in drivers/misc/Makefile
Merging quilt/tty
Merging quilt/usb
CONFLICT (content): Merge conflict in drivers/usb/gadget/rndis.c
Merging staging-next/staging-next
CONFLICT (rename/modify): Merge conflict in drivers/misc/ti-st/st_kim.c
CONFLICT (content): Merge conflict in arch/arm/plat-omap/devices.c
CONFLICT (content): Merge conflict in drivers/misc/Makefile
CONFLICT (content): Merge conflict in drivers/staging/Makefile
CONFLICT (content): Merge conflict in drivers/staging/batman-adv/hard-interface.c
CONFLICT (delete/modify): drivers/staging/mrst-touchscreen/Makefile deleted in HEAD and modified in staging-next/staging-next. Version staging-next/staging-next of drivers/staging/mrst-touchscreen/Makefile left in tree.
CONFLICT (delete/modify): drivers/staging/mrst-touchscreen/intel-mid-touch.c deleted in HEAD and modified in staging-next/staging-next. Version staging-next/staging-next of drivers/staging/mrst-touchscreen/intel-mid-touch.c left in tree.
CONFLICT (delete/modify): drivers/staging/ti-st/st.h deleted in staging-next/staging-next and modified in HEAD. Version HEAD of drivers/staging/ti-st/st.h left in tree.
$ git rm -f drivers/staging/mrst-touchscreen/Makefile drivers/staging/mrst-touchscreen/intel-mid-touch.c
$ git rm -f drivers/staging/ti-st/st.h drivers/staging/ti-st/st_core.h
Applying: staging: ath6kl: Fixing the driver to use modified mmc_host structure
Merging slabh/slabh
Merging bkl-trivial/trivial
CONFLICT (content): Merge conflict in drivers/block/ataflop.c
CONFLICT (content): Merge conflict in drivers/char/pcmcia/cm4000_cs.c
CONFLICT (content): Merge conflict in drivers/char/pcmcia/cm4040_cs.c
CONFLICT (content): Merge conflict in drivers/mmc/card/block.c
Merging bkl-llseek/llseek
CONFLICT (content): Merge conflict in drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
CONFLICT (content): Merge conflict in drivers/infiniband/hw/cxgb4/device.c
Merging bkl-vfs/vfs
CONFLICT (content): Merge conflict in fs/nilfs2/super.c
Merging bkl-config/config
CONFLICT (content): Merge conflict in fs/compat_ioctl.c
Merging irqflags/master
Merging cleancache/linux-next
CONFLICT (content): Merge conflict in include/linux/fs.h
CONFLICT (content): Merge conflict in mm/Kconfig
Merging scsi-post-merge/merge-base:master
[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]
^ permalink raw reply
* Re: [PATCH v2] memcg: reduce lock time at move charge (Was Re: [PATCH 04/10] memcg: disable local interrupts in lock_page_cgroup()
From: KAMEZAWA Hiroyuki @ 2010-10-08 5:12 UTC (permalink / raw)
To: Andrew Morton
Cc: Daisuke Nishimura, Minchan Kim, Greg Thelen, linux-kernel,
linux-mm, containers, Andrea Righi, Balbir Singh
In-Reply-To: <20101007215556.21412ae6.akpm@linux-foundation.org>
On Thu, 7 Oct 2010 21:55:56 -0700
Andrew Morton <akpm@linux-foundation.org> wrote:
> On Fri, 8 Oct 2010 13:37:12 +0900 KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
>
> > On Thu, 7 Oct 2010 16:14:54 -0700
> > Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > > On Thu, 7 Oct 2010 17:04:05 +0900
> > > KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > >
> > > > Now, at task migration among cgroup, memory cgroup scans page table and moving
> > > > account if flags are properly set.
> > > >
> > > > The core code, mem_cgroup_move_charge_pte_range() does
> > > >
> > > > pte_offset_map_lock();
> > > > for all ptes in a page table:
> > > > 1. look into page table, find_and_get a page
> > > > 2. remove it from LRU.
> > > > 3. move charge.
> > > > 4. putback to LRU. put_page()
> > > > pte_offset_map_unlock();
> > > >
> > > > for pte entries on a 3rd level? page table.
> > > >
> > > > This pte_offset_map_lock seems a bit long. This patch modifies a routine as
> > > >
> > > > for 32 pages: pte_offset_map_lock()
> > > > find_and_get a page
> > > > record it
> > > > pte_offset_map_unlock()
> > > > for all recorded pages
> > > > isolate it from LRU.
> > > > move charge
> > > > putback to LRU
> > > > for all recorded pages
> > > > put_page()
> > >
> > > The patch makes the code larger, more complex and slower!
> > >
> >
> > Slower ?
>
> Sure. It walks the same data three times, potentially causing
> thrashing in the L1 cache.
Hmm, make this 2 times, at least.
> It takes and releases locks at a higher frequency. It increases the text size.
>
But I don't think page_table_lock is a lock that should be held for as long
as it takes to do
1. find_get_page
2. spin_lock(zone->lock)
3. remove it from LRU
4. lock_page_cgroup()
5. move charge (This means page
6. putback to LRU
for 4096/8=1024 pages.
I will try to make the routine smarter.
But I want to get rid of page_table_lock -> lock_page_cgroup().
Thanks,
-Kame
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply
* Re: empty filter on FORWARD chain with rp_filter means safe right?
From: Jan Engelhardt @ 2010-10-08 5:02 UTC (permalink / raw)
To: Payam Chychi; +Cc: Scott Mcdermott, netfilter
In-Reply-To: <4CAEA0D3.1020207@gmail.com>
On Friday 2010-10-08 06:40, Payam Chychi wrote:
> Thats correct Scott,
> in order for any systems to abuse your setup they will need to be directly
> connected to a segment that has knowledge of valid route to the end system...
> meaning if a computer is 2 hops away and the router in between has no knowledge
> of how to get to your private rfc1918 then pkts get dropped.
>
> Keep in mind that as ipv4 exhaustion gets extreme, some isps will use rfc1918
> blocks and route them either in their IGP or even EGP (aka internet routes)...
Internally yes, but externally no. And it's not really RFC1918 routes being
"used in the Internet" - instead, it is "enlarging our NAT domain". (Mobile
UMTS/HSDPA providers do this in Germany already.)
^ permalink raw reply
* Re: [RFC PATCH] Audio standards on tm6000
From: Dmitri Belimov @ 2010-10-08 19:03 UTC (permalink / raw)
To: Mauro Carvalho Chehab
Cc: Felipe Sanches, Stefan Ringel, Bee Hock Goh,
Luis Henrique Fagundes, Linux Media Mailing List
In-Reply-To: <4CAD5A78.3070803@redhat.com>
Hi Mauro
Not so good. With this patch the audio sometimes has bad white noise and
poor quality. I am trying to find a better configuration for SECAM-DK.
With my best regards, Dmitry.
> Hi Dmitri,
>
> IMO, the better is to remove the audio init from tm6000-core and add
> a separate per-standard set of tables.
>
> I'm enclosing the patch for it. Please check if this won't break for
> your device.
>
> On all tests I did here with a tm6010 device (HVR 900H), I was only
> able to listen to white noise.
>
> I'm suspecting that this device uses XC3028 MTS mode (e. g. uses
> xc3028 to decode audio, and just inputs the audio stream from some
> line IN). As the driver is not able yet to handle an audio mux, this
> may explain why I'm not able to receive any audio at all.
>
> Maybe tm5600 devices may also require (or use) line input entries,
> instead of I2S.
>
> Could you please check those issues?
>
> PS.: the PAL/M hunk will probably fail, as I likely applied some
> patches before this one, in order to try to fix it. It should be
> trivial to solve the conflicts.
>
> ---
>
> tm6000: Implement audio standard tables
>
> Implement separate tables for audio standards, associating them with
> the video standards.
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
>
> diff --git a/drivers/staging/tm6000/tm6000-core.c
> b/drivers/staging/tm6000/tm6000-core.c index 57cb69e..9cb2901 100644
> --- a/drivers/staging/tm6000/tm6000-core.c
> +++ b/drivers/staging/tm6000/tm6000-core.c
> @@ -200,6 +200,10 @@ int tm6000_init_analog_mode(struct tm6000_core
> *dev) val &= ~0x40;
> tm6000_set_reg(dev,
> TM6010_REQ07_RC0_ACTIVE_VIDEO_SOURCE, val);
> + tm6000_set_reg(dev,
> TM6010_REQ08_RF1_AADC_POWER_DOWN, 0xfc); +
> +#if 0 /* FIXME: VBI is standard-dependent */
> +
> /* Init teletext */
> tm6000_set_reg(dev, TM6010_REQ07_R3F_RESET, 0x01);
> tm6000_set_reg(dev,
> TM6010_REQ07_R41_TELETEXT_VBI_CODE1, 0x27); @@ -249,44 +253,7 @@ int
> tm6000_init_analog_mode(struct tm6000_core *dev) tm6000_set_reg(dev,
> TM6010_REQ07_R5B_VBI_TELETEXT_DTO0, 0x4c); tm6000_set_reg(dev,
> TM6010_REQ07_R40_TELETEXT_VBI_CODE0, 0x01); tm6000_set_reg(dev,
> TM6010_REQ07_R3F_RESET, 0x00); -
> -
> - /* Init audio */
> - tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> - tm6000_set_reg(dev, TM6010_REQ08_R04_A_SIF_AMP_CTRL,
> 0xa0);
> - tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> - tm6000_set_reg(dev, TM6010_REQ08_R07_A_LEFT_VOL,
> 0x00);
> - tm6000_set_reg(dev, TM6010_REQ08_R08_A_RIGHT_VOL,
> 0x00);
> - tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> - tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> - tm6000_set_reg(dev, TM6010_REQ08_R0B_A_ASD_THRES1,
> 0x20);
> - tm6000_set_reg(dev, TM6010_REQ08_R0C_A_ASD_THRES2,
> 0x12);
> - tm6000_set_reg(dev, TM6010_REQ08_R0D_A_AMD_THRES,
> 0x20);
> - tm6000_set_reg(dev, TM6010_REQ08_R0E_A_MONO_THRES1,
> 0xf0);
> - tm6000_set_reg(dev, TM6010_REQ08_R0F_A_MONO_THRES2,
> 0x80);
> - tm6000_set_reg(dev, TM6010_REQ08_R10_A_MUTE_THRES1,
> 0xc0);
> - tm6000_set_reg(dev, TM6010_REQ08_R11_A_MUTE_THRES2,
> 0x80);
> - tm6000_set_reg(dev, TM6010_REQ08_R12_A_AGC_U, 0x12);
> - tm6000_set_reg(dev, TM6010_REQ08_R13_A_AGC_ERR_T,
> 0xfe);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R14_A_AGC_GAIN_INIT, 0x20);
> - tm6000_set_reg(dev, TM6010_REQ08_R15_A_AGC_STEP_THR,
> 0x14);
> - tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> - tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> - tm6000_set_reg(dev, TM6010_REQ08_R18_A_TR_CTRL,
> 0xa0);
> - tm6000_set_reg(dev, TM6010_REQ08_R19_A_FH_2FH_GAIN,
> 0x32);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R1A_A_NICAM_SER_MAX, 0x64);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R1B_A_NICAM_SER_MIN, 0x20);
> - tm6000_set_reg(dev, REQ_08_SET_GET_AVREG_BIT, 0x1c,
> 0x00);
> - tm6000_set_reg(dev, REQ_08_SET_GET_AVREG_BIT, 0x1d,
> 0x00);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R1E_A_GAIN_DEEMPH_OUT, 0x13);
> - tm6000_set_reg(dev,
> TM6010_REQ08_R1F_A_TEST_INTF_SEL, 0x00);
> - tm6000_set_reg(dev, TM6010_REQ08_R20_A_TEST_PIN_SEL,
> 0x00);
> - tm6000_set_reg(dev, TM6010_REQ08_RE4_ADC_IN2_SEL,
> 0xf3);
> - tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x00);
> - tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> -
> +#endif
> } else {
> /* Enables soft reset */
> tm6000_set_reg(dev, TM6010_REQ07_R3F_RESET, 0x01);
> @@ -360,7 +327,6 @@ int tm6000_init_digital_mode(struct tm6000_core
> *dev) tm6000_set_reg(dev, TM6010_REQ07_RFE_POWER_DOWN, 0x28);
> tm6000_set_reg(dev,
> TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xfc); tm6000_set_reg(dev,
> TM6010_REQ08_RE6_POWER_DOWN_CTRL2, 0xff);
> - tm6000_set_reg(dev,
> TM6010_REQ08_RF1_AADC_POWER_DOWN, 0xfe); tm6000_read_write_usb(dev,
> 0xc0, 0x0e, 0x00c2, 0x0008, buf, 2); printk(KERN_INFO"buf %#x %#x\n",
> buf[0], buf[1]); } else {
> diff --git a/drivers/staging/tm6000/tm6000-stds.c
> b/drivers/staging/tm6000/tm6000-stds.c index 33adf6c..e79a72e 100644
> --- a/drivers/staging/tm6000/tm6000-stds.c
> +++ b/drivers/staging/tm6000/tm6000-stds.c
> @@ -28,8 +28,22 @@ struct tm6000_reg_settings {
> unsigned char value;
> };
>
> +enum tm6000_audio_std {
> + BG_NICAM,
> + BTSC,
> + BG_A2,
> + DK_NICAM,
> + EIAJ,
> + FM_RADIO,
> + I_NICAM,
> + KOREA_A2,
> + L_NICAM,
> +};
> +
> struct tm6000_std_tv_settings {
> v4l2_std_id id;
> + enum tm6000_audio_std audio_default_std;
> +
> struct tm6000_reg_settings sif[12];
> struct tm6000_reg_settings nosif[12];
> struct tm6000_reg_settings common[26];
> @@ -37,12 +51,14 @@ struct tm6000_std_tv_settings {
>
> struct tm6000_std_settings {
> v4l2_std_id id;
> + enum tm6000_audio_std audio_default_std;
> struct tm6000_reg_settings common[37];
> };
>
> static struct tm6000_std_tv_settings tv_stds[] = {
> {
> .id = V4L2_STD_PAL_M,
> + .audio_default_std = BTSC,
> .sif = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf2},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf8},
> @@ -96,12 +112,14 @@ static struct tm6000_std_tv_settings tv_stds[] =
> {
> {TM6010_REQ07_R04_LUMA_HAGC_CONTROL, 0xdc},
> {TM6010_REQ07_R0D_CHROMA_KILL_LEVEL, 0x07},
> - {TM6010_REQ08_R05_A_STANDARD_MOD, 0x22},
> +
> {TM6010_REQ07_R3F_RESET, 0x00},
> +
> {0, 0, 0},
> },
> }, {
> .id = V4L2_STD_PAL_Nc,
> + .audio_default_std = BTSC,
> .sif = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf2},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf8},
> @@ -161,6 +179,7 @@ static struct tm6000_std_tv_settings tv_stds[] = {
> },
> }, {
> .id = V4L2_STD_PAL,
> + .audio_default_std = BG_A2,
> .sif = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf2},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf8},
> @@ -220,6 +239,7 @@ static struct tm6000_std_tv_settings tv_stds[] = {
> },
> }, {
> .id = V4L2_STD_SECAM,
> + .audio_default_std = BG_NICAM,
> .sif = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf2},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf8},
> @@ -278,6 +298,7 @@ static struct tm6000_std_tv_settings tv_stds[] = {
> },
> }, {
> .id = V4L2_STD_NTSC,
> + .audio_default_std = BTSC,
> .sif = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf2},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf8},
> @@ -341,6 +362,7 @@ static struct tm6000_std_tv_settings tv_stds[] = {
> static struct tm6000_std_settings composite_stds[] = {
> {
> .id = V4L2_STD_PAL_M,
> + .audio_default_std = BTSC,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf4},
> @@ -383,6 +405,7 @@ static struct tm6000_std_settings
> composite_stds[] = { },
> }, {
> .id = V4L2_STD_PAL_Nc,
> + .audio_default_std = BTSC,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf4},
> @@ -425,6 +448,7 @@ static struct tm6000_std_settings
> composite_stds[] = { },
> }, {
> .id = V4L2_STD_PAL,
> + .audio_default_std = BG_A2,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf4},
> @@ -467,6 +491,7 @@ static struct tm6000_std_settings
> composite_stds[] = { },
> }, {
> .id = V4L2_STD_SECAM,
> + .audio_default_std = BG_NICAM,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf4},
> @@ -508,6 +533,7 @@ static struct tm6000_std_settings
> composite_stds[] = { },
> }, {
> .id = V4L2_STD_NTSC,
> + .audio_default_std = BTSC,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xf4},
> @@ -554,6 +580,7 @@ static struct tm6000_std_settings
> composite_stds[] = { static struct tm6000_std_settings svideo_stds[]
> = { {
> .id = V4L2_STD_PAL_M,
> + .audio_default_std = BTSC,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xfc},
> @@ -596,6 +623,7 @@ static struct tm6000_std_settings svideo_stds[] =
> { },
> }, {
> .id = V4L2_STD_PAL_Nc,
> + .audio_default_std = BTSC,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xfc},
> @@ -638,6 +666,7 @@ static struct tm6000_std_settings svideo_stds[] =
> { },
> }, {
> .id = V4L2_STD_PAL,
> + .audio_default_std = BG_A2,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xfc},
> @@ -680,6 +709,7 @@ static struct tm6000_std_settings svideo_stds[] =
> { },
> }, {
> .id = V4L2_STD_SECAM,
> + .audio_default_std = BG_NICAM,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xfc},
> @@ -721,6 +751,7 @@ static struct tm6000_std_settings svideo_stds[] =
> { },
> }, {
> .id = V4L2_STD_NTSC,
> + .audio_default_std = BTSC,
> .common = {
> {TM6010_REQ08_RE2_POWER_DOWN_CTRL1, 0xf0},
> {TM6010_REQ08_RE3_ADC_IN1_SEL, 0xfc},
> @@ -765,6 +796,136 @@ static struct tm6000_std_settings svideo_stds[]
> = { },
> };
>
> +
> +static int tm6000_set_audio_std(struct tm6000_core *dev,
> + enum tm6000_audio_std std)
> +{
> + switch (std) {
> + case BG_NICAM:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x11);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case BTSC:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x02);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R0E_A_MONO_THRES1,
> 0xf0);
> + tm6000_set_reg(dev, TM6010_REQ08_R0F_A_MONO_THRES2,
> 0x80);
> + tm6000_set_reg(dev, TM6010_REQ08_R10_A_MUTE_THRES1,
> 0xc0);
> + tm6000_set_reg(dev, TM6010_REQ08_R11_A_MUTE_THRES2,
> 0x80);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case BG_A2:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x05);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R0E_A_MONO_THRES1,
> 0xf0);
> + tm6000_set_reg(dev, TM6010_REQ08_R0F_A_MONO_THRES2,
> 0x80);
> + tm6000_set_reg(dev, TM6010_REQ08_R10_A_MUTE_THRES1,
> 0xc0);
> + tm6000_set_reg(dev, TM6010_REQ08_R11_A_MUTE_THRES2,
> 0x80);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case DK_NICAM:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R0C_A_ASD_THRES2,
> 0x0a);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case EIAJ:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x03);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case FM_RADIO:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x01);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x0c);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x10);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case I_NICAM:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R0C_A_ASD_THRES2,
> 0x0a);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case KOREA_A2:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x04);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x04);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R0E_A_MONO_THRES1,
> 0xf0);
> + tm6000_set_reg(dev, TM6010_REQ08_R0F_A_MONO_THRES2,
> 0x80);
> + tm6000_set_reg(dev, TM6010_REQ08_R10_A_MUTE_THRES1,
> 0xc0);
> + tm6000_set_reg(dev, TM6010_REQ08_R11_A_MUTE_THRES2,
> 0xf0);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + case L_NICAM:
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x00);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R02_A_FIX_GAIN_CTRL, 0x02);
> + tm6000_set_reg(dev,
> TM6010_REQ08_R03_A_AUTO_GAIN_CTRL, 0x00);
> + tm6000_set_reg(dev, TM6010_REQ08_R05_A_STANDARD_MOD,
> 0x0a);
> + tm6000_set_reg(dev, TM6010_REQ08_R06_A_SOUND_MOD,
> 0x06);
> + tm6000_set_reg(dev, TM6010_REQ08_R09_A_MAIN_VOL,
> 0x08);
> + tm6000_set_reg(dev, TM6010_REQ08_R0A_A_I2S_MOD,
> 0x91);
> + tm6000_set_reg(dev, TM6010_REQ08_R16_A_AGC_GAIN_MAX,
> 0xfe);
> + tm6000_set_reg(dev, TM6010_REQ08_R17_A_AGC_GAIN_MIN,
> 0x01);
> + tm6000_set_reg(dev, TM6010_REQ08_R01_A_INIT, 0x80);
> + break;
> + }
> + return 0;
> +}
> +
> void tm6000_get_std_res(struct tm6000_core *dev)
> {
> /* Currently, those are the only supported resoltions */
> @@ -825,6 +986,8 @@ static int tm6000_set_tv(struct tm6000_core *dev,
> int pos) rc = tm6000_load_std(dev, tv_stds[pos].common,
> sizeof(tv_stds[pos].common));
>
> + tm6000_set_audio_std(dev, tv_stds[pos].audio_default_std);
> +
> return rc;
> }
>
> @@ -850,6 +1013,8 @@ int tm6000_set_standard(struct tm6000_core *dev,
> v4l2_std_id * norm) rc = tm6000_load_std(dev, svideo_stds[i].common,
> sizeof(svideo_stds[i].
> common));
> + tm6000_set_audio_std(dev,
> svideo_stds[i].audio_default_std); +
> goto ret;
> }
> }
> @@ -861,6 +1026,7 @@ int tm6000_set_standard(struct tm6000_core *dev,
> v4l2_std_id * norm) composite_stds[i].common,
> sizeof(composite_stds[i].
> common));
> + tm6000_set_audio_std(dev,
> composite_stds[i].audio_default_std); goto ret;
> }
> }
> diff --git a/drivers/staging/tm6000/tm6000-video.c
> b/drivers/staging/tm6000/tm6000-video.c index a45b012..9304158 100644
> --- a/drivers/staging/tm6000/tm6000-video.c
> +++ b/drivers/staging/tm6000/tm6000-video.c
> @@ -1015,7 +1015,8 @@ static int vidioc_s_std (struct file *file,
> void *priv, v4l2_std_id *norm) struct tm6000_fh *fh=priv;
> struct tm6000_core *dev = fh->dev;
>
> - rc=tm6000_set_standard (dev, norm);
> + rc = tm6000_set_standard(dev, norm);
> + rc = tm6000_init_analog_mode(dev);
>
> fh->width = dev->width;
> fh->height = dev->height;
> @@ -1292,9 +1293,10 @@ static int tm6000_open(struct file *file)
> "active=%d\n",list_empty(&dev->vidq.active));
>
> /* initialize hardware on analog mode */
> - if (dev->mode!=TM6000_MODE_ANALOG) {
> - rc=tm6000_init_analog_mode (dev);
> - if (rc<0)
> +// if (dev->mode!=TM6000_MODE_ANALOG) {
> +// rc = tm6000_set_standard(dev, dev->norm);
> + rc += tm6000_init_analog_mode(dev);
> + if (rc < 0)
> return rc;
>
> /* Put all controls at a sane state */
> @@ -1302,7 +1304,7 @@ static int tm6000_open(struct file *file)
> qctl_regs[i] =tm6000_qctrl[i].default_value;
>
> dev->mode=TM6000_MODE_ANALOG;
> - }
> +// }
>
> videobuf_queue_vmalloc_init(&fh->vb_vidq, &tm6000_video_qops,
> NULL, &dev->slock,
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.