From: Al Viro <viro@ZenIV.linux.org.uk>
To: linux-fsdevel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
linux-kernel@vger.kernel.org,
Miklos Szeredi <mszeredi@redhat.com>
Subject: [PATCH 02/16] new primitive: discard_new_inode()
Date: Sun, 29 Jul 2018 23:04:39 +0100 [thread overview]
Message-ID: <20180729220453.13431-2-viro@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20180729220453.13431-1-viro@ZenIV.linux.org.uk>
From: Al Viro <viro@zeniv.linux.org.uk>
We don't want open-by-handle picking half-set-up in-core
struct inode from e.g. mkdir() having failed halfway through.
In other words, we don't want such inodes returned by iget_locked()
on their way to extinction. However, we can't just have them
unhashed - otherwise open-by-handle immediately *after* that would've
ended up creating a new in-core inode over the on-disk one that
is in process of being freed right under us.
Solution: new flag (I_CREATING) set by insert_inode_locked() and
removed by unlock_new_inode() and a new primitive (discard_new_inode())
to be used by such halfway-through-setup failure exits instead of
unlock_new_inode() / iput() combinations. That primitive unlocks new
inode, but leaves I_CREATING in place.
iget_locked() treats finding an I_CREATING inode as failure
(-ESTALE, once we sort out the error propagation).
insert_inode_locked() treats the same as instant -EBUSY.
ilookup() treats those as icache miss.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/inode.c | 44 ++++++++++++++++++++++++++++++++++++++++----
include/linux/fs.h | 6 +++++-
2 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..04dd7e0d5142 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ static struct inode *find_inode(struct super_block *sb,
__wait_on_freeing_inode(inode);
goto repeat;
}
+ if (unlikely(inode->i_state & I_CREATING)) {
+ spin_unlock(&inode->i_lock);
+ return ERR_PTR(-ESTALE);
+ }
__iget(inode);
spin_unlock(&inode->i_lock);
return inode;
@@ -831,6 +835,10 @@ static struct inode *find_inode_fast(struct super_block *sb,
__wait_on_freeing_inode(inode);
goto repeat;
}
+ if (unlikely(inode->i_state & I_CREATING)) {
+ spin_unlock(&inode->i_lock);
+ return ERR_PTR(-ESTALE);
+ }
__iget(inode);
spin_unlock(&inode->i_lock);
return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
+ inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
+void discard_new_inode(struct inode *inode)
+{
+ lockdep_annotate_inode_mutex_key(inode);
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_NEW);
+ spin_unlock(&inode->i_lock);
+ iput(inode);
+}
+EXPORT_SYMBOL(discard_new_inode);
+
/**
* lock_two_nondirectories - take two i_mutexes on non-directory objects
*
@@ -1039,6 +1060,8 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
* Use the old inode instead of the preallocated one.
*/
spin_unlock(&inode_hash_lock);
+ if (IS_ERR(old))
+ return NULL;
wait_on_inode(old);
if (unlikely(inode_unhashed(old))) {
iput(old);
@@ -1128,6 +1151,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock);
if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
iput(inode);
@@ -1165,6 +1190,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
*/
spin_unlock(&inode_hash_lock);
destroy_inode(inode);
+ if (IS_ERR(old))
+ return NULL;
inode = old;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
inode = find_inode(sb, head, test, data);
spin_unlock(&inode_hash_lock);
- return inode;
+ return IS_ERR(inode) ? NULL : inode;
}
EXPORT_SYMBOL(ilookup5_nowait);
@@ -1338,6 +1365,8 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
spin_unlock(&inode_hash_lock);
if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
iput(inode);
@@ -1421,12 +1450,16 @@ int insert_inode_locked(struct inode *inode)
}
if (likely(!old)) {
spin_lock(&inode->i_lock);
- inode->i_state |= I_NEW;
+ inode->i_state |= I_NEW | I_CREATING;
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
return 0;
}
+ if (unlikely(old->i_state & I_CREATING)) {
+ spin_unlock(&old->i_lock);
+ return -EBUSY;
+ }
__iget(old);
spin_unlock(&old->i_lock);
spin_unlock(&inode_hash_lock);
@@ -1443,7 +1476,10 @@ EXPORT_SYMBOL(insert_inode_locked);
int insert_inode_locked4(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+ struct inode *old;
+
+ inode->i_state |= I_CREATING;
+ old = inode_insert5(inode, hashval, test, NULL, data);
if (old != inode) {
iput(old);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..a42600565925 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
* and work dirs among overlayfs mounts.
*
+ * I_CREATING New object's inode in the middle of setting up.
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
#define __I_DIRTY_TIME_EXPIRED 12
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_WB_SWITCH (1 << 13)
-#define I_OVL_INUSE (1 << 14)
+#define I_OVL_INUSE (1 << 14)
+#define I_CREATING (1 << 15)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
#endif
extern void unlock_new_inode(struct inode *);
+extern void discard_new_inode(struct inode *);
extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb);
--
2.11.0
next prev parent reply other threads:[~2018-07-29 22:04 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-07-29 22:03 [PATCHES][RFC] icache-related stuff Al Viro
2018-07-29 22:04 ` [PATCH 01/16] nfs_instantiate(): prevent multiple aliases for directory inode Al Viro
2018-07-29 22:04 ` Al Viro [this message]
2018-07-29 22:04 ` [PATCH 03/16] vfs: don't evict uninitialized inode Al Viro
2018-07-30 5:09 ` Amir Goldstein
2018-07-30 7:41 ` Miklos Szeredi
2018-08-16 16:29 ` Amir Goldstein
2018-08-24 6:47 ` Amir Goldstein
2018-10-08 6:41 ` Miklos Szeredi
2018-10-08 13:23 ` Greg KH
2018-07-29 22:04 ` [PATCH 04/16] btrfs: switch to discard_new_inode() Al Viro
2018-08-01 15:25 ` David Sterba
2018-07-29 22:04 ` [PATCH 05/16] ufs: " Al Viro
2018-07-29 22:04 ` [PATCH 06/16] udf: " Al Viro
2018-07-29 22:04 ` [PATCH 07/16] ext2: make sure that partially set up inodes won't be returned by ext2_iget() Al Viro
2018-07-29 22:04 ` [PATCH 08/16] btrfs: btrfs_iget() never returns an is_bad_inode() inode Al Viro
2018-07-30 8:13 ` Nikolay Borisov
2018-07-29 22:04 ` [PATCH 09/16] btrfs: IS_ERR(p) && PTR_ERR(p) == n is a weird way to spell p == ERR_PTR(n) Al Viro
2018-07-30 8:06 ` Nikolay Borisov
2018-07-29 22:04 ` [PATCH 10/16] kill d_instantiate_no_diralias() Al Viro
2018-07-29 22:04 ` [PATCH 11/16] jfs: switch to discard_new_inode() Al Viro
2018-07-29 22:04 ` [PATCH 12/16] new helper: inode_fake_hash() Al Viro
2018-07-29 22:04 ` [PATCH 13/16] btrfs: lift make_bad_inode() into btrfs_iget() Al Viro
2018-07-30 8:15 ` Nikolay Borisov
2018-07-29 22:04 ` [PATCH 14/16] btrfs: simplify btrfs_iget() Al Viro
2018-07-30 8:17 ` Nikolay Borisov
2018-07-29 22:04 ` [PATCH 15/16] adfs: don't put inodes into icache Al Viro
2018-07-29 22:04 ` [PATCH 16/16] jfs: don't bother with make_bad_inode() in ialloc() Al Viro
2018-07-30 21:35 ` [PATCHES][RFC] icache-related stuff Linus Torvalds
2018-08-01 15:25 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180729220453.13431-2-viro@ZenIV.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mszeredi@redhat.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.