From: Jan Kara <jack@suse.cz>
To: linux-fsdevel@vger.kernel.org
Cc: Amir Goldstein <amir73il@gmail.com>, Jan Kara <jack@suse.cz>,
stable@vger.kernel.org
Subject: Re: [PATCH v2] fsnotify: Fix busy inodes during unmount
Date: Fri, 19 Oct 2018 13:45:59 +0200 [thread overview]
Message-ID: <20181019114559.GE17214@quack2.suse.cz> (raw)
In-Reply-To: <20181019114332.14925-1-jack@suse.cz>
Please ignore this, I had uncommitted change in my tree which is missing in
this patch. Sorry for the noise.
Honza
On Fri 19-10-18 13:43:32, Jan Kara wrote:
> Detaching of mark connector from fsnotify_put_mark() can race with
> unmounting of the filesystem like:
>
> CPU1 CPU2
> fsnotify_put_mark()
> spin_lock(&conn->lock);
> ...
> inode = fsnotify_detach_connector_from_object(conn)
> spin_unlock(&conn->lock);
> generic_shutdown_super()
> fsnotify_unmount_inodes()
> sees connector detached for inode
> -> nothing to do
> evict_inode()
> barfs on pending inode reference
> iput(inode);
>
> Resulting in "Busy inodes after unmount" message and possible kernel
> oops. Make fsnotify_unmount_inodes() properly wait for outstanding inode
> references from detached connectors.
>
> Note that the accounting of outstanding inode references in the
> superblock can cause some cacheline contention on the counter. OTOH it
> happens only during deletion of the last notification mark from an inode
> (or during unlinking of watched inode) and that is not too bad. I have
> measured time to create & delete inotify watch 100000 times from 64
> processes in parallel (each process having its own inotify group and its
> own file on a shared superblock) on a 64 CPU machine. Average and
> standard deviation of 15 runs look like:
>
> Avg Stddev
> Vanilla 9.817400 0.276165
> Fixed 9.710467 0.228294
>
> So there's no statistically significant difference.
>
> Fixes: 6b3f05d24d35 ("fsnotify: Detach mark from object list when last reference is dropped")
> CC: stable@vger.kernel.org
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
> fs/notify/fsnotify.c | 3 +++
> fs/notify/mark.c | 39 +++++++++++++++++++++++++++++++--------
> include/linux/fs.h | 3 +++
> 3 files changed, 37 insertions(+), 8 deletions(-)
>
> Changes since v1:
> * added Fixes tag
> * improved fsnotify_drop_object to take object type
>
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index f174397b63a0..00d4f4357724 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -96,6 +96,9 @@ void fsnotify_unmount_inodes(struct super_block *sb)
>
> if (iput_inode)
> iput(iput_inode);
> + /* Wait for outstanding inode references from connectors */
> + wait_var_event(&sb->s_fsnotify_inode_refs,
> + !atomic_long_read(&sb->s_fsnotify_inode_refs));
> }
>
> /*
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 59cdb27826de..f4e330b5b379 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -179,17 +179,20 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
> }
> }
>
> -static struct inode *fsnotify_detach_connector_from_object(
> - struct fsnotify_mark_connector *conn)
> +static void *fsnotify_detach_connector_from_object(
> + struct fsnotify_mark_connector *conn,
> + unsigned int *type)
> {
> struct inode *inode = NULL;
>
> + *type = conn->type;
> if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
> return NULL;
>
> if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
> inode = fsnotify_conn_inode(conn);
> inode->i_fsnotify_mask = 0;
> + atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
> } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
> fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
> }
> @@ -211,10 +214,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
> fsnotify_put_group(group);
> }
>
> +/* Drop object reference originally held by a connector */
> +static void fsnotify_drop_object(unsigned int type, void *objp)
> +{
> + struct inode *inode;
> + struct super_block *sb;
> +
> + if (!objp)
> + return;
> + /* Currently only inode references are passed to be dropped */
> + if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
> + return;
> + inode = objp;
> + sb = inode->i_sb;
> + iput(inode);
> + if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
> + wake_up_var(&sb->s_fsnotify_inode_refs);
> +}
> +
> void fsnotify_put_mark(struct fsnotify_mark *mark)
> {
> struct fsnotify_mark_connector *conn;
> - struct inode *inode = NULL;
> + void *objp = NULL;
> + unsigned int type;
> bool free_conn = false;
>
> /* Catch marks that were actually never attached to object */
> @@ -234,7 +256,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
> conn = mark->connector;
> hlist_del_init_rcu(&mark->obj_list);
> if (hlist_empty(&conn->list)) {
> - inode = fsnotify_detach_connector_from_object(conn);
> + objp = fsnotify_detach_connector_from_object(conn, &type);
> free_conn = true;
> } else {
> __fsnotify_recalc_mask(conn);
> @@ -242,7 +264,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
> mark->connector = NULL;
> spin_unlock(&conn->lock);
>
> - iput(inode);
> + fsnotify_drop_object(type, objp);
>
> if (free_conn) {
> spin_lock(&destroy_lock);
> @@ -709,7 +731,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
> {
> struct fsnotify_mark_connector *conn;
> struct fsnotify_mark *mark, *old_mark = NULL;
> - struct inode *inode;
> + void *objp;
> + unsigned int type;
>
> conn = fsnotify_grab_connector(connp);
> if (!conn)
> @@ -735,11 +758,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
> * mark references get dropped. It would lead to strange results such
> * as delaying inode deletion or blocking unmount.
> */
> - inode = fsnotify_detach_connector_from_object(conn);
> + objp = fsnotify_detach_connector_from_object(conn, &type);
> spin_unlock(&conn->lock);
> if (old_mark)
> fsnotify_put_mark(old_mark);
> - iput(inode);
> + fsnotify_drop_object(type, objp);
> }
>
> /*
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 33322702c910..5090f3dcec3b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1428,6 +1428,9 @@ struct super_block {
> /* Number of inodes with nlink == 0 but still referenced */
> atomic_long_t s_remove_count;
>
> + /* Pending fsnotify inode refs */
> + atomic_long_t s_fsnotify_inode_refs;
> +
> /* Being remounted read-only */
> int s_readonly_remount;
>
> --
> 2.16.4
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
next prev parent reply other threads:[~2018-10-19 11:45 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-19 11:43 [PATCH v2] fsnotify: Fix busy inodes during unmount Jan Kara
2018-10-19 11:45 ` Jan Kara [this message]
2018-10-19 13:58 ` Amir Goldstein
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181019114559.GE17214@quack2.suse.cz \
--to=jack@suse.cz \
--cc=amir73il@gmail.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).