cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: Tejun Heo <tj@kernel.org>
Cc: Tahsin Erdogan <tahsin@google.com>, Jan Kara <jack@suse.cz>,
	Jens Axboe <axboe@kernel.dk>,
	cgroups@vger.kernel.org, Theodore Ts'o <tytso@mit.edu>,
	Nauman Rafique <nauman@google.com>,
	linux-kernel@vger.kernel.org, Jan Kara <jack@suse.com>,
	Al Viro <viro@ZenIV.linux.org.uk>
Subject: Re: [PATCH v2 block/for-linus] writeback: flush inode cgroup wb switches instead of pinning super_block
Date: Tue, 1 Mar 2016 10:20:11 +0100	[thread overview]
Message-ID: <20160301092011.GA27666@quack.suse.cz> (raw)
In-Reply-To: <20160229232853.GD3965@htj.duckdns.org>

On Mon 29-02-16 18:28:53, Tejun Heo wrote:
> If cgroup writeback is in use, inodes can be scheduled for
> asynchronous wb switching.  Before 5ff8eaac1636 ("writeback: keep
> superblock pinned during cgroup writeback association switches"), this
> could race with umount leading to super_block being destroyed while
> inodes are pinned for wb switching.  5ff8eaac1636 fixed it by bumping
> s_active while wb switches are in flight; however, this allowed
> in-flight wb switches to make umounts asynchronous when the userland
> expected synchronicity - e.g. fsck immediately following umount may
> fail because the device is still busy.
> 
> This patch removes the problematic super_block pinning and instead
> makes generic_shutdown_super() flush in-flight wb switches.  wb
> switches are now executed on a dedicated isw_wq so that they can be
> flushed and isw_nr_in_flight keeps track of the number of in-flight wb
> switches so that flushing can be avoided in most cases.
> 
> v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE
>     check in inode_switch_wbs() as Jan and Al suggested.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reported-by: Tahsin Erdogan <tahsin@google.com>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Al Viro <viro@ZenIV.linux.org.uk>
> Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com
> Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches")
> Cc: stable@vger.kernel.org #v4.5

The patch looks good to me now. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

									Honza

> ---
>  fs/fs-writeback.c         |   54 ++++++++++++++++++++++++++++++++++------------
>  fs/super.c                |    1 
>  include/linux/writeback.h |    5 ++++
>  3 files changed, 47 insertions(+), 13 deletions(-)
> 
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -223,6 +223,9 @@ static void wb_wait_for_completion(struc
>  #define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
>  					/* one round can affect upto 5 slots */
>  
> +static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
> +static struct workqueue_struct *isw_wq;
> +
>  void __inode_attach_wb(struct inode *inode, struct page *page)
>  {
>  	struct backing_dev_info *bdi = inode_to_bdi(inode);
> @@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(str
>  	struct inode_switch_wbs_context *isw =
>  		container_of(work, struct inode_switch_wbs_context, work);
>  	struct inode *inode = isw->inode;
> -	struct super_block *sb = inode->i_sb;
>  	struct address_space *mapping = inode->i_mapping;
>  	struct bdi_writeback *old_wb = inode->i_wb;
>  	struct bdi_writeback *new_wb = isw->new_wb;
> @@ -424,8 +426,9 @@ skip_switch:
>  	wb_put(new_wb);
>  
>  	iput(inode);
> -	deactivate_super(sb);
>  	kfree(isw);
> +
> +	atomic_dec(&isw_nr_in_flight);
>  }
>  
>  static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
> @@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(stru
>  
>  	/* needs to grab bh-unsafe locks, bounce to work item */
>  	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
> -	schedule_work(&isw->work);
> +	queue_work(isw_wq, &isw->work);
>  }
>  
>  /**
> @@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inod
>  
>  	/* while holding I_WB_SWITCH, no one else can update the association */
>  	spin_lock(&inode->i_lock);
> -
> -	if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
> -	    inode_to_wb(inode) == isw->new_wb)
> -		goto out_unlock;
> -
> -	if (!atomic_inc_not_zero(&inode->i_sb->s_active))
> -		goto out_unlock;
> -
> +	if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
> +	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
> +	    inode_to_wb(inode) == isw->new_wb) {
> +		spin_unlock(&inode->i_lock);
> +		goto out_free;
> +	}
>  	inode->i_state |= I_WB_SWITCH;
>  	spin_unlock(&inode->i_lock);
>  
>  	ihold(inode);
>  	isw->inode = inode;
>  
> +	atomic_inc(&isw_nr_in_flight);
> +
>  	/*
>  	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
>  	 * the RCU protected stat update paths to grab the mapping's
> @@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inod
>  	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
>  	return;
>  
> -out_unlock:
> -	spin_unlock(&inode->i_lock);
>  out_free:
>  	if (isw->new_wb)
>  		wb_put(isw->new_wb);
> @@ -847,6 +848,33 @@ restart:
>  		wb_put(last_wb);
>  }
>  
> +/**
> + * cgroup_writeback_umount - flush inode wb switches for umount
> + *
> + * This function is called when a super_block is about to be destroyed and
> + * flushes in-flight inode wb switches.  An inode wb switch goes through
> + * RCU and then workqueue, so the two need to be flushed in order to ensure
> + * that all previously scheduled switches are finished.  As wb switches are
> + * rare occurrences and synchronize_rcu() can take a while, perform
> + * flushing iff wb switches are in flight.
> + */
> +void cgroup_writeback_umount(void)
> +{
> +	if (atomic_read(&isw_nr_in_flight)) {
> +		synchronize_rcu();
> +		flush_workqueue(isw_wq);
> +	}
> +}
> +
> +static int __init cgroup_writeback_init(void)
> +{
> +	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
> +	if (!isw_wq)
> +		return -ENOMEM;
> +	return 0;
> +}
> +fs_initcall(cgroup_writeback_init);
> +
>  #else	/* CONFIG_CGROUP_WRITEBACK */
>  
>  static struct bdi_writeback *
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -415,6 +415,7 @@ void generic_shutdown_super(struct super
>  		sb->s_flags &= ~MS_ACTIVE;
>  
>  		fsnotify_unmount_inodes(sb);
> +		cgroup_writeback_umount();
>  
>  		evict_inodes(sb);
>  
> --- a/include/linux/writeback.h
> +++ b/include/linux/writeback.h
> @@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct
>  void wbc_detach_inode(struct writeback_control *wbc);
>  void wbc_account_io(struct writeback_control *wbc, struct page *page,
>  		    size_t bytes);
> +void cgroup_writeback_umount(void);
>  
>  /**
>   * inode_attach_wb - associate an inode with its wb
> @@ -301,6 +302,10 @@ static inline void wbc_account_io(struct
>  {
>  }
>  
> +static inline void cgroup_writeback_umount(void)
> +{
> +}
> +
>  #endif	/* CONFIG_CGROUP_WRITEBACK */
>  
>  /*
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

  reply	other threads:[~2016-03-01  9:20 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-12 19:32 [BUG] cgroup writeback crash Tahsin Erdogan
     [not found] ` <CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-02-15 21:00   ` Tejun Heo
     [not found]     ` <20160215210047.GN3965-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
2016-02-16  7:56       ` Tahsin Erdogan
     [not found]         ` <CAAeU0aNAd1Ra6LXmWwq8row4MD_BpVHiSXOwHx07m86UWREvHw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-02-16 18:24           ` [PATCH block/for-4.5-fixes] writeback: keep superblock pinned during cgroup writeback association switches Tejun Heo
     [not found]             ` <20160216182457.GO3741-qYNAdHglDFBN0TnZuCh8vA@public.gmane.org>
2016-02-16 18:34               ` Jens Axboe
2016-02-17 20:57               ` Jan Kara
     [not found]                 ` <20160217205721.GE14140-+0h/O2h83AeN3ZZ/Hiejyg@public.gmane.org>
2016-02-17 21:07                   ` Tejun Heo
2016-02-17 22:30                     ` Jan Kara
2016-02-17 22:41                       ` Tahsin Erdogan
     [not found]                         ` <CAAeU0aOvSwPbLPU0=20D1RExNj8VsbB38hUnyso2L8xNSQC0XA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-02-17 23:02                           ` Tejun Heo
2016-02-18  9:55                             ` Jan Kara
     [not found]                               ` <20160218095538.GA4338-+0h/O2h83AeN3ZZ/Hiejyg@public.gmane.org>
2016-02-18 13:00                                 ` Tejun Heo
2016-02-18 13:20                                   ` Jan Kara
2016-02-19 20:18                                   ` Al Viro
2016-02-19 20:51                                     ` Tejun Heo
     [not found]                                       ` <20160219205147.GN13177-qYNAdHglDFBN0TnZuCh8vA@public.gmane.org>
2016-02-19 21:58                                         ` Al Viro
     [not found]                                           ` <20160219215811.GA17997-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
2016-02-19 22:15                                             ` Tejun Heo
2016-02-19 22:26                                               ` Al Viro
     [not found]                                                 ` <20160219222609.GC17997-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
2016-02-28 21:53                                                   ` Tejun Heo
     [not found]                             ` <20160217230231.GC6479-qYNAdHglDFBN0TnZuCh8vA@public.gmane.org>
2016-02-29 20:47                               ` [PATCH block/for-linus] writeback: flush inode cgroup wb switches instead of pinning super_block Tejun Heo
     [not found]                                 ` <20160229204724.GV3965-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
2016-02-29 20:54                                   ` Al Viro
     [not found]                                     ` <20160229205428.GB17997-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
2016-02-29 20:58                                       ` Tejun Heo
     [not found]                                         ` <20160229205837.GX3965-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
2016-02-29 21:06                                           ` Al Viro
     [not found]                                             ` <20160229210614.GC17997-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
2016-02-29 21:08                                               ` Tejun Heo
     [not found]                                                 ` <20160229210800.GY3965-piEFEHQLUPpN0TnZuCh8vA@public.gmane.org>
2016-02-29 21:21                                                   ` Jan Kara
2016-02-29 23:28                                   ` [PATCH v2 " Tejun Heo
2016-03-01  9:20                                     ` Jan Kara [this message]
2016-03-01 17:46                                     ` Jens Axboe
     [not found]                                       ` <56D5D592.2020800-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2016-03-01 17:50                                         ` Tejun Heo
     [not found]                                           ` <CAOS58YO5vTBnM561np7gpXKGQELrT169bYqmcfvAvsquBJK5yw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-03-02 10:29                                             ` Jan Kara
2016-03-01 13:39                                 ` [PATCH " Tahsin Erdogan
2016-02-18 10:12               ` [PATCH block/for-4.5-fixes] writeback: keep superblock pinned during cgroup writeback association switches Nikolay Borisov
2016-02-18 12:57                 ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160301092011.GA27666@quack.suse.cz \
    --to=jack@suse.cz \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=jack@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nauman@google.com \
    --cc=tahsin@google.com \
    --cc=tj@kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).