From: <gregkh@linuxfoundation.org>
To: tj@kernel.org, axboe@fb.com, gregkh@linuxfoundation.org,
jack@suse.cz, tahsin@google.com, viro@ZenIV.linux.org.uk
Cc: <stable@vger.kernel.org>, <stable-commits@vger.kernel.org>
Subject: Patch "writeback: flush inode cgroup wb switches instead of pinning super_block" has been added to the 4.4-stable tree
Date: Sat, 05 Mar 2016 11:45:20 -0800 [thread overview]
Message-ID: <1457207120244134@kroah.com> (raw)
This is a note to let you know that I've just added the patch titled
writeback: flush inode cgroup wb switches instead of pinning super_block
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
The filename of the patch is:
writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
>From a1a0e23e49037c23ea84bc8cc146a03584d13577 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 29 Feb 2016 18:28:53 -0500
Subject: writeback: flush inode cgroup wb switches instead of pinning super_block
From: Tejun Heo <tj@kernel.org>
commit a1a0e23e49037c23ea84bc8cc146a03584d13577 upstream.
If cgroup writeback is in use, inodes can be scheduled for
asynchronous wb switching. Before 5ff8eaac1636 ("writeback: keep
superblock pinned during cgroup writeback association switches"), this
could race with umount leading to super_block being destroyed while
inodes are pinned for wb switching. 5ff8eaac1636 fixed it by bumping
s_active while wb switches are in flight; however, this allowed
in-flight wb switches to make umounts asynchronous when the userland
expected synchronous behavior - e.g. fsck immediately following umount may
fail because the device is still busy.
This patch removes the problematic super_block pinning and instead
makes generic_shutdown_super() flush in-flight wb switches. wb
switches are now executed on a dedicated isw_wq so that they can be
flushed and isw_nr_in_flight keeps track of the number of in-flight wb
switches so that flushing can be avoided in most cases.
v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE
check in inode_switch_wbs() as Jan and Al suggested.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Tahsin Erdogan <tahsin@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com
Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches")
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/fs-writeback.c | 54 ++++++++++++++++++++++++++++++++++------------
fs/super.c | 1
include/linux/writeback.h | 5 ++++
3 files changed, 47 insertions(+), 13 deletions(-)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struc
#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
/* one round can affect upto 5 slots */
+static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
+static struct workqueue_struct *isw_wq;
+
void __inode_attach_wb(struct inode *inode, struct page *page)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(str
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
- struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -424,8 +426,9 @@ skip_switch:
wb_put(new_wb);
iput(inode);
- deactivate_super(sb);
kfree(isw);
+
+ atomic_dec(&isw_nr_in_flight);
}
static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(stru
/* needs to grab bh-unsafe locks, bounce to work item */
INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
- schedule_work(&isw->work);
+ queue_work(isw_wq, &isw->work);
}
/**
@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inod
/* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock);
-
- if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
- inode_to_wb(inode) == isw->new_wb)
- goto out_unlock;
-
- if (!atomic_inc_not_zero(&inode->i_sb->s_active))
- goto out_unlock;
-
+ if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+ inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+ inode_to_wb(inode) == isw->new_wb) {
+ spin_unlock(&inode->i_lock);
+ goto out_free;
+ }
inode->i_state |= I_WB_SWITCH;
spin_unlock(&inode->i_lock);
ihold(inode);
isw->inode = inode;
+ atomic_inc(&isw_nr_in_flight);
+
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
* the RCU protected stat update paths to grab the mapping's
@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inod
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
return;
-out_unlock:
- spin_unlock(&inode->i_lock);
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
@@ -849,6 +850,33 @@ restart:
wb_put(last_wb);
}
+/**
+ * cgroup_writeback_umount - flush inode wb switches for umount
+ *
+ * This function is called when a super_block is about to be destroyed and
+ * flushes in-flight inode wb switches. An inode wb switch goes through
+ * RCU and then workqueue, so the two need to be flushed in order to ensure
+ * that all previously scheduled switches are finished. As wb switches are
+ * rare occurrences and synchronize_rcu() can take a while, perform
+ * flushing iff wb switches are in flight.
+ */
+void cgroup_writeback_umount(void)
+{
+ if (atomic_read(&isw_nr_in_flight)) {
+ synchronize_rcu();
+ flush_workqueue(isw_wq);
+ }
+}
+
+static int __init cgroup_writeback_init(void)
+{
+ isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+ if (!isw_wq)
+ return -ENOMEM;
+ return 0;
+}
+fs_initcall(cgroup_writeback_init);
+
#else /* CONFIG_CGROUP_WRITEBACK */
static struct bdi_writeback *
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super
sb->s_flags &= ~MS_ACTIVE;
fsnotify_unmount_inodes(sb);
+ cgroup_writeback_umount();
evict_inodes(sb);
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct
void wbc_detach_inode(struct writeback_control *wbc);
void wbc_account_io(struct writeback_control *wbc, struct page *page,
size_t bytes);
+void cgroup_writeback_umount(void);
/**
* inode_attach_wb - associate an inode with its wb
@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct
{
}
+static inline void cgroup_writeback_umount(void)
+{
+}
+
#endif /* CONFIG_CGROUP_WRITEBACK */
/*
Patches currently in stable-queue which might be from tj@kernel.org are
queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch
queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch
queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch
queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch
queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch
queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch
reply other threads:[~2016-03-05 19:45 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1457207120244134@kroah.com \
--to=gregkh@linuxfoundation.org \
--cc=axboe@fb.com \
--cc=jack@suse.cz \
--cc=stable-commits@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=tahsin@google.com \
--cc=tj@kernel.org \
--cc=viro@ZenIV.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.