From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Tejun Heo <tj@kernel.org>, Jens Axboe <axboe@kernel.dk>,
Sasha Levin <sashal@kernel.org>,
linux-fsdevel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH AUTOSEL 4.14 27/40] writeback: synchronize sync(2) against cgroup writeback membership switches
Date: Thu, 14 Feb 2019 21:13:00 -0500 [thread overview]
Message-ID: <20190215021313.178476-27-sashal@kernel.org> (raw)
In-Reply-To: <20190215021313.178476-1-sashal@kernel.org>
From: Tejun Heo <tj@kernel.org>
[ Upstream commit 7fc5854f8c6efae9e7624970ab49a1eac2faefb1 ]
sync_inodes_sb() can race against cgwb (cgroup writeback) membership
switches and fail to write back some inodes. For example, if an inode
switches to another wb while sync_inodes_sb() is in progress, the new
wb might not be visible to bdi_split_work_to_wbs() at all or the inode
might jump from a wb which hasn't issued writebacks yet to one which
already has.
This patch adds backing_dev_info->wb_switch_rwsem to synchronize cgwb
switch path against sync_inodes_sb() so that sync_inodes_sb() is
guaranteed to see all the target wbs and inodes can't jump wbs to
escape syncing.
v2: Fixed misplaced rwsem init. Spotted by Jiufei.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jiufei Xue <xuejiufei@gmail.com>
Link: http://lkml.kernel.org/r/dc694ae2-f07f-61e1-7097-7c8411cee12d@gmail.com
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/fs-writeback.c | 40 ++++++++++++++++++++++++++++++--
include/linux/backing-dev-defs.h | 1 +
mm/backing-dev.c | 1 +
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3244932f4d5c..6a76616c9401 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
struct work_struct work;
};
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ up_write(&bdi->wb_switch_rwsem);
+}
+
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -343,6 +354,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
bool switched = false;
void **slot;
+ /*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
/*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
@@ -435,6 +452,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
+ up_read(&bdi->wb_switch_rwsem);
+
if (switched) {
wb_wakeup(new_wb);
wb_put(old_wb);
@@ -475,9 +494,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
+ /*
+ * Avoid starting new switches while sync_inodes_sb() is in
+ * progress. Otherwise, if the down_write protected issue path
+ * blocks heavily, we might end up starting a large number of
+ * switches which will block on the rwsem.
+ */
+ if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ return;
+
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- return;
+ goto out_unlock;
/* find and pin the new wb */
rcu_read_lock();
@@ -511,12 +539,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
- return;
+ goto out_unlock;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
+out_unlock:
+ up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -894,6 +924,9 @@ fs_initcall(cgroup_writeback_init);
#else /* CONFIG_CGROUP_WRITEBACK */
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -2408,8 +2441,11 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+ bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
wb_wait_for_completion(bdi, &done);
+ bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 19240379637f..b186c4b464e0 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -165,6 +165,7 @@ struct backing_dev_info {
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
struct rb_root cgwb_congested_tree; /* their congested states */
struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
+ struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#else
struct bdi_writeback_congested *wb_congested;
#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 9386c98dac12..6fa31754eadd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -684,6 +684,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
mutex_init(&bdi->cgwb_release_mutex);
+ init_rwsem(&bdi->wb_switch_rwsem);
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (!ret) {
--
2.19.1
next prev parent reply other threads:[~2019-02-15 2:25 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-15 2:12 [PATCH AUTOSEL 4.14 01/40] drm/msm: Unblock writer if reader closes file Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 02/40] ASoC: Intel: Haswell/Broadwell: fix setting for .dynamic field Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 03/40] ALSA: compress: prevent potential divide by zero bugs Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 04/40] ASoC: Variable "val" in function rt274_i2c_probe() could be uninitialized Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 05/40] clk: vc5: Abort clock configuration without upstream clock Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 06/40] thermal: int340x_thermal: Fix a NULL vs IS_ERR() check Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 07/40] usb: dwc3: gadget: synchronize_irq dwc irq in suspend Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 08/40] usb: dwc3: gadget: Fix the uninitialized link_state when udc starts Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 09/40] usb: gadget: Potential NULL dereference on allocation error Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 10/40] genirq: Make sure the initial affinity is not empty Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 11/40] ASoC: dapm: change snprintf to scnprintf for possible overflow Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 12/40] ASoC: imx-audmux: " Sasha Levin
2019-02-15 2:12 ` Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 13/40] selftests: seccomp: use LDLIBS instead of LDFLAGS sashal
2019-02-15 2:12 ` Sasha Levin
2019-02-15 2:12 ` Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 14/40] selftests: gpio-mockup-chardev: Check asprintf() for error Sasha Levin
2019-02-15 2:12 ` Sasha Levin
2019-02-15 2:12 ` sashal
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 15/40] ARC: fix __ffs return value to avoid build warnings Sasha Levin
2019-02-15 2:12 ` Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 16/40] drivers: thermal: int340x_thermal: Fix sysfs race condition Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 17/40] staging: rtl8723bs: Fix build error with Clang when inlining is disabled Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 18/40] mac80211: fix miscounting of ttl-dropped frames Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 19/40] sched/wait: Fix rcuwait_wake_up() ordering Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 20/40] futex: Fix (possible) missed wakeup Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 21/40] locking/rwsem: " Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 22/40] libceph: avoid KEEPALIVE_PENDING races in ceph_con_keepalive() Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 23/40] drm/amd/powerplay: OD setting fix on Vega10 Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 24/40] serial: fsl_lpuart: fix maximum acceptable baud rate with over-sampling Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 25/40] staging: android: ion: Support cpu access during dma_buf_detach Sasha Levin
2019-02-15 2:12 ` Sasha Levin
2019-02-15 2:12 ` [PATCH AUTOSEL 4.14 26/40] direct-io: allow direct writes to empty inodes Sasha Levin
2019-02-15 2:13 ` Sasha Levin [this message]
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 28/40] scsi: csiostor: fix NULL pointer dereference in csio_vport_set_state() Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 29/40] net: altera_tse: fix connect_local_phy error path Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 30/40] hv_netvsc: Fix ethtool change hash key error Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 31/40] sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 32/40] ax25: fix possible use-after-free Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 33/40] net: usb: asix: ax88772_bind return error when hw_reset fail Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 34/40] net: dev_is_mac_header_xmit() true for ARPHRD_RAWIP Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 35/40] ibmveth: Do not process frames after calling napi_reschedule Sasha Levin
2019-02-15 2:13 ` Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 36/40] mac80211: don't initiate TDLS connection if station is not associated to AP Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 37/40] mac80211: Add attribute aligned(2) to struct 'action' Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 38/40] cfg80211: extend range deviation for DMG Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 39/40] svm: Fix AVIC incomplete IPI emulation Sasha Levin
2019-02-15 2:13 ` [PATCH AUTOSEL 4.14 40/40] KVM: nSVM: clear events pending from svm_complete_interrupts() when exiting to L1 Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190215021313.178476-27-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=axboe@kernel.dk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=stable@vger.kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.