From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Tejun Heo <tj@kernel.org>,
Jiufei Xue <xuejiufei@gmail.com>, Jan Kara <jack@suse.cz>,
Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.14 32/52] writeback: synchronize sync(2) against cgroup writeback membership switches
Date: Mon, 4 Mar 2019 09:22:30 +0100 [thread overview]
Message-ID: <20190304081619.007389596@linuxfoundation.org> (raw)
In-Reply-To: <20190304081617.159014799@linuxfoundation.org>
4.14-stable review patch. If anyone has any objections, please let me know.
------------------
[ Upstream commit 7fc5854f8c6efae9e7624970ab49a1eac2faefb1 ]
sync_inodes_sb() can race against cgwb (cgroup writeback) membership
switches and fail to write back some inodes. For example, if an inode
switches to another wb while sync_inodes_sb() is in progress, the new
wb might not be visible to bdi_split_work_to_wbs() at all or the inode
might jump from a wb which hasn't issued writebacks yet to one which
already has.
This patch adds backing_dev_info->wb_switch_rwsem to synchronize cgwb
switch path against sync_inodes_sb() so that sync_inodes_sb() is
guaranteed to see all the target wbs and inodes can't jump wbs to
escape syncing.
v2: Fixed misplaced rwsem init. Spotted by Jiufei.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jiufei Xue <xuejiufei@gmail.com>
Link: http://lkml.kernel.org/r/dc694ae2-f07f-61e1-7097-7c8411cee12d@gmail.com
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/fs-writeback.c | 40 ++++++++++++++++++++++++++++++--
include/linux/backing-dev-defs.h | 1 +
mm/backing-dev.c | 1 +
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3244932f4d5cc..6a76616c9401b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
struct work_struct work;
};
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ up_write(&bdi->wb_switch_rwsem);
+}
+
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -343,6 +354,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
bool switched = false;
void **slot;
+ /*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
/*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
@@ -435,6 +452,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
+ up_read(&bdi->wb_switch_rwsem);
+
if (switched) {
wb_wakeup(new_wb);
wb_put(old_wb);
@@ -475,9 +494,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
+ /*
+ * Avoid starting new switches while sync_inodes_sb() is in
+ * progress. Otherwise, if the down_write protected issue path
+ * blocks heavily, we might end up starting a large number of
+ * switches which will block on the rwsem.
+ */
+ if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ return;
+
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- return;
+ goto out_unlock;
/* find and pin the new wb */
rcu_read_lock();
@@ -511,12 +539,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
- return;
+ goto out_unlock;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
+out_unlock:
+ up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -894,6 +924,9 @@ fs_initcall(cgroup_writeback_init);
#else /* CONFIG_CGROUP_WRITEBACK */
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -2408,8 +2441,11 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+ bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
wb_wait_for_completion(bdi, &done);
+ bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 19240379637fe..b186c4b464e02 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -165,6 +165,7 @@ struct backing_dev_info {
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
struct rb_root cgwb_congested_tree; /* their congested states */
struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
+ struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#else
struct bdi_writeback_congested *wb_congested;
#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 9386c98dac123..6fa31754eadd9 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -684,6 +684,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
mutex_init(&bdi->cgwb_release_mutex);
+ init_rwsem(&bdi->wb_switch_rwsem);
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (!ret) {
--
2.19.1
next prev parent reply other threads:[~2019-03-04 8:25 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-04 8:21 [PATCH 4.14 00/52] 4.14.105-stable review Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.14 01/52] Revert "loop: Fix double mutex_unlock(&loop_ctl_mutex) in loop_control_ioctl()" Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 02/52] Revert "loop: Get rid of loop_index_mutex" Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 03/52] Revert "loop: Fold __loop_release into loop_release" Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 04/52] net: stmmac: Fix reception of Broadcom switches tags Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 05/52] net: stmmac: Disable ACS Feature for GMAC >= 4 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 06/52] scsi: libsas: Fix rphy phy_identifier for PHYs with end devices attached Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 07/52] drm/msm: Unblock writer if reader closes file Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 08/52] ASoC: Intel: Haswell/Broadwell: fix setting for .dynamic field Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 09/52] ALSA: compress: prevent potential divide by zero bugs Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 10/52] ASoC: Variable "val" in function rt274_i2c_probe() could be uninitialized Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 11/52] clk: vc5: Abort clock configuration without upstream clock Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 12/52] thermal: int340x_thermal: Fix a NULL vs IS_ERR() check Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 13/52] usb: dwc3: gadget: synchronize_irq dwc irq in suspend Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 14/52] usb: dwc3: gadget: Fix the uninitialized link_state when udc starts Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 15/52] usb: gadget: Potential NULL dereference on allocation error Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 16/52] genirq: Make sure the initial affinity is not empty Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 17/52] ASoC: dapm: change snprintf to scnprintf for possible overflow Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 18/52] ASoC: imx-audmux: " Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 19/52] selftests: seccomp: use LDLIBS instead of LDFLAGS Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 20/52] selftests: gpio-mockup-chardev: Check asprintf() for error Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 21/52] ARC: fix __ffs return value to avoid build warnings Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 22/52] drivers: thermal: int340x_thermal: Fix sysfs race condition Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 23/52] staging: rtl8723bs: Fix build error with Clang when inlining is disabled Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 24/52] mac80211: fix miscounting of ttl-dropped frames Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 25/52] sched/wait: Fix rcuwait_wake_up() ordering Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 26/52] futex: Fix (possible) missed wakeup Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 27/52] locking/rwsem: " Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 28/52] drm/amd/powerplay: OD setting fix on Vega10 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 29/52] serial: fsl_lpuart: fix maximum acceptable baud rate with over-sampling Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 30/52] staging: android: ion: Support cpu access during dma_buf_detach Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 31/52] direct-io: allow direct writes to empty inodes Greg Kroah-Hartman
2019-03-04 8:22 ` Greg Kroah-Hartman [this message]
2019-03-04 8:22 ` [PATCH 4.14 33/52] scsi: csiostor: fix NULL pointer dereference in csio_vport_set_state() Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 34/52] net: altera_tse: fix connect_local_phy error path Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 35/52] hv_netvsc: Fix ethtool change hash key error Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 36/52] net: usb: asix: ax88772_bind return error when hw_reset fail Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 37/52] net: dev_is_mac_header_xmit() true for ARPHRD_RAWIP Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 38/52] ibmveth: Do not process frames after calling napi_reschedule Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 39/52] mac80211: dont initiate TDLS connection if station is not associated to AP Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 40/52] mac80211: Add attribute aligned(2) to struct action Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 41/52] cfg80211: extend range deviation for DMG Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 42/52] svm: Fix AVIC incomplete IPI emulation Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 43/52] KVM: nSVM: clear events pending from svm_complete_interrupts() when exiting to L1 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 44/52] powerpc: Always initialize input array when calling epapr_hypercall() Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 45/52] mmc: spi: Fix card detection during probe Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 46/52] mmc: tmio_mmc_core: dont claim spurious interrupts Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 47/52] mmc: tmio: fix access width of Block Count Register Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 48/52] mmc: sdhci-esdhc-imx: correct the fix of ERR004536 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 49/52] mm: enforce min addr even if capable() in expand_downwards() Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 50/52] MIPS: fix truncation in __cmpxchg_small for short values Greg Kroah-Hartman
2019-03-04 8:22 ` Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 51/52] MIPS: eBPF: Fix icache flush end address Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.14 52/52] x86/uaccess: Dont leak the AC flag into __put_user() value evaluation Greg Kroah-Hartman
2019-03-04 18:42 ` [PATCH 4.14 00/52] 4.14.105-stable review Naresh Kamboju
2019-03-05 3:38 ` Guenter Roeck
2019-03-05 14:06 ` Jon Hunter
2019-03-05 14:06 ` Jon Hunter
2019-03-05 16:11 ` shuah
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190304081619.007389596@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=axboe@kernel.dk \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
--cc=tj@kernel.org \
--cc=xuejiufei@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.