From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Tejun Heo <tj@kernel.org>,
Jiufei Xue <xuejiufei@gmail.com>, Jan Kara <jack@suse.cz>,
Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.20 45/88] writeback: synchronize sync(2) against cgroup writeback membership switches
Date: Mon, 4 Mar 2019 09:22:28 +0100 [thread overview]
Message-ID: <20190304081632.345744157@linuxfoundation.org> (raw)
In-Reply-To: <20190304081630.610632175@linuxfoundation.org>
4.20-stable review patch. If anyone has any objections, please let me know.
------------------
[ Upstream commit 7fc5854f8c6efae9e7624970ab49a1eac2faefb1 ]
sync_inodes_sb() can race against cgwb (cgroup writeback) membership
switches and fail to write back some inodes. For example, if an inode
switches to another wb while sync_inodes_sb() is in progress, the new
wb might not be visible to bdi_split_work_to_wbs() at all, or the inode
might jump from a wb which hasn't issued writebacks yet to one which
already has.
This patch adds backing_dev_info->wb_switch_rwsem to synchronize the
cgwb switch path against sync_inodes_sb() so that sync_inodes_sb() is
guaranteed to see all the target wbs and inodes can't jump wbs to
escape syncing.
v2: Fixed misplaced rwsem init. Spotted by Jiufei.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jiufei Xue <xuejiufei@gmail.com>
Link: http://lkml.kernel.org/r/dc694ae2-f07f-61e1-7097-7c8411cee12d@gmail.com
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/fs-writeback.c | 40 ++++++++++++++++++++++++++++++--
include/linux/backing-dev-defs.h | 1 +
mm/backing-dev.c | 1 +
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b40168fcc94a6..36855c1f8dafd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
struct work_struct work;
};
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ up_write(&bdi->wb_switch_rwsem);
+}
+
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -343,6 +354,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
struct page *page;
bool switched = false;
+ /*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
/*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
@@ -428,6 +445,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
+ up_read(&bdi->wb_switch_rwsem);
+
if (switched) {
wb_wakeup(new_wb);
wb_put(old_wb);
@@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
+ /*
+ * Avoid starting new switches while sync_inodes_sb() is in
+ * progress. Otherwise, if the down_write protected issue path
+ * blocks heavily, we might end up starting a large number of
+ * switches which will block on the rwsem.
+ */
+ if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ return;
+
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- return;
+ goto out_unlock;
/* find and pin the new wb */
rcu_read_lock();
@@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
- return;
+ goto out_unlock;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
+out_unlock:
+ up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init);
#else /* CONFIG_CGROUP_WRITEBACK */
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+ bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
wb_wait_for_completion(bdi, &done);
+ bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c311571355981..07e02d6df5ad9 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -190,6 +190,7 @@ struct backing_dev_info {
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
struct rb_root cgwb_congested_tree; /* their congested states */
struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
+ struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#else
struct bdi_writeback_congested *wb_congested;
#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8a8bb8796c6c4..72e6d0c55cfad 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -689,6 +689,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
mutex_init(&bdi->cgwb_release_mutex);
+ init_rwsem(&bdi->wb_switch_rwsem);
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (!ret) {
--
2.19.1
next prev parent reply other threads:[~2019-03-04 8:34 UTC|newest]
Thread overview: 103+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-04 8:21 [PATCH 4.20 00/88] 4.20.14-stable review Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 01/88] genirq/matrix: Improve target CPU selection for managed interrupts Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 02/88] scsi: libsas: Fix rphy phy_identifier for PHYs with end devices attached Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 03/88] drm/msm: Unblock writer if reader closes file Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 04/88] ASoC: Intel: Haswell/Broadwell: fix setting for .dynamic field Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 05/88] ALSA: compress: prevent potential divide by zero bugs Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 06/88] ASoC: rt5682: Fix recording no sound issue Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 07/88] ASoC: Variable "val" in function rt274_i2c_probe() could be uninitialized Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 08/88] clk: tegra: dfll: Fix a potential Oop in remove() Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 09/88] clk: sysfs: fix invalid JSON in clk_dump Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 10/88] clk: vc5: Abort clock configuration without upstream clock Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 11/88] thermal: int340x_thermal: Fix a NULL vs IS_ERR() check Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 12/88] usb: dwc3: gadget: synchronize_irq dwc irq in suspend Greg Kroah-Hartman
2019-03-04 8:46 ` He, Bo
2019-03-04 8:53 ` Greg Kroah-Hartman
2019-03-04 9:03 ` Marek Szyprowski
2019-03-04 9:44 ` Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 13/88] usb: dwc3: gadget: Fix the uninitialized link_state when udc starts Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 14/88] usb: gadget: Potential NULL dereference on allocation error Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 15/88] HID: i2c-hid: Disable runtime PM on Goodix touchpad Greg Kroah-Hartman
2019-03-04 8:21 ` [PATCH 4.20 16/88] ASoC: core: Make snd_soc_find_component() more robust Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 17/88] selftests: rtc: rtctest: fix alarm tests Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 18/88] selftests: rtc: rtctest: add alarm test on minute boundary Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 19/88] genirq: Make sure the initial affinity is not empty Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 20/88] x86/mm/mem_encrypt: Fix erroneous sizeof() Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 21/88] ASoC: rt5682: Fix PLL source register definitions Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 22/88] ASoC: dapm: change snprintf to scnprintf for possible overflow Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 23/88] ASoC: imx-audmux: " Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 24/88] selftests/vm/gup_benchmark.c: match gup struct to kernel Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 25/88] phy: ath79-usb: Fix the power on error path Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 26/88] phy: ath79-usb: Fix the main reset name to match the DT binding Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 27/88] selftests: seccomp: use LDLIBS instead of LDFLAGS Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 28/88] selftests: gpio-mockup-chardev: Check asprintf() for error Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 29/88] irqchip/gic-v3-mbi: Fix uninitialized mbi_lock Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 30/88] ARC: fix __ffs return value to avoid build warnings Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 31/88] ARC: show_regs: lockdep: avoid page allocator Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 32/88] drivers: thermal: int340x_thermal: Fix sysfs race condition Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 33/88] staging: rtl8723bs: Fix build error with Clang when inlining is disabled Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 34/88] mac80211: fix miscounting of ttl-dropped frames Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 35/88] sched/wait: Fix rcuwait_wake_up() ordering Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 36/88] sched/wake_q: Fix wakeup ordering for wake_q Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 37/88] futex: Fix (possible) missed wakeup Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 38/88] locking/rwsem: " Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 39/88] drm/amd/powerplay: OD setting fix on Vega10 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 40/88] tty: serial: qcom_geni_serial: Allow mctrl when flow control is disabled Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 41/88] serial: fsl_lpuart: fix maximum acceptable baud rate with over-sampling Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 42/88] drm/sun4i: hdmi: Fix usage of TMDS clock Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 43/88] staging: android: ion: Support cpu access during dma_buf_detach Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 44/88] direct-io: allow direct writes to empty inodes Greg Kroah-Hartman
2019-03-04 8:22 ` Greg Kroah-Hartman [this message]
2019-03-04 8:22 ` [PATCH 4.20 46/88] scsi: lpfc: nvme: avoid hang / use-after-free when destroying localport Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 47/88] scsi: lpfc: nvmet: avoid hang / use-after-free when destroying targetport Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 48/88] scsi: csiostor: fix NULL pointer dereference in csio_vport_set_state() Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 49/88] net: altera_tse: fix connect_local_phy error path Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 50/88] hv_netvsc: Fix ethtool change hash key error Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 51/88] hv_netvsc: Refactor assignments of struct netvsc_device_info Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 52/88] hv_netvsc: Fix hash key value reset after other ops Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 53/88] nvme-rdma: fix timeout handler Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 54/88] nvme-multipath: drop optimization for static ANA group IDs Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 55/88] cifs: fix memory leak of an allocated cifs_ntsd structure Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 56/88] drm/msm: Fix A6XX support for opp-level Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 57/88] drm/msm: avoid unused function warning Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 58/88] net: usb: asix: ax88772_bind return error when hw_reset fail Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 59/88] net: dev_is_mac_header_xmit() true for ARPHRD_RAWIP Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 60/88] ibmveth: Do not process frames after calling napi_reschedule Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 61/88] mac80211: dont initiate TDLS connection if station is not associated to AP Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 62/88] mac80211: Add attribute aligned(2) to struct action Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 63/88] cfg80211: extend range deviation for DMG Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 64/88] svm: Fix AVIC incomplete IPI emulation Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 65/88] KVM: nSVM: clear events pending from svm_complete_interrupts() when exiting to L1 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 66/88] kvm: selftests: Fix region overlap check in kvm_util Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 67/88] KVM: selftests: check returned evmcs version range Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 68/88] mmc: spi: Fix card detection during probe Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 69/88] mmc: tmio_mmc_core: dont claim spurious interrupts Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 70/88] mmc: tmio: fix access width of Block Count Register Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 71/88] mmc: core: Fix NULL ptr crash from mmc_should_fail_request Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 72/88] mmc: cqhci: fix space allocated for transfer descriptor Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 73/88] mmc: cqhci: Fix a tiny potential memory leak on error condition Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 74/88] mmc: sdhci-esdhc-imx: correct the fix of ERR004536 Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 75/88] mm: enforce min addr even if capable() in expand_downwards() Greg Kroah-Hartman
2019-03-04 8:22 ` [PATCH 4.20 76/88] drm: Block fb changes for async plane updates Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 77/88] hugetlbfs: fix races and page leaks during migration Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 78/88] crypto: ccree - add missing inline qualifier Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 79/88] MIPS: fix truncation in __cmpxchg_small for short values Greg Kroah-Hartman
2019-03-04 8:23 ` Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 80/88] MIPS: BCM63XX: provide DMA masks for ethernet devices Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 81/88] MIPS: fix memory setup for platforms with PHYS_OFFSET != 0 Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 82/88] scsi: 3w-sas: fix calls to dma_set_mask_and_coherent() Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 83/88] scsi: csiostor: " Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 84/88] scsi: 3w-9xxx: " Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 85/88] scsi: aic94xx: " Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 86/88] arm64: dts: qcom: msm8998: Extend TZ reserved memory area Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 87/88] MIPS: eBPF: Fix icache flush end address Greg Kroah-Hartman
2019-03-04 8:23 ` [PATCH 4.20 88/88] x86/uaccess: Dont leak the AC flag into __put_user() value evaluation Greg Kroah-Hartman
2019-03-04 20:36 ` [PATCH 4.20 00/88] 4.20.14-stable review Naresh Kamboju
2019-03-05 7:58 ` Greg Kroah-Hartman
2019-03-05 3:39 ` Guenter Roeck
2019-03-05 7:58 ` Greg Kroah-Hartman
2019-03-05 14:07 ` Jon Hunter
2019-03-05 14:07 ` Jon Hunter
2019-03-05 14:55 ` Greg Kroah-Hartman
2019-03-05 16:20 ` shuah
2019-03-05 16:51 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190304081632.345744157@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=axboe@kernel.dk \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
--cc=tj@kernel.org \
--cc=xuejiufei@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.