linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Jan Kara <jack@suse.cz>
Subject: [PATCH 4.4 49/62] writeback: Fix sync livelock due to b_dirty_time processing
Date: Tue,  1 Sep 2020 17:10:32 +0200	[thread overview]
Message-ID: <20200901150923.194885340@linuxfoundation.org> (raw)
In-Reply-To: <20200901150920.697676718@linuxfoundation.org>

From: Jan Kara <jack@suse.cz>

commit f9cae926f35e8230330f28c7b743ad088611a8de upstream.

When we are processing writeback for sync(2), move_expired_inodes()
didn't set any inode expiry value (older_than_this). This can result in
writeback never completing if there's steady stream of inodes added to
b_dirty_time list as writeback rechecks dirty lists after each writeback
round whether there's more work to be done. Fix the problem by using
sync(2) start time is inode expiry value when processing b_dirty_time
list similarly as for ordinarily dirtied inodes. This requires some
refactoring of older_than_this handling which simplifies the code
noticeably as a bonus.

Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
CC: stable@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 fs/fs-writeback.c                |   44 +++++++++++++++------------------------
 include/trace/events/writeback.h |   13 +++++------
 2 files changed, 23 insertions(+), 34 deletions(-)

--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_completion {
 struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
-	unsigned long *older_than_this;
 	enum writeback_sync_modes sync_mode;
 	unsigned int tagged_writepages:1;
 	unsigned int for_kupdate:1;
@@ -1109,16 +1108,13 @@ static bool inode_dirtied_after(struct i
 #define EXPIRE_DIRTY_ATIME 0x0001
 
 /*
- * Move expired (dirtied before work->older_than_this) dirty inodes from
+ * Move expired (dirtied before dirtied_before) dirty inodes from
  * @delaying_queue to @dispatch_queue.
  */
 static int move_expired_inodes(struct list_head *delaying_queue,
 			       struct list_head *dispatch_queue,
-			       int flags,
-			       struct wb_writeback_work *work)
+			       int flags, unsigned long dirtied_before)
 {
-	unsigned long *older_than_this = NULL;
-	unsigned long expire_time;
 	LIST_HEAD(tmp);
 	struct list_head *pos, *node;
 	struct super_block *sb = NULL;
@@ -1126,16 +1122,9 @@ static int move_expired_inodes(struct li
 	int do_sb_sort = 0;
 	int moved = 0;
 
-	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
-		older_than_this = work->older_than_this;
-	else if (!work->for_sync) {
-		expire_time = jiffies - (dirtytime_expire_interval * HZ);
-		older_than_this = &expire_time;
-	}
 	while (!list_empty(delaying_queue)) {
 		inode = wb_inode(delaying_queue->prev);
-		if (older_than_this &&
-		    inode_dirtied_after(inode, *older_than_this))
+		if (inode_dirtied_after(inode, dirtied_before))
 			break;
 		list_move(&inode->i_io_list, &tmp);
 		moved++;
@@ -1181,18 +1170,22 @@ out:
  *                                           |
  *                                           +--> dequeue for IO
  */
-static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
+		     unsigned long dirtied_before)
 {
 	int moved;
+	unsigned long time_expire_jif = dirtied_before;
 
 	assert_spin_locked(&wb->list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
-	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before);
+	if (!work->for_sync)
+		time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
 	moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
-				     EXPIRE_DIRTY_ATIME, work);
+				     EXPIRE_DIRTY_ATIME, time_expire_jif);
 	if (moved)
 		wb_io_lists_populated(wb);
-	trace_writeback_queue_io(wb, work, moved);
+	trace_writeback_queue_io(wb, work, dirtied_before, moved);
 }
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1703,7 +1696,7 @@ static long writeback_inodes_wb(struct b
 	blk_start_plug(&plug);
 	spin_lock(&wb->list_lock);
 	if (list_empty(&wb->b_io))
-		queue_io(wb, &work);
+		queue_io(wb, &work, jiffies);
 	__writeback_inodes_wb(wb, &work);
 	spin_unlock(&wb->list_lock);
 	blk_finish_plug(&plug);
@@ -1723,7 +1716,7 @@ static long writeback_inodes_wb(struct b
  * takes longer than a dirty_writeback_interval interval, then leave a
  * one-second gap.
  *
- * older_than_this takes precedence over nr_to_write.  So we'll only write back
+ * dirtied_before takes precedence over nr_to_write.  So we'll only write back
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
@@ -1731,14 +1724,11 @@ static long wb_writeback(struct bdi_writ
 {
 	unsigned long wb_start = jiffies;
 	long nr_pages = work->nr_pages;
-	unsigned long oldest_jif;
+	unsigned long dirtied_before = jiffies;
 	struct inode *inode;
 	long progress;
 	struct blk_plug plug;
 
-	oldest_jif = jiffies;
-	work->older_than_this = &oldest_jif;
-
 	blk_start_plug(&plug);
 	spin_lock(&wb->list_lock);
 	for (;;) {
@@ -1772,14 +1762,14 @@ static long wb_writeback(struct bdi_writ
 		 * safe.
 		 */
 		if (work->for_kupdate) {
-			oldest_jif = jiffies -
+			dirtied_before = jiffies -
 				msecs_to_jiffies(dirty_expire_interval * 10);
 		} else if (work->for_background)
-			oldest_jif = jiffies;
+			dirtied_before = jiffies;
 
 		trace_writeback_start(wb, work);
 		if (list_empty(&wb->b_io))
-			queue_io(wb, work);
+			queue_io(wb, work, dirtied_before);
 		if (work->sb)
 			progress = writeback_sb_inodes(work->sb, wb, work);
 		else
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -390,8 +390,9 @@ DEFINE_WBC_EVENT(wbc_writepage);
 TRACE_EVENT(writeback_queue_io,
 	TP_PROTO(struct bdi_writeback *wb,
 		 struct wb_writeback_work *work,
+		 unsigned long dirtied_before,
 		 int moved),
-	TP_ARGS(wb, work, moved),
+	TP_ARGS(wb, work, dirtied_before, moved),
 	TP_STRUCT__entry(
 		__array(char,		name, 32)
 		__field(unsigned long,	older)
@@ -401,19 +402,17 @@ TRACE_EVENT(writeback_queue_io,
 		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
 	),
 	TP_fast_assign(
-		unsigned long *older_than_this = work->older_than_this;
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
-		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+		__entry->older	= dirtied_before;
+		__entry->age	= (jiffies - dirtied_before) * 1000 / HZ;
 		__entry->moved	= moved;
 		__entry->reason	= work->reason;
 		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
 	),
 	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
 		__entry->name,
-		__entry->older,	/* older_than_this in jiffies */
-		__entry->age,	/* older_than_this in relative milliseconds */
+		__entry->older,	/* dirtied_before in jiffies */
+		__entry->age,	/* dirtied_before in relative milliseconds */
 		__entry->moved,
 		__print_symbolic(__entry->reason, WB_WORK_REASON),
 		__get_str(cgroup)



  parent reply	other threads:[~2020-09-01 15:13 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-01 15:09 [PATCH 4.4 00/62] 4.4.235-rc1 review Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 01/62] net: Fix potential wrong skb->protocol in skb_vlan_untag() Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 02/62] tipc: fix uninit skb->data in tipc_nl_compat_dumpit() Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 03/62] ipvlan: fix device features Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 04/62] bonding: show saner speed for broadcast mode Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 05/62] bonding: fix a potential double-unregister Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 06/62] powerpc/pseries: Do not initiate shutdown when system is running on UPS Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 07/62] ALSA: pci: delete repeated words in comments Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 08/62] ASoC: tegra: Fix reference count leaks Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 09/62] media: pci: ttpci: av7110: fix possible buffer overflow caused by bad DMA value in debiirq() Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 10/62] scsi: target: tcmu: Fix crash on ARM during cmd completion Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 11/62] drm/amdkfd: Fix reference count leaks Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 12/62] drm/radeon: fix multiple reference count leak Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 13/62] drm/amdgpu: fix ref count leak in amdgpu_driver_open_kms Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 14/62] drm/amd/display: fix ref count leak in amdgpu_drm_ioctl Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 15/62] drm/amdgpu: fix ref count leak in amdgpu_display_crtc_set_config Greg Kroah-Hartman
2020-09-01 15:09 ` [PATCH 4.4 16/62] drm/amdgpu/display: fix ref count leak when pm_runtime_get_sync fails Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 17/62] scsi: lpfc: Fix shost refcount mismatch when deleting vport Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 18/62] selftests/powerpc: Purge extra count_pmc() calls of ebb selftests Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 19/62] PCI: Fix pci_create_slot() reference count leak Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 20/62] rtlwifi: rtl8192cu: Prevent leaking urb Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 21/62] mips/vdso: Fix resource leaks in genvdso.c Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 22/62] drm/nouveau/drm/noveau: fix reference count leak in nouveau_fbcon_open Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 23/62] drm/nouveau: Fix reference count leak in nouveau_connector_detect Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 24/62] locking/lockdep: Fix overflow in presentation of average lock-time Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 25/62] scsi: iscsi: Do not put host in iscsi_set_flashnode_param() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 26/62] ceph: fix potential mdsc use-after-free crash Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 27/62] scsi: fcoe: Memory leak fix in fcoe_sysfs_fcf_del() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 28/62] EDAC/ie31200: Fallback if host bridge device is already initialized Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 29/62] media: davinci: vpif_capture: fix potential double free Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 30/62] powerpc/spufs: add CONFIG_COREDUMP dependency Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 31/62] USB: sisusbvga: Fix a potential UB casued by left shifting a negative value Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 32/62] Revert "ath10k: fix DMA related firmware crashes on multiple devices" Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 33/62] i2c: rcar: in slave mode, clear NACK earlier Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 34/62] jbd2: make sure jh have b_transaction set in refile/unfile_buffer Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 35/62] jbd2: abort journal if free a async write error metadata buffer Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 36/62] s390/cio: add cond_resched() in the slow_eval_known_fn() loop Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 37/62] scsi: ufs: Fix possible infinite loop in ufshcd_hold Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 38/62] net: gianfar: Add of_node_put() before goto statement Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 39/62] powerpc/perf: Fix soft lockups due to missed interrupt accounting Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 40/62] fbcon: prevent user font height or width change from causing potential out-of-bounds access Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 41/62] USB: lvtest: return proper error code in probe Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 42/62] vt: defer kfree() of vc_screenbuf in vc_do_resize() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 43/62] vt_ioctl: change VT_RESIZEX ioctl to check for error return from vc_resize() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 44/62] serial: samsung: Removes the IRQ not found warning Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 45/62] serial: pl011: Dont leak amba_ports entry on driver register error Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 46/62] serial: 8250: change lock order in serial8250_do_startup() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 47/62] writeback: Protect inode->i_io_list with inode->i_lock Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 48/62] writeback: Avoid skipping inode writeback Greg Kroah-Hartman
2020-09-01 15:10 ` Greg Kroah-Hartman [this message]
2020-09-01 15:10 ` [PATCH 4.4 50/62] XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information Greg Kroah-Hartman
2020-09-29 13:13   ` Stefan Bader
2020-09-29 14:05     ` Jürgen Groß
2020-09-29 14:21       ` Stefan Bader
2020-09-30  8:52       ` Stefan Bader
2020-09-30  9:01         ` Jürgen Groß
2020-09-01 15:10 ` [PATCH 4.4 51/62] xhci: Do warm-reset when both CAS and XDEV_RESUME are set Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 52/62] PM: sleep: core: Fix the handling of pending runtime resume requests Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 53/62] device property: Fix the secondary firmware node handling in set_primary_fwnode() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 54/62] USB: yurex: Fix bad gfp argument Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 55/62] usb: uas: Add quirk for PNY Pro Elite Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 56/62] USB: quirks: Add no-lpm quirk for another Raydium touchscreen Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 57/62] USB: Ignore UAS for JMicron JMS567 ATA/ATAPI Bridge Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 58/62] usb: host: ohci-exynos: Fix error handling in exynos_ohci_probe() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 59/62] usb: storage: Add unusual_uas entry for Sony PSZ drives Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 60/62] btrfs: check the right error variable in btrfs_del_dir_entries_in_log Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 61/62] HID: hiddev: Fix slab-out-of-bounds write in hiddev_ioctl_usage() Greg Kroah-Hartman
2020-09-01 15:10 ` [PATCH 4.4 62/62] ALSA: usb-audio: Update documentation comment for MS2109 quirk Greg Kroah-Hartman
2020-09-01 18:58 ` [PATCH 4.4 00/62] 4.4.235-rc1 review Guenter Roeck
2020-09-02  7:37   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200901150923.194885340@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).