stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Josh Snyder <joshs@netflix.com>,
	Tejun Heo <tj@kernel.org>, Balbir Singh <bsingharora@gmail.com>,
	Brendan Gregg <bgregg@netflix.com>, Jens Axboe <axboe@kernel.dk>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	linux-block@vger.kernel.org, Ingo Molnar <mingo@kernel.org>
Subject: [PATCH 4.14 28/89] delayacct: Account blkio completion on the correct task
Date: Mon, 22 Jan 2018 09:45:08 +0100	[thread overview]
Message-ID: <20180122083957.481864595@linuxfoundation.org> (raw)
In-Reply-To: <20180122083954.683903493@linuxfoundation.org>

4.14-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Josh Snyder <joshs@netflix.com>

commit c96f5471ce7d2aefd0dda560cc23f08ab00bc65d upstream.

Before commit:

  e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler")

delayacct_blkio_end() was called after context-switching into the task which
completed I/O.

This resulted in double counting: the task would account a delay both waiting
for I/O and for time spent in the runqueue.

With e33a9bba85a8, delayacct_blkio_end() is called by try_to_wake_up().
In ttwu, we have not yet context-switched. This is more correct, in that
the delay accounting ends when the I/O is complete.

But delayacct_blkio_end() relies on 'get_current()', and we have not yet
context-switched into the task whose I/O completed. This results in the
wrong task having its delay accounting statistics updated.

Instead of doing that, pass the task_struct being woken to delayacct_blkio_end(),
so that it can update the statistics of the correct task.

Signed-off-by: Josh Snyder <joshs@netflix.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Cc: Brendan Gregg <bgregg@netflix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-block@vger.kernel.org
Fixes: e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler")
Link: http://lkml.kernel.org/r/1513613712-571-1-git-send-email-joshs@netflix.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/delayacct.h |    8 ++++----
 kernel/delayacct.c        |   42 ++++++++++++++++++++++++++----------------
 kernel/sched/core.c       |    6 +++---
 3 files changed, 33 insertions(+), 23 deletions(-)

--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -71,7 +71,7 @@ extern void delayacct_init(void);
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
-extern void __delayacct_blkio_end(void);
+extern void __delayacct_blkio_end(struct task_struct *);
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
@@ -122,10 +122,10 @@ static inline void delayacct_blkio_start
 		__delayacct_blkio_start();
 }
 
-static inline void delayacct_blkio_end(void)
+static inline void delayacct_blkio_end(struct task_struct *p)
 {
 	if (current->delays)
-		__delayacct_blkio_end();
+		__delayacct_blkio_end(p);
 	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
 
@@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(st
 {}
 static inline void delayacct_blkio_start(void)
 {}
-static inline void delayacct_blkio_end(void)
+static inline void delayacct_blkio_end(struct task_struct *p)
 {}
 static inline int delayacct_add_tsk(struct taskstats *d,
 					struct task_struct *tsk)
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_st
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumalator (@total) and @count
  */
-static void delayacct_end(u64 *start, u64 *total, u32 *count)
+static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
 {
 	s64 ns = ktime_get_ns() - *start;
 	unsigned long flags;
 
 	if (ns > 0) {
-		spin_lock_irqsave(&current->delays->lock, flags);
+		spin_lock_irqsave(lock, flags);
 		*total += ns;
 		(*count)++;
-		spin_unlock_irqrestore(&current->delays->lock, flags);
+		spin_unlock_irqrestore(lock, flags);
 	}
 }
 
@@ -69,17 +69,25 @@ void __delayacct_blkio_start(void)
 	current->delays->blkio_start = ktime_get_ns();
 }
 
-void __delayacct_blkio_end(void)
+/*
+ * We cannot rely on the `current` macro, as we haven't yet switched back to
+ * the process being woken.
+ */
+void __delayacct_blkio_end(struct task_struct *p)
 {
-	if (current->delays->flags & DELAYACCT_PF_SWAPIN)
-		/* Swapin block I/O */
-		delayacct_end(&current->delays->blkio_start,
-			&current->delays->swapin_delay,
-			&current->delays->swapin_count);
-	else	/* Other block I/O */
-		delayacct_end(&current->delays->blkio_start,
-			&current->delays->blkio_delay,
-			&current->delays->blkio_count);
+	struct task_delay_info *delays = p->delays;
+	u64 *total;
+	u32 *count;
+
+	if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
+		total = &delays->swapin_delay;
+		count = &delays->swapin_count;
+	} else {
+		total = &delays->blkio_delay;
+		count = &delays->blkio_count;
+	}
+
+	delayacct_end(&delays->lock, &delays->blkio_start, total, count);
 }
 
 int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -153,8 +161,10 @@ void __delayacct_freepages_start(void)
 
 void __delayacct_freepages_end(void)
 {
-	delayacct_end(&current->delays->freepages_start,
-			&current->delays->freepages_delay,
-			&current->delays->freepages_count);
+	delayacct_end(
+		&current->delays->lock,
+		&current->delays->freepages_start,
+		&current->delays->freepages_delay,
+		&current->delays->freepages_count);
 }
 
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2046,7 +2046,7 @@ try_to_wake_up(struct task_struct *p, un
 	p->state = TASK_WAKING;
 
 	if (p->in_iowait) {
-		delayacct_blkio_end();
+		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
@@ -2059,7 +2059,7 @@ try_to_wake_up(struct task_struct *p, un
 #else /* CONFIG_SMP */
 
 	if (p->in_iowait) {
-		delayacct_blkio_end();
+		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
@@ -2112,7 +2112,7 @@ static void try_to_wake_up_local(struct
 
 	if (!task_on_rq_queued(p)) {
 		if (p->in_iowait) {
-			delayacct_blkio_end();
+			delayacct_blkio_end(p);
 			atomic_dec(&rq->nr_iowait);
 		}
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);

  parent reply	other threads:[~2018-01-22  8:52 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-22  8:44 [PATCH 4.14 00/89] 4.14.15-stable review Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 02/89] drm/nouveau/disp/gf119: add missing drive vfunc ptr Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 03/89] objtool: Fix seg fault with clang-compiled objects Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 04/89] objtool: Fix Clang enum conversion warning Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 05/89] objtool: Fix seg fault caused by missing parameter Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 06/89] powerpc/pseries: Add H_GET_CPU_CHARACTERISTICS flags & wrapper Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 07/89] powerpc/64: Add macros for annotating the destination of rfid/hrfid Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 08/89] powerpc/64s: Simple RFI macro conversions Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 09/89] powerpc/64: Convert the syscall exit path to use RFI_TO_USER/KERNEL Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 10/89] powerpc/64: Convert fast_exception_return " Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 11/89] powerpc/64s: Convert slb_miss_common " Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 12/89] powerpc/64s: Add support for RFI flush of L1-D cache Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 13/89] powerpc/64s: Support disabling RFI flush with no_rfi_flush and nopti Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 14/89] powerpc/pseries: Query hypervisor for RFI flush settings Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 15/89] powerpc/powernv: Check device-tree " Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 16/89] futex: Avoid violating the 10th rule of futex Greg Kroah-Hartman
2018-01-22  9:48   ` Geert Uytterhoeven
2018-01-22  9:53     ` Greg Kroah-Hartman
2018-01-22 10:04       ` Geert Uytterhoeven
2018-01-22  8:44 ` [PATCH 4.14 17/89] futex: Prevent overflow by strengthen input validation Greg Kroah-Hartman
2018-01-25 13:45   ` Jiri Slaby
2018-01-25 14:03     ` Thomas Gleixner
2018-01-25 14:06       ` Jiri Slaby
2018-01-25 14:30         ` Thomas Gleixner
2018-01-25 14:47           ` Jiri Slaby
2018-01-25 15:12             ` Greg Kroah-Hartman
2018-01-25 15:21               ` Jiri Slaby
2018-01-25 15:30                 ` Peter Zijlstra
2018-01-25 21:42                 ` Darren Hart
2018-01-22  8:44 ` [PATCH 4.14 18/89] ALSA: seq: Make ioctls race-free Greg Kroah-Hartman
2018-01-22  8:44 ` [PATCH 4.14 19/89] ALSA: pcm: Remove yet superfluous WARN_ON() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 20/89] ALSA: hda - Apply headphone noise quirk for another Dell XPS 13 variant Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 21/89] ALSA: hda - Apply the existing quirk to iMac 14,1 Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 22/89] IB/hfi1: Prevent a NULL dereference Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 23/89] RDMA/mlx5: Fix out-of-bound access while querying AH Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 24/89] timers: Unconditionally check deferrable base Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 25/89] af_key: fix buffer overread in verify_address_len() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 26/89] af_key: fix buffer overread in parse_exthdrs() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 27/89] iser-target: Fix possible use-after-free in connection establishment error Greg Kroah-Hartman
2018-01-22  8:45 ` Greg Kroah-Hartman [this message]
2018-01-22  8:45 ` [PATCH 4.14 29/89] objtool: Fix seg fault with gold linker Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 31/89] x86/kasan: Panic if there is not enough memory to boot Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 32/89] x86/retpoline: Fill RSB on context switch for affected CPUs Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 33/89] x86/retpoline: Add LFENCE to the retpoline/RSB filling RSB macros Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 34/89] objtool: Improve error message for bad file argument Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 36/89] module: Add retpoline tag to VERMAGIC Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 37/89] x86/intel_rdt/cqm: Prevent use after free Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 38/89] x86/mm/pkeys: Fix fill_sig_info_pkey Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 39/89] x86/idt: Mark IDT tables __initconst Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 40/89] x86/tsc: Future-proof native_calibrate_tsc() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 41/89] x86/tsc: Fix erroneous TSC rate on Skylake Xeon Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 42/89] pipe: avoid round_pipe_size() nr_pages overflow on 32-bit Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 43/89] x86/apic/vector: Fix off by one in error path Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 44/89] x86/mm: Clean up register saving in the __enc_copy() assembly code Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 45/89] x86/mm: Use a struct to reduce parameters for SME PGD mapping Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 46/89] x86/mm: Centralize PMD flags in sme_encrypt_kernel() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 47/89] x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 48/89] ARM: OMAP3: hwmod_data: add missing module_offs for MMC3 Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 49/89] x86/mm: Encrypt the initrd earlier for BSP microcode update Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 50/89] Input: ALPS - fix multi-touch decoding on SS4 plus touchpads Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 51/89] Input: synaptics-rmi4 - prevent UAF reported by KASAN Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 52/89] Input: 88pm860x-ts - fix child-node lookup Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 53/89] Input: twl6040-vibra " Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 54/89] Input: twl4030-vibra - fix sibling-node lookup Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 55/89] tracing: Fix converting enums from the map in trace_event_eval_update() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 56/89] phy: work around phys references to usb-nop-xceiv devices Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 57/89] ARM64: dts: marvell: armada-cp110: Fix clock resources for various node Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 58/89] ARM: sunxi_defconfig: Enable CMA Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 59/89] ARM: dts: kirkwood: fix pin-muxing of MPP7 on OpenBlocks A7 Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 60/89] can: peak: fix potential bug in packet fragmentation Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 61/89] can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 62/89] can: af_can: canfd_rcv(): " Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 63/89] i2c: core-smbus: prevent stack corruption on read I2C_BLOCK_DATA Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 64/89] scripts/gdb/linux/tasks.py: fix get_thread_info Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 65/89] proc: fix coredump vs read /proc/*/stat race Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 66/89] libata: apply MAX_SEC_1024 to all LITEON EP1 series devices Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 67/89] scsi: libsas: Disable asynchronous aborts for SATA devices Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 68/89] workqueue: avoid hard lockups in show_workqueue_state() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 69/89] drm/vmwgfx: fix memory corruption with legacy/sou connectors Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 70/89] dm btree: fix serious bug in btree_split_beneath() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 71/89] dm thin metadata: THIN_MAX_CONCURRENT_LOCKS should be 6 Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 72/89] dm integrity: dont store cipher request on the stack Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 73/89] dm crypt: fix crash by adding missing check for auth key size Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 74/89] dm crypt: wipe kernel key copy after IV initialization Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 75/89] dm crypt: fix error return code in crypt_ctr() Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 76/89] x86: Use __nostackprotect for sme_encrypt_kernel Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 77/89] alpha/PCI: Fix noname IRQ level detection Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 78/89] MIPS: CM: Drop WARN_ON(vp != 0) Greg Kroah-Hartman
2018-01-22  8:45 ` [PATCH 4.14 79/89] KVM: arm/arm64: Check pagesize when allocating a hugepage at Stage 2 Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 80/89] arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 81/89] x86/mce: Make machine check speculation protected Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 82/89] retpoline: Introduce start/end markers of indirect thunk Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 83/89] kprobes/x86: Blacklist indirect thunk functions for kprobes Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 84/89] kprobes/x86: Disable optimizing on the function jumps to indirect thunk Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 85/89] x86/pti: Document fix wrong index Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 86/89] x86/retpoline: Optimize inline assembler for vmexit_fill_RSB Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 87/89] x86/mm: Rework wbinvd, hlt operation in stop_this_cpu() Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 88/89] mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte() Greg Kroah-Hartman
2018-01-22  8:46 ` [PATCH 4.14 89/89] net: mvpp2: do not disable GMAC padding Greg Kroah-Hartman
2018-01-22 19:10 ` [PATCH 4.14 00/89] 4.14.15-stable review Guenter Roeck
2018-01-23  6:36   ` Greg Kroah-Hartman
2018-01-22 20:39 ` Naresh Kamboju
2018-01-22 21:00 ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180122083957.481864595@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=axboe@kernel.dk \
    --cc=bgregg@netflix.com \
    --cc=bsingharora@gmail.com \
    --cc=joshs@netflix.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).