public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: riel@redhat.com
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@kernel.org, pbonzini@redhat.com,
	fweisbec@redhat.com, wanpeng.li@hotmail.com, efault@gmx.de,
	tglx@linutronix.de, rkrcmar@redhat.com
Subject: [PATCH 4/4] irqtime: drop local_irq_save/restore from irqtime_account_irq
Date: Thu, 30 Jun 2016 15:35:50 -0400	[thread overview]
Message-ID: <1467315350-3152-5-git-send-email-riel@redhat.com> (raw)
In-Reply-To: <1467315350-3152-1-git-send-email-riel@redhat.com>

From: Rik van Riel <riel@redhat.com>

Drop local_irq_save/restore from irqtime_account_irq.
Instead, have softirq and hardirq track their time spent
independently, with the softirq code subtracting any hardirq
time that elapsed during the softirq run.

The softirq code can be interrupted by hardirq code at
any point in time, but it can check whether it got a
consistent snapshot of the timekeeping variables it wants,
and loop around in the unlikely case that it did not.

Signed-off-by: Rik van Riel <riel@redhat.com>
---
 kernel/sched/cputime.c | 72 +++++++++++++++++++++++++++++++++++++++++---------
 kernel/sched/sched.h   | 38 +++++++++++++++++++++-----
 2 files changed, 90 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a0aefd4c7ea6..b78991fac228 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -26,7 +26,9 @@
 DEFINE_PER_CPU(u64, cpu_hardirq_time);
 DEFINE_PER_CPU(u64, cpu_softirq_time);
 
-static DEFINE_PER_CPU(u64, irq_start_time);
+static DEFINE_PER_CPU(u64, hardirq_start_time);
+static DEFINE_PER_CPU(u64, softirq_start_time);
+static DEFINE_PER_CPU(u64, prev_hardirq_time);
 static int sched_clock_irqtime;
 
 void enable_sched_clock_irqtime(void)
@@ -41,6 +43,7 @@ void disable_sched_clock_irqtime(void)
 
 #ifndef CONFIG_64BIT
 DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+DEFINE_PER_CPU(seqcount_t, softirq_time_seq);
 #endif /* CONFIG_64BIT */
 
 /*
@@ -53,36 +56,79 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  * softirq -> hardirq, hardirq -> softirq
  *
  * When exiting hardirq or softirq time, account the elapsed time.
+ *
+ * When exiting softirq time, subtract the amount of hardirq time that
+ * interrupted this softirq run, to avoid double accounting of that time.
  */
 void irqtime_account_irq(struct task_struct *curr, int irqtype)
 {
-	unsigned long flags;
+	u64 prev_softirq_start;
+	bool leaving_softirq;
+	u64 prev_hardirq;
+	u64 hardirq_time;
 	s64 delta;
 	int cpu;
 
 	if (!sched_clock_irqtime)
 		return;
 
-	local_irq_save(flags);
-
 	cpu = smp_processor_id();
-	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
-	__this_cpu_add(irq_start_time, delta);
 
-	irq_time_write_begin();
+	/*
+	 * Hardirq time accounting is pretty straightforward. If not in
+	 * hardirq context yet (entering hardirq), set the start time.
+	 * If already in hardirq context (leaving), account the elapsed time.
+	 */
+	if (irqtype == HARDIRQ_OFFSET) {
+		bool leaving_hardirq = hardirq_count();
+		delta = sched_clock_cpu(cpu) - __this_cpu_read(hardirq_start_time);
+		__this_cpu_add(hardirq_start_time, delta);
+		if (leaving_hardirq) {
+			hardirq_time_write_begin();
+			__this_cpu_add(cpu_hardirq_time, delta);
+			hardirq_time_write_end();
+		}
+		return;
+	}
+
+	/*
+	 * Softirq context may get interrupted by hardirq context, on the
+	 * same CPU. At softirq entry time the amount of time this CPU spent
+	 * in hardirq context is stored. At softirq exit time, the time spent
+	 * in hardirq context during the softirq is subtracted.
+	 */
+	prev_softirq_start = __this_cpu_read(softirq_start_time);
+	prev_hardirq = __this_cpu_read(prev_hardirq_time);
+	leaving_softirq = in_serving_softirq();
+
+	do {
+		u64 now = sched_clock_cpu(cpu);
+
+		hardirq_time = READ_ONCE(per_cpu(cpu_hardirq_time, cpu));
+		__this_cpu_write(softirq_start_time, now);
+		__this_cpu_write(prev_hardirq_time, hardirq_time);
+
+		if (leaving_softirq) {
+			/*
+			 * Subtract hardirq time that happened during this
+			 * softirq.
+			 */
+			s64 hi_delta = hardirq_time - prev_hardirq;
+			delta = now - prev_softirq_start - hi_delta;
+		}
+		/* Loop around if interrupted by a hardirq. */
+	} while (hardirq_time != READ_ONCE(per_cpu(cpu_hardirq_time, cpu)));
+
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
 	 * in that case, so as not to confuse scheduler with a special task
 	 * that do not consume any time, but still wants to run.
 	 */
-	if (hardirq_count())
-		__this_cpu_add(cpu_hardirq_time, delta);
-	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+	softirq_time_write_begin();
+	if (leaving_softirq && curr != this_cpu_ksoftirqd())
 		__this_cpu_add(cpu_softirq_time, delta);
-
-	irq_time_write_end();
-	local_irq_restore(flags);
+	softirq_time_write_end();
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d23527e..cad4df9835f7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1752,38 +1752,62 @@ DECLARE_PER_CPU(u64, cpu_softirq_time);
 
 #ifndef CONFIG_64BIT
 DECLARE_PER_CPU(seqcount_t, irq_time_seq);
+DECLARE_PER_CPU(seqcount_t, softirq_time_seq);
 
-static inline void irq_time_write_begin(void)
+static inline void hardirq_time_write_begin(void)
 {
 	__this_cpu_inc(irq_time_seq.sequence);
 	smp_wmb();
 }
 
-static inline void irq_time_write_end(void)
+static inline void hardirq_time_write_end(void)
 {
 	smp_wmb();
 	__this_cpu_inc(irq_time_seq.sequence);
 }
 
+static inline void softirq_time_write_begin(void)
+{
+	__this_cpu_inc(softirq_time_seq.sequence);
+	smp_wmb();
+}
+
+static inline void softirq_time_write_end(void)
+{
+	smp_wmb();
+	__this_cpu_inc(softirq_time_seq.sequence);
+}
+
 static inline u64 irq_time_read(int cpu)
 {
 	u64 irq_time;
-	unsigned seq;
+	unsigned hi_seq;
+	unsigned si_seq;
 
 	do {
-		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		hi_seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		si_seq = read_seqcount_begin(&per_cpu(softirq_time_seq, cpu));
 		irq_time = per_cpu(cpu_softirq_time, cpu) +
 			   per_cpu(cpu_hardirq_time, cpu);
-	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), hi_seq) ||
+		 read_seqcount_retry(&per_cpu(softirq_time_seq, cpu), si_seq));
 
 	return irq_time;
 }
 #else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
+static inline void hardirq_time_write_begin(void)
+{
+}
+
+static inline void hardirq_time_write_end(void)
+{
+}
+
+static inline void softirq_time_write_begin(void)
 {
 }
 
-static inline void irq_time_write_end(void)
+static inline void softirq_time_write_end(void)
 {
 }
 
-- 
2.7.4

  parent reply	other threads:[~2016-06-30 19:36 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-30 19:35 [PATCH v3 0/4] sched,time: fix irq time accounting with nohz_idle riel
2016-06-30 19:35 ` [PATCH 1/4] sched,time: count actually elapsed irq & softirq time riel
2016-07-05 12:40   ` Frederic Weisbecker
2016-07-05 13:08     ` Rik van Riel
2016-07-05 14:00       ` Frederic Weisbecker
2016-07-05 16:47     ` [PATCH v3 " Rik van Riel
2016-07-06 14:15       ` Frederic Weisbecker
2016-06-30 19:35 ` [PATCH 2/4] nohz,cputime: replace VTIME_GEN irq time code with IRQ_TIME_ACCOUNTING code riel
2016-06-30 19:35 ` [PATCH 3/4] irqtime: add irq type parameter to irqtime_account_irq riel
2016-06-30 19:35 ` riel [this message]
2016-07-08 12:30   ` [PATCH 4/4] irqtime: drop local_irq_save/restore from irqtime_account_irq Frederic Weisbecker
2016-07-08 13:19     ` Rik van Riel
2016-07-08 14:01       ` Frederic Weisbecker
2016-07-08 14:34       ` Paolo Bonzini
2016-07-08 15:56         ` Rik van Riel
2016-07-08 23:58           ` Frederic Weisbecker
2016-07-05 13:02 ` [PATCH v3 0/4] sched,time: fix irq time accounting with nohz_idle Nikolay Borisov
2016-07-05 13:09   ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1467315350-3152-5-git-send-email-riel@redhat.com \
    --to=riel@redhat.com \
    --cc=efault@gmx.de \
    --cc=fweisbec@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rkrcmar@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=wanpeng.li@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox