public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <dima@arista.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Hannes Frederic Sowa <hannes@stressinduktion.org>,
	Ingo Molnar <mingo@kernel.org>,
	"Levin, Alexander (Sasha Levin)" <alexander.levin@verizon.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Mauro Carvalho Chehab <mchehab@s-opensource.com>,
	Mike Galbraith <efault@gmx.de>, Paolo Abeni <pabeni@redhat.com>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Radu Rendec <rrendec@arista.com>, Rik van Riel <riel@redhat.com>,
	Stanislaw Gruszka <sgruszka@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Wanpeng Li <wanpeng.li@hotmail.com>
Subject: [RFC 6/6] softirq/sched: Account si cpu time to ksoftirqd(s)
Date: Thu, 18 Jan 2018 16:12:38 +0000	[thread overview]
Message-ID: <20180118161238.13792-7-dima@arista.com> (raw)
In-Reply-To: <20180118161238.13792-1-dima@arista.com>

Warning: non-merge-ready in any sense

Under CONFIG_FAIR_SOFTIRQ_SCHEDULE each sched tick will account cpu time
spent on processing softirqs to ksoftirqd of the softirq's group.
Then update ksoftirqd->se.sum_exec_runtime and recalculate
ksoftirqd->se.vruntime.

Use CFS's vruntime to decide if a softirq needs to be served or deferred.
It's possible to tune this with the ksoftirqd nice policy.

Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 include/linux/interrupt.h |  1 +
 kernel/sched/fair.c       | 38 ++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h      | 19 +++++++++++++++++++
 kernel/softirq.c          | 45 +++++++++++++++++++++++++++++++++++++--------
 4 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 17e1a04445fa..a0b5c24c088a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -512,6 +512,7 @@ extern struct task_struct *__percpu **ksoftirqd;
 extern unsigned nr_softirq_groups;
 
 extern bool servicing_softirq(unsigned nr);
+extern unsigned group_softirqs(unsigned nr);
 static inline bool current_is_ksoftirqd(void)
 {
 	unsigned i;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2fe3aa853e4d..d0105739551f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -813,6 +813,42 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 }
 #endif /* CONFIG_SMP */
 
+static void update_ksoftirqd(struct cfs_rq *cfs_rq)
+{
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	int rq_cpu = cpu_of(rq_of(cfs_rq));
+	u64 si_times[NR_SOFTIRQS], delta[NR_SOFTIRQS];
+	unsigned i;
+
+	if (unlikely(!ksoftirqd))
+		return;
+
+	softirq_time_read(rq_cpu, si_times);
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		delta[i] = si_times[i] - cfs_rq->prev_si_time[i];
+		cfs_rq->prev_si_time[i] = si_times[i];
+		if (unlikely((s64)delta[i] < 0))
+			delta[i] = 0;
+	}
+
+	for (i = 0; i < nr_softirq_groups; i++) {
+		unsigned j, softirq = 0, group_mask = group_softirqs(i);
+		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
+		u64 sum_delta = 0;
+
+		while ((j = ffs(group_mask))) {
+			softirq += j - 1;
+			group_mask >>= j;
+			sum_delta += delta[softirq];
+		}
+
+		tsk->se.sum_exec_runtime += sum_delta;
+		tsk->se.vruntime += calc_delta_fair(sum_delta, &tsk->se);
+	}
+#endif
+}
+
 /*
  * Update the current task's runtime statistics.
  */
@@ -822,6 +858,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	u64 now = rq_clock_task(rq_of(cfs_rq));
 	u64 delta_exec;
 
+	update_ksoftirqd(cfs_rq);
+
 	if (unlikely(!curr))
 		return;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14e154c86dc5..e95d8d4f9146 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -487,6 +487,10 @@ struct cfs_rq {
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	u64 prev_si_time[NR_SOFTIRQS];
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 	int runtime_enabled;
 	u64 runtime_expires;
@@ -2081,6 +2085,21 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+static inline void softirq_time_read(int cpu, u64 si_times[NR_SOFTIRQS])
+{
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+	unsigned int seq, i;
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		do {
+			seq = __u64_stats_fetch_begin(&irqtime->sync);
+			si_times[i] = irqtime->total_si[i];
+		} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+	}
+#endif
+}
+
 #ifdef CONFIG_CPU_FREQ
 DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 516e31d3d5b4..a123bafa11c2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -82,6 +82,11 @@ bool servicing_softirq(unsigned nr)
 	return false;
 }
 
+unsigned group_softirqs(unsigned nr)
+{
+	return group_to_softirqs[nr];
+}
+
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
  * but we also don't want to introduce a worst case 1/HZ latency
@@ -112,15 +117,10 @@ static void wakeup_softirqd(u32 softirq_mask)
  * If ksoftirqd is scheduled, we do not want to process pending softirqs
  * right now. Let ksoftirqd handle this at its own rate, to get fairness.
  */
-static bool ksoftirqd_running(void)
+static bool ksoftirqd_running(__u32 pending)
 {
-	/* We rely that there are pending softirqs */
-	__u32 pending = local_softirq_pending();
 	unsigned i;
 
-	if (!ksoftirqd)
-		return false;
-
 	for (i = 0; i < nr_softirq_groups && pending; i++) {
 		/* Interrupts are disabled: no need to stop preemption */
 		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
@@ -137,6 +137,33 @@ static bool ksoftirqd_running(void)
 	return !pending;
 }
 
+static __u32 softirqs_to_serve(__u32 pending)
+{
+	unsigned i;
+	__u32 unserve = pending;
+
+	if (!ksoftirqd || !current || is_idle_task(current))
+		return pending;
+
+	if (!IS_ENABLED(CONFIG_FAIR_SOFTIRQ_SCHEDULE))
+		return ksoftirqd_running(pending) ? 0 : pending;
+
+	for (i = 0; i < nr_softirq_groups && unserve; i++) {
+		/* Interrupts are disabled: no need to stop preemption */
+		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
+
+		if (tsk && (s64)(current->se.vruntime - tsk->se.vruntime) < 0) {
+			if (tsk->state != TASK_RUNNING)
+				wake_up_process(tsk);
+			continue;
+		}
+
+		unserve &= ~group_to_softirqs[i];
+	}
+
+	return pending & ~unserve;
+}
+
 /*
  * preempt_count and SOFTIRQ_OFFSET usage:
  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
@@ -385,7 +412,8 @@ asmlinkage __visible void do_softirq(void)
 
 	local_irq_save(flags);
 
-	if (!ksoftirqd_running())
+	pending = softirqs_to_serve(pending);
+	if (pending)
 		do_softirq_own_stack(pending);
 
 	local_irq_restore(flags);
@@ -414,7 +442,8 @@ static inline void invoke_softirq(void)
 {
 	__u32 pending = local_softirq_pending();
 
-	if (!pending || !ksoftirqd_running())
+	pending = softirqs_to_serve(pending);
+	if (!pending)
 		return;
 
 	if (!force_irqthreads) {
-- 
2.13.6

      parent reply	other threads:[~2018-01-18 16:13 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-18 16:12 [RFC 0/6] Multi-thread per-cpu ksoftirqd Dmitry Safonov
2018-01-18 16:12 ` [RFC 1/6] softirq: Add softirq_groups boot parameter Dmitry Safonov
2018-01-18 16:12 ` [RFC 2/6] softirq: Introduce mask for __do_softirq() Dmitry Safonov
2018-01-18 16:12 ` [RFC 3/6] softirq: Add reverse group-to-softirq map Dmitry Safonov
2018-01-18 16:12 ` [RFC 4/6] softirq: Run per-group per-cpu ksoftirqd thread Dmitry Safonov
2018-01-18 17:00   ` Mike Galbraith
2018-01-18 17:53     ` Dmitry Safonov
2018-01-18 18:28       ` Mike Galbraith
2018-01-18 16:12 ` [RFC 5/6] softirq: Add time accounting per-softirq type Dmitry Safonov
2018-01-18 16:12 ` Dmitry Safonov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180118161238.13792-7-dima@arista.com \
    --to=dima@arista.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.levin@verizon.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=efault@gmx.de \
    --cc=fweisbec@gmail.com \
    --cc=hannes@stressinduktion.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@s-opensource.com \
    --cc=mingo@kernel.org \
    --cc=pabeni@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=rrendec@arista.com \
    --cc=sgruszka@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=wanpeng.li@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox