From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>,
Levin Alexander <alexander.levin@verizon.com>,
Peter Zijlstra <peterz@infradead.org>,
Mauro Carvalho Chehab <mchehab@s-opensource.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Hannes Frederic Sowa <hannes@stressinduktion.org>,
"Paul E . McKenney" <paulmck@linux.vnet.ibm.com>,
Wanpeng Li <wanpeng.li@hotmail.com>,
Dmitry Safonov <dima@arista.com>,
Thomas Gleixner <tglx@linutronix.de>,
Andrew Morton <akpm@linux-foundation.org>,
Paolo Abeni <pabeni@redhat.com>, Radu Rendec <rrendec@arista.com>,
Ingo Molnar <mingo@kernel.org>,
Stanislaw Gruszka <sgruszka@redhat.com>,
Rik van Riel <riel@redhat.com>,
Eric Dumazet <edumazet@google.com>,
David Miller <davem@davemloft.net>
Subject: [RFC PATCH 4/4] softirq: Replace ksoftirqd with workqueues entirely
Date: Fri, 19 Jan 2018 16:46:14 +0100 [thread overview]
Message-ID: <1516376774-24076-5-git-send-email-frederic@kernel.org> (raw)
In-Reply-To: <1516376774-24076-1-git-send-email-frederic@kernel.org>
Ksoftirqd only remains to implement threaded IRQs. Convert it to
existing per-vector workqueues to avoid code duplication.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Dmitry Safonov <dima@arista.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Miller <davem@davemloft.net>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Levin Alexander <alexander.levin@verizon.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Radu Rendec <rrendec@arista.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
Documentation/RCU/stallwarn.txt | 4 +-
include/linux/interrupt.h | 7 ----
kernel/sched/cputime.c | 13 +++---
kernel/sched/sched.h | 4 +-
kernel/softirq.c | 87 +++++++++--------------------------------
net/ipv4/tcp_output.c | 4 +-
6 files changed, 31 insertions(+), 88 deletions(-)
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index a08f928..ea3a8de 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -17,8 +17,8 @@ o A CPU looping in an RCU read-side critical section.
o A CPU looping with interrupts disabled.
o A CPU looping with preemption disabled. This condition can
- result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
- stalls.
+ result in RCU-sched stalls and, if the softirq workqueue is in use,
+ RCU-bh stalls.
o A CPU looping with bottom halves disabled. This condition can
result in RCU-sched and RCU-bh stalls.
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 92d044d..680f620 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -507,13 +507,6 @@ extern void __raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
-DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
-
-static inline struct task_struct *this_cpu_ksoftirqd(void)
-{
- return this_cpu_read(ksoftirqd);
-}
-
extern int softirq_serving_workqueue(void);
/* Tasklets --- multithreaded analogue of BHs.
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 30f70e5..c5b8dbd 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -64,15 +64,14 @@ void irqtime_account_irq(struct task_struct *curr)
irqtime->irq_start_time += delta;
/*
- * We do not account for softirq time from ksoftirqd here.
- * We want to continue accounting softirq time to ksoftirqd thread
+ * We do not account for softirq time from workqueue here.
+ * We want to continue accounting softirq time to workqueue thread
* in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run.
*/
if (hardirq_count())
irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
- else if (in_serving_softirq() && curr != this_cpu_ksoftirqd() &&
- !softirq_serving_workqueue())
+ else if (in_serving_softirq() && !softirq_serving_workqueue())
irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
@@ -376,11 +375,11 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
cputime -= other;
- if (this_cpu_ksoftirqd() == p || softirq_serving_workqueue()) {
+ if (softirq_serving_workqueue()) {
/*
- * ksoftirqd time do not get accounted in cpu_softirq_time.
+ * Softirq wq time does not get accounted in cpu_softirq_time.
* So, we have to handle it separately here.
- * Also, p->stime needs to be updated for ksoftirqd.
+ * Also, p->stime needs to be updated for workqueue.
*/
account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
} else if (user_tick) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b19552a2..5d481f1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2061,8 +2061,8 @@ struct irqtime {
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
/*
- * Returns the irqtime minus the softirq time computed by ksoftirqd.
- * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
+ * Returns the irqtime minus the softirq time computed by workqueue.
+ * Otherwise workqueue's sum_exec_runtime is subtracted its own runtime
* and never move forward.
*/
static inline u64 irq_time_read(int cpu)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bb0cffa..cf43a8d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -55,8 +55,6 @@ EXPORT_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
-DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
-
const char * const softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
"TASKLET", "SCHED", "HRTIMER", "RCU"
@@ -76,32 +74,6 @@ struct softirq {
static DEFINE_PER_CPU(struct softirq, softirq_cpu);
/*
- * we cannot loop indefinitely here to avoid userspace starvation,
- * but we also don't want to introduce a worst case 1/HZ latency
- * to the pending events, so lets the scheduler to balance
- * the softirq load for us.
- */
-static void wakeup_softirqd(void)
-{
- /* Interrupts are disabled: no need to stop preemption */
- struct task_struct *tsk = __this_cpu_read(ksoftirqd);
-
- if (tsk && tsk->state != TASK_RUNNING)
- wake_up_process(tsk);
-}
-
-/*
- * If ksoftirqd is scheduled, we do not want to process pending softirqs
- * right now. Let ksoftirqd handle this at its own rate, to get fairness.
- */
-static bool ksoftirqd_running(void)
-{
- struct task_struct *tsk = __this_cpu_read(ksoftirqd);
-
- return tsk && (tsk->state == TASK_RUNNING);
-}
-
-/*
* preempt_count and SOFTIRQ_OFFSET usage:
* - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
* softirq processing.
@@ -388,7 +360,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
asmlinkage __visible void do_softirq(void)
{
- __u32 pending;
+ __u32 pending, pending_work;
unsigned long flags;
if (in_interrupt())
@@ -397,8 +369,9 @@ asmlinkage __visible void do_softirq(void)
local_irq_save(flags);
pending = local_softirq_pending();
+ pending_work = __this_cpu_read(softirq_cpu.pending_work_mask);
- if (pending && !ksoftirqd_running())
+ if (pending & ~pending_work)
do_softirq_own_stack();
local_irq_restore(flags);
@@ -412,7 +385,7 @@ void irq_enter(void)
rcu_irq_enter();
if (is_idle_task(current) && !in_interrupt()) {
/*
- * Prevent raise_softirq from needlessly waking up ksoftirqd
+ * Prevent raise_softirq from needlessly waking up workqueue
* here, as softirq will be serviced on return from interrupt.
*/
local_bh_disable();
@@ -425,7 +398,15 @@ void irq_enter(void)
static inline void invoke_softirq(void)
{
- if (ksoftirqd_running())
+ unsigned int pending_work, pending = local_softirq_pending();
+
+ if (!pending)
+ return;
+
+ pending_work = __this_cpu_read(softirq_cpu.pending_work_mask);
+ pending &= ~pending_work;
+
+ if (!pending)
return;
if (!force_irqthreads) {
@@ -445,7 +426,7 @@ static inline void invoke_softirq(void)
do_softirq_own_stack();
#endif
} else {
- wakeup_softirqd();
+ do_softirq_workqueue(pending);
}
}
@@ -474,7 +455,7 @@ void irq_exit(void)
#endif
account_irq_exit_time(current);
preempt_count_sub(HARDIRQ_OFFSET);
- if (!in_interrupt() && local_softirq_pending())
+ if (!in_interrupt())
invoke_softirq();
tick_irq_exit();
@@ -495,11 +476,11 @@ inline void raise_softirq_irqoff(unsigned int nr)
* actually run the softirq once we return from
* the irq or softirq.
*
- * Otherwise we wake up ksoftirqd to make sure we
+ * Otherwise we wake up workqueue to make sure we
* schedule the softirq soon.
*/
if (!in_interrupt())
- wakeup_softirqd();
+ do_softirq_workqueue(BIT(nr));
}
void raise_softirq(unsigned int nr)
@@ -736,27 +717,6 @@ void __init softirq_init(void)
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
-static int ksoftirqd_should_run(unsigned int cpu)
-{
- return local_softirq_pending();
-}
-
-static void run_ksoftirqd(unsigned int cpu)
-{
- local_irq_disable();
- if (local_softirq_pending()) {
- /*
- * We can safely run softirq on inline stack, as we are not deep
- * in the task stack here.
- */
- __do_softirq();
- local_irq_enable();
- cond_resched_rcu_qs();
- return;
- }
- local_irq_enable();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
/*
* tasklet_kill_immediate is called to remove a tasklet which can already be
@@ -819,22 +779,13 @@ static int takeover_tasklets(unsigned int cpu)
#define takeover_tasklets NULL
#endif /* CONFIG_HOTPLUG_CPU */
-static struct smp_hotplug_thread softirq_threads = {
- .store = &ksoftirqd,
- .thread_should_run = ksoftirqd_should_run,
- .thread_fn = run_ksoftirqd,
- .thread_comm = "ksoftirqd/%u",
-};
-
-static __init int spawn_ksoftirqd(void)
+static __init int tasklet_set_takeover(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
- BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
return 0;
}
-early_initcall(spawn_ksoftirqd);
+early_initcall(tasklet_set_takeover);
/*
* [ These __weak aliases are kept in a separate compilation unit, so that
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4e4160..3b4811e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -912,7 +912,7 @@ void tcp_wfree(struct sk_buff *skb)
*/
WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
- /* If this softirq is serviced by ksoftirqd, we are likely under stress.
+ /* If this softirq is serviced by workqueue, we are likely under stress.
* Wait until our queues (qdisc + devices) are drained.
* This gives :
* - less callbacks to tcp_write_xmit(), reducing stress (batches)
@@ -920,7 +920,7 @@ void tcp_wfree(struct sk_buff *skb)
* to migrate this flow (skb->ooo_okay will be eventually set)
*/
if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) &&
- (this_cpu_ksoftirqd() == current || softirq_serving_workqueue()))
+ softirq_serving_workqueue())
goto out;
for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
--
2.7.4
next prev parent reply other threads:[~2018-01-19 15:47 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-19 15:46 [RFC PATCH 0/4] softirq: Per vector threading v3 Frederic Weisbecker
2018-01-19 15:46 ` [RFC PATCH 1/4] softirq: Limit vector to a single iteration on IRQ tail Frederic Weisbecker
2018-01-19 16:16 ` David Miller
2018-01-19 18:25 ` Linus Torvalds
2018-01-19 18:47 ` David Miller
2018-01-21 16:30 ` Frederic Weisbecker
2018-01-21 16:57 ` David Miller
2018-01-19 15:46 ` [RFC PATCH 2/4] softirq: Per vector deferment to workqueue Frederic Weisbecker
2018-01-20 8:41 ` Pavan Kondeti
2018-01-21 16:11 ` Frederic Weisbecker
2018-01-21 17:50 ` Pavan Kondeti
2018-01-21 20:48 ` Frederic Weisbecker
2018-02-08 17:44 ` Sebastian Andrzej Siewior
2018-02-08 18:45 ` David Miller
2018-02-08 20:14 ` Dmitry Safonov
2018-02-08 20:22 ` David Miller
2018-02-08 20:30 ` Dmitry Safonov
2018-02-09 4:11 ` Mike Galbraith
2018-02-09 12:35 ` Sebastian Andrzej Siewior
2018-02-15 16:13 ` Frederic Weisbecker
2018-02-15 16:58 ` Sebastian Andrzej Siewior
2018-01-19 15:46 ` [RFC PATCH 3/4] softirq: Defer to workqueue when rescheduling is needed Frederic Weisbecker
2018-01-19 15:46 ` Frederic Weisbecker [this message]
2018-01-22 19:58 ` [RFC PATCH 0/4] softirq: Per vector threading v3 Mauro Carvalho Chehab
2018-01-23 10:13 ` Paolo Abeni
2018-01-23 12:32 ` Dmitry Safonov
2018-01-24 2:12 ` Frederic Weisbecker
2018-01-23 16:22 ` David Miller
2018-01-23 16:57 ` Paolo Abeni
2018-01-23 17:42 ` Linus Torvalds
2018-01-23 18:01 ` Mike Galbraith
2018-01-23 18:24 ` David Miller
2018-01-24 1:57 ` Frederic Weisbecker
2018-01-24 2:01 ` Frederic Weisbecker
2018-01-24 14:54 ` Paolo Abeni
2018-01-24 15:05 ` David Miller
2018-01-24 16:11 ` Paolo Abeni
2018-02-07 14:18 ` Mauro Carvalho Chehab
2018-03-01 15:21 ` Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1516376774-24076-5-git-send-email-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alexander.levin@verizon.com \
--cc=davem@davemloft.net \
--cc=dima@arista.com \
--cc=edumazet@google.com \
--cc=hannes@stressinduktion.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mchehab@s-opensource.com \
--cc=mingo@kernel.org \
--cc=pabeni@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
--cc=rrendec@arista.com \
--cc=sgruszka@redhat.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=wanpeng.li@hotmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.