linux-rt-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
	linux-rt-users <linux-rt-users@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Carsten Emde <C.Emde@osadl.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	John Kacur <jkacur@redhat.com>,
	Paul Gortmaker <paul.gortmaker@windriver.com>,
	<stable-rt@vger.kernel.org>
Subject: [PATCH RT 07/22] softirq: split timer softirqs out of ksoftirqd
Date: Wed, 02 Mar 2016 10:09:06 -0500	[thread overview]
Message-ID: <20160302151111.368583146@goodmis.org> (raw)
In-Reply-To: 20160302150859.204542604@goodmis.org

[-- Attachment #1: 0007-softirq-split-timer-softirqs-out-of-ksoftirqd.patch --]
[-- Type: text/plain, Size: 6889 bytes --]

4.1.15-rt18-rc1 stable review patch.
If anyone has any objections, please let me know.

------------------

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

The ksoftirqd thread runs in -RT with SCHED_FIFO (prio 1) and deals mostly
with timer wakeups, which cannot happen in hardirq context. The prio has been
raised from the normal SCHED_OTHER so that the timer wakeup does not happen
too late.
With enough networking load it is possible that the system never goes
idle and only ever schedules ksoftirqd and tasks with a higher priority.
One of the tasks left behind is one of RCU's threads, and so we see stalls
and eventually run out of memory.
This patch moves the TIMER and HRTIMER softirqs out of the `ksoftirqd`
thread into their own `ktimersoftd` thread. The former can now run at
SCHED_OTHER (same as mainline) and the latter at SCHED_FIFO due to the wakeups.

From a networking point of view: the NAPI callback runs after the network
interrupt thread completes. If its run time takes too long, the NAPI code
itself schedules `ksoftirqd`. There, in the thread, it can run at
SCHED_OTHER priority and it won't defer RCU anymore.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/softirq.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 74 insertions(+), 11 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index f4c2e679a7d7..aff764fad236 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,6 +58,10 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+#ifdef CONFIG_PREEMPT_RT_FULL
+#define TIMER_SOFTIRQS	((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
+DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
+#endif
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -171,6 +175,17 @@ static void wakeup_softirqd(void)
 		wake_up_process(tsk);
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void wakeup_timer_softirqd(void)
+{
+	/* Interrupts are disabled: no need to stop preemption */
+	struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
+
+	if (tsk && tsk->state != TASK_RUNNING)
+		wake_up_process(tsk);
+}
+#endif
+
 static void handle_softirq(unsigned int vec_nr)
 {
 	struct softirq_action *h = softirq_vec + vec_nr;
@@ -473,7 +488,6 @@ void __raise_softirq_irqoff(unsigned int nr)
 static inline void local_bh_disable_nort(void) { local_bh_disable(); }
 static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
 static void ksoftirqd_set_sched_params(unsigned int cpu) { }
-static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
 
 #else /* !PREEMPT_RT_FULL */
 
@@ -618,8 +632,12 @@ void thread_do_softirq(void)
 
 static void do_raise_softirq_irqoff(unsigned int nr)
 {
+	unsigned int mask;
+
+	mask = 1UL << nr;
+
 	trace_softirq_raise(nr);
-	or_softirq_pending(1UL << nr);
+	or_softirq_pending(mask);
 
 	/*
 	 * If we are not in a hard interrupt and inside a bh disabled
@@ -628,16 +646,30 @@ static void do_raise_softirq_irqoff(unsigned int nr)
 	 * delegate it to ksoftirqd.
 	 */
 	if (!in_irq() && current->softirq_nestcnt)
-		current->softirqs_raised |= (1U << nr);
-	else if (__this_cpu_read(ksoftirqd))
-		__this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
+		current->softirqs_raised |= mask;
+	else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
+		return;
+
+	if (mask & TIMER_SOFTIRQS)
+		__this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
+	else
+		__this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
+}
+
+static void wakeup_proper_softirq(unsigned int nr)
+{
+	if ((1UL << nr) & TIMER_SOFTIRQS)
+		wakeup_timer_softirqd();
+	else
+		wakeup_softirqd();
 }
 
+
 void __raise_softirq_irqoff(unsigned int nr)
 {
 	do_raise_softirq_irqoff(nr);
 	if (!in_irq() && !current->softirq_nestcnt)
-		wakeup_softirqd();
+		wakeup_proper_softirq(nr);
 }
 
 /*
@@ -663,7 +695,7 @@ void raise_softirq_irqoff(unsigned int nr)
 	 * raise a WARN() if the condition is met.
 	 */
 	if (!current->softirq_nestcnt)
-		wakeup_softirqd();
+		wakeup_proper_softirq(nr);
 }
 
 static inline int ksoftirqd_softirq_pending(void)
@@ -676,22 +708,37 @@ static inline void _local_bh_enable_nort(void) { }
 
 static inline void ksoftirqd_set_sched_params(unsigned int cpu)
 {
+	/* Take over all but timer pending softirqs when starting */
+	local_irq_disable();
+	current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
+	local_irq_enable();
+}
+
+static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
+{
 	struct sched_param param = { .sched_priority = 1 };
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
-	/* Take over all pending softirqs when starting */
+
+	/* Take over timer pending softirqs when starting */
 	local_irq_disable();
-	current->softirqs_raised = local_softirq_pending();
+	current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
 	local_irq_enable();
 }
 
-static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
+static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
+						    bool online)
 {
 	struct sched_param param = { .sched_priority = 0 };
 
 	sched_setscheduler(current, SCHED_NORMAL, &param);
 }
 
+static int ktimer_softirqd_should_run(unsigned int cpu)
+{
+	return current->softirqs_raised;
+}
+
 #endif /* PREEMPT_RT_FULL */
 /*
  * Enter an interrupt context.
@@ -741,6 +788,9 @@ static inline void invoke_softirq(void)
 	if (__this_cpu_read(ksoftirqd) &&
 			__this_cpu_read(ksoftirqd)->softirqs_raised)
 		wakeup_softirqd();
+	if (__this_cpu_read(ktimer_softirqd) &&
+			__this_cpu_read(ktimer_softirqd)->softirqs_raised)
+		wakeup_timer_softirqd();
 	local_irq_restore(flags);
 #endif
 }
@@ -1173,17 +1223,30 @@ static struct notifier_block cpu_nfb = {
 static struct smp_hotplug_thread softirq_threads = {
 	.store			= &ksoftirqd,
 	.setup			= ksoftirqd_set_sched_params,
-	.cleanup		= ksoftirqd_clr_sched_params,
 	.thread_should_run	= ksoftirqd_should_run,
 	.thread_fn		= run_ksoftirqd,
 	.thread_comm		= "ksoftirqd/%u",
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static struct smp_hotplug_thread softirq_timer_threads = {
+	.store			= &ktimer_softirqd,
+	.setup			= ktimer_softirqd_set_sched_params,
+	.cleanup		= ktimer_softirqd_clr_sched_params,
+	.thread_should_run	= ktimer_softirqd_should_run,
+	.thread_fn		= run_ksoftirqd,
+	.thread_comm		= "ktimersoftd/%u",
+};
+#endif
+
 static __init int spawn_ksoftirqd(void)
 {
 	register_cpu_notifier(&cpu_nfb);
 
 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
+#ifdef CONFIG_PREEMPT_RT_FULL
+	BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
+#endif
 
 	return 0;
 }
-- 
2.7.0



  parent reply	other threads:[~2016-03-02 15:11 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-02 15:08 [PATCH RT 00/22] Linux 4.1.15-rt18-rc1 Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 01/22] sched: reset tasks lockless wake-queues on fork() Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 02/22] ptrace: dont open IRQs in ptrace_freeze_traced() too early Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 03/22] net: move xmit_recursion to per-task variable on -RT Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 04/22] kernel/softirq: use cond_resched_rcu_qs() on -RT as well (run_ksoftirqd()) Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 05/22] net/core: protect users of napi_alloc_cache against reentrance Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 06/22] preempt-lazy: Add the lazy-preemption check to preempt_schedule() Steven Rostedt
2016-03-02 15:09 ` Steven Rostedt [this message]
2016-03-02 15:09 ` [PATCH RT 08/22] net: provide a way to delegate processing a softirq to ksoftirqd Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 09/22] latencyhist: disable jump-labels Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 10/22] arm64: replace read_lock to rcu lock in call_step_hook Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 11/22] kernel: migrate_disable() do fastpath in atomic & irqs-off Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 12/22] kernel: softirq: unlock with irqs on Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 13/22] kernel/stop_machine: partly revert "stop_machine: Use raw spinlocks" Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 14/22] tick/broadcast: Make broadcast hrtimer irqsafe Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 15/22] sched,rt: __always_inline preemptible_lazy() Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 16/22] drm,radeon,i915: Use preempt_disable/enable_rt() where recommended Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 17/22] drm,i915: Use local_lock/unlock_irq() in intel_pipe_update_start/end() Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 18/22] trace: Use rcuidle version for preemptoff_hist trace point Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 19/22] f2fs: Mutex cant be used by down_write_nest_lock() Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 20/22] rcu/torture: Comment out rcu_bh ops on PREEMPT_RT_FULL Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 21/22] kernel: sched: Fix preempt_disable_ip recodring for preempt_disable() Steven Rostedt
2016-03-02 15:09 ` [PATCH RT 22/22] Linux 4.1.15-rt18-rc1 Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160302151111.368583146@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=C.Emde@osadl.org \
    --cc=bigeasy@linutronix.de \
    --cc=jkacur@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=paul.gortmaker@windriver.com \
    --cc=stable-rt@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).