All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: linux-rt-users <linux-rt-users@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	rostedt@goodmis.org, John Kacur <jkacur@redhat.com>
Subject: [ANNOUNCE] 3.14.2-rt3
Date: Sat, 3 May 2014 19:01:11 +0200	[thread overview]
Message-ID: <20140503170110.GA10652@linutronix.de> (raw)

Dear RT folks!

I'm pleased to announce the v3.14.2-rt3 patch set.

Changes since v3.14.2-rt2
- rwsem readers are now not allowed to nest. A patch rom Steven Rostedt.
- a few bugs were fixed in the hotplug code which were made during the
  v3.14 port. Fixed by Mike Galbraith.
- Mike Galbraith sent a patch which might fix lazy preempt on x86_64.
  Patch applied and my machine still explodes therefore lazy preempt
  remains off on x86_64.
- Mike Galbraith sent a few patches to get cpu hoplug to work. This
  includes lg_global_trylock_relax().
- A few push downs of migrate_disable() (where we call migrate_disable()
  after the rt_mutex_trylock()) have been reverted. It seems hotplug is
  not too happy about this. A patch by Steven Rostedt and and Mike
  Galbraith
- There was a complaint about a backrace from run_local_timers() in UP
  mode because a spin_try_lock() failed. _This_ particular case was not
  an error. This optimization was for FULL_NO_HZ which is pointless on
  UP because there is no spare CPU. Therefore, this optimization is
  disabled in UP mode and the backtrace is gone. Reported by Stanislav
  Meduna.
- block-mq notifier uses now a spinlock and runs during CPU_POST_DEAD
  instead at CPU_DEAD time. lockdep complained about the sleeping
  ctx->lock within the rawlock (blk_mq_cpu_notify_lock) and CPU_DEAD
  runs with irqs off.

Known issues:

      - bcache is disabled.

      - lazy preempt on x86_64 leads to a crash with some load.

      - CPU hotplug works in general. Steven's test script however
        deadlocks usually on the second invocation.

The delta patch against v3.14.2-rt2 is appended below and can be found
here:
   https://www.kernel.org/pub/linux/kernel/projects/rt/3.14/incr/patch-3.14.2-rt2-rt3.patch.xz

The RT patch against 3.14.2 can be found here:

   https://www.kernel.org/pub/linux/kernel/projects/rt/3.14/patch-3.14.2-rt3.patch.xz

The split quilt queue is available at:

   https://www.kernel.org/pub/linux/kernel/projects/rt/3.14/patches-3.14.2-rt3.tar.xz

Sebastian

diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 752fe56..1e649c4 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -94,7 +94,11 @@ static __always_inline bool __preempt_count_dec_and_test(void)
 {
 	if (____preempt_count_dec_and_test())
 		return true;
+#ifdef CONFIG_PREEMPT_LAZY
 	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+	return false;
+#endif
 }
 
 /*
@@ -102,8 +106,12 @@ static __always_inline bool __preempt_count_dec_and_test(void)
  */
 static __always_inline bool should_resched(void)
 {
+#ifdef CONFIG_PREEMPT_LAZY
 	return unlikely(!__this_cpu_read_4(__preempt_count) || \
 			test_thread_flag(TIF_NEED_RESCHED_LAZY));
+#else
+	return unlikely(!__this_cpu_read_4(__preempt_count));
+#endif
 }
 
 #ifdef CONFIG_PREEMPT
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 7c8b356..5701b50 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -72,4 +72,5 @@ void common(void) {
 
 	BLANK();
 	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+	DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index fd2d976..6157ed6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -365,19 +365,22 @@ ENTRY(resume_kernel)
 need_resched:
 	# preempt count == 0 + NEED_RS set?
 	cmpl $0,PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
+	jnz restore_all
+#else
 	jz test_int_off
 
 	# atleast preempt count == 0 ?
-	cmpl $_TIF_NEED_RESCHED,PER_CPU_VAR(__preempt_count)
+	cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
 	jne restore_all
 
 	cmpl $0,TI_preempt_lazy_count(%ebp)	# non-zero preempt_lazy_count ?
 	jnz restore_all
 
-	testl $_TIF_NEED_RESCHED_LAZY, %ecx
+	testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
 	jz restore_all
-
 test_int_off:
+#endif
 	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b650b43..d893814 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -658,8 +658,8 @@ GLOBAL(system_call_after_swapgs)
 	/* Handle reschedules */
 	/* edx:	work, edi: workmask */
 sysret_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc sysret_signal
+	testl $_TIF_NEED_RESCHED_MASK,%edx
+	jz sysret_signal
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
@@ -771,8 +771,8 @@ GLOBAL(int_with_check)
 	/* First do a reschedule test. */
 	/* edx:	work, edi: workmask */
 int_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc  int_very_careful
+	testl $_TIF_NEED_RESCHED_MASK,%edx
+	jz  int_very_careful
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
@@ -1071,8 +1071,8 @@ ENTRY(native_iret)
 	/* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
-	bt    $TIF_NEED_RESCHED,%edx
-	jnc   retint_signal
+	testl $_TIF_NEED_RESCHED_MASK,%edx
+	jz   retint_signal
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
@@ -1104,7 +1104,22 @@ ENTRY(native_iret)
 	/* rcx:	 threadinfo. interrupts off. */
 ENTRY(retint_kernel)
 	cmpl $0,PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
 	jnz  retint_restore_args
+#else
+	jz  check_int_off
+
+	# atleast preempt count == 0 ?
+	cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
+	jnz retint_restore_args
+
+	cmpl $0, TI_preempt_lazy_count(%rcx)
+	jnz retint_restore_args
+
+	bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
+	jnc  retint_restore_args
+check_int_off:
+#endif
 	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
 	call preempt_schedule_irq
@@ -1540,7 +1555,7 @@ ENTRY(paranoid_exit)
 	movq %rsp,%rdi			/* &pt_regs */
 	call sync_regs
 	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
+	testl $_TIF_NEED_RESCHED_MASK,%ebx
 	jnz paranoid_schedule
 	movl %ebx,%edx			/* arg3: thread flags */
 	TRACE_IRQS_ON
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 136ef86..37acc3a 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -11,7 +11,7 @@
 #include "blk-mq.h"
 
 static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
+static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
 
 static int blk_mq_main_cpu_notify(struct notifier_block *self,
 				  unsigned long action, void *hcpu)
@@ -19,12 +19,15 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
 	unsigned int cpu = (unsigned long) hcpu;
 	struct blk_mq_cpu_notifier *notify;
 
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
+	if (action != CPU_POST_DEAD && action != CPU_POST_DEAD)
+		return NOTIFY_OK;
+
+	spin_lock(&blk_mq_cpu_notify_lock);
 
 	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
 		notify->notify(notify->data, action, cpu);
 
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
+	spin_unlock(&blk_mq_cpu_notify_lock);
 	return NOTIFY_OK;
 }
 
@@ -32,16 +35,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
 	BUG_ON(!notifier->notify);
 
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
+	spin_lock(&blk_mq_cpu_notify_lock);
 	list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
+	spin_unlock(&blk_mq_cpu_notify_lock);
 }
 
 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
+	spin_lock(&blk_mq_cpu_notify_lock);
 	list_del(&notifier->list);
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
+	spin_unlock(&blk_mq_cpu_notify_lock);
 }
 
 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a5f25f9..5fb26f7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -48,9 +48,14 @@ static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
 	return __blk_mq_get_ctx(q, get_cpu_light());
 }
 
-static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 {
 	spin_unlock(&ctx->cpu_lock);
+}
+
+static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+{
+	__blk_mq_put_ctx(ctx);
 	put_cpu_light();
 }
 
@@ -966,7 +971,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
 
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
+	if (action != CPU_POST_DEAD && action != CPU_POST_DEAD)
 		return;
 
 	/*
@@ -980,6 +985,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 		clear_bit(ctx->index_hw, hctx->ctx_map);
 	}
 	spin_unlock(&ctx->lock);
+	__blk_mq_put_ctx(ctx);
 
 	if (list_empty(&tmp))
 		return;
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 2b2204e..534b16e 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -74,4 +74,10 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu);
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
+#ifndef CONFIG_PREEMPT_RT_FULL
+#define lg_global_trylock_relax(name)	lg_global_lock(name)
+#else
+void lg_global_trylock_relax(struct lglock *lg);
+#endif
+
 #endif
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 116af6a..5b2cdf4 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -126,8 +126,7 @@ do { \
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
-	if (unlikely(__preempt_count_dec_and_test() || \
-				test_thread_flag(TIF_NEED_RESCHED_LAZY))) \
+	if (unlikely(__preempt_count_dec_and_test())) \
 		__preempt_schedule_context(); \
 } while (0)
 #else
diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
index 924c2d2..0065b08 100644
--- a/include/linux/rwsem_rt.h
+++ b/include/linux/rwsem_rt.h
@@ -20,7 +20,6 @@
 
 struct rw_semaphore {
 	struct rt_mutex		lock;
-	int			read_depth;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index ac6f08b..c0d1367 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -35,6 +35,7 @@ extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  */
 extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
 extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
+extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
 
 #define spin_lock(lock)				\
 	do {					\
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 041fada..ce00329 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -649,7 +649,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		smpboot_unpark_threads(cpu);
 		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
-		goto out_cancel;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index f2356df..9397974 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -105,3 +105,28 @@ void lg_global_unlock(struct lglock *lg)
 	preempt_enable_nort();
 }
 EXPORT_SYMBOL(lg_global_unlock);
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * HACK: If you use this, you get to keep the pieces.
+ * Used in queue_stop_cpus_work() when stop machinery
+ * is called from inactive CPU, so we can't schedule.
+ */
+# define lg_do_trylock_relax(l)			\
+	do {					\
+		while (!__rt_spin_trylock(l))	\
+			cpu_relax();		\
+	} while (0)
+
+void lg_global_trylock_relax(struct lglock *lg)
+{
+	int i;
+
+	lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
+	for_each_possible_cpu(i) {
+		lg_lock_ptr *lock;
+		lock = per_cpu_ptr(lg->lock, i);
+		lg_do_trylock_relax(lock);
+	}
+}
+#endif
diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c
index 5d17727..055a3df 100644
--- a/kernel/locking/rt.c
+++ b/kernel/locking/rt.c
@@ -180,12 +180,14 @@ EXPORT_SYMBOL(_mutex_unlock);
  */
 int __lockfunc rt_write_trylock(rwlock_t *rwlock)
 {
-	int ret = rt_mutex_trylock(&rwlock->lock);
+	int ret;
 
-	if (ret) {
+	migrate_disable();
+	ret = rt_mutex_trylock(&rwlock->lock);
+	if (ret)
 		rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
-		migrate_disable();
-	}
+	else
+		migrate_enable();
 
 	return ret;
 }
@@ -212,11 +214,13 @@ int __lockfunc rt_read_trylock(rwlock_t *rwlock)
 	 * write locked.
 	 */
 	if (rt_mutex_owner(lock) != current) {
+		migrate_disable();
 		ret = rt_mutex_trylock(lock);
-		if (ret) {
+		if (ret)
 			rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
-			migrate_disable();
-		}
+		else
+			migrate_enable();
+
 	} else if (!rwlock->read_depth) {
 		ret = 0;
 	}
@@ -240,13 +244,14 @@ void __lockfunc rt_read_lock(rwlock_t *rwlock)
 {
 	struct rt_mutex *lock = &rwlock->lock;
 
+
 	/*
 	 * recursive read locks succeed when current owns the lock
 	 */
 	if (rt_mutex_owner(lock) != current) {
-		rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
-		__rt_spin_lock(lock);
 		migrate_disable();
+		rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+		__rt_spin_lock(lock);
 	}
 	rwlock->read_depth++;
 }
@@ -316,10 +321,8 @@ EXPORT_SYMBOL(rt_up_write);
 
 void  rt_up_read(struct rw_semaphore *rwsem)
 {
-	if (--rwsem->read_depth == 0) {
-		rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
-		rt_mutex_unlock(&rwsem->lock);
-	}
+	rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
+	rt_mutex_unlock(&rwsem->lock);
 }
 EXPORT_SYMBOL(rt_up_read);
 
@@ -330,7 +333,6 @@ EXPORT_SYMBOL(rt_up_read);
 void  rt_downgrade_write(struct rw_semaphore *rwsem)
 {
 	BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
-	rwsem->read_depth = 1;
 }
 EXPORT_SYMBOL(rt_downgrade_write);
 
@@ -367,37 +369,20 @@ void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
 
 int  rt_down_read_trylock(struct rw_semaphore *rwsem)
 {
-	struct rt_mutex *lock = &rwsem->lock;
-	int ret = 1;
-
-	/*
-	 * recursive read locks succeed when current owns the rwsem,
-	 * but not when read_depth == 0 which means that the rwsem is
-	 * write locked.
-	 */
-	if (rt_mutex_owner(lock) != current) {
-		ret = rt_mutex_trylock(&rwsem->lock);
-		if (ret)
-			rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
-	} else if (!rwsem->read_depth) {
-		ret = 0;
-	}
+	int ret;
 
+	ret = rt_mutex_trylock(&rwsem->lock);
 	if (ret)
-		rwsem->read_depth++;
+		rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
+
 	return ret;
 }
 EXPORT_SYMBOL(rt_down_read_trylock);
 
 static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
 {
-	struct rt_mutex *lock = &rwsem->lock;
-
-	if (rt_mutex_owner(lock) != current) {
-		rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
-		rt_mutex_lock(&rwsem->lock);
-	}
-	rwsem->read_depth++;
+	rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
+	rt_mutex_lock(&rwsem->lock);
 }
 
 void  rt_down_read(struct rw_semaphore *rwsem)
@@ -422,7 +407,6 @@ void  __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
 	debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
 	lockdep_init_map(&rwsem->dep_map, name, key, 0);
 #endif
-	rwsem->read_depth = 0;
 	rwsem->lock.save_state = 0;
 }
 EXPORT_SYMBOL(__rt_rwsem_init);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 42f4f28..5c5cc76 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1001,6 +1001,11 @@ void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
 }
 EXPORT_SYMBOL(rt_spin_unlock_wait);
 
+int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
+{
+	return rt_mutex_trylock(lock);
+}
+
 int __lockfunc rt_spin_trylock(spinlock_t *lock)
 {
 	int ret = rt_mutex_trylock(&lock->lock);
@@ -1045,12 +1050,12 @@ int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
 	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
 	if (atomic_add_unless(atomic, -1, 1))
 		return 0;
+	migrate_disable();
 	rt_spin_lock(lock);
-	if (atomic_dec_and_test(atomic)){
-		migrate_disable();
+	if (atomic_dec_and_test(atomic))
 		return 1;
-	}
 	rt_spin_unlock(lock);
+	migrate_enable();
 	return 0;
 }
 EXPORT_SYMBOL(atomic_dec_and_spin_lock);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index aaae9f1..bcbae9c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -266,7 +266,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 	struct irq_cpu_stop_queue_work_info call_args;
 	struct multi_stop_data msdata;
 
-	preempt_disable();
+	preempt_disable_nort();
 	msdata = (struct multi_stop_data){
 		.fn = fn,
 		.data = arg,
@@ -299,7 +299,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 	 * This relies on the stopper workqueues to be FIFO.
 	 */
 	if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
-		preempt_enable();
+		preempt_enable_nort();
 		return -ENOENT;
 	}
 
@@ -313,7 +313,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 				 &irq_cpu_stop_queue_work,
 				 &call_args, 1);
 	lg_local_unlock(&stop_cpus_lock);
-	preempt_enable();
+	preempt_enable_nort();
 
 	wait_for_stop_done(&done);
 
@@ -346,7 +346,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
 static void queue_stop_cpus_work(const struct cpumask *cpumask,
 				 cpu_stop_fn_t fn, void *arg,
-				 struct cpu_stop_done *done)
+				 struct cpu_stop_done *done, bool inactive)
 {
 	struct cpu_stop_work *work;
 	unsigned int cpu;
@@ -360,11 +360,13 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	}
 
 	/*
-	 * Disable preemption while queueing to avoid getting
-	 * preempted by a stopper which might wait for other stoppers
-	 * to enter @fn which can lead to deadlock.
+	 * Make sure that all work is queued on all cpus before
+	 * any of the cpus can execute it.
 	 */
-	lg_global_lock(&stop_cpus_lock);
+	if (!inactive)
+		lg_global_lock(&stop_cpus_lock);
+	else
+		lg_global_trylock_relax(&stop_cpus_lock);
 	for_each_cpu(cpu, cpumask)
 		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
 	lg_global_unlock(&stop_cpus_lock);
@@ -376,7 +378,7 @@ static int __stop_cpus(const struct cpumask *cpumask,
 	struct cpu_stop_done done;
 
 	cpu_stop_init_done(&done, cpumask_weight(cpumask));
-	queue_stop_cpus_work(cpumask, fn, arg, &done);
+	queue_stop_cpus_work(cpumask, fn, arg, &done, false);
 	wait_for_stop_done(&done);
 	return done.executed ? done.ret : -ENOENT;
 }
@@ -572,6 +574,8 @@ static int __init cpu_stop_init(void)
 		INIT_LIST_HEAD(&stopper->works);
 	}
 
+	lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
+
 	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
 	stop_machine_initialized = true;
 	return 0;
@@ -667,11 +671,11 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 	set_state(&msdata, MULTI_STOP_PREPARE);
 	cpu_stop_init_done(&done, num_active_cpus());
 	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
-			     &done);
+			     &done, true);
 	ret = multi_cpu_stop(&msdata);
 
 	/* Busy wait for completion. */
-	while (!atomic_read(&done.nr_todo))
+	while (atomic_read(&done.nr_todo))
 		cpu_relax();
 
 	mutex_unlock(&stop_cpus_mutex);
diff --git a/kernel/timer.c b/kernel/timer.c
index 54596b5..8750875 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1461,6 +1461,19 @@ void run_local_timers(void)
 	 * the timer softirq.
 	 */
 #ifdef CONFIG_PREEMPT_RT_FULL
+
+#ifndef CONFIG_SMP
+	/*
+	 * The spin_do_trylock() later may fail as the lock may be hold before
+	 * the interrupt arrived. The spin-lock debugging code will raise a
+	 * warning if the try_lock fails on UP. Since this is only an
+	 * optimization for the FULL_NO_HZ case (not to run the timer softirq on
+	 * an nohz_full CPU) we don't really care and shedule the softirq.
+	 */
+	raise_softirq(TIMER_SOFTIRQ);
+	return;
+#endif
+
 	/* On RT, irq work runs from softirq */
 	if (irq_work_needs_cpu()) {
 		raise_softirq(TIMER_SOFTIRQ);
diff --git a/localversion-rt b/localversion-rt
index c3054d0..1445cd6 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt2
+-rt3

             reply	other threads:[~2014-05-03 17:01 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-03 17:01 Sebastian Andrzej Siewior [this message]
2014-05-05  3:26 ` [ANNOUNCE] 3.14.2-rt3 Mike Galbraith
2014-05-07  7:54 ` [patchlet] locking/rt: fix rt_read_lock() lockdep annotation Mike Galbraith
2014-05-07 13:48   ` Steven Rostedt
2014-05-07 14:01     ` Mike Galbraith
2014-05-07 14:20       ` Mike Galbraith
2014-05-09 13:27         ` Sebastian Andrzej Siewior
2014-05-09 14:18           ` Mike Galbraith
2014-05-09 14:39             ` Steven Rostedt
2014-05-07 16:27 ` [ANNOUNCE] 3.14.2-rt3 Joakim Hernberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140503170110.GA10652@linutronix.de \
    --to=bigeasy@linutronix.de \
    --cc=jkacur@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.