All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND] sched/preempt: fix cond_resched_lock() and cond_resched_softirq()
@ 2015-05-14 16:23 Konstantin Khlebnikov
  2015-05-27  9:09 ` Peter Zijlstra
  0 siblings, 1 reply; 3+ messages in thread
From: Konstantin Khlebnikov @ 2015-05-14 16:23 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, linux-kernel
  Cc: x86, Linus Torvalds, Oleg Nesterov, Steven Rostedt

These functions check should_resched() before unlocking spinlock/bh-enable:
preempt_count always non-zero => should_resched() always returns false.
cond_resched_lock() works iff spin_needbreak is set.

This patch adds argument "preempt_offset" to should_resched() and
rearranges preempt_count offset constants for that:

PREEMPT_OFFSET - offset after preempt_disable() (0 if CONFIG_PREEMPT_COUNT=n)
PREEMPT_LOCK_OFFSET - offset after spin_lock() (alias for PREEMPT_OFFSET)
SOFTIRQ_DISABLE_OFFSET - offset after local_bh_disable()
SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh()

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

---
 arch/powerpc/kvm/book3s_hv.c   |    2 +-
 arch/x86/include/asm/preempt.h |    4 ++--
 drivers/xen/preempt.c          |    2 +-
 include/asm-generic/preempt.h  |    4 ++--
 include/linux/preempt.h        |    5 +++--
 include/linux/preempt_mask.h   |   21 ++++++++++++++-------
 include/linux/sched.h          |    6 ------
 kernel/sched/core.c            |    6 +++---
 8 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d2ecc9..6e73b74c0c60 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2177,7 +2177,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		vc->runner = vcpu;
 		if (n_ceded == vc->n_runnable) {
 			kvmppc_vcore_blocked(vc);
-		} else if (should_resched()) {
+		} else if (should_resched(PREEMPT_LOCK_OFFSET)) {
 			vc->vcore_state = VCORE_PREEMPT;
 			/* Let something else run */
 			cond_resched_lock(&vc->lock);
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 8f3271842533..67b6cd00a44f 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
 /*
  * Returns true when we need to resched and can (barring IRQ state).
  */
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int preempt_offset)
 {
-	return unlikely(!raw_cpu_read_4(__preempt_count));
+	return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
 }
 
 #ifdef CONFIG_PREEMPT
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
index a1800c150839..46188cf41d08 100644
--- a/drivers/xen/preempt.c
+++ b/drivers/xen/preempt.c
@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
 asmlinkage __visible void xen_maybe_preempt_hcall(void)
 {
 	if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
-		     && should_resched())) {
+		     && should_resched(0))) {
 		/*
 		 * Clear flag as we may be rescheduled on a different
 		 * cpu.
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index eb6f9e6c3075..e91fb799a6da 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -71,9 +71,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
 /*
  * Returns true when we need to resched and can (barring IRQ state).
  */
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int offset)
 {
-	return unlikely(!preempt_count() && tif_need_resched());
+	return unlikely(preempt_count() == offset && tif_need_resched());
 }
 
 #ifdef CONFIG_PREEMPT
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index de83b4eb1642..8cd6725c5758 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -20,7 +20,8 @@
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
-#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
+#define preempt_count_dec_and_test() \
+	({ preempt_count_sub(1); should_resched(0); })
 #else
 #define preempt_count_add(val)	__preempt_count_add(val)
 #define preempt_count_sub(val)	__preempt_count_sub(val)
@@ -59,7 +60,7 @@ do { \
 
 #define preempt_check_resched() \
 do { \
-	if (should_resched()) \
+	if (should_resched(0)) \
 		__preempt_schedule(); \
 } while (0)
 
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index dbeec4d4a3be..a832c243da0b 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -39,7 +39,15 @@
 #define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
 #define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
 
+/*
+ * The preempt_count offset after preempt_disable();
+ */
+#ifdef CONFIG_PREEMPT_COUNT
 #define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
+#else
+#define PREEMPT_OFFSET	0
+#endif
+
 #define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
 #define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
 #define NMI_OFFSET	(1UL << NMI_SHIFT)
@@ -71,11 +79,10 @@
  */
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
-#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
+/*
+ * The preempt_count offset after spin_lock()
+ */
+#define PREEMPT_LOCK_OFFSET	PREEMPT_OFFSET
 
 /*
  * The preempt_count offset needed for things like:
@@ -90,7 +97,7 @@
  *
  * Work as expected.
  */
-#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET)
+#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET)
 
 /*
  * Are we running in atomic context?  WARNING: this macro cannot
@@ -106,7 +113,7 @@
  * (used by the scheduler, *after* releasing the kernel lock)
  */
 #define in_atomic_preempt_off() \
-		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_OFFSET)
 
 #ifdef CONFIG_PREEMPT_COUNT
 # define preemptible()	(preempt_count() == 0 && !irqs_disabled())
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..61f4f2d5c882 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2834,12 +2834,6 @@ extern int _cond_resched(void);
 
 extern int __cond_resched_lock(spinlock_t *lock);
 
-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_LOCK_OFFSET	PREEMPT_OFFSET
-#else
-#define PREEMPT_LOCK_OFFSET	0
-#endif
-
 #define cond_resched_lock(lock) ({				\
 	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
 	__cond_resched_lock(lock);				\
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe22f7510bce..087bf36ecd46 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4223,7 +4223,7 @@ SYSCALL_DEFINE0(sched_yield)
 
 int __sched _cond_resched(void)
 {
-	if (should_resched()) {
+	if (should_resched(0)) {
 		preempt_schedule_common();
 		return 1;
 	}
@@ -4241,7 +4241,7 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int __cond_resched_lock(spinlock_t *lock)
 {
-	int resched = should_resched();
+	int resched = should_resched(PREEMPT_LOCK_OFFSET);
 	int ret = 0;
 
 	lockdep_assert_held(lock);
@@ -4263,7 +4263,7 @@ int __sched __cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (should_resched()) {
+	if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
 		local_bh_enable();
 		preempt_schedule_common();
 		local_bh_disable();


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH RESEND] sched/preempt: fix cond_resched_lock() and cond_resched_softirq()
  2015-05-14 16:23 [PATCH RESEND] sched/preempt: fix cond_resched_lock() and cond_resched_softirq() Konstantin Khlebnikov
@ 2015-05-27  9:09 ` Peter Zijlstra
  2015-05-27  9:19   ` Konstantin Khlebnikov
  0 siblings, 1 reply; 3+ messages in thread
From: Peter Zijlstra @ 2015-05-27  9:09 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Ingo Molnar, linux-kernel, x86, Linus Torvalds, Oleg Nesterov,
	Steven Rostedt

On Thu, May 14, 2015 at 07:23:27PM +0300, Konstantin Khlebnikov wrote:
> These functions check should_resched() before unlocking spinlock/bh-enable:
> preempt_count always non-zero => should_resched() always returns false.
> cond_resched_lock() works iff spin_needbreak is set.
> 
> This patch adds argument "preempt_offset" to should_resched() and
> rearranges preempt_count offset constants for that:
> 
> PREEMPT_OFFSET - offset after preempt_disable() (0 if CONFIG_PREEMPT_COUNT=n)
> PREEMPT_LOCK_OFFSET - offset after spin_lock() (alias for PREEMPT_OFFSET)
> SOFTIRQ_DISABLE_OFFSET - offset after local_bh_disable()
> SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh()
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

Sorry, but it doesn't apply anymore because of that whole
pagefault_disable() muck we merged.

> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 48d3c5d2ecc9..6e73b74c0c60 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2177,7 +2177,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  		vc->runner = vcpu;
>  		if (n_ceded == vc->n_runnable) {
>  			kvmppc_vcore_blocked(vc);
> -		} else if (should_resched()) {
> +		} else if (should_resched(PREEMPT_LOCK_OFFSET)) {

I'm thinking this wants to be: need_resched() ?

>  			vc->vcore_state = VCORE_PREEMPT;
>  			/* Let something else run */
>  			cond_resched_lock(&vc->lock);

> diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
> index eb6f9e6c3075..e91fb799a6da 100644
> --- a/include/asm-generic/preempt.h
> +++ b/include/asm-generic/preempt.h
> @@ -71,9 +71,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
>  /*
>   * Returns true when we need to resched and can (barring IRQ state).
>   */
> -static __always_inline bool should_resched(void)
> +static __always_inline bool should_resched(int offset)
>  {
> -	return unlikely(!preempt_count() && tif_need_resched());
> +	return unlikely(preempt_count() == offset && tif_need_resched());
>  }
>  
>  #ifdef CONFIG_PREEMPT

So the reason I held off on this patch for a wee bit is because I don't
like the should_resched() change you did; although I fully understand
why you did it.

That said, I could not come up with anything better either and I suppose
that once we fix that ppc-kvm user, there really isn't a user left
outside of core code and thus we can deal with a slightly dangerous
function.

I did not really look, but it would be good if we could also get rid of
the Xen usage.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH RESEND] sched/preempt: fix cond_resched_lock() and cond_resched_softirq()
  2015-05-27  9:09 ` Peter Zijlstra
@ 2015-05-27  9:19   ` Konstantin Khlebnikov
  0 siblings, 0 replies; 3+ messages in thread
From: Konstantin Khlebnikov @ 2015-05-27  9:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, x86, Linus Torvalds, Oleg Nesterov,
	Steven Rostedt

On 27.05.2015 12:09, Peter Zijlstra wrote:
> On Thu, May 14, 2015 at 07:23:27PM +0300, Konstantin Khlebnikov wrote:
>> These functions check should_resched() before unlocking spinlock/bh-enable:
>> preempt_count always non-zero => should_resched() always returns false.
>> cond_resched_lock() works iff spin_needbreak is set.
>>
>> This patch adds argument "preempt_offset" to should_resched() and
>> rearranges preempt_count offset constants for that:
>>
>> PREEMPT_OFFSET - offset after preempt_disable() (0 if CONFIG_PREEMPT_COUNT=n)
>> PREEMPT_LOCK_OFFSET - offset after spin_lock() (alias for PREEMPT_OFFSET)
>> SOFTIRQ_DISABLE_OFFSET - offset after local_bh_disable()
>> SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh()
>>
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>
> Sorry, but it doesn't apply anymore because of that whole
> pagefault_disable() muck we merged.

No problem. I'll dig into this stuff again later.
This bug is harmless and it has been here for ages.

>
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 48d3c5d2ecc9..6e73b74c0c60 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -2177,7 +2177,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>>   		vc->runner = vcpu;
>>   		if (n_ceded == vc->n_runnable) {
>>   			kvmppc_vcore_blocked(vc);
>> -		} else if (should_resched()) {
>> +		} else if (should_resched(PREEMPT_LOCK_OFFSET)) {
>
> I'm thinking this wants to be: need_resched() ?
>
>>   			vc->vcore_state = VCORE_PREEMPT;
>>   			/* Let something else run */
>>   			cond_resched_lock(&vc->lock);
>
>> diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
>> index eb6f9e6c3075..e91fb799a6da 100644
>> --- a/include/asm-generic/preempt.h
>> +++ b/include/asm-generic/preempt.h
>> @@ -71,9 +71,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
>>   /*
>>    * Returns true when we need to resched and can (barring IRQ state).
>>    */
>> -static __always_inline bool should_resched(void)
>> +static __always_inline bool should_resched(int offset)
>>   {
>> -	return unlikely(!preempt_count() && tif_need_resched());
>> +	return unlikely(preempt_count() == offset && tif_need_resched());
>>   }
>>
>>   #ifdef CONFIG_PREEMPT
>
> So the reason I held off on this patch for a wee bit is because I don't
> like the should_resched() change you did; although I fully understand
> why you did it.
>
> That said, I could not come up with anything better either and I suppose
> that once we fix that ppc-kvm user, there really isn't a user left
> outside of core code and thus we can deal with a slightly dangerous
> function.
>
> I did not really look, but it would be good if we could also get rid of
> the Xen usage.
>

-- 
Konstantin

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-05-27  9:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-14 16:23 [PATCH RESEND] sched/preempt: fix cond_resched_lock() and cond_resched_softirq() Konstantin Khlebnikov
2015-05-27  9:09 ` Peter Zijlstra
2015-05-27  9:19   ` Konstantin Khlebnikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.