All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched: fix divide by zero at {thread_group,task}_times
@ 2012-08-08  9:27 Stanislaw Gruszka
  2012-08-08 19:50 ` Mike Galbraith
  2012-08-13 16:50 ` [tip:sched/urgent] sched: fix divide by zero at {thread_group, task}_times tip-bot for Stanislaw Gruszka
  0 siblings, 2 replies; 6+ messages in thread
From: Stanislaw Gruszka @ 2012-08-08  9:27 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Ingo Molnar

On architectures where cputime_t is 64 bit type, is possible to trigger
divide by zero on do_div(temp, (__force u32) total) line, if total is a
non zero number but has lower 32 bit's zeroed. Removing casting is not
a good solution since some do_div() implementations do cast to u32
internally.

This problem can be triggered in practice on very long lived processes:

  PID: 2331   TASK: ffff880472814b00  CPU: 2   COMMAND: "oraagent.bin"
   #0 [ffff880472a51b70] machine_kexec at ffffffff8103214b
   #1 [ffff880472a51bd0] crash_kexec at ffffffff810b91c2
   #2 [ffff880472a51ca0] oops_end at ffffffff814f0b00
   #3 [ffff880472a51cd0] die at ffffffff8100f26b
   #4 [ffff880472a51d00] do_trap at ffffffff814f03f4
   #5 [ffff880472a51d60] do_divide_error at ffffffff8100cfff
   #6 [ffff880472a51e00] divide_error at ffffffff8100be7b
      [exception RIP: thread_group_times+0x56]
      RIP: ffffffff81056a16  RSP: ffff880472a51eb8  RFLAGS: 00010046
      RAX: bc3572c9fe12d194  RBX: ffff880874150800  RCX: 0000000110266fad
      RDX: 0000000000000000  RSI: ffff880472a51eb8  RDI: 001038ae7d9633dc
      RBP: ffff880472a51ef8   R8: 00000000b10a3a64   R9: ffff880874150800
      R10: 00007fcba27ab680  R11: 0000000000000202  R12: ffff880472a51f08
      R13: ffff880472a51f10  R14: 0000000000000000  R15: 0000000000000007
      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
   #7 [ffff880472a51f00] do_sys_times at ffffffff8108845d
   #8 [ffff880472a51f40] sys_times at ffffffff81088524
   #9 [ffff880472a51f80] system_call_fastpath at ffffffff8100b0f2
      RIP: 0000003808caac3a  RSP: 00007fcba27ab6d8  RFLAGS: 00000202
      RAX: 0000000000000064  RBX: ffffffff8100b0f2  RCX: 0000000000000000
      RDX: 00007fcba27ab6e0  RSI: 000000000076d58e  RDI: 00007fcba27ab6e0
      RBP: 00007fcba27ab700   R8: 0000000000000020   R9: 000000000000091b
      R10: 00007fcba27ab680  R11: 0000000000000202  R12: 00007fff9ca41940
      R13: 0000000000000000  R14: 00007fcba27ac9c0  R15: 00007fff9ca41940
      ORIG_RAX: 0000000000000064  CS: 0033  SS: 002b

Cc: stable@vger.kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 kernel/sched/core.c |   34 ++++++++++++++++++++--------------
 1 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 468bdd4..1d441b0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		do_div(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched: fix divide by zero at {thread_group,task}_times
  2012-08-08  9:27 [PATCH] sched: fix divide by zero at {thread_group,task}_times Stanislaw Gruszka
@ 2012-08-08 19:50 ` Mike Galbraith
  2012-08-08 20:08   ` Peter Zijlstra
  2012-08-13 16:50 ` [tip:sched/urgent] sched: fix divide by zero at {thread_group, task}_times tip-bot for Stanislaw Gruszka
  1 sibling, 1 reply; 6+ messages in thread
From: Mike Galbraith @ 2012-08-08 19:50 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: linux-kernel, Peter Zijlstra, Ingo Molnar

On Wed, 2012-08-08 at 11:27 +0200, Stanislaw Gruszka wrote: 
> On architectures where cputime_t is 64 bit type, is possible to trigger
> divide by zero on do_div(temp, (__force u32) total) line, if total is a
> non zero number but has lower 32 bit's zeroed. Removing casting is not
> a good solution since some do_div() implementations do cast to u32
> internally.

I wonder if do_div() vs div64_u64() makes a big difference.  I swiped
all the kernel bits, and took them to userland.  32bit built do_div()
and div64_u64() both sucked equally compared to 64bit.. on Q6600.  Too
bad P4 box kicked the bucket...

boring numbers..

marge:~ # gcc -O2 -m32 -DDO_DIV -g -o xx xx.c
marge:~ # taskset -c 0 perf stat -- taskset -c 3 ./xx

 Performance counter stats for 'taskset -c 3 ./xx':

      13067.570787 task-clock:HG             #    0.999 CPUs utilized          
              1117 context-switches:HG       #    0.085 K/sec                  
                11 CPU-migrations:HG         #    0.001 K/sec                  
               225 page-faults:HG            #    0.017 K/sec                  
       31211154646 cycles:HG                 #    2.388 GHz                     [50.00%]
   <not supported> stalled-cycles-frontend:HG
   <not supported> stalled-cycles-backend:HG
       69004479084 instructions:HG           #    2.21  insns per cycle         [75.00%]
       16004049251 branches:HG               # 1224.715 M/sec                   [75.00%]
            106179 branch-misses:HG          #    0.00% of all branches         [75.00%]

      13.074907234 seconds time elapsed

marge:~ # gcc -O2 -m32 -g -o xx xx.c
marge:~ # taskset -c 0 perf stat -- taskset -c 3 ./xx

 Performance counter stats for 'taskset -c 3 ./xx':

      13065.936608 task-clock:HG             #    0.999 CPUs utilized          
              1117 context-switches:HG       #    0.085 K/sec                  
                 3 CPU-migrations:HG         #    0.000 K/sec                  
               225 page-faults:HG            #    0.017 K/sec                  
       31202249014 cycles:HG                 #    2.388 GHz                     [50.01%]
   <not supported> stalled-cycles-frontend:HG
   <not supported> stalled-cycles-backend:HG
       69013127982 instructions:HG           #    2.21  insns per cycle         [75.00%]
       16003545136 branches:HG               # 1224.830 M/sec                   [75.00%]
            106156 branch-misses:HG          #    0.00% of all branches         [75.00%]

      13.073238280 seconds time elapsed

64bit is loads faster.

marge:~ # gcc -O2 -m64 -DDO_DIV -g -o xx xx.c
marge:~ # taskset -c 0 perf stat -- taskset -c 3 ./xx

 Performance counter stats for 'taskset -c 3 ./xx':

       5575.994225 task-clock:HG             #    0.999 CPUs utilized          
               473 context-switches:HG       #    0.085 K/sec                  
                 5 CPU-migrations:HG         #    0.001 K/sec                  
               251 page-faults:HG            #    0.045 K/sec                  
       13311760132 cycles:HG                 #    2.387 GHz                     [50.00%]
   <not supported> stalled-cycles-frontend:HG
   <not supported> stalled-cycles-backend:HG
        7003731700 instructions:HG           #    0.53  insns per cycle         [75.04%]
        1001242479 branches:HG               #  179.563 M/sec                   [75.05%]
             37489 branch-misses:HG          #    0.00% of all branches         [74.98%]

       5.579075361 seconds time elapsed

marge:~ # gcc -O2 -m64 -g -o xx xx.c
marge:~ # taskset -c 0 perf stat -- taskset -c 3 ./xx

 Performance counter stats for 'taskset -c 3 ./xx':

       5576.756281 task-clock:HG             #    0.999 CPUs utilized          
               482 context-switches:HG       #    0.086 K/sec                  
                 5 CPU-migrations:HG         #    0.001 K/sec                  
               251 page-faults:HG            #    0.045 K/sec                  
       13316232700 cycles:HG                 #    2.388 GHz                     [49.98%]
   <not supported> stalled-cycles-frontend:HG
   <not supported> stalled-cycles-backend:HG
        7006965347 instructions:HG           #    0.53  insns per cycle         [74.99%]
        1001095727 branches:HG               #  179.512 M/sec                   [75.04%]
             39450 branch-misses:HG          #    0.00% of all branches         [75.01%]

       5.580954618 seconds time elapsed


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched: fix divide by zero at {thread_group,task}_times
  2012-08-08 19:50 ` Mike Galbraith
@ 2012-08-08 20:08   ` Peter Zijlstra
  2012-08-13 11:48     ` Stanislaw Gruszka
  0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2012-08-08 20:08 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: Stanislaw Gruszka, linux-kernel, Ingo Molnar

On Wed, 2012-08-08 at 21:50 +0200, Mike Galbraith wrote:
> 32bit built do_div()
> and div64_u64() both sucked equally compared to 64bit 

/me peeks at div64_u64 fallback implementation and sees why, it still
does a single div, it does some neat fls tricks.

Ok, no point in avoiding this then.. 

I did the below little edit, no point in mixing the old and new
primitives.. those __force things annoy me, but I guess otherwise we'll
upset sparse.

---
Index: linux-2.6/kernel/sched/core.c
===================================================================
--- linux-2.6.orig/kernel/sched/core.c
+++ linux-2.6/kernel/sched/core.c
@@ -3149,7 +3149,7 @@ static cputime_t scale_utime(cputime_t u
 	temp *= (__force u64) utime;
 
 	if (sizeof(cputime_t) == 4)
-		do_div(temp, (__force u32) total);
+		temp = div_u64(temp, (__force u32) total);
 	else
 		temp = div64_u64(temp, (__force u64) total);
 



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched: fix divide by zero at {thread_group,task}_times
  2012-08-08 20:08   ` Peter Zijlstra
@ 2012-08-13 11:48     ` Stanislaw Gruszka
  2012-08-13 13:59       ` Peter Zijlstra
  0 siblings, 1 reply; 6+ messages in thread
From: Stanislaw Gruszka @ 2012-08-13 11:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mike Galbraith, linux-kernel, Ingo Molnar, Martin Schwidefsky

On Wed, Aug 08, 2012 at 10:08:20PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-08-08 at 21:50 +0200, Mike Galbraith wrote:
> > 32bit built do_div()
> > and div64_u64() both sucked equally compared to 64bit 
> 
> /me peeks at div64_u64 fallback implementation and sees why, it still
> does a single div, it does some neat fls tricks.
> 
> Ok, no point in avoiding this then.. 
> 
> I did the below little edit, no point in mixing the old and new
> primitives.. those __force things annoy me, but I guess otherwise we'll
> upset sparse.

Yeah, __force is needed for sparse, since we marked cputime_t with 
__nocast (by commit 648616343cdbe904c585a6c12e323d3b3c72e46f, which btw
looks like very nice cleanup at whole).

> Index: linux-2.6/kernel/sched/core.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched/core.c
> +++ linux-2.6/kernel/sched/core.c
> @@ -3149,7 +3149,7 @@ static cputime_t scale_utime(cputime_t u
>  	temp *= (__force u64) utime;
>  
>  	if (sizeof(cputime_t) == 4)
> -		do_div(temp, (__force u32) total);
> +		temp = div_u64(temp, (__force u32) total);
>  	else
>  		temp = div64_u64(temp, (__force u64) total);

Is this or will be queued (I do not see it queued anywhere)? Or should
I repost with above change?

Thanks
Stanislaw

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched: fix divide by zero at {thread_group,task}_times
  2012-08-13 11:48     ` Stanislaw Gruszka
@ 2012-08-13 13:59       ` Peter Zijlstra
  0 siblings, 0 replies; 6+ messages in thread
From: Peter Zijlstra @ 2012-08-13 13:59 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: Mike Galbraith, linux-kernel, Ingo Molnar, Martin Schwidefsky

On Mon, 2012-08-13 at 13:48 +0200, Stanislaw Gruszka wrote:
> 
> Is this or will be queued (I do not see it queued anywhere)? Or should
> I repost with above change? 

I've got it.. my queue lives at:

 http://programming.kicks-ass.net/sekrit/patches.tar.bz2

until Ingo gets around to stuffing them into -tip. That's usually only a
few days, but he's currently traveling so latency is somewhat increased.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:sched/urgent] sched: fix divide by zero at {thread_group, task}_times
  2012-08-08  9:27 [PATCH] sched: fix divide by zero at {thread_group,task}_times Stanislaw Gruszka
  2012-08-08 19:50 ` Mike Galbraith
@ 2012-08-13 16:50 ` tip-bot for Stanislaw Gruszka
  1 sibling, 0 replies; 6+ messages in thread
From: tip-bot for Stanislaw Gruszka @ 2012-08-13 16:50 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, a.p.zijlstra, tglx, sgruszka

Commit-ID:  bea6832cc8c4a0a9a65dd17da6aaa657fe27bc3e
Gitweb:     http://git.kernel.org/tip/bea6832cc8c4a0a9a65dd17da6aaa657fe27bc3e
Author:     Stanislaw Gruszka <sgruszka@redhat.com>
AuthorDate: Wed, 8 Aug 2012 11:27:15 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Mon, 13 Aug 2012 18:41:54 +0200

sched: fix divide by zero at {thread_group,task}_times

On architectures where cputime_t is 64 bit type, is possible to trigger
divide by zero on do_div(temp, (__force u32) total) line, if total is a
non zero number but has lower 32 bit's zeroed. Removing casting is not
a good solution since some do_div() implementations do cast to u32
internally.

This problem can be triggered in practice on very long lived processes:

  PID: 2331   TASK: ffff880472814b00  CPU: 2   COMMAND: "oraagent.bin"
   #0 [ffff880472a51b70] machine_kexec at ffffffff8103214b
   #1 [ffff880472a51bd0] crash_kexec at ffffffff810b91c2
   #2 [ffff880472a51ca0] oops_end at ffffffff814f0b00
   #3 [ffff880472a51cd0] die at ffffffff8100f26b
   #4 [ffff880472a51d00] do_trap at ffffffff814f03f4
   #5 [ffff880472a51d60] do_divide_error at ffffffff8100cfff
   #6 [ffff880472a51e00] divide_error at ffffffff8100be7b
      [exception RIP: thread_group_times+0x56]
      RIP: ffffffff81056a16  RSP: ffff880472a51eb8  RFLAGS: 00010046
      RAX: bc3572c9fe12d194  RBX: ffff880874150800  RCX: 0000000110266fad
      RDX: 0000000000000000  RSI: ffff880472a51eb8  RDI: 001038ae7d9633dc
      RBP: ffff880472a51ef8   R8: 00000000b10a3a64   R9: ffff880874150800
      R10: 00007fcba27ab680  R11: 0000000000000202  R12: ffff880472a51f08
      R13: ffff880472a51f10  R14: 0000000000000000  R15: 0000000000000007
      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
   #7 [ffff880472a51f00] do_sys_times at ffffffff8108845d
   #8 [ffff880472a51f40] sys_times at ffffffff81088524
   #9 [ffff880472a51f80] system_call_fastpath at ffffffff8100b0f2
      RIP: 0000003808caac3a  RSP: 00007fcba27ab6d8  RFLAGS: 00000202
      RAX: 0000000000000064  RBX: ffffffff8100b0f2  RCX: 0000000000000000
      RDX: 00007fcba27ab6e0  RSI: 000000000076d58e  RDI: 00007fcba27ab6e0
      RBP: 00007fcba27ab700   R8: 0000000000000020   R9: 000000000000091b
      R10: 00007fcba27ab680  R11: 0000000000000202  R12: 00007fff9ca41940
      R13: 0000000000000000  R14: 00007fcba27ac9c0  R15: 00007fff9ca41940
      ORIG_RAX: 0000000000000064  CS: 0033  SS: 002b

Cc: stable@vger.kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120808092714.GA3580@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/core.c |   34 ++++++++++++++++++++--------------
 1 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2cb4e77..e2c5841 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-08-13 16:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-08  9:27 [PATCH] sched: fix divide by zero at {thread_group,task}_times Stanislaw Gruszka
2012-08-08 19:50 ` Mike Galbraith
2012-08-08 20:08   ` Peter Zijlstra
2012-08-13 11:48     ` Stanislaw Gruszka
2012-08-13 13:59       ` Peter Zijlstra
2012-08-13 16:50 ` [tip:sched/urgent] sched: fix divide by zero at {thread_group, task}_times tip-bot for Stanislaw Gruszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.