* [PATCH v2 1/3] MIPS: add a common mips_cyc2ns()
[not found] ` <cover.1270655886.git.wuzhangjin@gmail.com>
@ 2010-04-07 16:05 ` Wu Zhangin
2010-04-07 16:48 ` David Daney
2010-04-07 16:50 ` David Daney
2010-04-07 16:05 ` [PATCH v2 2/3] MIPS: cavium-octeon: rewrite the sched_clock() based on mips_cyc2ns() Wu Zhangin
2010-04-07 16:05 ` [PATCH v2 3/3] MIPS: r4k: Add a high resolution sched_clock() Wu Zhangin
2 siblings, 2 replies; 11+ messages in thread
From: Wu Zhangin @ 2010-04-07 16:05 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Wu Zhangjin, linux-mips, David Daney, Ralf Rösch
From: Wu Zhangjin <wuzhangjin@gmail.com>
Changes:
v1 -> v2:
o change the old mips_sched_clock() to mips_cyc2ns() and modify the
arguments to support 32bit.
o add 32bit support: use a smaller shift to avoid the quick overflow
of 64bit arithmatic and balance the overhead of the 128bit arithmatic
and the precision lost with the smaller shift.
----------------------
Because the high resolution sched_clock() for r4k has the same overflow
problem and solution mentioned in "MIPS: Octeon: Use non-overflowing
arithmetic in sched_clock".
"With typical mult and shift values, the calculation for Octeon's
sched_clock overflows when using 64-bit arithmetic. Use 128-bit
calculations instead."
To reduce the duplication, This patch abstracts the solution into an
inline funciton mips_cyc2ns() into arch/mips/include/asm/time.h from
arch/mips/cavium-octeon/csrc-octeon.c.
Two patches for Cavium and R4K will be sent out respectively to use this
common function.
Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
---
arch/mips/include/asm/time.h | 38 ++++++++++++++++++++++++++++++++++++++
1 files changed, 38 insertions(+), 0 deletions(-)
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index c7f1bfe..898f0e0 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -96,4 +96,42 @@ static inline void clockevent_set_clock(struct clock_event_device *cd,
clockevents_calc_mult_shift(cd, clock, 4);
}
+static inline unsigned long long mips_cyc2ns(u64 cyc, u64 mult, u64 shift)
+{
+#ifdef CONFIG_32BIT
+ /*
+ * To balance the overhead of 128bit-arithematic and the precision
+ * lost, we choose a smaller shift to avoid the quick overflow as the
+ * X86 & ARM does. please refer to arch/x86/kernel/tsc.c and
+ * arch/arm/plat-orion/time.c
+ */
+ return (cyc * mult) >> shift;
+#else /* CONFIG_64BIT */
+ /* 64-bit arithmatic can overflow, so use 128-bit. */
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 3))
+ u64 t1, t2, t3;
+ unsigned long long rv;
+
+ asm (
+ "dmultu\t%[cyc],%[mult]\n\t"
+ "nor\t%[t1],$0,%[shift]\n\t"
+ "mfhi\t%[t2]\n\t"
+ "mflo\t%[t3]\n\t"
+ "dsll\t%[t2],%[t2],1\n\t"
+ "dsrlv\t%[rv],%[t3],%[shift]\n\t"
+ "dsllv\t%[t1],%[t2],%[t1]\n\t"
+ "or\t%[rv],%[t1],%[rv]\n\t"
+ : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
+ : [cyc] "r" (cyc), [mult] "r" (mult), [shift] "r" (shift)
+ : "hi", "lo");
+ return rv;
+#else /* GCC > 4.3 do it the easy way. */
+ unsigned int __attribute__((mode(TI))) t = cyc;
+
+ t = (t * mult) >> shift;
+ return (unsigned long long)t;
+#endif
+#endif /* CONFIG_64BIT */
+}
+
#endif /* _ASM_TIME_H */
--
1.7.0.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/3] MIPS: add a common mips_cyc2ns()
2010-04-07 16:05 ` [PATCH v2 1/3] MIPS: add a common mips_cyc2ns() Wu Zhangin
@ 2010-04-07 16:48 ` David Daney
2010-04-08 9:36 ` Wu Zhangjin
2010-04-07 16:50 ` David Daney
1 sibling, 1 reply; 11+ messages in thread
From: David Daney @ 2010-04-07 16:48 UTC (permalink / raw)
To: Wu Zhangin; +Cc: Ralf Baechle, linux-mips, Ralf Rösch
On 04/07/2010 09:05 AM, Wu Zhangin wrote:
> From: Wu Zhangjin<wuzhangjin@gmail.com>
>
> Changes:
>
> v1 -> v2:
>
> o change the old mips_sched_clock() to mips_cyc2ns() and modify the
> arguments to support 32bit.
> o add 32bit support: use a smaller shift to avoid the quick overflow
> of 64bit arithmatic and balance the overhead of the 128bit arithmatic
> and the precision lost with the smaller shift.
>
> ----------------------
>
> Because the high resolution sched_clock() for r4k has the same overflow
> problem and solution mentioned in "MIPS: Octeon: Use non-overflowing
> arithmetic in sched_clock".
>
> "With typical mult and shift values, the calculation for Octeon's
> sched_clock overflows when using 64-bit arithmetic. Use 128-bit
> calculations instead."
>
> To reduce the duplication, This patch abstracts the solution into an
> inline funciton mips_cyc2ns() into arch/mips/include/asm/time.h from
> arch/mips/cavium-octeon/csrc-octeon.c.
>
> Two patches for Cavium and R4K will be sent out respectively to use this
> common function.
>
> Signed-off-by: Wu Zhangjin<wuzhangjin@gmail.com>
> ---
> arch/mips/include/asm/time.h | 38 ++++++++++++++++++++++++++++++++++++++
> 1 files changed, 38 insertions(+), 0 deletions(-)
>
> diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
> index c7f1bfe..898f0e0 100644
> --- a/arch/mips/include/asm/time.h
> +++ b/arch/mips/include/asm/time.h
> @@ -96,4 +96,42 @@ static inline void clockevent_set_clock(struct clock_event_device *cd,
> clockevents_calc_mult_shift(cd, clock, 4);
> }
>
> +static inline unsigned long long mips_cyc2ns(u64 cyc, u64 mult, u64 shift)
> +{
> +#ifdef CONFIG_32BIT
> + /*
> + * To balance the overhead of 128bit-arithematic and the precision
> + * lost, we choose a smaller shift to avoid the quick overflow as the
> + * X86& ARM does. please refer to arch/x86/kernel/tsc.c and
> + * arch/arm/plat-orion/time.c
> + */
> + return (cyc * mult)>> shift;
> +#else /* CONFIG_64BIT */
> + /* 64-bit arithmatic can overflow, so use 128-bit. */
> +#if (__GNUC__< 4) || ((__GNUC__ == 4)&& (__GNUC_MINOR__<= 3))
> + u64 t1, t2, t3;
> + unsigned long long rv;
> +
> + asm (
> + "dmultu\t%[cyc],%[mult]\n\t"
> + "nor\t%[t1],$0,%[shift]\n\t"
> + "mfhi\t%[t2]\n\t"
> + "mflo\t%[t3]\n\t"
> + "dsll\t%[t2],%[t2],1\n\t"
> + "dsrlv\t%[rv],%[t3],%[shift]\n\t"
> + "dsllv\t%[t1],%[t2],%[t1]\n\t"
> + "or\t%[rv],%[t1],%[rv]\n\t"
> + : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
> + : [cyc] "r" (cyc), [mult] "r" (mult), [shift] "r" (shift)
> + : "hi", "lo");
> + return rv;
> +#else /* GCC> 4.3 do it the easy way. */
> + unsigned int __attribute__((mode(TI))) t = cyc;
> +
> + t = (t * mult)>> shift;
> + return (unsigned long long)t;
> +#endif
> +#endif /* CONFIG_64BIT */
> +}
> +
> #endif /* _ASM_TIME_H */
It turns out that all GCC versions can handle the inline asm way. It
has also been noted that the default Debian compiler somehow has
problems with the 'easy way'.
Therefore, I would recommend gitting rid of the GCC version conditionals
and just leave the inline asm.
David Daney
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/3] MIPS: add a common mips_cyc2ns()
2010-04-07 16:48 ` David Daney
@ 2010-04-08 9:36 ` Wu Zhangjin
0 siblings, 0 replies; 11+ messages in thread
From: Wu Zhangjin @ 2010-04-08 9:36 UTC (permalink / raw)
To: David Daney; +Cc: Ralf Baechle, linux-mips, Ralf Rösch
On Wed, 2010-04-07 at 09:48 -0700, David Daney wrote:
[...]
> > arch/mips/include/asm/time.h | 38 ++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 38 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
> > index c7f1bfe..898f0e0 100644
> > --- a/arch/mips/include/asm/time.h
> > +++ b/arch/mips/include/asm/time.h
> > @@ -96,4 +96,42 @@ static inline void clockevent_set_clock(struct clock_event_device *cd,
> > clockevents_calc_mult_shift(cd, clock, 4);
> > }
> >
> > +static inline unsigned long long mips_cyc2ns(u64 cyc, u64 mult, u64 shift)
> > +{
> > +#ifdef CONFIG_32BIT
> > + /*
> > + * To balance the overhead of 128bit-arithematic and the precision
> > + * lost, we choose a smaller shift to avoid the quick overflow as the
> > + * X86& ARM does. please refer to arch/x86/kernel/tsc.c and
> > + * arch/arm/plat-orion/time.c
> > + */
> > + return (cyc * mult)>> shift;
> > +#else /* CONFIG_64BIT */
> > + /* 64-bit arithmatic can overflow, so use 128-bit. */
> > +#if (__GNUC__< 4) || ((__GNUC__ == 4)&& (__GNUC_MINOR__<= 3))
> > + u64 t1, t2, t3;
> > + unsigned long long rv;
> > +
> > + asm (
> > + "dmultu\t%[cyc],%[mult]\n\t"
> > + "nor\t%[t1],$0,%[shift]\n\t"
> > + "mfhi\t%[t2]\n\t"
> > + "mflo\t%[t3]\n\t"
> > + "dsll\t%[t2],%[t2],1\n\t"
> > + "dsrlv\t%[rv],%[t3],%[shift]\n\t"
> > + "dsllv\t%[t1],%[t2],%[t1]\n\t"
> > + "or\t%[rv],%[t1],%[rv]\n\t"
> > + : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
> > + : [cyc] "r" (cyc), [mult] "r" (mult), [shift] "r" (shift)
> > + : "hi", "lo");
> > + return rv;
> > +#else /* GCC> 4.3 do it the easy way. */
> > + unsigned int __attribute__((mode(TI))) t = cyc;
> > +
> > + t = (t * mult)>> shift;
> > + return (unsigned long long)t;
> > +#endif
> > +#endif /* CONFIG_64BIT */
> > +}
> > +
> > #endif /* _ASM_TIME_H */
>
> It turns out that all GCC versions can handle the inline asm way. It
> has also been noted that the default Debian compiler somehow has
> problems with the 'easy way'.
>
> Therefore, I would recommend gitting rid of the GCC version conditionals
> and just leave the inline asm.
Ok, will only reserve the asm way in the next revision, thanks!
Regards,
Wu Zhangjin
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 1/3] MIPS: add a common mips_cyc2ns()
2010-04-07 16:05 ` [PATCH v2 1/3] MIPS: add a common mips_cyc2ns() Wu Zhangin
2010-04-07 16:48 ` David Daney
@ 2010-04-07 16:50 ` David Daney
2010-04-08 9:32 ` Wu Zhangjin
1 sibling, 1 reply; 11+ messages in thread
From: David Daney @ 2010-04-07 16:50 UTC (permalink / raw)
To: Wu Zhangin; +Cc: Ralf Baechle, linux-mips, Ralf Rösch
On 04/07/2010 09:05 AM, Wu Zhangin wrote:
> From: Wu Zhangjin<wuzhangjin@gmail.com>
>
> Changes:
>
> v1 -> v2:
>
> o change the old mips_sched_clock() to mips_cyc2ns() and modify the
> arguments to support 32bit.
> o add 32bit support: use a smaller shift to avoid the quick overflow
> of 64bit arithmatic and balance the overhead of the 128bit arithmatic
> and the precision lost with the smaller shift.
>
> ----------------------
>
> Because the high resolution sched_clock() for r4k has the same overflow
> problem and solution mentioned in "MIPS: Octeon: Use non-overflowing
> arithmetic in sched_clock".
>
> "With typical mult and shift values, the calculation for Octeon's
> sched_clock overflows when using 64-bit arithmetic. Use 128-bit
> calculations instead."
>
> To reduce the duplication, This patch abstracts the solution into an
> inline funciton mips_cyc2ns() into arch/mips/include/asm/time.h from
> arch/mips/cavium-octeon/csrc-octeon.c.
>
> Two patches for Cavium and R4K will be sent out respectively to use this
> common function.
>
> Signed-off-by: Wu Zhangjin<wuzhangjin@gmail.com>
> ---
> arch/mips/include/asm/time.h | 38 ++++++++++++++++++++++++++++++++++++++
> 1 files changed, 38 insertions(+), 0 deletions(-)
>
> diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
> index c7f1bfe..898f0e0 100644
> --- a/arch/mips/include/asm/time.h
> +++ b/arch/mips/include/asm/time.h
> @@ -96,4 +96,42 @@ static inline void clockevent_set_clock(struct clock_event_device *cd,
> clockevents_calc_mult_shift(cd, clock, 4);
> }
>
> +static inline unsigned long long mips_cyc2ns(u64 cyc, u64 mult, u64 shift)
> +{
> +#ifdef CONFIG_32BIT
> + /*
> + * To balance the overhead of 128bit-arithematic and the precision
> + * lost, we choose a smaller shift to avoid the quick overflow as the
> + * X86& ARM does. please refer to arch/x86/kernel/tsc.c and
> + * arch/arm/plat-orion/time.c
> + */
> + return (cyc * mult)>> shift;
Have you tested that on a 32-bit kernel? I think it may overflow for
many cases.
David Daney
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/3] MIPS: add a common mips_cyc2ns()
2010-04-07 16:50 ` David Daney
@ 2010-04-08 9:32 ` Wu Zhangjin
2010-04-10 5:46 ` Wu Zhangjin
0 siblings, 1 reply; 11+ messages in thread
From: Wu Zhangjin @ 2010-04-08 9:32 UTC (permalink / raw)
To: David Daney; +Cc: Ralf Baechle, linux-mips, Ralf Rösch
On Wed, 2010-04-07 at 09:50 -0700, David Daney wrote:
[...]
> > +static inline unsigned long long mips_cyc2ns(u64 cyc, u64 mult, u64 shift)
> > +{
> > +#ifdef CONFIG_32BIT
> > + /*
> > + * To balance the overhead of 128bit-arithematic and the precision
> > + * lost, we choose a smaller shift to avoid the quick overflow as the
> > + * X86& ARM does. please refer to arch/x86/kernel/tsc.c and
> > + * arch/arm/plat-orion/time.c
> > + */
> > + return (cyc * mult)>> shift;
>
> Have you tested that on a 32-bit kernel? I think it may overflow for
> many cases.
>
Yes, I have done some basic testing ;)
Since a c0 count with 400MHz clock frequency will overflow after about
more than 1 hour with the scaling factor 10, I think it is enough for
the generic debugging, such as Ftrace, If it is not enough, perhaps we
can choose a smaller scaling factor, such as 8.
The core idea here is to get a smaller mult to let (cyc * mult) not
overflow that quickly but also not get a 'bad' precision, of course, we
can try to implement the 128bit arithmatic in 32bit system, but that may
increase the overhead(not tested it yet, perhaps will be worse than the
original getnstimeofday()).
Regards,
Wu Zhangjin
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/3] MIPS: add a common mips_cyc2ns()
2010-04-08 9:32 ` Wu Zhangjin
@ 2010-04-10 5:46 ` Wu Zhangjin
0 siblings, 0 replies; 11+ messages in thread
From: Wu Zhangjin @ 2010-04-10 5:46 UTC (permalink / raw)
To: David Daney; +Cc: Ralf Baechle, linux-mips, Ralf Rösch
On Thu, 2010-04-08 at 17:32 +0800, Wu Zhangjin wrote:
> On Wed, 2010-04-07 at 09:50 -0700, David Daney wrote:
> [...]
> > > +static inline unsigned long long mips_cyc2ns(u64 cyc, u64 mult, u64 shift)
> > > +{
> > > +#ifdef CONFIG_32BIT
> > > + /*
> > > + * To balance the overhead of 128bit-arithematic and the precision
> > > + * lost, we choose a smaller shift to avoid the quick overflow as the
> > > + * X86& ARM does. please refer to arch/x86/kernel/tsc.c and
> > > + * arch/arm/plat-orion/time.c
> > > + */
> > > + return (cyc * mult)>> shift;
> >
> > Have you tested that on a 32-bit kernel? I think it may overflow for
> > many cases.
> >
>
> Yes, I have done some basic testing ;)
>
> Since a c0 count with 400MHz clock frequency will overflow after about
> more than 1 hour with the scaling factor 10,
Exactly, with 10, it will overflow after counting 2^51, which means it
will overflow at 3127 hours(about 130 days), which is enough.
> I think it is enough for
> the generic debugging, such as Ftrace, If it is not enough, perhaps we
> can choose a smaller scaling factor, such as 8.
With 8, it will overflow after 12510 hours(about 521 days).
So, I will choose 8 in the next revision.
PS: ...
#include <stdio.h>
#define NSEC_PER_SEC 1000000000 /* 10^9 */
#define CLOCK_FREQ 400000000 /* 400 M*/
#define CYC2NS_SHIFT 8
int main(void)
{
unsigned long long mult, v;
unsigned long long ullint_max = ~0;
unsigned long long tmp = 2ULL<<53;
double t_ns;
int t_h, t_d;
v = NSEC_PER_SEC;
v <<= CYC2NS_SHIFT;
v += CLOCK_FREQ/2;
v = v / CLOCK_FREQ;
mult = v;
printf("sizeof(unsigned long long): %d\n", sizeof(unsigned long long));
printf("%lld (max of cycles)\n", ullint_max/mult);
printf("%lld (2^53)\n", tmp);
t_h = (double)tmp / CLOCK_FREQ / 3600;
t_d = t_h / 24;
printf("%d hours, %d days\n", t_h, t_d);
return 0;
}
$ gcc -o clock clock.c
$ $ ./clock
sizeof(unsigned long long): 8
28823037615171174 (max of cycles)
18014398509481984 (2^53)
12509 hours, 521 days
Regards,
Wu Zhangjin
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 2/3] MIPS: cavium-octeon: rewrite the sched_clock() based on mips_cyc2ns()
[not found] ` <cover.1270655886.git.wuzhangjin@gmail.com>
2010-04-07 16:05 ` [PATCH v2 1/3] MIPS: add a common mips_cyc2ns() Wu Zhangin
@ 2010-04-07 16:05 ` Wu Zhangin
2010-07-29 1:24 ` wu zhangjin
2010-04-07 16:05 ` [PATCH v2 3/3] MIPS: r4k: Add a high resolution sched_clock() Wu Zhangin
2 siblings, 1 reply; 11+ messages in thread
From: Wu Zhangin @ 2010-04-07 16:05 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Wu Zhangjin, linux-mips, David Daney, Ralf Rösch
From: Wu Zhangjin <wuzhangjin@gmail.com>
Changes from v1:
o use the new interface mips_cyc2ns() intead of the old
mips_sched_clock().
The commit "MIPS: add a common mips_cyc2ns()" have abstracted the
solution of the 64bit calculation's overflow problem into a common
mips_cyc2ns() function in arch/mips/include/asm/time.h, This patch just
rewrites the sched_clock() for cavium-octeon on it.
Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
---
arch/mips/cavium-octeon/csrc-octeon.c | 29 ++---------------------------
1 files changed, 2 insertions(+), 27 deletions(-)
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 0bf4bbe..bca0004 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -52,34 +52,9 @@ static struct clocksource clocksource_mips = {
unsigned long long notrace sched_clock(void)
{
- /* 64-bit arithmatic can overflow, so use 128-bit. */
-#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 3))
- u64 t1, t2, t3;
- unsigned long long rv;
- u64 mult = clocksource_mips.mult;
- u64 shift = clocksource_mips.shift;
- u64 cnt = read_c0_cvmcount();
+ u64 cyc = read_c0_cvmcount();
- asm (
- "dmultu\t%[cnt],%[mult]\n\t"
- "nor\t%[t1],$0,%[shift]\n\t"
- "mfhi\t%[t2]\n\t"
- "mflo\t%[t3]\n\t"
- "dsll\t%[t2],%[t2],1\n\t"
- "dsrlv\t%[rv],%[t3],%[shift]\n\t"
- "dsllv\t%[t1],%[t2],%[t1]\n\t"
- "or\t%[rv],%[t1],%[rv]\n\t"
- : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
- : [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
- : "hi", "lo");
- return rv;
-#else
- /* GCC > 4.3 do it the easy way. */
- unsigned int __attribute__((mode(TI))) t;
- t = read_c0_cvmcount();
- t = t * clocksource_mips.mult;
- return (unsigned long long)(t >> clocksource_mips.shift);
-#endif
+ return mips_cyc2ns(cyc, clocksource_mips.mult, clocksource_mips.shift);
}
void __init plat_time_init(void)
--
1.7.0.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH v2 2/3] MIPS: cavium-octeon: rewrite the sched_clock() based on mips_cyc2ns()
@ 2010-07-29 1:24 ` wu zhangjin
0 siblings, 0 replies; 11+ messages in thread
From: wu zhangjin @ 2010-07-29 1:24 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Wu Zhangjin, linux-mips, David Daney, Ralf Rösch
[-- Attachment #1: Type: text/plain, Size: 2683 bytes --]
Hi, Ralf
This may be not applicable for the original version is changed by
[PATCH] OCTEON: workaround linking failures with gcc-4.4.x 32-bits
toolchains
from Florian Fainelli.
Regards,
Wu Zhangjin
On Thu, Apr 8, 2010 at 12:05 AM, Wu Zhangin <wuzhangjin@gmail.com> wrote:
> From: Wu Zhangjin <wuzhangjin@gmail.com>
>
> Changes from v1:
>
> o use the new interface mips_cyc2ns() intead of the old
> mips_sched_clock().
>
> The commit "MIPS: add a common mips_cyc2ns()" have abstracted the
> solution of the 64bit calculation's overflow problem into a common
> mips_cyc2ns() function in arch/mips/include/asm/time.h, This patch just
> rewrites the sched_clock() for cavium-octeon on it.
>
> Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
> ---
> arch/mips/cavium-octeon/csrc-octeon.c | 29 ++---------------------------
> 1 files changed, 2 insertions(+), 27 deletions(-)
>
> diff --git a/arch/mips/cavium-octeon/csrc-octeon.c
> b/arch/mips/cavium-octeon/csrc-octeon.c
> index 0bf4bbe..bca0004 100644
> --- a/arch/mips/cavium-octeon/csrc-octeon.c
> +++ b/arch/mips/cavium-octeon/csrc-octeon.c
> @@ -52,34 +52,9 @@ static struct clocksource clocksource_mips = {
>
> unsigned long long notrace sched_clock(void)
> {
> - /* 64-bit arithmatic can overflow, so use 128-bit. */
> -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 3))
> - u64 t1, t2, t3;
> - unsigned long long rv;
> - u64 mult = clocksource_mips.mult;
> - u64 shift = clocksource_mips.shift;
> - u64 cnt = read_c0_cvmcount();
> + u64 cyc = read_c0_cvmcount();
>
> - asm (
> - "dmultu\t%[cnt],%[mult]\n\t"
> - "nor\t%[t1],$0,%[shift]\n\t"
> - "mfhi\t%[t2]\n\t"
> - "mflo\t%[t3]\n\t"
> - "dsll\t%[t2],%[t2],1\n\t"
> - "dsrlv\t%[rv],%[t3],%[shift]\n\t"
> - "dsllv\t%[t1],%[t2],%[t1]\n\t"
> - "or\t%[rv],%[t1],%[rv]\n\t"
> - : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3]
> "=&r" (t3)
> - : [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
> - : "hi", "lo");
> - return rv;
> -#else
> - /* GCC > 4.3 do it the easy way. */
> - unsigned int __attribute__((mode(TI))) t;
> - t = read_c0_cvmcount();
> - t = t * clocksource_mips.mult;
> - return (unsigned long long)(t >> clocksource_mips.shift);
> -#endif
> + return mips_cyc2ns(cyc, clocksource_mips.mult,
> clocksource_mips.shift);
> }
>
> void __init plat_time_init(void)
> --
> 1.7.0.1
>
>
--
MSN+Gtalk: wuzhangjin@gmail.com
Blog: http://falcon.oss.lzu.edu.cn
Tel:+86-18710032278
[-- Attachment #2: Type: text/html, Size: 3647 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH v2 2/3] MIPS: cavium-octeon: rewrite the sched_clock() based on mips_cyc2ns()
@ 2010-07-29 1:24 ` wu zhangjin
0 siblings, 0 replies; 11+ messages in thread
From: wu zhangjin @ 2010-07-29 1:24 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Wu Zhangjin, linux-mips, David Daney, Ralf Rösch
[-- Attachment #1: Type: text/plain, Size: 2683 bytes --]
Hi, Ralf
This may be not applicable for the original version is changed by
[PATCH] OCTEON: workaround linking failures with gcc-4.4.x 32-bits
toolchains
from Florian Fainelli.
Regards,
Wu Zhangjin
On Thu, Apr 8, 2010 at 12:05 AM, Wu Zhangin <wuzhangjin@gmail.com> wrote:
> From: Wu Zhangjin <wuzhangjin@gmail.com>
>
> Changes from v1:
>
> o use the new interface mips_cyc2ns() intead of the old
> mips_sched_clock().
>
> The commit "MIPS: add a common mips_cyc2ns()" have abstracted the
> solution of the 64bit calculation's overflow problem into a common
> mips_cyc2ns() function in arch/mips/include/asm/time.h, This patch just
> rewrites the sched_clock() for cavium-octeon on it.
>
> Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
> ---
> arch/mips/cavium-octeon/csrc-octeon.c | 29 ++---------------------------
> 1 files changed, 2 insertions(+), 27 deletions(-)
>
> diff --git a/arch/mips/cavium-octeon/csrc-octeon.c
> b/arch/mips/cavium-octeon/csrc-octeon.c
> index 0bf4bbe..bca0004 100644
> --- a/arch/mips/cavium-octeon/csrc-octeon.c
> +++ b/arch/mips/cavium-octeon/csrc-octeon.c
> @@ -52,34 +52,9 @@ static struct clocksource clocksource_mips = {
>
> unsigned long long notrace sched_clock(void)
> {
> - /* 64-bit arithmatic can overflow, so use 128-bit. */
> -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 3))
> - u64 t1, t2, t3;
> - unsigned long long rv;
> - u64 mult = clocksource_mips.mult;
> - u64 shift = clocksource_mips.shift;
> - u64 cnt = read_c0_cvmcount();
> + u64 cyc = read_c0_cvmcount();
>
> - asm (
> - "dmultu\t%[cnt],%[mult]\n\t"
> - "nor\t%[t1],$0,%[shift]\n\t"
> - "mfhi\t%[t2]\n\t"
> - "mflo\t%[t3]\n\t"
> - "dsll\t%[t2],%[t2],1\n\t"
> - "dsrlv\t%[rv],%[t3],%[shift]\n\t"
> - "dsllv\t%[t1],%[t2],%[t1]\n\t"
> - "or\t%[rv],%[t1],%[rv]\n\t"
> - : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3]
> "=&r" (t3)
> - : [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
> - : "hi", "lo");
> - return rv;
> -#else
> - /* GCC > 4.3 do it the easy way. */
> - unsigned int __attribute__((mode(TI))) t;
> - t = read_c0_cvmcount();
> - t = t * clocksource_mips.mult;
> - return (unsigned long long)(t >> clocksource_mips.shift);
> -#endif
> + return mips_cyc2ns(cyc, clocksource_mips.mult,
> clocksource_mips.shift);
> }
>
> void __init plat_time_init(void)
> --
> 1.7.0.1
>
>
--
MSN+Gtalk: wuzhangjin@gmail.com
Blog: http://falcon.oss.lzu.edu.cn
Tel:+86-18710032278
[-- Attachment #2: Type: text/html, Size: 3647 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 3/3] MIPS: r4k: Add a high resolution sched_clock()
[not found] ` <cover.1270655886.git.wuzhangjin@gmail.com>
2010-04-07 16:05 ` [PATCH v2 1/3] MIPS: add a common mips_cyc2ns() Wu Zhangin
2010-04-07 16:05 ` [PATCH v2 2/3] MIPS: cavium-octeon: rewrite the sched_clock() based on mips_cyc2ns() Wu Zhangin
@ 2010-04-07 16:05 ` Wu Zhangin
2 siblings, 0 replies; 11+ messages in thread
From: Wu Zhangin @ 2010-04-07 16:05 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Wu Zhangjin, linux-mips, David Daney, Ralf Rösch
From: Wu Zhangjin <wuzhangjin@gmail.com>
(v9 -> v10:
o use the new interface mips_cyc2ns() instead of the old
mips_sched_clock()
o adds 32bit support via using a smaller shift to balance the overhead
of 128bit arithmatic and the precision lost. please refer to the method
used in X86 & ARM platforms, arch/x86/kernel/tsc.c,
arch/arm/plat-orion/time.c.
v8 -> v9:
O Make it depends on 64BIT for the current mips_cyc2ns() only
support 64bit currently.
v7 -> v8:
O Make it works with the exisiting clocksource_mips.mult,
clocksource_mips.shift and copes with the 64bit calculation's overflow
problem with the method introduced by David Daney in "MIPS: Octeon: Use
non-overflowing arithmetic in sched_clock".
To reduce the duplication, I have abstracted an inline
mips_cyc2ns() function to arch/mips/include/asm/time.h from
arch/mips/cavium-octeon/csrc-octeon.c.
v6 -> v7:
O Make it depends on !CPU_FREQ and CPU_HAS_FIXED_C0_COUNT
This sched_clock() is only available with the processor has fixed cp0
MIPS count register or even has dynamic cp0 MIPS count register but
with CPU_FREQ disabled.
NOTE: If your processor has fixed c0 count, please select
CPU_HAS_FIXED_C0_COUNT for it and send a related patch to Ralf.
v5 -> v6:
o hard-codes the cycle2ns_scale_factor as 8 for 30(cs->shift) is too
big. With 30, the return value of sched_clock() will also overflow quickly.
o moves the sched_clock() back into csrc-r4k.c as David and Sergei
recommended.
o inits c0 count as zero for PRINTK_TIME=y.
o drops the HR_SCHED_CLCOK option for the current sched_clock() is stable
enough to replace the jiffies based one.
)
This patch adds a cnt32_to_63() and MIPS c0 count based sched_clock(),
which provides high resolution.
Without it, the Ftrace for MIPS will give useless timestamp information.
Because cnt32_to_63() needs to be called at least once per half period
to work properly, Differ from the old version, this v2 revision set up a
kernel timer to ensure the requirement of some MIPSs which have short c0
count period.
And also, we init the c0 count as ZERO(just as jiffies does) in
time_init() before plat_time_init(), without it, PRINTK_TIME=y will get
wrong timestamp information. (NOTE: some platforms have initiazlied c0
count as zero, but some not, this may introduce some duplication,
perhaps a new patch is needed to remove the initialized of c0 count in
the platforms later?)
This is originally from arch/arm/plat-orion/time.c
This revision works well for function graph tracer now, and also,
PRINTK_TIME=y will get normal timestamp informatin.
Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
---
arch/mips/Kconfig | 12 +++++++
arch/mips/kernel/csrc-r4k.c | 75 +++++++++++++++++++++++++++++++++++++++++++
arch/mips/kernel/time.c | 5 +++
3 files changed, 92 insertions(+), 0 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f2ead53..b302838 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1962,6 +1962,18 @@ config NR_CPUS
source "kernel/time/Kconfig"
#
+# High Resolution sched_clock() support
+#
+
+config CPU_HAS_FIXED_C0_COUNT
+ bool
+
+config CPU_SUPPORTS_HR_SCHED_CLOCK
+ bool
+ depends on CPU_HAS_FIXED_C0_COUNT || !CPU_FREQ
+ default y
+
+#
# Timer Interrupt Frequency Configuration
#
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index e95a3cd..fa4d763 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -6,7 +6,9 @@
* Copyright (C) 2007 by Ralf Baechle
*/
#include <linux/clocksource.h>
+#include <linux/cnt32_to_63.h>
#include <linux/init.h>
+#include <linux/timer.h>
#include <asm/time.h>
@@ -22,6 +24,77 @@ static struct clocksource clocksource_mips = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+#ifdef CONFIG_CPU_SUPPORTS_HR_SCHED_CLOCK
+/*
+ * MIPS sched_clock implementation.
+ *
+ * Because the hardware timer period is quite short and because cnt32_to_63()
+ * needs to be called at least once per half period to work properly, a kernel
+ * timer is set up to ensure this requirement is always met.
+ *
+ * Please refer to include/linux/cnt32_to_63.h, arch/arm/plat-orion/time.c and
+ * arch/mips/include/asm/time.h (mips_cyc2ns)
+ */
+
+#define CYC2NS_SHIFT 10
+static u64 mult __read_mostly;
+static u64 shift __read_mostly;
+
+unsigned long long notrace sched_clock(void)
+{
+ u64 cyc = cnt32_to_63(read_c0_count());
+
+#ifdef CONFIG_64BIT
+ /* For we have used 128bit arithmatic to cope with the overflow
+ * problem, the method to clear the top bit with an event value doesn't
+ * work now, therefore, clear it at run-time is needed.
+ */
+ if (cyc & 0x8000000000000000)
+ cyc &= 0x7fffffffffffffff;
+#endif
+ return mips_cyc2ns(cyc, mult, shift);
+}
+
+static struct timer_list cnt32_to_63_keepwarm_timer;
+
+static void cnt32_to_63_keepwarm(unsigned long data)
+{
+ mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
+ sched_clock();
+}
+#endif
+
+static inline void setup_hres_sched_clock(unsigned long clock)
+{
+#ifdef CONFIG_CPU_SUPPORTS_HR_SCHED_CLOCK
+ unsigned long data;
+
+#ifdef CONFIG_32BIT
+ unsigned long long v;
+ v = NSEC_PER_SEC;
+ v <<= CYC2NS_SHIFT;
+ v += clock/2;
+ do_div(v, clock);
+ mult = v;
+ shift = CYC2NS_SHIFT;
+ /*
+ * We want an even value to automatically clear the top bit
+ * returned by cnt32_to_63() without an additional run time
+ * instruction. So if the LSB is 1 then round it up.
+ */
+ if (mult & 1)
+ mult++;
+#else
+ mult = clocksource_mips.mult;
+ shift = clocksource_mips.shift;
+#endif
+
+ data = 0x80000000UL / clock * HZ;
+ setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
+ mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
+#endif
+}
+
int __init init_r4k_clocksource(void)
{
if (!cpu_has_counter || !mips_hpt_frequency)
@@ -32,6 +105,8 @@ int __init init_r4k_clocksource(void)
clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
+ setup_hres_sched_clock(mips_hpt_frequency);
+
clocksource_register(&clocksource_mips);
return 0;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index fb74974..86cf18a 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -119,6 +119,11 @@ static __init int cpu_has_mfc0_count_bug(void)
void __init time_init(void)
{
+#ifdef CONFIG_CPU_SUPPORTS_HR_SCHED_CLOCK
+ if (!mips_clockevent_init() || !cpu_has_mfc0_count_bug())
+ write_c0_count(0);
+#endif
+
plat_time_init();
if (!mips_clockevent_init() || !cpu_has_mfc0_count_bug())
--
1.7.0.1
^ permalink raw reply related [flat|nested] 11+ messages in thread