* [U-Boot] oamp3: bug in clock.c with gcc 4.5.1?
@ 2010-12-17 14:01 Alexander Holler
2010-12-17 14:20 ` Wolfgang Denk
0 siblings, 1 reply; 3+ messages in thread
From: Alexander Holler @ 2010-12-17 14:01 UTC (permalink / raw)
To: u-boot
Hello,
I think I've nailed down, why an u-boot compiled with gcc 4.5.1 fails
here. Compiling arch/arm/cpu/armv7/omap3/clock.c with
gcc -g -Os -fno-common -ffixed-r8 -msoft-float -D__KERNEL__
-DCONFIG_SYS_TEXT_BASE=0x80008000 -I/usr/src/u-boot/include -fno-builtin
-ffreestanding -nostdinc -isystem
/usr/lib/gcc/armv7a-unknown-linux-gnueabi/4.5.1/include -pipe
-DCONFIG_ARM -D__ARM__ -marm -mabi=aapcs-linux -mno-thumb-interwork
-march=armv5 -Wall -Wstrict-prototypes -fno-stack-protector -gstabs+
-Wa,-ahldn -c clock.c >clock.s
the following code will be build for get_osc_clk_speed():
--------------
37:clock.c ****
/******************************************************************************
38:clock.c **** * get_sys_clk_speed() - determine reference
oscillator speed
39:clock.c **** * based on known 32kHz
clock and gptimer.
40:clock.c ****
*****************************************************************************/
41:clock.c **** u32 get_osc_clk_speed(void)
42:clock.c **** {
277 .LM0:
278 .LFBB1:
279
280
281
43:clock.c **** u32 start, cstart, cend, cdiff, cdiv, val;
44:clock.c **** struct prcm *prcm_base = (struct prcm
*)PRCM_BASE;
45:clock.c **** struct prm *prm_base = (struct prm
*)PRM_BASE;
46:clock.c **** struct gptimer *gpt1_base = (struct
gptimer *)OMAP34XX_GPT1;
47:clock.c **** struct s32ktimer *s32k_base = (struct
s32ktimer *)SYNC_32KTIMER_BASE;
48:clock.c ****
49:clock.c **** val = readl(&prm_base->clksrc_ctrl);
50:clock.c ****
51:clock.c **** if (val & SYSCLKDIV_2)
52:clock.c **** cdiv = 2;
53:clock.c **** else
54:clock.c **** cdiv = 1;
55:clock.c ****
56:clock.c **** /* enable timer2 */
57:clock.c **** val = readl(&prcm_base->clksel_wkup) |
CLKSEL_GPT1;
283 .LM1:
284 0000 70309FE5 ldr r3,.L7
285 0004 002093E5 ldr r2,[r3,#0]
286 0008 012082E3 orr r2,r2,#1
58:clock.c ****
59:clock.c **** /* select sys_clk for GPT1 */
60:clock.c **** writel(val, &prcm_base->clksel_wkup);
288 .LM2:
289 000c 002083E5 str r2,[r3,#0]
61:clock.c ****
62:clock.c **** /* Enable I and F Clocks for GPT1 */
63:clock.c **** val = readl(&prcm_base->iclken_wkup) |
EN_GPT1 | EN_32KSYNC;
291 .LM3:
292 0010 303043E2 sub r3,r3,#48
293 0014 002093E5 ldr r2,[r3,#0]
294 0018 052082E3 orr r2,r2,#5
64:clock.c **** writel(val, &prcm_base->iclken_wkup);
296 .LM4:
297 001c 002083E5 str r2,[r3,#0]
65:clock.c ****
66:clock.c **** val = readl(&prcm_base->fclken_wkup) |
EN_GPT1;
299 .LM5:
300 0020 103043E2 sub r3,r3,#16
301 0024 002093E5 ldr r2,[r3,#0]
302 0028 012082E3 orr r2,r2,#1
67:clock.c **** writel(val, &prcm_base->fclken_wkup);
304 .LM6:
305 002c 002083E5 str r2,[r3,#0]
68:clock.c ****
69:clock.c **** writel(0, &gpt1_base->tldr);
/* start counting at 0 */
307 .LM7:
308 0030 44309FE5 ldr r3,.L7+4
309 0034 0020A0E3 mov r2,#0
310 0038 002083E5 str r2,[r3,#0]
70:clock.c **** writel(GPT_EN, &gpt1_base->tclr);
/* enable clock */
312 .LM8:
313 003c 032082E2 add r2,r2,#3
314 0040 083043E2 sub r3,r3,#8
315 0044 002083E5 str r2,[r3,#0]
71:clock.c ****
72:clock.c **** /* enable 32kHz source, determine
sys_clk via gauging */
73:clock.c ****
74:clock.c **** /* start time in 20 cycles */
75:clock.c **** start = 20 + readl(&s32k_base->s32k_cr);
317 .LM9:
318 0048 30309FE5 ldr r3,.L7+8
319 004c 002093E5 ldr r2,[r3,#0]
320 0050 142082E2 add r2,r2,#20
321 .L2:
76:clock.c ****
77:clock.c **** /* dead loop till start time */
78:clock.c **** while (readl(&s32k_base->s32k_cr) < start);
323 .LM10:
324 0054 001093E5 ldr r1,[r3,#0]
325 0058 020051E1 cmp r1,r2
326 005c FCFFFF3A bcc .L2
79:clock.c ****
80:clock.c **** /* get start sys_clk count */
81:clock.c **** cstart = readl(&gpt1_base->tcrr);
82:clock.c ****
83:clock.c **** /* wait for 40 cycles */
84:clock.c **** while (readl(&s32k_base->s32k_cr) <
(start + 20)) ;
328 .LM11:
329 0060 142082E2 add r2,r2,#20
330 .L4:
331 0064 001093E5 ldr r1,[r3,#0]
332 0068 020051E1 cmp r1,r2
333 006c FCFFFF3A bcc .L4
85:clock.c **** cend = readl(&gpt1_base->tcrr);
/* get end sys_clk count */
86:clock.c **** cdiff = cend - cstart;
/* get elapsed ticks */
87:clock.c **** cdiff *= cdiv;
88:clock.c ****
89:clock.c **** /* based on number of ticks assign speed */
90:clock.c **** if (cdiff > 19000)
91:clock.c **** return S38_4M;
92:clock.c **** else if (cdiff > 15200)
93:clock.c **** return S26M;
94:clock.c **** else if (cdiff > 13000)
95:clock.c **** return S24M;
96:clock.c **** else if (cdiff > 9000)
97:clock.c **** return S19_2M;
98:clock.c **** else if (cdiff > 7600)
99:clock.c **** return S13M;
100:clock.c **** else
101:clock.c **** return S12M;
102:clock.c **** }
335 .LM12:
336 0070 0C009FE5 ldr r0,.L7+12
337 0074 1EFF2FE1 bx lr
338 .L8:
339 .align 2
340 .L7:
341 0078 404C0048 .word 1207979072
342 007c 2C803148 .word 1211203628
343 0080 10003248 .word 1211236368
344 0084 001BB700 .word 12000000
349 .Lscope1:
351 .stabd 78,0,0
352 .align 2
356 .global get_sys_clkin_sel
358 get_sys_clkin_sel:
359 .stabd 46,0,0
103:clock.c ****
104:clock.c ****
/******************************************************************************
105:clock.c **** * get_sys_clkin_sel() - returns the
sys_clkin_sel field value based on
106:clock.c **** * input oscillator clock
frequency.
107:clock.c ****
*****************************************************************************/
108:clock.c **** void get_sys_clkin_sel(u32 osc_clk, u32
*sys_clkin_sel)
--------------
That means get_sys_clk_speed() will allways return S12M, at least that
is what I'm reading here. And I don't see why gcc should be allowed to
optimize that cdiff to a fixed value and therefor returning only S12M.
Any comments on that?
Regards,
Alexander
^ permalink raw reply [flat|nested] 3+ messages in thread
* [U-Boot] oamp3: bug in clock.c with gcc 4.5.1?
2010-12-17 14:01 [U-Boot] oamp3: bug in clock.c with gcc 4.5.1? Alexander Holler
@ 2010-12-17 14:20 ` Wolfgang Denk
2010-12-17 20:33 ` Alexander Holler
0 siblings, 1 reply; 3+ messages in thread
From: Wolfgang Denk @ 2010-12-17 14:20 UTC (permalink / raw)
To: u-boot
Dear Alexander,
in message <4D0B6D1F.8010304@ahsoftware.de> you wrote:
>
> I think I've nailed down, why an u-boot compiled with gcc 4.5.1 fails
> here. Compiling arch/arm/cpu/armv7/omap3/clock.c with
> ...
> That means get_sys_clk_speed() will allways return S12M, at least that
> is what I'm reading here. And I don't see why gcc should be allowed to
> optimize that cdiff to a fixed value and therefor returning only S12M.
Well, if you look at the code after preprocessing it looks like this:
struct gptimer *gpt1_base = (struct gptimer *)0x48318000;
...
cstart = (*(volatile unsigned int *)(&gpt1_base->tcrr));
...
cend = (*(volatile unsigned int *)(&gpt1_base->tcrr));
cdiff = cend - cstart;
Eventually that simple definition of readl() is no longer good enough
for recent compilers. There is probably a very good reason that Linux
uses a "__iormb();" memory barrier in the definition of readl() and
similar macros.
We should probably update "arch/arm/include/asm/io.h" ...
But then, I'm not an expert for ARM.
Best regards,
Wolfgang Denk
--
DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd at denx.de
... Jesus cried with a loud voice: Lazarus, come forth; the bug hath
been found and thy program runneth. And he that was dead came
forth... -- John 11:43-44 [version 2.0?]
^ permalink raw reply [flat|nested] 3+ messages in thread
* [U-Boot] oamp3: bug in clock.c with gcc 4.5.1?
2010-12-17 14:20 ` Wolfgang Denk
@ 2010-12-17 20:33 ` Alexander Holler
0 siblings, 0 replies; 3+ messages in thread
From: Alexander Holler @ 2010-12-17 20:33 UTC (permalink / raw)
To: u-boot
Hello,
Am 17.12.2010 15:20, schrieb Wolfgang Denk:
>> I think I've nailed down, why an u-boot compiled with gcc 4.5.1 fails
>> here. Compiling arch/arm/cpu/armv7/omap3/clock.c with
>> ...
>> That means get_sys_clk_speed() will allways return S12M, at least that
>> is what I'm reading here. And I don't see why gcc should be allowed to
>> optimize that cdiff to a fixed value and therefor returning only S12M.
>
> Well, if you look at the code after preprocessing it looks like this:
>
> struct gptimer *gpt1_base = (struct gptimer *)0x48318000;
> ...
> cstart = (*(volatile unsigned int *)(&gpt1_base->tcrr));
> ...
> cend = (*(volatile unsigned int *)(&gpt1_base->tcrr));
> cdiff = cend - cstart;
>
>
> Eventually that simple definition of readl() is no longer good enough
> for recent compilers. There is probably a very good reason that Linux
> uses a "__iormb();" memory barrier in the definition of readl() and
> similar macros.
>
> We should probably update "arch/arm/include/asm/io.h" ...
I was unsure if that volatile is enough, therefor I've asked here.
Looking at the kernel would have been my next step, but I thought it
would be good to inform other ARM-users here early. In things belonging
to lowlevel ARM I'm no expert too.
Anyway a memory barrier seems to be a very good idea in readl, even when
ignoring the volatile should be considered as a bug of gcc.
Linux has a paper on that topic in
Documentation/volatile-considered-harmful.txt.
I will modify readl here, check what happens, and will post a patch if
that fixes the problem.
Regards,
Alexander
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2010-12-17 20:33 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-12-17 14:01 [U-Boot] oamp3: bug in clock.c with gcc 4.5.1? Alexander Holler
2010-12-17 14:20 ` Wolfgang Denk
2010-12-17 20:33 ` Alexander Holler
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox