public inbox for linux-omap@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [RFC] dmtimer library is very inefficient today
       [not found]   ` <20080307071454.GC7635@atomide.com>
@ 2008-03-15  0:24     ` Woodruff, Richard
  2008-03-18 18:33       ` Kevin Hilman
  0 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-15  0:24 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: linux-omap

Hi,
  
> Yeah, I agree. We should be able to optimize at least the GPT0 writes
> into just few instructions as it's done for every timer interrupt.
> >
> > One side note is the code only enables the wake up function for
GPT1.
> > This seems wrong also.  You almost always want all devices which
should
> > be able to generate an interrupt to generate a wake up event.  If
you
> > don't do this you might wait for the dynamic tick interval before
> > recognizing an event when you are in interconnect clock stop.  You
do
> > have to clear status else you will miss chip retention, but this is
well
> > worth the trivial extra work in power savings.
> 
> Sure, that makes sense. Got any patches around?

Here is a patch to look at it.  Seems to work for me.  It adds the use
of posting for the timer.  Previously, every write could lock the
requestor for almost 3x32KHz cycles.  This now only synchronizes before
writes and reads instead of after them and it does it on a register per
register basis.  Doing it this way there is some chance to hide some of
the sync latency.  It also removes some needless reads when non-posted
mode is there.

The code probably would be a little smaller if a wpost[256] were used.
Probably the structure comes in on the cache line read so it is not like
its adding so much with the ARM running in the hundreds of MHz range. 

Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 302ad8d..c635180 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -67,6 +67,41 @@
 #define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload
enable */
 #define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer
*/
 
+/* Write posting bit shift table
+ * - Values are register shift for TWPS status bits
+ * - Use of this avoids 32KHz Synchronization penalties. The cost is
almost 
+ *   a 3x32KHz stall.  This is compared to a 166MHz posted access.
+ * - A value of '15' is a shift which always returns 0
+ *   for registers which are not postable
+ *
+ * Note: Faster code can be generated if a larger array is used.
+ */
+#define PROW	6
+#define PCOL	4
+#define WPINDEX(off)	(wpost[off >> 4][(off & 0xf) >> 2])
+
+#if defined(CONFIG_ARCH_OMAP1) || defined(CONFIG_OMAP2)
+static unsigned char wpost[PROW][PCOL] = {
+			/* 0, 4, 8, c */
+/* 00 */	{15, 15, 15, 15},
+/* 10 */	{15, 15, 15, 15},
+/* 20 */	{15,  0,  1,  2},
+/* 30 */	{ 3, 15,  4, 15},
+/* 40 */	{15, 15, 15, 15},
+/* 50 */	{15, 15, 15, 15},
+};
+#else /* OMAP3 */
+static unsigned char wpost[PROW][PCOL] = {
+			/* 0, 4, 8, c */
+/* 00 */	{15, 15, 15, 15},
+/* 10 */	{15, 15, 15, 15},
+/* 20 */	{15,  0,  1,  2},
+/* 30 */	{ 3, 15,  4, 15},
+/* 40 */	{15, 15,  5, 06},
+/* 50 */	{ 7,  8,  9, 15},
+};
+#endif
+
 struct omap_dm_timer {
 	unsigned long phys_base;
 	int irq;
@@ -76,6 +111,7 @@ struct omap_dm_timer {
 	void __iomem *io_base;
 	unsigned reserved:1;
 	unsigned enabled:1;
+	unsigned posted:1;
 };
 
 #ifdef CONFIG_ARCH_OMAP1
@@ -181,16 +217,23 @@ static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
 
-static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer,
int reg)
+u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
 {
+	/* A read of a non completed write will be a read error */
+	if(timer->posted) 
+		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG)
& 
+			   (1 << WPINDEX(reg)));
 	return readl(timer->io_base + reg);
 }
 
-static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int
reg, u32 value)
+static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int
reg, 
+
u32 value)
 {
+	/* A write on a register which has a pending write will be
thrown away */
+	if(timer->posted) 
+		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG)
& 
+			   (1 << WPINDEX(reg)));
 	writel(value, timer->io_base + reg);
-	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
-		;
 }
 
 static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
@@ -217,15 +260,16 @@ static void omap_dm_timer_reset(struct
omap_dm_timer *timer)
 	}
 	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
-	/* Set to smart-idle mode */
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
-	l |= 0x02 << 3;
+	l |= 0x02 << 3;  /* Set to smart-idle mode */
+	l |= 0x2 << 8;   /* Set clock activity to preserve f-clock on
idle */
 
-	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
-		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
+	if (cpu_class_is_omap2() && (timer == &dm_timers[0])) {
+		/* Enable wake-up only for GPT1 on OMAP2 CPUs */
+		/* FIXME: All should have this enabled and have PRCM
status clear*/
 		l |= 1 << 2;
-		/* Non-posted mode */
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
0);
+		/* Ensure posted mode */
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
(1 << 2));
 	}
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
 }
@@ -434,6 +478,8 @@ void omap_dm_timer_set_load(struct omap_dm_timer
*timer, int autoreload,
 		l &= ~OMAP_TIMER_CTRL_AR;
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+	/* ? hw-bug ttgr overtaking tldr*/
+	while(readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG));
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 
@@ -568,6 +614,7 @@ int __init omap_dm_timer_init(void)
 	for (i = 0; i < dm_timer_count; i++) {
 		timer = &dm_timers[i];
 		timer->io_base = (void __iomem
*)io_p2v(timer->phys_base);
+		timer->posted = 1;  /* post by default */
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 		if (cpu_class_is_omap2()) {
 			char clk_name[16];


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] [RFC] dmtimer library is very inefficient today
  2008-03-15  0:24     ` [PATCH] [RFC] dmtimer library is very inefficient today Woodruff, Richard
@ 2008-03-18 18:33       ` Kevin Hilman
  2008-03-18 18:51         ` Woodruff, Richard
                           ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Kevin Hilman @ 2008-03-18 18:33 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Tony Lindgren, linux-omap

"Woodruff, Richard" <r-woodruff2@ti.com> writes:

> Here is a patch to look at it.  Seems to work for me.  It adds the use
> of posting for the timer.  Previously, every write could lock the
> requestor for almost 3x32KHz cycles.  This now only synchronizes before
> writes and reads instead of after them and it does it on a register per
> register basis.  Doing it this way there is some chance to hide some of
> the sync latency.  It also removes some needless reads when non-posted
> mode is there.
>
> The code probably would be a little smaller if a wpost[256] were used.
> Probably the structure comes in on the cache line read so it is not like
> its adding so much with the ARM running in the hundreds of MHz range. 
>
> Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>

I confirm that this indeed works and noticably reduces latency in
proramming the timers.  I'm running it along with the latency tracer
which is part of the -rt patch, and have verified that this is no
longer as noticable of a source of latency.

WRT the code, any reason you removed the 'static' and 'inline' from
the read function?

Also, a few CodingStyle issues should be cleaned up:

like:

-  if(timer->posted) 
+  if (timer->posted) 

and upstream folks tend dislike the polling loops like this:

   while(condition);

and prefer the semicolon on its own line to be clear that the loop is
indeed doing nothing.

   while(condition)
           ;

> diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
> index 302ad8d..c635180 100644
> --- a/arch/arm/plat-omap/dmtimer.c
> +++ b/arch/arm/plat-omap/dmtimer.c
> @@ -67,6 +67,41 @@
>  #define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload
> enable */
>  #define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer
> */
>  
> +/* Write posting bit shift table
> + * - Values are register shift for TWPS status bits
> + * - Use of this avoids 32KHz Synchronization penalties. The cost is
> almost 
> + *   a 3x32KHz stall.  This is compared to a 166MHz posted access.
> + * - A value of '15' is a shift which always returns 0
> + *   for registers which are not postable
> + *
> + * Note: Faster code can be generated if a larger array is used.
> + */
> +#define PROW	6
> +#define PCOL	4
> +#define WPINDEX(off)	(wpost[off >> 4][(off & 0xf) >> 2])
> +
> +#if defined(CONFIG_ARCH_OMAP1) || defined(CONFIG_OMAP2)
> +static unsigned char wpost[PROW][PCOL] = {
> +			/* 0, 4, 8, c */
> +/* 00 */	{15, 15, 15, 15},
> +/* 10 */	{15, 15, 15, 15},
> +/* 20 */	{15,  0,  1,  2},
> +/* 30 */	{ 3, 15,  4, 15},
> +/* 40 */	{15, 15, 15, 15},
> +/* 50 */	{15, 15, 15, 15},
> +};
> +#else /* OMAP3 */
> +static unsigned char wpost[PROW][PCOL] = {
> +			/* 0, 4, 8, c */
> +/* 00 */	{15, 15, 15, 15},
> +/* 10 */	{15, 15, 15, 15},
> +/* 20 */	{15,  0,  1,  2},
> +/* 30 */	{ 3, 15,  4, 15},
> +/* 40 */	{15, 15,  5, 06},
> +/* 50 */	{ 7,  8,  9, 15},
> +};
> +#endif
> +
>  struct omap_dm_timer {
>  	unsigned long phys_base;
>  	int irq;
> @@ -76,6 +111,7 @@ struct omap_dm_timer {
>  	void __iomem *io_base;
>  	unsigned reserved:1;
>  	unsigned enabled:1;
> +	unsigned posted:1;
>  };
>  
>  #ifdef CONFIG_ARCH_OMAP1
> @@ -181,16 +217,23 @@ static struct clk **dm_source_clocks;
>  
>  static spinlock_t dm_timer_lock;
>  
> -static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer,
> int reg)
> +u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
>  {
> +	/* A read of a non completed write will be a read error */
> +	if(timer->posted) 
> +		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG)
> & 
> +			   (1 << WPINDEX(reg)));
>  	return readl(timer->io_base + reg);
>  }
>  
> -static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int
> reg, u32 value)
> +static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int
> reg, 
> +
> u32 value)
>  {
> +	/* A write on a register which has a pending write will be
> thrown away */
> +	if(timer->posted) 
> +		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG)
> & 
> +			   (1 << WPINDEX(reg)));
>  	writel(value, timer->io_base + reg);
> -	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
> -		;
>  }
>  
>  static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
> @@ -217,15 +260,16 @@ static void omap_dm_timer_reset(struct
> omap_dm_timer *timer)
>  	}
>  	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
>  
> -	/* Set to smart-idle mode */
>  	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
> -	l |= 0x02 << 3;
> +	l |= 0x02 << 3;  /* Set to smart-idle mode */
> +	l |= 0x2 << 8;   /* Set clock activity to preserve f-clock on
> idle */
>  
> -	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
> -		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
> +	if (cpu_class_is_omap2() && (timer == &dm_timers[0])) {
> +		/* Enable wake-up only for GPT1 on OMAP2 CPUs */
> +		/* FIXME: All should have this enabled and have PRCM
> status clear*/
>  		l |= 1 << 2;
> -		/* Non-posted mode */
> -		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
> 0);
> +		/* Ensure posted mode */
> +		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
> (1 << 2));
>  	}
>  	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
>  }
> @@ -434,6 +478,8 @@ void omap_dm_timer_set_load(struct omap_dm_timer
> *timer, int autoreload,
>  		l &= ~OMAP_TIMER_CTRL_AR;
>  	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
>  	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
> +	/* ? hw-bug ttgr overtaking tldr*/
> +	while(readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG));
>  	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
>  }
>  
> @@ -568,6 +614,7 @@ int __init omap_dm_timer_init(void)
>  	for (i = 0; i < dm_timer_count; i++) {
>  		timer = &dm_timers[i];
>  		timer->io_base = (void __iomem
> *)io_p2v(timer->phys_base);
> +		timer->posted = 1;  /* post by default */
>  #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
>  		if (cpu_class_is_omap2()) {
>  			char clk_name[16];
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH] [RFC] dmtimer library is very inefficient today
  2008-03-18 18:33       ` Kevin Hilman
@ 2008-03-18 18:51         ` Woodruff, Richard
  2008-03-19  2:40         ` [PATCH] dmtimer posting Woodruff, Richard
  2008-03-19 19:52         ` [PATCH] [RFC] dmtimer library is very inefficient today Ladislav Michl
  2 siblings, 0 replies; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-18 18:51 UTC (permalink / raw)
  To: Kevin Hilman; +Cc: Tony Lindgren, linux-omap

Hi Kevin,

> "Woodruff, Richard" <r-woodruff2@ti.com> writes:
> 
> > Here is a patch to look at it.  Seems to work for me.  It adds the
use
> > of posting for the timer.  Previously, every write could lock the
> > requestor for almost 3x32KHz cycles.  This now only synchronizes
before
> > writes and reads instead of after them and it does it on a register
per
> > register basis.  Doing it this way there is some chance to hide some
of
> > the sync latency.  It also removes some needless reads when
non-posted
> > mode is there.
> >
> > The code probably would be a little smaller if a wpost[256] were
used.
> > Probably the structure comes in on the cache line read so it is not
like
> > its adding so much with the ARM running in the hundreds of MHz
range.
> >
> > Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
> 
> I confirm that this indeed works and noticably reduces latency in
> proramming the timers.  I'm running it along with the latency tracer
> which is part of the -rt patch, and have verified that this is no
> longer as noticable of a source of latency.

Great.  Thanks for the verification.  I was expecting the same based on
lauterbach trace information.

> WRT the code, any reason you removed the 'static' and 'inline' from
> the read function?

I had removed it along the way after getting some recursive inline call.
I think those issues are fixed.  It might be they can go back in.

As the function grew in it might be the inline is not wanted.  If it is
only called inside the dmtimer code then the static can go back in.

> Also, a few CodingStyle issues should be cleaned up:
> 
> like:
> 
> -  if(timer->posted)
> +  if (timer->posted)

Ok, I can fix that.

> and upstream folks tend dislike the polling loops like this:
> 
>    while(condition);
> 
> and prefer the semicolon on its own line to be clear that the loop is
> indeed doing nothing.
> 
>    while(condition)
>            ;

Ok. I can fix that.

Regards,
Richard W.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] dmtimer posting
  2008-03-18 18:33       ` Kevin Hilman
  2008-03-18 18:51         ` Woodruff, Richard
@ 2008-03-19  2:40         ` Woodruff, Richard
  2008-03-19 14:15           ` Tony Lindgren
  2008-03-19 19:52         ` [PATCH] [RFC] dmtimer library is very inefficient today Ladislav Michl
  2 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-19  2:40 UTC (permalink / raw)
  To: Kevin Hilman, Tony Lindgren; +Cc: linux-omap

[-- Attachment #1: Type: text/plain, Size: 4791 bytes --]

Hi,

I've fixed the bits Kevin pointed out and ran checkpatch.pl and removed
the reset of the errors.

This patch adds the use of write posting for the timer.  Previously,
every write could lock the requestor for almost 3x32KHz cycles.  This
now only synchronizes before writes and reads instead of after them and
it does it on a register per register basis.  Doing it this way there is
some chance to hide some of the sync latency.  It also removes some
needless reads when non-posted mode is there.  With out this fix the
read/writes talk almost 2% CPU load @500MHz just waiting on tick timer
registers.

Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
 
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 302ad8d..e218ebd 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -67,6 +67,29 @@
 #define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload
enable */
 #define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer
*/
 
+/* Write posting bit shift table
+ * - Values are register shift for TWPS status bits
+ * - Use of this avoids 32KHz Synchronization penalties. The cost is
almost
+ *   a 3x32KHz stall.  This is comapred to a 166MHz posted access.
+ * - A value of '15' is a shift which always returns 0
+ *   for registers which are not postable
+ *
+ * Note: Faster code can be generated if a larger array is used.
+ */
+#define PROW	6
+#define PCOL	4
+#define WPINDEX(off)	(wpost[off >> 4][(off & 0xf) >> 2])
+
+static unsigned char wpost[PROW][PCOL] = {
+			/* 0, 4, 8, c */
+/* 00 */	{15, 15, 15, 15},
+/* 10 */	{15, 15, 15, 15},
+/* 20 */	{15,  0,  1,  2},
+/* 30 */	{ 3, 15,  4, 15},
+/* 40 */	{15, 15,  5, 06},
+/* 50 */	{ 7,  8,  9, 15},
+};
+
 struct omap_dm_timer {
 	unsigned long phys_base;
 	int irq;
@@ -76,6 +99,7 @@ struct omap_dm_timer {
 	void __iomem *io_base;
 	unsigned reserved:1;
 	unsigned enabled:1;
+	unsigned posted:1;
 };
 
 #ifdef CONFIG_ARCH_OMAP1
@@ -181,16 +205,25 @@ static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
 
-static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer,
int reg)
+u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
 {
+	/* A read of a non completed write will be a read error */
+	if (timer->posted)
+		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG)
&
+			(1 << WPINDEX(reg)))
+			;
 	return readl(timer->io_base + reg);
 }
 
-static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int
reg, u32 value)
+static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int
reg,
+					u32 value)
 {
+	/* A write on a register which has a pending write will be lost
*/
+	if (timer->posted)
+		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG)
&
+			(1 << WPINDEX(reg)))
+			;
 	writel(value, timer->io_base + reg);
-	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
-		;
 }
 
 static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
@@ -217,15 +250,17 @@ static void omap_dm_timer_reset(struct
omap_dm_timer *timer)
 	}
 	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
-	/* Set to smart-idle mode */
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
-	l |= 0x02 << 3;
+	l |= 0x02 << 3;  /* Set to smart-idle mode */
+	l |= 0x2 << 8;   /* Set clock activity to perserve f-clock on
idle */
 
-	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
-		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
+	if (cpu_class_is_omap2() && (timer == &dm_timers[0])) {
+		/* Enable wake-up only for GPT1 on OMAP2 CPUs */
+		/* FIXME: All should have this enabled and clear PRCM
status */
 		l |= 1 << 2;
-		/* Non-posted mode */
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
0);
+		/* Ensure posted mode */
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
+				(1 << 2));
 	}
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
 }
@@ -434,6 +469,8 @@ void omap_dm_timer_set_load(struct omap_dm_timer
*timer, int autoreload,
 		l &= ~OMAP_TIMER_CTRL_AR;
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+	/* ? hw feature, ttgr overtaking tldr */
+	while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG));
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 
@@ -568,6 +605,7 @@ int __init omap_dm_timer_init(void)
 	for (i = 0; i < dm_timer_count; i++) {
 		timer = &dm_timers[i];
 		timer->io_base = (void __iomem
*)io_p2v(timer->phys_base);
+		timer->posted = 1;  /* post by default */
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 		if (cpu_class_is_omap2()) {
 			char clk_name[16];

[-- Attachment #2: omap-git-post.diff --]
[-- Type: application/octet-stream, Size: 4543 bytes --]

This patch adds the use of write posting for the timer.  Previously, every
write could lock the requestor for almost 3x32KHz cycles.  This now only
synchronizes before writes and reads instead of after them and it does
it on a register per register basis.  Doing it this way there is some
chance to hide some of the sync latency.  It also removes some needless
reads when non-posted mode is there.  With out this fix the read/writes talk
almost 2% CPU load @500MHz just waiting on tick timer registers.

Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 302ad8d..e218ebd 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -67,6 +67,29 @@
 #define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload enable */
 #define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer */
 
+/* Write posting bit shift table
+ * - Values are register shift for TWPS status bits
+ * - Use of this avoids 32KHz Synchronization penalties. The cost is almost
+ *   a 3x32KHz stall.  This is comapred to a 166MHz posted access.
+ * - A value of '15' is a shift which always returns 0
+ *   for registers which are not postable
+ *
+ * Note: Faster code can be generated if a larger array is used.
+ */
+#define PROW	6
+#define PCOL	4
+#define WPINDEX(off)	(wpost[off >> 4][(off & 0xf) >> 2])
+
+static unsigned char wpost[PROW][PCOL] = {
+			/* 0, 4, 8, c */
+/* 00 */	{15, 15, 15, 15},
+/* 10 */	{15, 15, 15, 15},
+/* 20 */	{15,  0,  1,  2},
+/* 30 */	{ 3, 15,  4, 15},
+/* 40 */	{15, 15,  5, 06},
+/* 50 */	{ 7,  8,  9, 15},
+};
+
 struct omap_dm_timer {
 	unsigned long phys_base;
 	int irq;
@@ -76,6 +99,7 @@ struct omap_dm_timer {
 	void __iomem *io_base;
 	unsigned reserved:1;
 	unsigned enabled:1;
+	unsigned posted:1;
 };
 
 #ifdef CONFIG_ARCH_OMAP1
@@ -181,16 +205,25 @@ static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
 
-static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
+u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
 {
+	/* A read of a non completed write will be a read error */
+	if (timer->posted)
+		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG) &
+			(1 << WPINDEX(reg)))
+			;
 	return readl(timer->io_base + reg);
 }
 
-static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int reg, u32 value)
+static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int reg,
+					u32 value)
 {
+	/* A write on a register which has a pending write will be lost */
+	if (timer->posted)
+		while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG) &
+			(1 << WPINDEX(reg)))
+			;
 	writel(value, timer->io_base + reg);
-	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
-		;
 }
 
 static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
@@ -217,15 +250,17 @@ static void omap_dm_timer_reset(struct omap_dm_timer *timer)
 	}
 	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
-	/* Set to smart-idle mode */
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
-	l |= 0x02 << 3;
+	l |= 0x02 << 3;  /* Set to smart-idle mode */
+	l |= 0x2 << 8;   /* Set clock activity to perserve f-clock on idle */
 
-	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
-		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
+	if (cpu_class_is_omap2() && (timer == &dm_timers[0])) {
+		/* Enable wake-up only for GPT1 on OMAP2 CPUs */
+		/* FIXME: All should have this enabled and clear PRCM status */
 		l |= 1 << 2;
-		/* Non-posted mode */
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG, 0);
+		/* Ensure posted mode */
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
+				(1 << 2));
 	}
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
 }
@@ -434,6 +469,8 @@ void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
 		l &= ~OMAP_TIMER_CTRL_AR;
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+	/* ? hw feature, ttgr overtaking tldr */
+	while (readl(timer->io_base + OMAP_TIMER_WRITE_PEND_REG));
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 
@@ -568,6 +605,7 @@ int __init omap_dm_timer_init(void)
 	for (i = 0; i < dm_timer_count; i++) {
 		timer = &dm_timers[i];
 		timer->io_base = (void __iomem *)io_p2v(timer->phys_base);
+		timer->posted = 1;  /* post by default */
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 		if (cpu_class_is_omap2()) {
 			char clk_name[16];

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] dmtimer posting
  2008-03-19  2:40         ` [PATCH] dmtimer posting Woodruff, Richard
@ 2008-03-19 14:15           ` Tony Lindgren
  2008-03-19 14:47             ` Woodruff, Richard
  0 siblings, 1 reply; 26+ messages in thread
From: Tony Lindgren @ 2008-03-19 14:15 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Kevin Hilman, linux-omap

[-- Attachment #1: Type: text/plain, Size: 1039 bytes --]

Hi,

* Woodruff, Richard <r-woodruff2@ti.com> [080319 04:40]:
> Hi,
> 
> I've fixed the bits Kevin pointed out and ran checkpatch.pl and removed
> the reset of the errors.
> 
> This patch adds the use of write posting for the timer.  Previously,
> every write could lock the requestor for almost 3x32KHz cycles.  This
> now only synchronizes before writes and reads instead of after them and
> it does it on a register per register basis.  Doing it this way there is
> some chance to hide some of the sync latency.  It also removes some
> needless reads when non-posted mode is there.  With out this fix the
> read/writes talk almost 2% CPU load @500MHz just waiting on tick timer
> registers.

Cool. Here's a version that gets rid of the lookup table by encoding the
posted write pending bit into the reg offset. This should be OK, as the
functions are used within dmtimer.c only.

The init of other timers into posted mode is not done yet, so I changed
the timer->posted handling too.

Compile and boot tested only on N810 (2420).

Tony

[-- Attachment #2: omap-dmtimer-posted-v2.patch --]
[-- Type: text/x-diff, Size: 9777 bytes --]

>From 6f6611eb1558d580c20b5a6e476e7a04bde2b56d Mon Sep 17 00:00:00 2001
From: Richard Woodruff <r-woodruff2@ti.com>
Date: Wed, 19 Mar 2008 16:00:34 +0200
Subject: [PATCH] ARM: OMAP: Use posted mode for dmtimer

This patch adds the use of write posting for the timer.  Previously, every
write could lock the requestor for almost 3x32KHz cycles.  This patch only
synchronizes before writes and reads instead of after them and it does
it on per register basis.  Doing it this way there is some chance to hide
some of the sync latency.  It also removes some needless reads when
non-posted mode is there.  With out this fix the read/writes talk almost
2% CPU load @500MHz just waiting on tick timer registers.

Also define new 34xx only registers.

Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -38,34 +38,113 @@
 #include <asm/arch/irqs.h>
 
 /* register offsets */
-#define OMAP_TIMER_ID_REG		0x00
-#define OMAP_TIMER_OCP_CFG_REG		0x10
-#define OMAP_TIMER_SYS_STAT_REG		0x14
-#define OMAP_TIMER_STAT_REG		0x18
-#define OMAP_TIMER_INT_EN_REG		0x1c
-#define OMAP_TIMER_WAKEUP_EN_REG	0x20
-#define OMAP_TIMER_CTRL_REG		0x24
-#define OMAP_TIMER_COUNTER_REG		0x28
-#define OMAP_TIMER_LOAD_REG		0x2c
-#define OMAP_TIMER_TRIGGER_REG		0x30
-#define OMAP_TIMER_WRITE_PEND_REG	0x34
-#define OMAP_TIMER_MATCH_REG		0x38
-#define OMAP_TIMER_CAPTURE_REG		0x3c
-#define OMAP_TIMER_IF_CTRL_REG		0x40
-
-/* timer control reg bits */
-#define OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
-#define OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
-#define OMAP_TIMER_CTRL_PT		(1 << 12)
-#define OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
-#define OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
-#define OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
-#define OMAP_TIMER_CTRL_SCPWM		(1 << 7)
-#define OMAP_TIMER_CTRL_CE		(1 << 6)	/* compare enable */
-#define OMAP_TIMER_CTRL_PRE		(1 << 5)	/* prescaler enable */
-#define OMAP_TIMER_CTRL_PTV_SHIFT	2		/* how much to shift the prescaler value */
-#define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload enable */
-#define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer */
+#define _OMAP_TIMER_ID_OFFSET		0x00
+#define _OMAP_TIMER_OCP_CFG_OFFSET	0x10
+#define _OMAP_TIMER_SYS_STAT_OFFSET	0x14
+#define _OMAP_TIMER_STAT_OFFSET		0x18
+#define _OMAP_TIMER_INT_EN_OFFSET	0x1c
+#define _OMAP_TIMER_WAKEUP_EN_OFFSET	0x20
+#define _OMAP_TIMER_CTRL_OFFSET		0x24
+#define _OMAP_TIMER_COUNTER_OFFSET	0x28
+#define _OMAP_TIMER_LOAD_OFFSET		0x2c
+#define _OMAP_TIMER_TRIGGER_OFFSET	0x30
+#define _OMAP_TIMER_WRITE_PEND_OFFSET	0x34
+#define		WP_NONE			0	/* no write pending bit */
+#define		WP_TCLR			(1 << 0)
+#define		WP_TCRR			(1 << 1)
+#define		WP_TLDR			(1 << 2)
+#define		WP_TTGR			(1 << 3)
+#define		WP_TMAR			(1 << 4)
+#define		WP_TPIR			(1 << 5)
+#define		WP_TNIR			(1 << 6)
+#define		WP_TCVR			(1 << 7)
+#define		WP_TOCR			(1 << 8)
+#define		WP_TOWR			(1 << 9)
+#define _OMAP_TIMER_MATCH_OFFSET	0x38
+#define _OMAP_TIMER_CAPTURE_OFFSET	0x3c
+#define _OMAP_TIMER_IF_CTRL_OFFSET	0x40
+#define		OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
+#define		OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
+#define		OMAP_TIMER_CTRL_PT		(1 << 12)
+#define		OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
+#define		OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
+#define		OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
+#define		OMAP_TIMER_CTRL_SCPWM		(1 << 7)
+#define		OMAP_TIMER_CTRL_CE		(1 << 6) /* compare enable */
+#define		OMAP_TIMER_CTRL_PRE		(1 << 5) /* prescaler enable */
+#define		OMAP_TIMER_CTRL_PTV_SHIFT	2 /* prescaler value shift */
+#define		OMAP_TIMER_CTRL_POSTED		(1 << 2)
+#define		OMAP_TIMER_CTRL_AR		(1 << 1) /* auto-reload enable */
+#define		OMAP_TIMER_CTRL_ST		(1 << 0) /* start timer */
+#define _OMAP_TIMER_CAPTURE2_OFFSET		0x44	/* TCAR2, 34xx only */
+#define _OMAP_TIMER_TICK_POS_OFFSET		0x48	/* TPIR, 34xx only */
+#define _OMAP_TIMER_TICK_NEG_OFFSET		0x4c	/* TNIR, 34xx only */
+#define _OMAP_TIMER_TICK_COUNT_OFFSET		0x50	/* TCVR, 34xx only */
+#define _OMAP_TIMER_TICK_INT_MASK_SET_OFFSET	0x54	/* TOCR, 34xx only */
+#define _OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET	0x58	/* TOWR, 34xx only */
+
+/* register offsets with the write pending bit encoded */
+#define	WPSHIFT					8
+
+#define OMAP_TIMER_ID_REG			(_OMAP_TIMER_ID_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_OCP_CFG_REG			(_OMAP_TIMER_OCP_CFG_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_SYS_STAT_REG			(_OMAP_TIMER_SYS_STAT_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_STAT_REG			(_OMAP_TIMER_STAT_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_INT_EN_REG			(_OMAP_TIMER_INT_EN_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_WAKEUP_EN_REG		(_OMAP_TIMER_WAKEUP_EN_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_CTRL_REG			(_OMAP_TIMER_CTRL_OFFSET \
+							| (WP_TCLR << WPSHIFT))
+
+#define OMAP_TIMER_COUNTER_REG			(_OMAP_TIMER_COUNTER_OFFSET \
+							| (WP_TCRR << WPSHIFT))
+
+#define OMAP_TIMER_LOAD_REG			(_OMAP_TIMER_LOAD_OFFSET \
+							| (WP_TLDR << WPSHIFT))
+
+#define OMAP_TIMER_TRIGGER_REG			(_OMAP_TIMER_TRIGGER_OFFSET \
+							| (WP_TTGR << WPSHIFT))
+
+#define OMAP_TIMER_WRITE_PEND_REG		(_OMAP_TIMER_WRITE_PEND_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_MATCH_REG			(_OMAP_TIMER_MATCH_OFFSET \
+							| (WP_TMAR << WPSHIFT))
+
+#define OMAP_TIMER_CAPTURE_REG			(_OMAP_TIMER_CAPTURE_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_IF_CTRL_REG			(_OMAP_TIMER_IF_CTRL_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_CAPTURE2_REG			(_OMAP_TIMER_CAPTURE2_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_TICK_POS_REG			(_OMAP_TIMER_TICK_POS_OFFSET \
+							| (WP_TPIR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_NEG_REG			(_OMAP_TIMER_TICK_NEG_OFFSET \
+							| (WP_TNIR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_COUNT_REG		(_OMAP_TIMER_TICK_COUNT_OFFSET \
+							| (WP_TCVR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_INT_MASK_SET_REG				\
+		(_OMAP_TIMER_TICK_INT_MASK_SET_OFFSET | (WP_TOCR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_INT_MASK_COUNT_REG				\
+		(_OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET | (WP_TOWR << WPSHIFT))
 
 struct omap_dm_timer {
 	unsigned long phys_base;
@@ -76,6 +155,7 @@ struct omap_dm_timer {
 	void __iomem *io_base;
 	unsigned reserved:1;
 	unsigned enabled:1;
+	unsigned posted:1;
 };
 
 #ifdef CONFIG_ARCH_OMAP1
@@ -181,16 +261,34 @@ static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
 
-static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
+/*
+ * Reads timer registers in posted and non-posted mode. The posted mode bit
+ * is encoded in reg. Note that in posted mode write pending bit must be
+ * checked. Otherwise a read of a non completed write will produce an error.
+ */
+static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, u32 reg)
 {
-	return readl(timer->io_base + reg);
+	if (timer->posted)
+		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
+				& (reg >> WPSHIFT))
+			;
+	return readl(timer->io_base + (reg & 0xff));
 }
 
-static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int reg, u32 value)
+/*
+ * Writes timer registers in posted and non-posted mode. The posted mode bit
+ * is encoded in reg. Note that in posted mode the write pending bit must be
+ * checked. Otherwise a write on a register which has a pending write will be
+ * lost.
+ */
+static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
+						u32 value)
 {
-	writel(value, timer->io_base + reg);
-	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
-		;
+	if (timer->posted)
+		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
+				& (reg >> WPSHIFT))
+			;
+	writel(value, timer->io_base + (reg & 0xff));
 }
 
 static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
@@ -217,15 +315,21 @@ static void omap_dm_timer_reset(struct omap_dm_timer *timer)
 	}
 	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
-	/* Set to smart-idle mode */
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
-	l |= 0x02 << 3;
-
-	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
-		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
+	l |= 0x02 << 3;  /* Set to smart-idle mode */
+	l |= 0x2 << 8;   /* Set clock activity to perserve f-clock on idle */
+
+	/*
+	 * Enable wake-up only for GPT1 on OMAP2 CPUs.
+	 * FIXME: All timers should have wake-up enabled and clear
+	 * PRCM status. Posted mode should be set by default.
+	 */
+	if (cpu_class_is_omap2() && (timer == &dm_timers[0])) {
 		l |= 1 << 2;
-		/* Non-posted mode */
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG, 0);
+		/* Ensure posted mode */
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
+							OMAP_TIMER_CTRL_POSTED);
+		timer->posted = 1;
 	}
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
 }
@@ -434,6 +538,10 @@ void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
 		l &= ~OMAP_TIMER_CTRL_AR;
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+
+	/* REVISIT: hw feature, ttgr overtaking tldr? */
+	while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff)));
+
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 
@@ -568,6 +676,7 @@ int __init omap_dm_timer_init(void)
 	for (i = 0; i < dm_timer_count; i++) {
 		timer = &dm_timers[i];
 		timer->io_base = (void __iomem *)io_p2v(timer->phys_base);
+		/* REVISIT: Set posted mode by default */
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 		if (cpu_class_is_omap2()) {
 			char clk_name[16];


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH] dmtimer posting
  2008-03-19 14:15           ` Tony Lindgren
@ 2008-03-19 14:47             ` Woodruff, Richard
  2008-03-19 15:58               ` Tony Lindgren
  0 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-19 14:47 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

Hi,

> Cool. Here's a version that gets rid of the lookup table by encoding
the
> posted write pending bit into the reg offset. This should be OK, as
the
> functions are used within dmtimer.c only.

Ok, that looks nice and will generate a bit better code.  I thought
briefly on that it but wanted the change to be small and was a little
worried someone might use the offset somewhere else.  But, they way you
did it makes the 2nd aspect go away.

* You have defined WPSHIFT to 8.  However, that bit is currently taken
by WP_TOCR.  I chose 15 hoping if the register expanded it would be the
last one in a u_int16 and still can be encoded in an op code as a shift
value.  A 16 would be a good codegen value, but always assume u_int32
register.  As the registers are u_int32 in current implementations,
probably 16 is a better value.

> The init of other timers into posted mode is not done yet, so I
changed
> the timer->posted handling too.

Not inited in software, BUT the power on reset of the timer module is to
posted mode in reset.  Its better to have someone shut it off in both
places as its on by default.

I'll tweak the points mentioned and fix a typo in the description and
send a version.

Regards,
Richard W.




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] dmtimer posting
  2008-03-19 14:47             ` Woodruff, Richard
@ 2008-03-19 15:58               ` Tony Lindgren
  2008-03-19 22:13                 ` Woodruff, Richard
  0 siblings, 1 reply; 26+ messages in thread
From: Tony Lindgren @ 2008-03-19 15:58 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Kevin Hilman, linux-omap

* Woodruff, Richard <r-woodruff2@ti.com> [080319 16:47]:
> Hi,
> 
> > Cool. Here's a version that gets rid of the lookup table by encoding
> the
> > posted write pending bit into the reg offset. This should be OK, as
> the
> > functions are used within dmtimer.c only.
> 
> Ok, that looks nice and will generate a bit better code.  I thought
> briefly on that it but wanted the change to be small and was a little
> worried someone might use the offset somewhere else.  But, they way you
> did it makes the 2nd aspect go away.
> 
> * You have defined WPSHIFT to 8.  However, that bit is currently taken
> by WP_TOCR.  I chose 15 hoping if the register expanded it would be the
> last one in a u_int16 and still can be encoded in an op code as a shift
> value.  A 16 would be a good codegen value, but always assume u_int32
> register.  As the registers are u_int32 in current implementations,
> probably 16 is a better value.

OK, good catch.

> > The init of other timers into posted mode is not done yet, so I
> changed
> > the timer->posted handling too.
> 
> Not inited in software, BUT the power on reset of the timer module is to
> posted mode in reset.  Its better to have someone shut it off in both
> places as its on by default.

OK

> I'll tweak the points mentioned and fix a typo in the description and
> send a version.

OK

Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [RFC] dmtimer library is very inefficient today
  2008-03-18 18:33       ` Kevin Hilman
  2008-03-18 18:51         ` Woodruff, Richard
  2008-03-19  2:40         ` [PATCH] dmtimer posting Woodruff, Richard
@ 2008-03-19 19:52         ` Ladislav Michl
  2008-03-20  8:58           ` Tony Lindgren
  2 siblings, 1 reply; 26+ messages in thread
From: Ladislav Michl @ 2008-03-19 19:52 UTC (permalink / raw)
  To: Kevin Hilman; +Cc: linux-omap

On Tue, Mar 18, 2008 at 11:33:40AM -0700, Kevin Hilman wrote:
> and upstream folks tend dislike the polling loops like this:
> 
>    while(condition);
> 
> and prefer the semicolon on its own line to be clear that the loop is
> indeed doing nothing.
> 
>    while(condition)
>            ;
> 

Once there, polling loops usually looks like this

	while (condition)
		cpu_relax();


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] dmtimer posting
  2008-03-19 15:58               ` Tony Lindgren
@ 2008-03-19 22:13                 ` Woodruff, Richard
  2008-03-20 11:57                   ` Tony Lindgren
  0 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-19 22:13 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

[-- Attachment #1: Type: text/plain, Size: 2483 bytes --]

Hi,

> > > Cool. Here's a version that gets rid of the lookup table by
encoding
> > the
> > > posted write pending bit into the reg offset. This should be OK,
as
> > the
> > > functions are used within dmtimer.c only.

I updated your changed patch with the couple things I mentioned below.

I don't have a board handy now to check but changes are minor.

One note is some more optimization could happen later on with
reordering.  The current usage in gptimer has dmtimer_set_load()
followed by a dm_timer_start().  This causes a read/write/read/write of
the CTRL register.  Those writes could be collapsed into each other.
With a load_start().  Others maybe elsewhere.

Measurements seem to show timer CPU % usage dropping by /3 using the
original patch. Perhaps a little better with this one. However, the
timer still shows up in the profile list.

> > Ok, that looks nice and will generate a bit better code.  I thought
> > briefly on that it but wanted the change to be small and was a
little
> > worried someone might use the offset somewhere else.  But, they way
you
> > did it makes the 2nd aspect go away.
> >
> > * You have defined WPSHIFT to 8.  However, that bit is currently
taken
> > by WP_TOCR.  I chose 15 hoping if the register expanded it would be
the
> > last one in a u_int16 and still can be encoded in an op code as a
shift
> > value.  A 16 would be a good codegen value, but always assume
u_int32
> > register.  As the registers are u_int32 in current implementations,
> > probably 16 is a better value.
> 
> OK, good catch.

Now WPSHIFT is 16.

I was only half getting the intent before.  Now the offset looks like:
	31:16[posting bit]15:0[offset].

I wonder if read/write reg check is reordered if the compiler is smart
enough to not do the register read if first expression evaluates to 0.
	If( reg >> WPSHFIT) & (readl(xyz) & 0xff)

> > > The init of other timers into posted mode is not done yet, so I
> > changed
> > > the timer->posted handling too.
> >
> > Not inited in software, BUT the power on reset of the timer module
is to
> > posted mode in reset.  Its better to have someone shut it off in
both
> > places as its on by default.
> 
> OK

Now reset function does match hardware init default with posted on.
 
> > I'll tweak the points mentioned and fix a typo in the description
and
> > send a version.
> 
> OK

I fixed the typo in the description.

Regards,
Richard W.

[-- Attachment #2: omap-dmtimer-posted-v3.patch --]
[-- Type: application/octet-stream, Size: 9369 bytes --]

This patch adds the use of write posting for the timer.  Previously, every
write could lock the requestor for almost 3x32KHz cycles.  This patch only
synchronizes before writes and reads instead of after them and it does
it on per register basis.  Doing it this way there is some chance to hide
some of the sync latency.  It also removes some needless reads when
non-posted mode is there.  With out this fix the read/writes take almost
2% CPU load @500MHz just waiting on tick timer registers.

Also define new 34xx only registers.

Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 302ad8d..99c8809 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -38,34 +38,113 @@
 #include <asm/arch/irqs.h>
 
 /* register offsets */
-#define OMAP_TIMER_ID_REG		0x00
-#define OMAP_TIMER_OCP_CFG_REG		0x10
-#define OMAP_TIMER_SYS_STAT_REG		0x14
-#define OMAP_TIMER_STAT_REG		0x18
-#define OMAP_TIMER_INT_EN_REG		0x1c
-#define OMAP_TIMER_WAKEUP_EN_REG	0x20
-#define OMAP_TIMER_CTRL_REG		0x24
-#define OMAP_TIMER_COUNTER_REG		0x28
-#define OMAP_TIMER_LOAD_REG		0x2c
-#define OMAP_TIMER_TRIGGER_REG		0x30
-#define OMAP_TIMER_WRITE_PEND_REG	0x34
-#define OMAP_TIMER_MATCH_REG		0x38
-#define OMAP_TIMER_CAPTURE_REG		0x3c
-#define OMAP_TIMER_IF_CTRL_REG		0x40
-
-/* timer control reg bits */
-#define OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
-#define OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
-#define OMAP_TIMER_CTRL_PT		(1 << 12)
-#define OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
-#define OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
-#define OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
-#define OMAP_TIMER_CTRL_SCPWM		(1 << 7)
-#define OMAP_TIMER_CTRL_CE		(1 << 6)	/* compare enable */
-#define OMAP_TIMER_CTRL_PRE		(1 << 5)	/* prescaler enable */
-#define OMAP_TIMER_CTRL_PTV_SHIFT	2		/* how much to shift the prescaler value */
-#define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload enable */
-#define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer */
+#define _OMAP_TIMER_ID_OFFSET		0x00
+#define _OMAP_TIMER_OCP_CFG_OFFSET	0x10
+#define _OMAP_TIMER_SYS_STAT_OFFSET	0x14
+#define _OMAP_TIMER_STAT_OFFSET		0x18
+#define _OMAP_TIMER_INT_EN_OFFSET	0x1c
+#define _OMAP_TIMER_WAKEUP_EN_OFFSET	0x20
+#define _OMAP_TIMER_CTRL_OFFSET		0x24
+#define _OMAP_TIMER_COUNTER_OFFSET	0x28
+#define _OMAP_TIMER_LOAD_OFFSET		0x2c
+#define _OMAP_TIMER_TRIGGER_OFFSET	0x30
+#define _OMAP_TIMER_WRITE_PEND_OFFSET	0x34
+#define		WP_NONE			0	/* no write pending bit */
+#define		WP_TCLR			(1 << 0)
+#define		WP_TCRR			(1 << 1)
+#define		WP_TLDR			(1 << 2)
+#define		WP_TTGR			(1 << 3)
+#define		WP_TMAR			(1 << 4)
+#define		WP_TPIR			(1 << 5)
+#define		WP_TNIR			(1 << 6)
+#define		WP_TCVR			(1 << 7)
+#define		WP_TOCR			(1 << 8)
+#define		WP_TOWR			(1 << 9)
+#define _OMAP_TIMER_MATCH_OFFSET	0x38
+#define _OMAP_TIMER_CAPTURE_OFFSET	0x3c
+#define _OMAP_TIMER_IF_CTRL_OFFSET	0x40
+#define		OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
+#define		OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
+#define		OMAP_TIMER_CTRL_PT		(1 << 12)
+#define		OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
+#define		OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
+#define		OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
+#define		OMAP_TIMER_CTRL_SCPWM		(1 << 7)
+#define		OMAP_TIMER_CTRL_CE		(1 << 6) /* compare enable */
+#define		OMAP_TIMER_CTRL_PRE		(1 << 5) /* prescaler enable */
+#define		OMAP_TIMER_CTRL_PTV_SHIFT	2 /* prescaler value shift */
+#define		OMAP_TIMER_CTRL_POSTED		(1 << 2)
+#define		OMAP_TIMER_CTRL_AR		(1 << 1) /* auto-reload enable */
+#define		OMAP_TIMER_CTRL_ST		(1 << 0) /* start timer */
+#define _OMAP_TIMER_CAPTURE2_OFFSET		0x44	/* TCAR2, 34xx only */
+#define _OMAP_TIMER_TICK_POS_OFFSET		0x48	/* TPIR, 34xx only */
+#define _OMAP_TIMER_TICK_NEG_OFFSET		0x4c	/* TNIR, 34xx only */
+#define _OMAP_TIMER_TICK_COUNT_OFFSET		0x50	/* TCVR, 34xx only */
+#define _OMAP_TIMER_TICK_INT_MASK_SET_OFFSET	0x54	/* TOCR, 34xx only */
+#define _OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET	0x58	/* TOWR, 34xx only */
+
+/* register offsets with the write pending bit encoded */
+#define	WPSHIFT					16
+
+#define OMAP_TIMER_ID_REG			(_OMAP_TIMER_ID_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_OCP_CFG_REG			(_OMAP_TIMER_OCP_CFG_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_SYS_STAT_REG			(_OMAP_TIMER_SYS_STAT_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_STAT_REG			(_OMAP_TIMER_STAT_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_INT_EN_REG			(_OMAP_TIMER_INT_EN_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_WAKEUP_EN_REG		(_OMAP_TIMER_WAKEUP_EN_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_CTRL_REG			(_OMAP_TIMER_CTRL_OFFSET \
+							| (WP_TCLR << WPSHIFT))
+
+#define OMAP_TIMER_COUNTER_REG			(_OMAP_TIMER_COUNTER_OFFSET \
+							| (WP_TCRR << WPSHIFT))
+
+#define OMAP_TIMER_LOAD_REG			(_OMAP_TIMER_LOAD_OFFSET \
+							| (WP_TLDR << WPSHIFT))
+
+#define OMAP_TIMER_TRIGGER_REG			(_OMAP_TIMER_TRIGGER_OFFSET \
+							| (WP_TTGR << WPSHIFT))
+
+#define OMAP_TIMER_WRITE_PEND_REG		(_OMAP_TIMER_WRITE_PEND_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_MATCH_REG			(_OMAP_TIMER_MATCH_OFFSET \
+							| (WP_TMAR << WPSHIFT))
+
+#define OMAP_TIMER_CAPTURE_REG			(_OMAP_TIMER_CAPTURE_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_IF_CTRL_REG			(_OMAP_TIMER_IF_CTRL_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_CAPTURE2_REG			(_OMAP_TIMER_CAPTURE2_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_TICK_POS_REG			(_OMAP_TIMER_TICK_POS_OFFSET \
+							| (WP_TPIR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_NEG_REG			(_OMAP_TIMER_TICK_NEG_OFFSET \
+							| (WP_TNIR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_COUNT_REG		(_OMAP_TIMER_TICK_COUNT_OFFSET \
+							| (WP_TCVR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_INT_MASK_SET_REG				\
+		(_OMAP_TIMER_TICK_INT_MASK_SET_OFFSET | (WP_TOCR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_INT_MASK_COUNT_REG				\
+		(_OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET | (WP_TOWR << WPSHIFT))
 
 struct omap_dm_timer {
 	unsigned long phys_base;
@@ -76,6 +155,7 @@ struct omap_dm_timer {
 	void __iomem *io_base;
 	unsigned reserved:1;
 	unsigned enabled:1;
+	unsigned posted:1;
 };
 
 #ifdef CONFIG_ARCH_OMAP1
@@ -181,16 +261,34 @@ static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
 
-static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
+/*
+ * Reads timer registers in posted and non-posted mode. The posted mode bit
+ * is encoded in reg. Note that in posted mode write pending bit must be
+ * checked. Otherwise a read of a non completed write will produce an error.
+ */
+static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, u32 reg)
 {
-	return readl(timer->io_base + reg);
+	if (timer->posted)
+		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
+				& (reg >> WPSHIFT))
+			;
+	return readl(timer->io_base + (reg & 0xff));
 }
 
-static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int reg, u32 value)
+/*
+ * Writes timer registers in posted and non-posted mode. The posted mode bit
+ * is encoded in reg. Note that in posted mode the write pending bit must be
+ * checked. Otherwise a write on a register which has a pending write will be
+ * lost.
+ */
+static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
+						u32 value)
 {
-	writel(value, timer->io_base + reg);
-	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
-		;
+	if (timer->posted)
+		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
+				& (reg >> WPSHIFT))
+			;
+	writel(value, timer->io_base + (reg & 0xff));
 }
 
 static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
@@ -217,17 +315,23 @@ static void omap_dm_timer_reset(struct omap_dm_timer *timer)
 	}
 	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
-	/* Set to smart-idle mode */
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
-	l |= 0x02 << 3;
-
-	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
-		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
+	l |= 0x02 << 3;  /* Set to smart-idle mode */
+	l |= 0x2 << 8;   /* Set clock activity to perserve f-clock on idle */
+
+	/*
+	 * Enable wake-up only for GPT1 on OMAP2 CPUs.
+	 * FIXME: All timers should have wake-up enabled and clear
+	 * PRCM status.
+	 */
+	if (cpu_class_is_omap2() && (timer == &dm_timers[0]))
 		l |= 1 << 2;
-		/* Non-posted mode */
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG, 0);
-	}
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
+
+	/* Match hardware reset default of posted mode */
+	omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
+			OMAP_TIMER_CTRL_POSTED);
+	timer->posted = 1;
 }
 
 static void omap_dm_timer_prepare(struct omap_dm_timer *timer)
@@ -434,6 +538,10 @@ void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
 		l &= ~OMAP_TIMER_CTRL_AR;
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+
+	/* REVISIT: hw feature, ttgr overtaking tldr? */
+	while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff)));
+
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] [RFC] dmtimer library is very inefficient today
  2008-03-19 19:52         ` [PATCH] [RFC] dmtimer library is very inefficient today Ladislav Michl
@ 2008-03-20  8:58           ` Tony Lindgren
  0 siblings, 0 replies; 26+ messages in thread
From: Tony Lindgren @ 2008-03-20  8:58 UTC (permalink / raw)
  To: Ladislav Michl; +Cc: Kevin Hilman, linux-omap

* Ladislav Michl <ladis@linux-mips.org> [080319 22:32]:
> On Tue, Mar 18, 2008 at 11:33:40AM -0700, Kevin Hilman wrote:
> > and upstream folks tend dislike the polling loops like this:
> > 
> >    while(condition);
> > 
> > and prefer the semicolon on its own line to be clear that the loop is
> > indeed doing nothing.
> > 
> >    while(condition)
> >            ;
> > 
> 
> Once there, polling loops usually looks like this
> 
> 	while (condition)
> 		cpu_relax();

Thanks, I'll add those to the while looops.

Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] dmtimer posting
  2008-03-19 22:13                 ` Woodruff, Richard
@ 2008-03-20 11:57                   ` Tony Lindgren
  2008-03-20 12:19                     ` Tony Lindgren
  2008-03-21  0:01                     ` Woodruff, Richard
  0 siblings, 2 replies; 26+ messages in thread
From: Tony Lindgren @ 2008-03-20 11:57 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Kevin Hilman, linux-omap

[-- Attachment #1: Type: text/plain, Size: 2892 bytes --]

* Woodruff, Richard <r-woodruff2@ti.com> [080320 00:14]:
> Hi,
> 
> > > > Cool. Here's a version that gets rid of the lookup table by
> encoding
> > > the
> > > > posted write pending bit into the reg offset. This should be OK,
> as
> > > the
> > > > functions are used within dmtimer.c only.
> 
> I updated your changed patch with the couple things I mentioned below.
> 
> I don't have a board handy now to check but changes are minor.
> 
> One note is some more optimization could happen later on with
> reordering.  The current usage in gptimer has dmtimer_set_load()
> followed by a dm_timer_start().  This causes a read/write/read/write of
> the CTRL register.  Those writes could be collapsed into each other.
> With a load_start().  Others maybe elsewhere.

OK. We might also want to optimize the timer reload function. I'll post
a separate patch on that to experiment with.

> Measurements seem to show timer CPU % usage dropping by /3 using the
> original patch. Perhaps a little better with this one. However, the
> timer still shows up in the profile list.
> 
> > > Ok, that looks nice and will generate a bit better code.  I thought
> > > briefly on that it but wanted the change to be small and was a
> little
> > > worried someone might use the offset somewhere else.  But, they way
> you
> > > did it makes the 2nd aspect go away.
> > >
> > > * You have defined WPSHIFT to 8.  However, that bit is currently
> taken
> > > by WP_TOCR.  I chose 15 hoping if the register expanded it would be
> the
> > > last one in a u_int16 and still can be encoded in an op code as a
> shift
> > > value.  A 16 would be a good codegen value, but always assume
> u_int32
> > > register.  As the registers are u_int32 in current implementations,
> > > probably 16 is a better value.
> > 
> > OK, good catch.
> 
> Now WPSHIFT is 16.
> 
> I was only half getting the intent before.  Now the offset looks like:
> 	31:16[posting bit]15:0[offset].

OK

> I wonder if read/write reg check is reordered if the compiler is smart
> enough to not do the register read if first expression evaluates to 0.
> 	If( reg >> WPSHFIT) & (readl(xyz) & 0xff)

Yeah that might work.

> > > > The init of other timers into posted mode is not done yet, so I
> > > changed
> > > > the timer->posted handling too.
> > >
> > > Not inited in software, BUT the power on reset of the timer module
> is to
> > > posted mode in reset.  Its better to have someone shut it off in
> both
> > > places as its on by default.
> > 
> > OK
> 
> Now reset function does match hardware init default with posted on.

Cool

> > > I'll tweak the points mentioned and fix a typo in the description
> and
> > > send a version.
> > 
> > OK
> 
> I fixed the typo in the description.

Attached is a rev-4 with the cpu_relax() added to while loops as
suggested by Ladislav, and CTRL register defines moved to the right
location.

Regards,

Tony

[-- Attachment #2: omap-dmtimer-posted-v4.patch --]
[-- Type: text/x-diff, Size: 19526 bytes --]

commit 5e092c5af8ccbbebde36fe5f5f21e047a77da86e
Author: Richard Woodruff <r-woodruff2@ti.com>
Date:   Wed Mar 19 16:00:34 2008 +0200

    ARM: OMAP: Use posted mode for dmtimer
    
    This patch adds the use of write posting for the timer.  Previously, every
    write could lock the requestor for almost 3x32KHz cycles.  This patch only
    synchronizes before writes and reads instead of after them and it does
    it on per register basis.  Doing it this way there is some chance to hide
    some of the sync latency.  It also removes some needless reads when
    non-posted mode is there.  With out this fix the read/writes take almost
    2% CPU load @500MHz just waiting on tick timer registers.
    
    Also define new 34xx only registers.
    
    Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
    Signed-off-by: Tony Lindgren <tony@atomide.com>
    
    --- a/arch/arm/plat-omap/dmtimer.c
    +++ b/arch/arm/plat-omap/dmtimer.c
    @@ -38,34 +38,113 @@
     #include <asm/arch/irqs.h>
    
     /* register offsets */
    -#define OMAP_TIMER_ID_REG		0x00
    -#define OMAP_TIMER_OCP_CFG_REG		0x10
    -#define OMAP_TIMER_SYS_STAT_REG		0x14
    -#define OMAP_TIMER_STAT_REG		0x18
    -#define OMAP_TIMER_INT_EN_REG		0x1c
    -#define OMAP_TIMER_WAKEUP_EN_REG	0x20
    -#define OMAP_TIMER_CTRL_REG		0x24
    -#define OMAP_TIMER_COUNTER_REG		0x28
    -#define OMAP_TIMER_LOAD_REG		0x2c
    -#define OMAP_TIMER_TRIGGER_REG		0x30
    -#define OMAP_TIMER_WRITE_PEND_REG	0x34
    -#define OMAP_TIMER_MATCH_REG		0x38
    -#define OMAP_TIMER_CAPTURE_REG		0x3c
    -#define OMAP_TIMER_IF_CTRL_REG		0x40
    -
    -/* timer control reg bits */
    -#define OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
    -#define OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
    -#define OMAP_TIMER_CTRL_PT		(1 << 12)
    -#define OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
    -#define OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
    -#define OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
    -#define OMAP_TIMER_CTRL_SCPWM		(1 << 7)
    -#define OMAP_TIMER_CTRL_CE		(1 << 6)	/* compare enable */
    -#define OMAP_TIMER_CTRL_PRE		(1 << 5)	/* prescaler enable */
    -#define OMAP_TIMER_CTRL_PTV_SHIFT	2		/* how much to shift the prescaler value */
    -#define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload enable */
    -#define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer */
    +#define _OMAP_TIMER_ID_OFFSET		0x00
    +#define _OMAP_TIMER_OCP_CFG_OFFSET	0x10
    +#define _OMAP_TIMER_SYS_STAT_OFFSET	0x14
    +#define _OMAP_TIMER_STAT_OFFSET		0x18
    +#define _OMAP_TIMER_INT_EN_OFFSET	0x1c
    +#define _OMAP_TIMER_WAKEUP_EN_OFFSET	0x20
    +#define _OMAP_TIMER_CTRL_OFFSET		0x24
    +#define _OMAP_TIMER_COUNTER_OFFSET	0x28
    +#define _OMAP_TIMER_LOAD_OFFSET		0x2c
    +#define _OMAP_TIMER_TRIGGER_OFFSET	0x30
    +#define _OMAP_TIMER_WRITE_PEND_OFFSET	0x34
    +#define		WP_NONE			0	/* no write pending bit */
    +#define		WP_TCLR			(1 << 0)
    +#define		WP_TCRR			(1 << 1)
    +#define		WP_TLDR			(1 << 2)
    +#define		WP_TTGR			(1 << 3)
    +#define		WP_TMAR			(1 << 4)
    +#define		WP_TPIR			(1 << 5)
    +#define		WP_TNIR			(1 << 6)
    +#define		WP_TCVR			(1 << 7)
    +#define		WP_TOCR			(1 << 8)
    +#define		WP_TOWR			(1 << 9)
    +#define _OMAP_TIMER_MATCH_OFFSET	0x38
    +#define _OMAP_TIMER_CAPTURE_OFFSET	0x3c
    +#define _OMAP_TIMER_IF_CTRL_OFFSET	0x40
    +#define		OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
    +#define		OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
    +#define		OMAP_TIMER_CTRL_PT		(1 << 12)
    +#define		OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
    +#define		OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
    +#define		OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
    +#define		OMAP_TIMER_CTRL_SCPWM		(1 << 7)
    +#define		OMAP_TIMER_CTRL_CE		(1 << 6) /* compare enable */
    +#define		OMAP_TIMER_CTRL_PRE		(1 << 5) /* prescaler enable */
    +#define		OMAP_TIMER_CTRL_PTV_SHIFT	2 /* prescaler value shift */
    +#define		OMAP_TIMER_CTRL_POSTED		(1 << 2)
    +#define		OMAP_TIMER_CTRL_AR		(1 << 1) /* auto-reload enable */
    +#define		OMAP_TIMER_CTRL_ST		(1 << 0) /* start timer */
    +#define _OMAP_TIMER_CAPTURE2_OFFSET		0x44	/* TCAR2, 34xx only */
    +#define _OMAP_TIMER_TICK_POS_OFFSET		0x48	/* TPIR, 34xx only */
    +#define _OMAP_TIMER_TICK_NEG_OFFSET		0x4c	/* TNIR, 34xx only */
    +#define _OMAP_TIMER_TICK_COUNT_OFFSET		0x50	/* TCVR, 34xx only */
    +#define _OMAP_TIMER_TICK_INT_MASK_SET_OFFSET	0x54	/* TOCR, 34xx only */
    +#define _OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET	0x58	/* TOWR, 34xx only */
    +
    +/* register offsets with the write pending bit encoded */
    +#define	WPSHIFT					8
    +
    +#define OMAP_TIMER_ID_REG			(_OMAP_TIMER_ID_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_OCP_CFG_REG			(_OMAP_TIMER_OCP_CFG_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_SYS_STAT_REG			(_OMAP_TIMER_SYS_STAT_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_STAT_REG			(_OMAP_TIMER_STAT_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_INT_EN_REG			(_OMAP_TIMER_INT_EN_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_WAKEUP_EN_REG		(_OMAP_TIMER_WAKEUP_EN_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_CTRL_REG			(_OMAP_TIMER_CTRL_OFFSET \
    +							| (WP_TCLR << WPSHIFT))
    +
    +#define OMAP_TIMER_COUNTER_REG			(_OMAP_TIMER_COUNTER_OFFSET \
    +							| (WP_TCRR << WPSHIFT))
    +
    +#define OMAP_TIMER_LOAD_REG			(_OMAP_TIMER_LOAD_OFFSET \
    +							| (WP_TLDR << WPSHIFT))
    +
    +#define OMAP_TIMER_TRIGGER_REG			(_OMAP_TIMER_TRIGGER_OFFSET \
    +							| (WP_TTGR << WPSHIFT))
    +
    +#define OMAP_TIMER_WRITE_PEND_REG		(_OMAP_TIMER_WRITE_PEND_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_MATCH_REG			(_OMAP_TIMER_MATCH_OFFSET \
    +							| (WP_TMAR << WPSHIFT))
    +
    +#define OMAP_TIMER_CAPTURE_REG			(_OMAP_TIMER_CAPTURE_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_IF_CTRL_REG			(_OMAP_TIMER_IF_CTRL_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_CAPTURE2_REG			(_OMAP_TIMER_CAPTURE2_OFFSET \
    +							| (WP_NONE << WPSHIFT))
    +
    +#define OMAP_TIMER_TICK_POS_REG			(_OMAP_TIMER_TICK_POS_OFFSET \
    +							| (WP_TPIR << WPSHIFT))
    +
    +#define OMAP_TIMER_TICK_NEG_REG			(_OMAP_TIMER_TICK_NEG_OFFSET \
    +							| (WP_TNIR << WPSHIFT))
    +
    +#define OMAP_TIMER_TICK_COUNT_REG		(_OMAP_TIMER_TICK_COUNT_OFFSET \
    +							| (WP_TCVR << WPSHIFT))
    +
    +#define OMAP_TIMER_TICK_INT_MASK_SET_REG				\
    +		(_OMAP_TIMER_TICK_INT_MASK_SET_OFFSET | (WP_TOCR << WPSHIFT))
    +
    +#define OMAP_TIMER_TICK_INT_MASK_COUNT_REG				\
    +		(_OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET | (WP_TOWR << WPSHIFT))
    
     struct omap_dm_timer {
     	unsigned long phys_base;
    @@ -76,6 +155,7 @@ struct omap_dm_timer {
     	void __iomem *io_base;
     	unsigned reserved:1;
     	unsigned enabled:1;
    +	unsigned posted:1;
     };
    
     #ifdef CONFIG_ARCH_OMAP1
    @@ -181,16 +261,34 @@ static struct clk **dm_source_clocks;
    
     static spinlock_t dm_timer_lock;
    
    -static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
    +/*
    + * Reads timer registers in posted and non-posted mode. The posted mode bit
    + * is encoded in reg. Note that in posted mode write pending bit must be
    + * checked. Otherwise a read of a non completed write will produce an error.
    + */
    +static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, u32 reg)
     {
    -	return readl(timer->io_base + reg);
    +	if (timer->posted)
    +		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
    +				& (reg >> WPSHIFT))
    +			;
    +	return readl(timer->io_base + (reg & 0xff));
     }
    
    -static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int reg, u32 value)
    +/*
    + * Writes timer registers in posted and non-posted mode. The posted mode bit
    + * is encoded in reg. Note that in posted mode the write pending bit must be
    + * checked. Otherwise a write on a register which has a pending write will be
    + * lost.
    + */
    +static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
    +						u32 value)
     {
    -	writel(value, timer->io_base + reg);
    -	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
    -		;
    +	if (timer->posted)
    +		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
    +				& (reg >> WPSHIFT))
    +			;
    +	writel(value, timer->io_base + (reg & 0xff));
     }
    
     static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
    @@ -217,15 +315,21 @@ static void omap_dm_timer_reset(struct omap_dm_timer *timer)
     	}
     	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
    
    -	/* Set to smart-idle mode */
     	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
    -	l |= 0x02 << 3;
    -
    -	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
    -		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
    +	l |= 0x02 << 3;  /* Set to smart-idle mode */
    +	l |= 0x2 << 8;   /* Set clock activity to perserve f-clock on idle */
    +
    +	/*
    +	 * Enable wake-up only for GPT1 on OMAP2 CPUs.
    +	 * FIXME: All timers should have wake-up enabled and clear
    +	 * PRCM status. Posted mode should be set by default.
    +	 */
    +	if (cpu_class_is_omap2() && (timer == &dm_timers[0])) {
     		l |= 1 << 2;
    -		/* Non-posted mode */
    -		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG, 0);
    +		/* Ensure posted mode */
    +		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
    +							OMAP_TIMER_CTRL_POSTED);
    +		timer->posted = 1;
     	}
     	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
     }
    @@ -434,6 +538,10 @@ void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
     		l &= ~OMAP_TIMER_CTRL_AR;
     	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
     	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
    +
    +	/* REVISIT: hw feature, ttgr overtaking tldr? */
    +	while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff)));
    +
     	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
     }
    
    @@ -568,6 +676,7 @@ int __init omap_dm_timer_init(void)
     	for (i = 0; i < dm_timer_count; i++) {
     		timer = &dm_timers[i];
     		timer->io_base = (void __iomem *)io_p2v(timer->phys_base);
    +		/* REVISIT: Set posted mode by default */
     #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
     		if (cpu_class_is_omap2()) {
     			char clk_name[16];

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 302ad8d..822b6bb 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -38,34 +38,113 @@
 #include <asm/arch/irqs.h>
 
 /* register offsets */
-#define OMAP_TIMER_ID_REG		0x00
-#define OMAP_TIMER_OCP_CFG_REG		0x10
-#define OMAP_TIMER_SYS_STAT_REG		0x14
-#define OMAP_TIMER_STAT_REG		0x18
-#define OMAP_TIMER_INT_EN_REG		0x1c
-#define OMAP_TIMER_WAKEUP_EN_REG	0x20
-#define OMAP_TIMER_CTRL_REG		0x24
-#define OMAP_TIMER_COUNTER_REG		0x28
-#define OMAP_TIMER_LOAD_REG		0x2c
-#define OMAP_TIMER_TRIGGER_REG		0x30
-#define OMAP_TIMER_WRITE_PEND_REG	0x34
-#define OMAP_TIMER_MATCH_REG		0x38
-#define OMAP_TIMER_CAPTURE_REG		0x3c
-#define OMAP_TIMER_IF_CTRL_REG		0x40
-
-/* timer control reg bits */
-#define OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
-#define OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
-#define OMAP_TIMER_CTRL_PT		(1 << 12)
-#define OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
-#define OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
-#define OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
-#define OMAP_TIMER_CTRL_SCPWM		(1 << 7)
-#define OMAP_TIMER_CTRL_CE		(1 << 6)	/* compare enable */
-#define OMAP_TIMER_CTRL_PRE		(1 << 5)	/* prescaler enable */
-#define OMAP_TIMER_CTRL_PTV_SHIFT	2		/* how much to shift the prescaler value */
-#define OMAP_TIMER_CTRL_AR		(1 << 1)	/* auto-reload enable */
-#define OMAP_TIMER_CTRL_ST		(1 << 0)	/* start timer */
+#define _OMAP_TIMER_ID_OFFSET		0x00
+#define _OMAP_TIMER_OCP_CFG_OFFSET	0x10
+#define _OMAP_TIMER_SYS_STAT_OFFSET	0x14
+#define _OMAP_TIMER_STAT_OFFSET		0x18
+#define _OMAP_TIMER_INT_EN_OFFSET	0x1c
+#define _OMAP_TIMER_WAKEUP_EN_OFFSET	0x20
+#define _OMAP_TIMER_CTRL_OFFSET		0x24
+#define		OMAP_TIMER_CTRL_GPOCFG		(1 << 14)
+#define		OMAP_TIMER_CTRL_CAPTMODE	(1 << 13)
+#define		OMAP_TIMER_CTRL_PT		(1 << 12)
+#define		OMAP_TIMER_CTRL_TCM_LOWTOHIGH	(0x1 << 8)
+#define		OMAP_TIMER_CTRL_TCM_HIGHTOLOW	(0x2 << 8)
+#define		OMAP_TIMER_CTRL_TCM_BOTHEDGES	(0x3 << 8)
+#define		OMAP_TIMER_CTRL_SCPWM		(1 << 7)
+#define		OMAP_TIMER_CTRL_CE		(1 << 6) /* compare enable */
+#define		OMAP_TIMER_CTRL_PRE		(1 << 5) /* prescaler enable */
+#define		OMAP_TIMER_CTRL_PTV_SHIFT	2 /* prescaler value shift */
+#define		OMAP_TIMER_CTRL_POSTED		(1 << 2)
+#define		OMAP_TIMER_CTRL_AR		(1 << 1) /* auto-reload enable */
+#define		OMAP_TIMER_CTRL_ST		(1 << 0) /* start timer */
+#define _OMAP_TIMER_COUNTER_OFFSET	0x28
+#define _OMAP_TIMER_LOAD_OFFSET		0x2c
+#define _OMAP_TIMER_TRIGGER_OFFSET	0x30
+#define _OMAP_TIMER_WRITE_PEND_OFFSET	0x34
+#define		WP_NONE			0	/* no write pending bit */
+#define		WP_TCLR			(1 << 0)
+#define		WP_TCRR			(1 << 1)
+#define		WP_TLDR			(1 << 2)
+#define		WP_TTGR			(1 << 3)
+#define		WP_TMAR			(1 << 4)
+#define		WP_TPIR			(1 << 5)
+#define		WP_TNIR			(1 << 6)
+#define		WP_TCVR			(1 << 7)
+#define		WP_TOCR			(1 << 8)
+#define		WP_TOWR			(1 << 9)
+#define _OMAP_TIMER_MATCH_OFFSET	0x38
+#define _OMAP_TIMER_CAPTURE_OFFSET	0x3c
+#define _OMAP_TIMER_IF_CTRL_OFFSET	0x40
+#define _OMAP_TIMER_CAPTURE2_OFFSET		0x44	/* TCAR2, 34xx only */
+#define _OMAP_TIMER_TICK_POS_OFFSET		0x48	/* TPIR, 34xx only */
+#define _OMAP_TIMER_TICK_NEG_OFFSET		0x4c	/* TNIR, 34xx only */
+#define _OMAP_TIMER_TICK_COUNT_OFFSET		0x50	/* TCVR, 34xx only */
+#define _OMAP_TIMER_TICK_INT_MASK_SET_OFFSET	0x54	/* TOCR, 34xx only */
+#define _OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET	0x58	/* TOWR, 34xx only */
+
+/* register offsets with the write pending bit encoded */
+#define	WPSHIFT					16
+
+#define OMAP_TIMER_ID_REG			(_OMAP_TIMER_ID_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_OCP_CFG_REG			(_OMAP_TIMER_OCP_CFG_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_SYS_STAT_REG			(_OMAP_TIMER_SYS_STAT_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_STAT_REG			(_OMAP_TIMER_STAT_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_INT_EN_REG			(_OMAP_TIMER_INT_EN_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_WAKEUP_EN_REG		(_OMAP_TIMER_WAKEUP_EN_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_CTRL_REG			(_OMAP_TIMER_CTRL_OFFSET \
+							| (WP_TCLR << WPSHIFT))
+
+#define OMAP_TIMER_COUNTER_REG			(_OMAP_TIMER_COUNTER_OFFSET \
+							| (WP_TCRR << WPSHIFT))
+
+#define OMAP_TIMER_LOAD_REG			(_OMAP_TIMER_LOAD_OFFSET \
+							| (WP_TLDR << WPSHIFT))
+
+#define OMAP_TIMER_TRIGGER_REG			(_OMAP_TIMER_TRIGGER_OFFSET \
+							| (WP_TTGR << WPSHIFT))
+
+#define OMAP_TIMER_WRITE_PEND_REG		(_OMAP_TIMER_WRITE_PEND_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_MATCH_REG			(_OMAP_TIMER_MATCH_OFFSET \
+							| (WP_TMAR << WPSHIFT))
+
+#define OMAP_TIMER_CAPTURE_REG			(_OMAP_TIMER_CAPTURE_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_IF_CTRL_REG			(_OMAP_TIMER_IF_CTRL_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_CAPTURE2_REG			(_OMAP_TIMER_CAPTURE2_OFFSET \
+							| (WP_NONE << WPSHIFT))
+
+#define OMAP_TIMER_TICK_POS_REG			(_OMAP_TIMER_TICK_POS_OFFSET \
+							| (WP_TPIR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_NEG_REG			(_OMAP_TIMER_TICK_NEG_OFFSET \
+							| (WP_TNIR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_COUNT_REG		(_OMAP_TIMER_TICK_COUNT_OFFSET \
+							| (WP_TCVR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_INT_MASK_SET_REG				\
+		(_OMAP_TIMER_TICK_INT_MASK_SET_OFFSET | (WP_TOCR << WPSHIFT))
+
+#define OMAP_TIMER_TICK_INT_MASK_COUNT_REG				\
+		(_OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET | (WP_TOWR << WPSHIFT))
 
 struct omap_dm_timer {
 	unsigned long phys_base;
@@ -76,6 +155,7 @@ struct omap_dm_timer {
 	void __iomem *io_base;
 	unsigned reserved:1;
 	unsigned enabled:1;
+	unsigned posted:1;
 };
 
 #ifdef CONFIG_ARCH_OMAP1
@@ -181,16 +261,34 @@ static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
 
-static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, int reg)
+/*
+ * Reads timer registers in posted and non-posted mode. The posted mode bit
+ * is encoded in reg. Note that in posted mode write pending bit must be
+ * checked. Otherwise a read of a non completed write will produce an error.
+ */
+static inline u32 omap_dm_timer_read_reg(struct omap_dm_timer *timer, u32 reg)
 {
-	return readl(timer->io_base + reg);
+	if (timer->posted)
+		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
+				& (reg >> WPSHIFT))
+			cpu_relax();
+	return readl(timer->io_base + (reg & 0xff));
 }
 
-static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, int reg, u32 value)
+/*
+ * Writes timer registers in posted and non-posted mode. The posted mode bit
+ * is encoded in reg. Note that in posted mode the write pending bit must be
+ * checked. Otherwise a write on a register which has a pending write will be
+ * lost.
+ */
+static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
+						u32 value)
 {
-	writel(value, timer->io_base + reg);
-	while (omap_dm_timer_read_reg(timer, OMAP_TIMER_WRITE_PEND_REG))
-		;
+	if (timer->posted)
+		while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff))
+				& (reg >> WPSHIFT))
+			cpu_relax();
+	writel(value, timer->io_base + (reg & 0xff));
 }
 
 static void omap_dm_timer_wait_for_reset(struct omap_dm_timer *timer)
@@ -217,17 +315,23 @@ static void omap_dm_timer_reset(struct omap_dm_timer *timer)
 	}
 	omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
-	/* Set to smart-idle mode */
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_OCP_CFG_REG);
-	l |= 0x02 << 3;
-
-	if (cpu_class_is_omap2() && timer == &dm_timers[0]) {
-		/* Enable wake-up only for GPT1 on OMAP2 CPUs*/
+	l |= 0x02 << 3;  /* Set to smart-idle mode */
+	l |= 0x2 << 8;   /* Set clock activity to perserve f-clock on idle */
+
+	/*
+	 * Enable wake-up only for GPT1 on OMAP2 CPUs.
+	 * FIXME: All timers should have wake-up enabled and clear
+	 * PRCM status.
+	 */
+	if (cpu_class_is_omap2() && (timer == &dm_timers[0]))
 		l |= 1 << 2;
-		/* Non-posted mode */
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG, 0);
-	}
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_OCP_CFG_REG, l);
+
+	/* Match hardware reset default of posted mode */
+	omap_dm_timer_write_reg(timer, OMAP_TIMER_IF_CTRL_REG,
+			OMAP_TIMER_CTRL_POSTED);
+	timer->posted = 1;
 }
 
 static void omap_dm_timer_prepare(struct omap_dm_timer *timer)
@@ -434,6 +538,11 @@ void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
 		l &= ~OMAP_TIMER_CTRL_AR;
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+
+	/* REVISIT: hw feature, ttgr overtaking tldr? */
+	while (readl(timer->io_base + (OMAP_TIMER_WRITE_PEND_REG & 0xff)))
+		cpu_relax();
+
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] dmtimer posting
  2008-03-20 11:57                   ` Tony Lindgren
@ 2008-03-20 12:19                     ` Tony Lindgren
  2008-03-21  0:13                       ` Woodruff, Richard
  2008-03-21  0:01                     ` Woodruff, Richard
  1 sibling, 1 reply; 26+ messages in thread
From: Tony Lindgren @ 2008-03-20 12:19 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Kevin Hilman, linux-omap

[-- Attachment #1: Type: text/plain, Size: 903 bytes --]

* Tony Lindgren <tony@atomide.com> [080320 13:58]:
> * Woodruff, Richard <r-woodruff2@ti.com> [080320 00:14]:
> > 
> > One note is some more optimization could happen later on with
> > reordering.  The current usage in gptimer has dmtimer_set_load()
> > followed by a dm_timer_start().  This causes a read/write/read/write of
> > the CTRL register.  Those writes could be collapsed into each other.
> > With a load_start().  Others maybe elsewhere.
> 
> OK. We might also want to optimize the timer reload function. I'll post
> a separate patch on that to experiment with.

Here's an experimental patch that attempts to optimize the timer
reloading for gp_timer0. I don't know if this improves the latency
or performance, but might be worth testing.

I guess there's no way to update the timer without having to write
TCLR to (re)start it?

If the patch helps, then we can clean it up a bit more.

Tony

[-- Attachment #2: gptimer0-reload.patch --]
[-- Type: text/x-diff, Size: 3288 bytes --]

commit 25146a33220bb264f06dd61e407435d16c371288
Author: Tony Lindgren <tony@atomide.com>
Date:   Thu Mar 20 13:47:23 2008 +0200

    ARM: OMAP: Optimize timer reload by bypassing omap_gp_timer_set_next_event()
    
    This patch optimizes the timer reprogramming that happens during every
    timer interrupt.
    
    Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 78d05f2..7805fa2 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -65,6 +65,58 @@ static int omap2_gp_timer_set_next_event(unsigned long cycles,
 	return 0;
 }
 
+#define OMAP2420_GP_TIMER0_BASE		0x48028000
+#define OMAP2430_GP_TIMER0_BASE		0x49018000
+#define OMAP34XX_GP_TIMER0_BASE		0x48318000
+#define GP_TIMER_TCLR			0x24
+#define GP_TIMER_TCRR			0x28
+#define GP_TIMER_TWPS			0x34
+#define		TWPS_MASK		0x3	/* Check for TCLR and TCRR */
+
+/*
+ * Reloads gp_timer0 value. Assumes that gp_timer0 has be set into posted mode
+ * during init. Bypassess the gp_timer functions to optimize timer reloading
+ * during timer interrupts.
+ */
+#define GP_TIMER0_RELOAD(cycles, base)						\
+{										\
+	while ((__raw_readl(IO_ADDRESS((base) + GP_TIMER_TWPS)) & TWPS_MASK))	\
+		cpu_relax();							\
+	__raw_writel(0xffffffff - (cycles),					\
+			IO_ADDRESS((base) + GP_TIMER_TCRR));			\
+	__raw_writel(0x3, IO_ADDRESS((base) + GP_TIMER_TCLR));			\
+}
+
+#ifdef CONFIG_ARCH_OMAP24XX
+static int omap2420_gp_timer0_reload(unsigned long cycles,
+					struct clock_event_device *evt)
+{
+	GP_TIMER0_RELOAD(cycles, OMAP2420_GP_TIMER0_BASE);
+	return 0;
+}
+
+static int omap2430_gp_timer0_reload(unsigned long cycles,
+					struct clock_event_device *evt)
+{
+	GP_TIMER0_RELOAD(cycles, OMAP2430_GP_TIMER0_BASE);
+	return 0;
+}
+#else
+#define omap2420_gp_timer0_reload	NULL
+#define omap2430_gp_timer0_reload	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP34XX
+static int omap34xx_gp_timer0_reload(unsigned long cycles,
+					struct clock_event_device *evt)
+{
+	GP_TIMER0_RELOAD(cycles, OMAP34XX_GP_TIMER0_BASE);
+	return 0;
+}
+#else
+#define omap34xx_gp_timer0_reload	NULL
+#endif
+
 static void omap2_gp_timer_set_mode(enum clock_event_mode mode,
 				    struct clock_event_device *evt)
 {
@@ -93,7 +145,7 @@ static struct clock_event_device clockevent_gpt = {
 	.name		= "gp timer",
 	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
-	.set_next_event	= omap2_gp_timer_set_next_event,
+	.set_next_event	= omap2_gp_timer_set_next_event, /* Init can rewrite */
 	.set_mode	= omap2_gp_timer_set_mode,
 };
 
@@ -111,6 +163,15 @@ static void __init omap2_gp_clockevent_init(void)
 #endif
 	tick_rate = clk_get_rate(omap_dm_timer_get_fclk(gptimer));
 
+	if (cpu_is_omap2420())
+		clockevent_gpt.set_next_event = omap2420_gp_timer0_reload;
+	else if (cpu_is_omap2430())
+		clockevent_gpt.set_next_event = omap2430_gp_timer0_reload;
+	else if (cpu_is_omap34xx())
+		clockevent_gpt.set_next_event = omap34xx_gp_timer0_reload;
+	else
+		clockevent_gpt.set_next_event = omap2_gp_timer_set_next_event;
+
 	omap2_gp_timer_irq.dev_id = (void *)gptimer;
 	setup_irq(omap_dm_timer_get_irq(gptimer), &omap2_gp_timer_irq);
 	omap_dm_timer_set_int_enable(gptimer, OMAP_TIMER_INT_OVERFLOW);

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [PATCH] dmtimer posting
  2008-03-20 11:57                   ` Tony Lindgren
  2008-03-20 12:19                     ` Tony Lindgren
@ 2008-03-21  0:01                     ` Woodruff, Richard
  2008-03-31 10:49                       ` Tony Lindgren
  1 sibling, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-21  0:01 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

Hi

> Attached is a rev-4 with the cpu_relax() added to while loops as
> suggested by Ladislav, and CTRL register defines moved to the right
> location.

I confirm this version of the patch works on 3430 with latest kernel.

I don't have by break out board for trace to get time stamps right now.
Next week I can get that back or I can pass a kernel off to someone else
like I was doing before.  Maybe Kevin can also make a test run on it.

If you just look the instructions trace from ETB alone, the load
function shows up here but not in the older version.  Reason for that is
the long stalls are not accounted for just the number of loops, and in
non-posted mode there is nothing to loop on, just wait.  When you hook
up ETM, it will correlate with an external time base then you get the
actual time, and its here where you see it stalled out for a long time.

Seems like its good to commit.  Maybe someone could make sure a 1710
works.

Regards,
Richard W.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH] dmtimer posting
  2008-03-20 12:19                     ` Tony Lindgren
@ 2008-03-21  0:13                       ` Woodruff, Richard
  0 siblings, 0 replies; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-21  0:13 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

Hi,
 
> * Tony Lindgren <tony@atomide.com> [080320 13:58]:
> > * Woodruff, Richard <r-woodruff2@ti.com> [080320 00:14]:
> > >
> > > One note is some more optimization could happen later on with
> > > reordering.  The current usage in gptimer has dmtimer_set_load()
> > > followed by a dm_timer_start().  This causes a
read/write/read/write
> of
> > > the CTRL register.  Those writes could be collapsed into each
other.
> > > With a load_start().  Others maybe elsewhere.
> >
> > OK. We might also want to optimize the timer reload function. I'll
post
> > a separate patch on that to experiment with.
> 
> Here's an experimental patch that attempts to optimize the timer
> reloading for gp_timer0. I don't know if this improves the latency
> or performance, but might be worth testing.
> 
> I guess there's no way to update the timer without having to write
> TCLR to (re)start it?

If its been stopped you need to write.

> If the patch helps, then we can clean it up a bit more.

This seems good. It does boot and suppress ticks and keep time on my
3430-ES2.1 SDP.

With ETB trace I can't even see it show up in profiling.  As I said in
the last thread I don't have ETM right now to get the real external
time.  But as it is written you get full posting and little need to
synchronize after the write.  If this works reliably it should remove
the overhead completely.

Question: don't you think you need to also update the TLDR in
GP_TIMER0_RELOAD?  Your patch directly writes to the count register, and
forces auto-reload mode in the TLCR.  If the wrong value is in the TLDR
when it overflows you could be here for a while.  Skipping the trigger
write to WTGR for the kick off is ok as is done.

If its in autoreload mode, I'm not sure if it will always call back to
this call.  Perhaps only when changing modes?  I've not so familiar with
the flow above this.  When I look in the debugger, it does have expected
values in all registers (including TLDR) but I'm not sure of the
ordering of the set.

+#define GP_TIMER0_RELOAD(cycles, base)
\
+{
\
+	while ((__raw_readl(IO_ADDRESS((base) + GP_TIMER_TWPS)) &
TWPS_MASK))	\
+		cpu_relax();
\
+	__raw_writel(0xffffffff - (cycles),
\
+			IO_ADDRESS((base) + GP_TIMER_TCRR));
\
+	__raw_writel(0x3, IO_ADDRESS((base) + GP_TIMER_TCLR));
\
+}
+

I'll try a quick back port to CDP kernel and see the effect.  The
previous patch reduced load and saved a few mA at run time which was
good.  This might get another couple mA and MHz back.

Regards,
Richard W.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] dmtimer posting
  2008-03-21  0:01                     ` Woodruff, Richard
@ 2008-03-31 10:49                       ` Tony Lindgren
  2008-03-31 12:18                         ` Woodruff, Richard
  0 siblings, 1 reply; 26+ messages in thread
From: Tony Lindgren @ 2008-03-31 10:49 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Kevin Hilman, linux-omap

* Woodruff, Richard <r-woodruff2@ti.com> [080321 02:01]:
> Hi
> 
> > Attached is a rev-4 with the cpu_relax() added to while loops as
> > suggested by Ladislav, and CTRL register defines moved to the right
> > location.
> 
> I confirm this version of the patch works on 3430 with latest kernel.

I'll push the rev-4 version of the patch today. The second patch needs
a bit more work still.

Tony


> I don't have by break out board for trace to get time stamps right now.
> Next week I can get that back or I can pass a kernel off to someone else
> like I was doing before.  Maybe Kevin can also make a test run on it.
> 
> If you just look the instructions trace from ETB alone, the load
> function shows up here but not in the older version.  Reason for that is
> the long stalls are not accounted for just the number of loops, and in
> non-posted mode there is nothing to loop on, just wait.  When you hook
> up ETM, it will correlate with an external time base then you get the
> actual time, and its here where you see it stalled out for a long time.
> 
> Seems like its good to commit.  Maybe someone could make sure a 1710
> works.
> 
> Regards,
> Richard W.
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH] dmtimer posting
  2008-03-31 10:49                       ` Tony Lindgren
@ 2008-03-31 12:18                         ` Woodruff, Richard
  2008-04-02  7:23                           ` Tony Lindgren
  0 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-03-31 12:18 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

> > > Attached is a rev-4 with the cpu_relax() added to while loops as
> > > suggested by Ladislav, and CTRL register defines moved to the
right
> > > location.
> >
> > I confirm this version of the patch works on 3430 with latest
kernel.
> 
> I'll push the rev-4 version of the patch today. The second patch needs
> a bit more work still.

Great.  

I've been testing on a few systems with a functional variant of the 1st
+ 2nd patch back ported to older TI CDP kernels.  So far I've only ran
into positive side effects.  No noticeable load, pass normal functional
tests, and still able to hit all low power states.

In that variant I just added a load_start() function in addition to the
load function and converted all the in tree callers to use this as they
all called load() then start() in order anyway.  This left the API
behavior the same but optimized all current users.

Regards,
Richard W.

I did get my ETM adaptor board back so I have been able to verify
directly.  ETM is cool in that you can get (depending on filtering)
almost a 60 second complete capture of instruction execution during some
use case.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] dmtimer posting
  2008-03-31 12:18                         ` Woodruff, Richard
@ 2008-04-02  7:23                           ` Tony Lindgren
  2008-04-04  4:50                             ` Woodruff, Richard
                                               ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Tony Lindgren @ 2008-04-02  7:23 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: Kevin Hilman, linux-omap

* Woodruff, Richard <r-woodruff2@ti.com> [080331 15:18]:
> > > > Attached is a rev-4 with the cpu_relax() added to while loops as
> > > > suggested by Ladislav, and CTRL register defines moved to the
> right
> > > > location.
> > >
> > > I confirm this version of the patch works on 3430 with latest
> kernel.
> > 
> > I'll push the rev-4 version of the patch today. The second patch needs
> > a bit more work still.
> 
> Great.  
> 
> I've been testing on a few systems with a functional variant of the 1st
> + 2nd patch back ported to older TI CDP kernels.  So far I've only ran
> into positive side effects.  No noticeable load, pass normal functional
> tests, and still able to hit all low power states.
> 
> In that variant I just added a load_start() function in addition to the
> load function and converted all the in tree callers to use this as they
> all called load() then start() in order anyway.  This left the API
> behavior the same but optimized all current users.

Can you send a patch against l-o tree for the load_start()? That might
be optimized enough :)

Tony


> Regards,
> Richard W.
> 
> I did get my ETM adaptor board back so I have been able to verify
> directly.  ETM is cool in that you can get (depending on filtering)
> almost a 60 second complete capture of instruction execution during some
> use case.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH] dmtimer posting
  2008-04-02  7:23                           ` Tony Lindgren
@ 2008-04-04  4:50                             ` Woodruff, Richard
  2008-04-04 20:03                             ` [PATCH] timer optimization part 2 Woodruff, Richard
       [not found]                             ` <49DA3A9F04A0E5498FF06EFCC343DCF90203DA56FF@dlee13.ent.ti.com>
  2 siblings, 0 replies; 26+ messages in thread
From: Woodruff, Richard @ 2008-04-04  4:50 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

> >
> > In that variant I just added a load_start() function in addition to
the
> > load function and converted all the in tree callers to use this as
they
> > all called load() then start() in order anyway.  This left the API
> > behavior the same but optimized all current users.
> 
> Can you send a patch against l-o tree for the load_start()? That might
> be optimized enough :)

Sure, I'll see about it hopefully tomorrow.

Regards,
Richard W.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] timer optimization part 2.
  2008-04-02  7:23                           ` Tony Lindgren
  2008-04-04  4:50                             ` Woodruff, Richard
@ 2008-04-04 20:03                             ` Woodruff, Richard
  2008-04-04 20:23                               ` Idle picture for those interested Woodruff, Richard
       [not found]                             ` <49DA3A9F04A0E5498FF06EFCC343DCF90203DA56FF@dlee13.ent.ti.com>
  2 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-04-04 20:03 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: Kevin Hilman, linux-omap

[-- Attachment #1: Type: text/plain, Size: 1222 bytes --]

Hi,

This patch adds a new function timer_load_start() which optimizes the
system timer. In tree users have been updated to use this function.

This was tested and working on a pull as of 4 April 08 on
2.6.25-rc8-omap1-04408-g722c80b on 3430SDP.  A previous version has been
working with out issue for a while now.

* Before patch register writes are taking up .078% @ 500MHz.

 Address                 |total  |min  |max      |avr     |count|ratio%
 old\process\default_idle|7.369s |0.0us|999.902ms|14.477ms|509. |62.661%
 ld\Global\cpu_v7_do_idle|4.265s |0.0us|375.786ms|24.374ms|175. |36.270%
                (UNKNOWN)|17.503ms|0.us|531.080us|5.119us|3419. |0.148%
 r\omap_dm_timer_set_load|8.135ms|0.0us|79.887us|15.065us|540.  |0.069%
<--
 \vmlinux-old\Global\_end|2.023ms|0.0us|4.000us|0.560us|3613.   |0.017%
 -old\Global\__raw_readsw|1.962ms|0.0us|108.610us|9.167us|214.  |0.016%
 old\smc91x\smc_interrupt|1.353ms|0.0us|10.212us|2.348us|576.   |0.011%
 s/namei\__link_path_walk|1.161ms|0.0us|4.310us|0.762us|  1524. |0.009%
 \omap_dm_timer_write_reg|1.085ms|0.0us|126.150us|2.153us|504.  |0.009%
<--

* After patch timer functions do not show up in top listings.

Regards,
Richard W.


[-- Attachment #2: timer-optimize.diff --]
[-- Type: application/octet-stream, Size: 4453 bytes --]

This patch optimizes the timer load and start sequence.  By combining the
load and start a needless posted wait can be removed from the system timer
execution path.

* Before patch register writes are taking up .078% @ 500MHz during idle.

 Address                 |total  |min  |max      |avr     |count|ratio%
 old\process\default_idle|7.369s |0.0us|999.902ms|14.477ms|509. |62.661%
 ld\Global\cpu_v7_do_idle|4.265s |0.0us|375.786ms|24.374ms|175. |36.270%
                (UNKNOWN)|17.503ms|0.us|531.080us|5.119us|3419. |0.148%
 r\omap_dm_timer_set_load|8.135ms|0.0us|79.887us|15.065us|540.  |0.069% <--
 \vmlinux-old\Global\_end|2.023ms|0.0us|4.000us|0.560us|3613.   |0.017%
 -old\Global\__raw_readsw|1.962ms|0.0us|108.610us|9.167us|214.  |0.016%
 old\smc91x\smc_interrupt|1.353ms|0.0us|10.212us|2.348us|576.   |0.011%
 s/namei\__link_path_walk|1.161ms|0.0us|4.310us|0.762us|  1524. |0.009%
 \omap_dm_timer_write_reg|1.085ms|0.0us|126.150us|2.153us|504.  |0.009% <--

* After patch timer functions do not show up in top listings for long captures.

Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>

diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 78d05f2..557603f 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -59,8 +59,7 @@ static struct irqaction omap2_gp_timer_irq = {
 static int omap2_gp_timer_set_next_event(unsigned long cycles,
 					 struct clock_event_device *evt)
 {
-	omap_dm_timer_set_load(gptimer, 0, 0xffffffff - cycles);
-	omap_dm_timer_start(gptimer);
+	omap_dm_timer_set_load_start(gptimer, 0, 0xffffffff - cycles);
 
 	return 0;
 }
@@ -77,8 +76,7 @@ static void omap2_gp_timer_set_mode(enum clock_event_mode mode,
 		period = clk_get_rate(omap_dm_timer_get_fclk(gptimer)) / HZ;
 		period -= 1;
 
-		omap_dm_timer_set_load(gptimer, 1, 0xffffffff - period);
-		omap_dm_timer_start(gptimer);
+		omap_dm_timer_set_load_start(gptimer, 1, 0xffffffff - period);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
 		break;
@@ -172,8 +170,7 @@ static void __init omap2_gp_clocksource_init(void)
 	tick_rate = clk_get_rate(omap_dm_timer_get_fclk(gpt));
 	tick_period = (tick_rate / HZ) - 1;
 
-	omap_dm_timer_set_load(gpt, 1, 0);
-	omap_dm_timer_start(gpt);
+	omap_dm_timer_set_load_start(gpt, 1, 0);
 
 	clocksource_gpt.mult =
 		clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift);
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 822b6bb..f22506a 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -546,6 +546,24 @@ void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
 }
 
+/* Optimized set_load which removes costly spin wait in timer_start */
+void omap_dm_timer_set_load_start(struct omap_dm_timer *timer, int autoreload,
+                            unsigned int load)
+{
+	u32 l;
+
+	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
+	if (autoreload)
+		l |= OMAP_TIMER_CTRL_AR;
+	else
+		l &= ~OMAP_TIMER_CTRL_AR;
+	l |= OMAP_TIMER_CTRL_ST;
+
+	omap_dm_timer_write_reg(timer, OMAP_TIMER_COUNTER_REG, load);
+	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
+}
+
 void omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable,
 			     unsigned int match)
 {
@@ -560,7 +578,6 @@ void omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable,
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_MATCH_REG, match);
 }
 
-
 void omap_dm_timer_set_pwm(struct omap_dm_timer *timer, int def_on,
 			   int toggle, int trigger)
 {
diff --git a/include/asm-arm/arch-omap/dmtimer.h b/include/asm-arm/arch-omap/dmtimer.h
index fefb276..02b29e8 100644
--- a/include/asm-arm/arch-omap/dmtimer.h
+++ b/include/asm-arm/arch-omap/dmtimer.h
@@ -66,6 +66,7 @@ void omap_dm_timer_stop(struct omap_dm_timer *timer);
 
 void omap_dm_timer_set_source(struct omap_dm_timer *timer, int source);
 void omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload, unsigned int value);
+void omap_dm_timer_set_load_start(struct omap_dm_timer *timer, int autoreload, unsigned int value);
 void omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable, unsigned int match);
 void omap_dm_timer_set_pwm(struct omap_dm_timer *timer, int def_on, int toggle, int trigger);
 void omap_dm_timer_set_prescaler(struct omap_dm_timer *timer, int prescaler);

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Idle picture for those interested.
  2008-04-04 20:03                             ` [PATCH] timer optimization part 2 Woodruff, Richard
@ 2008-04-04 20:23                               ` Woodruff, Richard
  2008-04-04 20:56                                 ` David Brownell
  0 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-04-04 20:23 UTC (permalink / raw)
  To: linux-omap

[-- Attachment #1: Type: text/plain, Size: 274 bytes --]

Hi,

For any interested I'm posting a small snippet of what an ARM-OMAP idle
looks like.  Having a nice tool can go a long ways.  I'm not sure if the
picture will make it, but it's worth a try.  Knowing what is out there
for tools is useful.

Regards,
Richard W.


[-- Attachment #2: idle_trace.PNG --]
[-- Type: image/png, Size: 58498 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Idle picture for those interested.
  2008-04-04 20:23                               ` Idle picture for those interested Woodruff, Richard
@ 2008-04-04 20:56                                 ` David Brownell
  2008-04-04 21:45                                   ` Woodruff, Richard
  2008-04-04 21:56                                   ` Woodruff, Richard
  0 siblings, 2 replies; 26+ messages in thread
From: David Brownell @ 2008-04-04 20:56 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: linux-omap

On Friday 04 April 2008, Woodruff, Richard wrote:
> For any interested I'm posting a small snippet of what an ARM-OMAP idle
> looks like.  Having a nice tool can go a long ways.  I'm not sure if the
> picture will make it, but it's worth a try.  Knowing what is out there
> for tools is useful.

Presumably this is from the CPU trace port, OMAP3 with NO_HZ.

Do I read this right:  18 usec tick processing overhead after
receiving an IRQ, before even getting to the generic OMAP GPIO
irq logic (which then has to dispatch to the real IRQ handler)?
Over 5 usec just for updating jiffies64 ...


--
To unsubscribe from this list: send the line "unsubscribe linux-omap" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: Idle picture for those interested.
  2008-04-04 20:56                                 ` David Brownell
@ 2008-04-04 21:45                                   ` Woodruff, Richard
  2008-04-04 21:56                                   ` Woodruff, Richard
  1 sibling, 0 replies; 26+ messages in thread
From: Woodruff, Richard @ 2008-04-04 21:45 UTC (permalink / raw)
  To: David Brownell; +Cc: linux-omap

> Presumably this is from the CPU trace port, OMAP3 with NO_HZ.

Yes. Just using git-latest kernel and NO_HZ.  You can get the same data
on OMAP2 also through ETM.

I don't see much profiling on the 'live' tree here.  Internal trees at
TI generally get looked at more critically.  It might be interesting to
compare.

> Do I read this right:  18 usec tick processing overhead after
> receiving an IRQ, before even getting to the generic OMAP GPIO
> irq logic (which then has to dispatch to the real IRQ handler)?
> Over 5 usec just for updating jiffies64 ...

I've not calibrated to verify but that seems to be what the picture is
showing so there is some chance the time base is off.  All the
instruction trace is there to walk it if necessary.  There have been
some comments about NO_HZ adding some overhead from those who measure
MHz around here for things like MP3 load models.  I hadn't looked very
closely.

I suppose this might be an ether interrupt.  If it were just a timer
interrupt it shouldn't need to make its way to the GPIO handler.

Trace does give a unique view into the system which is hard to get
otherwise.

Regards,
Richard W.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: Idle picture for those interested.
  2008-04-04 20:56                                 ` David Brownell
  2008-04-04 21:45                                   ` Woodruff, Richard
@ 2008-04-04 21:56                                   ` Woodruff, Richard
  2008-04-04 22:07                                     ` David Brownell
  1 sibling, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-04-04 21:56 UTC (permalink / raw)
  To: David Brownell; +Cc: linux-omap

> I've not calibrated to verify but that seems to be what the picture is
> showing so there is some chance the time base is off.  All the
instruction
> trace is there to walk it if necessary.  There have been some comments
> about NO_HZ adding some overhead from those who measure MHz around
here
> for things like MP3 load models.  I hadn't looked very closely.

One question might be is if the timekeeping could be deferred until
after you have serviced the IRQ.  Is there a reason to make the ISR wait
for housekeeping?

Regards,
Richard W.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Idle picture for those interested.
  2008-04-04 21:56                                   ` Woodruff, Richard
@ 2008-04-04 22:07                                     ` David Brownell
  0 siblings, 0 replies; 26+ messages in thread
From: David Brownell @ 2008-04-04 22:07 UTC (permalink / raw)
  To: r-woodruff2; +Cc: linux-omap

> > I've not calibrated to verify but that seems to be what the picture is
> > showing so there is some chance the time base is off.  All the
> > instruction
> > trace is there to walk it if necessary.  There have been some comments
> > about NO_HZ adding some overhead from those who measure MHz around
> > here for things like MP3 load models.  I hadn't looked very closely.
>
> One question might be is if the timekeeping could be deferred until
> after you have serviced the IRQ.  Is there a reason to make the ISR wait
> for housekeeping?

If the system is in NO_HZ mode, the "jiffies" value could be invalid.
Some IRQ handlers need it to be valid.

- Dave


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH] timer optimization part 2.
       [not found]                             ` <49DA3A9F04A0E5498FF06EFCC343DCF90203DA56FF@dlee13.ent.ti.com>
@ 2008-05-07 23:46                               ` Woodruff, Richard
  2008-05-08 17:40                                 ` Tony Lindgren
  0 siblings, 1 reply; 26+ messages in thread
From: Woodruff, Richard @ 2008-05-07 23:46 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: linux-omap

So what about this?

> -----Original Message-----
> From: Woodruff, Richard
> Sent: Friday, April 04, 2008 3:04 PM
> To: Tony Lindgren
> Cc: Kevin Hilman; linux-omap@vger.kernel.org
> Subject: [PATCH] timer optimization part 2.
> 
> Hi,
> 
> This patch adds a new function timer_load_start() which optimizes the
> system timer. In tree users have been updated to use this function.
> 
> This was tested and working on a pull as of 4 April 08 on 2.6.25-rc8-
> omap1-04408-g722c80b on 3430SDP.  A previous version has been working
> with out issue for a while now.
> 
> * Before patch register writes are taking up .078% @ 500MHz.
> 
>  Address                 |total  |min  |max      |avr
|count|ratio%
>  old\process\default_idle|7.369s |0.0us|999.902ms|14.477ms|509.
|62.661%
>  ld\Global\cpu_v7_do_idle|4.265s |0.0us|375.786ms|24.374ms|175.
|36.270%
>                 (UNKNOWN)|17.503ms|0.us|531.080us|5.119us|3419.
|0.148%
>  r\omap_dm_timer_set_load|8.135ms|0.0us|79.887us|15.065us|540.
|0.069%
> <--
>  \vmlinux-old\Global\_end|2.023ms|0.0us|4.000us|0.560us|3613.
|0.017%
>  -old\Global\__raw_readsw|1.962ms|0.0us|108.610us|9.167us|214.
|0.016%
>  old\smc91x\smc_interrupt|1.353ms|0.0us|10.212us|2.348us|576.
|0.011%
>  s/namei\__link_path_walk|1.161ms|0.0us|4.310us|0.762us|  1524.
|0.009%
>  \omap_dm_timer_write_reg|1.085ms|0.0us|126.150us|2.153us|504.
|0.009%
> <--
> 
> * After patch timer functions do not show up in top listings.
> 
> Regards,
> Richard W.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] timer optimization part 2.
  2008-05-07 23:46                               ` [PATCH] timer optimization part 2 Woodruff, Richard
@ 2008-05-08 17:40                                 ` Tony Lindgren
  0 siblings, 0 replies; 26+ messages in thread
From: Tony Lindgren @ 2008-05-08 17:40 UTC (permalink / raw)
  To: Woodruff, Richard; +Cc: linux-omap

* Woodruff, Richard <r-woodruff2@ti.com> [080507 16:47]:
> So what about this?

Yeah, looks good, I'll push it today.

Tony

> > -----Original Message-----
> > From: Woodruff, Richard
> > Sent: Friday, April 04, 2008 3:04 PM
> > To: Tony Lindgren
> > Cc: Kevin Hilman; linux-omap@vger.kernel.org
> > Subject: [PATCH] timer optimization part 2.
> > 
> > Hi,
> > 
> > This patch adds a new function timer_load_start() which optimizes the
> > system timer. In tree users have been updated to use this function.
> > 
> > This was tested and working on a pull as of 4 April 08 on 2.6.25-rc8-
> > omap1-04408-g722c80b on 3430SDP.  A previous version has been working
> > with out issue for a while now.
> > 
> > * Before patch register writes are taking up .078% @ 500MHz.
> > 
> >  Address                 |total  |min  |max      |avr
> |count|ratio%
> >  old\process\default_idle|7.369s |0.0us|999.902ms|14.477ms|509.
> |62.661%
> >  ld\Global\cpu_v7_do_idle|4.265s |0.0us|375.786ms|24.374ms|175.
> |36.270%
> >                 (UNKNOWN)|17.503ms|0.us|531.080us|5.119us|3419.
> |0.148%
> >  r\omap_dm_timer_set_load|8.135ms|0.0us|79.887us|15.065us|540.
> |0.069%
> > <--
> >  \vmlinux-old\Global\_end|2.023ms|0.0us|4.000us|0.560us|3613.
> |0.017%
> >  -old\Global\__raw_readsw|1.962ms|0.0us|108.610us|9.167us|214.
> |0.016%
> >  old\smc91x\smc_interrupt|1.353ms|0.0us|10.212us|2.348us|576.
> |0.011%
> >  s/namei\__link_path_walk|1.161ms|0.0us|4.310us|0.762us|  1524.
> |0.009%
> >  \omap_dm_timer_write_reg|1.085ms|0.0us|126.150us|2.153us|504.
> |0.009%
> > <--
> > 
> > * After patch timer functions do not show up in top listings.
> > 
> > Regards,
> > Richard W.
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-05-08 17:40 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <ae36f8040803061301y6e0823a8pbdf0b6f0b47e5639@mail.gmail.com>
     [not found] ` <3B6D69C3A9EBCA4BA5DA60D91302742903AFC880@dlee13.ent.ti.com>
     [not found]   ` <20080307071454.GC7635@atomide.com>
2008-03-15  0:24     ` [PATCH] [RFC] dmtimer library is very inefficient today Woodruff, Richard
2008-03-18 18:33       ` Kevin Hilman
2008-03-18 18:51         ` Woodruff, Richard
2008-03-19  2:40         ` [PATCH] dmtimer posting Woodruff, Richard
2008-03-19 14:15           ` Tony Lindgren
2008-03-19 14:47             ` Woodruff, Richard
2008-03-19 15:58               ` Tony Lindgren
2008-03-19 22:13                 ` Woodruff, Richard
2008-03-20 11:57                   ` Tony Lindgren
2008-03-20 12:19                     ` Tony Lindgren
2008-03-21  0:13                       ` Woodruff, Richard
2008-03-21  0:01                     ` Woodruff, Richard
2008-03-31 10:49                       ` Tony Lindgren
2008-03-31 12:18                         ` Woodruff, Richard
2008-04-02  7:23                           ` Tony Lindgren
2008-04-04  4:50                             ` Woodruff, Richard
2008-04-04 20:03                             ` [PATCH] timer optimization part 2 Woodruff, Richard
2008-04-04 20:23                               ` Idle picture for those interested Woodruff, Richard
2008-04-04 20:56                                 ` David Brownell
2008-04-04 21:45                                   ` Woodruff, Richard
2008-04-04 21:56                                   ` Woodruff, Richard
2008-04-04 22:07                                     ` David Brownell
     [not found]                             ` <49DA3A9F04A0E5498FF06EFCC343DCF90203DA56FF@dlee13.ent.ti.com>
2008-05-07 23:46                               ` [PATCH] timer optimization part 2 Woodruff, Richard
2008-05-08 17:40                                 ` Tony Lindgren
2008-03-19 19:52         ` [PATCH] [RFC] dmtimer library is very inefficient today Ladislav Michl
2008-03-20  8:58           ` Tony Lindgren

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox