* [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling
@ 2014-11-19 14:40 Arnd Bergmann
2014-11-19 14:41 ` [PATCH 2/2] ARM: cache-l2x0: optimize aurora range operations Arnd Bergmann
2014-11-19 14:50 ` [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Russell King - ARM Linux
0 siblings, 2 replies; 5+ messages in thread
From: Arnd Bergmann @ 2014-11-19 14:40 UTC (permalink / raw)
To: linux-arm-kernel
The aurora cache controller is the only remaining user of a couple
of functions in this file and are completely unused when that is
disabled, leading to build warnings:
arch/arm/mm/cache-l2x0.c:167:13: warning: 'l2x0_cache_sync' defined but not used [-Wunused-function]
arch/arm/mm/cache-l2x0.c:184:13: warning: 'l2x0_flush_all' defined but not used [-Wunused-function]
arch/arm/mm/cache-l2x0.c:194:13: warning: 'l2x0_disable' defined but not used [-Wunused-function]
With the knowledge that the code is now aurora-specific, we can
simplify it noticeably:
- The pl310 errata workarounds are not needed on aurora and can be removed
- The cache_wait() macro is never needed since this is not an l210/l220
- aurora_pa_range can keep the spinlock while syncing the cache
- We can load the l2x0_base into a local variable across operations
There should be no functional change in this patch, but readability
and the generated object code improves, along with avoiding the
warnings.
Acked-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
I still had this in my old patch queue, will add it to the patch tracker
unless there are any other concerns.
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5e65ca8dea62..c61f4151162f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -136,73 +136,6 @@ static void l2c_disable(void)
dsb(st);
}
-#ifdef CONFIG_CACHE_PL310
-static inline void cache_wait(void __iomem *reg, unsigned long mask)
-{
- /* cache operations by line are atomic on PL310 */
-}
-#else
-#define cache_wait l2c_wait_mask
-#endif
-
-static inline void cache_sync(void)
-{
- void __iomem *base = l2x0_base;
-
- writel_relaxed(0, base + sync_reg_offset);
- cache_wait(base + L2X0_CACHE_SYNC, 1);
-}
-
-#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
-static inline void debug_writel(unsigned long val)
-{
- l2c_set_debug(l2x0_base, val);
-}
-#else
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
-{
-}
-#endif
-
-static void l2x0_cache_sync(void)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&l2x0_lock, flags);
- cache_sync();
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void __l2x0_flush_all(void)
-{
- debug_writel(0x03);
- __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
- cache_sync();
- debug_writel(0x00);
-}
-
-static void l2x0_flush_all(void)
-{
- unsigned long flags;
-
- /* clean all ways */
- raw_spin_lock_irqsave(&l2x0_lock, flags);
- __l2x0_flush_all();
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_disable(void)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&l2x0_lock, flags);
- __l2x0_flush_all();
- l2c_write_sec(0, l2x0_base, L2X0_CTRL);
- dsb(st);
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
static void l2c_save(void __iomem *base)
{
l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
@@ -1257,14 +1190,14 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
static void aurora_pa_range(unsigned long start, unsigned long end,
unsigned long offset)
{
+ void __iomem *base = l2x0_base;
unsigned long flags;
raw_spin_lock_irqsave(&l2x0_lock, flags);
- writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
- writel_relaxed(end, l2x0_base + offset);
+ writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+ writel_relaxed(end, base + offset);
+ writel_relaxed(0, base + AURORA_SYNC_REG);
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-
- cache_sync();
}
static void aurora_inv_range(unsigned long start, unsigned long end)
@@ -1324,6 +1257,40 @@ static void aurora_flush_range(unsigned long start, unsigned long end)
}
}
+static void aurora_flush_all(void)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
+ /* clean all ways */
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+ writel_relaxed(0, base + AURORA_SYNC_REG);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void aurora_cache_sync(void)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void aurora_disable(void)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+ writel_relaxed(0, base + AURORA_SYNC_REG);
+ l2c_write_sec(0, base, L2X0_CTRL);
+ dsb(st);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
static void aurora_save(void __iomem *base)
{
l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
@@ -1398,9 +1365,9 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
.inv_range = aurora_inv_range,
.clean_range = aurora_clean_range,
.flush_range = aurora_flush_range,
- .flush_all = l2x0_flush_all,
- .disable = l2x0_disable,
- .sync = l2x0_cache_sync,
+ .flush_all = aurora_flush_all,
+ .disable = aurora_disable,
+ .sync = aurora_cache_sync,
.resume = aurora_resume,
},
};
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] ARM: cache-l2x0: optimize aurora range operations
2014-11-19 14:40 [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Arnd Bergmann
@ 2014-11-19 14:41 ` Arnd Bergmann
2014-11-19 14:50 ` [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Russell King - ARM Linux
1 sibling, 0 replies; 5+ messages in thread
From: Arnd Bergmann @ 2014-11-19 14:41 UTC (permalink / raw)
To: linux-arm-kernel
The aurora_inv_range(), aurora_clean_range() and aurora_flush_range()
functions are highly redundant, both in source and in object code, and
they are harder to understand than necessary.
By moving the range loop into the aurora_pa_range() function, they
become trivial wrappers, and the object code start looking like what
one would expect for an optimal implementation.
Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c61f4151162f..737426b6bba6 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1164,7 +1164,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
* noninclusive, while the hardware cache range operations use
* inclusive start and end addresses.
*/
-static unsigned long calc_range_end(unsigned long start, unsigned long end)
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
{
/*
* Limit the number of cache lines processed at once,
@@ -1183,25 +1183,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
return end;
}
-/*
- * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
- * and range operations only do a TLB lookup on the start address.
- */
static void aurora_pa_range(unsigned long start, unsigned long end,
- unsigned long offset)
+ unsigned long offset)
{
void __iomem *base = l2x0_base;
+ unsigned long range_end;
unsigned long flags;
- raw_spin_lock_irqsave(&l2x0_lock, flags);
- writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
- writel_relaxed(end, base + offset);
- writel_relaxed(0, base + AURORA_SYNC_REG);
- raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void aurora_inv_range(unsigned long start, unsigned long end)
-{
/*
* round start and end adresses up to cache line size
*/
@@ -1209,15 +1197,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
end = ALIGN(end, CACHE_LINE_SIZE);
/*
- * Invalidate all full cache lines between 'start' and 'end'.
+ * perform operation on all full cache lines between 'start' and 'end'
*/
while (start < end) {
- unsigned long range_end = calc_range_end(start, end);
- aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
- AURORA_INVAL_RANGE_REG);
+ range_end = aurora_range_end(start, end);
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+ writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+ writel_relaxed(0, base + AURORA_SYNC_REG);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
start = range_end;
}
}
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+ aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
static void aurora_clean_range(unsigned long start, unsigned long end)
{
@@ -1225,36 +1222,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
* If L2 is forced to WT, the L2 will always be clean and we
* don't need to do anything here.
*/
- if (!l2_wt_override) {
- start &= ~(CACHE_LINE_SIZE - 1);
- end = ALIGN(end, CACHE_LINE_SIZE);
- while (start != end) {
- unsigned long range_end = calc_range_end(start, end);
- aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
- AURORA_CLEAN_RANGE_REG);
- start = range_end;
- }
- }
+ if (!l2_wt_override)
+ aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
}
static void aurora_flush_range(unsigned long start, unsigned long end)
{
- start &= ~(CACHE_LINE_SIZE - 1);
- end = ALIGN(end, CACHE_LINE_SIZE);
- while (start != end) {
- unsigned long range_end = calc_range_end(start, end);
- /*
- * If L2 is forced to WT, the L2 will always be clean and we
- * just need to invalidate.
- */
- if (l2_wt_override)
- aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
- AURORA_INVAL_RANGE_REG);
- else
- aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
- AURORA_FLUSH_RANGE_REG);
- start = range_end;
- }
+ if (l2_wt_override)
+ aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+ else
+ aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
}
static void aurora_flush_all(void)
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling
2014-11-19 14:40 [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Arnd Bergmann
2014-11-19 14:41 ` [PATCH 2/2] ARM: cache-l2x0: optimize aurora range operations Arnd Bergmann
@ 2014-11-19 14:50 ` Russell King - ARM Linux
2014-11-19 15:06 ` Arnd Bergmann
1 sibling, 1 reply; 5+ messages in thread
From: Russell King - ARM Linux @ 2014-11-19 14:50 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 19, 2014 at 03:40:12PM +0100, Arnd Bergmann wrote:
> The aurora cache controller is the only remaining user of a couple
> of functions in this file and are completely unused when that is
> disabled, leading to build warnings:
>
> arch/arm/mm/cache-l2x0.c:167:13: warning: 'l2x0_cache_sync' defined but not used [-Wunused-function]
> arch/arm/mm/cache-l2x0.c:184:13: warning: 'l2x0_flush_all' defined but not used [-Wunused-function]
> arch/arm/mm/cache-l2x0.c:194:13: warning: 'l2x0_disable' defined but not used [-Wunused-function]
>
> With the knowledge that the code is now aurora-specific, we can
> simplify it noticeably:
>
> - The pl310 errata workarounds are not needed on aurora and can be removed
> - The cache_wait() macro is never needed since this is not an l210/l220
How do we know? You claim it's "not a L210/L220" but how do we actually
know that we don't have to wait for the cache operation to complete?
When this is built without L310 support, Aurora will wait for the cache
operations to complete, but when built with L310 support, it won't.
> - aurora_pa_range can keep the spinlock while syncing the cache
If the operations complete synchronously (in other words, the L2 cache
doesn't accept another transaction until the cache operation has completed)
the spinlocks should not be necessary.
They're necessary for some of the ARM caches which allow cache operations
to run asynchronously (hence the wait stuff) but if you don't have the
wait stuff as you imply above, they can't be asynchronous.
--
FTTC broadband for 0.8mile line: currently at 9.5Mbps down 400kbps up
according to speedtest.net.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling
2014-11-19 14:50 ` [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Russell King - ARM Linux
@ 2014-11-19 15:06 ` Arnd Bergmann
2014-11-19 15:10 ` Arnd Bergmann
0 siblings, 1 reply; 5+ messages in thread
From: Arnd Bergmann @ 2014-11-19 15:06 UTC (permalink / raw)
To: linux-arm-kernel
On Wednesday 19 November 2014 14:50:15 Russell King - ARM Linux wrote:
> On Wed, Nov 19, 2014 at 03:40:12PM +0100, Arnd Bergmann wrote:
> > The aurora cache controller is the only remaining user of a couple
> > of functions in this file and are completely unused when that is
> > disabled, leading to build warnings:
> >
> > arch/arm/mm/cache-l2x0.c:167:13: warning: 'l2x0_cache_sync' defined but not used [-Wunused-function]
> > arch/arm/mm/cache-l2x0.c:184:13: warning: 'l2x0_flush_all' defined but not used [-Wunused-function]
> > arch/arm/mm/cache-l2x0.c:194:13: warning: 'l2x0_disable' defined but not used [-Wunused-function]
> >
> > With the knowledge that the code is now aurora-specific, we can
> > simplify it noticeably:
> >
> > - The pl310 errata workarounds are not needed on aurora and can be removed
> > - The cache_wait() macro is never needed since this is not an l210/l220
>
> How do we know? You claim it's "not a L210/L220" but how do we actually
> know that we don't have to wait for the cache operation to complete?
>
> When this is built without L310 support, Aurora will wait for the cache
> operations to complete, but when built with L310 support, it won't.
I'll update the description to say:
- As confirmed by Thomas Petazzoni from the data sheet, the cache_wait()
macro is never needed.
based on the previous email exchange. Thanks for pointing out that the
description doesn't reflect what we had discussed already.
> > - aurora_pa_range can keep the spinlock while syncing the cache
>
> If the operations complete synchronously (in other words, the L2 cache
> doesn't accept another transaction until the cache operation has completed)
> the spinlocks should not be necessary.
>
> They're necessary for some of the ARM caches which allow cache operations
> to run asynchronously (hence the wait stuff) but if you don't have the
> wait stuff as you imply above, they can't be asynchronous.
I think the spinlock is still needed, it now only protects this sequence:
raw_spin_lock_irqsave(&l2x0_lock, flags);
writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
writel_relaxed(0, base + AURORA_SYNC_REG);
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
without the lock, two concurrent processes might start writing into
AURORA_RANGE_BASE_ADDR_REG before the first one writes the operation
register.
Arnd
^ permalink raw reply [flat|nested] 5+ messages in thread* [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling
2014-11-19 15:06 ` Arnd Bergmann
@ 2014-11-19 15:10 ` Arnd Bergmann
0 siblings, 0 replies; 5+ messages in thread
From: Arnd Bergmann @ 2014-11-19 15:10 UTC (permalink / raw)
To: linux-arm-kernel
On Wednesday 19 November 2014 16:06:31 Arnd Bergmann wrote:
> On Wednesday 19 November 2014 14:50:15 Russell King - ARM Linux wrote:
> >
> > If the operations complete synchronously (in other words, the L2 cache
> > doesn't accept another transaction until the cache operation has completed)
> > the spinlocks should not be necessary.
> >
> > They're necessary for some of the ARM caches which allow cache operations
> > to run asynchronously (hence the wait stuff) but if you don't have the
> > wait stuff as you imply above, they can't be asynchronous.
>
> I think the spinlock is still needed, it now only protects this sequence:
>
> raw_spin_lock_irqsave(&l2x0_lock, flags);
> writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
> writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
> writel_relaxed(0, base + AURORA_SYNC_REG);
> raw_spin_unlock_irqrestore(&l2x0_lock, flags);
>
> without the lock, two concurrent processes might start writing into
> AURORA_RANGE_BASE_ADDR_REG before the first one writes the operation
> register.
After taking another look, I do get your point. Yes, we only need the lock
across the first two operations, not for the third one, which can be done
outside of the lock. I'll change the patch accordingly.
Arnd
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-11-19 15:10 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-11-19 14:40 [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Arnd Bergmann
2014-11-19 14:41 ` [PATCH 2/2] ARM: cache-l2x0: optimize aurora range operations Arnd Bergmann
2014-11-19 14:50 ` [PATCH 1/2] ARM: cache-l2x0: clean up aurora cache handling Russell King - ARM Linux
2014-11-19 15:06 ` Arnd Bergmann
2014-11-19 15:10 ` Arnd Bergmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox