[PATCH v2 1/2] io: prevent compiler reordering on the default writeX() implementation

linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2 1/2] io: prevent compiler reordering on the default writeX() implementation
@ 2018-03-30 15:58 Sinan Kaya
  2018-03-30 15:58 ` [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation Sinan Kaya
  0 siblings, 1 reply; 14+ messages in thread
From: Sinan Kaya @ 2018-03-30 15:58 UTC (permalink / raw)
  To: linux-arm-kernel

The default implementation of mapping writeX() to __raw_writeX() is wrong.
writeX() has stronger ordering semantics. Compiler is allowed to reorder
__raw_writeX().

In the absence of a write barrier or when using a strongly ordered
architecture, writeX() should at least have a compiler barrier in
it to prevent the compiler from clobbering the execution order.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 include/asm-generic/io.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index b4531e3..e8c2078 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -144,6 +144,7 @@ static inline u64 readq(const volatile void __iomem *addr)
 #define writeb writeb
 static inline void writeb(u8 value, volatile void __iomem *addr)
 {
+	barrier();
 	__raw_writeb(value, addr);
 }
 #endif
@@ -152,6 +153,7 @@ static inline void writeb(u8 value, volatile void __iomem *addr)
 #define writew writew
 static inline void writew(u16 value, volatile void __iomem *addr)
 {
+	barrier();
 	__raw_writew(cpu_to_le16(value), addr);
 }
 #endif
@@ -160,6 +162,7 @@ static inline void writew(u16 value, volatile void __iomem *addr)
 #define writel writel
 static inline void writel(u32 value, volatile void __iomem *addr)
 {
+	barrier();
 	__raw_writel(__cpu_to_le32(value), addr);
 }
 #endif
@@ -169,6 +172,7 @@ static inline void writel(u32 value, volatile void __iomem *addr)
 #define writeq writeq
 static inline void writeq(u64 value, volatile void __iomem *addr)
 {
+	barrier();
 	__raw_writeq(__cpu_to_le64(value), addr);
 }
 #endif
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-03-30 15:58 [PATCH v2 1/2] io: prevent compiler reordering on the default writeX() implementation Sinan Kaya
@ 2018-03-30 15:58 ` Sinan Kaya
  2018-04-03 10:49   ` Mark Rutland
  0 siblings, 1 reply; 14+ messages in thread
From: Sinan Kaya @ 2018-03-30 15:58 UTC (permalink / raw)
  To: linux-arm-kernel

The default implementation of mapping readX() to __raw_readX() is wrong.
readX() has stronger ordering semantics. Compiler is allowed to reorder
__raw_readX().

In the absence of a read barrier or when using a strongly ordered
architecture, readX() should at least have a compiler barrier in
it to prevent the compiler from clobbering the execution order.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 include/asm-generic/io.h | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index e8c2078..2554f15 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -110,7 +110,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
 #define readb readb
 static inline u8 readb(const volatile void __iomem *addr)
 {
-	return __raw_readb(addr);
+	u8 val;
+
+	val = __raw_readb(addr);
+	barrier();
+
+	return val;
 }
 #endif
 
@@ -118,7 +123,12 @@ static inline u8 readb(const volatile void __iomem *addr)
 #define readw readw
 static inline u16 readw(const volatile void __iomem *addr)
 {
-	return __le16_to_cpu(__raw_readw(addr));
+	u16 val;
+
+	val = __le16_to_cpu(__raw_readw(addr));
+	barrier();
+
+	return val;
 }
 #endif
 
@@ -126,7 +136,12 @@ static inline u16 readw(const volatile void __iomem *addr)
 #define readl readl
 static inline u32 readl(const volatile void __iomem *addr)
 {
-	return __le32_to_cpu(__raw_readl(addr));
+	u32 val;
+
+	val = __le32_to_cpu(__raw_readl(addr));
+	barrier();
+
+	return val;
 }
 #endif
 
@@ -135,7 +150,12 @@ static inline u32 readl(const volatile void __iomem *addr)
 #define readq readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
-	return __le64_to_cpu(__raw_readq(addr));
+	u64 val;
+
+	val = __le64_to_cpu(__raw_readq(addr));
+	barrier();
+
+	return val;
 }
 #endif
 #endif /* CONFIG_64BIT */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-03-30 15:58 ` [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation Sinan Kaya
@ 2018-04-03 10:49   ` Mark Rutland
  2018-04-03 11:13     ` Arnd Bergmann
  0 siblings, 1 reply; 14+ messages in thread
From: Mark Rutland @ 2018-04-03 10:49 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
> The default implementation of mapping readX() to __raw_readX() is wrong.
> readX() has stronger ordering semantics. Compiler is allowed to reorder
> __raw_readX().

Could you please specify what the compiler is potentially reordering
__raw_readX() against, and why this would be wrong?

e.g. do we care about prior normal memory accesses, subsequent normal
memory accesses, and/or other IO accesses?

I assume that the asm-generic __raw_{read,write}X() implementations are
all ordered w.r.t. each other (at least for a specific device).

Thanks,
Mark.

> In the abscence of a read barrier or when using a strongly ordered
> architecture, readX() should at least have a compiler barrier in
> it to prevent commpiler from clobbering the execution order.
> 
> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  include/asm-generic/io.h | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
> index e8c2078..2554f15 100644
> --- a/include/asm-generic/io.h
> +++ b/include/asm-generic/io.h
> @@ -110,7 +110,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
>  #define readb readb
>  static inline u8 readb(const volatile void __iomem *addr)
>  {
> -	return __raw_readb(addr);
> +	u8 val;
> +
> +	val = __raw_readb(addr);
> +	barrier();
> +
> +	return val;
>  }
>  #endif
>  
> @@ -118,7 +123,12 @@ static inline u8 readb(const volatile void __iomem *addr)
>  #define readw readw
>  static inline u16 readw(const volatile void __iomem *addr)
>  {
> -	return __le16_to_cpu(__raw_readw(addr));
> +	u16 val;
> +
> +	val = __le16_to_cpu(__raw_readw(addr));
> +	barrier();
> +
> +	return val;
>  }
>  #endif
>  
> @@ -126,7 +136,12 @@ static inline u16 readw(const volatile void __iomem *addr)
>  #define readl readl
>  static inline u32 readl(const volatile void __iomem *addr)
>  {
> -	return __le32_to_cpu(__raw_readl(addr));
> +	u32 val;
> +
> +	val = __le32_to_cpu(__raw_readl(addr));
> +	barrier();
> +
> +	return val;
>  }
>  #endif
>  
> @@ -135,7 +150,12 @@ static inline u32 readl(const volatile void __iomem *addr)
>  #define readq readq
>  static inline u64 readq(const volatile void __iomem *addr)
>  {
> -	return __le64_to_cpu(__raw_readq(addr));
> +	u64 val;
> +
> +	val = __le64_to_cpu(__raw_readq(addr));
> +	barrier();
> +
> +	return val;
>  }
>  #endif
>  #endif /* CONFIG_64BIT */
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-03 10:49   ` Mark Rutland
@ 2018-04-03 11:13     ` Arnd Bergmann
  2018-04-03 12:44       ` Sinan Kaya
  0 siblings, 1 reply; 14+ messages in thread
From: Arnd Bergmann @ 2018-04-03 11:13 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 3, 2018 at 12:49 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi,
>
> On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
>> The default implementation of mapping readX() to __raw_readX() is wrong.
>> readX() has stronger ordering semantics. Compiler is allowed to reorder
>> __raw_readX().
>
> Could you please specify what the compiler is potentially reordering
> __raw_readX() against, and why this would be wrong?
>
> e.g. do we care about prior normal memory accesses, subsequent normal
> memory accesses, and/or other IO accesses?
>
> I assume that the asm-generic __raw_{read,write}X() implementations are
> all ordered w.r.t. each other (at least for a specific device).

I think that is correct: the compiler won't reorder those because of the
'volatile' pointer dereference, but it can reorder access to a normal
pointer against a __raw_readl()/__raw_writel(), which breaks the scenario
of using writel to trigger a DMA, or using a readl to see if a DMA has
completed.

The question is whether we should use a stronger barrier such
as rmb() and wmb() here rather than a simple compiler barrier.

I would assume that on complex architectures with write buffers and
out-of-order prefetching, those are required, while on architectures
without those features, the barriers are cheap.

      Arnd

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-03 11:13     ` Arnd Bergmann
@ 2018-04-03 12:44       ` Sinan Kaya
  2018-04-03 12:56         ` Arnd Bergmann
  0 siblings, 1 reply; 14+ messages in thread
From: Sinan Kaya @ 2018-04-03 12:44 UTC (permalink / raw)
  To: linux-arm-kernel

On 4/3/2018 7:13 AM, Arnd Bergmann wrote:
> On Tue, Apr 3, 2018 at 12:49 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> Hi,
>>
>> On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
>>> The default implementation of mapping readX() to __raw_readX() is wrong.
>>> readX() has stronger ordering semantics. Compiler is allowed to reorder
>>> __raw_readX().
>>
>> Could you please specify what the compiler is potentially reordering
>> __raw_readX() against, and why this would be wrong?
>>
>> e.g. do we care about prior normal memory accesses, subsequent normal
>> memory accesses, and/or other IO accesses?
>>
>> I assume that the asm-generic __raw_{read,write}X() implementations are
>> all ordered w.r.t. each other (at least for a specific device).
> 
> I think that is correct: the compiler won't reorder those because of the
> 'volatile' pointer dereference, but it can reorder access to a normal
> pointer against a __raw_readl()/__raw_writel(), which breaks the scenario
> of using writel to trigger a DMA, or using a readl to see if a DMA has
> completed.

Yes, we are worried about memory update vs. IO update ordering here.
That was the reason why barrier() was introduced in this patch. I'll try to
clarify that better in the commit text.

> 
> The question is whether we should use a stronger barrier such
> as rmb() amd wmb() here rather than a simple compiler barrier.
> 
> I would assume that on complex architectures with write buffers and
> out-of-order prefetching, those are required, while on architectures
> without those features, the barriers are cheap.

That's my reasoning too. I'm trying to follow the x86 example here where there
is a compiler barrier in writeX() and readX() family of functions.


> 
>       Arnd
> 


-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-03 12:44       ` Sinan Kaya
@ 2018-04-03 12:56         ` Arnd Bergmann
  2018-04-03 13:06           ` Sinan Kaya
  2018-04-03 22:29           ` Palmer Dabbelt
  0 siblings, 2 replies; 14+ messages in thread
From: Arnd Bergmann @ 2018-04-03 12:56 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 3, 2018 at 2:44 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
> On 4/3/2018 7:13 AM, Arnd Bergmann wrote:
>> On Tue, Apr 3, 2018 at 12:49 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>>> Hi,
>>>
>>> On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
>>>> The default implementation of mapping readX() to __raw_readX() is wrong.
>>>> readX() has stronger ordering semantics. Compiler is allowed to reorder
>>>> __raw_readX().
>>>
>>> Could you please specify what the compiler is potentially reordering
>>> __raw_readX() against, and why this would be wrong?
>>>
>>> e.g. do we care about prior normal memory accesses, subsequent normal
>>> memory accesses, and/or other IO accesses?
>>>
>>> I assume that the asm-generic __raw_{read,write}X() implementations are
>>> all ordered w.r.t. each other (at least for a specific device).
>>
>> I think that is correct: the compiler won't reorder those because of the
>> 'volatile' pointer dereference, but it can reorder access to a normal
>> pointer against a __raw_readl()/__raw_writel(), which breaks the scenario
>> of using writel to trigger a DMA, or using a readl to see if a DMA has
>> completed.
>
> Yes, we are worried about memory update vs. IO update ordering here.
> That was the reason why barrier() was introduced in this patch. I'll try to
> clarify that better in the commit text.
>
>>
>> The question is whether we should use a stronger barrier such
>> as rmb() amd wmb() here rather than a simple compiler barrier.
>>
>> I would assume that on complex architectures with write buffers and
>> out-of-order prefetching, those are required, while on architectures
>> without those features, the barriers are cheap.
>
> That's my reasoning too. I'm trying to follow the x86 example here where there
> is a compiler barrier in writeX() and readX() family of functions.

I think x86 is the special case here because it implicitly guarantees
the strict ordering in the hardware, as long as the compiler gets it
right. For the asm-generic version, it may be better to play safe and
do the safest version, requiring architectures to override that barrier
if they want to be faster.

We could use the same macros that riscv has, using __io_br(),
__io_ar(), __io_bw() and __io_aw() for before/after read/write.

      Arnd

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-03 12:56         ` Arnd Bergmann
@ 2018-04-03 13:06           ` Sinan Kaya
  2018-04-03 22:29           ` Palmer Dabbelt
  1 sibling, 0 replies; 14+ messages in thread
From: Sinan Kaya @ 2018-04-03 13:06 UTC (permalink / raw)
  To: linux-arm-kernel

On 4/3/2018 8:56 AM, Arnd Bergmann wrote:
> On Tue, Apr 3, 2018 at 2:44 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
>> On 4/3/2018 7:13 AM, Arnd Bergmann wrote:
>>> On Tue, Apr 3, 2018 at 12:49 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>>>> Hi,
>>>>
>>>> On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
>>>>> The default implementation of mapping readX() to __raw_readX() is wrong.
>>>>> readX() has stronger ordering semantics. Compiler is allowed to reorder
>>>>> __raw_readX().
>>>>
>>>> Could you please specify what the compiler is potentially reordering
>>>> __raw_readX() against, and why this would be wrong?
>>>>
>>>> e.g. do we care about prior normal memory accesses, subsequent normal
>>>> memory accesses, and/or other IO accesses?
>>>>
>>>> I assume that the asm-generic __raw_{read,write}X() implementations are
>>>> all ordered w.r.t. each other (at least for a specific device).
>>>
>>> I think that is correct: the compiler won't reorder those because of the
>>> 'volatile' pointer dereference, but it can reorder access to a normal
>>> pointer against a __raw_readl()/__raw_writel(), which breaks the scenario
>>> of using writel to trigger a DMA, or using a readl to see if a DMA has
>>> completed.
>>
>> Yes, we are worried about memory update vs. IO update ordering here.
>> That was the reason why barrier() was introduced in this patch. I'll try to
>> clarify that better in the commit text.
>>
>>>
>>> The question is whether we should use a stronger barrier such
>>> as rmb() amd wmb() here rather than a simple compiler barrier.
>>>
>>> I would assume that on complex architectures with write buffers and
>>> out-of-order prefetching, those are required, while on architectures
>>> without those features, the barriers are cheap.
>>
>> That's my reasoning too. I'm trying to follow the x86 example here where there
>> is a compiler barrier in writeX() and readX() family of functions.
> 
> I think x86 is the special case here because it implicitly guarantees
> the strict ordering in the hardware, as long as the compiler gets it
> right. For the asm-generic version, it may be better to play safe and
> do the safest version, requiring architectures to override that barrier
> if they want to be faster.
> 
> We could use the same macros that riscv has, using __io_br(),
> __io_ar(), __io_bw() and __io_aw() for before/after read/write.

Sure, let me take a stab at it. 

> 
>       Arnd
> 


-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-03 12:56         ` Arnd Bergmann
  2018-04-03 13:06           ` Sinan Kaya
@ 2018-04-03 22:29           ` Palmer Dabbelt
  2018-04-04 15:52             ` Sinan Kaya
  1 sibling, 1 reply; 14+ messages in thread
From: Palmer Dabbelt @ 2018-04-03 22:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 03 Apr 2018 05:56:18 PDT (-0700), Arnd Bergmann wrote:
> On Tue, Apr 3, 2018 at 2:44 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
>> On 4/3/2018 7:13 AM, Arnd Bergmann wrote:
>>> On Tue, Apr 3, 2018 at 12:49 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>>>> Hi,
>>>>
>>>> On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
>>>>> The default implementation of mapping readX() to __raw_readX() is wrong.
>>>>> readX() has stronger ordering semantics. Compiler is allowed to reorder
>>>>> __raw_readX().
>>>>
>>>> Could you please specify what the compiler is potentially reordering
>>>> __raw_readX() against, and why this would be wrong?
>>>>
>>>> e.g. do we care about prior normal memory accesses, subsequent normal
>>>> memory accesses, and/or other IO accesses?
>>>>
>>>> I assume that the asm-generic __raw_{read,write}X() implementations are
>>>> all ordered w.r.t. each other (at least for a specific device).
>>>
>>> I think that is correct: the compiler won't reorder those because of the
>>> 'volatile' pointer dereference, but it can reorder access to a normal
>>> pointer against a __raw_readl()/__raw_writel(), which breaks the scenario
>>> of using writel to trigger a DMA, or using a readl to see if a DMA has
>>> completed.
>>
>> Yes, we are worried about memory update vs. IO update ordering here.
>> That was the reason why barrier() was introduced in this patch. I'll try to
>> clarify that better in the commit text.
>>
>>>
>>> The question is whether we should use a stronger barrier such
>>> as rmb() amd wmb() here rather than a simple compiler barrier.
>>>
>>> I would assume that on complex architectures with write buffers and
>>> out-of-order prefetching, those are required, while on architectures
>>> without those features, the barriers are cheap.
>>
>> That's my reasoning too. I'm trying to follow the x86 example here where there
>> is a compiler barrier in writeX() and readX() family of functions.
>
> I think x86 is the special case here because it implicitly guarantees
> the strict ordering in the hardware, as long as the compiler gets it
> right. For the asm-generic version, it may be better to play safe and
> do the safest version, requiring architectures to override that barrier
> if they want to be faster.
>
> We could use the same macros that riscv has, using __io_br(),
> __io_ar(), __io_bw() and __io_aw() for before/after read/write.

FWIW, when I wrote this I wasn't sure what the RISC-V memory model was going to 
be so I just picked something generic.  In other words, it's already a generic 
interface, just one that we're the only users of :).

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-03 22:29           ` Palmer Dabbelt
@ 2018-04-04 15:52             ` Sinan Kaya
  2018-04-04 15:55               ` Arnd Bergmann
  0 siblings, 1 reply; 14+ messages in thread
From: Sinan Kaya @ 2018-04-04 15:52 UTC (permalink / raw)
  To: linux-arm-kernel

On 4/3/2018 6:29 PM, Palmer Dabbelt wrote:
> On Tue, 03 Apr 2018 05:56:18 PDT (-0700), Arnd Bergmann wrote:
>> On Tue, Apr 3, 2018 at 2:44 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
>>> On 4/3/2018 7:13 AM, Arnd Bergmann wrote:
>>>> On Tue, Apr 3, 2018 at 12:49 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>>>>> Hi,
>>>>>
>>>>> On Fri, Mar 30, 2018 at 11:58:13AM -0400, Sinan Kaya wrote:
>>>>>> The default implementation of mapping readX() to __raw_readX() is wrong.
>>>>>> readX() has stronger ordering semantics. Compiler is allowed to reorder
>>>>>> __raw_readX().
>>>>>
>>>>> Could you please specify what the compiler is potentially reordering
>>>>> __raw_readX() against, and why this would be wrong?
>>>>>
>>>>> e.g. do we care about prior normal memory accesses, subsequent normal
>>>>> memory accesses, and/or other IO accesses?
>>>>>
>>>>> I assume that the asm-generic __raw_{read,write}X() implementations are
>>>>> all ordered w.r.t. each other (at least for a specific device).
>>>>
>>>> I think that is correct: the compiler won't reorder those because of the
>>>> 'volatile' pointer dereference, but it can reorder access to a normal
>>>> pointer against a __raw_readl()/__raw_writel(), which breaks the scenario
>>>> of using writel to trigger a DMA, or using a readl to see if a DMA has
>>>> completed.
>>>
>>> Yes, we are worried about memory update vs. IO update ordering here.
>>> That was the reason why barrier() was introduced in this patch. I'll try to
>>> clarify that better in the commit text.
>>>
>>>>
>>>> The question is whether we should use a stronger barrier such
>>>> as rmb() amd wmb() here rather than a simple compiler barrier.
>>>>
>>>> I would assume that on complex architectures with write buffers and
>>>> out-of-order prefetching, those are required, while on architectures
>>>> without those features, the barriers are cheap.
>>>
>>> That's my reasoning too. I'm trying to follow the x86 example here where there
>>> is a compiler barrier in writeX() and readX() family of functions.
>>
>> I think x86 is the special case here because it implicitly guarantees
>> the strict ordering in the hardware, as long as the compiler gets it
>> right. For the asm-generic version, it may be better to play safe and
>> do the safest version, requiring architectures to override that barrier
>> if they want to be faster.
>>
>> We could use the same macros that riscv has, using __io_br(),
>> __io_ar(), __io_bw() and __io_aw() for before/after read/write.
> 
> FWIW, when I wrote this I wasn't sure what the RISC-V memory model was going to be so I just picked something generic.  In other words, it's already a generic interface, just one that we're the only users of :).
> 

Are we looking for something like this?


diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index e8c2078..693a82f 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -101,6 +101,16 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
 #endif
 #endif /* CONFIG_64BIT */
 
+#ifndef __io_br()
+#define __io_br()	do {} while (0)
+#endif
+
+#ifdef rmb
+#define __io_ar()	rmb();
+#else
+#define __io_ar()	barrier();
+#endif
+
 /*
  * {read,write}{b,w,l,q}() access little endian memory and return result in
  * native endianness.
@@ -108,35 +118,46 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
 
 #ifndef readb
 #define readb readb
-static inline u8 readb(const volatile void __iomem *addr)
-{
-	return __raw_readb(addr);
-}
+#define readb(c)				\
+	({ u8  __v;				\
+	 __io_br();				\
+	 __v = __raw_readb(c);			\
+	 __io_ar();				\
+	 __v; })
 #endif
 
 #ifndef readw
 #define readw readw
-static inline u16 readw(const volatile void __iomem *addr)
-{
-	return __le16_to_cpu(__raw_readw(addr));
-}
+#define readw(c)				\
+    ({ u16 __v;					\
+						\
+     __io_br();					\
+      __v = __le16_to_cpu(__raw_readw(c));	\
+     __io_ar();					\
+     __v; })
 #endif
 
 #ifndef readl
 #define readl readl
-static inline u32 readl(const volatile void __iomem *addr)
-{
-	return __le32_to_cpu(__raw_readl(addr));
-}
+#define readl(c)				\
+    ({ u32 __v;					\
+						\
+     __io_br();					\
+      __v = __le32_to_cpu(__raw_readl(c));	\
+     __io_ar();					\
+     __v; })
 #endif
 
 #ifdef CONFIG_64BIT
 #ifndef readq
 #define readq readq
-static inline u64 readq(const volatile void __iomem *addr)
-{
-	return __le64_to_cpu(__raw_readq(addr));
-}
+#define readq(c)				\
+    ({ u64 __v;					\
+						\
+     __io_br();					\
+      __v = __le64_to_cpu(__raw_readq(c));	\
+     __io_ar();					\
+     __v; })
 #endif
 #endif /* CONFIG_64BIT */
  


-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-04 15:52             ` Sinan Kaya
@ 2018-04-04 15:55               ` Arnd Bergmann
  2018-04-04 15:57                 ` Sinan Kaya
  2018-04-04 17:48                 ` Sinan Kaya
  0 siblings, 2 replies; 14+ messages in thread
From: Arnd Bergmann @ 2018-04-04 15:55 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Apr 4, 2018 at 5:52 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
> On 4/3/2018 6:29 PM, Palmer Dabbelt wrote:
>>
>
> Are we looking for something like this?

Yes, exactly, plus the same for write and in/out of course.

> diff --git a/inc
>  #ifndef readb
>  #define readb readb
> -static inline u8 readb(const volatile void __iomem *addr)
> -{
> -       return __raw_readb(addr);
> -}
> +#define readb(c)                               \
> +       ({ u8  __v;                             \
> +        __io_br();                             \
> +        __v = __raw_readb(c);                  \
> +        __io_ar();                             \
> +        __v; })
>  #endif

I would prefer leaving these as inline functions, but that's only
a cosmetic difference.

       Arnd

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-04 15:55               ` Arnd Bergmann
@ 2018-04-04 15:57                 ` Sinan Kaya
  2018-04-04 17:48                 ` Sinan Kaya
  1 sibling, 0 replies; 14+ messages in thread
From: Sinan Kaya @ 2018-04-04 15:57 UTC (permalink / raw)
  To: linux-arm-kernel

On 4/4/2018 11:55 AM, Arnd Bergmann wrote:
> On Wed, Apr 4, 2018 at 5:52 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
>> On 4/3/2018 6:29 PM, Palmer Dabbelt wrote:
>>>
>>
>> Are we looking for something like this?
> 
> Yes, exactly, plus the same for write and in/out of course.
> 

OK. I just wanted to double check first.

>> diff --git a/inc
>>  #ifndef readb
>>  #define readb readb
>> -static inline u8 readb(const volatile void __iomem *addr)
>> -{
>> -       return __raw_readb(addr);
>> -}
>> +#define readb(c)                               \
>> +       ({ u8  __v;                             \
>> +        __io_br();                             \
>> +        __v = __raw_readb(c);                  \
>> +        __io_ar();                             \
>> +        __v; })
>>  #endif
> 
> I would prefer leaving these as inline functions, but that's only
> a cosmetic difference.

sure, I'll leave these as inline functions.

> 
>        Arnd
> 


-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-04 15:55               ` Arnd Bergmann
  2018-04-04 15:57                 ` Sinan Kaya
@ 2018-04-04 17:48                 ` Sinan Kaya
  2018-04-04 19:50                   ` Arnd Bergmann
  1 sibling, 1 reply; 14+ messages in thread
From: Sinan Kaya @ 2018-04-04 17:48 UTC (permalink / raw)
  To: linux-arm-kernel

On 4/4/2018 11:55 AM, Arnd Bergmann wrote:
> Yes, exactly, plus the same for write and in/out of course.

I was looking at this...

inb() and outb() seem to be calling writeb(). It gets the wmb/barrier automatically
when we fix writeb().

Did I miss something?

-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-04 17:48                 ` Sinan Kaya
@ 2018-04-04 19:50                   ` Arnd Bergmann
  2018-04-05  0:06                     ` Sinan Kaya
  0 siblings, 1 reply; 14+ messages in thread
From: Arnd Bergmann @ 2018-04-04 19:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Apr 4, 2018 at 7:48 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
> On 4/4/2018 11:55 AM, Arnd Bergmann wrote:
>> Yes, exactly, plus the same for write and in/out of course.
>
> I was looking at this...
>
> inb() and outb() seem to be calling writeb(). It gets the wmb/barrier automatically
> when we fix writeb().
>
> Did I miss something?

At least outb() needs stricter barriers than writeb() in theory: what
we want here is that outb() has not just made it out to the device but
that the write has been confirmed completed by the device. Some
architectures can't do it, but those that can should have an easy way
to hook into that using a separate set of barriers.

Using the riscv barrier names, we could do this like

#ifndef __io_bw()
#define __io_bw()      wmb()
#endif

#ifndef __io_aw
#define __io_aw()      barrier()
#endif

#ifndef __io_pbw
#define __io_pbw()     __io_bw()
#endif

#ifndef __io_paw
#define __io_paw()     __io_aw()
#endif

and the same thing for reads. This way, an architecture could override
any of those, but still get reasonable defaults for the others.
For __io_aw(), I picked barrier() instead of do {} while (0), no idea
if that's any better, I just play safe here.

     Arnd

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation
  2018-04-04 19:50                   ` Arnd Bergmann
@ 2018-04-05  0:06                     ` Sinan Kaya
  0 siblings, 0 replies; 14+ messages in thread
From: Sinan Kaya @ 2018-04-05  0:06 UTC (permalink / raw)
  To: linux-arm-kernel

On 4/4/2018 3:50 PM, Arnd Bergmann wrote:
> On Wed, Apr 4, 2018 at 7:48 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
>> On 4/4/2018 11:55 AM, Arnd Bergmann wrote:
>>> Yes, exactly, plus the same for write and in/out of course.
>>
>> I was looking at this...
>>
>> inb() and outb() seem to be calling writeb(). It gets the wmb/barrier automatically
>> when we fix writeb().
>>
>> Did I miss something?
> 
> At least outb() needs stricter barriers than writeb() in theory, what
> we want here
> is that outb() has not just made it out to the device but that the
> write has been
> confirmed completed by the device. Some architectures can't do it, but those
> that can should have an easy way to hook into that using a separate set of
> barriers.
> 
> Using the riscv barrier names, we could do this like
> 
> #ifndef __io_bw()
> #define __io_bw()      wmb()
> #endif
> 
> #ifndef __io_aw
> #define __io_aw()      barrier()
> #endif
> 
> #ifndef __io_pbw
> #define __io_pbw()     __io_bw()
> #endif
> 
> #ifndef __io_paw
> #define __io_paw()     __io_aw()
> #endif
> 
> and the same thing for reads. This way, an architecture could override
> any of those, but still get reasonable defaults for the others.
> For __io_bw(), I picked barrier() instead of do {} while (0), no idea
> if that's any better, I just play safe here.

I posted V3. I hope I captured what you mean above correctly.

> 
>      Arnd
> 


-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2018-04-05  0:06 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-03-30 15:58 [PATCH v2 1/2] io: prevent compiler reordering on the default writeX() implementation Sinan Kaya
2018-03-30 15:58 ` [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation Sinan Kaya
2018-04-03 10:49   ` Mark Rutland
2018-04-03 11:13     ` Arnd Bergmann
2018-04-03 12:44       ` Sinan Kaya
2018-04-03 12:56         ` Arnd Bergmann
2018-04-03 13:06           ` Sinan Kaya
2018-04-03 22:29           ` Palmer Dabbelt
2018-04-04 15:52             ` Sinan Kaya
2018-04-04 15:55               ` Arnd Bergmann
2018-04-04 15:57                 ` Sinan Kaya
2018-04-04 17:48                 ` Sinan Kaya
2018-04-04 19:50                   ` Arnd Bergmann
2018-04-05  0:06                     ` Sinan Kaya

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).