public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
@ 2026-01-25  6:39 guoren
  2026-02-18 12:06 ` Han Gao
  2026-02-19  8:04 ` Yao Zi
  0 siblings, 2 replies; 4+ messages in thread
From: guoren @ 2026-01-25  6:39 UTC (permalink / raw)
  To: paul.walmsley, palmer, guoren, alex, leobras, djordje.todorovic,
	aleksa.paunovic, arnd, rabenda.cn
  Cc: linux-riscv, linux-kernel, linux-arch, Han Gao, Yao Zi,
	Alexandre Ghiti, Paul Walmsley

From: "Guo Ren (Alibaba DAMO Academy)" <guoren@kernel.org>

Early versions of the XuanTie C910 core have a store merge buffer
delay problem. The store merge buffer improves store queue
performance by merging multiple store requests, but when no further
store requests follow, the prior single store request can be left
waiting in the store queue for a long time. That causes
significant problems for communication between cores. This
problem was found on sg2042 & th1520 platforms with the qspinlock
lock torture test.

Appending a "fence w, o" after the store can immediately flush the
store merge buffer and let other cores see the write result.

This patch adds a WRITE_ONCE erratum fixup that handles the non-standard
behavior by appending a "fence w, o" instruction to WRITE_ONCE().

This problem has only been observed on the sg2042 hardware platform by
running the lock_torture test program for half an hour. The problem
was not found in user-space applications, because interrupts can
break the livelock.

Reviewed-by: Leonardo Bras <leobras@redhat.com>
Tested-by: Han Gao <gaohan@iscas.ac.cn>
Cc: Yao Zi <me@ziyao.cc>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Paul Walmsley <pjw@kernel.org>
Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guoren@kernel.org>
---
Changelog

v3:
 - Rebase on 6.19-rc6.
 - Remove errata_list_vendors patch which has been merged.
 - Add Tested-by tag.

v2:
https://lore.kernel.org/linux-riscv/20250713155321.2064856-1-guoren@kernel.org/
 - Add new header file for errata_list_vendors.
 - Rebase newest kernel version.

v1:
https://lore.kernel.org/all/20241214143039.4139398-1-guoren@kernel.org/
---
 arch/riscv/Kconfig.errata                    | 17 ++++++++++
 arch/riscv/errata/thead/errata.c             | 20 ++++++++++++
 arch/riscv/include/asm/errata_list_vendors.h |  3 +-
 arch/riscv/include/asm/rwonce.h              | 34 ++++++++++++++++++++
 include/asm-generic/rwonce.h                 |  2 ++
 5 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/rwonce.h

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 3c945d086c7d..4d3f13522da6 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -154,4 +154,21 @@ config ERRATA_THEAD_GHOSTWRITE
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_WRITE_ONCE
+	bool "Apply T-Head WRITE_ONCE errata"
+	depends on ERRATA_THEAD
+	default y
+	help
+	  The early version of T-Head C9xx cores of sg2042 & th1520 have a store
+	  merge buffer delay problem. The store merge buffer could improve the
+	  store queue performance by merging multi-store requests, but when there
+	  are no continued store requests, the prior single store request would be
+	  waiting in the store queue for a long time. That would cause
+	  significant problems for communication between multi-cores. Appending a
+	  fence w.o could immediately flush the store merge buffer and let other
+	  cores see the write result.
+
+	  This will apply the WRITE_ONCE errata to handle the non-standard
+	  behavior via appending a fence w, o instruction for WRITE_ONCE().
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 0b942183f708..fbe46f2fa8fb 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -168,6 +168,23 @@ static bool errata_probe_ghostwrite(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_write_once(unsigned int stage,
+				    unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_BOOT ||
+	    stage == RISCV_ALTERNATIVES_MODULE)
+		return true;
+
+	return false;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -183,6 +200,9 @@ static u32 thead_errata_probe(unsigned int stage,
 
 	errata_probe_ghostwrite(stage, archid, impid);
 
+	if (errata_probe_write_once(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/errata_list_vendors.h b/arch/riscv/include/asm/errata_list_vendors.h
index ec7eba373437..8fd7c36307e2 100644
--- a/arch/riscv/include/asm/errata_list_vendors.h
+++ b/arch/riscv/include/asm/errata_list_vendors.h
@@ -18,7 +18,8 @@
 #define	ERRATA_THEAD_MAE 0
 #define	ERRATA_THEAD_PMU 1
 #define	ERRATA_THEAD_GHOSTWRITE 2
-#define	ERRATA_THEAD_NUMBER 3
+#define	ERRATA_THEAD_WRITE_ONCE 3
+#define	ERRATA_THEAD_NUMBER 4
 #endif
 
 #ifdef CONFIG_ERRATA_MIPS
diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
new file mode 100644
index 000000000000..081793d4d772
--- /dev/null
+++ b/arch/riscv/include/asm/rwonce.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+#include <asm/vendorid_list.h>
+#include <asm/errata_list_vendors.h>
+
+#if defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE)
+
+#define write_once_fence()				\
+do {							\
+	asm volatile(ALTERNATIVE(			\
+		"nop",					\
+		"fence w, o",				\
+		THEAD_VENDOR_ID,			\
+		ERRATA_THEAD_WRITE_ONCE,		\
+		CONFIG_ERRATA_THEAD_WRITE_ONCE)		\
+		: : : "memory");			\
+} while (0)
+
+#define __WRITE_ONCE(x, val)				\
+do {							\
+	*(volatile typeof(x) *)&(x) = (val);		\
+	write_once_fence();				\
+} while (0)
+
+#endif /* defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE) */
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_RWONCE_H */
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
index 52b969c7cef9..4e2d941f15a1 100644
--- a/include/asm-generic/rwonce.h
+++ b/include/asm-generic/rwonce.h
@@ -50,10 +50,12 @@
 	__READ_ONCE(x);							\
 })
 
+#ifndef __WRITE_ONCE
 #define __WRITE_ONCE(x, val)						\
 do {									\
 	*(volatile typeof(x) *)&(x) = (val);				\
 } while (0)
+#endif
 
 #define WRITE_ONCE(x, val)						\
 do {									\
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
  2026-01-25  6:39 [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup guoren
@ 2026-02-18 12:06 ` Han Gao
  2026-02-19  8:04 ` Yao Zi
  1 sibling, 0 replies; 4+ messages in thread
From: Han Gao @ 2026-02-18 12:06 UTC (permalink / raw)
  To: Paul Walmsley
  Cc: Han Gao, paul.walmsley, palmer, alex, leobras, djordje.todorovic,
	aleksa.paunovic, arnd, rabenda.cn, linux-riscv, linux-kernel,
	linux-arch, Yao Zi, Alexandre Ghiti, Paul Walmsley, Guo Ren

Hi, Paul

There was a response to this patch in Patch V2, but no further action has been taken.

I'd like to ask if there are any other issues with this patch, or if it lacks sufficient review feedback or testing.

> On Jan 25, 2026, at 14:39, guoren@kernel.org wrote:
> 
> From: "Guo Ren (Alibaba DAMO Academy)" <guoren@kernel.org>
> 
> The early version of XuanTie C910 core has a store merge buffer
> delay problem. The store merge buffer could improve the store queue
> performance by merging multi-store requests, but when there are not
> continued store requests, the prior single store request would be
> waiting in the store queue for a long time. That would cause
> significant problems for communication between multi-cores. This
> problem was found on sg2042 & th1520 platforms with the qspinlock
> lock torture test.
> 
> So appending a fence w.o could immediately flush the store merge
> buffer and let other cores see the write result.
> 
> This will apply the WRITE_ONCE errata to handle the non-standard
> behavior via appending a fence w.o instruction for WRITE_ONCE().
> 
> This problem is only observed on the sg2042 hardware platform by
> running the lock_torture test program for half an hour. The problem
> was not found in the user space application, because interrupt can
> break the livelock.
> 
> Reviewed-by: Leonardo Bras <leobras@redhat.com>
> Tested-by: Han Gao <gaohan@iscas.ac.cn>
> Cc: Yao Zi <me@ziyao.cc>
> Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
> Cc: Paul Walmsley <pjw@kernel.org>
> Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guoren@kernel.org>
> ---
> Changelog
> 
> v3:
> - Rebase on 6.19-rc6.
> - Remove errata_list_vendors patch which has been merged.
> - Add Tested-by tag.
> 
> v2:
> https://lore.kernel.org/linux-riscv/20250713155321.2064856-1-guoren@kernel.org/
> - Add new header file for errata_list_vendors.
> - Rebase newest kernel version.
> 
> v1:
> https://lore.kernel.org/all/20241214143039.4139398-1-guoren@kernel.org/
> ---
> arch/riscv/Kconfig.errata                    | 17 ++++++++++
> arch/riscv/errata/thead/errata.c             | 20 ++++++++++++
> arch/riscv/include/asm/errata_list_vendors.h |  3 +-
> arch/riscv/include/asm/rwonce.h              | 34 ++++++++++++++++++++
> include/asm-generic/rwonce.h                 |  2 ++
> 5 files changed, 75 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/include/asm/rwonce.h
> 
> diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> index 3c945d086c7d..4d3f13522da6 100644
> --- a/arch/riscv/Kconfig.errata
> +++ b/arch/riscv/Kconfig.errata
> @@ -154,4 +154,21 @@ config ERRATA_THEAD_GHOSTWRITE
> 
>  If you don't know what to do here, say "Y".
> 
> +config ERRATA_THEAD_WRITE_ONCE
> + bool "Apply T-Head WRITE_ONCE errata"
> + depends on ERRATA_THEAD
> + default y
> + help
> +  The early version of T-Head C9xx cores of sg2042 & th1520 have a store
> +  merge buffer delay problem. The store merge buffer could improve the
> +  store queue performance by merging multi-store requests, but when there
> +  are no continued store requests, the prior single store request would be
> +  waiting in the store queue for a long time. That would cause signifi-
> +  cant problems for communication between multi-cores. Appending a
> +  fence w.o could immediately flush the store merge buffer and let other
> +  cores see the write result.
> +
> +  This will apply the WRITE_ONCE errata to handle the non-standard beh-
> +  avior via appending a fence w.o instruction for WRITE_ONCE().
> +
> endmenu # "CPU errata selection"
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index 0b942183f708..fbe46f2fa8fb 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -168,6 +168,23 @@ static bool errata_probe_ghostwrite(unsigned int stage,
> return true;
> }
> 
> +static bool errata_probe_write_once(unsigned int stage,
> +    unsigned long arch_id, unsigned long impid)
> +{
> + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
> + return false;
> +
> + /* target-c9xx cores report arch_id and impid as 0 */
> + if (arch_id != 0 || impid != 0)
> + return false;
> +
> + if (stage == RISCV_ALTERNATIVES_BOOT ||
> +    stage == RISCV_ALTERNATIVES_MODULE)
> + return true;
> +
> + return false;
> +}
> +
> static u32 thead_errata_probe(unsigned int stage,
>      unsigned long archid, unsigned long impid)
> {
> @@ -183,6 +200,9 @@ static u32 thead_errata_probe(unsigned int stage,
> 
> errata_probe_ghostwrite(stage, archid, impid);
> 
> + if (errata_probe_write_once(stage, archid, impid))
> + cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
> +
> return cpu_req_errata;
> }
> 
> diff --git a/arch/riscv/include/asm/errata_list_vendors.h b/arch/riscv/include/asm/errata_list_vendors.h
> index ec7eba373437..8fd7c36307e2 100644
> --- a/arch/riscv/include/asm/errata_list_vendors.h
> +++ b/arch/riscv/include/asm/errata_list_vendors.h
> @@ -18,7 +18,8 @@
> #define ERRATA_THEAD_MAE 0
> #define ERRATA_THEAD_PMU 1
> #define ERRATA_THEAD_GHOSTWRITE 2
> -#define ERRATA_THEAD_NUMBER 3
> +#define ERRATA_THEAD_WRITE_ONCE 3
> +#define ERRATA_THEAD_NUMBER 4
> #endif
> 
> #ifdef CONFIG_ERRATA_MIPS
> diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
> new file mode 100644
> index 000000000000..081793d4d772
> --- /dev/null
> +++ b/arch/riscv/include/asm/rwonce.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_RWONCE_H
> +#define __ASM_RWONCE_H
> +
> +#include <linux/compiler_types.h>
> +#include <asm/alternative-macros.h>
> +#include <asm/vendorid_list.h>
> +#include <asm/errata_list_vendors.h>
> +
> +#if defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE)
> +
> +#define write_once_fence() \
> +do { \
> + asm volatile(ALTERNATIVE( \
> + "nop", \
> + "fence w, o", \
> + THEAD_VENDOR_ID, \
> + ERRATA_THEAD_WRITE_ONCE, \
> + CONFIG_ERRATA_THEAD_WRITE_ONCE) \
> + : : : "memory"); \
> +} while (0)
> +
> +#define __WRITE_ONCE(x, val) \
> +do { \
> + *(volatile typeof(x) *)&(x) = (val); \
> + write_once_fence(); \
> +} while (0)
> +
> +#endif /* defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE) */
> +
> +#include <asm-generic/rwonce.h>
> +
> +#endif /* __ASM_RWONCE_H */
> diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
> index 52b969c7cef9..4e2d941f15a1 100644
> --- a/include/asm-generic/rwonce.h
> +++ b/include/asm-generic/rwonce.h
> @@ -50,10 +50,12 @@
> __READ_ONCE(x); \
> })
> 
> +#ifndef __WRITE_ONCE
> #define __WRITE_ONCE(x, val) \
> do { \
> *(volatile typeof(x) *)&(x) = (val); \
> } while (0)
> +#endif
> 
> #define WRITE_ONCE(x, val) \
> do { \
> -- 
> 2.40.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
  2026-01-25  6:39 [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup guoren
  2026-02-18 12:06 ` Han Gao
@ 2026-02-19  8:04 ` Yao Zi
  2026-02-24  2:04   ` Guo Ren
  1 sibling, 1 reply; 4+ messages in thread
From: Yao Zi @ 2026-02-19  8:04 UTC (permalink / raw)
  To: guoren, paul.walmsley, palmer, alex, leobras, djordje.todorovic,
	aleksa.paunovic, arnd, rabenda.cn
  Cc: linux-riscv, linux-kernel, linux-arch, Han Gao, Alexandre Ghiti,
	Paul Walmsley

On Sun, Jan 25, 2026 at 01:39:41AM -0500, guoren@kernel.org wrote:
> From: "Guo Ren (Alibaba DAMO Academy)" <guoren@kernel.org>
> 
> The early version of XuanTie C910 core has a store merge buffer
> delay problem. The store merge buffer could improve the store queue
> performance by merging multi-store requests, but when there are not
> continued store requests, the prior single store request would be
> waiting in the store queue for a long time. That would cause
> significant problems for communication between multi-cores. This
> problem was found on sg2042 & th1520 platforms with the qspinlock
> lock torture test.
> 
> So appending a fence w.o could immediately flush the store merge
> buffer and let other cores see the write result.
> 
> This will apply the WRITE_ONCE errata to handle the non-standard
> behavior via appending a fence w.o instruction for WRITE_ONCE().
> 
> This problem is only observed on the sg2042 hardware platform by
> running the lock_torture test program for half an hour. The problem
> was not found in the user space application, because interrupt can
> break the livelock.
> 
> Reviewed-by: Leonardo Bras <leobras@redhat.com>
> Tested-by: Han Gao <gaohan@iscas.ac.cn>
> Cc: Yao Zi <me@ziyao.cc>
> Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
> Cc: Paul Walmsley <pjw@kernel.org>
> Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guoren@kernel.org>

With the patch, I've run heavy multi-core compilation load on SG2042 for
more than 12 hours, and observed no stability issues.

Tested-by: Yao Zi <me@ziyao.cc>

Thanks,
Yao Zi

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
  2026-02-19  8:04 ` Yao Zi
@ 2026-02-24  2:04   ` Guo Ren
  0 siblings, 0 replies; 4+ messages in thread
From: Guo Ren @ 2026-02-24  2:04 UTC (permalink / raw)
  To: Yao Zi
  Cc: paul.walmsley, palmer, alex, leobras, djordje.todorovic,
	aleksa.paunovic, arnd, rabenda.cn, linux-riscv, linux-kernel,
	linux-arch, Han Gao, Alexandre Ghiti, Paul Walmsley

On Thu, Feb 19, 2026 at 4:05 PM Yao Zi <me@ziyao.cc> wrote:
>
> On Sun, Jan 25, 2026 at 01:39:41AM -0500, guoren@kernel.org wrote:
> > From: "Guo Ren (Alibaba DAMO Academy)" <guoren@kernel.org>
> >
> > The early version of XuanTie C910 core has a store merge buffer
> > delay problem. The store merge buffer could improve the store queue
> > performance by merging multi-store requests, but when there are not
> > continued store requests, the prior single store request would be
> > waiting in the store queue for a long time. That would cause
> > significant problems for communication between multi-cores. This
> > problem was found on sg2042 & th1520 platforms with the qspinlock
> > lock torture test.
> >
> > So appending a fence w.o could immediately flush the store merge
> > buffer and let other cores see the write result.
> >
> > This will apply the WRITE_ONCE errata to handle the non-standard
> > behavior via appending a fence w.o instruction for WRITE_ONCE().
> >
> > This problem is only observed on the sg2042 hardware platform by
> > running the lock_torture test program for half an hour. The problem
> > was not found in the user space application, because interrupt can
> > break the livelock.
> >
> > Reviewed-by: Leonardo Bras <leobras@redhat.com>
> > Tested-by: Han Gao <gaohan@iscas.ac.cn>
> > Cc: Yao Zi <me@ziyao.cc>
> > Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
> > Cc: Paul Walmsley <pjw@kernel.org>
> > Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guoren@kernel.org>
>
> With the patch, I've run heavy multi-core compilation load on SG2042 for
> more than 12 hours, and observed no stability issues.
>
> Tested-by: Yao Zi <me@ziyao.cc>

Thx for the Tested-by, hope this patch could be adopted.

-- 
Best Regards
 Guo Ren

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-02-24  2:04 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-25  6:39 [PATCH V3] riscv: errata: Add ERRATA_THEAD_WRITE_ONCE fixup guoren
2026-02-18 12:06 ` Han Gao
2026-02-19  8:04 ` Yao Zi
2026-02-24  2:04   ` Guo Ren

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox