All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
       [not found] <20250127202017.1043240-1-Raj.Vishwanathan>
@ 2025-03-18 18:51 ` Raj Vishwanathan
  2025-03-19  1:19   ` Xiang W
                     ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Raj Vishwanathan @ 2025-03-18 18:51 UTC (permalink / raw)
  To: opensbi

We set the scratch allocation alignment to cacheline size,specified by
riscv,cbom-block-size in the dts file to avoid two atomic variables from
the same cache line causing livelock on some platforms. If the cacheline
is not specified, we set it a default value.

Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
---
Changes in V3:
    Remove platform specific references to 64 Bytes.
Changes in V2:
    Added a new configuration to get the alignment size.
Change in V1:
    Original Patch
---
 include/sbi/sbi_platform.h         |  2 ++
 include/sbi_utils/fdt/fdt_helper.h |  1 +
 lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
 lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
 platform/generic/platform.c        |  7 +++++++
 5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
index 6d5fbc7..0cea0fe 100644
--- a/include/sbi/sbi_platform.h
+++ b/include/sbi/sbi_platform.h
@@ -197,6 +197,8 @@ struct sbi_platform {
 	 * 2. HART id < SBI_HARTMASK_MAX_BITS
 	 */
 	const u32 *hart_index2id;
+	/** Allocation alignment for Scratch */
+	u32 cbom_block_size;
 };
 
 /**
diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
index 7329b84..0b82159 100644
--- a/include/sbi_utils/fdt/fdt_helper.h
+++ b/include/sbi_utils/fdt/fdt_helper.h
@@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
 
 int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
 
+int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
 int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
 
 int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
index ccbbc68..fdb9e20 100644
--- a/lib/sbi/sbi_scratch.c
+++ b/lib/sbi/sbi_scratch.c
@@ -14,6 +14,8 @@
 #include <sbi/sbi_scratch.h>
 #include <sbi/sbi_string.h>
 
+#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
+
 u32 last_hartindex_having_scratch = 0;
 u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
 struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
@@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
 static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
 static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
 
+static u32 sbi_get_scratch_alloc_align(void)
+{
+	const struct sbi_platform *plat;
+	/*
+	 * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
+	 * or riscv,cbom_block_size
+	 */
+	plat = sbi_platform_thishart_ptr();
+	if (!plat)
+		return DEFAULT_SCRATCH_ALLOC_ALIGN;
+	return plat->cbom_block_size ? plat->cbom_block_size : \
+                                  DEFAULT_SCRATCH_ALLOC_ALIGN;
+
+}
 u32 sbi_hartid_to_hartindex(u32 hartid)
 {
 	u32 i;
@@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
 	void *ptr;
 	unsigned long ret = 0;
 	struct sbi_scratch *rscratch;
+	u32 scratch_alloc_align = 0;
 
 	/*
 	 * We have a simple brain-dead allocator which never expects
@@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
 	if (!size)
 		return 0;
 
-	size += __SIZEOF_POINTER__ - 1;
-	size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
+	scratch_alloc_align = sbi_get_scratch_alloc_align();
+
+	/*
+	 * We let the allocation align to cacheline bytes to avoid livelock on
+	 * certain platforms due to atomic variables from the same cache line.
+	 */
+    size += scratch_alloc_align - 1;
+    size &= ~((unsigned long)scratch_alloc_align - 1);
 
 	spin_lock(&extra_lock);
 
diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
index cb350e5..bea4fdc 100644
--- a/lib/utils/fdt/fdt_helper.c
+++ b/lib/utils/fdt/fdt_helper.c
@@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
 
 	return 0;
 }
+int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
+{
+    int len;
+    const void *prop;
+    const fdt32_t *val;
+
+    if (!fdt || cpu_offset < 0)
+        return SBI_EINVAL;
+
+    prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
+    if (!prop || !len)
+        return SBI_EINVAL;
+    if (strncmp (prop, "cpu", strlen ("cpu")))
+        return SBI_EINVAL;
+
+    val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
+    if (!val || len < sizeof(fdt32_t))
+        return SBI_EINVAL;
+
+    if (cbom_block_size)
+        *cbom_block_size = fdt32_to_cpu(*val);
+    return 0;
+
+}
 
 int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
 {
diff --git a/platform/generic/platform.c b/platform/generic/platform.c
index c03ed88..0ff8d46 100644
--- a/platform/generic/platform.c
+++ b/platform/generic/platform.c
@@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
 	const void *fdt = (void *)arg1;
 	u32 hartid, hart_count = 0;
 	int rc, root_offset, cpus_offset, cpu_offset, len;
+	u32 cbom_block_size = 0;
+	u32 tmp=0;
 
 	root_offset = fdt_path_offset(fdt, "/");
 	if (root_offset < 0)
@@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
 			continue;
 
 		generic_hart_index2id[hart_count++] = hartid;
+		rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
+		if (rc)
+			continue;
+		cbom_block_size = MAX(tmp,cbom_block_size);
 	}
 
 	platform.hart_count = hart_count;
 	platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
 	platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
+	platform.cbom_block_size = cbom_block_size;
 
 	fw_platform_coldboot_harts_init(fdt);
 
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
  2025-03-18 18:51 ` [PATCH v4] Set the scratch allocation to alignment to cacheline size Raj Vishwanathan
@ 2025-03-19  1:19   ` Xiang W
  2025-03-19 11:59   ` Andrew Jones
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Xiang W @ 2025-03-19  1:19 UTC (permalink / raw)
  To: opensbi

? 2025-03-18?? 11:51 -0700?Raj Vishwanathan???
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.
> 
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
LGTM

Reviewed-by: Xiang W <wxjstz@126.com>
> ---
> Changes in V3:
> ??? Remove platform specific references to 64 Bytes.
> Changes in V2:
> ??? Added a new configuration to get the alignment size.
> Change in V1:
> ??? Original Patch
> ---
> ?include/sbi/sbi_platform.h???????? |? 2 ++
> ?include/sbi_utils/fdt/fdt_helper.h |? 1 +
> ?lib/sbi/sbi_scratch.c????????????? | 27 +++++++++++++++++++++++++--
> ?lib/utils/fdt/fdt_helper.c???????? | 24 ++++++++++++++++++++++++
> ?platform/generic/platform.c??????? |? 7 +++++++
> ?5 files changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 6d5fbc7..0cea0fe 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -197,6 +197,8 @@ struct sbi_platform {
> ?	 * 2. HART id < SBI_HARTMASK_MAX_BITS
> ?	 */
> ?	const u32 *hart_index2id;
> +	/** Allocation alignment for Scratch */
> +	u32 cbom_block_size;
> ?};
> ?
> ?/**
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index 7329b84..0b82159 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
> ?
> ?int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
> ?
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
> ?int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
> ?
> ?int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index ccbbc68..fdb9e20 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
> ?#include <sbi/sbi_scratch.h>
> ?#include <sbi/sbi_string.h>
> ?
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
> ?u32 last_hartindex_having_scratch = 0;
> ?u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
> ?struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
> ?static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
> ?static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
> ?
> +static u32 sbi_get_scratch_alloc_align(void)
> +{
> +	const struct sbi_platform *plat;
> +	/*
> +	 * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +	 * or riscv,cbom_block_size
> +	 */
> +	plat = sbi_platform_thishart_ptr();
> +	if (!plat)
> +		return DEFAULT_SCRATCH_ALLOC_ALIGN;
> +	return plat->cbom_block_size ? plat->cbom_block_size : \
> +????????????????????????????????? DEFAULT_SCRATCH_ALLOC_ALIGN;
> +
> +}
> ?u32 sbi_hartid_to_hartindex(u32 hartid)
> ?{
> ?	u32 i;
> @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> ?	void *ptr;
> ?	unsigned long ret = 0;
> ?	struct sbi_scratch *rscratch;
> +	u32 scratch_alloc_align = 0;
> ?
> ?	/*
> ?	 * We have a simple brain-dead allocator which never expects
> @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> ?	if (!size)
> ?		return 0;
> ?
> -	size += __SIZEOF_POINTER__ - 1;
> -	size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +	scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +	/*
> +	 * We let the allocation align to cacheline bytes to avoid livelock on
> +	 * certain platforms due to atomic variables from the same cache line.
> +	 */
> +??? size += scratch_alloc_align - 1;
> +??? size &= ~((unsigned long)scratch_alloc_align - 1);
> ?
> ?	spin_lock(&extra_lock);
> ?
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index cb350e5..bea4fdc 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
> ?
> ?	return 0;
> ?}
> +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> +{
> +??? int len;
> +??? const void *prop;
> +??? const fdt32_t *val;
> +
> +??? if (!fdt || cpu_offset < 0)
> +??????? return SBI_EINVAL;
> +
> +??? prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +??? if (!prop || !len)
> +??????? return SBI_EINVAL;
> +??? if (strncmp (prop, "cpu", strlen ("cpu")))
> +??????? return SBI_EINVAL;
> +
> +??? val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +??? if (!val || len < sizeof(fdt32_t))
> +??????? return SBI_EINVAL;
> +
> +??? if (cbom_block_size)
> +??????? *cbom_block_size = fdt32_to_cpu(*val);
> +??? return 0;
> +
> +}
> ?
> ?int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
> ?{
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index c03ed88..0ff8d46 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> ?	const void *fdt = (void *)arg1;
> ?	u32 hartid, hart_count = 0;
> ?	int rc, root_offset, cpus_offset, cpu_offset, len;
> +	u32 cbom_block_size = 0;
> +	u32 tmp=0;
> ?
> ?	root_offset = fdt_path_offset(fdt, "/");
> ?	if (root_offset < 0)
> @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> ?			continue;
> ?
> ?		generic_hart_index2id[hart_count++] = hartid;
> +		rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> +		if (rc)
> +			continue;
> +		cbom_block_size = MAX(tmp,cbom_block_size);
> ?	}
> ?
> ?	platform.hart_count = hart_count;
> ?	platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
> ?	platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +	platform.cbom_block_size = cbom_block_size;
> ?
> ?	fw_platform_coldboot_harts_init(fdt);
> ?
> -- 
> 2.43.0
> 



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
  2025-03-18 18:51 ` [PATCH v4] Set the scratch allocation to alignment to cacheline size Raj Vishwanathan
  2025-03-19  1:19   ` Xiang W
@ 2025-03-19 11:59   ` Andrew Jones
  2025-03-21  5:46     ` Raj Vishwanathan
  2025-03-25  2:46   ` Samuel Holland
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Andrew Jones @ 2025-03-19 11:59 UTC (permalink / raw)
  To: opensbi

On Tue, Mar 18, 2025 at 11:51:11AM -0700, Raj Vishwanathan wrote:
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.
> 
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> ---
> Changes in V3:
>     Remove platform specific references to 64 Bytes.
> Changes in V2:
>     Added a new configuration to get the alignment size.
> Change in V1:
>     Original Patch
> ---
>  include/sbi/sbi_platform.h         |  2 ++
>  include/sbi_utils/fdt/fdt_helper.h |  1 +
>  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
>  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
>  platform/generic/platform.c        |  7 +++++++
>  5 files changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 6d5fbc7..0cea0fe 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -197,6 +197,8 @@ struct sbi_platform {
>  	 * 2. HART id < SBI_HARTMASK_MAX_BITS
>  	 */
>  	const u32 *hart_index2id;
> +	/** Allocation alignment for Scratch */
> +	u32 cbom_block_size;
>  };
>  
>  /**
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index 7329b84..0b82159 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
>  
>  int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
>  
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
>  
>  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index ccbbc68..fdb9e20 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
>  #include <sbi/sbi_scratch.h>
>  #include <sbi/sbi_string.h>
>  
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
>  u32 last_hartindex_having_scratch = 0;
>  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
>  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
>  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
>  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
>  
> +static u32 sbi_get_scratch_alloc_align(void)
> +{
> +	const struct sbi_platform *plat;
> +	/*
> +	 * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +	 * or riscv,cbom_block_size
> +	 */
> +	plat = sbi_platform_thishart_ptr();
> +	if (!plat)
> +		return DEFAULT_SCRATCH_ALLOC_ALIGN;
> +	return plat->cbom_block_size ? plat->cbom_block_size : \
> +                                  DEFAULT_SCRATCH_ALLOC_ALIGN;
> +
> +}
>  u32 sbi_hartid_to_hartindex(u32 hartid)
>  {
>  	u32 i;
> @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>  	void *ptr;
>  	unsigned long ret = 0;
>  	struct sbi_scratch *rscratch;
> +	u32 scratch_alloc_align = 0;
>  
>  	/*
>  	 * We have a simple brain-dead allocator which never expects
> @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>  	if (!size)
>  		return 0;
>  
> -	size += __SIZEOF_POINTER__ - 1;
> -	size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +	scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +	/*
> +	 * We let the allocation align to cacheline bytes to avoid livelock on
> +	 * certain platforms due to atomic variables from the same cache line.
> +	 */
> +    size += scratch_alloc_align - 1;
> +    size &= ~((unsigned long)scratch_alloc_align - 1);
>  
>  	spin_lock(&extra_lock);
>  
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index cb350e5..bea4fdc 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
>  
>  	return 0;
>  }
> +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> +{
> +    int len;
> +    const void *prop;
> +    const fdt32_t *val;
> +
> +    if (!fdt || cpu_offset < 0)
> +        return SBI_EINVAL;
> +
> +    prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +    if (!prop || !len)
> +        return SBI_EINVAL;
> +    if (strncmp (prop, "cpu", strlen ("cpu")))
> +        return SBI_EINVAL;
> +
> +    val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +    if (!val || len < sizeof(fdt32_t))
> +        return SBI_EINVAL;

There's another way to get the CBO block sizes, which we haven't started
doing in Linux yet, but at some point maybe we should. If we don't have
the riscv,cbom-block-size nodes, then we can still check for the existence
of the Zic64b extension in the isa-string/isa-extensions. The existence of
that extension states that all cache blocks (cbom/cboz) are 64 bytes in
size.

Thanks,
drew

> +
> +    if (cbom_block_size)
> +        *cbom_block_size = fdt32_to_cpu(*val);
> +    return 0;
> +
> +}
>  
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
>  {
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index c03ed88..0ff8d46 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>  	const void *fdt = (void *)arg1;
>  	u32 hartid, hart_count = 0;
>  	int rc, root_offset, cpus_offset, cpu_offset, len;
> +	u32 cbom_block_size = 0;
> +	u32 tmp=0;
>  
>  	root_offset = fdt_path_offset(fdt, "/");
>  	if (root_offset < 0)
> @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>  			continue;
>  
>  		generic_hart_index2id[hart_count++] = hartid;
> +		rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> +		if (rc)
> +			continue;
> +		cbom_block_size = MAX(tmp,cbom_block_size);
>  	}
>  
>  	platform.hart_count = hart_count;
>  	platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
>  	platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +	platform.cbom_block_size = cbom_block_size;
>  
>  	fw_platform_coldboot_harts_init(fdt);
>  
> -- 
> 2.43.0
> 
> 
> -- 
> opensbi mailing list
> opensbi at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
  2025-03-19 11:59   ` Andrew Jones
@ 2025-03-21  5:46     ` Raj Vishwanathan
  2025-03-21  7:50       ` Andrew Jones
  0 siblings, 1 reply; 14+ messages in thread
From: Raj Vishwanathan @ 2025-03-21  5:46 UTC (permalink / raw)
  To: opensbi

Andrew,

Thanks for the tip. It may be the way to in the future. Just out of
curiosity, does the absence of Zic64b extension mean that cache blocks
are not 64 bytes in size?

We are testing on a system that does not have this but a 64byte is
still the cacheline size

Thanks

Raj

On Wed, Mar 19, 2025 at 4:59?AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Tue, Mar 18, 2025 at 11:51:11AM -0700, Raj Vishwanathan wrote:
> > We set the scratch allocation alignment to cacheline size,specified by
> > riscv,cbom-block-size in the dts file to avoid two atomic variables from
> > the same cache line causing livelock on some platforms. If the cacheline
> > is not specified, we set it a default value.
> >
> > Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> > ---
> > Changes in V3:
> >     Remove platform specific references to 64 Bytes.
> > Changes in V2:
> >     Added a new configuration to get the alignment size.
> > Change in V1:
> >     Original Patch
> > ---
> >  include/sbi/sbi_platform.h         |  2 ++
> >  include/sbi_utils/fdt/fdt_helper.h |  1 +
> >  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
> >  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
> >  platform/generic/platform.c        |  7 +++++++
> >  5 files changed, 59 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> > index 6d5fbc7..0cea0fe 100644
> > --- a/include/sbi/sbi_platform.h
> > +++ b/include/sbi/sbi_platform.h
> > @@ -197,6 +197,8 @@ struct sbi_platform {
> >        * 2. HART id < SBI_HARTMASK_MAX_BITS
> >        */
> >       const u32 *hart_index2id;
> > +     /** Allocation alignment for Scratch */
> > +     u32 cbom_block_size;
> >  };
> >
> >  /**
> > diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> > index 7329b84..0b82159 100644
> > --- a/include/sbi_utils/fdt/fdt_helper.h
> > +++ b/include/sbi_utils/fdt/fdt_helper.h
> > @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
> >
> >  int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
> >
> > +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
> >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
> >
> >  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> > diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> > index ccbbc68..fdb9e20 100644
> > --- a/lib/sbi/sbi_scratch.c
> > +++ b/lib/sbi/sbi_scratch.c
> > @@ -14,6 +14,8 @@
> >  #include <sbi/sbi_scratch.h>
> >  #include <sbi/sbi_string.h>
> >
> > +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> > +
> >  u32 last_hartindex_having_scratch = 0;
> >  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
> >  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> > @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
> >  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
> >  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
> >
> > +static u32 sbi_get_scratch_alloc_align(void)
> > +{
> > +     const struct sbi_platform *plat;
> > +     /*
> > +      * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> > +      * or riscv,cbom_block_size
> > +      */
> > +     plat = sbi_platform_thishart_ptr();
> > +     if (!plat)
> > +             return DEFAULT_SCRATCH_ALLOC_ALIGN;
> > +     return plat->cbom_block_size ? plat->cbom_block_size : \
> > +                                  DEFAULT_SCRATCH_ALLOC_ALIGN;
> > +
> > +}
> >  u32 sbi_hartid_to_hartindex(u32 hartid)
> >  {
> >       u32 i;
> > @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> >       void *ptr;
> >       unsigned long ret = 0;
> >       struct sbi_scratch *rscratch;
> > +     u32 scratch_alloc_align = 0;
> >
> >       /*
> >        * We have a simple brain-dead allocator which never expects
> > @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> >       if (!size)
> >               return 0;
> >
> > -     size += __SIZEOF_POINTER__ - 1;
> > -     size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> > +     scratch_alloc_align = sbi_get_scratch_alloc_align();
> > +
> > +     /*
> > +      * We let the allocation align to cacheline bytes to avoid livelock on
> > +      * certain platforms due to atomic variables from the same cache line.
> > +      */
> > +    size += scratch_alloc_align - 1;
> > +    size &= ~((unsigned long)scratch_alloc_align - 1);
> >
> >       spin_lock(&extra_lock);
> >
> > diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> > index cb350e5..bea4fdc 100644
> > --- a/lib/utils/fdt/fdt_helper.c
> > +++ b/lib/utils/fdt/fdt_helper.c
> > @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
> >
> >       return 0;
> >  }
> > +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> > +{
> > +    int len;
> > +    const void *prop;
> > +    const fdt32_t *val;
> > +
> > +    if (!fdt || cpu_offset < 0)
> > +        return SBI_EINVAL;
> > +
> > +    prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> > +    if (!prop || !len)
> > +        return SBI_EINVAL;
> > +    if (strncmp (prop, "cpu", strlen ("cpu")))
> > +        return SBI_EINVAL;
> > +
> > +    val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> > +    if (!val || len < sizeof(fdt32_t))
> > +        return SBI_EINVAL;
>
> There's another way to get the CBO block sizes, which we haven't started
> doing in Linux yet, but at some point maybe we should. If we don't have
> the riscv,cbom-block-size nodes, then we can still check for the existence
> of the Zic64b extension in the isa-string/isa-extensions. The existence of
> that extension states that all cache blocks (cbom/cboz) are 64 bytes in
> size.
>
> Thanks,
> drew
>
> > +
> > +    if (cbom_block_size)
> > +        *cbom_block_size = fdt32_to_cpu(*val);
> > +    return 0;
> > +
> > +}
> >
> >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
> >  {
> > diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> > index c03ed88..0ff8d46 100644
> > --- a/platform/generic/platform.c
> > +++ b/platform/generic/platform.c
> > @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> >       const void *fdt = (void *)arg1;
> >       u32 hartid, hart_count = 0;
> >       int rc, root_offset, cpus_offset, cpu_offset, len;
> > +     u32 cbom_block_size = 0;
> > +     u32 tmp=0;
> >
> >       root_offset = fdt_path_offset(fdt, "/");
> >       if (root_offset < 0)
> > @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> >                       continue;
> >
> >               generic_hart_index2id[hart_count++] = hartid;
> > +             rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> > +             if (rc)
> > +                     continue;
> > +             cbom_block_size = MAX(tmp,cbom_block_size);
> >       }
> >
> >       platform.hart_count = hart_count;
> >       platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
> >       platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> > +     platform.cbom_block_size = cbom_block_size;
> >
> >       fw_platform_coldboot_harts_init(fdt);
> >
> > --
> > 2.43.0
> >
> >
> > --
> > opensbi mailing list
> > opensbi at lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/opensbi


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
  2025-03-21  5:46     ` Raj Vishwanathan
@ 2025-03-21  7:50       ` Andrew Jones
  2025-03-21 17:42         ` Raj Vishwanathan
  0 siblings, 1 reply; 14+ messages in thread
From: Andrew Jones @ 2025-03-21  7:50 UTC (permalink / raw)
  To: opensbi

On Thu, Mar 20, 2025 at 10:46:02PM -0700, Raj Vishwanathan wrote:
> Andrew,
> 
> Thanks for the tip. It may be the way to in the future. Just out of
> curiosity, does the absence of Zic64b extension mean that cache blocks
> are not 64 bytes in size?

Nope, it just means the cache block sizes will need to be determined in
some other way, such as with the riscv,cbom/cbo{m,p,z}-block-size nodes.

My comment on the patch is to not give up when riscv,cbom-block-size
is missing, but rather to also search the extension list for zic64b.
Or vice versa, search for zic64b, if it's not present, then search
for the block-size nodes.

Thanks,
drew

> 
> We are testing on a system that does not have this but a 64byte is
> still the cacheline size
> 
> Thanks
> 
> Raj
> 
> On Wed, Mar 19, 2025 at 4:59?AM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Tue, Mar 18, 2025 at 11:51:11AM -0700, Raj Vishwanathan wrote:
> > > We set the scratch allocation alignment to cacheline size,specified by
> > > riscv,cbom-block-size in the dts file to avoid two atomic variables from
> > > the same cache line causing livelock on some platforms. If the cacheline
> > > is not specified, we set it a default value.
> > >
> > > Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> > > ---
> > > Changes in V3:
> > >     Remove platform specific references to 64 Bytes.
> > > Changes in V2:
> > >     Added a new configuration to get the alignment size.
> > > Change in V1:
> > >     Original Patch
> > > ---
> > >  include/sbi/sbi_platform.h         |  2 ++
> > >  include/sbi_utils/fdt/fdt_helper.h |  1 +
> > >  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
> > >  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
> > >  platform/generic/platform.c        |  7 +++++++
> > >  5 files changed, 59 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> > > index 6d5fbc7..0cea0fe 100644
> > > --- a/include/sbi/sbi_platform.h
> > > +++ b/include/sbi/sbi_platform.h
> > > @@ -197,6 +197,8 @@ struct sbi_platform {
> > >        * 2. HART id < SBI_HARTMASK_MAX_BITS
> > >        */
> > >       const u32 *hart_index2id;
> > > +     /** Allocation alignment for Scratch */
> > > +     u32 cbom_block_size;
> > >  };
> > >
> > >  /**
> > > diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> > > index 7329b84..0b82159 100644
> > > --- a/include/sbi_utils/fdt/fdt_helper.h
> > > +++ b/include/sbi_utils/fdt/fdt_helper.h
> > > @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
> > >
> > >  int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
> > >
> > > +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
> > >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
> > >
> > >  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> > > diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> > > index ccbbc68..fdb9e20 100644
> > > --- a/lib/sbi/sbi_scratch.c
> > > +++ b/lib/sbi/sbi_scratch.c
> > > @@ -14,6 +14,8 @@
> > >  #include <sbi/sbi_scratch.h>
> > >  #include <sbi/sbi_string.h>
> > >
> > > +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> > > +
> > >  u32 last_hartindex_having_scratch = 0;
> > >  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
> > >  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> > > @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
> > >  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
> > >  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
> > >
> > > +static u32 sbi_get_scratch_alloc_align(void)
> > > +{
> > > +     const struct sbi_platform *plat;
> > > +     /*
> > > +      * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> > > +      * or riscv,cbom_block_size
> > > +      */
> > > +     plat = sbi_platform_thishart_ptr();
> > > +     if (!plat)
> > > +             return DEFAULT_SCRATCH_ALLOC_ALIGN;
> > > +     return plat->cbom_block_size ? plat->cbom_block_size : \
> > > +                                  DEFAULT_SCRATCH_ALLOC_ALIGN;
> > > +
> > > +}
> > >  u32 sbi_hartid_to_hartindex(u32 hartid)
> > >  {
> > >       u32 i;
> > > @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> > >       void *ptr;
> > >       unsigned long ret = 0;
> > >       struct sbi_scratch *rscratch;
> > > +     u32 scratch_alloc_align = 0;
> > >
> > >       /*
> > >        * We have a simple brain-dead allocator which never expects
> > > @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> > >       if (!size)
> > >               return 0;
> > >
> > > -     size += __SIZEOF_POINTER__ - 1;
> > > -     size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> > > +     scratch_alloc_align = sbi_get_scratch_alloc_align();
> > > +
> > > +     /*
> > > +      * We let the allocation align to cacheline bytes to avoid livelock on
> > > +      * certain platforms due to atomic variables from the same cache line.
> > > +      */
> > > +    size += scratch_alloc_align - 1;
> > > +    size &= ~((unsigned long)scratch_alloc_align - 1);
> > >
> > >       spin_lock(&extra_lock);
> > >
> > > diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> > > index cb350e5..bea4fdc 100644
> > > --- a/lib/utils/fdt/fdt_helper.c
> > > +++ b/lib/utils/fdt/fdt_helper.c
> > > @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
> > >
> > >       return 0;
> > >  }
> > > +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> > > +{
> > > +    int len;
> > > +    const void *prop;
> > > +    const fdt32_t *val;
> > > +
> > > +    if (!fdt || cpu_offset < 0)
> > > +        return SBI_EINVAL;
> > > +
> > > +    prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> > > +    if (!prop || !len)
> > > +        return SBI_EINVAL;
> > > +    if (strncmp (prop, "cpu", strlen ("cpu")))
> > > +        return SBI_EINVAL;
> > > +
> > > +    val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> > > +    if (!val || len < sizeof(fdt32_t))
> > > +        return SBI_EINVAL;
> >
> > There's another way to get the CBO block sizes, which we haven't started
> > doing in Linux yet, but at some point maybe we should. If we don't have
> > the riscv,cbom-block-size nodes, then we can still check for the existence
> > of the Zic64b extension in the isa-string/isa-extensions. The existence of
> > that extension states that all cache blocks (cbom/cboz) are 64 bytes in
> > size.
> >
> > Thanks,
> > drew
> >
> > > +
> > > +    if (cbom_block_size)
> > > +        *cbom_block_size = fdt32_to_cpu(*val);
> > > +    return 0;
> > > +
> > > +}
> > >
> > >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
> > >  {
> > > diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> > > index c03ed88..0ff8d46 100644
> > > --- a/platform/generic/platform.c
> > > +++ b/platform/generic/platform.c
> > > @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> > >       const void *fdt = (void *)arg1;
> > >       u32 hartid, hart_count = 0;
> > >       int rc, root_offset, cpus_offset, cpu_offset, len;
> > > +     u32 cbom_block_size = 0;
> > > +     u32 tmp=0;
> > >
> > >       root_offset = fdt_path_offset(fdt, "/");
> > >       if (root_offset < 0)
> > > @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> > >                       continue;
> > >
> > >               generic_hart_index2id[hart_count++] = hartid;
> > > +             rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> > > +             if (rc)
> > > +                     continue;
> > > +             cbom_block_size = MAX(tmp,cbom_block_size);
> > >       }
> > >
> > >       platform.hart_count = hart_count;
> > >       platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
> > >       platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> > > +     platform.cbom_block_size = cbom_block_size;
> > >
> > >       fw_platform_coldboot_harts_init(fdt);
> > >
> > > --
> > > 2.43.0
> > >
> > >
> > > --
> > > opensbi mailing list
> > > opensbi at lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/opensbi


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
  2025-03-21  7:50       ` Andrew Jones
@ 2025-03-21 17:42         ` Raj Vishwanathan
  0 siblings, 0 replies; 14+ messages in thread
From: Raj Vishwanathan @ 2025-03-21 17:42 UTC (permalink / raw)
  To: opensbi

Andrew

Thanks for the explanation. I would prefer to do this in two different
patches. The first one being the one that I sent out and when it is
ready in Linux I can add this to OpenSBI.
The current patch cleanly resolves the livelock issue we have and
maintains the default for the scratch allocation. If we add the Zic64b
check, this could change the default value for scratch allocation.

Thanks

Raj

On Fri, Mar 21, 2025 at 12:50?AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Thu, Mar 20, 2025 at 10:46:02PM -0700, Raj Vishwanathan wrote:
> > Andrew,
> >
> > Thanks for the tip. It may be the way to in the future. Just out of
> > curiosity, does the absence of Zic64b extension mean that cache blocks
> > are not 64 bytes in size?
>
> Nope, it just means the cache block sizes will need to be determined in
> some other way, such as with the riscv,cbom/cbo{m,p,z}-block-size nodes.
>
> My comment on the patch is to not give up when riscv,cbom-block-size
> is missing, but rather to also search the extension list for zic64b.
> Or vice versa, search for zic64b, if it's not present, then search
> for the block-size nodes.
>
> Thanks,
> drew
>
> >
> > We are testing on a system that does not have this but a 64byte is
> > still the cacheline size
> >
> > Thanks
> >
> > Raj
> >
> > On Wed, Mar 19, 2025 at 4:59?AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > On Tue, Mar 18, 2025 at 11:51:11AM -0700, Raj Vishwanathan wrote:
> > > > We set the scratch allocation alignment to cacheline size,specified by
> > > > riscv,cbom-block-size in the dts file to avoid two atomic variables from
> > > > the same cache line causing livelock on some platforms. If the cacheline
> > > > is not specified, we set it a default value.
> > > >
> > > > Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> > > > ---
> > > > Changes in V3:
> > > >     Remove platform specific references to 64 Bytes.
> > > > Changes in V2:
> > > >     Added a new configuration to get the alignment size.
> > > > Change in V1:
> > > >     Original Patch
> > > > ---
> > > >  include/sbi/sbi_platform.h         |  2 ++
> > > >  include/sbi_utils/fdt/fdt_helper.h |  1 +
> > > >  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
> > > >  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
> > > >  platform/generic/platform.c        |  7 +++++++
> > > >  5 files changed, 59 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> > > > index 6d5fbc7..0cea0fe 100644
> > > > --- a/include/sbi/sbi_platform.h
> > > > +++ b/include/sbi/sbi_platform.h
> > > > @@ -197,6 +197,8 @@ struct sbi_platform {
> > > >        * 2. HART id < SBI_HARTMASK_MAX_BITS
> > > >        */
> > > >       const u32 *hart_index2id;
> > > > +     /** Allocation alignment for Scratch */
> > > > +     u32 cbom_block_size;
> > > >  };
> > > >
> > > >  /**
> > > > diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> > > > index 7329b84..0b82159 100644
> > > > --- a/include/sbi_utils/fdt/fdt_helper.h
> > > > +++ b/include/sbi_utils/fdt/fdt_helper.h
> > > > @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
> > > >
> > > >  int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
> > > >
> > > > +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
> > > >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
> > > >
> > > >  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> > > > diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> > > > index ccbbc68..fdb9e20 100644
> > > > --- a/lib/sbi/sbi_scratch.c
> > > > +++ b/lib/sbi/sbi_scratch.c
> > > > @@ -14,6 +14,8 @@
> > > >  #include <sbi/sbi_scratch.h>
> > > >  #include <sbi/sbi_string.h>
> > > >
> > > > +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> > > > +
> > > >  u32 last_hartindex_having_scratch = 0;
> > > >  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
> > > >  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> > > > @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
> > > >  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
> > > >  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
> > > >
> > > > +static u32 sbi_get_scratch_alloc_align(void)
> > > > +{
> > > > +     const struct sbi_platform *plat;
> > > > +     /*
> > > > +      * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> > > > +      * or riscv,cbom_block_size
> > > > +      */
> > > > +     plat = sbi_platform_thishart_ptr();
> > > > +     if (!plat)
> > > > +             return DEFAULT_SCRATCH_ALLOC_ALIGN;
> > > > +     return plat->cbom_block_size ? plat->cbom_block_size : \
> > > > +                                  DEFAULT_SCRATCH_ALLOC_ALIGN;
> > > > +
> > > > +}
> > > >  u32 sbi_hartid_to_hartindex(u32 hartid)
> > > >  {
> > > >       u32 i;
> > > > @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> > > >       void *ptr;
> > > >       unsigned long ret = 0;
> > > >       struct sbi_scratch *rscratch;
> > > > +     u32 scratch_alloc_align = 0;
> > > >
> > > >       /*
> > > >        * We have a simple brain-dead allocator which never expects
> > > > @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> > > >       if (!size)
> > > >               return 0;
> > > >
> > > > -     size += __SIZEOF_POINTER__ - 1;
> > > > -     size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> > > > +     scratch_alloc_align = sbi_get_scratch_alloc_align();
> > > > +
> > > > +     /*
> > > > +      * We let the allocation align to cacheline bytes to avoid livelock on
> > > > +      * certain platforms due to atomic variables from the same cache line.
> > > > +      */
> > > > +    size += scratch_alloc_align - 1;
> > > > +    size &= ~((unsigned long)scratch_alloc_align - 1);
> > > >
> > > >       spin_lock(&extra_lock);
> > > >
> > > > diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> > > > index cb350e5..bea4fdc 100644
> > > > --- a/lib/utils/fdt/fdt_helper.c
> > > > +++ b/lib/utils/fdt/fdt_helper.c
> > > > @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
> > > >
> > > >       return 0;
> > > >  }
> > > > +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> > > > +{
> > > > +    int len;
> > > > +    const void *prop;
> > > > +    const fdt32_t *val;
> > > > +
> > > > +    if (!fdt || cpu_offset < 0)
> > > > +        return SBI_EINVAL;
> > > > +
> > > > +    prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> > > > +    if (!prop || !len)
> > > > +        return SBI_EINVAL;
> > > > +    if (strncmp (prop, "cpu", strlen ("cpu")))
> > > > +        return SBI_EINVAL;
> > > > +
> > > > +    val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> > > > +    if (!val || len < sizeof(fdt32_t))
> > > > +        return SBI_EINVAL;
> > >
> > > There's another way to get the CBO block sizes, which we haven't started
> > > doing in Linux yet, but at some point maybe we should. If we don't have
> > > the riscv,cbom-block-size nodes, then we can still check for the existence
> > > of the Zic64b extension in the isa-string/isa-extensions. The existence of
> > > that extension states that all cache blocks (cbom/cboz) are 64 bytes in
> > > size.
> > >
> > > Thanks,
> > > drew
> > >
> > > > +
> > > > +    if (cbom_block_size)
> > > > +        *cbom_block_size = fdt32_to_cpu(*val);
> > > > +    return 0;
> > > > +
> > > > +}
> > > >
> > > >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
> > > >  {
> > > > diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> > > > index c03ed88..0ff8d46 100644
> > > > --- a/platform/generic/platform.c
> > > > +++ b/platform/generic/platform.c
> > > > @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> > > >       const void *fdt = (void *)arg1;
> > > >       u32 hartid, hart_count = 0;
> > > >       int rc, root_offset, cpus_offset, cpu_offset, len;
> > > > +     u32 cbom_block_size = 0;
> > > > +     u32 tmp=0;
> > > >
> > > >       root_offset = fdt_path_offset(fdt, "/");
> > > >       if (root_offset < 0)
> > > > @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> > > >                       continue;
> > > >
> > > >               generic_hart_index2id[hart_count++] = hartid;
> > > > +             rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> > > > +             if (rc)
> > > > +                     continue;
> > > > +             cbom_block_size = MAX(tmp,cbom_block_size);
> > > >       }
> > > >
> > > >       platform.hart_count = hart_count;
> > > >       platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
> > > >       platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> > > > +     platform.cbom_block_size = cbom_block_size;
> > > >
> > > >       fw_platform_coldboot_harts_init(fdt);
> > > >
> > > > --
> > > > 2.43.0
> > > >
> > > >
> > > > --
> > > > opensbi mailing list
> > > > opensbi at lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/opensbi


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] Set the scratch allocation to alignment to cacheline size.
  2025-03-18 18:51 ` [PATCH v4] Set the scratch allocation to alignment to cacheline size Raj Vishwanathan
  2025-03-19  1:19   ` Xiang W
  2025-03-19 11:59   ` Andrew Jones
@ 2025-03-25  2:46   ` Samuel Holland
       [not found]   ` <20250326192730.587788-1-Raj.Vishwanathan@gmail.com>
  2025-04-23 22:50   ` [PATCH v6] lib: sbi: " Raj Vishwanathan
  4 siblings, 0 replies; 14+ messages in thread
From: Samuel Holland @ 2025-03-25  2:46 UTC (permalink / raw)
  To: opensbi

Hi Raj,

On 2025-03-18 1:51 PM, Raj Vishwanathan wrote:
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.
> 
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> ---
> Changes in V3:
>     Remove platform specific references to 64 Bytes.
> Changes in V2:
>     Added a new configuration to get the alignment size.
> Change in V1:
>     Original Patch
> ---
>  include/sbi/sbi_platform.h         |  2 ++
>  include/sbi_utils/fdt/fdt_helper.h |  1 +
>  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
>  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
>  platform/generic/platform.c        |  7 +++++++
>  5 files changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 6d5fbc7..0cea0fe 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -197,6 +197,8 @@ struct sbi_platform {
>  	 * 2. HART id < SBI_HARTMASK_MAX_BITS
>  	 */
>  	const u32 *hart_index2id;
> +	/** Allocation alignment for Scratch */
> +	u32 cbom_block_size;
>  };
>  
>  /**
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index 7329b84..0b82159 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
>  
>  int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
>  
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);

Please match the surrounding style (blank line between declarations).

>  
>  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index ccbbc68..fdb9e20 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
>  #include <sbi/sbi_scratch.h>
>  #include <sbi/sbi_string.h>
>  
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
>  u32 last_hartindex_having_scratch = 0;
>  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
>  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
>  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
>  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
>  
> +static u32 sbi_get_scratch_alloc_align(void)
> +{
> +	const struct sbi_platform *plat;
> +	/*
> +	 * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +	 * or riscv,cbom_block_size
> +	 */
> +	plat = sbi_platform_thishart_ptr();
> +	if (!plat)
> +		return DEFAULT_SCRATCH_ALLOC_ALIGN;
> +	return plat->cbom_block_size ? plat->cbom_block_size : \
> +                                  DEFAULT_SCRATCH_ALLOC_ALIGN;

You can simplify this:

	if (!plat || !plat->cbom_block_size)
		return DEFAULT_SCRATCH_ALLOC_ALIGN;
	return plat->cbom_block_size;

> +
> +}
>  u32 sbi_hartid_to_hartindex(u32 hartid)

Please match the surrounding style (blank line between functions).

>  {
>  	u32 i;
> @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>  	void *ptr;
>  	unsigned long ret = 0;
>  	struct sbi_scratch *rscratch;
> +	u32 scratch_alloc_align = 0;

Since the only user of this value needs it to be unsigned long, you can change
the type here (and the return type of sbi_get_scratch_alloc_align()) and drop
the cast below.

>  
>  	/*
>  	 * We have a simple brain-dead allocator which never expects
> @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>  	if (!size)
>  		return 0;
>  
> -	size += __SIZEOF_POINTER__ - 1;
> -	size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +	scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +	/*
> +	 * We let the allocation align to cacheline bytes to avoid livelock on
> +	 * certain platforms due to atomic variables from the same cache line.
> +	 */
> +    size += scratch_alloc_align - 1;
> +    size &= ~((unsigned long)scratch_alloc_align - 1);

Please match the documented coding style (use tabs for indent).

>  
>  	spin_lock(&extra_lock);
>  
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index cb350e5..bea4fdc 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
>  
>  	return 0;
>  }
> +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> +{
> +    int len;

Please match the documented coding style (use tabs for indent).

> +    const void *prop;
> +    const fdt32_t *val;
> +
> +    if (!fdt || cpu_offset < 0)
> +        return SBI_EINVAL;
> +
> +    prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +    if (!prop || !len)
> +        return SBI_EINVAL;
> +    if (strncmp (prop, "cpu", strlen ("cpu")))
> +        return SBI_EINVAL;
> +
> +    val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +    if (!val || len < sizeof(fdt32_t))
> +        return SBI_EINVAL;
> +
> +    if (cbom_block_size)
> +        *cbom_block_size = fdt32_to_cpu(*val);
> +    return 0;
> +

Please match the surrounding coding style (no blank line here).

> +}
>  
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
>  {
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index c03ed88..0ff8d46 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>  	const void *fdt = (void *)arg1;
>  	u32 hartid, hart_count = 0;
>  	int rc, root_offset, cpus_offset, cpu_offset, len;
> +	u32 cbom_block_size = 0;
> +	u32 tmp=0;

Please match the surrounding coding style (spaces around assignments).

>  
>  	root_offset = fdt_path_offset(fdt, "/");
>  	if (root_offset < 0)
> @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>  			continue;
>  
>  		generic_hart_index2id[hart_count++] = hartid;

Please add a blank line here.

Regards,
Samuel

> +		rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> +		if (rc)
> +			continue;
> +		cbom_block_size = MAX(tmp,cbom_block_size);
>  	}
>  
>  	platform.hart_count = hart_count;
>  	platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
>  	platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +	platform.cbom_block_size = cbom_block_size;
>  
>  	fw_platform_coldboot_harts_init(fdt);
>  



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v5] Set the scratch allocation to alignment to cacheline size.
       [not found]   ` <20250326192730.587788-1-Raj.Vishwanathan@gmail.com>
@ 2025-04-21 22:13     ` Raj Vishwanathan
  2025-04-22  4:41     ` Anup Patel
  1 sibling, 0 replies; 14+ messages in thread
From: Raj Vishwanathan @ 2025-04-21 22:13 UTC (permalink / raw)
  To: opensbi

Hi

Any update on this patch?

Best regards

Raj


On Wed, Mar 26, 2025 at 12:27 PM Raj Vishwanathan
<raj.vishwanathan@gmail.com> wrote:
>
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.
>
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> ---
> Changes in V4:
>         Pickup the cacheline size from the dts file
> Changes in V3:
>     Remove platform specific references to 64 Bytes.
> Changes in V2:
>     Added a new configuration to get the alignment size.
> Change in V1:
>     Original Patch
> ---
>  include/sbi/sbi_platform.h         |  2 ++
>  include/sbi_utils/fdt/fdt_helper.h |  2 ++
>  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
>  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
>  platform/generic/platform.c        |  9 +++++++++
>  5 files changed, 62 insertions(+), 2 deletions(-)
>
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 3c99d43..d69215b 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -192,6 +192,8 @@ struct sbi_platform {
>          *     hart_index2id[<abc>] = <abc>
>          */
>         const u32 *hart_index2id;
> +       /** Allocation alignment for Scratch */
> +       unsigned long cbom_block_size;
>  };
>
>  /**
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index ff83002..92de9d9 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -53,6 +53,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
>
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
>
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long  *cbom_block_size);
> +
>  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
>
>  int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid,
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index 8c7eeaf..ca88204 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
>  #include <sbi/sbi_scratch.h>
>  #include <sbi/sbi_string.h>
>
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
>  u32 sbi_scratch_hart_count;
>  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U };
>  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
> @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
>  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
>  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
>
> +static unsigned long sbi_get_scratch_alloc_align(void)
> +{
> +       const struct sbi_platform *plat;
> +       /*
> +        * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +        * or riscv,cbom_block_size
> +        */
> +       plat = sbi_platform_thishart_ptr();
> +       if (!plat)
> +               return DEFAULT_SCRATCH_ALLOC_ALIGN;
> +       return plat->cbom_block_size ? plat->cbom_block_size : \
> +                                                                       DEFAULT_SCRATCH_ALLOC_ALIGN;
> +}
> +
>  u32 sbi_hartid_to_hartindex(u32 hartid)
>  {
>         sbi_for_each_hartindex(i)
> @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>         void *ptr;
>         unsigned long ret = 0;
>         struct sbi_scratch *rscratch;
> +       unsigned long scratch_alloc_align = 0;
>
>         /*
>          * We have a simple brain-dead allocator which never expects
> @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>         if (!size)
>                 return 0;
>
> -       size += __SIZEOF_POINTER__ - 1;
> -       size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +       scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +       /*
> +        * We let the allocation align to cacheline bytes to avoid livelock on
> +        * certain platforms due to atomic variables from the same cache line.
> +        */
> +       size += scratch_alloc_align - 1;
> +       size &= ~(scratch_alloc_align - 1);
>
>         spin_lock(&extra_lock);
>
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index bc357b2..be3e19c 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -265,6 +265,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
>         return 0;
>  }
>
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size)
> +{
> +       int len;
> +       const void *prop;
> +       const fdt32_t *val;
> +
> +       if (!fdt || cpu_offset < 0)
> +               return SBI_EINVAL;
> +
> +       prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +       if (!prop || !len)
> +               return SBI_EINVAL;
> +       if (strncmp (prop, "cpu", strlen ("cpu")))
> +               return SBI_EINVAL;
> +
> +       val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +       if (!val || len < sizeof(fdt32_t))
> +               return SBI_EINVAL;
> +
> +       if (cbom_block_size)
> +               *cbom_block_size = fdt32_to_cpu(*val);
> +       return 0;
> +}
> +
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
>  {
>         u32 hartid;
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index b2f29e8..da77ea3 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -171,6 +171,9 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>         const void *fdt = (void *)arg1;
>         u32 hartid, hart_count = 0;
>         int rc, root_offset, cpus_offset, cpu_offset, len;
> +       unsigned long cbom_block_size = 0;
> +       unsigned long tmp = 0;
> +
>
>         root_offset = fdt_path_offset(fdt, "/");
>         if (root_offset < 0)
> @@ -204,11 +207,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>                         continue;
>
>                 generic_hart_index2id[hart_count++] = hartid;
> +
> +               rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp);
> +               if (rc)
> +                       continue;
> +               cbom_block_size = MAX(tmp, cbom_block_size);
>         }
>
>         platform.hart_count = hart_count;
>         platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
>         platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +       platform.cbom_block_size = cbom_block_size;
>
>         fw_platform_coldboot_harts_init(fdt);
>
> --
> 2.43.0
>

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v5] Set the scratch allocation to alignment to cacheline size.
       [not found]   ` <20250326192730.587788-1-Raj.Vishwanathan@gmail.com>
  2025-04-21 22:13     ` [PATCH v5] " Raj Vishwanathan
@ 2025-04-22  4:41     ` Anup Patel
  2025-04-23 12:58       ` Anup Patel
  1 sibling, 1 reply; 14+ messages in thread
From: Anup Patel @ 2025-04-22  4:41 UTC (permalink / raw)
  To: Raj Vishwanathan; +Cc: opensbi, rvishwanathan

On Thu, Mar 27, 2025 at 12:57 AM Raj Vishwanathan
<raj.vishwanathan@gmail.com> wrote:
>
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.

Add "lib: sbi: " prefix to patch the subject.

>
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> ---
> Changes in V4:
>         Pickup the cacheline size from the dts file
> Changes in V3:
>     Remove platform specific references to 64 Bytes.
> Changes in V2:
>     Added a new configuration to get the alignment size.
> Change in V1:
>     Original Patch
> ---
>  include/sbi/sbi_platform.h         |  2 ++
>  include/sbi_utils/fdt/fdt_helper.h |  2 ++
>  lib/sbi/sbi_scratch.c              | 27 +++++++++++++++++++++++++--
>  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
>  platform/generic/platform.c        |  9 +++++++++
>  5 files changed, 62 insertions(+), 2 deletions(-)
>
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 3c99d43..d69215b 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -192,6 +192,8 @@ struct sbi_platform {
>          *     hart_index2id[<abc>] = <abc>
>          */
>         const u32 *hart_index2id;
> +       /** Allocation alignment for Scratch */
> +       unsigned long cbom_block_size;
>  };

Please add below an assert_member_offset() line for this new member.

>
>  /**
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index ff83002..92de9d9 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -53,6 +53,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
>
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
>
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long  *cbom_block_size);
> +
>  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
>
>  int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid,
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index 8c7eeaf..ca88204 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
>  #include <sbi/sbi_scratch.h>
>  #include <sbi/sbi_string.h>
>
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
>  u32 sbi_scratch_hart_count;
>  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U };
>  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
> @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
>  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
>  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
>
> +static unsigned long sbi_get_scratch_alloc_align(void)
> +{
> +       const struct sbi_platform *plat;
> +       /*
> +        * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +        * or riscv,cbom_block_size
> +        */
> +       plat = sbi_platform_thishart_ptr();
> +       if (!plat)
> +               return DEFAULT_SCRATCH_ALLOC_ALIGN;

Like Samuel commented, this can be:

if (!plat || !plat->cbom_block_size)
     return DEFAULT_SCRATCH_ALLOC_ALIGN;
return plat->cbom_block_size;

> +       return plat->cbom_block_size ? plat->cbom_block_size : \
> +                                                                       DEFAULT_SCRATCH_ALLOC_ALIGN;
> +}
> +
>  u32 sbi_hartid_to_hartindex(u32 hartid)
>  {
>         sbi_for_each_hartindex(i)
> @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>         void *ptr;
>         unsigned long ret = 0;
>         struct sbi_scratch *rscratch;
> +       unsigned long scratch_alloc_align = 0;
>
>         /*
>          * We have a simple brain-dead allocator which never expects
> @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>         if (!size)
>                 return 0;
>
> -       size += __SIZEOF_POINTER__ - 1;
> -       size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +       scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +       /*
> +        * We let the allocation align to cacheline bytes to avoid livelock on
> +        * certain platforms due to atomic variables from the same cache line.
> +        */
> +       size += scratch_alloc_align - 1;
> +       size &= ~(scratch_alloc_align - 1);
>
>         spin_lock(&extra_lock);
>
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index bc357b2..be3e19c 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -265,6 +265,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
>         return 0;
>  }
>
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size)
> +{
> +       int len;
> +       const void *prop;
> +       const fdt32_t *val;
> +
> +       if (!fdt || cpu_offset < 0)
> +               return SBI_EINVAL;
> +
> +       prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +       if (!prop || !len)
> +               return SBI_EINVAL;
> +       if (strncmp (prop, "cpu", strlen ("cpu")))
> +               return SBI_EINVAL;
> +
> +       val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +       if (!val || len < sizeof(fdt32_t))
> +               return SBI_EINVAL;
> +
> +       if (cbom_block_size)
> +               *cbom_block_size = fdt32_to_cpu(*val);
> +       return 0;
> +}
> +
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
>  {
>         u32 hartid;
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index b2f29e8..da77ea3 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -171,6 +171,9 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>         const void *fdt = (void *)arg1;
>         u32 hartid, hart_count = 0;
>         int rc, root_offset, cpus_offset, cpu_offset, len;
> +       unsigned long cbom_block_size = 0;
> +       unsigned long tmp = 0;
> +
>
>         root_offset = fdt_path_offset(fdt, "/");
>         if (root_offset < 0)
> @@ -204,11 +207,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>                         continue;
>
>                 generic_hart_index2id[hart_count++] = hartid;
> +
> +               rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp);
> +               if (rc)
> +                       continue;
> +               cbom_block_size = MAX(tmp, cbom_block_size);
>         }
>
>         platform.hart_count = hart_count;
>         platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
>         platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +       platform.cbom_block_size = cbom_block_size;
>
>         fw_platform_coldboot_harts_init(fdt);
>
> --
> 2.43.0
>
>
> --
> opensbi mailing list
> opensbi@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi

Otherwise, this looks good to me.

Reviewed-by: Anup Patel <anup@brainfault.org>

Regards,
Anup

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v5] Set the scratch allocation to alignment to cacheline size.
  2025-04-22  4:41     ` Anup Patel
@ 2025-04-23 12:58       ` Anup Patel
  0 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-04-23 12:58 UTC (permalink / raw)
  To: Raj Vishwanathan; +Cc: opensbi, rvishwanathan

Hi Raj,

On Tue, Apr 22, 2025 at 10:11 AM Anup Patel <anup@brainfault.org> wrote:
>
> On Thu, Mar 27, 2025 at 12:57 AM Raj Vishwanathan
> <raj.vishwanathan@gmail.com> wrote:
> >
> > We set the scratch allocation alignment to cacheline size,specified by
> > riscv,cbom-block-size in the dts file to avoid two atomic variables from
> > the same cache line causing livelock on some platforms. If the cacheline
> > is not specified, we set it a default value.
>

Do you mind sending v6 of this patch ?

Regards,
Anup

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v6] lib: sbi: Set the scratch allocation to alignment to cacheline size.
  2025-03-18 18:51 ` [PATCH v4] Set the scratch allocation to alignment to cacheline size Raj Vishwanathan
                     ` (3 preceding siblings ...)
       [not found]   ` <20250326192730.587788-1-Raj.Vishwanathan@gmail.com>
@ 2025-04-23 22:50   ` Raj Vishwanathan
  2025-04-24  1:17     ` Samuel Holland
  2025-04-24  5:46     ` Anup Patel
  4 siblings, 2 replies; 14+ messages in thread
From: Raj Vishwanathan @ 2025-04-23 22:50 UTC (permalink / raw)
  To: opensbi; +Cc: rvishwanathan, Raj Vishwanathan

We set the scratch allocation alignment to cacheline size,specified by
riscv,cbom-block-size in the dts file to avoid two atomic variables from
the same cache line causing livelock on some platforms. If the cacheline
is not specified, we set it a default value.

Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
--
Changes in V6:
	Add assert for assert_member_offset for cbom_block_size
	Update the subject line
Changes in V5:
	Formatting issues
Changes in V4:
	Pickup the cacheline size from the dts file
Changes in V3:
    Remove platform specific references to 64 Bytes.
Changes in V2:
    Added a new configuration to get the alignment size.
Change in V1:
    Original Patch
---
 include/sbi/sbi_platform.h         |  5 +++++
 include/sbi_utils/fdt/fdt_helper.h |  2 ++
 lib/sbi/sbi_scratch.c              | 26 ++++++++++++++++++++++++--
 lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
 platform/generic/platform.c        |  9 +++++++++
 5 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
index 82840ae..08ece32 100644
--- a/include/sbi/sbi_platform.h
+++ b/include/sbi/sbi_platform.h
@@ -39,6 +39,8 @@
 #define SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET (0x60 + __SIZEOF_POINTER__)
 /** Offset of hart_index2id in struct sbi_platform */
 #define SBI_PLATFORM_HART_INDEX2ID_OFFSET (0x60 + (__SIZEOF_POINTER__ * 2))
+/** Offset of cbom_block_size in struct sbi_platform */
+#define SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET (0x60 + (__SIZEOF_POINTER__ * 3))
 
 #define SBI_PLATFORM_TLB_RANGE_FLUSH_LIMIT_DEFAULT		(1UL << 12)
 
@@ -190,6 +192,8 @@ struct sbi_platform {
 	 *     hart_index2id[<abc>] = <abc>
 	 */
 	const u32 *hart_index2id;
+	/** Allocation alignment for Scratch */
+	unsigned long cbom_block_size;
 };
 
 /**
@@ -207,6 +211,7 @@ assert_member_offset(struct sbi_platform, reserved, SBI_PLATFORM_RESERVED_OFFSET
 assert_member_offset(struct sbi_platform, platform_ops_addr, SBI_PLATFORM_OPS_OFFSET);
 assert_member_offset(struct sbi_platform, firmware_context, SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET);
 assert_member_offset(struct sbi_platform, hart_index2id, SBI_PLATFORM_HART_INDEX2ID_OFFSET);
+assert_member_offset(struct sbi_platform, cbom_block_size, SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET);
 
 /** Get pointer to sbi_platform for sbi_scratch pointer */
 #define sbi_platform_ptr(__s) \
diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
index 5875880..04c850c 100644
--- a/include/sbi_utils/fdt/fdt_helper.h
+++ b/include/sbi_utils/fdt/fdt_helper.h
@@ -50,6 +50,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
 
 int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
 
+int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long  *cbom_block_size);
+
 int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
 
 int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid,
diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
index 8c7eeaf..916f283 100644
--- a/lib/sbi/sbi_scratch.c
+++ b/lib/sbi/sbi_scratch.c
@@ -14,6 +14,8 @@
 #include <sbi/sbi_scratch.h>
 #include <sbi/sbi_string.h>
 
+#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
+
 u32 sbi_scratch_hart_count;
 u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U };
 struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
@@ -21,6 +23,19 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
 static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
 static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
 
+static unsigned long sbi_get_scratch_alloc_align(void)
+{
+	const struct sbi_platform *plat;
+	/*
+	 * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
+	 * or riscv,cbom_block_size
+	 */
+	plat = sbi_platform_thishart_ptr();
+	if (!plat || !plat->cbom_block_size)
+		return DEFAULT_SCRATCH_ALLOC_ALIGN;
+	return plat->cbom_block_size;
+}
+
 u32 sbi_hartid_to_hartindex(u32 hartid)
 {
 	sbi_for_each_hartindex(i)
@@ -57,6 +72,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
 	void *ptr;
 	unsigned long ret = 0;
 	struct sbi_scratch *rscratch;
+	unsigned long scratch_alloc_align = 0;
 
 	/*
 	 * We have a simple brain-dead allocator which never expects
@@ -70,8 +86,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
 	if (!size)
 		return 0;
 
-	size += __SIZEOF_POINTER__ - 1;
-	size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
+	scratch_alloc_align = sbi_get_scratch_alloc_align();
+
+	/*
+	 * We let the allocation align to cacheline bytes to avoid livelock on
+	 * certain platforms due to atomic variables from the same cache line.
+	 */
+	size += scratch_alloc_align - 1;
+	size &= ~(scratch_alloc_align - 1);
 
 	spin_lock(&extra_lock);
 
diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
index 79e59dd..b2d91fd 100644
--- a/lib/utils/fdt/fdt_helper.c
+++ b/lib/utils/fdt/fdt_helper.c
@@ -246,6 +246,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
 	return 0;
 }
 
+int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size)
+{
+	int len;
+	const void *prop;
+	const fdt32_t *val;
+
+	if (!fdt || cpu_offset < 0)
+		return SBI_EINVAL;
+
+	prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
+	if (!prop || !len)
+		return SBI_EINVAL;
+	if (strncmp (prop, "cpu", strlen ("cpu")))
+		return SBI_EINVAL;
+
+	val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
+	if (!val || len < sizeof(fdt32_t))
+		return SBI_EINVAL;
+
+	if (cbom_block_size)
+		*cbom_block_size = fdt32_to_cpu(*val);
+	return 0;
+}
+
 int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
 {
 	u32 hartid;
diff --git a/platform/generic/platform.c b/platform/generic/platform.c
index f3072be..2b1b949 100644
--- a/platform/generic/platform.c
+++ b/platform/generic/platform.c
@@ -147,6 +147,9 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
 	const void *fdt = (void *)arg1;
 	u32 hartid, hart_count = 0;
 	int rc, root_offset, cpus_offset, cpu_offset, len;
+	unsigned long cbom_block_size = 0;
+	unsigned long tmp = 0;
+
 
 	root_offset = fdt_path_offset(fdt, "/");
 	if (root_offset < 0)
@@ -174,11 +177,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
 			continue;
 
 		generic_hart_index2id[hart_count++] = hartid;
+
+		rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp);
+		if (rc)
+			continue;
+		cbom_block_size = MAX(tmp, cbom_block_size);
 	}
 
 	platform.hart_count = hart_count;
 	platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
 	platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
+	platform.cbom_block_size = cbom_block_size;
 
 	fw_platform_coldboot_harts_init(fdt);
 
-- 
2.43.0


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v6] lib: sbi: Set the scratch allocation to alignment to cacheline size.
  2025-04-23 22:50   ` [PATCH v6] lib: sbi: " Raj Vishwanathan
@ 2025-04-24  1:17     ` Samuel Holland
  2025-04-24  5:08       ` Raj Vishwanathan
  2025-04-24  5:46     ` Anup Patel
  1 sibling, 1 reply; 14+ messages in thread
From: Samuel Holland @ 2025-04-24  1:17 UTC (permalink / raw)
  To: Raj Vishwanathan, opensbi; +Cc: rvishwanathan

Hi Raj,

On 2025-04-23 5:50 PM, Raj Vishwanathan wrote:
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.
> 
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> --
> Changes in V6:
> 	Add assert for assert_member_offset for cbom_block_size
> 	Update the subject line
> Changes in V5:
> 	Formatting issues
> Changes in V4:
> 	Pickup the cacheline size from the dts file
> Changes in V3:
>     Remove platform specific references to 64 Bytes.
> Changes in V2:
>     Added a new configuration to get the alignment size.
> Change in V1:
>     Original Patch
> ---
>  include/sbi/sbi_platform.h         |  5 +++++
>  include/sbi_utils/fdt/fdt_helper.h |  2 ++
>  lib/sbi/sbi_scratch.c              | 26 ++++++++++++++++++++++++--
>  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
>  platform/generic/platform.c        |  9 +++++++++
>  5 files changed, 64 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 82840ae..08ece32 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -39,6 +39,8 @@
>  #define SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET (0x60 + __SIZEOF_POINTER__)
>  /** Offset of hart_index2id in struct sbi_platform */
>  #define SBI_PLATFORM_HART_INDEX2ID_OFFSET (0x60 + (__SIZEOF_POINTER__ * 2))
> +/** Offset of cbom_block_size in struct sbi_platform */
> +#define SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET (0x60 + (__SIZEOF_POINTER__ * 3))
>  
>  #define SBI_PLATFORM_TLB_RANGE_FLUSH_LIMIT_DEFAULT		(1UL << 12)
>  
> @@ -190,6 +192,8 @@ struct sbi_platform {
>  	 *     hart_index2id[<abc>] = <abc>
>  	 */
>  	const u32 *hart_index2id;
> +	/** Allocation alignment for Scratch */
> +	unsigned long cbom_block_size;
>  };
>  
>  /**
> @@ -207,6 +211,7 @@ assert_member_offset(struct sbi_platform, reserved, SBI_PLATFORM_RESERVED_OFFSET
>  assert_member_offset(struct sbi_platform, platform_ops_addr, SBI_PLATFORM_OPS_OFFSET);
>  assert_member_offset(struct sbi_platform, firmware_context, SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET);
>  assert_member_offset(struct sbi_platform, hart_index2id, SBI_PLATFORM_HART_INDEX2ID_OFFSET);
> +assert_member_offset(struct sbi_platform, cbom_block_size, SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET);
>  
>  /** Get pointer to sbi_platform for sbi_scratch pointer */
>  #define sbi_platform_ptr(__s) \
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index 5875880..04c850c 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -50,6 +50,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
>  
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
>  
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long  *cbom_block_size);
> +
>  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
>  
>  int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid,
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index 8c7eeaf..916f283 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
>  #include <sbi/sbi_scratch.h>
>  #include <sbi/sbi_string.h>
>  
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
>  u32 sbi_scratch_hart_count;
>  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U };
>  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
> @@ -21,6 +23,19 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
>  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
>  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
>  
> +static unsigned long sbi_get_scratch_alloc_align(void)
> +{
> +	const struct sbi_platform *plat;
> +	/*
> +	 * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +	 * or riscv,cbom_block_size
> +	 */
> +	plat = sbi_platform_thishart_ptr();
> +	if (!plat || !plat->cbom_block_size)
> +		return DEFAULT_SCRATCH_ALLOC_ALIGN;
> +	return plat->cbom_block_size;
> +}
> +
>  u32 sbi_hartid_to_hartindex(u32 hartid)
>  {
>  	sbi_for_each_hartindex(i)
> @@ -57,6 +72,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>  	void *ptr;
>  	unsigned long ret = 0;
>  	struct sbi_scratch *rscratch;
> +	unsigned long scratch_alloc_align = 0;

This variable is unconditionally assigned below, so it doesn't need to be
initialized here.

>  
>  	/*
>  	 * We have a simple brain-dead allocator which never expects
> @@ -70,8 +86,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>  	if (!size)
>  		return 0;
>  
> -	size += __SIZEOF_POINTER__ - 1;
> -	size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +	scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +	/*
> +	 * We let the allocation align to cacheline bytes to avoid livelock on
> +	 * certain platforms due to atomic variables from the same cache line.
> +	 */
> +	size += scratch_alloc_align - 1;
> +	size &= ~(scratch_alloc_align - 1);
>  
>  	spin_lock(&extra_lock);
>  
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index 79e59dd..b2d91fd 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -246,6 +246,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
>  	return 0;
>  }
>  
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size)
> +{
> +	int len;
> +	const void *prop;
> +	const fdt32_t *val;
> +
> +	if (!fdt || cpu_offset < 0)
> +		return SBI_EINVAL;
> +
> +	prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +	if (!prop || !len)
> +		return SBI_EINVAL;
> +	if (strncmp (prop, "cpu", strlen ("cpu")))
> +		return SBI_EINVAL;
> +
> +	val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +	if (!val || len < sizeof(fdt32_t))
> +		return SBI_EINVAL;
> +
> +	if (cbom_block_size)
> +		*cbom_block_size = fdt32_to_cpu(*val);
> +	return 0;
> +}
> +
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
>  {
>  	u32 hartid;
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index f3072be..2b1b949 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -147,6 +147,9 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>  	const void *fdt = (void *)arg1;
>  	u32 hartid, hart_count = 0;
>  	int rc, root_offset, cpus_offset, cpu_offset, len;
> +	unsigned long cbom_block_size = 0;
> +	unsigned long tmp = 0;
> +
>  

Extra blank line here. These are trivial issues, so:

Reviewed-by: Samuel Holland <samuel.holland@sifive.com>

>  	root_offset = fdt_path_offset(fdt, "/");
>  	if (root_offset < 0)
> @@ -174,11 +177,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>  			continue;
>  
>  		generic_hart_index2id[hart_count++] = hartid;
> +
> +		rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp);
> +		if (rc)
> +			continue;
> +		cbom_block_size = MAX(tmp, cbom_block_size);
>  	}
>  
>  	platform.hart_count = hart_count;
>  	platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
>  	platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +	platform.cbom_block_size = cbom_block_size;
>  
>  	fw_platform_coldboot_harts_init(fdt);
>  


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6] lib: sbi: Set the scratch allocation to alignment to cacheline size.
  2025-04-24  1:17     ` Samuel Holland
@ 2025-04-24  5:08       ` Raj Vishwanathan
  0 siblings, 0 replies; 14+ messages in thread
From: Raj Vishwanathan @ 2025-04-24  5:08 UTC (permalink / raw)
  To: Samuel Holland; +Cc: opensbi, rvishwanathan

Samuel

Thank you for the code review.  Would you like me send an updated
patch?  I am confused by "These are trivial issues, so:"

Raj


On Wed, Apr 23, 2025 at 6:17 PM Samuel Holland
<samuel.holland@sifive.com> wrote:
>
> Hi Raj,
>
> On 2025-04-23 5:50 PM, Raj Vishwanathan wrote:
> > We set the scratch allocation alignment to cacheline size,specified by
> > riscv,cbom-block-size in the dts file to avoid two atomic variables from
> > the same cache line causing livelock on some platforms. If the cacheline
> > is not specified, we set it a default value.
> >
> > Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>
> > --
> > Changes in V6:
> >       Add assert for assert_member_offset for cbom_block_size
> >       Update the subject line
> > Changes in V5:
> >       Formatting issues
> > Changes in V4:
> >       Pickup the cacheline size from the dts file
> > Changes in V3:
> >     Remove platform specific references to 64 Bytes.
> > Changes in V2:
> >     Added a new configuration to get the alignment size.
> > Change in V1:
> >     Original Patch
> > ---
> >  include/sbi/sbi_platform.h         |  5 +++++
> >  include/sbi_utils/fdt/fdt_helper.h |  2 ++
> >  lib/sbi/sbi_scratch.c              | 26 ++++++++++++++++++++++++--
> >  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
> >  platform/generic/platform.c        |  9 +++++++++
> >  5 files changed, 64 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> > index 82840ae..08ece32 100644
> > --- a/include/sbi/sbi_platform.h
> > +++ b/include/sbi/sbi_platform.h
> > @@ -39,6 +39,8 @@
> >  #define SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET (0x60 + __SIZEOF_POINTER__)
> >  /** Offset of hart_index2id in struct sbi_platform */
> >  #define SBI_PLATFORM_HART_INDEX2ID_OFFSET (0x60 + (__SIZEOF_POINTER__ * 2))
> > +/** Offset of cbom_block_size in struct sbi_platform */
> > +#define SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET (0x60 + (__SIZEOF_POINTER__ * 3))
> >
> >  #define SBI_PLATFORM_TLB_RANGE_FLUSH_LIMIT_DEFAULT           (1UL << 12)
> >
> > @@ -190,6 +192,8 @@ struct sbi_platform {
> >        *     hart_index2id[<abc>] = <abc>
> >        */
> >       const u32 *hart_index2id;
> > +     /** Allocation alignment for Scratch */
> > +     unsigned long cbom_block_size;
> >  };
> >
> >  /**
> > @@ -207,6 +211,7 @@ assert_member_offset(struct sbi_platform, reserved, SBI_PLATFORM_RESERVED_OFFSET
> >  assert_member_offset(struct sbi_platform, platform_ops_addr, SBI_PLATFORM_OPS_OFFSET);
> >  assert_member_offset(struct sbi_platform, firmware_context, SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET);
> >  assert_member_offset(struct sbi_platform, hart_index2id, SBI_PLATFORM_HART_INDEX2ID_OFFSET);
> > +assert_member_offset(struct sbi_platform, cbom_block_size, SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET);
> >
> >  /** Get pointer to sbi_platform for sbi_scratch pointer */
> >  #define sbi_platform_ptr(__s) \
> > diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> > index 5875880..04c850c 100644
> > --- a/include/sbi_utils/fdt/fdt_helper.h
> > +++ b/include/sbi_utils/fdt/fdt_helper.h
> > @@ -50,6 +50,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
> >
> >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
> >
> > +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long  *cbom_block_size);
> > +
> >  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> >
> >  int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid,
> > diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> > index 8c7eeaf..916f283 100644
> > --- a/lib/sbi/sbi_scratch.c
> > +++ b/lib/sbi/sbi_scratch.c
> > @@ -14,6 +14,8 @@
> >  #include <sbi/sbi_scratch.h>
> >  #include <sbi/sbi_string.h>
> >
> > +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> > +
> >  u32 sbi_scratch_hart_count;
> >  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U };
> >  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
> > @@ -21,6 +23,19 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
> >  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
> >  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
> >
> > +static unsigned long sbi_get_scratch_alloc_align(void)
> > +{
> > +     const struct sbi_platform *plat;
> > +     /*
> > +      * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> > +      * or riscv,cbom_block_size
> > +      */
> > +     plat = sbi_platform_thishart_ptr();
> > +     if (!plat || !plat->cbom_block_size)
> > +             return DEFAULT_SCRATCH_ALLOC_ALIGN;
> > +     return plat->cbom_block_size;
> > +}
> > +
> >  u32 sbi_hartid_to_hartindex(u32 hartid)
> >  {
> >       sbi_for_each_hartindex(i)
> > @@ -57,6 +72,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> >       void *ptr;
> >       unsigned long ret = 0;
> >       struct sbi_scratch *rscratch;
> > +     unsigned long scratch_alloc_align = 0;
>
> This variable is unconditionally assigned below, so it doesn't need to be
> initialized here.
>
> >
> >       /*
> >        * We have a simple brain-dead allocator which never expects
> > @@ -70,8 +86,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> >       if (!size)
> >               return 0;
> >
> > -     size += __SIZEOF_POINTER__ - 1;
> > -     size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> > +     scratch_alloc_align = sbi_get_scratch_alloc_align();
> > +
> > +     /*
> > +      * We let the allocation align to cacheline bytes to avoid livelock on
> > +      * certain platforms due to atomic variables from the same cache line.
> > +      */
> > +     size += scratch_alloc_align - 1;
> > +     size &= ~(scratch_alloc_align - 1);
> >
> >       spin_lock(&extra_lock);
> >
> > diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> > index 79e59dd..b2d91fd 100644
> > --- a/lib/utils/fdt/fdt_helper.c
> > +++ b/lib/utils/fdt/fdt_helper.c
> > @@ -246,6 +246,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
> >       return 0;
> >  }
> >
> > +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size)
> > +{
> > +     int len;
> > +     const void *prop;
> > +     const fdt32_t *val;
> > +
> > +     if (!fdt || cpu_offset < 0)
> > +             return SBI_EINVAL;
> > +
> > +     prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> > +     if (!prop || !len)
> > +             return SBI_EINVAL;
> > +     if (strncmp (prop, "cpu", strlen ("cpu")))
> > +             return SBI_EINVAL;
> > +
> > +     val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> > +     if (!val || len < sizeof(fdt32_t))
> > +             return SBI_EINVAL;
> > +
> > +     if (cbom_block_size)
> > +             *cbom_block_size = fdt32_to_cpu(*val);
> > +     return 0;
> > +}
> > +
> >  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
> >  {
> >       u32 hartid;
> > diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> > index f3072be..2b1b949 100644
> > --- a/platform/generic/platform.c
> > +++ b/platform/generic/platform.c
> > @@ -147,6 +147,9 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> >       const void *fdt = (void *)arg1;
> >       u32 hartid, hart_count = 0;
> >       int rc, root_offset, cpus_offset, cpu_offset, len;
> > +     unsigned long cbom_block_size = 0;
> > +     unsigned long tmp = 0;
> > +
> >
>
> Extra blank line here. These are trivial issues, so:
>
> Reviewed-by: Samuel Holland <samuel.holland@sifive.com>
>
> >       root_offset = fdt_path_offset(fdt, "/");
> >       if (root_offset < 0)
> > @@ -174,11 +177,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> >                       continue;
> >
> >               generic_hart_index2id[hart_count++] = hartid;
> > +
> > +             rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp);
> > +             if (rc)
> > +                     continue;
> > +             cbom_block_size = MAX(tmp, cbom_block_size);
> >       }
> >
> >       platform.hart_count = hart_count;
> >       platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
> >       platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> > +     platform.cbom_block_size = cbom_block_size;
> >
> >       fw_platform_coldboot_harts_init(fdt);
> >
>

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6] lib: sbi: Set the scratch allocation to alignment to cacheline size.
  2025-04-23 22:50   ` [PATCH v6] lib: sbi: " Raj Vishwanathan
  2025-04-24  1:17     ` Samuel Holland
@ 2025-04-24  5:46     ` Anup Patel
  1 sibling, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-04-24  5:46 UTC (permalink / raw)
  To: Raj Vishwanathan; +Cc: opensbi, rvishwanathan

On Thu, Apr 24, 2025 at 4:20 AM Raj Vishwanathan
<raj.vishwanathan@gmail.com> wrote:
>
> We set the scratch allocation alignment to cacheline size,specified by
> riscv,cbom-block-size in the dts file to avoid two atomic variables from
> the same cache line causing livelock on some platforms. If the cacheline
> is not specified, we set it a default value.
>
> Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan@gmail.com>

I have taken care of Samuel's comments at the time of merging.

Please carry Reviewed-by tags obtained in previous versions
over here.

Applied this patch to the riscv/opensbi repo.

Thanks,
Anup


> --
> Changes in V6:
>         Add assert for assert_member_offset for cbom_block_size
>         Update the subject line
> Changes in V5:
>         Formatting issues
> Changes in V4:
>         Pickup the cacheline size from the dts file
> Changes in V3:
>     Remove platform specific references to 64 Bytes.
> Changes in V2:
>     Added a new configuration to get the alignment size.
> Change in V1:
>     Original Patch
> ---
>  include/sbi/sbi_platform.h         |  5 +++++
>  include/sbi_utils/fdt/fdt_helper.h |  2 ++
>  lib/sbi/sbi_scratch.c              | 26 ++++++++++++++++++++++++--
>  lib/utils/fdt/fdt_helper.c         | 24 ++++++++++++++++++++++++
>  platform/generic/platform.c        |  9 +++++++++
>  5 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> index 82840ae..08ece32 100644
> --- a/include/sbi/sbi_platform.h
> +++ b/include/sbi/sbi_platform.h
> @@ -39,6 +39,8 @@
>  #define SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET (0x60 + __SIZEOF_POINTER__)
>  /** Offset of hart_index2id in struct sbi_platform */
>  #define SBI_PLATFORM_HART_INDEX2ID_OFFSET (0x60 + (__SIZEOF_POINTER__ * 2))
> +/** Offset of cbom_block_size in struct sbi_platform */
> +#define SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET (0x60 + (__SIZEOF_POINTER__ * 3))
>
>  #define SBI_PLATFORM_TLB_RANGE_FLUSH_LIMIT_DEFAULT             (1UL << 12)
>
> @@ -190,6 +192,8 @@ struct sbi_platform {
>          *     hart_index2id[<abc>] = <abc>
>          */
>         const u32 *hart_index2id;
> +       /** Allocation alignment for Scratch */
> +       unsigned long cbom_block_size;
>  };
>
>  /**
> @@ -207,6 +211,7 @@ assert_member_offset(struct sbi_platform, reserved, SBI_PLATFORM_RESERVED_OFFSET
>  assert_member_offset(struct sbi_platform, platform_ops_addr, SBI_PLATFORM_OPS_OFFSET);
>  assert_member_offset(struct sbi_platform, firmware_context, SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET);
>  assert_member_offset(struct sbi_platform, hart_index2id, SBI_PLATFORM_HART_INDEX2ID_OFFSET);
> +assert_member_offset(struct sbi_platform, cbom_block_size, SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET);
>
>  /** Get pointer to sbi_platform for sbi_scratch pointer */
>  #define sbi_platform_ptr(__s) \
> diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> index 5875880..04c850c 100644
> --- a/include/sbi_utils/fdt/fdt_helper.h
> +++ b/include/sbi_utils/fdt/fdt_helper.h
> @@ -50,6 +50,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
>
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
>
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long  *cbom_block_size);
> +
>  int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
>
>  int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid,
> diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> index 8c7eeaf..916f283 100644
> --- a/lib/sbi/sbi_scratch.c
> +++ b/lib/sbi/sbi_scratch.c
> @@ -14,6 +14,8 @@
>  #include <sbi/sbi_scratch.h>
>  #include <sbi/sbi_string.h>
>
> +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> +
>  u32 sbi_scratch_hart_count;
>  u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U };
>  struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
> @@ -21,6 +23,19 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS];
>  static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
>  static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
>
> +static unsigned long sbi_get_scratch_alloc_align(void)
> +{
> +       const struct sbi_platform *plat;
> +       /*
> +        * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> +        * or riscv,cbom_block_size
> +        */
> +       plat = sbi_platform_thishart_ptr();
> +       if (!plat || !plat->cbom_block_size)
> +               return DEFAULT_SCRATCH_ALLOC_ALIGN;
> +       return plat->cbom_block_size;
> +}
> +
>  u32 sbi_hartid_to_hartindex(u32 hartid)
>  {
>         sbi_for_each_hartindex(i)
> @@ -57,6 +72,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>         void *ptr;
>         unsigned long ret = 0;
>         struct sbi_scratch *rscratch;
> +       unsigned long scratch_alloc_align = 0;
>
>         /*
>          * We have a simple brain-dead allocator which never expects
> @@ -70,8 +86,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
>         if (!size)
>                 return 0;
>
> -       size += __SIZEOF_POINTER__ - 1;
> -       size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> +       scratch_alloc_align = sbi_get_scratch_alloc_align();
> +
> +       /*
> +        * We let the allocation align to cacheline bytes to avoid livelock on
> +        * certain platforms due to atomic variables from the same cache line.
> +        */
> +       size += scratch_alloc_align - 1;
> +       size &= ~(scratch_alloc_align - 1);
>
>         spin_lock(&extra_lock);
>
> diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> index 79e59dd..b2d91fd 100644
> --- a/lib/utils/fdt/fdt_helper.c
> +++ b/lib/utils/fdt/fdt_helper.c
> @@ -246,6 +246,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
>         return 0;
>  }
>
> +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size)
> +{
> +       int len;
> +       const void *prop;
> +       const fdt32_t *val;
> +
> +       if (!fdt || cpu_offset < 0)
> +               return SBI_EINVAL;
> +
> +       prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> +       if (!prop || !len)
> +               return SBI_EINVAL;
> +       if (strncmp (prop, "cpu", strlen ("cpu")))
> +               return SBI_EINVAL;
> +
> +       val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> +       if (!val || len < sizeof(fdt32_t))
> +               return SBI_EINVAL;
> +
> +       if (cbom_block_size)
> +               *cbom_block_size = fdt32_to_cpu(*val);
> +       return 0;
> +}
> +
>  int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
>  {
>         u32 hartid;
> diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> index f3072be..2b1b949 100644
> --- a/platform/generic/platform.c
> +++ b/platform/generic/platform.c
> @@ -147,6 +147,9 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>         const void *fdt = (void *)arg1;
>         u32 hartid, hart_count = 0;
>         int rc, root_offset, cpus_offset, cpu_offset, len;
> +       unsigned long cbom_block_size = 0;
> +       unsigned long tmp = 0;
> +
>
>         root_offset = fdt_path_offset(fdt, "/");
>         if (root_offset < 0)
> @@ -174,11 +177,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
>                         continue;
>
>                 generic_hart_index2id[hart_count++] = hartid;
> +
> +               rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp);
> +               if (rc)
> +                       continue;
> +               cbom_block_size = MAX(tmp, cbom_block_size);
>         }
>
>         platform.hart_count = hart_count;
>         platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
>         platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> +       platform.cbom_block_size = cbom_block_size;
>
>         fw_platform_coldboot_harts_init(fdt);
>
> --
> 2.43.0
>
>
> --
> opensbi mailing list
> opensbi@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2025-04-24  5:47 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20250127202017.1043240-1-Raj.Vishwanathan>
2025-03-18 18:51 ` [PATCH v4] Set the scratch allocation to alignment to cacheline size Raj Vishwanathan
2025-03-19  1:19   ` Xiang W
2025-03-19 11:59   ` Andrew Jones
2025-03-21  5:46     ` Raj Vishwanathan
2025-03-21  7:50       ` Andrew Jones
2025-03-21 17:42         ` Raj Vishwanathan
2025-03-25  2:46   ` Samuel Holland
     [not found]   ` <20250326192730.587788-1-Raj.Vishwanathan@gmail.com>
2025-04-21 22:13     ` [PATCH v5] " Raj Vishwanathan
2025-04-22  4:41     ` Anup Patel
2025-04-23 12:58       ` Anup Patel
2025-04-23 22:50   ` [PATCH v6] lib: sbi: " Raj Vishwanathan
2025-04-24  1:17     ` Samuel Holland
2025-04-24  5:08       ` Raj Vishwanathan
2025-04-24  5:46     ` Anup Patel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.