* [PATCH 1/9] riscv: Annotate unaligned access init functions
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-13 12:59 ` Alexandre Ghiti
2025-02-07 16:19 ` [PATCH 2/9] riscv: Fix riscv_online_cpu_vec Andrew Jones
` (8 subsequent siblings)
9 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
Several functions used in unaligned access probing are only run at
init time. Annotate them appropriately.
Fixes: f413aae96cda ("riscv: Set unaligned access speed at compile time")
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/include/asm/cpufeature.h | 4 ++--
arch/riscv/kernel/traps_misaligned.c | 8 ++++----
arch/riscv/kernel/unaligned_access_speed.c | 14 +++++++-------
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 569140d6e639..19defdc2002d 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -63,7 +63,7 @@ void __init riscv_user_isa_enable(void);
#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
-bool check_unaligned_access_emulated_all_cpus(void);
+bool __init check_unaligned_access_emulated_all_cpus(void);
#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
void check_unaligned_access_emulated(struct work_struct *work __always_unused);
void unaligned_emulation_finish(void);
@@ -76,7 +76,7 @@ static inline bool unaligned_ctl_available(void)
}
#endif
-bool check_vector_unaligned_access_emulated_all_cpus(void);
+bool __init check_vector_unaligned_access_emulated_all_cpus(void);
#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
DECLARE_PER_CPU(long, vector_misaligned_access);
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 7cc108aed74e..aacbd9d7196e 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -605,7 +605,7 @@ void check_vector_unaligned_access_emulated(struct work_struct *work __always_un
kernel_vector_end();
}
-bool check_vector_unaligned_access_emulated_all_cpus(void)
+bool __init check_vector_unaligned_access_emulated_all_cpus(void)
{
int cpu;
@@ -625,7 +625,7 @@ bool check_vector_unaligned_access_emulated_all_cpus(void)
return true;
}
#else
-bool check_vector_unaligned_access_emulated_all_cpus(void)
+bool __init check_vector_unaligned_access_emulated_all_cpus(void)
{
return false;
}
@@ -659,7 +659,7 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
}
}
-bool check_unaligned_access_emulated_all_cpus(void)
+bool __init check_unaligned_access_emulated_all_cpus(void)
{
int cpu;
@@ -684,7 +684,7 @@ bool unaligned_ctl_available(void)
return unaligned_ctl;
}
#else
-bool check_unaligned_access_emulated_all_cpus(void)
+bool __init check_unaligned_access_emulated_all_cpus(void)
{
return false;
}
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 91f189cf1611..b7a8ff7ba6df 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -121,7 +121,7 @@ static int check_unaligned_access(void *param)
return 0;
}
-static void check_unaligned_access_nonboot_cpu(void *param)
+static void __init check_unaligned_access_nonboot_cpu(void *param)
{
unsigned int cpu = smp_processor_id();
struct page **pages = param;
@@ -175,7 +175,7 @@ static void set_unaligned_access_static_branches(void)
modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}
-static int lock_and_set_unaligned_access_static_branch(void)
+static int __init lock_and_set_unaligned_access_static_branch(void)
{
cpus_read_lock();
set_unaligned_access_static_branches();
@@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu)
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int check_unaligned_access_speed_all_cpus(void)
+static int __init check_unaligned_access_speed_all_cpus(void)
{
unsigned int cpu;
unsigned int cpu_count = num_possible_cpus();
@@ -264,7 +264,7 @@ static int check_unaligned_access_speed_all_cpus(void)
return 0;
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_speed_all_cpus(void)
+static int __init check_unaligned_access_speed_all_cpus(void)
{
return 0;
}
@@ -379,7 +379,7 @@ static int riscv_online_cpu_vec(unsigned int cpu)
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
schedule_on_each_cpu(check_vector_unaligned_access);
@@ -393,13 +393,13 @@ static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unuse
return 0;
}
#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
return 0;
}
#endif
-static int check_unaligned_access_all_cpus(void)
+static int __init check_unaligned_access_all_cpus(void)
{
bool all_cpus_emulated, all_cpus_vec_unsupported;
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 1/9] riscv: Annotate unaligned access init functions
2025-02-07 16:19 ` [PATCH 1/9] riscv: Annotate unaligned access init functions Andrew Jones
@ 2025-02-13 12:59 ` Alexandre Ghiti
0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 12:59 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
Hi Drew,
On 07/02/2025 17:19, Andrew Jones wrote:
> Several functions used in unaligned access probing are only run at
> init time. Annotate them appropriately.
>
> Fixes: f413aae96cda ("riscv: Set unaligned access speed at compile time")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/include/asm/cpufeature.h | 4 ++--
> arch/riscv/kernel/traps_misaligned.c | 8 ++++----
> arch/riscv/kernel/unaligned_access_speed.c | 14 +++++++-------
> 3 files changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index 569140d6e639..19defdc2002d 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -63,7 +63,7 @@ void __init riscv_user_isa_enable(void);
> #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
> _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
>
> -bool check_unaligned_access_emulated_all_cpus(void);
> +bool __init check_unaligned_access_emulated_all_cpus(void);
> #if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
> void check_unaligned_access_emulated(struct work_struct *work __always_unused);
> void unaligned_emulation_finish(void);
> @@ -76,7 +76,7 @@ static inline bool unaligned_ctl_available(void)
> }
> #endif
>
> -bool check_vector_unaligned_access_emulated_all_cpus(void);
> +bool __init check_vector_unaligned_access_emulated_all_cpus(void);
> #if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
> void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
> DECLARE_PER_CPU(long, vector_misaligned_access);
> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> index 7cc108aed74e..aacbd9d7196e 100644
> --- a/arch/riscv/kernel/traps_misaligned.c
> +++ b/arch/riscv/kernel/traps_misaligned.c
> @@ -605,7 +605,7 @@ void check_vector_unaligned_access_emulated(struct work_struct *work __always_un
> kernel_vector_end();
> }
>
> -bool check_vector_unaligned_access_emulated_all_cpus(void)
> +bool __init check_vector_unaligned_access_emulated_all_cpus(void)
> {
> int cpu;
>
> @@ -625,7 +625,7 @@ bool check_vector_unaligned_access_emulated_all_cpus(void)
> return true;
> }
> #else
> -bool check_vector_unaligned_access_emulated_all_cpus(void)
> +bool __init check_vector_unaligned_access_emulated_all_cpus(void)
> {
> return false;
> }
> @@ -659,7 +659,7 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
> }
> }
>
> -bool check_unaligned_access_emulated_all_cpus(void)
> +bool __init check_unaligned_access_emulated_all_cpus(void)
> {
> int cpu;
>
> @@ -684,7 +684,7 @@ bool unaligned_ctl_available(void)
> return unaligned_ctl;
> }
> #else
> -bool check_unaligned_access_emulated_all_cpus(void)
> +bool __init check_unaligned_access_emulated_all_cpus(void)
> {
> return false;
> }
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 91f189cf1611..b7a8ff7ba6df 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -121,7 +121,7 @@ static int check_unaligned_access(void *param)
> return 0;
> }
>
> -static void check_unaligned_access_nonboot_cpu(void *param)
> +static void __init check_unaligned_access_nonboot_cpu(void *param)
> {
> unsigned int cpu = smp_processor_id();
> struct page **pages = param;
> @@ -175,7 +175,7 @@ static void set_unaligned_access_static_branches(void)
> modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
> }
>
> -static int lock_and_set_unaligned_access_static_branch(void)
> +static int __init lock_and_set_unaligned_access_static_branch(void)
> {
> cpus_read_lock();
> set_unaligned_access_static_branches();
> @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu)
> }
>
> /* Measure unaligned access speed on all CPUs present at boot in parallel. */
> -static int check_unaligned_access_speed_all_cpus(void)
> +static int __init check_unaligned_access_speed_all_cpus(void)
> {
> unsigned int cpu;
> unsigned int cpu_count = num_possible_cpus();
> @@ -264,7 +264,7 @@ static int check_unaligned_access_speed_all_cpus(void)
> return 0;
> }
> #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> -static int check_unaligned_access_speed_all_cpus(void)
> +static int __init check_unaligned_access_speed_all_cpus(void)
> {
> return 0;
> }
> @@ -379,7 +379,7 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> }
>
> /* Measure unaligned access speed on all CPUs present at boot in parallel. */
> -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> {
> schedule_on_each_cpu(check_vector_unaligned_access);
>
> @@ -393,13 +393,13 @@ static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unuse
> return 0;
> }
> #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> {
> return 0;
> }
> #endif
>
> -static int check_unaligned_access_all_cpus(void)
> +static int __init check_unaligned_access_all_cpus(void)
> {
> bool all_cpus_emulated, all_cpus_vec_unsupported;
>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 2/9] riscv: Fix riscv_online_cpu_vec
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
2025-02-07 16:19 ` [PATCH 1/9] riscv: Annotate unaligned access init functions Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-07 16:47 ` Clément Léger
2025-02-13 13:02 ` Alexandre Ghiti
2025-02-07 16:19 ` [PATCH 3/9] riscv: Fix check_unaligned_access_all_cpus Andrew Jones
` (7 subsequent siblings)
9 siblings, 2 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
We shouldn't probe when we already know vector is unsupported and
we should probe when we see we don't yet know whether it's supported.
Furthermore, we should ensure we've set the access type to
unsupported when we don't have vector at all.
Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/unaligned_access_speed.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index b7a8ff7ba6df..161964cf2abc 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
static int riscv_online_cpu_vec(unsigned int cpu)
{
- if (!has_vector())
+ if (!has_vector()) {
+ per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
return 0;
+ }
- if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
return 0;
check_vector_unaligned_access_emulated(NULL);
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 2/9] riscv: Fix riscv_online_cpu_vec
2025-02-07 16:19 ` [PATCH 2/9] riscv: Fix riscv_online_cpu_vec Andrew Jones
@ 2025-02-07 16:47 ` Clément Léger
2025-02-07 17:08 ` Andrew Jones
2025-02-13 13:02 ` Alexandre Ghiti
1 sibling, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-07 16:47 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> We shouldn't probe when we already know vector is unsupported and
> we should probe when we see we don't yet know whether it's supported.
> Furthermore, we should ensure we've set the access type to
> unsupported when we don't have vector at all.
>
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index b7a8ff7ba6df..161964cf2abc 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
>
> static int riscv_online_cpu_vec(unsigned int cpu)
> {
> - if (!has_vector())
> + if (!has_vector()) {
> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> return 0;
> + }
>
> - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
> + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> return 0;
>
> check_vector_unaligned_access_emulated(NULL);
Hi Andrew,
Wouldn't it be easier just not to register the hotplug callback in case
!has_vector() ? In which case just set all possible cpus
vector_misaligned_access to RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED
at startup.
Thanks,
Clément
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 2/9] riscv: Fix riscv_online_cpu_vec
2025-02-07 16:47 ` Clément Léger
@ 2025-02-07 17:08 ` Andrew Jones
2025-02-07 17:43 ` Clément Léger
0 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 17:08 UTC (permalink / raw)
To: Clément Léger
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse,
Anup Patel
On Fri, Feb 07, 2025 at 05:47:28PM +0100, Clément Léger wrote:
>
>
> On 07/02/2025 17:19, Andrew Jones wrote:
> > We shouldn't probe when we already know vector is unsupported and
> > we should probe when we see we don't yet know whether it's supported.
> > Furthermore, we should ensure we've set the access type to
> > unsupported when we don't have vector at all.
> >
> > Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> > arch/riscv/kernel/unaligned_access_speed.c | 6 ++++--
> > 1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> > index b7a8ff7ba6df..161964cf2abc 100644
> > --- a/arch/riscv/kernel/unaligned_access_speed.c
> > +++ b/arch/riscv/kernel/unaligned_access_speed.c
> > @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
> >
> > static int riscv_online_cpu_vec(unsigned int cpu)
> > {
> > - if (!has_vector())
> > + if (!has_vector()) {
> > + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> > return 0;
> > + }
> >
> > - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
> > + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> > return 0;
> >
> > check_vector_unaligned_access_emulated(NULL);
>
> Hi Andrew,
>
> Wouldn't it be easier just not to register the hotplug callback in case
> !has_vector() ? In which case just set all possible cpus
> vector_misaligned_access to RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED
> at startup.
We could do that, but I have another use for the hotplug callback that
you'll see near the end of the series.
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 2/9] riscv: Fix riscv_online_cpu_vec
2025-02-07 17:08 ` Andrew Jones
@ 2025-02-07 17:43 ` Clément Léger
2025-02-07 18:08 ` Andrew Jones
0 siblings, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-07 17:43 UTC (permalink / raw)
To: Andrew Jones
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse,
Anup Patel
On 07/02/2025 18:08, Andrew Jones wrote:
> On Fri, Feb 07, 2025 at 05:47:28PM +0100, Clément Léger wrote:
>>
>>
>> On 07/02/2025 17:19, Andrew Jones wrote:
>>> We shouldn't probe when we already know vector is unsupported and
>>> we should probe when we see we don't yet know whether it's supported.
>>> Furthermore, we should ensure we've set the access type to
>>> unsupported when we don't have vector at all.
>>>
>>> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
>>> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
>>> ---
>>> arch/riscv/kernel/unaligned_access_speed.c | 6 ++++--
>>> 1 file changed, 4 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
>>> index b7a8ff7ba6df..161964cf2abc 100644
>>> --- a/arch/riscv/kernel/unaligned_access_speed.c
>>> +++ b/arch/riscv/kernel/unaligned_access_speed.c
>>> @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
>>>
>>> static int riscv_online_cpu_vec(unsigned int cpu)
>>> {
>>> - if (!has_vector())
>>> + if (!has_vector()) {
>>> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
>>> return 0;
>>> + }
>>>
>>> - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
>>> + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
>>> return 0;
>>>
>>> check_vector_unaligned_access_emulated(NULL);
>>
>> Hi Andrew,
>>
>> Wouldn't it be easier just not to register the hotplug callback in case
>> !has_vector() ? In which case just set all possible cpus
>> vector_misaligned_access to RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED
>> at startup.
>
> We could do that, but I have another use for the hotplug callback that
> you'll see near the end of the series.
Ok, I saw patch 7/9.
BTW, what happened to patch 8 ? I thought it was in my spam but even
lore.kernel.org does not have it:
https://lore.kernel.org/linux-riscv/20250207161939.46139-11-ajones@ventanamicro.com/T/#t
Thanks,
Clément
>
> Thanks,
> drew
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 2/9] riscv: Fix riscv_online_cpu_vec
2025-02-07 17:43 ` Clément Léger
@ 2025-02-07 18:08 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 18:08 UTC (permalink / raw)
To: Clément Léger
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse,
Anup Patel
On Fri, Feb 07, 2025 at 06:43:26PM +0100, Clément Léger wrote:
>
>
> On 07/02/2025 18:08, Andrew Jones wrote:
> > On Fri, Feb 07, 2025 at 05:47:28PM +0100, Clément Léger wrote:
> >>
> >>
> >> On 07/02/2025 17:19, Andrew Jones wrote:
> >>> We shouldn't probe when we already know vector is unsupported and
> >>> we should probe when we see we don't yet know whether it's supported.
> >>> Furthermore, we should ensure we've set the access type to
> >>> unsupported when we don't have vector at all.
> >>>
> >>> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> >>> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> >>> ---
> >>> arch/riscv/kernel/unaligned_access_speed.c | 6 ++++--
> >>> 1 file changed, 4 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> >>> index b7a8ff7ba6df..161964cf2abc 100644
> >>> --- a/arch/riscv/kernel/unaligned_access_speed.c
> >>> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> >>> @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
> >>>
> >>> static int riscv_online_cpu_vec(unsigned int cpu)
> >>> {
> >>> - if (!has_vector())
> >>> + if (!has_vector()) {
> >>> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> >>> return 0;
> >>> + }
> >>>
> >>> - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
> >>> + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> >>> return 0;
> >>>
> >>> check_vector_unaligned_access_emulated(NULL);
> >>
> >> Hi Andrew,
> >>
> >> Wouldn't it be easier just not to register the hotplug callback in case
> >> !has_vector() ? In which case just set all possible cpus
> >> vector_misaligned_access to RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED
> >> at startup.
> >
> > We could do that, but I have another use for the hotplug callback that
> > you'll see near the end of the series.
>
> Ok, I saw patch 7/9.
>
> BTW, what happened to patch 8 ? I thought it was in my spam but even
> lore.kernel.org does not have it:
> https://lore.kernel.org/linux-riscv/20250207161939.46139-11-ajones@ventanamicro.com/T/#t
I see it on lore for lkml, so linux-riscv dropped it for no good reason...
I'll send patch 8 again (this time with feeling) just to linux-riscv and
we'll see if gets through.
https://lore.kernel.org/all/20250207161939.46139-19-ajones@ventanamicro.com/
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 2/9] riscv: Fix riscv_online_cpu_vec
2025-02-07 16:19 ` [PATCH 2/9] riscv: Fix riscv_online_cpu_vec Andrew Jones
2025-02-07 16:47 ` Clément Léger
@ 2025-02-13 13:02 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 13:02 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> We shouldn't probe when we already know vector is unsupported and
> we should probe when we see we don't yet know whether it's supported.
> Furthermore, we should ensure we've set the access type to
> unsupported when we don't have vector at all.
>
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index b7a8ff7ba6df..161964cf2abc 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
>
> static int riscv_online_cpu_vec(unsigned int cpu)
> {
> - if (!has_vector())
> + if (!has_vector()) {
> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> return 0;
> + }
>
> - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
> + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> return 0;
>
> check_vector_unaligned_access_emulated(NULL);
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 3/9] riscv: Fix check_unaligned_access_all_cpus
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
2025-02-07 16:19 ` [PATCH 1/9] riscv: Annotate unaligned access init functions Andrew Jones
2025-02-07 16:19 ` [PATCH 2/9] riscv: Fix riscv_online_cpu_vec Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-13 13:12 ` Alexandre Ghiti
2025-02-07 16:19 ` [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void Andrew Jones
` (6 subsequent siblings)
9 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
check_vector_unaligned_access_emulated_all_cpus(), like its name
suggests, will return true when all cpus emulate unaligned vector
accesses. If the function returned false it may have been because
vector isn't supported at all (!has_vector()) or because at least
one cpu doesn't emulate unaligned vector accesses. Since false may
be returned for two cases, checking for it isn't sufficient when
attempting to determine if we should proceed with the vector speed
check. Move the !has_vector() functionality to
check_unaligned_access_all_cpus() in order for
check_vector_unaligned_access_emulated_all_cpus() to return false
for a single case.
Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/traps_misaligned.c | 6 ------
arch/riscv/kernel/unaligned_access_speed.c | 11 +++++++----
2 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index aacbd9d7196e..4354c87c0376 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -609,12 +609,6 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void)
{
int cpu;
- if (!has_vector()) {
- for_each_online_cpu(cpu)
- per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
- return false;
- }
-
schedule_on_each_cpu(check_vector_unaligned_access_emulated);
for_each_online_cpu(cpu)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 161964cf2abc..02b485dc4bc4 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -403,13 +403,16 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
static int __init check_unaligned_access_all_cpus(void)
{
- bool all_cpus_emulated, all_cpus_vec_unsupported;
+ bool all_cpus_emulated;
+ int cpu;
all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
- all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus();
- if (!all_cpus_vec_unsupported &&
- IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
+ if (!has_vector()) {
+ for_each_online_cpu(cpu)
+ per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+ } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
+ IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
kthread_run(vec_check_unaligned_access_speed_all_cpus,
NULL, "vec_check_unaligned_access_speed_all_cpus");
}
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 3/9] riscv: Fix check_unaligned_access_all_cpus
2025-02-07 16:19 ` [PATCH 3/9] riscv: Fix check_unaligned_access_all_cpus Andrew Jones
@ 2025-02-13 13:12 ` Alexandre Ghiti
0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 13:12 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> check_vector_unaligned_access_emulated_all_cpus(), like its name
> suggests, will return true when all cpus emulate unaligned vector
> accesses. If the function returned false it may have been because
> vector isn't supported at all (!has_vector()) or because at least
> one cpu doesn't emulate unaligned vector accesses. Since false may
> be returned for two cases, checking for it isn't sufficient when
> attempting to determine if we should proceed with the vector speed
> check. Move the !has_vector() functionality to
> check_unaligned_access_all_cpus() in order for
> check_vector_unaligned_access_emulated_all_cpus() to return false
> for a single case.
>
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/traps_misaligned.c | 6 ------
> arch/riscv/kernel/unaligned_access_speed.c | 11 +++++++----
> 2 files changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> index aacbd9d7196e..4354c87c0376 100644
> --- a/arch/riscv/kernel/traps_misaligned.c
> +++ b/arch/riscv/kernel/traps_misaligned.c
> @@ -609,12 +609,6 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void)
> {
> int cpu;
>
> - if (!has_vector()) {
> - for_each_online_cpu(cpu)
> - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> - return false;
> - }
> -
> schedule_on_each_cpu(check_vector_unaligned_access_emulated);
>
> for_each_online_cpu(cpu)
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 161964cf2abc..02b485dc4bc4 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -403,13 +403,16 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>
> static int __init check_unaligned_access_all_cpus(void)
> {
> - bool all_cpus_emulated, all_cpus_vec_unsupported;
> + bool all_cpus_emulated;
> + int cpu;
>
> all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
> - all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus();
>
> - if (!all_cpus_vec_unsupported &&
> - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> + if (!has_vector()) {
> + for_each_online_cpu(cpu)
> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> + } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
> + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> kthread_run(vec_check_unaligned_access_speed_all_cpus,
> NULL, "vec_check_unaligned_access_speed_all_cpus");
> }
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (2 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 3/9] riscv: Fix check_unaligned_access_all_cpus Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-07 16:42 ` Clément Léger
2025-02-13 13:15 ` Alexandre Ghiti
2025-02-07 16:19 ` [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks Andrew Jones
` (5 subsequent siblings)
9 siblings, 2 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
The return value of check_unaligned_access_speed_all_cpus() is always
zero, so make the function void so we don't need to concern ourselves
with it. The change also allows us to tidy up
check_unaligned_access_all_cpus() a bit.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/unaligned_access_speed.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 02b485dc4bc4..780f1c5f512a 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu)
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int __init check_unaligned_access_speed_all_cpus(void)
+static void __init check_unaligned_access_speed_all_cpus(void)
{
unsigned int cpu;
unsigned int cpu_count = num_possible_cpus();
@@ -226,7 +226,7 @@ static int __init check_unaligned_access_speed_all_cpus(void)
if (!bufs) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
- return 0;
+ return;
}
/*
@@ -261,12 +261,10 @@ static int __init check_unaligned_access_speed_all_cpus(void)
}
kfree(bufs);
- return 0;
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int __init check_unaligned_access_speed_all_cpus(void)
+static void __init check_unaligned_access_speed_all_cpus(void)
{
- return 0;
}
#endif
@@ -403,10 +401,10 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
static int __init check_unaligned_access_all_cpus(void)
{
- bool all_cpus_emulated;
int cpu;
- all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+ if (!check_unaligned_access_emulated_all_cpus())
+ check_unaligned_access_speed_all_cpus();
if (!has_vector()) {
for_each_online_cpu(cpu)
@@ -417,9 +415,6 @@ static int __init check_unaligned_access_all_cpus(void)
NULL, "vec_check_unaligned_access_speed_all_cpus");
}
- if (!all_cpus_emulated)
- return check_unaligned_access_speed_all_cpus();
-
return 0;
}
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void
2025-02-07 16:19 ` [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void Andrew Jones
@ 2025-02-07 16:42 ` Clément Léger
2025-02-13 13:15 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Clément Léger @ 2025-02-07 16:42 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> The return value of check_unaligned_access_speed_all_cpus() is always
> zero, so make the function void so we don't need to concern ourselves
> with it. The change also allows us to tidy up
> check_unaligned_access_all_cpus() a bit.
>
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 15 +++++----------
> 1 file changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 02b485dc4bc4..780f1c5f512a 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu)
> }
>
> /* Measure unaligned access speed on all CPUs present at boot in parallel. */
> -static int __init check_unaligned_access_speed_all_cpus(void)
> +static void __init check_unaligned_access_speed_all_cpus(void)
> {
> unsigned int cpu;
> unsigned int cpu_count = num_possible_cpus();
> @@ -226,7 +226,7 @@ static int __init check_unaligned_access_speed_all_cpus(void)
>
> if (!bufs) {
> pr_warn("Allocation failure, not measuring misaligned performance\n");
> - return 0;
> + return;
> }
>
> /*
> @@ -261,12 +261,10 @@ static int __init check_unaligned_access_speed_all_cpus(void)
> }
>
> kfree(bufs);
> - return 0;
> }
> #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> -static int __init check_unaligned_access_speed_all_cpus(void)
> +static void __init check_unaligned_access_speed_all_cpus(void)
> {
> - return 0;
> }
> #endif
>
> @@ -403,10 +401,10 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>
> static int __init check_unaligned_access_all_cpus(void)
> {
> - bool all_cpus_emulated;
> int cpu;
>
> - all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
> + if (!check_unaligned_access_emulated_all_cpus())
> + check_unaligned_access_speed_all_cpus();
>
> if (!has_vector()) {
> for_each_online_cpu(cpu)
> @@ -417,9 +415,6 @@ static int __init check_unaligned_access_all_cpus(void)
> NULL, "vec_check_unaligned_access_speed_all_cpus");
> }
>
> - if (!all_cpus_emulated)
> - return check_unaligned_access_speed_all_cpus();
> -
> return 0;
> }
>
Hi Andrew,
I had a similar patch in an upcoming series but you were faster !
Reviewed-by: Clément Léger <cleger@rivosinc.com>
Thanks,
Clément
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void
2025-02-07 16:19 ` [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void Andrew Jones
2025-02-07 16:42 ` Clément Léger
@ 2025-02-13 13:15 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 13:15 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> The return value of check_unaligned_access_speed_all_cpus() is always
> zero, so make the function void so we don't need to concern ourselves
> with it. The change also allows us to tidy up
> check_unaligned_access_all_cpus() a bit.
>
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 15 +++++----------
> 1 file changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 02b485dc4bc4..780f1c5f512a 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu)
> }
>
> /* Measure unaligned access speed on all CPUs present at boot in parallel. */
> -static int __init check_unaligned_access_speed_all_cpus(void)
> +static void __init check_unaligned_access_speed_all_cpus(void)
> {
> unsigned int cpu;
> unsigned int cpu_count = num_possible_cpus();
> @@ -226,7 +226,7 @@ static int __init check_unaligned_access_speed_all_cpus(void)
>
> if (!bufs) {
> pr_warn("Allocation failure, not measuring misaligned performance\n");
> - return 0;
> + return;
> }
>
> /*
> @@ -261,12 +261,10 @@ static int __init check_unaligned_access_speed_all_cpus(void)
> }
>
> kfree(bufs);
> - return 0;
> }
> #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> -static int __init check_unaligned_access_speed_all_cpus(void)
> +static void __init check_unaligned_access_speed_all_cpus(void)
> {
> - return 0;
> }
> #endif
>
> @@ -403,10 +401,10 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>
> static int __init check_unaligned_access_all_cpus(void)
> {
> - bool all_cpus_emulated;
> int cpu;
>
> - all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
> + if (!check_unaligned_access_emulated_all_cpus())
> + check_unaligned_access_speed_all_cpus();
>
> if (!has_vector()) {
> for_each_online_cpu(cpu)
> @@ -417,9 +415,6 @@ static int __init check_unaligned_access_all_cpus(void)
> NULL, "vec_check_unaligned_access_speed_all_cpus");
> }
>
> - if (!all_cpus_emulated)
> - return check_unaligned_access_speed_all_cpus();
> -
> return 0;
> }
>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (3 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 4/9] riscv: Change check_unaligned_access_speed_all_cpus to void Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-07 16:44 ` Clément Léger
` (2 more replies)
2025-02-07 16:19 ` [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback Andrew Jones
` (4 subsequent siblings)
9 siblings, 3 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
CPU hotplug callbacks should be set up even if we detected all
current cpus emulate misaligned accesses, since we want to
ensure our expectations of all cpus emulating is maintained.
Fixes: 6e5ce7f2eae3 ("riscv: Decouple emulated unaligned accesses from access speed")
Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/unaligned_access_speed.c | 27 +++++++++++-----------
1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 780f1c5f512a..c9d3237649bb 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -247,13 +247,6 @@ static void __init check_unaligned_access_speed_all_cpus(void)
/* Check core 0. */
smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
- /*
- * Setup hotplug callbacks for any new CPUs that come online or go
- * offline.
- */
- cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
- riscv_online_cpu, riscv_offline_cpu);
-
out:
for_each_cpu(cpu, cpu_online_mask) {
if (bufs[cpu])
@@ -383,13 +376,6 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
{
schedule_on_each_cpu(check_vector_unaligned_access);
- /*
- * Setup hotplug callbacks for any new CPUs that come online or go
- * offline.
- */
- cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
- riscv_online_cpu_vec, NULL);
-
return 0;
}
#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
@@ -415,6 +401,19 @@ static int __init check_unaligned_access_all_cpus(void)
NULL, "vec_check_unaligned_access_speed_all_cpus");
}
+ /*
+ * Setup hotplug callbacks for any new CPUs that come online or go
+ * offline.
+ */
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu, riscv_offline_cpu);
+#endif
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu_vec, NULL);
+#endif
+
return 0;
}
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks
2025-02-07 16:19 ` [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks Andrew Jones
@ 2025-02-07 16:44 ` Clément Léger
2025-02-13 13:25 ` Alexandre Ghiti
2025-02-13 13:33 ` Alexandre Ghiti
2 siblings, 0 replies; 47+ messages in thread
From: Clément Léger @ 2025-02-07 16:44 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> CPU hotplug callbacks should be set up even if we detected all
> current cpus emulate misaligned accesses, since we want to
> ensure our expectations of all cpus emulating is maintained.
>
> Fixes: 6e5ce7f2eae3 ("riscv: Decouple emulated unaligned accesses from access speed")
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 27 +++++++++++-----------
> 1 file changed, 13 insertions(+), 14 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 780f1c5f512a..c9d3237649bb 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -247,13 +247,6 @@ static void __init check_unaligned_access_speed_all_cpus(void)
> /* Check core 0. */
> smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
>
> - /*
> - * Setup hotplug callbacks for any new CPUs that come online or go
> - * offline.
> - */
> - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu, riscv_offline_cpu);
> -
> out:
> for_each_cpu(cpu, cpu_online_mask) {
> if (bufs[cpu])
> @@ -383,13 +376,6 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> {
> schedule_on_each_cpu(check_vector_unaligned_access);
>
> - /*
> - * Setup hotplug callbacks for any new CPUs that come online or go
> - * offline.
> - */
> - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu_vec, NULL);
> -
> return 0;
> }
> #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> @@ -415,6 +401,19 @@ static int __init check_unaligned_access_all_cpus(void)
> NULL, "vec_check_unaligned_access_speed_all_cpus");
> }
>
> + /*
> + * Setup hotplug callbacks for any new CPUs that come online or go
> + * offline.
> + */
> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
> + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> + riscv_online_cpu, riscv_offline_cpu);
> +#endif
> +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> + riscv_online_cpu_vec, NULL);
> +#endif
> +
> return 0;
> }
>
Hi Andrew,
Reviewed-by: Clément Léger <cleger@rivosinc.com>
Thanks,
Clément
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks
2025-02-07 16:19 ` [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks Andrew Jones
2025-02-07 16:44 ` Clément Léger
@ 2025-02-13 13:25 ` Alexandre Ghiti
2025-02-13 13:33 ` Alexandre Ghiti
2 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 13:25 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> CPU hotplug callbacks should be set up even if we detected all
> current cpus emulate misaligned accesses, since we want to
> ensure our expectations of all cpus emulating is maintained.
>
> Fixes: 6e5ce7f2eae3 ("riscv: Decouple emulated unaligned accesses from access speed")
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 27 +++++++++++-----------
> 1 file changed, 13 insertions(+), 14 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 780f1c5f512a..c9d3237649bb 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -247,13 +247,6 @@ static void __init check_unaligned_access_speed_all_cpus(void)
> /* Check core 0. */
> smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
>
> - /*
> - * Setup hotplug callbacks for any new CPUs that come online or go
> - * offline.
> - */
> - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu, riscv_offline_cpu);
> -
> out:
> for_each_cpu(cpu, cpu_online_mask) {
> if (bufs[cpu])
> @@ -383,13 +376,6 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> {
> schedule_on_each_cpu(check_vector_unaligned_access);
>
> - /*
> - * Setup hotplug callbacks for any new CPUs that come online or go
> - * offline.
> - */
> - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu_vec, NULL);
> -
> return 0;
> }
> #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> @@ -415,6 +401,19 @@ static int __init check_unaligned_access_all_cpus(void)
> NULL, "vec_check_unaligned_access_speed_all_cpus");
> }
>
> + /*
> + * Setup hotplug callbacks for any new CPUs that come online or go
> + * offline.
> + */
> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
> + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> + riscv_online_cpu, riscv_offline_cpu);
> +#endif
> +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> + riscv_online_cpu_vec, NULL);
> +#endif
> +
> return 0;
> }
>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks
2025-02-07 16:19 ` [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks Andrew Jones
2025-02-07 16:44 ` Clément Léger
2025-02-13 13:25 ` Alexandre Ghiti
@ 2025-02-13 13:33 ` Alexandre Ghiti
2 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 13:33 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> CPU hotplug callbacks should be set up even if we detected all
> current cpus emulate misaligned accesses, since we want to
> ensure our expectations of all cpus emulating is maintained.
>
> Fixes: 6e5ce7f2eae3 ("riscv: Decouple emulated unaligned accesses from access speed")
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 27 +++++++++++-----------
> 1 file changed, 13 insertions(+), 14 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index 780f1c5f512a..c9d3237649bb 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -247,13 +247,6 @@ static void __init check_unaligned_access_speed_all_cpus(void)
> /* Check core 0. */
> smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
>
> - /*
> - * Setup hotplug callbacks for any new CPUs that come online or go
> - * offline.
> - */
> - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu, riscv_offline_cpu);
> -
> out:
> for_each_cpu(cpu, cpu_online_mask) {
> if (bufs[cpu])
> @@ -383,13 +376,6 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> {
> schedule_on_each_cpu(check_vector_unaligned_access);
>
> - /*
> - * Setup hotplug callbacks for any new CPUs that come online or go
> - * offline.
> - */
> - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu_vec, NULL);
> -
> return 0;
> }
> #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> @@ -415,6 +401,19 @@ static int __init check_unaligned_access_all_cpus(void)
> NULL, "vec_check_unaligned_access_speed_all_cpus");
> }
>
> + /*
> + * Setup hotplug callbacks for any new CPUs that come online or go
> + * offline.
> + */
> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
> + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> + riscv_online_cpu, riscv_offline_cpu);
> +#endif
> +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> + riscv_online_cpu_vec, NULL);
> +#endif
> +
> return 0;
> }
>
This patch 5 does not apply without patch 4 that is not a fix which, in
theory, should not be sent into the next -rcX...I'd say it is not a
problem as patch 4 is small enough.
Just to say that maybe you'll be requested to rearrange the patchset!
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (4 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 5/9] riscv: Fix set up of cpu hotplug callbacks Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-07 17:36 ` Clément Léger
2025-02-13 13:28 ` Alexandre Ghiti
2025-02-07 16:19 ` [PATCH 7/9] riscv: Prepare for unaligned access type table lookups Andrew Jones
` (3 subsequent siblings)
9 siblings, 2 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
Whether or not we have RISCV_PROBE_VECTOR_UNALIGNED_ACCESS we need to
set up a cpu hotplug callback to check if we have vector at all,
since, when we don't have vector, we need to set
vector_misaligned_access to unsupported rather than leave it the
default of unknown.
Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/unaligned_access_speed.c | 31 +++++++++++-----------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index c9d3237649bb..d9d4ca1fadc7 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -356,6 +356,20 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
per_cpu(vector_misaligned_access, cpu) = speed;
}
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+ schedule_on_each_cpu(check_vector_unaligned_access);
+
+ return 0;
+}
+#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+ return 0;
+}
+#endif
+
static int riscv_online_cpu_vec(unsigned int cpu)
{
if (!has_vector()) {
@@ -363,27 +377,16 @@ static int riscv_online_cpu_vec(unsigned int cpu)
return 0;
}
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
return 0;
check_vector_unaligned_access_emulated(NULL);
check_vector_unaligned_access(NULL);
- return 0;
-}
-
-/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
- schedule_on_each_cpu(check_vector_unaligned_access);
+#endif
return 0;
}
-#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
- return 0;
-}
-#endif
static int __init check_unaligned_access_all_cpus(void)
{
@@ -409,10 +412,8 @@ static int __init check_unaligned_access_all_cpus(void)
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu, riscv_offline_cpu);
#endif
-#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu_vec, NULL);
-#endif
return 0;
}
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback
2025-02-07 16:19 ` [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback Andrew Jones
@ 2025-02-07 17:36 ` Clément Léger
2025-02-07 18:15 ` Andrew Jones
2025-02-13 13:28 ` Alexandre Ghiti
1 sibling, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-07 17:36 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> Whether or not we have RISCV_PROBE_VECTOR_UNALIGNED_ACCESS we need to
> set up a cpu hotplug callback to check if we have vector at all,
> since, when we don't have vector, we need to set
> vector_misaligned_access to unsupported rather than leave it the
> default of unknown.
>
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 31 +++++++++++-----------
> 1 file changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index c9d3237649bb..d9d4ca1fadc7 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -356,6 +356,20 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
> per_cpu(vector_misaligned_access, cpu) = speed;
> }
>
> +/* Measure unaligned access speed on all CPUs present at boot in parallel. */
> +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> +{
> + schedule_on_each_cpu(check_vector_unaligned_access);
Hey Andrew,
While at it, could you add a comment stating that schedule_on_cpu()
(while documented as really slow) is used due to kernel_vector_begin()
needing interrupts to be enabled ? I stumbled upon that while reworking
misaligned.
Thanks,
Clément
> +
> + return 0;
> +}
> +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> +{
> + return 0;
> +}
> +#endif
> +
> static int riscv_online_cpu_vec(unsigned int cpu)
> {
> if (!has_vector()) {
> @@ -363,27 +377,16 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> return 0;
> }
>
> +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> return 0;
>
> check_vector_unaligned_access_emulated(NULL);
> check_vector_unaligned_access(NULL);
> - return 0;
> -}
> -
> -/* Measure unaligned access speed on all CPUs present at boot in parallel. */
> -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> -{
> - schedule_on_each_cpu(check_vector_unaligned_access);
> +#endif
>
> return 0;
> }
> -#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> -{
> - return 0;
> -}
> -#endif
>
> static int __init check_unaligned_access_all_cpus(void)
> {
> @@ -409,10 +412,8 @@ static int __init check_unaligned_access_all_cpus(void)
> cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> riscv_online_cpu, riscv_offline_cpu);
> #endif
> -#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> riscv_online_cpu_vec, NULL);
> -#endif
>
> return 0;
> }
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback
2025-02-07 17:36 ` Clément Léger
@ 2025-02-07 18:15 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 18:15 UTC (permalink / raw)
To: Clément Léger
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse,
Anup Patel
On Fri, Feb 07, 2025 at 06:36:28PM +0100, Clément Léger wrote:
>
>
> On 07/02/2025 17:19, Andrew Jones wrote:
> > Whether or not we have RISCV_PROBE_VECTOR_UNALIGNED_ACCESS we need to
> > set up a cpu hotplug callback to check if we have vector at all,
> > since, when we don't have vector, we need to set
> > vector_misaligned_access to unsupported rather than leave it the
> > default of unknown.
> >
> > Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> > arch/riscv/kernel/unaligned_access_speed.c | 31 +++++++++++-----------
> > 1 file changed, 16 insertions(+), 15 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> > index c9d3237649bb..d9d4ca1fadc7 100644
> > --- a/arch/riscv/kernel/unaligned_access_speed.c
> > +++ b/arch/riscv/kernel/unaligned_access_speed.c
> > @@ -356,6 +356,20 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
> > per_cpu(vector_misaligned_access, cpu) = speed;
> > }
> >
> > +/* Measure unaligned access speed on all CPUs present at boot in parallel. */
> > +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> > +{
> > + schedule_on_each_cpu(check_vector_unaligned_access);
> Hey Andrew,
>
> While at it, could you add a comment stating that schedule_on_cpu()
> (while documented as really slow) is used due to kernel_vector_begin()
> needing interrupts to be enabled ? I stumbled upon that while reworking
> misaligned.
That should be a separate patch, since this patch is mostly just moving
code (not even this function was "moved", but git-diff prefers to say it
was moved rather than what was actually moved...)
I guess the comment patch you suggest should go in your rework series.
Thanks,
drew
>
> Thanks,
>
> Clément
>
> > +
> > + return 0;
> > +}
> > +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> > +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> > +{
> > + return 0;
> > +}
> > +#endif
> > +
> > static int riscv_online_cpu_vec(unsigned int cpu)
> > {
> > if (!has_vector()) {
> > @@ -363,27 +377,16 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> > return 0;
> > }
> >
> > +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> > if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> > return 0;
> >
> > check_vector_unaligned_access_emulated(NULL);
> > check_vector_unaligned_access(NULL);
> > - return 0;
> > -}
> > -
> > -/* Measure unaligned access speed on all CPUs present at boot in parallel. */
> > -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> > -{
> > - schedule_on_each_cpu(check_vector_unaligned_access);
> > +#endif
> >
> > return 0;
> > }
> > -#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> > -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> > -{
> > - return 0;
> > -}
> > -#endif
> >
> > static int __init check_unaligned_access_all_cpus(void)
> > {
> > @@ -409,10 +412,8 @@ static int __init check_unaligned_access_all_cpus(void)
> > cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> > riscv_online_cpu, riscv_offline_cpu);
> > #endif
> > -#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> > cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> > riscv_online_cpu_vec, NULL);
> > -#endif
> >
> > return 0;
> > }
>
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback
2025-02-07 16:19 ` [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback Andrew Jones
2025-02-07 17:36 ` Clément Léger
@ 2025-02-13 13:28 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2025-02-13 13:28 UTC (permalink / raw)
To: Andrew Jones, linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
On 07/02/2025 17:19, Andrew Jones wrote:
> Whether or not we have RISCV_PROBE_VECTOR_UNALIGNED_ACCESS we need to
> set up a cpu hotplug callback to check if we have vector at all,
> since, when we don't have vector, we need to set
> vector_misaligned_access to unsupported rather than leave it the
> default of unknown.
>
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> arch/riscv/kernel/unaligned_access_speed.c | 31 +++++++++++-----------
> 1 file changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index c9d3237649bb..d9d4ca1fadc7 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -356,6 +356,20 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
> per_cpu(vector_misaligned_access, cpu) = speed;
> }
>
> +/* Measure unaligned access speed on all CPUs present at boot in parallel. */
> +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> +{
> + schedule_on_each_cpu(check_vector_unaligned_access);
> +
> + return 0;
> +}
> +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> +{
> + return 0;
> +}
> +#endif
> +
> static int riscv_online_cpu_vec(unsigned int cpu)
> {
> if (!has_vector()) {
> @@ -363,27 +377,16 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> return 0;
> }
>
> +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> return 0;
>
> check_vector_unaligned_access_emulated(NULL);
> check_vector_unaligned_access(NULL);
> - return 0;
> -}
> -
> -/* Measure unaligned access speed on all CPUs present at boot in parallel. */
> -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> -{
> - schedule_on_each_cpu(check_vector_unaligned_access);
> +#endif
>
> return 0;
> }
> -#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
> -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
> -{
> - return 0;
> -}
> -#endif
>
> static int __init check_unaligned_access_all_cpus(void)
> {
> @@ -409,10 +412,8 @@ static int __init check_unaligned_access_all_cpus(void)
> cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> riscv_online_cpu, riscv_offline_cpu);
> #endif
> -#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> riscv_online_cpu_vec, NULL);
> -#endif
>
> return 0;
> }
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Thanks,
Alex
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (5 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 6/9] riscv: Fix set up of vector cpu hotplug callback Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-08 1:22 ` Charlie Jenkins
2025-02-07 16:19 ` [PATCH 8/9] riscv: Implement check_unaligned_access_table Andrew Jones
` (2 subsequent siblings)
9 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
Probing unaligned accesses on boot is time consuming. Provide a
function which will be used to look up the access type in a table
by id registers. Vendors which provide table entries can then skip
the probing.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/unaligned_access_speed.c | 114 ++++++++++++---------
1 file changed, 66 insertions(+), 48 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index d9d4ca1fadc7..f8497097e79d 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -130,6 +130,50 @@ static void __init check_unaligned_access_nonboot_cpu(void *param)
check_unaligned_access(pages[cpu]);
}
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static void __init check_unaligned_access_speed_all_cpus(void)
+{
+ unsigned int cpu;
+ unsigned int cpu_count = num_possible_cpus();
+ struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
+
+ if (!bufs) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ return;
+ }
+
+ /*
+ * Allocate separate buffers for each CPU so there's no fighting over
+ * cache lines.
+ */
+ for_each_cpu(cpu, cpu_online_mask) {
+ bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!bufs[cpu]) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ goto out;
+ }
+ }
+
+ /* Check everybody except 0, who stays behind to tend jiffies. */
+ on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+ /* Check core 0. */
+ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+out:
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (bufs[cpu])
+ __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+ }
+
+ kfree(bufs);
+}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static void __init check_unaligned_access_speed_all_cpus(void)
+{
+}
+#endif
+
DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
@@ -186,8 +230,17 @@ static int __init lock_and_set_unaligned_access_static_branch(void)
arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+static bool check_unaligned_access_table(void)
+{
+ return false;
+}
+
static int riscv_online_cpu(unsigned int cpu)
{
+ if (check_unaligned_access_table())
+ goto exit;
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static struct page *buf;
/* We are already set since the last check */
@@ -203,6 +256,7 @@ static int riscv_online_cpu(unsigned int cpu)
check_unaligned_access(buf);
__free_pages(buf, MISALIGNED_BUFFER_ORDER);
+#endif
exit:
set_unaligned_access_static_branches();
@@ -217,50 +271,6 @@ static int riscv_offline_cpu(unsigned int cpu)
return 0;
}
-/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static void __init check_unaligned_access_speed_all_cpus(void)
-{
- unsigned int cpu;
- unsigned int cpu_count = num_possible_cpus();
- struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
-
- if (!bufs) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- return;
- }
-
- /*
- * Allocate separate buffers for each CPU so there's no fighting over
- * cache lines.
- */
- for_each_cpu(cpu, cpu_online_mask) {
- bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
- if (!bufs[cpu]) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- goto out;
- }
- }
-
- /* Check everybody except 0, who stays behind to tend jiffies. */
- on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-
- /* Check core 0. */
- smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
-
-out:
- for_each_cpu(cpu, cpu_online_mask) {
- if (bufs[cpu])
- __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
- }
-
- kfree(bufs);
-}
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static void __init check_unaligned_access_speed_all_cpus(void)
-{
-}
-#endif
-
#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
static void check_vector_unaligned_access(struct work_struct *work __always_unused)
{
@@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
}
#endif
+static bool check_vector_unaligned_access_table(void)
+{
+ return false;
+}
+
static int riscv_online_cpu_vec(unsigned int cpu)
{
if (!has_vector()) {
@@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
return 0;
}
+ if (check_vector_unaligned_access_table())
+ return 0;
+
#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
return 0;
@@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
{
int cpu;
- if (!check_unaligned_access_emulated_all_cpus())
+ if (!check_unaligned_access_table() &&
+ !check_unaligned_access_emulated_all_cpus())
check_unaligned_access_speed_all_cpus();
if (!has_vector()) {
for_each_online_cpu(cpu)
per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
- } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
+ } else if (!check_vector_unaligned_access_table() &&
+ !check_vector_unaligned_access_emulated_all_cpus() &&
IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
kthread_run(vec_check_unaligned_access_speed_all_cpus,
NULL, "vec_check_unaligned_access_speed_all_cpus");
@@ -408,10 +428,8 @@ static int __init check_unaligned_access_all_cpus(void)
* Setup hotplug callbacks for any new CPUs that come online or go
* offline.
*/
-#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu, riscv_offline_cpu);
-#endif
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu_vec, NULL);
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-07 16:19 ` [PATCH 7/9] riscv: Prepare for unaligned access type table lookups Andrew Jones
@ 2025-02-08 1:22 ` Charlie Jenkins
2025-02-10 9:43 ` Andrew Jones
2025-02-10 10:16 ` Anup Patel
0 siblings, 2 replies; 47+ messages in thread
From: Charlie Jenkins @ 2025-02-08 1:22 UTC (permalink / raw)
To: Andrew Jones
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, jesse,
Anup Patel
On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> Probing unaligned accesses on boot is time consuming. Provide a
> function which will be used to look up the access type in a table
> by id registers. Vendors which provide table entries can then skip
> the probing.
The access checker in my experience is only time consuming on slow
hardware. Hardware that supports fast unaligned accesses isn't really
impacted by this? Avoiding a list of hardware that has slow/fast
unaligned accesses in the kernel was the main reason for dynamically
checking. We did introduce the config option to compile the kernel with
assumed slow/fast accesses, which of course has the downside of
recompiling the kernel and I assume that you already considered that.
Instead of having a table in the kernel, something that would be more
platform agnostic would be to have an extension that signals this
information. That seems like it would accomplish the same goal and
leverage the existing infrastructure in the kernel, albeit with the need
to make a new extension.
- Charlie
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-08 1:22 ` Charlie Jenkins
@ 2025-02-10 9:43 ` Andrew Jones
2025-02-10 17:10 ` Charlie Jenkins
2025-02-10 10:16 ` Anup Patel
1 sibling, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-10 9:43 UTC (permalink / raw)
To: Charlie Jenkins
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, jesse,
Anup Patel
On Fri, Feb 07, 2025 at 05:22:52PM -0800, Charlie Jenkins wrote:
> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> > Probing unaligned accesses on boot is time consuming. Provide a
> > function which will be used to look up the access type in a table
> > by id registers. Vendors which provide table entries can then skip
> > the probing.
>
> The access checker in my experience is only time consuming on slow
> hardware. Hardware that supports fast unaligned accesses isn't really
> impacted by this?
That's true, but...
> Avoiding a list of hardware that has slow/fast
> unaligned accesses in the kernel was the main reason for dynamically
> checking.
...I'm not sure why we should try to avoid determining hardware support
by its description when a description can be provided.
> We did introduce the config option to compile the kernel with
> assumed slow/fast accesses, which of course has the downside of
> recompiling the kernel and I assume that you already considered that.
yup
>
> Instead of having a table in the kernel, something that would be more
> platform agnostic would be to have an extension that signals this
> information. That seems like it would accomplish the same goal and
> leverage the existing infrastructure in the kernel, albeit with the need
> to make a new extension.
Yes, I agree that another profile "named feature" may be the best
approach. I'll consider proposing one, but [1] implies there may be
some resistance to creating something like that.
[1] https://github.com/riscv/riscv-isa-manual/issues/1611
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 9:43 ` Andrew Jones
@ 2025-02-10 17:10 ` Charlie Jenkins
0 siblings, 0 replies; 47+ messages in thread
From: Charlie Jenkins @ 2025-02-10 17:10 UTC (permalink / raw)
To: Andrew Jones
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, jesse,
Anup Patel
On Mon, Feb 10, 2025 at 10:43:18AM +0100, Andrew Jones wrote:
> On Fri, Feb 07, 2025 at 05:22:52PM -0800, Charlie Jenkins wrote:
> > On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> > > Probing unaligned accesses on boot is time consuming. Provide a
> > > function which will be used to look up the access type in a table
> > > by id registers. Vendors which provide table entries can then skip
> > > the probing.
> >
> > The access checker in my experience is only time consuming on slow
> > hardware. Hardware that supports fast unaligned accesses isn't really
> > impacted by this?
>
> That's true, but...
>
> > Avoiding a list of hardware that has slow/fast
> > unaligned accesses in the kernel was the main reason for dynamically
> > checking.
>
> ...I'm not sure why we should try to avoid determining hardware support
> by its description when a description can be provided.
I worry about scalability of this. This to me seems like a slippery
slope of hardcoding performance tables into the kernel. There are a lot
of riscv vendors and allowing anybody to add a table to the kernel to
dynamically change behavior specifically for their hardware could become
a maintainability nightmare. Avoiding this maintainability issue was the
motivation for the runtime checker.
>
> > We did introduce the config option to compile the kernel with
> > assumed slow/fast accesses, which of course has the downside of
> > recompiling the kernel and I assume that you already considered that.
>
> yup
>
> >
> > Instead of having a table in the kernel, something that would be more
> > platform agnostic would be to have an extension that signals this
> > information. That seems like it would accomplish the same goal and
> > leverage the existing infrastructure in the kernel, albeit with the need
> > to make a new extension.
>
> Yes, I agree that another profile "named feature" may be the best
> approach. I'll consider proposing one, but [1] implies there may be
Yeah that thread does highlight the unfortunate way that riscv has
evolved, but I hope that wouldn't be a prohibiting factor here.
- Charlie
> some resistance to creating something like that.
>
> [1] https://github.com/riscv/riscv-isa-manual/issues/1611
>
> Thanks,
> drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-08 1:22 ` Charlie Jenkins
2025-02-10 9:43 ` Andrew Jones
@ 2025-02-10 10:16 ` Anup Patel
2025-02-10 11:07 ` Clément Léger
2025-02-10 17:19 ` Charlie Jenkins
1 sibling, 2 replies; 47+ messages in thread
From: Anup Patel @ 2025-02-10 10:16 UTC (permalink / raw)
To: Charlie Jenkins
Cc: Andrew Jones, linux-riscv, linux-kernel, paul.walmsley, palmer,
jesse, Anup Patel
On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> > Probing unaligned accesses on boot is time consuming. Provide a
> > function which will be used to look up the access type in a table
> > by id registers. Vendors which provide table entries can then skip
> > the probing.
>
> The access checker in my experience is only time consuming on slow
> hardware. Hardware that supports fast unaligned accesses isn't really
> impacted by this? Avoiding a list of hardware that has slow/fast
> unaligned accesses in the kernel was the main reason for dynamically
> checking. We did introduce the config option to compile the kernel with
> assumed slow/fast accesses, which of course has the downside of
> recompiling the kernel and I assume that you already considered that.
The kconfig option does not align with the vision of running the same
kernel image across platforms.
>
> Instead of having a table in the kernel, something that would be more
> platform agnostic would be to have an extension that signals this
> information. That seems like it would accomplish the same goal and
> leverage the existing infrastructure in the kernel, albeit with the need
> to make a new extension.
>
IMO, expecting an ISA extension to be defined for all possible
microarchitectural choices is not going to scale so it is better
to have infrastructure in kernel itself to infer microarchitectural
choices based on RISC-V implementation ID.
Regards,
Anup
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 10:16 ` Anup Patel
@ 2025-02-10 11:07 ` Clément Léger
2025-02-10 14:06 ` Andrew Jones
2025-02-10 17:19 ` Charlie Jenkins
1 sibling, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-10 11:07 UTC (permalink / raw)
To: Anup Patel, Charlie Jenkins
Cc: Andrew Jones, linux-riscv, linux-kernel, paul.walmsley, palmer,
jesse, Anup Patel
On 10/02/2025 11:16, Anup Patel wrote:
> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>
>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>>> Probing unaligned accesses on boot is time consuming. Provide a
>>> function which will be used to look up the access type in a table
>>> by id registers. Vendors which provide table entries can then skip
>>> the probing.
>>
>> The access checker in my experience is only time consuming on slow
>> hardware. Hardware that supports fast unaligned accesses isn't really
>> impacted by this? Avoiding a list of hardware that has slow/fast
>> unaligned accesses in the kernel was the main reason for dynamically
>> checking. We did introduce the config option to compile the kernel with
>> assumed slow/fast accesses, which of course has the downside of
>> recompiling the kernel and I assume that you already considered that.
>
> The kconfig option does not align with the vision of running the same
> kernel image across platforms.
I'd would be advocating to remove compile time options as well and use
another way to skip the probe (see below).
>
>>
>> Instead of having a table in the kernel, something that would be more
>> platform agnostic would be to have an extension that signals this
>> information. That seems like it would accomplish the same goal and
>> leverage the existing infrastructure in the kernel, albeit with the need
>> to make a new extension.
>>
>
> IMO, expecting an ISA extension to be defined for all possible
> microarchitectural choices is not going to scale so it is better
> to have infrastructure in kernel itself to infer microarchitectural
> choices based on RISC-V implementation ID.
Since adding an extension seems quite unlikely, and that a device-tree
property is likely DT centric and not applicable to ACPI as well, was a
command line argument considered ?
Thanks,
Clément
>
> Regards,
> Anup
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 11:07 ` Clément Léger
@ 2025-02-10 14:06 ` Andrew Jones
2025-02-10 14:20 ` Clément Léger
0 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-10 14:06 UTC (permalink / raw)
To: Clément Léger
Cc: Anup Patel, Charlie Jenkins, linux-riscv, linux-kernel,
paul.walmsley, palmer, jesse, Anup Patel
On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
>
>
> On 10/02/2025 11:16, Anup Patel wrote:
> > On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>
> >> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> >>> Probing unaligned accesses on boot is time consuming. Provide a
> >>> function which will be used to look up the access type in a table
> >>> by id registers. Vendors which provide table entries can then skip
> >>> the probing.
> >>
> >> The access checker in my experience is only time consuming on slow
> >> hardware. Hardware that supports fast unaligned accesses isn't really
> >> impacted by this? Avoiding a list of hardware that has slow/fast
> >> unaligned accesses in the kernel was the main reason for dynamically
> >> checking. We did introduce the config option to compile the kernel with
> >> assumed slow/fast accesses, which of course has the downside of
> >> recompiling the kernel and I assume that you already considered that.
> >
> > The kconfig option does not align with the vision of running the same
> > kernel image across platforms.
>
> I'd would be advocating to remove compile time options as well and use
> another way to skip the probe (see below).
>
> >
> >>
> >> Instead of having a table in the kernel, something that would be more
> >> platform agnostic would be to have an extension that signals this
> >> information. That seems like it would accomplish the same goal and
> >> leverage the existing infrastructure in the kernel, albeit with the need
> >> to make a new extension.
> >>
> >
> > IMO, expecting an ISA extension to be defined for all possible
> > microarchitectural choices is not going to scale so it is better
> > to have infrastructure in kernel itself to infer microarchitectural
> > choices based on RISC-V implementation ID.
>
> Since adding an extension seems quite unlikely, and that a device-tree
> property is likely DT centric and not applicable to ACPI as well, was a
> command line argument considered ?
>
I did consider adding a command line option in addition to the table,
allowing platforms which neither have a table entry [yet] nor want to do
the speed test, to set whatever they like. In the end, I dropped it, since
I don't have a use case at this time. However, if we really don't want a
table, then I can look into the command line option instead.
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 14:06 ` Andrew Jones
@ 2025-02-10 14:20 ` Clément Léger
2025-02-10 17:20 ` Charlie Jenkins
0 siblings, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-10 14:20 UTC (permalink / raw)
To: Andrew Jones
Cc: Anup Patel, Charlie Jenkins, linux-riscv, linux-kernel,
paul.walmsley, palmer, jesse, Anup Patel
On 10/02/2025 15:06, Andrew Jones wrote:
> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
>>
>>
>> On 10/02/2025 11:16, Anup Patel wrote:
>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>
>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>>>>> Probing unaligned accesses on boot is time consuming. Provide a
>>>>> function which will be used to look up the access type in a table
>>>>> by id registers. Vendors which provide table entries can then skip
>>>>> the probing.
>>>>
>>>> The access checker in my experience is only time consuming on slow
>>>> hardware. Hardware that supports fast unaligned accesses isn't really
>>>> impacted by this? Avoiding a list of hardware that has slow/fast
>>>> unaligned accesses in the kernel was the main reason for dynamically
>>>> checking. We did introduce the config option to compile the kernel with
>>>> assumed slow/fast accesses, which of course has the downside of
>>>> recompiling the kernel and I assume that you already considered that.
>>>
>>> The kconfig option does not align with the vision of running the same
>>> kernel image across platforms.
>>
>> I'd would be advocating to remove compile time options as well and use
>> another way to skip the probe (see below).
>>
>>>
>>>>
>>>> Instead of having a table in the kernel, something that would be more
>>>> platform agnostic would be to have an extension that signals this
>>>> information. That seems like it would accomplish the same goal and
>>>> leverage the existing infrastructure in the kernel, albeit with the need
>>>> to make a new extension.
>>>>
>>>
>>> IMO, expecting an ISA extension to be defined for all possible
>>> microarchitectural choices is not going to scale so it is better
>>> to have infrastructure in kernel itself to infer microarchitectural
>>> choices based on RISC-V implementation ID.
>>
>> Since adding an extension seems quite unlikely, and that a device-tree
>> property is likely DT centric and not applicable to ACPI as well, was a
>> command line argument considered ?
>>
>
> I did consider adding a command line option in addition to the table,
> allowing platforms which neither have a table entry [yet] nor want to do
> the speed test, to set whatever they like. In the end, I dropped it, since
> I don't have a use case at this time. However, if we really don't want a
> table, then I can look into the command line option instead.
Sorry if I wasn't clear, I wasn't considering this as a replacement for
your table but rather as a replacement to Charlie's compile time define
to skip misaligned speed probing since it is like "lpj=<x>". You can
specify it on command line if you want to skip the loop time detection
of loops per jiffies and have faster boot.
Regarding your table, it feels like a bit going back to old hardcoded
platform description ;). I think some kind of auto-detection of speed
(not builtin the kernel) for platforms could be good as well to skip
probing.
A DT property also seems ok to me since the goal is to describe
hardware. Would a common DT/ACPI property be appropriate ? The
device_property API unified both so if we used some common property to
describe the misaligned access speed (both in DT cpu node/ ACPI CPU
device package), we could keep a single parsing method. But I'm no ACPI
expert so I don't know if that really make sense.
Thanks,
Clément
>
> Thanks,
> drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 14:20 ` Clément Léger
@ 2025-02-10 17:20 ` Charlie Jenkins
2025-02-10 20:42 ` Clément Léger
0 siblings, 1 reply; 47+ messages in thread
From: Charlie Jenkins @ 2025-02-10 17:20 UTC (permalink / raw)
To: Clément Léger
Cc: Andrew Jones, Anup Patel, linux-riscv, linux-kernel,
paul.walmsley, palmer, jesse, Anup Patel
On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
>
>
> On 10/02/2025 15:06, Andrew Jones wrote:
> > On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
> >>
> >>
> >> On 10/02/2025 11:16, Anup Patel wrote:
> >>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>>>
> >>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> >>>>> Probing unaligned accesses on boot is time consuming. Provide a
> >>>>> function which will be used to look up the access type in a table
> >>>>> by id registers. Vendors which provide table entries can then skip
> >>>>> the probing.
> >>>>
> >>>> The access checker in my experience is only time consuming on slow
> >>>> hardware. Hardware that supports fast unaligned accesses isn't really
> >>>> impacted by this? Avoiding a list of hardware that has slow/fast
> >>>> unaligned accesses in the kernel was the main reason for dynamically
> >>>> checking. We did introduce the config option to compile the kernel with
> >>>> assumed slow/fast accesses, which of course has the downside of
> >>>> recompiling the kernel and I assume that you already considered that.
> >>>
> >>> The kconfig option does not align with the vision of running the same
> >>> kernel image across platforms.
> >>
> >> I'd would be advocating to remove compile time options as well and use
> >> another way to skip the probe (see below).
> >>
> >>>
> >>>>
> >>>> Instead of having a table in the kernel, something that would be more
> >>>> platform agnostic would be to have an extension that signals this
> >>>> information. That seems like it would accomplish the same goal and
> >>>> leverage the existing infrastructure in the kernel, albeit with the need
> >>>> to make a new extension.
> >>>>
> >>>
> >>> IMO, expecting an ISA extension to be defined for all possible
> >>> microarchitectural choices is not going to scale so it is better
> >>> to have infrastructure in kernel itself to infer microarchitectural
> >>> choices based on RISC-V implementation ID.
> >>
> >> Since adding an extension seems quite unlikely, and that a device-tree
> >> property is likely DT centric and not applicable to ACPI as well, was a
> >> command line argument considered ?
> >>
> >
> > I did consider adding a command line option in addition to the table,
> > allowing platforms which neither have a table entry [yet] nor want to do
> > the speed test, to set whatever they like. In the end, I dropped it, since
> > I don't have a use case at this time. However, if we really don't want a
> > table, then I can look into the command line option instead.
>
> Sorry if I wasn't clear, I wasn't considering this as a replacement for
> your table but rather as a replacement to Charlie's compile time define
> to skip misaligned speed probing since it is like "lpj=<x>". You can
> specify it on command line if you want to skip the loop time detection
> of loops per jiffies and have faster boot.
Jesse sent out a patch for a kernel parameter to set the access speed to
whatever is desired [1].
- Charlie
> -}
> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> -static void __init check_unaligned_access_speed_all_cpus(void)
> -{
> -}
> -#endif
> -
> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
> {
> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> }
> #endif
>
> +static bool check_vector_unaligned_access_table(void)
> +{
> + return false;
> +}
> +
> static int riscv_online_cpu_vec(unsigned int cpu)
> {
> if (!has_vector()) {
> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> return 0;
> }
>
> + if (check_vector_unaligned_access_table())
> + return 0;
> +
> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> return 0;
> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
> {
> int cpu;
>
> - if (!check_unaligned_access_emulated_all_cpus())
> + if (!check_unaligned_access_table() &&
> + !check_unaligned_access_emulated_all_cpus())
> check_unaligned_access_speed_all_cpus();
>
> if (!has_vector()) {
> for_each_online_cpu(cpu)
> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
> + } else if (!check_vector_unaligned_access_table() &&
> + !check_vector_unaligned_access_emulated_all_cpus() &&
> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> kthread_run(vec_check_unaligned_access_speed_all_cpus,
> NULL, "vec_check_unaligned_access_speed_all_cpus");
>
> Regarding your table, it feels like a bit going back to old hardcoded
> platform description ;). I think some kind of auto-detection of speed
> (not builtin the kernel) for platforms could be good as well to skip
> probing.
>
> A DT property also seems ok to me since the goal is to describe
> hardware. Would a common DT/ACPI property be appropriate ? The
> device_property API unified both so if we used some common property to
> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
> device package), we could keep a single parsing method. But I'm no ACPI
> expert so I don't know if that really make sense.
>
> Thanks,
>
> Clément
>
> >
> > Thanks,
> > drew
>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 17:20 ` Charlie Jenkins
@ 2025-02-10 20:42 ` Clément Léger
2025-02-10 20:53 ` Charlie Jenkins
0 siblings, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-10 20:42 UTC (permalink / raw)
To: Charlie Jenkins
Cc: Andrew Jones, Anup Patel, linux-riscv, linux-kernel,
paul.walmsley, palmer, jesse, Anup Patel
On 10/02/2025 18:20, Charlie Jenkins wrote:
> On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
>>
>>
>> On 10/02/2025 15:06, Andrew Jones wrote:
>>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
>>>>
>>>>
>>>> On 10/02/2025 11:16, Anup Patel wrote:
>>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>
>>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
>>>>>>> function which will be used to look up the access type in a table
>>>>>>> by id registers. Vendors which provide table entries can then skip
>>>>>>> the probing.
>>>>>>
>>>>>> The access checker in my experience is only time consuming on slow
>>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
>>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
>>>>>> unaligned accesses in the kernel was the main reason for dynamically
>>>>>> checking. We did introduce the config option to compile the kernel with
>>>>>> assumed slow/fast accesses, which of course has the downside of
>>>>>> recompiling the kernel and I assume that you already considered that.
>>>>>
>>>>> The kconfig option does not align with the vision of running the same
>>>>> kernel image across platforms.
>>>>
>>>> I'd would be advocating to remove compile time options as well and use
>>>> another way to skip the probe (see below).
>>>>
>>>>>
>>>>>>
>>>>>> Instead of having a table in the kernel, something that would be more
>>>>>> platform agnostic would be to have an extension that signals this
>>>>>> information. That seems like it would accomplish the same goal and
>>>>>> leverage the existing infrastructure in the kernel, albeit with the need
>>>>>> to make a new extension.
>>>>>>
>>>>>
>>>>> IMO, expecting an ISA extension to be defined for all possible
>>>>> microarchitectural choices is not going to scale so it is better
>>>>> to have infrastructure in kernel itself to infer microarchitectural
>>>>> choices based on RISC-V implementation ID.
>>>>
>>>> Since adding an extension seems quite unlikely, and that a device-tree
>>>> property is likely DT centric and not applicable to ACPI as well, was a
>>>> command line argument considered ?
>>>>
>>>
>>> I did consider adding a command line option in addition to the table,
>>> allowing platforms which neither have a table entry [yet] nor want to do
>>> the speed test, to set whatever they like. In the end, I dropped it, since
>>> I don't have a use case at this time. However, if we really don't want a
>>> table, then I can look into the command line option instead.
>>
>> Sorry if I wasn't clear, I wasn't considering this as a replacement for
>> your table but rather as a replacement to Charlie's compile time define
>> to skip misaligned speed probing since it is like "lpj=<x>". You can
>> specify it on command line if you want to skip the loop time detection
>> of loops per jiffies and have faster boot.
>
> Jesse sent out a patch for a kernel parameter to set the access speed to
> whatever is desired [1].
Hey Charlie,
Thanks but it seems you forgot to add the link ?
Having configuration option + command line option seems like something
particularly heavy for such feature. The ifdefery/config options
involved in the misaligned probing code is already quite complicated. If
another mean to specify the misaligned speed access is added, I think
all configuration options to set the speed of accesses can then be
removed and just keep the command line. That will certainly simplify the
ifdef/config options.
Clément
>
> - Charlie
>
>> -}
>> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
>> -static void __init check_unaligned_access_speed_all_cpus(void)
>> -{
>> -}
>> -#endif
>> -
>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
>> {
>> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>> }
>> #endif
>>
>> +static bool check_vector_unaligned_access_table(void)
>> +{
>> + return false;
>> +}
>> +
>> static int riscv_online_cpu_vec(unsigned int cpu)
>> {
>> if (!has_vector()) {
>> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
>> return 0;
>> }
>>
>> + if (check_vector_unaligned_access_table())
>> + return 0;
>> +
>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
>> return 0;
>> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
>> {
>> int cpu;
>>
>> - if (!check_unaligned_access_emulated_all_cpus())
>> + if (!check_unaligned_access_table() &&
>> + !check_unaligned_access_emulated_all_cpus())
>> check_unaligned_access_speed_all_cpus();
>>
>> if (!has_vector()) {
>> for_each_online_cpu(cpu)
>> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
>> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
>> + } else if (!check_vector_unaligned_access_table() &&
>> + !check_vector_unaligned_access_emulated_all_cpus() &&
>> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
>> kthread_run(vec_check_unaligned_access_speed_all_cpus,
>> NULL, "vec_check_unaligned_access_speed_all_cpus");
>
>>
>> Regarding your table, it feels like a bit going back to old hardcoded
>> platform description ;). I think some kind of auto-detection of speed
>> (not builtin the kernel) for platforms could be good as well to skip
>> probing.
>>
>> A DT property also seems ok to me since the goal is to describe
>> hardware. Would a common DT/ACPI property be appropriate ? The
>> device_property API unified both so if we used some common property to
>> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
>> device package), we could keep a single parsing method. But I'm no ACPI
>> expert so I don't know if that really make sense.
>>
>> Thanks,
>>
>> Clément
>>
>>>
>>> Thanks,
>>> drew
>>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 20:42 ` Clément Léger
@ 2025-02-10 20:53 ` Charlie Jenkins
2025-02-10 20:57 ` Clément Léger
0 siblings, 1 reply; 47+ messages in thread
From: Charlie Jenkins @ 2025-02-10 20:53 UTC (permalink / raw)
To: Clément Léger
Cc: Andrew Jones, Anup Patel, linux-riscv, linux-kernel,
paul.walmsley, palmer@dabbelt.com Anup Patel
On Mon, Feb 10, 2025 at 09:42:25PM +0100, Clément Léger wrote:
>
>
> On 10/02/2025 18:20, Charlie Jenkins wrote:
> > On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
> >>
> >>
> >> On 10/02/2025 15:06, Andrew Jones wrote:
> >>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
> >>>>
> >>>>
> >>>> On 10/02/2025 11:16, Anup Patel wrote:
> >>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>>>>>
> >>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> >>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
> >>>>>>> function which will be used to look up the access type in a table
> >>>>>>> by id registers. Vendors which provide table entries can then skip
> >>>>>>> the probing.
> >>>>>>
> >>>>>> The access checker in my experience is only time consuming on slow
> >>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
> >>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
> >>>>>> unaligned accesses in the kernel was the main reason for dynamically
> >>>>>> checking. We did introduce the config option to compile the kernel with
> >>>>>> assumed slow/fast accesses, which of course has the downside of
> >>>>>> recompiling the kernel and I assume that you already considered that.
> >>>>>
> >>>>> The kconfig option does not align with the vision of running the same
> >>>>> kernel image across platforms.
> >>>>
> >>>> I'd would be advocating to remove compile time options as well and use
> >>>> another way to skip the probe (see below).
> >>>>
> >>>>>
> >>>>>>
> >>>>>> Instead of having a table in the kernel, something that would be more
> >>>>>> platform agnostic would be to have an extension that signals this
> >>>>>> information. That seems like it would accomplish the same goal and
> >>>>>> leverage the existing infrastructure in the kernel, albeit with the need
> >>>>>> to make a new extension.
> >>>>>>
> >>>>>
> >>>>> IMO, expecting an ISA extension to be defined for all possible
> >>>>> microarchitectural choices is not going to scale so it is better
> >>>>> to have infrastructure in kernel itself to infer microarchitectural
> >>>>> choices based on RISC-V implementation ID.
> >>>>
> >>>> Since adding an extension seems quite unlikely, and that a device-tree
> >>>> property is likely DT centric and not applicable to ACPI as well, was a
> >>>> command line argument considered ?
> >>>>
> >>>
> >>> I did consider adding a command line option in addition to the table,
> >>> allowing platforms which neither have a table entry [yet] nor want to do
> >>> the speed test, to set whatever they like. In the end, I dropped it, since
> >>> I don't have a use case at this time. However, if we really don't want a
> >>> table, then I can look into the command line option instead.
> >>
> >> Sorry if I wasn't clear, I wasn't considering this as a replacement for
> >> your table but rather as a replacement to Charlie's compile time define
> >> to skip misaligned speed probing since it is like "lpj=<x>". You can
> >> specify it on command line if you want to skip the loop time detection
> >> of loops per jiffies and have faster boot.
> >
> > Jesse sent out a patch for a kernel parameter to set the access speed to
> > whatever is desired [1].
>
> Hey Charlie,
>
> Thanks but it seems you forgot to add the link ?
Oops, I frequently do that...
https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
>
> Having configuration option + command line option seems like something
> particularly heavy for such feature. The ifdefery/config options
> involved in the misaligned probing code is already quite complicated. If
> another mean to specify the misaligned speed access is added, I think
> all configuration options to set the speed of accesses can then be
> removed and just keep the command line. That will certainly simplify the
> ifdef/config options.
Yeah that's why it didn't get merged because it felt like overkill. I
responded on the thread to Anup as why I would prefer config options. It
just comes down to config options being required to enable compiler
features. The kernel is only built with rv64gc and usage of all other
extensions requires hand written assembly. There are easy performance
gains when compiling the kernel with rv64gc_zba_zbb_zbkb etc.
Performance focused kernels will need to be recompiled anyway so I am of
the opinion that grouping in other performance features as config
options like this is the easiest thing to do and reduces the amount of
code in the kernel.
- Charlie
>
> Clément
>
> >
> > - Charlie
> >
> >> -}
> >> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> >> -static void __init check_unaligned_access_speed_all_cpus(void)
> >> -{
> >> -}
> >> -#endif
> >> -
> >> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> >> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
> >> {
> >> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> >> }
> >> #endif
> >>
> >> +static bool check_vector_unaligned_access_table(void)
> >> +{
> >> + return false;
> >> +}
> >> +
> >> static int riscv_online_cpu_vec(unsigned int cpu)
> >> {
> >> if (!has_vector()) {
> >> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> >> return 0;
> >> }
> >>
> >> + if (check_vector_unaligned_access_table())
> >> + return 0;
> >> +
> >> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> >> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> >> return 0;
> >> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
> >> {
> >> int cpu;
> >>
> >> - if (!check_unaligned_access_emulated_all_cpus())
> >> + if (!check_unaligned_access_table() &&
> >> + !check_unaligned_access_emulated_all_cpus())
> >> check_unaligned_access_speed_all_cpus();
> >>
> >> if (!has_vector()) {
> >> for_each_online_cpu(cpu)
> >> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> >> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
> >> + } else if (!check_vector_unaligned_access_table() &&
> >> + !check_vector_unaligned_access_emulated_all_cpus() &&
> >> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> >> kthread_run(vec_check_unaligned_access_speed_all_cpus,
> >> NULL, "vec_check_unaligned_access_speed_all_cpus");
> >
> >>
> >> Regarding your table, it feels like a bit going back to old hardcoded
> >> platform description ;). I think some kind of auto-detection of speed
> >> (not builtin the kernel) for platforms could be good as well to skip
> >> probing.
> >>
> >> A DT property also seems ok to me since the goal is to describe
> >> hardware. Would a common DT/ACPI property be appropriate ? The
> >> device_property API unified both so if we used some common property to
> >> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
> >> device package), we could keep a single parsing method. But I'm no ACPI
> >> expert so I don't know if that really make sense.
> >>
> >> Thanks,
> >>
> >> Clément
> >>
> >>>
> >>> Thanks,
> >>> drew
> >>
>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 20:53 ` Charlie Jenkins
@ 2025-02-10 20:57 ` Clément Léger
2025-02-10 21:13 ` Charlie Jenkins
2025-02-11 4:26 ` Anup Patel
0 siblings, 2 replies; 47+ messages in thread
From: Clément Léger @ 2025-02-10 20:57 UTC (permalink / raw)
To: Charlie Jenkins
Cc: Andrew Jones, Anup Patel, linux-riscv, linux-kernel,
paul.walmsley, palmer@dabbelt.com Anup Patel
On 10/02/2025 21:53, Charlie Jenkins wrote:
> On Mon, Feb 10, 2025 at 09:42:25PM +0100, Clément Léger wrote:
>>
>>
>> On 10/02/2025 18:20, Charlie Jenkins wrote:
>>> On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
>>>>
>>>>
>>>> On 10/02/2025 15:06, Andrew Jones wrote:
>>>>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
>>>>>>
>>>>>>
>>>>>> On 10/02/2025 11:16, Anup Patel wrote:
>>>>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>>>
>>>>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>>>>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
>>>>>>>>> function which will be used to look up the access type in a table
>>>>>>>>> by id registers. Vendors which provide table entries can then skip
>>>>>>>>> the probing.
>>>>>>>>
>>>>>>>> The access checker in my experience is only time consuming on slow
>>>>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
>>>>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
>>>>>>>> unaligned accesses in the kernel was the main reason for dynamically
>>>>>>>> checking. We did introduce the config option to compile the kernel with
>>>>>>>> assumed slow/fast accesses, which of course has the downside of
>>>>>>>> recompiling the kernel and I assume that you already considered that.
>>>>>>>
>>>>>>> The kconfig option does not align with the vision of running the same
>>>>>>> kernel image across platforms.
>>>>>>
>>>>>> I'd would be advocating to remove compile time options as well and use
>>>>>> another way to skip the probe (see below).
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Instead of having a table in the kernel, something that would be more
>>>>>>>> platform agnostic would be to have an extension that signals this
>>>>>>>> information. That seems like it would accomplish the same goal and
>>>>>>>> leverage the existing infrastructure in the kernel, albeit with the need
>>>>>>>> to make a new extension.
>>>>>>>>
>>>>>>>
>>>>>>> IMO, expecting an ISA extension to be defined for all possible
>>>>>>> microarchitectural choices is not going to scale so it is better
>>>>>>> to have infrastructure in kernel itself to infer microarchitectural
>>>>>>> choices based on RISC-V implementation ID.
>>>>>>
>>>>>> Since adding an extension seems quite unlikely, and that a device-tree
>>>>>> property is likely DT centric and not applicable to ACPI as well, was a
>>>>>> command line argument considered ?
>>>>>>
>>>>>
>>>>> I did consider adding a command line option in addition to the table,
>>>>> allowing platforms which neither have a table entry [yet] nor want to do
>>>>> the speed test, to set whatever they like. In the end, I dropped it, since
>>>>> I don't have a use case at this time. However, if we really don't want a
>>>>> table, then I can look into the command line option instead.
>>>>
>>>> Sorry if I wasn't clear, I wasn't considering this as a replacement for
>>>> your table but rather as a replacement to Charlie's compile time define
>>>> to skip misaligned speed probing since it is like "lpj=<x>". You can
>>>> specify it on command line if you want to skip the loop time detection
>>>> of loops per jiffies and have faster boot.
>>>
>>> Jesse sent out a patch for a kernel parameter to set the access speed to
>>> whatever is desired [1].
>>
>> Hey Charlie,
>>
>> Thanks but it seems you forgot to add the link ?
>
> Oops, I frequently do that...
>
> https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
>
>>
>> Having configuration option + command line option seems like something
>> particularly heavy for such feature. The ifdefery/config options
>> involved in the misaligned probing code is already quite complicated. If
>> another mean to specify the misaligned speed access is added, I think
>> all configuration options to set the speed of accesses can then be
>> removed and just keep the command line. That will certainly simplify the
>> ifdef/config options.
>
> Yeah that's why it didn't get merged because it felt like overkill. I
> responded on the thread to Anup as why I would prefer config options. It
> just comes down to config options being required to enable compiler
> features. The kernel is only built with rv64gc and usage of all other
> extensions requires hand written assembly. There are easy performance
> gains when compiling the kernel with rv64gc_zba_zbb_zbkb etc.
> Performance focused kernels will need to be recompiled anyway so I am of
> the opinion that grouping in other performance features as config
> options like this is the easiest thing to do and reduces the amount of
> code in the kernel.
As answered on the other thread, totally agree, except for the
misaligned accesses probing config options ;). Ultimately, we need
profiles configuration, either via defconfigs that enables a bunch of
optimization via ISA extension or configuration options that groups
these config options.
Clément
>
> - Charlie
>
>>
>> Clément
>>
>>>
>>> - Charlie
>>>
>>>> -}
>>>> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
>>>> -static void __init check_unaligned_access_speed_all_cpus(void)
>>>> -{
>>>> -}
>>>> -#endif
>>>> -
>>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>>>> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
>>>> {
>>>> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>>>> }
>>>> #endif
>>>>
>>>> +static bool check_vector_unaligned_access_table(void)
>>>> +{
>>>> + return false;
>>>> +}
>>>> +
>>>> static int riscv_online_cpu_vec(unsigned int cpu)
>>>> {
>>>> if (!has_vector()) {
>>>> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
>>>> return 0;
>>>> }
>>>>
>>>> + if (check_vector_unaligned_access_table())
>>>> + return 0;
>>>> +
>>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>>>> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
>>>> return 0;
>>>> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
>>>> {
>>>> int cpu;
>>>>
>>>> - if (!check_unaligned_access_emulated_all_cpus())
>>>> + if (!check_unaligned_access_table() &&
>>>> + !check_unaligned_access_emulated_all_cpus())
>>>> check_unaligned_access_speed_all_cpus();
>>>>
>>>> if (!has_vector()) {
>>>> for_each_online_cpu(cpu)
>>>> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
>>>> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
>>>> + } else if (!check_vector_unaligned_access_table() &&
>>>> + !check_vector_unaligned_access_emulated_all_cpus() &&
>>>> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
>>>> kthread_run(vec_check_unaligned_access_speed_all_cpus,
>>>> NULL, "vec_check_unaligned_access_speed_all_cpus");
>>>
>>>>
>>>> Regarding your table, it feels like a bit going back to old hardcoded
>>>> platform description ;). I think some kind of auto-detection of speed
>>>> (not builtin the kernel) for platforms could be good as well to skip
>>>> probing.
>>>>
>>>> A DT property also seems ok to me since the goal is to describe
>>>> hardware. Would a common DT/ACPI property be appropriate ? The
>>>> device_property API unified both so if we used some common property to
>>>> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
>>>> device package), we could keep a single parsing method. But I'm no ACPI
>>>> expert so I don't know if that really make sense.
>>>>
>>>> Thanks,
>>>>
>>>> Clément
>>>>
>>>>>
>>>>> Thanks,
>>>>> drew
>>>>
>>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 20:57 ` Clément Léger
@ 2025-02-10 21:13 ` Charlie Jenkins
2025-02-11 4:26 ` Anup Patel
1 sibling, 0 replies; 47+ messages in thread
From: Charlie Jenkins @ 2025-02-10 21:13 UTC (permalink / raw)
To: Clément Léger
Cc: Andrew Jones, Anup Patel, linux-riscv, linux-kernel,
paul.walmsley, palmer@dabbelt.com Anup Patel
On Mon, Feb 10, 2025 at 09:57:26PM +0100, Clément Léger wrote:
>
>
> On 10/02/2025 21:53, Charlie Jenkins wrote:
> > On Mon, Feb 10, 2025 at 09:42:25PM +0100, Clément Léger wrote:
> >>
> >>
> >> On 10/02/2025 18:20, Charlie Jenkins wrote:
> >>> On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
> >>>>
> >>>>
> >>>> On 10/02/2025 15:06, Andrew Jones wrote:
> >>>>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 10/02/2025 11:16, Anup Patel wrote:
> >>>>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>>>>>>>
> >>>>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> >>>>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
> >>>>>>>>> function which will be used to look up the access type in a table
> >>>>>>>>> by id registers. Vendors which provide table entries can then skip
> >>>>>>>>> the probing.
> >>>>>>>>
> >>>>>>>> The access checker in my experience is only time consuming on slow
> >>>>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
> >>>>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
> >>>>>>>> unaligned accesses in the kernel was the main reason for dynamically
> >>>>>>>> checking. We did introduce the config option to compile the kernel with
> >>>>>>>> assumed slow/fast accesses, which of course has the downside of
> >>>>>>>> recompiling the kernel and I assume that you already considered that.
> >>>>>>>
> >>>>>>> The kconfig option does not align with the vision of running the same
> >>>>>>> kernel image across platforms.
> >>>>>>
> >>>>>> I'd would be advocating to remove compile time options as well and use
> >>>>>> another way to skip the probe (see below).
> >>>>>>
> >>>>>>>
> >>>>>>>>
> >>>>>>>> Instead of having a table in the kernel, something that would be more
> >>>>>>>> platform agnostic would be to have an extension that signals this
> >>>>>>>> information. That seems like it would accomplish the same goal and
> >>>>>>>> leverage the existing infrastructure in the kernel, albeit with the need
> >>>>>>>> to make a new extension.
> >>>>>>>>
> >>>>>>>
> >>>>>>> IMO, expecting an ISA extension to be defined for all possible
> >>>>>>> microarchitectural choices is not going to scale so it is better
> >>>>>>> to have infrastructure in kernel itself to infer microarchitectural
> >>>>>>> choices based on RISC-V implementation ID.
> >>>>>>
> >>>>>> Since adding an extension seems quite unlikely, and that a device-tree
> >>>>>> property is likely DT centric and not applicable to ACPI as well, was a
> >>>>>> command line argument considered ?
> >>>>>>
> >>>>>
> >>>>> I did consider adding a command line option in addition to the table,
> >>>>> allowing platforms which neither have a table entry [yet] nor want to do
> >>>>> the speed test, to set whatever they like. In the end, I dropped it, since
> >>>>> I don't have a use case at this time. However, if we really don't want a
> >>>>> table, then I can look into the command line option instead.
> >>>>
> >>>> Sorry if I wasn't clear, I wasn't considering this as a replacement for
> >>>> your table but rather as a replacement to Charlie's compile time define
> >>>> to skip misaligned speed probing since it is like "lpj=<x>". You can
> >>>> specify it on command line if you want to skip the loop time detection
> >>>> of loops per jiffies and have faster boot.
> >>>
> >>> Jesse sent out a patch for a kernel parameter to set the access speed to
> >>> whatever is desired [1].
> >>
> >> Hey Charlie,
> >>
> >> Thanks but it seems you forgot to add the link ?
> >
> > Oops, I frequently do that...
> >
> > https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
> >
> >>
> >> Having configuration option + command line option seems like something
> >> particularly heavy for such feature. The ifdefery/config options
> >> involved in the misaligned probing code is already quite complicated. If
> >> another mean to specify the misaligned speed access is added, I think
> >> all configuration options to set the speed of accesses can then be
> >> removed and just keep the command line. That will certainly simplify the
> >> ifdef/config options.
> >
> > Yeah that's why it didn't get merged because it felt like overkill. I
> > responded on the thread to Anup as why I would prefer config options. It
> > just comes down to config options being required to enable compiler
> > features. The kernel is only built with rv64gc and usage of all other
> > extensions requires hand written assembly. There are easy performance
> > gains when compiling the kernel with rv64gc_zba_zbb_zbkb etc.
> > Performance focused kernels will need to be recompiled anyway so I am of
> > the opinion that grouping in other performance features as config
> > options like this is the easiest thing to do and reduces the amount of
> > code in the kernel.
>
> As answered on the other thread, totally agree, except for the
> misaligned accesses probing config options ;).
Oh! I have missed that response, where is that?
> Ultimately, we need
> profiles configuration, either via defconfigs that enables a bunch of
> optimization via ISA extension or configuration options that groups
> these config options.
Why do you agree with profile configs for other things but not for
misaligned access probing?
>
> Clément
>
> >
> > - Charlie
> >
> >>
> >> Clément
> >>
> >>>
> >>> - Charlie
> >>>
> >>>> -}
> >>>> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> >>>> -static void __init check_unaligned_access_speed_all_cpus(void)
> >>>> -{
> >>>> -}
> >>>> -#endif
> >>>> -
> >>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> >>>> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
> >>>> {
> >>>> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> >>>> }
> >>>> #endif
> >>>>
> >>>> +static bool check_vector_unaligned_access_table(void)
> >>>> +{
> >>>> + return false;
> >>>> +}
> >>>> +
> >>>> static int riscv_online_cpu_vec(unsigned int cpu)
> >>>> {
> >>>> if (!has_vector()) {
> >>>> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> >>>> return 0;
> >>>> }
> >>>>
> >>>> + if (check_vector_unaligned_access_table())
> >>>> + return 0;
> >>>> +
> >>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> >>>> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> >>>> return 0;
> >>>> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
> >>>> {
> >>>> int cpu;
> >>>>
> >>>> - if (!check_unaligned_access_emulated_all_cpus())
> >>>> + if (!check_unaligned_access_table() &&
> >>>> + !check_unaligned_access_emulated_all_cpus())
> >>>> check_unaligned_access_speed_all_cpus();
> >>>>
> >>>> if (!has_vector()) {
> >>>> for_each_online_cpu(cpu)
> >>>> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> >>>> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
> >>>> + } else if (!check_vector_unaligned_access_table() &&
> >>>> + !check_vector_unaligned_access_emulated_all_cpus() &&
> >>>> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> >>>> kthread_run(vec_check_unaligned_access_speed_all_cpus,
> >>>> NULL, "vec_check_unaligned_access_speed_all_cpus");
> >>>
> >>>>
> >>>> Regarding your table, it feels like a bit going back to old hardcoded
> >>>> platform description ;). I think some kind of auto-detection of speed
> >>>> (not builtin the kernel) for platforms could be good as well to skip
> >>>> probing.
> >>>>
> >>>> A DT property also seems ok to me since the goal is to describe
> >>>> hardware. Would a common DT/ACPI property be appropriate ? The
> >>>> device_property API unified both so if we used some common property to
> >>>> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
> >>>> device package), we could keep a single parsing method. But I'm no ACPI
> >>>> expert so I don't know if that really make sense.
> >>>>
> >>>> Thanks,
> >>>>
> >>>> Clément
> >>>>
> >>>>>
> >>>>> Thanks,
> >>>>> drew
> >>>>
> >>
>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 20:57 ` Clément Léger
2025-02-10 21:13 ` Charlie Jenkins
@ 2025-02-11 4:26 ` Anup Patel
2025-02-11 8:37 ` Clément Léger
2025-02-11 18:09 ` Palmer Dabbelt
1 sibling, 2 replies; 47+ messages in thread
From: Anup Patel @ 2025-02-11 4:26 UTC (permalink / raw)
To: Clément Léger
Cc: Charlie Jenkins, Andrew Jones, Anup Patel, linux-riscv,
linux-kernel, paul.walmsley
On Tue, Feb 11, 2025 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>
>
>
> On 10/02/2025 21:53, Charlie Jenkins wrote:
> > On Mon, Feb 10, 2025 at 09:42:25PM +0100, Clément Léger wrote:
> >>
> >>
> >> On 10/02/2025 18:20, Charlie Jenkins wrote:
> >>> On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
> >>>>
> >>>>
> >>>> On 10/02/2025 15:06, Andrew Jones wrote:
> >>>>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 10/02/2025 11:16, Anup Patel wrote:
> >>>>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>>>>>>>
> >>>>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> >>>>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
> >>>>>>>>> function which will be used to look up the access type in a table
> >>>>>>>>> by id registers. Vendors which provide table entries can then skip
> >>>>>>>>> the probing.
> >>>>>>>>
> >>>>>>>> The access checker in my experience is only time consuming on slow
> >>>>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
> >>>>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
> >>>>>>>> unaligned accesses in the kernel was the main reason for dynamically
> >>>>>>>> checking. We did introduce the config option to compile the kernel with
> >>>>>>>> assumed slow/fast accesses, which of course has the downside of
> >>>>>>>> recompiling the kernel and I assume that you already considered that.
> >>>>>>>
> >>>>>>> The kconfig option does not align with the vision of running the same
> >>>>>>> kernel image across platforms.
> >>>>>>
> >>>>>> I'd would be advocating to remove compile time options as well and use
> >>>>>> another way to skip the probe (see below).
> >>>>>>
> >>>>>>>
> >>>>>>>>
> >>>>>>>> Instead of having a table in the kernel, something that would be more
> >>>>>>>> platform agnostic would be to have an extension that signals this
> >>>>>>>> information. That seems like it would accomplish the same goal and
> >>>>>>>> leverage the existing infrastructure in the kernel, albeit with the need
> >>>>>>>> to make a new extension.
> >>>>>>>>
> >>>>>>>
> >>>>>>> IMO, expecting an ISA extension to be defined for all possible
> >>>>>>> microarchitectural choices is not going to scale so it is better
> >>>>>>> to have infrastructure in kernel itself to infer microarchitectural
> >>>>>>> choices based on RISC-V implementation ID.
> >>>>>>
> >>>>>> Since adding an extension seems quite unlikely, and that a device-tree
> >>>>>> property is likely DT centric and not applicable to ACPI as well, was a
> >>>>>> command line argument considered ?
> >>>>>>
> >>>>>
> >>>>> I did consider adding a command line option in addition to the table,
> >>>>> allowing platforms which neither have a table entry [yet] nor want to do
> >>>>> the speed test, to set whatever they like. In the end, I dropped it, since
> >>>>> I don't have a use case at this time. However, if we really don't want a
> >>>>> table, then I can look into the command line option instead.
> >>>>
> >>>> Sorry if I wasn't clear, I wasn't considering this as a replacement for
> >>>> your table but rather as a replacement to Charlie's compile time define
> >>>> to skip misaligned speed probing since it is like "lpj=<x>". You can
> >>>> specify it on command line if you want to skip the loop time detection
> >>>> of loops per jiffies and have faster boot.
> >>>
> >>> Jesse sent out a patch for a kernel parameter to set the access speed to
> >>> whatever is desired [1].
> >>
> >> Hey Charlie,
> >>
> >> Thanks but it seems you forgot to add the link ?
> >
> > Oops, I frequently do that...
> >
> > https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
> >
> >>
> >> Having configuration option + command line option seems like something
> >> particularly heavy for such feature. The ifdefery/config options
> >> involved in the misaligned probing code is already quite complicated. If
> >> another mean to specify the misaligned speed access is added, I think
> >> all configuration options to set the speed of accesses can then be
> >> removed and just keep the command line. That will certainly simplify the
> >> ifdef/config options.
> >
> > Yeah that's why it didn't get merged because it felt like overkill. I
> > responded on the thread to Anup as why I would prefer config options. It
> > just comes down to config options being required to enable compiler
> > features. The kernel is only built with rv64gc and usage of all other
> > extensions requires hand written assembly. There are easy performance
> > gains when compiling the kernel with rv64gc_zba_zbb_zbkb etc.
> > Performance focused kernels will need to be recompiled anyway so I am of
> > the opinion that grouping in other performance features as config
> > options like this is the easiest thing to do and reduces the amount of
> > code in the kernel.
>
> As answered on the other thread, totally agree, except for the
> misaligned accesses probing config options ;). Ultimately, we need
> profiles configuration, either via defconfigs that enables a bunch of
> optimization via ISA extension or configuration options that groups
> these config options.
I agree.
Kconfig options for ISA extensions are fine but not for
misaligned access. The kernel command-line option to skip
misaligned access probe is fine as well. I still think RISC-V
implementation ID based microarchitecture feature discovery
will be eventually required as well so why not add it now.
For profiles, how about having an incremental rva23.config
similar to 64-bit.config ? This will minimize the duplication
by re-using the same base defconfig.
Regards,
Anup
>
> Clément
>
> >
> > - Charlie
> >
> >>
> >> Clément
> >>
> >>>
> >>> - Charlie
> >>>
> >>>> -}
> >>>> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
> >>>> -static void __init check_unaligned_access_speed_all_cpus(void)
> >>>> -{
> >>>> -}
> >>>> -#endif
> >>>> -
> >>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> >>>> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
> >>>> {
> >>>> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
> >>>> }
> >>>> #endif
> >>>>
> >>>> +static bool check_vector_unaligned_access_table(void)
> >>>> +{
> >>>> + return false;
> >>>> +}
> >>>> +
> >>>> static int riscv_online_cpu_vec(unsigned int cpu)
> >>>> {
> >>>> if (!has_vector()) {
> >>>> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
> >>>> return 0;
> >>>> }
> >>>>
> >>>> + if (check_vector_unaligned_access_table())
> >>>> + return 0;
> >>>> +
> >>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
> >>>> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
> >>>> return 0;
> >>>> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
> >>>> {
> >>>> int cpu;
> >>>>
> >>>> - if (!check_unaligned_access_emulated_all_cpus())
> >>>> + if (!check_unaligned_access_table() &&
> >>>> + !check_unaligned_access_emulated_all_cpus())
> >>>> check_unaligned_access_speed_all_cpus();
> >>>>
> >>>> if (!has_vector()) {
> >>>> for_each_online_cpu(cpu)
> >>>> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
> >>>> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
> >>>> + } else if (!check_vector_unaligned_access_table() &&
> >>>> + !check_vector_unaligned_access_emulated_all_cpus() &&
> >>>> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> >>>> kthread_run(vec_check_unaligned_access_speed_all_cpus,
> >>>> NULL, "vec_check_unaligned_access_speed_all_cpus");
> >>>
> >>>>
> >>>> Regarding your table, it feels like a bit going back to old hardcoded
> >>>> platform description ;). I think some kind of auto-detection of speed
> >>>> (not builtin the kernel) for platforms could be good as well to skip
> >>>> probing.
> >>>>
> >>>> A DT property also seems ok to me since the goal is to describe
> >>>> hardware. Would a common DT/ACPI property be appropriate ? The
> >>>> device_property API unified both so if we used some common property to
> >>>> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
> >>>> device package), we could keep a single parsing method. But I'm no ACPI
> >>>> expert so I don't know if that really make sense.
> >>>>
> >>>> Thanks,
> >>>>
> >>>> Clément
> >>>>
> >>>>>
> >>>>> Thanks,
> >>>>> drew
> >>>>
> >>
>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-11 4:26 ` Anup Patel
@ 2025-02-11 8:37 ` Clément Léger
2025-02-11 18:09 ` Palmer Dabbelt
1 sibling, 0 replies; 47+ messages in thread
From: Clément Léger @ 2025-02-11 8:37 UTC (permalink / raw)
To: Anup Patel
Cc: Charlie Jenkins, Andrew Jones, Anup Patel, linux-riscv,
linux-kernel, paul.walmsley
On 11/02/2025 05:26, Anup Patel wrote:
> On Tue, Feb 11, 2025 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>>
>>
>>
>> On 10/02/2025 21:53, Charlie Jenkins wrote:
>>> On Mon, Feb 10, 2025 at 09:42:25PM +0100, Clément Léger wrote:
>>>>
>>>>
>>>> On 10/02/2025 18:20, Charlie Jenkins wrote:
>>>>> On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
>>>>>>
>>>>>>
>>>>>> On 10/02/2025 15:06, Andrew Jones wrote:
>>>>>>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 10/02/2025 11:16, Anup Patel wrote:
>>>>>>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>>>>>>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
>>>>>>>>>>> function which will be used to look up the access type in a table
>>>>>>>>>>> by id registers. Vendors which provide table entries can then skip
>>>>>>>>>>> the probing.
>>>>>>>>>>
>>>>>>>>>> The access checker in my experience is only time consuming on slow
>>>>>>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
>>>>>>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
>>>>>>>>>> unaligned accesses in the kernel was the main reason for dynamically
>>>>>>>>>> checking. We did introduce the config option to compile the kernel with
>>>>>>>>>> assumed slow/fast accesses, which of course has the downside of
>>>>>>>>>> recompiling the kernel and I assume that you already considered that.
>>>>>>>>>
>>>>>>>>> The kconfig option does not align with the vision of running the same
>>>>>>>>> kernel image across platforms.
>>>>>>>>
>>>>>>>> I'd would be advocating to remove compile time options as well and use
>>>>>>>> another way to skip the probe (see below).
>>>>>>>>
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Instead of having a table in the kernel, something that would be more
>>>>>>>>>> platform agnostic would be to have an extension that signals this
>>>>>>>>>> information. That seems like it would accomplish the same goal and
>>>>>>>>>> leverage the existing infrastructure in the kernel, albeit with the need
>>>>>>>>>> to make a new extension.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> IMO, expecting an ISA extension to be defined for all possible
>>>>>>>>> microarchitectural choices is not going to scale so it is better
>>>>>>>>> to have infrastructure in kernel itself to infer microarchitectural
>>>>>>>>> choices based on RISC-V implementation ID.
>>>>>>>>
>>>>>>>> Since adding an extension seems quite unlikely, and that a device-tree
>>>>>>>> property is likely DT centric and not applicable to ACPI as well, was a
>>>>>>>> command line argument considered ?
>>>>>>>>
>>>>>>>
>>>>>>> I did consider adding a command line option in addition to the table,
>>>>>>> allowing platforms which neither have a table entry [yet] nor want to do
>>>>>>> the speed test, to set whatever they like. In the end, I dropped it, since
>>>>>>> I don't have a use case at this time. However, if we really don't want a
>>>>>>> table, then I can look into the command line option instead.
>>>>>>
>>>>>> Sorry if I wasn't clear, I wasn't considering this as a replacement for
>>>>>> your table but rather as a replacement to Charlie's compile time define
>>>>>> to skip misaligned speed probing since it is like "lpj=<x>". You can
>>>>>> specify it on command line if you want to skip the loop time detection
>>>>>> of loops per jiffies and have faster boot.
>>>>>
>>>>> Jesse sent out a patch for a kernel parameter to set the access speed to
>>>>> whatever is desired [1].
>>>>
>>>> Hey Charlie,
>>>>
>>>> Thanks but it seems you forgot to add the link ?
>>>
>>> Oops, I frequently do that...
>>>
>>> https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
>>>
>>>>
>>>> Having configuration option + command line option seems like something
>>>> particularly heavy for such feature. The ifdefery/config options
>>>> involved in the misaligned probing code is already quite complicated. If
>>>> another mean to specify the misaligned speed access is added, I think
>>>> all configuration options to set the speed of accesses can then be
>>>> removed and just keep the command line. That will certainly simplify the
>>>> ifdef/config options.
>>>
>>> Yeah that's why it didn't get merged because it felt like overkill. I
>>> responded on the thread to Anup as why I would prefer config options. It
>>> just comes down to config options being required to enable compiler
>>> features. The kernel is only built with rv64gc and usage of all other
>>> extensions requires hand written assembly. There are easy performance
>>> gains when compiling the kernel with rv64gc_zba_zbb_zbkb etc.
>>> Performance focused kernels will need to be recompiled anyway so I am of
>>> the opinion that grouping in other performance features as config
>>> options like this is the easiest thing to do and reduces the amount of
>>> code in the kernel.
>>
>> As answered on the other thread, totally agree, except for the
>> misaligned accesses probing config options ;). Ultimately, we need
>> profiles configuration, either via defconfigs that enables a bunch of
>> optimization via ISA extension or configuration options that groups
>> these config options.
>
> I agree.
>
> Kconfig options for ISA extensions are fine but not for
> misaligned access. The kernel command-line option to skip
> misaligned access probe is fine as well. I still think RISC-V
> implementation ID based microarchitecture feature discovery
> will be eventually required as well so why not add it now.
As said by Charlie, keeping an internal list of IDs that needs to be
updated/submitted to add new ones is quite a burden to maintain. IMHO,
this would be better to be put in ACPI or DT description since it purely
describe hardware behavior.
But you said it will be "eventually required" so you might have some
specific use-case in mind ?
>
> For profiles, how about having an incremental rva23.config
> similar to 64-bit.config ? This will minimize the duplication
> by re-using the same base defconfig.
.config or a CONFIG_PROFILE option that allow to select the profile for
to be supported (CONFIG_PROFILE_RVA23). Using profile config options
might be better though since we can have better dependencies/select
between profiles rather than duplicating them in various profile
.config. So I might be in favor of a CONFIG_PROFILE rather than a
.config per profile.
BTW, that's not totally exclusive, if we want to add a rva23.config that
contains CONFIG_PROFILE_RVA23=y, that works as well, although a bit
overkill ;).
Regards,
Clément
>
> Regards,
> Anup
>
>
>
>>
>> Clément
>>
>>>
>>> - Charlie
>>>
>>>>
>>>> Clément
>>>>
>>>>>
>>>>> - Charlie
>>>>>
>>>>>> -}
>>>>>> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
>>>>>> -static void __init check_unaligned_access_speed_all_cpus(void)
>>>>>> -{
>>>>>> -}
>>>>>> -#endif
>>>>>> -
>>>>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>>>>>> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
>>>>>> {
>>>>>> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>>>>>> }
>>>>>> #endif
>>>>>>
>>>>>> +static bool check_vector_unaligned_access_table(void)
>>>>>> +{
>>>>>> + return false;
>>>>>> +}
>>>>>> +
>>>>>> static int riscv_online_cpu_vec(unsigned int cpu)
>>>>>> {
>>>>>> if (!has_vector()) {
>>>>>> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
>>>>>> return 0;
>>>>>> }
>>>>>>
>>>>>> + if (check_vector_unaligned_access_table())
>>>>>> + return 0;
>>>>>> +
>>>>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>>>>>> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
>>>>>> return 0;
>>>>>> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
>>>>>> {
>>>>>> int cpu;
>>>>>>
>>>>>> - if (!check_unaligned_access_emulated_all_cpus())
>>>>>> + if (!check_unaligned_access_table() &&
>>>>>> + !check_unaligned_access_emulated_all_cpus())
>>>>>> check_unaligned_access_speed_all_cpus();
>>>>>>
>>>>>> if (!has_vector()) {
>>>>>> for_each_online_cpu(cpu)
>>>>>> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
>>>>>> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
>>>>>> + } else if (!check_vector_unaligned_access_table() &&
>>>>>> + !check_vector_unaligned_access_emulated_all_cpus() &&
>>>>>> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
>>>>>> kthread_run(vec_check_unaligned_access_speed_all_cpus,
>>>>>> NULL, "vec_check_unaligned_access_speed_all_cpus");
>>>>>
>>>>>>
>>>>>> Regarding your table, it feels like a bit going back to old hardcoded
>>>>>> platform description ;). I think some kind of auto-detection of speed
>>>>>> (not builtin the kernel) for platforms could be good as well to skip
>>>>>> probing.
>>>>>>
>>>>>> A DT property also seems ok to me since the goal is to describe
>>>>>> hardware. Would a common DT/ACPI property be appropriate ? The
>>>>>> device_property API unified both so if we used some common property to
>>>>>> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
>>>>>> device package), we could keep a single parsing method. But I'm no ACPI
>>>>>> expert so I don't know if that really make sense.
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>> Clément
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>> drew
>>>>>>
>>>>
>>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-11 4:26 ` Anup Patel
2025-02-11 8:37 ` Clément Léger
@ 2025-02-11 18:09 ` Palmer Dabbelt
1 sibling, 0 replies; 47+ messages in thread
From: Palmer Dabbelt @ 2025-02-11 18:09 UTC (permalink / raw)
To: apatel, Conor Dooley
Cc: cleger, Charlie Jenkins, ajones, anup, linux-riscv, linux-kernel,
Paul Walmsley
On Mon, 10 Feb 2025 20:26:32 PST (-0800), apatel@ventanamicro.com wrote:
> On Tue, Feb 11, 2025 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>>
>>
>>
>> On 10/02/2025 21:53, Charlie Jenkins wrote:
>> > On Mon, Feb 10, 2025 at 09:42:25PM +0100, Clément Léger wrote:
>> >>
>> >>
>> >> On 10/02/2025 18:20, Charlie Jenkins wrote:
>> >>> On Mon, Feb 10, 2025 at 03:20:34PM +0100, Clément Léger wrote:
>> >>>>
>> >>>>
>> >>>> On 10/02/2025 15:06, Andrew Jones wrote:
>> >>>>> On Mon, Feb 10, 2025 at 12:07:40PM +0100, Clément Léger wrote:
>> >>>>>>
>> >>>>>>
>> >>>>>> On 10/02/2025 11:16, Anup Patel wrote:
>> >>>>>>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>> >>>>>>>>
>> >>>>>>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>> >>>>>>>>> Probing unaligned accesses on boot is time consuming. Provide a
>> >>>>>>>>> function which will be used to look up the access type in a table
>> >>>>>>>>> by id registers. Vendors which provide table entries can then skip
>> >>>>>>>>> the probing.
>> >>>>>>>>
>> >>>>>>>> The access checker in my experience is only time consuming on slow
>> >>>>>>>> hardware. Hardware that supports fast unaligned accesses isn't really
>> >>>>>>>> impacted by this? Avoiding a list of hardware that has slow/fast
>> >>>>>>>> unaligned accesses in the kernel was the main reason for dynamically
>> >>>>>>>> checking. We did introduce the config option to compile the kernel with
>> >>>>>>>> assumed slow/fast accesses, which of course has the downside of
>> >>>>>>>> recompiling the kernel and I assume that you already considered that.
>> >>>>>>>
>> >>>>>>> The kconfig option does not align with the vision of running the same
>> >>>>>>> kernel image across platforms.
>> >>>>>>
>> >>>>>> I'd would be advocating to remove compile time options as well and use
>> >>>>>> another way to skip the probe (see below).
>> >>>>>>
>> >>>>>>>
>> >>>>>>>>
>> >>>>>>>> Instead of having a table in the kernel, something that would be more
>> >>>>>>>> platform agnostic would be to have an extension that signals this
>> >>>>>>>> information. That seems like it would accomplish the same goal and
>> >>>>>>>> leverage the existing infrastructure in the kernel, albeit with the need
>> >>>>>>>> to make a new extension.
>> >>>>>>>>
>> >>>>>>>
>> >>>>>>> IMO, expecting an ISA extension to be defined for all possible
>> >>>>>>> microarchitectural choices is not going to scale so it is better
>> >>>>>>> to have infrastructure in kernel itself to infer microarchitectural
>> >>>>>>> choices based on RISC-V implementation ID.
>> >>>>>>
>> >>>>>> Since adding an extension seems quite unlikely, and that a device-tree
>> >>>>>> property is likely DT centric and not applicable to ACPI as well, was a
>> >>>>>> command line argument considered ?
>> >>>>>>
>> >>>>>
>> >>>>> I did consider adding a command line option in addition to the table,
>> >>>>> allowing platforms which neither have a table entry [yet] nor want to do
>> >>>>> the speed test, to set whatever they like. In the end, I dropped it, since
>> >>>>> I don't have a use case at this time. However, if we really don't want a
>> >>>>> table, then I can look into the command line option instead.
>> >>>>
>> >>>> Sorry if I wasn't clear, I wasn't considering this as a replacement for
>> >>>> your table but rather as a replacement to Charlie's compile time define
>> >>>> to skip misaligned speed probing since it is like "lpj=<x>". You can
>> >>>> specify it on command line if you want to skip the loop time detection
>> >>>> of loops per jiffies and have faster boot.
>> >>>
>> >>> Jesse sent out a patch for a kernel parameter to set the access speed to
>> >>> whatever is desired [1].
>> >>
>> >> Hey Charlie,
>> >>
>> >> Thanks but it seems you forgot to add the link ?
>> >
>> > Oops, I frequently do that...
>> >
>> > https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
>> >
>> >>
>> >> Having configuration option + command line option seems like something
>> >> particularly heavy for such feature. The ifdefery/config options
>> >> involved in the misaligned probing code is already quite complicated. If
>> >> another mean to specify the misaligned speed access is added, I think
>> >> all configuration options to set the speed of accesses can then be
>> >> removed and just keep the command line. That will certainly simplify the
>> >> ifdef/config options.
>> >
>> > Yeah that's why it didn't get merged because it felt like overkill. I
>> > responded on the thread to Anup as why I would prefer config options. It
>> > just comes down to config options being required to enable compiler
>> > features. The kernel is only built with rv64gc and usage of all other
>> > extensions requires hand written assembly. There are easy performance
>> > gains when compiling the kernel with rv64gc_zba_zbb_zbkb etc.
>> > Performance focused kernels will need to be recompiled anyway so I am of
>> > the opinion that grouping in other performance features as config
>> > options like this is the easiest thing to do and reduces the amount of
>> > code in the kernel.
>>
>> As answered on the other thread, totally agree, except for the
>> misaligned accesses probing config options ;). Ultimately, we need
>> profiles configuration, either via defconfigs that enables a bunch of
>> optimization via ISA extension or configuration options that groups
>> these config options.
>
> I agree.
>
> Kconfig options for ISA extensions are fine but not for
> misaligned access. The kernel command-line option to skip
> misaligned access probe is fine as well.
IMO it's reasonable to have a Kconfig option that does something like
"build a kernel that assumes misaligned accesses are fast and
supported". We've got a bunch of other platform-level opt ins along
those lines (drivers, extensions, etrrata, memory stuff, etc). It would
almost certainly be a measurable performance boost to enable misaligned
accesses when building Linux, it is for every other big codebase.
> I still think RISC-V
> implementation ID based microarchitecture feature discovery
> will be eventually required as well so why not add it now.
Ya, the probing loops are nasty. We originally tried adding this to the
DT so we didn't have to probe for it, but that got shot down. IMO it's
way cleaner than having tables of vendor IDs in the kernel, maybe if
that's the backup option it'll convince the DT people to change their
minds?
> For profiles, how about having an incremental rva23.config
> similar to 64-bit.config ? This will minimize the duplication
> by re-using the same base defconfig.
The profiles don't really tell us anything, though. They're really just
marketing material, vendors just ignore the actually requirements and
stamp whatever they ship as compliant. If we start adding configs we
should just go with some sort of "support what shipped in 2024" type
definitions, that'd let us Kconfig-off some cruft for supporting old
systems while still being able to actually target something concrete.
>
> Regards,
> Anup
>
>
>
>>
>> Clément
>>
>> >
>> > - Charlie
>> >
>> >>
>> >> Clément
>> >>
>> >>>
>> >>> - Charlie
>> >>>
>> >>>> -}
>> >>>> -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
>> >>>> -static void __init check_unaligned_access_speed_all_cpus(void)
>> >>>> -{
>> >>>> -}
>> >>>> -#endif
>> >>>> -
>> >>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>> >>>> static void check_vector_unaligned_access(struct work_struct *work __always_unused)
>> >>>> {
>> >>>> @@ -370,6 +380,11 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
>> >>>> }
>> >>>> #endif
>> >>>>
>> >>>> +static bool check_vector_unaligned_access_table(void)
>> >>>> +{
>> >>>> + return false;
>> >>>> +}
>> >>>> +
>> >>>> static int riscv_online_cpu_vec(unsigned int cpu)
>> >>>> {
>> >>>> if (!has_vector()) {
>> >>>> @@ -377,6 +392,9 @@ static int riscv_online_cpu_vec(unsigned int cpu)
>> >>>> return 0;
>> >>>> }
>> >>>>
>> >>>> + if (check_vector_unaligned_access_table())
>> >>>> + return 0;
>> >>>> +
>> >>>> #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
>> >>>> if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
>> >>>> return 0;
>> >>>> @@ -392,13 +410,15 @@ static int __init check_unaligned_access_all_cpus(void)
>> >>>> {
>> >>>> int cpu;
>> >>>>
>> >>>> - if (!check_unaligned_access_emulated_all_cpus())
>> >>>> + if (!check_unaligned_access_table() &&
>> >>>> + !check_unaligned_access_emulated_all_cpus())
>> >>>> check_unaligned_access_speed_all_cpus();
>> >>>>
>> >>>> if (!has_vector()) {
>> >>>> for_each_online_cpu(cpu)
>> >>>> per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
>> >>>> - } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
>> >>>> + } else if (!check_vector_unaligned_access_table() &&
>> >>>> + !check_vector_unaligned_access_emulated_all_cpus() &&
>> >>>> IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
>> >>>> kthread_run(vec_check_unaligned_access_speed_all_cpus,
>> >>>> NULL, "vec_check_unaligned_access_speed_all_cpus");
>> >>>
>> >>>>
>> >>>> Regarding your table, it feels like a bit going back to old hardcoded
>> >>>> platform description ;). I think some kind of auto-detection of speed
>> >>>> (not builtin the kernel) for platforms could be good as well to skip
>> >>>> probing.
>> >>>>
>> >>>> A DT property also seems ok to me since the goal is to describe
>> >>>> hardware. Would a common DT/ACPI property be appropriate ? The
>> >>>> device_property API unified both so if we used some common property to
>> >>>> describe the misaligned access speed (both in DT cpu node/ ACPI CPU
>> >>>> device package), we could keep a single parsing method. But I'm no ACPI
>> >>>> expert so I don't know if that really make sense.
>> >>>>
>> >>>> Thanks,
>> >>>>
>> >>>> Clément
>> >>>>
>> >>>>>
>> >>>>> Thanks,
>> >>>>> drew
>> >>>>
>> >>
>>
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 10:16 ` Anup Patel
2025-02-10 11:07 ` Clément Léger
@ 2025-02-10 17:19 ` Charlie Jenkins
2025-02-10 20:37 ` Clément Léger
1 sibling, 1 reply; 47+ messages in thread
From: Charlie Jenkins @ 2025-02-10 17:19 UTC (permalink / raw)
To: Anup Patel
Cc: Andrew Jones, linux-riscv, linux-kernel, paul.walmsley, palmer,
jesse, Anup Patel
On Mon, Feb 10, 2025 at 03:46:46PM +0530, Anup Patel wrote:
> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> > > Probing unaligned accesses on boot is time consuming. Provide a
> > > function which will be used to look up the access type in a table
> > > by id registers. Vendors which provide table entries can then skip
> > > the probing.
> >
> > The access checker in my experience is only time consuming on slow
> > hardware. Hardware that supports fast unaligned accesses isn't really
> > impacted by this? Avoiding a list of hardware that has slow/fast
> > unaligned accesses in the kernel was the main reason for dynamically
> > checking. We did introduce the config option to compile the kernel with
> > assumed slow/fast accesses, which of course has the downside of
> > recompiling the kernel and I assume that you already considered that.
>
> The kconfig option does not align with the vision of running the same
> kernel image across platforms.
I just don't think that vision is realistic.
I am a proponent for compile time defines because ri ght now we are
catering the kernel to both microcontrollers and for high performance
platforms. I am in favor of having a set of configur ations that are
ideal for these microcontrollers and a different set for high
performance platforms. This is where the RVI profile s would ideally
come in, having different configs for different profiles that target low
performance/high performance.
Compiler optimizations for extensions are not possib le to do by just
having these different methods of selecting at runti me. By enabling
extra extensions like the bitmanip extensions during compilation via a
config flag we can optimize the entire kernel. It is not possible to
push all optimizations off to runtime detection.
>
> >
> > Instead of having a table in the kernel, something that would be more
> > platform agnostic would be to have an extension that signals this
> > information. That seems like it would accomplish the same goal and
> > leverage the existing infrastructure in the kernel, albeit with the need
> > to make a new extension.
> >
>
> IMO, expecting an ISA extension to be defined for all possible
> microarchitectural choices is not going to scale so it is better
> to have infrastructure in kernel itself to infer microarchitectural
> choices based on RISC-V implementation ID.
How is keeping tables in the kernel for all microarchitectural details
any more scalable than having extensions that do the same thing? I would
argue that having it in the kernel is less scalable since it needs to be
described for all implementation IDs, and all changes require going
through the kernel review process. Dynamic probing avoids these issues.
Having an extension has the one-time process of getting the extension
into something like a profile, but then anybody could use it without
needing a kernel patch.
- Charlie
>
> Regards,
> Anup
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 17:19 ` Charlie Jenkins
@ 2025-02-10 20:37 ` Clément Léger
2025-02-11 9:04 ` Andrew Jones
0 siblings, 1 reply; 47+ messages in thread
From: Clément Léger @ 2025-02-10 20:37 UTC (permalink / raw)
To: Charlie Jenkins, Anup Patel
Cc: Andrew Jones, linux-riscv, linux-kernel, paul.walmsley, palmer,
jesse, Anup Patel
On 10/02/2025 18:19, Charlie Jenkins wrote:
> On Mon, Feb 10, 2025 at 03:46:46PM +0530, Anup Patel wrote:
>> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>
>>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
>>>> Probing unaligned accesses on boot is time consuming. Provide a
>>>> function which will be used to look up the access type in a table
>>>> by id registers. Vendors which provide table entries can then skip
>>>> the probing.
>>>
>>> The access checker in my experience is only time consuming on slow
>>> hardware. Hardware that supports fast unaligned accesses isn't really
>>> impacted by this? Avoiding a list of hardware that has slow/fast
>>> unaligned accesses in the kernel was the main reason for dynamically
>>> checking. We did introduce the config option to compile the kernel with
>>> assumed slow/fast accesses, which of course has the downside of
>>> recompiling the kernel and I assume that you already considered that.
>>
>> The kconfig option does not align with the vision of running the same
>> kernel image across platforms.
>
> I just don't think that vision is realistic.
>
> I am a proponent for compile time defines because ri ght now we are
> catering the kernel to both microcontrollers and for high performance
> platforms. I am in favor of having a set of configur ations that are
> ideal for these microcontrollers and a different set for high
> performance platforms. This is where the RVI profile s would ideally
> come in, having different configs for different profiles that target low
> performance/high performance.
>
> Compiler optimizations for extensions are not possib le to do by just
> having these different methods of selecting at runti me. By enabling
> extra extensions like the bitmanip extensions during compilation via a
> config flag we can optimize the entire kernel. It is not possible to
> push all optimizations off to runtime detection.
While this might be true for the bitmanip extension and other extensions
that the compiler can take advantage of, that isn't true for the
misaligned speed probing code. The only meaningful misaligned access
configuration option for kernel "speed" optimization is
RISCV_EFFICIENT_UNALIGNED_ACCESS (which is ironically not easily
selectable since it is under NON_PORTABLE).
Currently all the config options that have been added around misaligned
support "just" allows to get rid of the probing code at compile time and
set a predefined speed. That does not really improve the kernel
performance itself, just allows for a faster boot. That could as well be
supported using a command line option as suggested.
That being said, I agree that some additional configuration options
should be added to enable additional extension support at compile time,
enabling a faster kernel. Having a single image for all hardware is as
you said not realistic but profiles configuration as you proposed might
be the key for this support.
Clément
>
>>
>>>
>>> Instead of having a table in the kernel, something that would be more
>>> platform agnostic would be to have an extension that signals this
>>> information. That seems like it would accomplish the same goal and
>>> leverage the existing infrastructure in the kernel, albeit with the need
>>> to make a new extension.
>>>
>>
>> IMO, expecting an ISA extension to be defined for all possible
>> microarchitectural choices is not going to scale so it is better
>> to have infrastructure in kernel itself to infer microarchitectural
>> choices based on RISC-V implementation ID.
>
> How is keeping tables in the kernel for all microarchitectural details
> any more scalable than having extensions that do the same thing? I would
> argue that having it in the kernel is less scalable since it needs to be
> described for all implementation IDs, and all changes require going
> through the kernel review process. Dynamic probing avoids these issues.
> Having an extension has the one-time process of getting the extension
> into something like a profile, but then anybody could use it without
> needing a kernel patch.
>
> - Charlie
>
>>
>> Regards,
>> Anup
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 7/9] riscv: Prepare for unaligned access type table lookups
2025-02-10 20:37 ` Clément Léger
@ 2025-02-11 9:04 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-11 9:04 UTC (permalink / raw)
To: Clément Léger
Cc: Charlie Jenkins, Anup Patel, linux-riscv, linux-kernel,
paul.walmsley, palmer, jesse, Anup Patel
On Mon, Feb 10, 2025 at 09:37:10PM +0100, Clément Léger wrote:
>
>
> On 10/02/2025 18:19, Charlie Jenkins wrote:
> > On Mon, Feb 10, 2025 at 03:46:46PM +0530, Anup Patel wrote:
> >> On Sat, Feb 8, 2025 at 6:53 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>>
> >>> On Fri, Feb 07, 2025 at 05:19:47PM +0100, Andrew Jones wrote:
> >>>> Probing unaligned accesses on boot is time consuming. Provide a
> >>>> function which will be used to look up the access type in a table
> >>>> by id registers. Vendors which provide table entries can then skip
> >>>> the probing.
> >>>
> >>> The access checker in my experience is only time consuming on slow
> >>> hardware. Hardware that supports fast unaligned accesses isn't really
> >>> impacted by this? Avoiding a list of hardware that has slow/fast
> >>> unaligned accesses in the kernel was the main reason for dynamically
> >>> checking. We did introduce the config option to compile the kernel with
> >>> assumed slow/fast accesses, which of course has the downside of
> >>> recompiling the kernel and I assume that you already considered that.
> >>
> >> The kconfig option does not align with the vision of running the same
> >> kernel image across platforms.
> >
> > I just don't think that vision is realistic.
> >
> > I am a proponent for compile time defines because ri ght now we are
> > catering the kernel to both microcontrollers and for high performance
> > platforms. I am in favor of having a set of configur ations that are
> > ideal for these microcontrollers and a different set for high
> > performance platforms. This is where the RVI profile s would ideally
> > come in, having different configs for different profiles that target low
> > performance/high performance.
> >
> > Compiler optimizations for extensions are not possib le to do by just
> > having these different methods of selecting at runti me. By enabling
> > extra extensions like the bitmanip extensions during compilation via a
> > config flag we can optimize the entire kernel. It is not possible to
> > push all optimizations off to runtime detection.
>
> While this might be true for the bitmanip extension and other extensions
> that the compiler can take advantage of, that isn't true for the
> misaligned speed probing code. The only meaningful misaligned access
> configuration option for kernel "speed" optimization is
> RISCV_EFFICIENT_UNALIGNED_ACCESS (which is ironically not easily
> selectable since it is under NON_PORTABLE).
>
> Currently all the config options that have been added around misaligned
> support "just" allows to get rid of the probing code at compile time and
> set a predefined speed. That does not really improve the kernel
> performance itself, just allows for a faster boot. That could as well be
> supported using a command line option as suggested.
I agree. I'll look into stripping config options and picking up Jesse's
command line option for v2. I'll also drop the table approach of this
series since I don't have a strong enough justification for it over the
command line at this time.
Thanks,
drew
>
> That being said, I agree that some additional configuration options
> should be added to enable additional extension support at compile time,
> enabling a faster kernel. Having a single image for all hardware is as
> you said not realistic but profiles configuration as you proposed might
> be the key for this support.
>
> Clément
>
> >
> >>
> >>>
> >>> Instead of having a table in the kernel, something that would be more
> >>> platform agnostic would be to have an extension that signals this
> >>> information. That seems like it would accomplish the same goal and
> >>> leverage the existing infrastructure in the kernel, albeit with the need
> >>> to make a new extension.
> >>>
> >>
> >> IMO, expecting an ISA extension to be defined for all possible
> >> microarchitectural choices is not going to scale so it is better
> >> to have infrastructure in kernel itself to infer microarchitectural
> >> choices based on RISC-V implementation ID.
> >
> > How is keeping tables in the kernel for all microarchitectural details
> > any more scalable than having extensions that do the same thing? I would
> > argue that having it in the kernel is less scalable since it needs to be
> > described for all implementation IDs, and all changes require going
> > through the kernel review process. Dynamic probing avoids these issues.
> > Having an extension has the one-time process of getting the extension
> > into something like a profile, but then anybody could use it without
> > needing a kernel patch.
> >
> > - Charlie
> >
> >>
> >> Regards,
> >> Anup
> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH 8/9] riscv: Implement check_unaligned_access_table
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (6 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 7/9] riscv: Prepare for unaligned access type table lookups Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-07 16:19 ` [PATCH 9/9] riscv: Add Ventana unaligned access table entries Andrew Jones
2025-02-08 7:59 ` [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Anup Patel
9 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
Define the table entry type and implement the table lookup to find
unaligned access types by id registers which is used to skip probing.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/kernel/unaligned_access_speed.c | 91 +++++++++++++++++++++-
1 file changed, 89 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index f8497097e79d..bd6db4c42daf 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
+#include <asm/sbi.h>
#include <asm/vector.h>
#include "copy-unaligned.h"
@@ -230,11 +231,89 @@ static int __init lock_and_set_unaligned_access_static_branch(void)
arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
-static bool check_unaligned_access_table(void)
+/*
+ * An unaligned_access_table_entry maps harts (or collections of harts) to
+ * unaligned access types. @level is used to determine whether @marchid and/or
+ * @mimpid should to be considered. All (level, mvendorid, marchid, mimpid)
+ * tuples formed from each table entry must be unique.
+ */
+enum id_level {
+ LEVEL_VENDOR,
+ LEVEL_ARCH,
+ LEVEL_IMP,
+};
+struct unaligned_access_table_entry {
+ enum id_level level;
+ u32 mvendorid;
+ ulong marchid;
+ ulong mimpid;
+ long type;
+};
+
+static struct unaligned_access_table_entry unaligned_access_table_entries[] = {
+};
+
+/*
+ * Search unaligned_access_table_entries[] for the most specific match,
+ * i.e. if there are two entries, one with mvendorid = V and level = VENDOR
+ * and another with mvendorid = V, level = ARCH, and marchid = A, then
+ * a hart with {V,A,?} will match the latter while a hart with {V,!A,?}
+ * will match the former.
+ */
+static bool __check_unaligned_access_table(int cpu, long *ptr, int nr_entries,
+ struct unaligned_access_table_entry table[])
{
+ struct unaligned_access_table_entry *entry, *match = NULL;
+ u32 mvendorid = riscv_cached_mvendorid(cpu);
+ ulong marchid = riscv_cached_marchid(cpu);
+ ulong mimpid = riscv_cached_mimpid(cpu);
+ int i;
+
+ for (i = 0; i < nr_entries; ++i) {
+ entry = &table[i];
+
+ switch (entry->level) {
+ case LEVEL_VENDOR:
+ if (!match && entry->mvendorid == mvendorid) {
+ /* The match, unless we find an ARCH or IMP level match. */
+ match = entry;
+ }
+ break;
+ case LEVEL_ARCH:
+ if (entry->mvendorid == mvendorid && entry->marchid == marchid) {
+ /* The match, unless we find an IMP level match. */
+ match = entry;
+ }
+ break;
+ case LEVEL_IMP:
+ if (entry->mvendorid == mvendorid && entry->marchid == marchid &&
+ entry->mimpid == mimpid) {
+ match = entry;
+ goto matched;
+ }
+ break;
+ }
+ }
+
+ if (match) {
+matched:
+ *ptr = match->type;
+ return true;
+ }
+
return false;
}
+static bool check_unaligned_access_table(void)
+{
+ int cpu = smp_processor_id();
+ long *ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
+
+ return __check_unaligned_access_table(cpu, ptr,
+ ARRAY_SIZE(unaligned_access_table_entries),
+ unaligned_access_table_entries);
+}
+
static int riscv_online_cpu(unsigned int cpu)
{
if (check_unaligned_access_table())
@@ -380,9 +459,17 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
}
#endif
+static struct unaligned_access_table_entry vec_unaligned_access_table_entries[] = {
+};
+
static bool check_vector_unaligned_access_table(void)
{
- return false;
+ int cpu = smp_processor_id();
+ long *ptr = per_cpu_ptr(&vector_misaligned_access, cpu);
+
+ return __check_unaligned_access_table(cpu, ptr,
+ ARRAY_SIZE(vec_unaligned_access_table_entries),
+ vec_unaligned_access_table_entries);
}
static int riscv_online_cpu_vec(unsigned int cpu)
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* [PATCH 9/9] riscv: Add Ventana unaligned access table entries
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (7 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 8/9] riscv: Implement check_unaligned_access_table Andrew Jones
@ 2025-02-07 16:19 ` Andrew Jones
2025-02-08 7:59 ` [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Anup Patel
9 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-07 16:19 UTC (permalink / raw)
To: linux-riscv, linux-kernel
Cc: paul.walmsley, palmer, charlie, jesse, Anup Patel
Ventana harts always have fast unaligned access speeds, so skip the
unnecessary probing.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/include/asm/vendorid_list.h | 1 +
arch/riscv/kernel/unaligned_access_speed.c | 3 +++
2 files changed, 4 insertions(+)
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index a5150cdf34d8..8dd55a847893 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -9,5 +9,6 @@
#define MICROCHIP_VENDOR_ID 0x029
#define SIFIVE_VENDOR_ID 0x489
#define THEAD_VENDOR_ID 0x5b7
+#define VENTANA_VENDOR_ID 0x61f
#endif
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index bd6db4c42daf..ff9905274c60 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -14,6 +14,7 @@
#include <asm/hwprobe.h>
#include <asm/sbi.h>
#include <asm/vector.h>
+#include <asm/vendorid_list.h>
#include "copy-unaligned.h"
@@ -251,6 +252,7 @@ struct unaligned_access_table_entry {
};
static struct unaligned_access_table_entry unaligned_access_table_entries[] = {
+ { LEVEL_VENDOR, VENTANA_VENDOR_ID, 0, 0, RISCV_HWPROBE_MISALIGNED_SCALAR_FAST },
};
/*
@@ -460,6 +462,7 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
#endif
static struct unaligned_access_table_entry vec_unaligned_access_table_entries[] = {
+ { LEVEL_VENDOR, VENTANA_VENDOR_ID, 0, 0, RISCV_HWPROBE_MISALIGNED_VECTOR_FAST },
};
static bool check_vector_unaligned_access_table(void)
--
2.48.1
^ permalink raw reply related [flat|nested] 47+ messages in thread* Re: [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping
2025-02-07 16:19 [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Andrew Jones
` (8 preceding siblings ...)
2025-02-07 16:19 ` [PATCH 9/9] riscv: Add Ventana unaligned access table entries Andrew Jones
@ 2025-02-08 7:59 ` Anup Patel
2025-02-10 9:26 ` Andrew Jones
9 siblings, 1 reply; 47+ messages in thread
From: Anup Patel @ 2025-02-08 7:59 UTC (permalink / raw)
To: Andrew Jones
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse
On Fri, Feb 7, 2025 at 9:49 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> The first six patches of this series are fixes and cleanups of the
> unaligned access speed probing code. The next two patches introduce
> support to skip probing by matching vendor/arch/imp ids and checking a
> table for the access speed type. The last patch applies the new skip
> support to Ventana harts.
Alternatively, we can also skip probing misaligned access when Zicclsm
extension is present in the ISA string. The Zicclsm extension is defined
as part of the ratified RVA23 profile.
Regards,
Anup
>
> (I'd be happy to split the fixes from the new skip support if we want to
> discuss the skip support independently, but I want to base on the fixes
> and I'm not sure if patchwork supports Based-on: $MESSAGE_ID/$LORE_URL
> or not at the moment, so I'm just posting together for now in order to
> be able to check for my patchwork green lights!)
>
> Thanks,
> drew
>
> Andrew Jones (9):
> riscv: Annotate unaligned access init functions
> riscv: Fix riscv_online_cpu_vec
> riscv: Fix check_unaligned_access_all_cpus
> riscv: Change check_unaligned_access_speed_all_cpus to void
> riscv: Fix set up of cpu hotplug callbacks
> riscv: Fix set up of vector cpu hotplug callback
> riscv: Prepare for unaligned access type table lookups
> riscv: Implement check_unaligned_access_table
> riscv: Add Ventana unaligned access table entries
>
> arch/riscv/include/asm/cpufeature.h | 4 +-
> arch/riscv/include/asm/vendorid_list.h | 1 +
> arch/riscv/kernel/traps_misaligned.c | 14 +-
> arch/riscv/kernel/unaligned_access_speed.c | 278 ++++++++++++++-------
> 4 files changed, 200 insertions(+), 97 deletions(-)
>
> --
> 2.48.1
>
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping
2025-02-08 7:59 ` [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping Anup Patel
@ 2025-02-10 9:26 ` Andrew Jones
2025-02-10 9:58 ` Anup Patel
0 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2025-02-10 9:26 UTC (permalink / raw)
To: Anup Patel
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse
On Sat, Feb 08, 2025 at 01:29:42PM +0530, Anup Patel wrote:
> On Fri, Feb 7, 2025 at 9:49 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > The first six patches of this series are fixes and cleanups of the
> > unaligned access speed probing code. The next two patches introduce
> > support to skip probing by matching vendor/arch/imp ids and checking a
> > table for the access speed type. The last patch applies the new skip
> > support to Ventana harts.
>
> Alternatively, we can also skip probing misaligned access when Zicclsm
> extension is present in the ISA string. The Zicclsm extension is defined
> as part of the ratified RVA23 profile.
The definition of Zicclsm doesn't explicitly state that misaligned word
accesses will be faster than byte accesses to the same addresses. There's
also this spec issue[1] which appears to state that Zicclsm cannot be used
to infer fast misaligned accesses.
But, like Charlie suggests, maybe we should advocate the creation of an
extension (or "named feature") which allows specifically advertising that
misaligned accesses are fast.
[1] https://github.com/riscv/riscv-isa-manual/issues/1611
Thanks,
drew
>
> Regards,
> Anup
>
> >
> > (I'd be happy to split the fixes from the new skip support if we want to
> > discuss the skip support independently, but I want to base on the fixes
> > and I'm not sure if patchwork supports Based-on: $MESSAGE_ID/$LORE_URL
> > or not at the moment, so I'm just posting together for now in order to
> > be able to check for my patchwork green lights!)
> >
> > Thanks,
> > drew
> >
> > Andrew Jones (9):
> > riscv: Annotate unaligned access init functions
> > riscv: Fix riscv_online_cpu_vec
> > riscv: Fix check_unaligned_access_all_cpus
> > riscv: Change check_unaligned_access_speed_all_cpus to void
> > riscv: Fix set up of cpu hotplug callbacks
> > riscv: Fix set up of vector cpu hotplug callback
> > riscv: Prepare for unaligned access type table lookups
> > riscv: Implement check_unaligned_access_table
> > riscv: Add Ventana unaligned access table entries
> >
> > arch/riscv/include/asm/cpufeature.h | 4 +-
> > arch/riscv/include/asm/vendorid_list.h | 1 +
> > arch/riscv/kernel/traps_misaligned.c | 14 +-
> > arch/riscv/kernel/unaligned_access_speed.c | 278 ++++++++++++++-------
> > 4 files changed, 200 insertions(+), 97 deletions(-)
> >
> > --
> > 2.48.1
> >
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping
2025-02-10 9:26 ` Andrew Jones
@ 2025-02-10 9:58 ` Anup Patel
2025-02-10 11:01 ` Andrew Jones
0 siblings, 1 reply; 47+ messages in thread
From: Anup Patel @ 2025-02-10 9:58 UTC (permalink / raw)
To: Andrew Jones
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse
On Mon, Feb 10, 2025 at 2:56 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Sat, Feb 08, 2025 at 01:29:42PM +0530, Anup Patel wrote:
> > On Fri, Feb 7, 2025 at 9:49 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > The first six patches of this series are fixes and cleanups of the
> > > unaligned access speed probing code. The next two patches introduce
> > > support to skip probing by matching vendor/arch/imp ids and checking a
> > > table for the access speed type. The last patch applies the new skip
> > > support to Ventana harts.
> >
> > Alternatively, we can also skip probing misaligned access when Zicclsm
> > extension is present in the ISA string. The Zicclsm extension is defined
> > as part of the ratified RVA23 profile.
>
> The definition of Zicclsm doesn't explicitly state that misaligned word
> accesses will be faster than byte accesses to the same addresses. There's
> also this spec issue[1] which appears to state that Zicclsm cannot be used
> to infer fast misaligned accesses.
>
> But, like Charlie suggests, maybe we should advocate the creation of an
> extension (or "named feature") which allows specifically advertising that
> misaligned accesses are fast.
>
> [1] https://github.com/riscv/riscv-isa-manual/issues/1611
I am not sure when such an extension would show up so for now
skipping unaligned tests based on implementation ID seems
reasonable.
Also, it seems this series is totally skipping the existing boot-time
print for fast unaligned access. Please try to keep the boot-time
print in some form.
Regards,
Anup
>
> Thanks,
> drew
>
> >
> > Regards,
> > Anup
> >
> > >
> > > (I'd be happy to split the fixes from the new skip support if we want to
> > > discuss the skip support independently, but I want to base on the fixes
> > > and I'm not sure if patchwork supports Based-on: $MESSAGE_ID/$LORE_URL
> > > or not at the moment, so I'm just posting together for now in order to
> > > be able to check for my patchwork green lights!)
> > >
> > > Thanks,
> > > drew
> > >
> > > Andrew Jones (9):
> > > riscv: Annotate unaligned access init functions
> > > riscv: Fix riscv_online_cpu_vec
> > > riscv: Fix check_unaligned_access_all_cpus
> > > riscv: Change check_unaligned_access_speed_all_cpus to void
> > > riscv: Fix set up of cpu hotplug callbacks
> > > riscv: Fix set up of vector cpu hotplug callback
> > > riscv: Prepare for unaligned access type table lookups
> > > riscv: Implement check_unaligned_access_table
> > > riscv: Add Ventana unaligned access table entries
> > >
> > > arch/riscv/include/asm/cpufeature.h | 4 +-
> > > arch/riscv/include/asm/vendorid_list.h | 1 +
> > > arch/riscv/kernel/traps_misaligned.c | 14 +-
> > > arch/riscv/kernel/unaligned_access_speed.c | 278 ++++++++++++++-------
> > > 4 files changed, 200 insertions(+), 97 deletions(-)
> > >
> > > --
> > > 2.48.1
> > >
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH 0/9] riscv: Unaligned access speed probing fixes and skipping
2025-02-10 9:58 ` Anup Patel
@ 2025-02-10 11:01 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2025-02-10 11:01 UTC (permalink / raw)
To: Anup Patel
Cc: linux-riscv, linux-kernel, paul.walmsley, palmer, charlie, jesse
On Mon, Feb 10, 2025 at 03:28:13PM +0530, Anup Patel wrote:
> On Mon, Feb 10, 2025 at 2:56 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Sat, Feb 08, 2025 at 01:29:42PM +0530, Anup Patel wrote:
> > > On Fri, Feb 7, 2025 at 9:49 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > > >
> > > > The first six patches of this series are fixes and cleanups of the
> > > > unaligned access speed probing code. The next two patches introduce
> > > > support to skip probing by matching vendor/arch/imp ids and checking a
> > > > table for the access speed type. The last patch applies the new skip
> > > > support to Ventana harts.
> > >
> > > Alternatively, we can also skip probing misaligned access when Zicclsm
> > > extension is present in the ISA string. The Zicclsm extension is defined
> > > as part of the ratified RVA23 profile.
> >
> > The definition of Zicclsm doesn't explicitly state that misaligned word
> > accesses will be faster than byte accesses to the same addresses. There's
> > also this spec issue[1] which appears to state that Zicclsm cannot be used
> > to infer fast misaligned accesses.
> >
> > But, like Charlie suggests, maybe we should advocate the creation of an
> > extension (or "named feature") which allows specifically advertising that
> > misaligned accesses are fast.
> >
> > [1] https://github.com/riscv/riscv-isa-manual/issues/1611
>
> I am not sure when such an extension would show up so for now
> skipping unaligned tests based on implementation ID seems
> reasonable.
>
> Also, it seems this series is totally skipping the existing boot-time
> print for fast unaligned access. Please try to keep the boot-time
> print in some form.
Sure. We have hwprobe, but now that people are likely used to seeing it
in dmesg, then we should probably keep something there.
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread