* [PATCH] riscv: drop __init from vec_check_unaligned_access_speed_all_cpus
@ 2026-06-12 16:24 ` Anirudh Srinivasan
0 siblings, 0 replies; 4+ messages in thread
From: Anirudh Srinivasan @ 2026-06-12 16:24 UTC (permalink / raw)
To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Andrew Jones
Cc: linux-riscv, linux-kernel, andrew.jones, ganboing,
Anirudh Srinivasan
This function runs within a kthread and is not guaranteed to finish
before the system finishes booting and free_initmem() unmaps the .init.text
section. This function makes calls to SBI for probing unaligned access
speed, and if this is slow for some reason (say some debug prints were
added to SBI), the kthread can still be running at this point, resulting
in an instruction page fault when trying to fetch from the freed region.
[ 25.642087] Unable to handle kernel paging request at virtual address ffffffff80a04ef8
[ 25.646694] Current vec_check_unali pgtable: 4K pagesize, 48-bit VAs, pgdp=0x00004000316e9000
[ 25.653170] [ffffffff80a04ef8] pgd=000010004be7e401, p4d=000010004be7e401, pud=000010004be7e001, pmd=000010000c3000e3
[ 25.661244] Oops [#1]
[ 25.662997] Modules linked in:
[ 25.665357] CPU: 3 UID: 0 PID: 42 Comm: vec_check_unali Not tainted 7.0.0-tt-blackhole-asrinivasan-00007-g30ff73f18211 #570 PREEMPTLAZY
[ 25.674669] Hardware name: Tenstorrent Blackhole (DT)
[ 25.678545] epc : vec_check_unaligned_access_speed_all_cpus+0x18/0x2c
[ 25.683458] ra : vec_check_unaligned_access_speed_all_cpus+0x18/0x2c
[ 25.688372] epc : ffffffff80a04ef8 ra : ffffffff80a04ef8 sp : ffff8f8000203e20
[ 25.693874] gp : ffffffff814dc168 tp : ffffaf8001ad9900 t0 : 0000000000000000
[ 25.699401] t1 : fffffffffffffff0 t2 : ffffaf8001ad9a10 s0 : ffff8f8000203e30
[ 25.704912] s1 : ffffaf80018dc780 a0 : 0000000000000000 a1 : 0000000000000002
[ 25.710407] a2 : 00000000000001f0 a3 : 0000000000000018 a4 : 0000000000000000
[ 25.715917] a5 : 0000000000000000 a6 : ffffaf8001c03d98 a7 : ffffaf8001c03e30
[ 25.721419] s2 : ffff8f8000023c98 s3 : ffffaf8001aa1240 s4 : ffffffff80a04ee0
[ 25.726937] s5 : 0000000000000000 s6 : 0000000000000000 s7 : 0000000000000000
[ 25.732450] s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000000000000000
[ 25.737944] s11: 0000000000000000 t3 : 0000000000000002 t4 : 0000000000000402
[ 25.743481] t5 : 0000000000000040 t6 : 0000000000000004 ssp : 0000000000000000
[ 25.749024] status: 0000000200000120 badaddr: ffffffff80a04ef8 cause: 000000000000000c
[ 25.755060] [<ffffffff80a04ef8>] vec_check_unaligned_access_speed_all_cpus+0x18/0x2c
[ 25.760964] [<ffffffff80047a10>] kthread+0xd8/0xfc
[ 25.764660] [<ffffffff80010c48>] ret_from_fork_kernel+0x18/0x1c4
[ 25.769220] [<ffffffff80895fe6>] ret_from_fork_kernel_asm+0x16/0x18
[ 25.774018] Code: cccc cccc cccc cccc cccc cccc cccc cccc cccc cccc (cccc) cccc
Drop __init from its signature so that this doesn't happen.
Fixes: a00e022be531 ("riscv: Annotate unaligned access init functions")
Signed-off-by: Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com>
Assisted-by: Claude:claude-opus-4-6
---
arch/riscv/kernel/unaligned_access_speed.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 485ab1d105d36..366baca73dc27 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -325,7 +325,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
schedule_on_each_cpu(check_vector_unaligned_access);
riscv_hwprobe_complete_async_probe();
@@ -333,7 +333,7 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
return 0;
}
#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
return 0;
}
---
base-commit: 863f9b49d618e02e3ecadeb89ebd39064ba8c2fd
change-id: 20260612-vec_unaligned_drop_init-c7af506a375e
Best regards,
--
Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH] riscv: drop __init from vec_check_unaligned_access_speed_all_cpus
@ 2026-06-12 16:24 ` Anirudh Srinivasan
0 siblings, 0 replies; 4+ messages in thread
From: Anirudh Srinivasan @ 2026-06-12 16:24 UTC (permalink / raw)
To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Andrew Jones
Cc: linux-riscv, linux-kernel, andrew.jones, ganboing,
Anirudh Srinivasan
This function runs within a kthread and is not guaranteed to finish
before the system finishes booting and free_initmem() unmaps the .init.text
section. This function makes calls to SBI for probing unaligned access
speed, and if this is slow for some reason (say some debug prints were
added to SBI), the kthread can still be running at this point, resulting
in an instruction page fault when trying to fetch from the freed region.
[ 25.642087] Unable to handle kernel paging request at virtual address ffffffff80a04ef8
[ 25.646694] Current vec_check_unali pgtable: 4K pagesize, 48-bit VAs, pgdp=0x00004000316e9000
[ 25.653170] [ffffffff80a04ef8] pgd=000010004be7e401, p4d=000010004be7e401, pud=000010004be7e001, pmd=000010000c3000e3
[ 25.661244] Oops [#1]
[ 25.662997] Modules linked in:
[ 25.665357] CPU: 3 UID: 0 PID: 42 Comm: vec_check_unali Not tainted 7.0.0-tt-blackhole-asrinivasan-00007-g30ff73f18211 #570 PREEMPTLAZY
[ 25.674669] Hardware name: Tenstorrent Blackhole (DT)
[ 25.678545] epc : vec_check_unaligned_access_speed_all_cpus+0x18/0x2c
[ 25.683458] ra : vec_check_unaligned_access_speed_all_cpus+0x18/0x2c
[ 25.688372] epc : ffffffff80a04ef8 ra : ffffffff80a04ef8 sp : ffff8f8000203e20
[ 25.693874] gp : ffffffff814dc168 tp : ffffaf8001ad9900 t0 : 0000000000000000
[ 25.699401] t1 : fffffffffffffff0 t2 : ffffaf8001ad9a10 s0 : ffff8f8000203e30
[ 25.704912] s1 : ffffaf80018dc780 a0 : 0000000000000000 a1 : 0000000000000002
[ 25.710407] a2 : 00000000000001f0 a3 : 0000000000000018 a4 : 0000000000000000
[ 25.715917] a5 : 0000000000000000 a6 : ffffaf8001c03d98 a7 : ffffaf8001c03e30
[ 25.721419] s2 : ffff8f8000023c98 s3 : ffffaf8001aa1240 s4 : ffffffff80a04ee0
[ 25.726937] s5 : 0000000000000000 s6 : 0000000000000000 s7 : 0000000000000000
[ 25.732450] s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000000000000000
[ 25.737944] s11: 0000000000000000 t3 : 0000000000000002 t4 : 0000000000000402
[ 25.743481] t5 : 0000000000000040 t6 : 0000000000000004 ssp : 0000000000000000
[ 25.749024] status: 0000000200000120 badaddr: ffffffff80a04ef8 cause: 000000000000000c
[ 25.755060] [<ffffffff80a04ef8>] vec_check_unaligned_access_speed_all_cpus+0x18/0x2c
[ 25.760964] [<ffffffff80047a10>] kthread+0xd8/0xfc
[ 25.764660] [<ffffffff80010c48>] ret_from_fork_kernel+0x18/0x1c4
[ 25.769220] [<ffffffff80895fe6>] ret_from_fork_kernel_asm+0x16/0x18
[ 25.774018] Code: cccc cccc cccc cccc cccc cccc cccc cccc cccc cccc (cccc) cccc
Drop __init from its signature so that this doesn't happen.
Fixes: a00e022be531 ("riscv: Annotate unaligned access init functions")
Signed-off-by: Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com>
Assisted-by: Claude:claude-opus-4-6
---
arch/riscv/kernel/unaligned_access_speed.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 485ab1d105d36..366baca73dc27 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -325,7 +325,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
schedule_on_each_cpu(check_vector_unaligned_access);
riscv_hwprobe_complete_async_probe();
@@ -333,7 +333,7 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway
return 0;
}
#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
return 0;
}
---
base-commit: 863f9b49d618e02e3ecadeb89ebd39064ba8c2fd
change-id: 20260612-vec_unaligned_drop_init-c7af506a375e
Best regards,
--
Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com>
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] riscv: drop __init from vec_check_unaligned_access_speed_all_cpus
2026-06-12 16:24 ` Anirudh Srinivasan
@ 2026-06-12 21:23 ` Nam Cao
-1 siblings, 0 replies; 4+ messages in thread
From: Nam Cao @ 2026-06-12 21:23 UTC (permalink / raw)
To: Anirudh Srinivasan, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Alexandre Ghiti, Andrew Jones
Cc: linux-riscv, linux-kernel, andrew.jones, ganboing,
Anirudh Srinivasan
Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com> writes:
> This function runs within a kthread and need not necessarily finish
> before system finishes boot and free_initmem() unmaps the .init.text
> section. This function makes calls to SBI for probing unaligned access
> speed, and if this is slow for some reason (say some debug prints were
> added to SBI), the kthread can still be running at this point and result
> in an instruction page fault when trying to fetch from the freed region.
...
> Drop __init from its signature so that this doesn't happen.
That should work.
But we should really take a step back and reconsider whether running the
vector access speed probe in a kthread is really a good idea.
We had a problem in the past where the kthread might not complete before
the user reads the vDSO, so the user gets incorrect values. That was addressed by
5d15d2ad36b0 ("riscv: hwprobe: Fix stale vDSO data for late-initialized
keys at boot"), which complicates things.
And now you discover another issue.
The motivation for using a kthread is to avoid a boot time slowdown. But
this has been bothering me for quite a while now, because I am not sure
if using a kthread really speeds things up. Sooner or later, the kthread
has to run. If it runs before the kernel is done booting, then the boot
is even slower due to the overhead of the kthread. If it runs after the
kernel finishes booting, then we run into these kinds of
headaches. Unfortunately I do not have a RISC-V CPU with vector support to
confirm my suspicion.
Furthermore, the vector access speed probe takes the same amount of time
as the scalar access speed probe. The scalar one is done without any
kthread, and no one ever complained about a boot time issue (well, someone
did complain, but that had nothing to do with the kthread. Their 64-core (?)
system was slower because the probe was done serially; we switched to
a parallel probe and it was fine).
So I think we should really get rid of that kthread entirely; the
headache is not worth it. That also allows reverting 5d15d2ad36b0 ("riscv:
hwprobe: Fix stale vDSO data for late-initialized keys at boot"), making
the code simpler.
Below is a patch that has only been tested with qemu. It reverts the
mentioned commit and removes the kthread.
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 8c572a464719..2f278c395af9 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -42,11 +42,4 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
return pair->value == other_pair->value;
}
-#ifdef CONFIG_MMU
-void riscv_hwprobe_register_async_probe(void);
-void riscv_hwprobe_complete_async_probe(void);
-#else
-static inline void riscv_hwprobe_register_async_probe(void) {}
-static inline void riscv_hwprobe_complete_async_probe(void) {}
-#endif
#endif
diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h
index 88b37af55175..da57a3786f7a 100644
--- a/arch/riscv/include/asm/vdso/arch_data.h
+++ b/arch/riscv/include/asm/vdso/arch_data.h
@@ -12,12 +12,6 @@ struct vdso_arch_data {
/* Boolean indicating all CPUs have the same static hwprobe values. */
__u8 homogeneous_cpus;
-
- /*
- * A gate to check and see if the hwprobe data is actually ready, as
- * probing is deferred to avoid boot slowdowns.
- */
- __u8 ready;
};
#endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 0f701ace3bb9..f3ed4fd396fb 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -5,9 +5,6 @@
* more details.
*/
#include <linux/syscalls.h>
-#include <linux/completion.h>
-#include <linux/atomic.h>
-#include <linux/once.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
@@ -470,32 +467,28 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
return 0;
}
-#ifdef CONFIG_MMU
-
-static DECLARE_COMPLETION(boot_probes_done);
-static atomic_t pending_boot_probes = ATOMIC_INIT(1);
-
-void riscv_hwprobe_register_async_probe(void)
+static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
+ size_t pair_count, size_t cpusetsize,
+ unsigned long __user *cpus_user,
+ unsigned int flags)
{
- atomic_inc(&pending_boot_probes);
-}
+ if (flags & RISCV_HWPROBE_WHICH_CPUS)
+ return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
+ cpus_user, flags);
-void riscv_hwprobe_complete_async_probe(void)
-{
- if (atomic_dec_and_test(&pending_boot_probes))
- complete(&boot_probes_done);
+ return hwprobe_get_values(pairs, pair_count, cpusetsize,
+ cpus_user, flags);
}
-static int complete_hwprobe_vdso_data(void)
+#ifdef CONFIG_MMU
+
+static int __init init_hwprobe_vdso_data(void)
{
struct vdso_arch_data *avd = vdso_k_arch_data;
u64 id_bitsmash = 0;
struct riscv_hwprobe pair;
int key;
- if (unlikely(!atomic_dec_and_test(&pending_boot_probes)))
- wait_for_completion(&boot_probes_done);
-
/*
* Initialize vDSO data with the answers for the "all CPUs" case, to
* save a syscall in the common case.
@@ -523,52 +516,13 @@ static int complete_hwprobe_vdso_data(void)
* vDSO should defer to the kernel for exotic cpu masks.
*/
avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
-
- /*
- * Make sure all the VDSO values are visible before we look at them.
- * This pairs with the implicit "no speculativly visible accesses"
- * barrier in the VDSO hwprobe code.
- */
- smp_wmb();
- avd->ready = true;
- return 0;
-}
-
-static int __init init_hwprobe_vdso_data(void)
-{
- struct vdso_arch_data *avd = vdso_k_arch_data;
-
- /*
- * Prevent the vDSO cached values from being used, as they're not ready
- * yet.
- */
- avd->ready = false;
return 0;
}
arch_initcall_sync(init_hwprobe_vdso_data);
-#else
-
-static int complete_hwprobe_vdso_data(void) { return 0; }
-
#endif /* CONFIG_MMU */
-static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
- size_t pair_count, size_t cpusetsize,
- unsigned long __user *cpus_user,
- unsigned int flags)
-{
- DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data);
-
- if (flags & RISCV_HWPROBE_WHICH_CPUS)
- return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
- cpus_user, flags);
-
- return hwprobe_get_values(pairs, pair_count, cpusetsize,
- cpus_user, flags);
-}
-
SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
size_t, pair_count, size_t, cpusetsize, unsigned long __user *,
cpus, unsigned int, flags)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 70b5e6927620..6a725eee5acd 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -375,19 +375,6 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
__free_pages(page, MISALIGNED_BUFFER_ORDER);
}
-/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
- schedule_on_each_cpu(check_vector_unaligned_access);
- riscv_hwprobe_complete_async_probe();
-
- return 0;
-}
-#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
- return 0;
-}
#endif
static int riscv_online_cpu_vec(unsigned int cpu)
@@ -474,12 +461,7 @@ static int __init check_unaligned_access_all_cpus(void)
per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
- riscv_hwprobe_register_async_probe();
- if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
- NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
- pr_warn("Failed to create vec_unalign_check kthread\n");
- riscv_hwprobe_complete_async_probe();
- }
+ schedule_on_each_cpu(check_vector_unaligned_access);
}
/*
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
index 8f45500d0a6e..2ddeba6c68dd 100644
--- a/arch/riscv/kernel/vdso/hwprobe.c
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
* homogeneous, then this function can handle requests for arbitrary
* masks.
*/
- if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready))
+ if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus))
return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
/* This is something we can handle, fill out the pairs. */
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] riscv: drop __init from vec_check_unaligned_access_speed_all_cpus
@ 2026-06-12 21:23 ` Nam Cao
0 siblings, 0 replies; 4+ messages in thread
From: Nam Cao @ 2026-06-12 21:23 UTC (permalink / raw)
To: Anirudh Srinivasan, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Alexandre Ghiti, Andrew Jones
Cc: linux-riscv, linux-kernel, andrew.jones, ganboing,
Anirudh Srinivasan
Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com> writes:
> This function runs within a kthread and need not necessarily finish
> before system finishes boot and free_initmem() unmaps the .init.text
> section. This function makes calls to SBI for probing unaligned access
> speed, and if this is slow for some reason (say some debug prints were
> added to SBI), the kthread can still be running at this point and result
> in an instruction page fault when trying to fetch from the freed region.
...
> Drop __init from its signature so that this doesn't happen.
That should work.
But we should really take a step back and reconsider whether running the
vector access speed probe in a kthread is really a good idea.
We had a problem in the past where the kthread might not complete before
the user reads the vDSO, so the user gets incorrect values. That was addressed by
5d15d2ad36b0 ("riscv: hwprobe: Fix stale vDSO data for late-initialized
keys at boot"), which complicates things.
And now you discover another issue.
The motivation for using a kthread is to avoid a boot time slowdown. But
this has been bothering me for quite a while now, because I am not sure
if using a kthread really speeds things up. Sooner or later, the kthread
has to run. If it runs before the kernel is done booting, then the boot
is even slower due to the overhead of the kthread. If it runs after the
kernel finishes booting, then we run into these kinds of
headaches. Unfortunately I do not have a RISC-V CPU with vector support to
confirm my suspicion.
Furthermore, the vector access speed probe takes the same amount of time
as the scalar access speed probe. The scalar one is done without any
kthread, and no one ever complained about a boot time issue (well, someone
did complain, but that had nothing to do with the kthread. Their 64-core (?)
system was slower because the probe was done serially; we switched to
a parallel probe and it was fine).
So I think we should really get rid of that kthread entirely; the
headache is not worth it. That also allows reverting 5d15d2ad36b0 ("riscv:
hwprobe: Fix stale vDSO data for late-initialized keys at boot"), making
the code simpler.
Below is a patch that has only been tested with qemu. It reverts the
mentioned commit and removes the kthread.
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 8c572a464719..2f278c395af9 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -42,11 +42,4 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
return pair->value == other_pair->value;
}
-#ifdef CONFIG_MMU
-void riscv_hwprobe_register_async_probe(void);
-void riscv_hwprobe_complete_async_probe(void);
-#else
-static inline void riscv_hwprobe_register_async_probe(void) {}
-static inline void riscv_hwprobe_complete_async_probe(void) {}
-#endif
#endif
diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h
index 88b37af55175..da57a3786f7a 100644
--- a/arch/riscv/include/asm/vdso/arch_data.h
+++ b/arch/riscv/include/asm/vdso/arch_data.h
@@ -12,12 +12,6 @@ struct vdso_arch_data {
/* Boolean indicating all CPUs have the same static hwprobe values. */
__u8 homogeneous_cpus;
-
- /*
- * A gate to check and see if the hwprobe data is actually ready, as
- * probing is deferred to avoid boot slowdowns.
- */
- __u8 ready;
};
#endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 0f701ace3bb9..f3ed4fd396fb 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -5,9 +5,6 @@
* more details.
*/
#include <linux/syscalls.h>
-#include <linux/completion.h>
-#include <linux/atomic.h>
-#include <linux/once.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
@@ -470,32 +467,28 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
return 0;
}
-#ifdef CONFIG_MMU
-
-static DECLARE_COMPLETION(boot_probes_done);
-static atomic_t pending_boot_probes = ATOMIC_INIT(1);
-
-void riscv_hwprobe_register_async_probe(void)
+static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
+ size_t pair_count, size_t cpusetsize,
+ unsigned long __user *cpus_user,
+ unsigned int flags)
{
- atomic_inc(&pending_boot_probes);
-}
+ if (flags & RISCV_HWPROBE_WHICH_CPUS)
+ return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
+ cpus_user, flags);
-void riscv_hwprobe_complete_async_probe(void)
-{
- if (atomic_dec_and_test(&pending_boot_probes))
- complete(&boot_probes_done);
+ return hwprobe_get_values(pairs, pair_count, cpusetsize,
+ cpus_user, flags);
}
-static int complete_hwprobe_vdso_data(void)
+#ifdef CONFIG_MMU
+
+static int __init init_hwprobe_vdso_data(void)
{
struct vdso_arch_data *avd = vdso_k_arch_data;
u64 id_bitsmash = 0;
struct riscv_hwprobe pair;
int key;
- if (unlikely(!atomic_dec_and_test(&pending_boot_probes)))
- wait_for_completion(&boot_probes_done);
-
/*
* Initialize vDSO data with the answers for the "all CPUs" case, to
* save a syscall in the common case.
@@ -523,52 +516,13 @@ static int complete_hwprobe_vdso_data(void)
* vDSO should defer to the kernel for exotic cpu masks.
*/
avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
-
- /*
- * Make sure all the VDSO values are visible before we look at them.
- * This pairs with the implicit "no speculativly visible accesses"
- * barrier in the VDSO hwprobe code.
- */
- smp_wmb();
- avd->ready = true;
- return 0;
-}
-
-static int __init init_hwprobe_vdso_data(void)
-{
- struct vdso_arch_data *avd = vdso_k_arch_data;
-
- /*
- * Prevent the vDSO cached values from being used, as they're not ready
- * yet.
- */
- avd->ready = false;
return 0;
}
arch_initcall_sync(init_hwprobe_vdso_data);
-#else
-
-static int complete_hwprobe_vdso_data(void) { return 0; }
-
#endif /* CONFIG_MMU */
-static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
- size_t pair_count, size_t cpusetsize,
- unsigned long __user *cpus_user,
- unsigned int flags)
-{
- DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data);
-
- if (flags & RISCV_HWPROBE_WHICH_CPUS)
- return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
- cpus_user, flags);
-
- return hwprobe_get_values(pairs, pair_count, cpusetsize,
- cpus_user, flags);
-}
-
SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
size_t, pair_count, size_t, cpusetsize, unsigned long __user *,
cpus, unsigned int, flags)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 70b5e6927620..6a725eee5acd 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -375,19 +375,6 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
__free_pages(page, MISALIGNED_BUFFER_ORDER);
}
-/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
- schedule_on_each_cpu(check_vector_unaligned_access);
- riscv_hwprobe_complete_async_probe();
-
- return 0;
-}
-#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
- return 0;
-}
#endif
static int riscv_online_cpu_vec(unsigned int cpu)
@@ -474,12 +461,7 @@ static int __init check_unaligned_access_all_cpus(void)
per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
- riscv_hwprobe_register_async_probe();
- if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
- NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
- pr_warn("Failed to create vec_unalign_check kthread\n");
- riscv_hwprobe_complete_async_probe();
- }
+ schedule_on_each_cpu(check_vector_unaligned_access);
}
/*
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
index 8f45500d0a6e..2ddeba6c68dd 100644
--- a/arch/riscv/kernel/vdso/hwprobe.c
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
* homogeneous, then this function can handle requests for arbitrary
* masks.
*/
- if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready))
+ if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus))
return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
/* This is something we can handle, fill out the pairs. */
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-06-12 21:25 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-12 16:24 [PATCH] riscv: drop __init from vec_check_unaligned_access_speed_all_cpus Anirudh Srinivasan
2026-06-12 16:24 ` Anirudh Srinivasan
2026-06-12 21:23 ` Nam Cao
2026-06-12 21:23 ` Nam Cao
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.