* [RFC 2/2] kvm: retry nx_huge_page_recovery_thread creation
@ 2025-02-26 2:43 Keith Busch
2025-02-26 20:13 ` Sean Christopherson
0 siblings, 1 reply; 3+ messages in thread
From: Keith Busch @ 2025-02-26 2:43 UTC (permalink / raw)
To: pbonzini, seanjc, kvm; +Cc: x86, virtualization, linux-kernel, Keith Busch
From: Keith Busch <kbusch@kernel.org>
A VMM may send a signal to its threads while they've entered KVM_RUN. If
that thread happens to be trying to make the huge page recovery vhost
task, then it fails with -ERESTARTNOINTR. We need to retry if that
happens, so we can't use call_once anymore. Replace it with a simple
mutex and return the appropriate error instead of defaulting to ENOMEM.
One downside is that everyone will retry if it fails, which can cause
some additional pressure in low-memory situations. Another downside is
we're taking a mutex on every KVM run, even if we were previously
successful in starting the vhost task, but that's really not such a
common operation that needs to be optimized to avoid this lock.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
arch/x86/include/asm/kvm_host.h | 3 +--
arch/x86/kvm/mmu/mmu.c | 23 +++++++++--------------
arch/x86/kvm/x86.c | 2 +-
3 files changed, 11 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b7af5902ff75..597c8e66fc204 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,7 +26,6 @@
#include <linux/irqbypass.h>
#include <linux/kfifo.h>
#include <linux/sched/vhost_task.h>
-#include <linux/call_once.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -1466,7 +1465,7 @@ struct kvm_arch {
struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
struct vhost_task *nx_huge_page_recovery_thread;
u64 nx_huge_page_last;
- struct once nx_once;
+ struct mutex nx_lock;
#ifdef CONFIG_X86_64
/* The number of TDP MMU pages across all roots. */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 18ca1ea6dc240..eb6b625f6f43a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7460,34 +7460,29 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
return true;
}
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+int kvm_mmu_post_init_vm(struct kvm *kvm)
{
- struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
- struct kvm *kvm = container_of(ka, struct kvm, arch);
struct vhost_task *nx_thread;
+ if (nx_hugepage_mitigation_hard_disabled)
+ return 0;
+
+ guard(mutex)(&kvm->arch.nx_lock);
+ if (kvm->arch.nx_huge_page_recovery_thread)
+ return 0;
+
kvm->arch.nx_huge_page_last = get_jiffies_64();
nx_thread = vhost_task_create(kvm_nx_huge_page_recovery_worker,
kvm_nx_huge_page_recovery_worker_kill,
kvm, "kvm-nx-lpage-recovery");
if (IS_ERR(nx_thread))
- return;
+ return PTR_ERR(nx_thread);
vhost_task_start(nx_thread);
/* Make the task visible only once it is fully started. */
WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
-}
-
-int kvm_mmu_post_init_vm(struct kvm *kvm)
-{
- if (nx_hugepage_mitigation_hard_disabled)
- return 0;
-
- call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
- if (!kvm->arch.nx_huge_page_recovery_thread)
- return -ENOMEM;
return 0;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02159c967d29e..872498566b540 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12744,7 +12744,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
"does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
}
- once_init(&kvm->arch.nx_once);
+ mutex_init(&kvm->arch.nx_lock);
return 0;
out_uninit_mmu:
--
2.43.5
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC 2/2] kvm: retry nx_huge_page_recovery_thread creation
2025-02-26 2:43 [RFC 2/2] kvm: retry nx_huge_page_recovery_thread creation Keith Busch
@ 2025-02-26 20:13 ` Sean Christopherson
2025-02-26 20:22 ` Keith Busch
0 siblings, 1 reply; 3+ messages in thread
From: Sean Christopherson @ 2025-02-26 20:13 UTC (permalink / raw)
To: Keith Busch; +Cc: pbonzini, kvm, x86, virtualization, linux-kernel, Keith Busch
On Tue, Feb 25, 2025, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> A VMM may send a signal to its threads while they've entered KVM_RUN. If
> that thread happens to be trying to make the huge page recovery vhost
> task, then it fails with -ERESTARTNOINTR. We need to retry if that
> happens, so we can't use call_once anymore. Replace it with a simple
> mutex and return the appropriate error instead of defaulting to ENOMEM.
>
> One downside is that everyone will retry if it fails, which can cause
> some additional pressure on low memory situations. Another downside is
> we're taking a mutex on every KVM run, even if we were previously
> successful in starting the vhost task, but that's really not such a
> common operation that needs to be optimized to avoid this lock.
Yes, it is. NAK to taking a VM-wide mutex on KVM_RUN.
I much prefer my (misguided in the original context[*]) approach of marking the
call_once() COMPLETED if and only if it succeeds.
diff --git a/include/linux/call_once.h b/include/linux/call_once.h
index 6261aa0b3fb0..9d47ed50139b 100644
--- a/include/linux/call_once.h
+++ b/include/linux/call_once.h
@@ -26,20 +26,28 @@ do { \
__once_init((once), #once, &__key); \
} while (0)
-static inline void call_once(struct once *once, void (*cb)(struct once *))
+static inline int call_once(struct once *once, int (*cb)(struct once *))
{
+ int r;
+
/* Pairs with atomic_set_release() below. */
if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
- return;
+ return 0;
guard(mutex)(&once->lock);
WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
if (atomic_read(&once->state) != ONCE_NOT_STARTED)
- return;
+ return -EINVAL;
atomic_set(&once->state, ONCE_RUNNING);
- cb(once);
+ r = cb(once);
+ if (r) {
+ atomic_set(&once->state, ONCE_NOT_STARTED);
+ return r;
+ }
+
atomic_set_release(&once->state, ONCE_COMPLETED);
+ return 0;
}
#endif /* _LINUX_CALL_ONCE_H */
[*] https://lore.kernel.org/all/Z5e4w7IlEEk2cpH-@google.com
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC 2/2] kvm: retry nx_huge_page_recovery_thread creation
2025-02-26 20:13 ` Sean Christopherson
@ 2025-02-26 20:22 ` Keith Busch
0 siblings, 0 replies; 3+ messages in thread
From: Keith Busch @ 2025-02-26 20:22 UTC (permalink / raw)
To: Sean Christopherson
Cc: Keith Busch, pbonzini, kvm, x86, virtualization, linux-kernel
On Wed, Feb 26, 2025 at 12:13:42PM -0800, Sean Christopherson wrote:
> I much prefer my (misguided in the original context[*]) approach of marking the
> call_once() COMPLETED if and only if it succeeds.
I have a new appreciation for this approach given our recent
discoveries. I was mistaken in assuming there were no retryable errors
here.
Thanks for the suggestion, I'll merge your proposal with the kvm side and
give it a test.
> diff --git a/include/linux/call_once.h b/include/linux/call_once.h
> index 6261aa0b3fb0..9d47ed50139b 100644
> --- a/include/linux/call_once.h
> +++ b/include/linux/call_once.h
> @@ -26,20 +26,28 @@ do { \
> __once_init((once), #once, &__key); \
> } while (0)
>
> -static inline void call_once(struct once *once, void (*cb)(struct once *))
> +static inline int call_once(struct once *once, int (*cb)(struct once *))
> {
> + int r;
> +
> /* Pairs with atomic_set_release() below. */
> if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
> - return;
> + return 0;
>
> guard(mutex)(&once->lock);
> WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
> if (atomic_read(&once->state) != ONCE_NOT_STARTED)
> - return;
> + return -EINVAL;
>
> atomic_set(&once->state, ONCE_RUNNING);
> - cb(once);
> + r = cb(once);
> + if (r) {
> + atomic_set(&once->state, ONCE_NOT_STARTED);
> + return r;
> + }
> +
> atomic_set_release(&once->state, ONCE_COMPLETED);
> + return 0;
> }
>
> #endif /* _LINUX_CALL_ONCE_H */
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-02-26 20:22 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-26 2:43 [RFC 2/2] kvm: retry nx_huge_page_recovery_thread creation Keith Busch
2025-02-26 20:13 ` Sean Christopherson
2025-02-26 20:22 ` Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox