public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm: defer huge page recovery vhost task to later
@ 2025-01-23 15:35 Keith Busch
  2025-01-24 15:28 ` Paolo Bonzini
  2025-01-24 20:07 ` Sean Christopherson
  0 siblings, 2 replies; 8+ messages in thread
From: Keith Busch @ 2025-01-23 15:35 UTC (permalink / raw)
  To: kvm, x86, linux-kernel
  Cc: Vlad Poenaru, tj, Keith Busch, Sean Christopherson, Paolo Bonzini,
	Alyssa Ross

From: Keith Busch <kbusch@kernel.org>

Some libraries want to ensure they are single threaded before forking,
so making the kernel's kvm huge page recovery process a vhost task of
the user process breaks those. The minijail library used by crosvm is
one such affected application.

Defer the task to after the first VM_RUN call, which occurs after the
parent process has forked all its jailed processes. This needs to happen
only once for the kvm instance, so this patch introduces infrastructure
to do that (Suggested-by Paolo).

Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Alyssa Ross <hi@alyssa.is>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 arch/x86/include/asm/kvm_call_once.h | 44 ++++++++++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h      |  2 ++
 arch/x86/kvm/mmu/mmu.c               | 18 ++++++++----
 arch/x86/kvm/x86.c                   |  7 ++++-
 4 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 arch/x86/include/asm/kvm_call_once.h

diff --git a/arch/x86/include/asm/kvm_call_once.h b/arch/x86/include/asm/kvm_call_once.h
new file mode 100644
index 0000000000000..451cc87084aa7
--- /dev/null
+++ b/arch/x86/include/asm/kvm_call_once.h
@@ -0,0 +1,44 @@
+#ifndef _LINUX_CALL_ONCE_H
+#define _LINUX_CALL_ONCE_H
+
+#include <linux/types.h>
+
+#define ONCE_NOT_STARTED 0
+#define ONCE_RUNNING     1
+#define ONCE_COMPLETED   2
+
+struct once {
+        atomic_t state;
+        struct mutex lock;
+};
+
+static inline void __once_init(struct once *once, const char *name,
+			       struct lock_class_key *key)
+{
+        atomic_set(&once->state, ONCE_NOT_STARTED);
+        __mutex_init(&once->lock, name, key);
+}
+
+#define once_init(once)							\
+do {									\
+	static struct lock_class_key __key;				\
+	__once_init((once), #once, &__key);				\
+} while (0)
+
+static inline void call_once(struct once *once, void (*cb)(struct once *))
+{
+        /* Pairs with atomic_set_release() below.  */
+        if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+                return;
+
+        guard(mutex)(&once->lock);
+        WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
+        if (atomic_read(&once->state) != ONCE_NOT_STARTED)
+                return;
+
+        atomic_set(&once->state, ONCE_RUNNING);
+        cb(once);
+        atomic_set_release(&once->state, ONCE_COMPLETED);
+}
+
+#endif /* _LINUX_CALL_ONCE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2f442701dc755..e1eb8155e6a82 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,6 +37,7 @@
 #include <asm/kvm_page_track.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
+#include <asm/kvm_call_once.h>
 #include <asm/reboot.h>
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -1466,6 +1467,7 @@ struct kvm_arch {
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
 	struct vhost_task *nx_huge_page_recovery_thread;
 	u64 nx_huge_page_last;
+	struct once nx_once;
 
 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 26b4ba7e7cb5e..a45ae60e84ab4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7447,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	return true;
 }
 
-int kvm_mmu_post_init_vm(struct kvm *kvm)
+static void kvm_mmu_start_lpage_recovery(struct once *once)
 {
-	if (nx_hugepage_mitigation_hard_disabled)
-		return 0;
+	struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
+	struct kvm *kvm = container_of(ka, struct kvm, arch);
 
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
 		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
 		kvm, "kvm-nx-lpage-recovery");
 
+	if (kvm->arch.nx_huge_page_recovery_thread)
+		vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+	if (nx_hugepage_mitigation_hard_disabled)
+		return 0;
+
+	call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 	if (!kvm->arch.nx_huge_page_recovery_thread)
 		return -ENOMEM;
-
-	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6e248152fa134..6d4a6734b2d69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11471,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	int r;
 
+	r = kvm_mmu_post_init_vm(vcpu->kvm);
+	if (r)
+		return r;
+
 	vcpu_load(vcpu);
 	kvm_sigset_activate(vcpu);
 	kvm_run->flags = 0;
@@ -12748,7 +12752,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 int kvm_arch_post_init_vm(struct kvm *kvm)
 {
-	return kvm_mmu_post_init_vm(kvm);
+	once_init(&kvm->arch.nx_once);
+	return 0;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread
* [PATCH] kvm: defer huge page recovery vhost task to later
@ 2025-01-14 18:22 Keith Busch
  0 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2025-01-14 18:22 UTC (permalink / raw)
  To: kvm, pbonzini; +Cc: linux-kernel, tj, Keith Busch

From: Keith Busch <kbusch@kernel.org>

Some libraries ensure they are single threaded before forking. This
assumption breaks after making kvm hugepage recovery thread a vhost task
of the user process. The minijail library used by crosvm is one such
affected application.

Defer the task to after the first VM_RUN call, which occurs after the
parent process has forked all its jailed child processes and should be
safe to start the vhost task.

Link: https://lore.kernel.org/kvm/Z2RYyagu3phDFIac@kbusch-mbp.dhcp.thefacebook.com/
Fixes: d96c77bd4eeba46 ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 arch/x86/kvm/mmu/mmu.c   |  2 ++
 arch/x86/kvm/x86.c       |  9 ++++-----
 include/linux/kvm_host.h |  1 -
 virt/kvm/kvm_main.c      | 15 ---------------
 4 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2401606db2604..422b6b06de4fe 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7415,6 +7415,8 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
 {
 	if (nx_hugepage_mitigation_hard_disabled)
 		return 0;
+	if (kvm->arch.nx_huge_page_recovery_thread)
+		return 0;
 
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c79a8cc57ba42..263363c46626b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11463,6 +11463,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	int r;
 
+	r = kvm_mmu_post_init_vm(vcpu->kvm);
+	if (r)
+		return r;
+
 	vcpu_load(vcpu);
 	kvm_sigset_activate(vcpu);
 	kvm_run->flags = 0;
@@ -12740,11 +12744,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	return ret;
 }
 
-int kvm_arch_post_init_vm(struct kvm *kvm)
-{
-	return kvm_mmu_post_init_vm(kvm);
-}
-
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
 	vcpu_load(vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 401439bb21e3e..a219bd2d8aec8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1596,7 +1596,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
 bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
 bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
-int kvm_arch_post_init_vm(struct kvm *kvm);
 void kvm_arch_pre_destroy_vm(struct kvm *kvm);
 void kvm_arch_create_vm_debugfs(struct kvm *kvm);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de2c11dae2316..adacc6eaa7d9d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1065,15 +1065,6 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
 	return ret;
 }
 
-/*
- * Called after the VM is otherwise initialized, but just before adding it to
- * the vm_list.
- */
-int __weak kvm_arch_post_init_vm(struct kvm *kvm)
-{
-	return 0;
-}
-
 /*
  * Called just after removing the VM from the vm_list, but before doing any
  * other destruction.
@@ -1194,10 +1185,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 	if (r)
 		goto out_err_no_debugfs;
 
-	r = kvm_arch_post_init_vm(kvm);
-	if (r)
-		goto out_err;
-
 	mutex_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	mutex_unlock(&kvm_lock);
@@ -1207,8 +1194,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 
 	return kvm;
 
-out_err:
-	kvm_destroy_vm_debugfs(kvm);
 out_err_no_debugfs:
 	kvm_coalesced_mmio_free(kvm);
 out_no_coalesced_mmio:
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-01-25  4:05 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-01-23 15:35 [PATCH] kvm: defer huge page recovery vhost task to later Keith Busch
2025-01-24 15:28 ` Paolo Bonzini
2025-01-24 16:48   ` Keith Busch
2025-01-24 20:07 ` Sean Christopherson
2025-01-24 20:54   ` Keith Busch
2025-01-25  0:10     ` Sean Christopherson
2025-01-25  4:05       ` Keith Busch
  -- strict thread matches above, loose matches on Subject: below --
2025-01-14 18:22 Keith Busch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox