public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	Ben Gardon <bgardon@google.com>,
	Makarand Sonare <makarandsonare@google.com>
Subject: [PATCH 11/14] KVM: VMX: Dynamically enable/disable PML based on memslot dirty logging
Date: Fri, 12 Feb 2021 16:50:12 -0800	[thread overview]
Message-ID: <20210213005015.1651772-12-seanjc@google.com> (raw)
In-Reply-To: <20210213005015.1651772-1-seanjc@google.com>

From: Makarand Sonare <makarandsonare@google.com>

Currently, if enable_pml=1, PML remains enabled for the entire lifetime
of the VM irrespective of whether dirty logging is enabled or disabled.
When dirty logging is disabled, all the pages of the VM are manually
marked dirty, so that PML is effectively non-operational.  Setting
the dirty bits is an expensive operation which can cause severe MMU
lock contention in a performance sensitive path when dirty logging is
disabled after a failed or canceled live migration.

Manually setting dirty bits also fails to prevent PML activity if some
code path clears dirty bits, which can incur unnecessary VM-Exits.

In order to avoid this extra overhead, dynamically enable/disable PML
when dirty logging gets turned on/off for the first/last memslot.

Signed-off-by: Makarand Sonare <makarandsonare@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  4 ++++
 arch/x86/kvm/vmx/nested.c          |  5 +++++
 arch/x86/kvm/vmx/vmx.c             | 28 +++++++++++++++++++++++-
 arch/x86/kvm/vmx/vmx.h             |  2 ++
 arch/x86/kvm/x86.c                 | 35 ++++++++++++++++++++++++++----
 6 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 90affdb2cbbc..323641097f63 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -93,6 +93,7 @@ KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
 KVM_X86_OP_NULL(request_immediate_exit)
 KVM_X86_OP(sched_in)
+KVM_X86_OP_NULL(update_cpu_dirty_logging)
 KVM_X86_OP_NULL(pre_block)
 KVM_X86_OP_NULL(post_block)
 KVM_X86_OP_NULL(vcpu_blocking)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5cf382ec48b0..ffcfa84c969d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -89,6 +89,8 @@
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
+#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1007,6 +1009,7 @@ struct kvm_arch {
 	u32 bsp_vcpu_id;
 
 	u64 disabled_quirks;
+	int cpu_dirty_logging_count;
 
 	enum kvm_irqchip_mode irqchip_mode;
 	u8 nr_reserved_ioapic_pins;
@@ -1275,6 +1278,7 @@ struct kvm_x86_ops {
 	 * value indicates CPU dirty logging is unsupported or disabled.
 	 */
 	int cpu_dirty_log_size;
+	void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
 
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c6dda9980a6..a63da447ede9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4493,6 +4493,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		vmx_set_virtual_apic_mode(vcpu);
 	}
 
+	if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
+		vmx->nested.update_vmcs01_cpu_dirty_logging = false;
+		vmx_update_cpu_dirty_logging(vcpu);
+	}
+
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_clean(vmx->nested.apic_access_page);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 862d1f5627e7..1204e5f0fe67 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4277,7 +4277,12 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	*/
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 
-	if (!enable_pml)
+	/*
+	 * PML is enabled/disabled when dirty logging of memslots changes, but
+	 * it needs to be set here when dirty logging is already active, e.g.
+	 * if this vCPU was created after dirty logging was enabled.
+	 */
+	if (!vcpu->kvm->arch.cpu_dirty_logging_count)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
 	if (cpu_has_vmx_xsaves()) {
@@ -7499,6 +7504,26 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 		shrink_ple_window(vcpu);
 }
 
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (is_guest_mode(vcpu)) {
+		vmx->nested.update_vmcs01_cpu_dirty_logging = true;
+		return;
+	}
+
+	/*
+	 * Note, cpu_dirty_logging_count can be changed concurrently with this
+	 * code, but in that case another update request will be made and so
+	 * the guest will never run with a stale PML value.
+	 */
+	if (vcpu->kvm->arch.cpu_dirty_logging_count)
+		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
+	else
+		secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
+}
+
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
 {
 	if (pi_pre_block(vcpu))
@@ -7706,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.sched_in = vmx_sched_in,
 
 	.cpu_dirty_log_size = PML_ENTITY_NUM,
+	.update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
 
 	.pre_block = vmx_pre_block,
 	.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 12c53d05a902..89da5e1251f1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -165,6 +165,7 @@ struct nested_vmx {
 
 	bool change_vmcs01_virtual_apic_mode;
 	bool reload_vmcs01_apic_access_page;
+	bool update_vmcs01_cpu_dirty_logging;
 
 	/*
 	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -393,6 +394,7 @@ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
 	u32 msr, int type, bool value);
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 
 static inline u8 vmx_get_rvi(void)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c0d22f19aed0..b9a8c8af9713 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8987,6 +8987,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_check_async_pf_completion(vcpu);
 		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
 			static_call(kvm_x86_msr_filter_changed)(vcpu);
+
+		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
+			static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -10755,14 +10758,38 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+
+static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
+{
+	struct kvm_arch *ka = &kvm->arch;
+
+	if (!kvm_x86_ops.cpu_dirty_log_size)
+		return;
+
+	if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
+	    (!enable && --ka->cpu_dirty_logging_count == 0))
+		kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
+
+	WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
+}
+
 static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 				     struct kvm_memory_slot *old,
 				     struct kvm_memory_slot *new,
 				     enum kvm_mr_change change)
 {
+	bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+
 	/*
-	 * Nothing to do for RO slots (which can't be dirtied and can't be made
-	 * writable) or CREATE/MOVE/DELETE of a slot.  See comments below.
+	 * Update CPU dirty logging if dirty logging is being toggled.  This
+	 * applies to all operations.
+	 */
+	if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
+		kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
+
+	/*
+	 * Nothing more to do for RO slots (which can't be dirtied and can't be
+	 * made writable) or CREATE/MOVE/DELETE of a slot.  See comments below.
 	 */
 	if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
 		return;
@@ -10792,7 +10819,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	 * MOVE/DELETE: The old mappings will already have been cleaned up by
 	 *		kvm_arch_flush_shadow_memslot()
 	 */
-	if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+	if (!log_dirty_pages)
 		kvm_mmu_zap_collapsible_sptes(kvm, new);
 
 	/*
@@ -10823,7 +10850,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	 * initial-all-set state.  Otherwise, depending on whether pml
 	 * is enabled the D-bit or the W-bit will be cleared.
 	 */
-	if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+	if (log_dirty_pages) {
 		if (kvm_x86_ops.cpu_dirty_log_size) {
 			if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
 				kvm_mmu_slot_leaf_clear_dirty(kvm, new);
-- 
2.30.0.478.g8a0d178c01-goog


  parent reply	other threads:[~2021-02-13  0:52 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-13  0:50 [PATCH 00/14] KVM: x86/mmu: Dirty logging fixes and improvements Sean Christopherson
2021-02-13  0:50 ` [PATCH 01/14] KVM: x86/mmu: Expand collapsible SPTE zap for TDP MMU to ZONE_DEVICE pages Sean Christopherson
2021-02-18 12:36   ` Paolo Bonzini
2021-02-13  0:50 ` [PATCH 02/14] KVM: x86/mmu: Don't unnecessarily write-protect small pages in TDP MMU Sean Christopherson
2021-02-13  0:50 ` [PATCH 03/14] KVM: x86/mmu: Split out max mapping level calculation to helper Sean Christopherson
2021-02-13  0:50 ` [PATCH 04/14] KVM: x86/mmu: Pass the memslot to the rmap callbacks Sean Christopherson
2021-02-13  0:50 ` [PATCH 05/14] KVM: x86/mmu: Consult max mapping level when zapping collapsible SPTEs Sean Christopherson
2021-02-18 12:43   ` Paolo Bonzini
2021-02-18 16:23     ` Sean Christopherson
2021-02-18 22:30       ` Mike Kravetz
2021-02-19  1:31         ` Sean Christopherson
2021-02-13  0:50 ` [PATCH 06/14] KVM: nVMX: Disable PML in hardware when running L2 Sean Christopherson
2021-02-13  0:50 ` [PATCH 07/14] KVM: x86/mmu: Expand on the comment in kvm_vcpu_ad_need_write_protect() Sean Christopherson
2021-02-13  0:50 ` [PATCH 08/14] KVM: x86/mmu: Make dirty log size hook (PML) a value, not a function Sean Christopherson
2021-02-18 12:45   ` Paolo Bonzini
2021-02-13  0:50 ` [PATCH 09/14] KVM: x86: Move MMU's PML logic to common code Sean Christopherson
2021-02-13  0:50 ` [PATCH 10/14] KVM: x86: Further clarify the logic and comments for toggling log dirty Sean Christopherson
2021-02-18 12:50   ` Paolo Bonzini
2021-02-18 16:15     ` Sean Christopherson
2021-02-18 16:56       ` Paolo Bonzini
2021-02-13  0:50 ` Sean Christopherson [this message]
2021-02-13  0:50 ` [PATCH 12/14] KVM: x86/mmu: Don't set dirty bits when disabling dirty logging w/ PML Sean Christopherson
2021-02-18 17:08   ` Paolo Bonzini
2021-02-13  0:50 ` [PATCH 13/14] KVM: x86: Fold "write-protect large" use case into generic write-protect Sean Christopherson
2021-02-13  0:50 ` [PATCH 14/14] KVM: x86/mmu: Remove a variety of unnecessary exports Sean Christopherson
2021-02-17 22:50 ` [PATCH 00/14] KVM: x86/mmu: Dirty logging fixes and improvements Sean Christopherson
2021-02-18 12:57 ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210213005015.1651772-12-seanjc@google.com \
    --to=seanjc@google.com \
    --cc=bgardon@google.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=makarandsonare@google.com \
    --cc=pbonzini@redhat.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox