All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: kvm@vger.kernel.org, Paolo Bonzini <pbonzini@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>,
	Michael Kelley <mikelley@microsoft.com>,
	Siddharth Chandrasekaran <sidcha@amazon.de>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 03/31] KVM: x86: hyper-v: Handle HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls gently
Date: Thu, 7 Apr 2022 17:33:00 +0000	[thread overview]
Message-ID: <Yk8gTB+x2UVE34Ds@google.com> (raw)
In-Reply-To: <20220407155645.940890-4-vkuznets@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 6695 bytes --]

On Thu, Apr 07, 2022, Vitaly Kuznetsov wrote:
> Currently, HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls are handled
> the exact same way as HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE{,EX}: by
> flushing the whole VPID and this is sub-optimal. Switch to handling
> these requests with 'flush_tlb_gva()' hooks instead. Use the newly
> introduced TLB flush ring to queue the requests.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/kvm/hyperv.c | 141 ++++++++++++++++++++++++++++++++++++------
>  1 file changed, 121 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 81c44e0eadf9..a54d41656f30 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1792,6 +1792,35 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
>  			      var_cnt * sizeof(*sparse_banks));
>  }
>  
> +static int kvm_hv_get_tlbflush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
> +				       u32 data_offset, int consumed_xmm_halves)

data_offset should be gpa_t, and the order of params should be consistent between
this and kvm_get_sparse_vp_set().

> +{
> +	int i;
> +
> +	if (hc->fast) {
> +		/*
> +		 * Each XMM holds two entries, but do not count halves that
> +		 * have already been consumed.
> +		 */
> +		if (hc->rep_cnt > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves))
> +			return -EINVAL;
> +
> +		for (i = 0; i < hc->rep_cnt; i++) {
> +			int j = i + consumed_xmm_halves;
> +
> +			if (j % 2)
> +				entries[i] = sse128_hi(hc->xmm[j / 2]);
> +			else
> +				entries[i] = sse128_lo(hc->xmm[j / 2]);
> +		}
> +
> +		return 0;
> +	}
> +
> +	return kvm_read_guest(kvm, hc->ingpa + data_offset,
> +			      entries, hc->rep_cnt * sizeof(entries[0]));

This is almost verbatim copy+pasted from kvm_get_sparse_vp_set().  If you slot in
the attached patch before this, then this function becomes:

static int kvm_hv_get_tlbflush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
				       int consumed_xmm_halves, gpa_t offset)
{
	return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt,
				  entries, consumed_xmm_halves, offset);
}


> +}

...

> @@ -1840,15 +1891,47 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_vcpu_hv_tlbflush_ring *tlb_flush_ring;
>  	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> -
> -	kvm_vcpu_flush_tlb_guest(vcpu);
> -
> -	if (!hv_vcpu)
> +	struct kvm_vcpu_hv_tlbflush_entry *entry;
> +	int read_idx, write_idx;
> +	u64 address;
> +	u32 count;
> +	int i, j;
> +
> +	if (!tdp_enabled || !hv_vcpu) {
> +		kvm_vcpu_flush_tlb_guest(vcpu);
>  		return;
> +	}
>  
>  	tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
> +	read_idx = READ_ONCE(tlb_flush_ring->read_idx);
> +	write_idx = READ_ONCE(tlb_flush_ring->write_idx);
> +
> +	/* Pairs with smp_wmb() in hv_tlb_flush_ring_enqueue() */
> +	smp_rmb();
>  
> -	tlb_flush_ring->read_idx = tlb_flush_ring->write_idx;
> +	for (i = read_idx; i != write_idx; i = (i + 1) % KVM_HV_TLB_FLUSH_RING_SIZE) {
> +		entry = &tlb_flush_ring->entries[i];
> +
> +		if (entry->flush_all)
> +			goto out_flush_all;
> +
> +		/*
> +		 * Lower 12 bits of 'address' encode the number of additional
> +		 * pages to flush.
> +		 */
> +		address = entry->addr & PAGE_MASK;
> +		count = (entry->addr & ~PAGE_MASK) + 1;
> +		for (j = 0; j < count; j++)
> +			static_call(kvm_x86_flush_tlb_gva)(vcpu, address + j * PAGE_SIZE);
> +	}
> +	++vcpu->stat.tlb_flush;
> +	goto out_empty_ring;
> +
> +out_flush_all:
> +	kvm_vcpu_flush_tlb_guest(vcpu);
> +
> +out_empty_ring:
> +	tlb_flush_ring->read_idx = write_idx;
>  }
>  
>  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> @@ -1857,12 +1940,13 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  	struct hv_tlb_flush_ex flush_ex;
>  	struct hv_tlb_flush flush;
>  	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
> +	u64 entries[KVM_HV_TLB_FLUSH_RING_SIZE - 2];

What's up with the -2?  And given the multitude of things going on in this code,
I'd strongly prefer this be tlbflush_entries.

Actually, if you do:

	u64 __tlbflush_entries[KVM_HV_TLB_FLUSH_RING_SIZE - 2];
	u64 *tlbflush_entries;

and drop all_addr, the code to get entries can be

	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
	    hc->rep_cnt > ARRAY_SIZE(__tlbflush_entries)) {
		tlbflush_entries = NULL;
	} else {
		if (kvm_hv_get_tlbflush_entries(kvm, hc, __tlbflush_entries,
						consumed_xmm_halves, data_offset))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
		tlbflush_entries = __tlbflush_entries;
	}

and the calls to queue flushes becomes

			hv_tlb_flush_ring_enqueue(v, tlbflush_entries, hc->rep_cnt);

That way a bug will "just" be a NULL pointer dereference and not consumption of
uninitialized data (though such a bug might be caught by the compiler).

>  	u64 valid_bank_mask;
>  	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
>  	struct kvm_vcpu *v;
>  	unsigned long i;
> -	bool all_cpus;
> -
> +	bool all_cpus, all_addr;
> +	int data_offset = 0, consumed_xmm_halves = 0;

data_offset should be a gpa_t.

>  	/*
>  	 * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the
>  	 * valid mask is a u64.  Fail the build if KVM's max allowed number of

...

> +read_flush_entries:
> +	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
> +	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
> +	    hc->rep_cnt > (KVM_HV_TLB_FLUSH_RING_SIZE - 2)) {

Rather than duplicate the -2 magic, it's far better to use ARRAY_SIZE() on the
on-stack entries array, as in the snippet suggested above.


> +		all_addr = true;
> +	} else {
> +		if (kvm_hv_get_tlbflush_entries(kvm, hc, entries,
> +						data_offset, consumed_xmm_halves))

As mentioned, the order for this call should match kvm_get_sparse_vp_set().

>  			return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +		all_addr = false;
>  	}
>  
> -do_flush:
> +
>  	/*
>  	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
>  	 * analyze it here, flush TLB regardless of the specified address space.
>  	 */
>  	if (all_cpus) {
>  		kvm_for_each_vcpu(i, v, kvm)
> -			hv_tlb_flush_ring_enqueue(v);
> +			hv_tlb_flush_ring_enqueue(v, all_addr, entries, hc->rep_cnt);
>  
>  		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
>  	} else {
> @@ -1951,7 +2052,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  			v = kvm_get_vcpu(kvm, i);
>  			if (!v)
>  				continue;
> -			hv_tlb_flush_ring_enqueue(v);
> +			hv_tlb_flush_ring_enqueue(v, all_addr, entries, hc->rep_cnt);
>  		}
>  
>  		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
> -- 
> 2.35.1
> 

[-- Attachment #2: 0001-KVM-x86-hyper-v-Add-helper-to-read-hypercall-data-fo.patch --]
[-- Type: text/x-diff, Size: 4043 bytes --]

From ad6033048d498baba7889ae0e14788c92d4baacb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 7 Apr 2022 09:52:46 -0700
Subject: [PATCH] KVM: x86: hyper-v: Add helper to read hypercall data for
 array

Move the guts of kvm_get_sparse_vp_set() to a helper so that the code for
reading a guest-provided array can be reused in the future, e.g. for
getting a list of virtual addresses whose TLB entries need to be flushed.

Opportunistically swap the order of the data and XMM adjustment so that
the XMM/gpa offsets are bundled together.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/hyperv.c | 53 +++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index e4f381b46a28..58e7aff6057a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1782,38 +1782,51 @@ struct kvm_hv_hcall {
 	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
 };
 
-static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
-				 int consumed_xmm_halves,
-				 u64 *sparse_banks, gpa_t offset)
+
+static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
+			      u16 orig_cnt, u16 cnt_cap, u64 *data,
+			      int consumed_xmm_halves, gpa_t offset)
 {
-	u16 var_cnt;
-	int i;
-
-	if (hc->var_cnt > 64)
-		return -EINVAL;
-
-	/* Ignore banks that cannot possibly contain a legal VP index. */
-	var_cnt = min_t(u16, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS);
+	/*
+	 * Preserve the original count when ignoring entries via a "cap", KVM
+	 * still needs to validate the guest input (though the non-XMM path
+	 * punts on the checks).
+	 */
+	u16 cnt = min(orig_cnt, cnt_cap);
+	int i, j;
 
 	if (hc->fast) {
 		/*
 		 * Each XMM holds two sparse banks, but do not count halves that
 		 * have already been consumed for hypercall parameters.
 		 */
-		if (hc->var_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
+		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
-		for (i = 0; i < var_cnt; i++) {
-			int j = i + consumed_xmm_halves;
+
+		for (i = 0; i < cnt; i++) {
+			j = i + consumed_xmm_halves;
 			if (j % 2)
-				sparse_banks[i] = sse128_hi(hc->xmm[j / 2]);
+				data[i] = sse128_hi(hc->xmm[j / 2]);
 			else
-				sparse_banks[i] = sse128_lo(hc->xmm[j / 2]);
+				data[i] = sse128_lo(hc->xmm[j / 2]);
 		}
 		return 0;
 	}
 
-	return kvm_read_guest(kvm, hc->ingpa + offset, sparse_banks,
-			      var_cnt * sizeof(*sparse_banks));
+	return kvm_read_guest(kvm, hc->ingpa + offset, data,
+			      cnt * sizeof(*data));
+}
+
+static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
+				 u64 *sparse_banks, int consumed_xmm_halves,
+				 gpa_t offset)
+{
+	if (hc->var_cnt > 64)
+		return -EINVAL;
+
+	/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
+	return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
+				  sparse_banks, consumed_xmm_halves, offset);
 }
 
 static inline int hv_tlb_flush_ring_free(struct kvm_vcpu_hv *hv_vcpu,
@@ -1952,7 +1965,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		if (!hc->var_cnt)
 			goto ret_success;
 
-		if (kvm_get_sparse_vp_set(kvm, hc, 2, sparse_banks,
+		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 2,
 					  offsetof(struct hv_tlb_flush_ex,
 						   hv_vp_set.bank_contents)))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -2063,7 +2076,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		if (!hc->var_cnt)
 			goto ret_success;
 
-		if (kvm_get_sparse_vp_set(kvm, hc, 1, sparse_banks,
+		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1,
 					  offsetof(struct hv_send_ipi_ex,
 						   vp_set.bank_contents)))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;

base-commit: 9e28f2680fd1606225ab456bb28d30598110a520
-- 
2.35.1.1178.g4f1659d476-goog


  reply	other threads:[~2022-04-07 17:33 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-07 15:56 [PATCH v2 00/31] KVM: x86: hyper-v: Fine-grained TLB flush + Direct TLB flush feature Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 01/31] KVM: x86: hyper-v: Resurrect dedicated KVM_REQ_HV_TLB_FLUSH flag Vitaly Kuznetsov
2022-04-07 18:02   ` Sean Christopherson
2022-04-07 15:56 ` [PATCH v2 02/31] KVM: x86: hyper-v: Introduce TLB flush ring Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 03/31] KVM: x86: hyper-v: Handle HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls gently Vitaly Kuznetsov
2022-04-07 17:33   ` Sean Christopherson [this message]
2022-04-07 17:47     ` Sean Christopherson
2022-04-11 11:15     ` Vitaly Kuznetsov
2022-04-07 17:44   ` Sean Christopherson
2022-04-11 11:31     ` Vitaly Kuznetsov
2022-04-11 20:37       ` Sean Christopherson
2022-04-07 15:56 ` [PATCH v2 04/31] KVM: x86: hyper-v: Expose support for extended gva ranges for flush hypercalls Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 05/31] KVM: x86: Prepare kvm_hv_flush_tlb() to handle L2's GPAs Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 06/31] KVM: x86: hyper-v: Don't use sparse_set_to_vcpu_mask() in kvm_hv_send_ipi() Vitaly Kuznetsov
2022-04-07 17:48   ` Sean Christopherson
2022-04-07 15:56 ` [PATCH v2 07/31] KVM: x86: hyper-v: Create a separate ring for Direct TLB flush Vitaly Kuznetsov
2022-04-07 17:57   ` Sean Christopherson
2022-04-07 15:56 ` [PATCH v2 08/31] KVM: x86: hyper-v: Use preallocated buffer in 'struct kvm_vcpu_hv' instead of on-stack 'sparse_banks' Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 09/31] KVM: nVMX: Keep track of hv_vm_id/hv_vp_id when eVMCS is in use Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 10/31] KVM: nSVM: Keep track of Hyper-V hv_vm_id/hv_vp_id Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 11/31] KVM: x86: Introduce .post_hv_direct_flush() nested hook Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 12/31] KVM: x86: hyper-v: Introduce kvm_hv_is_tlb_flush_hcall() Vitaly Kuznetsov
2022-04-07 18:07   ` Sean Christopherson
2022-04-07 15:56 ` [PATCH v2 13/31] KVM: x86: hyper-v: Direct TLB flush Vitaly Kuznetsov
2022-04-07 18:27   ` Sean Christopherson
2022-04-14 12:24     ` Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 14/31] KVM: x86: hyper-v: Introduce fast kvm_hv_direct_tlb_flush_exposed() check Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 15/31] x86/hyperv: Fix 'struct hv_enlightened_vmcs' definition Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 16/31] KVM: nVMX: hyper-v: Direct TLB flush Vitaly Kuznetsov
2022-04-07 18:47   ` Sean Christopherson
2022-04-11 11:19     ` Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 17/31] KVM: x86: KVM_REQ_TLB_FLUSH_CURRENT is a superset of KVM_REQ_HV_TLB_FLUSH too Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 18/31] KVM: nSVM: hyper-v: Direct TLB flush Vitaly Kuznetsov
2022-04-07 18:50   ` Sean Christopherson
2022-04-07 15:56 ` [PATCH v2 19/31] KVM: x86: Expose Hyper-V Direct TLB flush feature Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 20/31] KVM: selftests: add hyperv_svm_test to .gitignore Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 21/31] KVM: selftests: Better XMM read/write helpers Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 22/31] KVM: selftests: Hyper-V PV IPI selftest Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 23/31] KVM: selftests: Make it possible to replace PTEs with __virt_pg_map() Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 24/31] KVM: selftests: Hyper-V PV TLB flush selftest Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 25/31] KVM: selftests: Sync 'struct hv_enlightened_vmcs' definition with hyperv-tlfs.h Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 26/31] KVM: selftests: nVMX: Allocate Hyper-V partition assist page Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 27/31] KVM: selftests: nSVM: Allocate Hyper-V partition assist and VP assist pages Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 28/31] KVM: selftests: Sync 'struct hv_vp_assist_page' definition with hyperv-tlfs.h Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 29/31] KVM: selftests: evmcs_test: Direct TLB flush test Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 30/31] KVM: selftests: Move Hyper-V VP assist page enablement out of evmcs.h Vitaly Kuznetsov
2022-04-07 15:56 ` [PATCH v2 31/31] KVM: selftests: hyperv_svm_test: Add Direct TLB flush test Vitaly Kuznetsov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Yk8gTB+x2UVE34Ds@google.com \
    --to=seanjc@google.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=pbonzini@redhat.com \
    --cc=sidcha@amazon.de \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.