Re: [PATCH 0/4] KVM: x86: hyper-v: XMM fast hypercalls fixes

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Vitaly Kuznetsov <vkuznets@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>, kvm@vger.kernel.org
Cc: Sean Christopherson <seanjc@google.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>,
	Siddharth Chandrasekaran <sidcha@amazon.de>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 0/4] KVM: x86: hyper-v: XMM fast hypercalls fixes
Date: Fri, 25 Feb 2022 14:13:58 +0100	[thread overview]
Message-ID: <871qzrdr6x.fsf@redhat.com> (raw)
In-Reply-To: <b466b80c-21d1-f298-b4cd-a4b58988f767@redhat.com>

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 2/22/22 16:46, Vitaly Kuznetsov wrote:
>> While working on some Hyper-V TLB flush improvements and Direct TLB flush
>> feature for Hyper-V on KVM I experienced Windows Server 2019 crashes on
>> boot when XMM fast hypercall input feature is advertised. Turns out,
>> HVCALL_SEND_IPI_EX is also an XMM fast hypercall and returning an error
>> kills the guest. This is fixed in PATCH4. PATCH3 fixes erroneous capping
>> of sparse CPU banks for XMM fast TLB flush hypercalls. The problem should
>> be reproducible with >360 vCPUs.
>> 
>> Vitaly Kuznetsov (4):
>>    KVM: x86: hyper-v: Drop redundant 'ex' parameter from
>>      kvm_hv_send_ipi()
>>    KVM: x86: hyper-v: Drop redundant 'ex' parameter from
>>      kvm_hv_flush_tlb()
>>    KVM: x86: hyper-v: Fix the maximum number of sparse banks for XMM fast
>>      TLB flush hypercalls
>>    KVM: x86: hyper-v: HVCALL_SEND_IPI_EX is an XMM fast hypercall
>> 
>>   arch/x86/kvm/hyperv.c | 84 +++++++++++++++++++++++--------------------
>>   1 file changed, 45 insertions(+), 39 deletions(-)
>> 
>
> Merging this in 5.18 is a bit messy.  Please check that the below
> patch against kvm/next makes sense:

Something is wrong with the diff as it doesn't apply :-(

>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 653e08c993c4..98fb998c31ce 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1770,9 +1770,11 @@ struct kvm_hv_hcall {
>   };
>   
>   static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
> +				 int consumed_xmm_halves,
>   				 u64 *sparse_banks, gpa_t offset)
>   {
>   	u16 var_cnt;
> +	int i;
>   
>   	if (hc->var_cnt > 64)
>   		return -EINVAL;
> @@ -1780,13 +1782,29 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
>   	/* Ignore banks that cannot possibly contain a legal VP index. */
>   	var_cnt = min_t(u16, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS);
>   
> +	if (hc->fast) {
> +		/*
> +		 * Each XMM holds two sparse banks, but do not count halves that
> +		 * have already been consumed for hypercall parameters.
> +		 */
> +		if (hc->var_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
> +			return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +		for (i = 0; i < var_cnt; i++) {
> +			int j = i + consumed_xmm_halves;
> +			if (j % 2)
> +				sparse_banks[i] = sse128_lo(hc->xmm[j / 2]);
> +			else
> +				sparse_banks[i] = sse128_hi(hc->xmm[j / 2]);

Let's say we have 1 half of XMM0 consumed. Now:

 i = 0;
 j = 1;
 if (1) 
     sparse_banks[0] = sse128_lo(hc->xmm[0]); 

 This doesn't look right as we need to get the upper half of XMM0.

 I guess it should be reversed, 

     if (j % 2)
         sparse_banks[i] = sse128_hi(hc->xmm[j / 2]);
     else
         sparse_banks[i] = sse128_lo(hc->xmm[j / 2]);

> +		}
> +		return 0;
> +	}
> +
>   	return kvm_read_guest(kvm, hc->ingpa + offset, sparse_banks,
>   			      var_cnt * sizeof(*sparse_banks));
>   }
>   
> -static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
> +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>   {
> -	int i;
>   	struct kvm *kvm = vcpu->kvm;
>   	struct hv_tlb_flush_ex flush_ex;
>   	struct hv_tlb_flush flush;
> @@ -1803,7 +1821,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>   	 */
>   	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64);
>   
> -	if (!ex) {
> +	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
> +	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) {

In case you're trying to come up with a smaller patch for 5.18, we can
certainly drop these 'ex'/'non-ex' changes as these are merely
cosmetic.

>   		if (hc->fast) {
>   			flush.address_space = hc->ingpa;
>   			flush.flags = hc->outgpa;
> @@ -1859,17 +1878,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>   		if (!hc->var_cnt)
>   			goto ret_success;
>   
> -		if (hc->fast) {
> -			if (hc->var_cnt > HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
> -				return HV_STATUS_INVALID_HYPERCALL_INPUT;
> -			for (i = 0; i < hc->var_cnt; i += 2) {
> -				sparse_banks[i] = sse128_lo(hc->xmm[i / 2 + 1]);
> -				sparse_banks[i + 1] = sse128_hi(hc->xmm[i / 2 + 1]);
> -			}
> -			goto do_flush;
> -		}
> -
> -		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks,
> +		if (kvm_get_sparse_vp_set(kvm, hc, 2, sparse_banks,
>   					  offsetof(struct hv_tlb_flush_ex,
>   						   hv_vp_set.bank_contents)))

I like your idea to put 'consumed_xmm_halves' into
kvm_get_sparse_vp_set() as kvm_hv_flush_tlb is getting too big.

>   			return HV_STATUS_INVALID_HYPERCALL_INPUT;
> @@ -1913,7 +1922,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
>   	}
>   }
>   
> -static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
> +static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>   {
>   	struct kvm *kvm = vcpu->kvm;
>   	struct hv_send_ipi_ex send_ipi_ex;
> @@ -1924,7 +1933,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>   	u32 vector;
>   	bool all_cpus;
>   
> -	if (!ex) {
> +	if (hc->code == HVCALL_SEND_IPI) {
>   		if (!hc->fast) {
>   			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
>   						    sizeof(send_ipi))))
> @@ -1943,9 +1952,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>   
>   		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
>   	} else {
> -		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
> -					    sizeof(send_ipi_ex))))
> -			return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +		if (!hc->fast) {
> +			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
> +						    sizeof(send_ipi_ex))))
> +				return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +		} else {
> +			send_ipi_ex.vector = (u32)hc->ingpa;
> +			send_ipi_ex.vp_set.format = hc->outgpa;
> +			send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]);
> +		}
>   
>   		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
>   					 send_ipi_ex.vp_set.format,
> @@ -1964,7 +1979,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>   		if (!hc->var_cnt)
>   			goto ret_success;
>   
> -		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks,
> +		if (kvm_get_sparse_vp_set(kvm, hc, 1, sparse_banks,
>   					  offsetof(struct hv_send_ipi_ex,
>   						   vp_set.bank_contents)))
>   			return HV_STATUS_INVALID_HYPERCALL_INPUT;
> @@ -2126,6 +2141,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
>   	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
>   	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
>   	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
> +	case HVCALL_SEND_IPI_EX:
>   		return true;
>   	}
>   
> @@ -2283,46 +2299,43 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>   				kvm_hv_hypercall_complete_userspace;
>   		return 0;
>   	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
> -		if (unlikely(!hc.rep_cnt || hc.rep_idx || hc.var_cnt)) {
> +		if (unlikely(hc.var_cnt)) {
>   			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
>   			break;
>   		}
> -		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
> -		break;
> -	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
> -		if (unlikely(hc.rep || hc.var_cnt)) {
> -			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> -			break;
> -		}
> -		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
> -		break;
> +		fallthrough;
>   	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
>   		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
>   			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
>   			break;
>   		}
> -		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
> +		ret = kvm_hv_flush_tlb(vcpu, &hc);
>   		break;
> +	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
> +		if (unlikely(hc.var_cnt)) {
> +			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> +			break;
> +		}
> +		fallthrough;
>   	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
>   		if (unlikely(hc.rep)) {
>   			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
>   			break;
>   		}
> -		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
> +		ret = kvm_hv_flush_tlb(vcpu, &hc);
>   		break;
>   	case HVCALL_SEND_IPI:
> -		if (unlikely(hc.rep || hc.var_cnt)) {
> +		if (unlikely(hc.var_cnt)) {
>   			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
>   			break;
>   		}
> -		ret = kvm_hv_send_ipi(vcpu, &hc, false);
> -		break;
> +		fallthrough;
>   	case HVCALL_SEND_IPI_EX:
> -		if (unlikely(hc.fast || hc.rep)) {
> +		if (unlikely(hc.rep)) {
>   			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
>   			break;
>   		}
> -		ret = kvm_hv_send_ipi(vcpu, &hc, true);
> +		ret = kvm_hv_send_ipi(vcpu, &hc);
>   		break;
>   	case HVCALL_POST_DEBUG_DATA:
>   	case HVCALL_RETRIEVE_DEBUG_DATA:

I've smoke tested this (with the change I've mentioned above) and WS2019
booted with 65 vCPUs. This is a good sign)

>
>
> The resulting merge commit is already in kvm/queue shortly (which should
> become the next kvm/next as soon as tests complete).
>

I see, please swap sse128_lo()/sse128_hi() there too :-)

-- 
Vitaly

next prev parent reply	other threads:[~2022-02-25 13:14 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-22 15:46 [PATCH 0/4] KVM: x86: hyper-v: XMM fast hypercalls fixes Vitaly Kuznetsov
2022-02-22 15:46 ` [PATCH 1/4] KVM: x86: hyper-v: Drop redundant 'ex' parameter from kvm_hv_send_ipi() Vitaly Kuznetsov
2022-02-25 18:21   ` Maxim Levitsky
2022-02-28  9:58     ` Vitaly Kuznetsov
2022-02-28 10:45   ` Siddharth Chandrasekaran
2022-02-22 15:46 ` [PATCH 2/4] KVM: x86: hyper-v: Drop redundant 'ex' parameter from kvm_hv_flush_tlb() Vitaly Kuznetsov
2022-02-25 18:22   ` Maxim Levitsky
2022-02-28 10:46   ` Siddharth Chandrasekaran
2022-02-22 15:46 ` [PATCH 3/4] KVM: x86: hyper-v: Fix the maximum number of sparse banks for XMM fast TLB flush hypercalls Vitaly Kuznetsov
2022-02-28 10:47   ` Siddharth Chandrasekaran
2022-02-22 15:46 ` [PATCH 4/4] KVM: x86: hyper-v: HVCALL_SEND_IPI_EX is an XMM fast hypercall Vitaly Kuznetsov
2022-02-25 18:33   ` Maxim Levitsky
2022-02-28 10:02     ` Vitaly Kuznetsov
2022-02-28 10:48   ` Siddharth Chandrasekaran
2022-02-25 11:55 ` [PATCH 0/4] KVM: x86: hyper-v: XMM fast hypercalls fixes Paolo Bonzini
2022-02-25 13:13   ` Vitaly Kuznetsov [this message]
2022-02-25 13:17     ` Paolo Bonzini
2022-02-28 10:52       ` Siddharth Chandrasekaran
2022-02-28 11:09         ` Vitaly Kuznetsov
2022-02-28 16:18           ` Siddharth Chandrasekaran

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=871qzrdr6x.fsf@redhat.com \
    --to=vkuznets@redhat.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=sidcha@amazon.de \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.