From: Andrey Smetanin <asmetanin@virtuozzo.com>
To: Peter Hornyack <peterhornyack@google.com>
Cc: kvm list <kvm@vger.kernel.org>, Gleb Natapov <gleb@kernel.org>,
qemu-devel@nongnu.org, Mike Waychison <mikew@google.com>,
Roman Kagan <rkagan@virtuozzo.com>,
"Denis V. Lunev" <den@openvz.org>,
Paolo Bonzini <pbonzini@redhat.com>, Feng Min <fmin@google.com>
Subject: Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
Date: Wed, 6 Jan 2016 12:22:08 +0300 [thread overview]
Message-ID: <568CDCC0.7050609@virtuozzo.com> (raw)
In-Reply-To: <CA+0KQ4N=JMAM-q0wpvRr1w+r42DhxagayhL=JGwJKgB3kf4RNg@mail.gmail.com>
On 01/06/2016 12:48 AM, Peter Hornyack wrote:
> On Thu, Dec 24, 2015 at 1:33 AM, Andrey Smetanin
> <asmetanin@virtuozzo.com> wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
> Reviewed-by: Peter Hornyack <peterhornyack@google.com>
>
>>
>> ---
>> arch/x86/kvm/hyperv.c | 117 +++++++++++++++++++++++++++++++++++++++++------
>> arch/x86/kvm/hyperv.h | 2 +
>> arch/x86/kvm/x86.c | 12 +++++
>> include/linux/kvm_host.h | 1 +
>> 4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>> return 0;
>> }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> + /*
>> + * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> + * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> + * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> + * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> + * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> + */
>> + return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> + PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> + if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> + tsc_ref, sizeof(*tsc_ref)))
>> + return 1;
>> + mark_page_dirty(kvm, gfn);
>> + return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> + PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> + if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> + tsc_ref, sizeof(*tsc_ref)))
>> + return 1;
>> + return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> + PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> + u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> + return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> + + tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> + HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> + memset(&tsc_ref, 0, sizeof(tsc_ref));
>> + return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> + struct kvm *kvm = vcpu->kvm;
>> + struct kvm_hv *hv = &kvm->arch.hyperv;
>> + HV_REFERENCE_TSC_PAGE tsc_ref;
>> + u32 tsc_khz;
>> + int r;
>> + u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> + if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> + return -EINVAL;
>> +
>> + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> + vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> + tsc_khz = vcpu->arch.virtual_tsc_khz;
>> + if (!tsc_khz) {
>> + vcpu_unimpl(vcpu, "no tsc khz\n");
>> + return setup_blank_tsc_page(vcpu, gfn);
>> + }
>> +
>> + r = read_tsc_page(kvm, gfn, &tsc_ref);
>> + if (r) {
>> + vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> + return r;
>> + }
>> +
>> + tsc_scale = calc_tsc_page_scale(tsc_khz);
>> + ref_time = get_time_ref_counter(kvm);
>> + tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> + /* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> + tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> + vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> + tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> + tsc_ref.tsc_sequence++;
>> + if (tsc_ref.tsc_sequence == 0)
>
> Also avoid tsc_sequence == 0xffffffff here. In the Hyper-V TLFS 4.0
> (Win2012 R2) 0xffffffff is the special sequence number to disable the
> reference TSC page.
>
we already discussed with Microsoft
that documentation contains wrong sequence number
- 0xffffffff (instead of 0). please take a look into details here:
https://lkml.org/lkml/2015/11/2/655
>> + tsc_ref.tsc_sequence = 1;
>> +
>> + tsc_ref.tsc_scale = tsc_scale;
>> + tsc_ref.tsc_offset = tsc_offset;
>> +
>> + vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> + calc_tsc_page_time(vcpu, &tsc_ref),
>> + get_time_ref_counter(kvm));
>> +
>> + return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>> static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>> bool host)
>> {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>> mark_page_dirty(kvm, gfn);
>> break;
>> }
>> - case HV_X64_MSR_REFERENCE_TSC: {
>> - u64 gfn;
>> - HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> - memset(&tsc_ref, 0, sizeof(tsc_ref));
>> + case HV_X64_MSR_REFERENCE_TSC:
>> hv->hv_tsc_page = data;
>> - if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> - break;
>> - gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> - if (kvm_write_guest(
>> - kvm,
>> - gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> - &tsc_ref, sizeof(tsc_ref)))
>> - return 1;
>> - mark_page_dirty(kvm, gfn);
>> + if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> + kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>> break;
>> - }
>> case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>> return kvm_hv_msr_set_crash_data(vcpu,
>> msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>> void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>> #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>> */
>> if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>> kvm_hv_process_stimers(vcpu);
>> +
>> + /*
>> + * KVM_REQ_HV_TSC_PAGE setup should be after
>> + * KVM_REQ_CLOCK_UPDATE processing because
>> + * Hyper-V tsc page content depends
>> + * on kvm->arch.kvmclock_offset value.
>> + */
>> + if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> + r = kvm_hv_setup_tsc_page(vcpu);
>> + if (unlikely(r))
>> + goto out;
>> + }
>> }
>>
>> /*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>> #define KVM_REQ_HV_RESET 29
>> #define KVM_REQ_HV_EXIT 30
>> #define KVM_REQ_HV_STIMER 31
>> +#define KVM_REQ_HV_TSC_PAGE 32
>>
>> #define KVM_REQ_MAX 64
>>
>> --
>> 2.4.3
>
> Looks good, nice helpful comments.
Thank you for review.
>
> I will note that HV_X64_MSR_REFERENCE_TSC is a good candidate to be
> handled in user space rather than in kvm. With my proposed MSR exits
> patches that have had some discussion recently, this code to implement
> the reference TSC page could be put in qemu instead of kvm.
Yes, this functionality can be implemented in QEMU when MSR exits
API appears inside KVM code.
>
> Peter
>
next prev parent reply other threads:[~2016-01-06 9:42 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-24 9:33 [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup Andrey Smetanin
2016-01-05 21:48 ` Peter Hornyack
2016-01-06 9:22 ` Andrey Smetanin [this message]
2016-01-12 7:43 ` Andrey Smetanin
2016-01-19 7:48 ` Denis V. Lunev
2016-01-20 14:05 ` Paolo Bonzini
2016-01-20 14:41 ` Andrey Smetanin
2016-01-20 14:44 ` Denis V. Lunev
2016-01-20 14:52 ` Roman Kagan
2016-01-20 14:54 ` Denis V. Lunev
2016-01-20 21:10 ` Paolo Bonzini
2016-01-22 10:08 ` Paolo Bonzini
2016-01-22 10:15 ` Andrey Smetanin
2016-01-22 11:02 ` Paolo Bonzini
2016-01-22 11:11 ` Andrey Smetanin
2016-01-22 11:31 ` Andrey Smetanin
2016-01-22 11:53 ` Paolo Bonzini
2016-01-22 11:59 ` Andrey Smetanin
2016-01-22 13:13 ` Andrey Smetanin
2016-01-22 13:21 ` Paolo Bonzini
2016-01-22 13:34 ` Andrey Smetanin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=568CDCC0.7050609@virtuozzo.com \
--to=asmetanin@virtuozzo.com \
--cc=den@openvz.org \
--cc=fmin@google.com \
--cc=gleb@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=mikew@google.com \
--cc=pbonzini@redhat.com \
--cc=peterhornyack@google.com \
--cc=qemu-devel@nongnu.org \
--cc=rkagan@virtuozzo.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).