From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([140.186.70.92]:57095) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1QvQem-0004We-3F for qemu-devel@nongnu.org; Mon, 22 Aug 2011 05:18:57 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1QvQek-0003HU-Ie for qemu-devel@nongnu.org; Mon, 22 Aug 2011 05:18:55 -0400 Received: from mx1.redhat.com ([209.132.183.28]:17728) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1QvQek-0003HQ-8M for qemu-devel@nongnu.org; Mon, 22 Aug 2011 05:18:54 -0400 Message-ID: <4E521EF9.7020404@redhat.com> Date: Mon, 22 Aug 2011 12:18:49 +0300 From: Avi Kivity MIME-Version: 1.0 References: In-Reply-To: Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH] KVM: emulate lapic tsc deadline timer for hvm List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: "Liu, Jinsong" Cc: "qemu-devel@nongnu.org" , "kvm@vger.kernel.org" On 08/17/2011 07:19 AM, Liu, Jinsong wrote: > From a9670ddff84080c56183e2d678189e100f891174 Mon Sep 17 00:00:00 2001 > From: Liu, Jinsong > Date: Wed, 17 Aug 2011 11:36:28 +0800 > Subject: [PATCH] KVM: emulate lapic tsc deadline timer for hvm kvm doesn't have hvm. > This patch emulate lapic tsc deadline timer for hvm: > Enumerate tsc deadline timer capacibility by CPUID; > Enable tsc deadline timer mode by LAPIC MMIO; > Start tsc deadline timer by MSR; > diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h > index 4258aac..28bcf48 100644 > --- a/arch/x86/include/asm/cpufeature.h > +++ b/arch/x86/include/asm/cpufeature.h > @@ -120,6 +120,7 @@ > #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ > #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ > #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ > +#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ > #define X86_FEATURE_AES (4*32+25) /* AES instructions */ > #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ > #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 307e3cf..28f7128 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -635,6 +635,7 @@ struct kvm_x86_ops { > int (*check_intercept)(struct kvm_vcpu *vcpu, > struct x86_instruction_info *info, > enum x86_intercept_stage stage); > + u64 (*guest_to_host_tsc)(u64 guest_tsc); > }; Please put this near the other tsc functions. Add a comment explaining what value is returned under nested virtualization. Please add the svm callback implementation. > > --- a/arch/x86/include/asm/msr-index.h > +++ b/arch/x86/include/asm/msr-index.h > @@ -229,6 +229,8 @@ > #define MSR_IA32_APICBASE_ENABLE (1<<11) > #define MSR_IA32_APICBASE_BASE (0xfffff<<12) > > +#define MSR_IA32_TSCDEADLINE 0x000006e0 > + > #define MSR_IA32_UCODE_WRITE 0x00000079 > #define MSR_IA32_UCODE_REV 0x0000008b > Need to add to msrs_to_save so live migration works. > > @@ -665,28 +682,30 @@ static void update_divide_count(struct kvm_lapic *apic) > static void start_apic_timer(struct kvm_lapic *apic) > { > ktime_t now = apic->lapic_timer.timer.base->get_time(); > - > - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * > - APIC_BUS_CYCLE_NS * apic->divide_count; > atomic_set(&apic->lapic_timer.pending, 0); > > - if (!apic->lapic_timer.period) > - return; > - /* > - * Do not allow the guest to program periodic timers with small > - * interval, since the hrtimers are not throttled by the host > - * scheduler. > - */ > - if (apic_lvtt_period(apic)) { > - if (apic->lapic_timer.period< NSEC_PER_MSEC/2) > - apic->lapic_timer.period = NSEC_PER_MSEC/2; > - } > + /* lapic timer in oneshot or peroidic mode */ > + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { > + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) > + * APIC_BUS_CYCLE_NS * apic->divide_count; > > - hrtimer_start(&apic->lapic_timer.timer, > - ktime_add_ns(now, apic->lapic_timer.period), > - HRTIMER_MODE_ABS); > + if (!apic->lapic_timer.period) > + return; > + /* > + * Do not allow the guest to program periodic timers with small > + * interval, since the hrtimers are not throttled by the host > + * scheduler. > + */ > + if (apic_lvtt_period(apic)) { > + if (apic->lapic_timer.period< NSEC_PER_MSEC/2) > + apic->lapic_timer.period = NSEC_PER_MSEC/2; > + } > > - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > + hrtimer_start(&apic->lapic_timer.timer, > + ktime_add_ns(now, apic->lapic_timer.period), > + HRTIMER_MODE_ABS); > + > + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > PRIx64 ", " > "timer initial count 0x%x, period %lldns, " > "expire @ 0x%016" PRIx64 ".\n", __func__, > @@ -695,6 +714,26 @@ static void start_apic_timer(struct kvm_lapic *apic) > apic->lapic_timer.period, > ktime_to_ns(ktime_add_ns(now, > apic->lapic_timer.period))); > + } > + > + /* lapic timer in tsc deadline mode */ > + if (apic_lvtt_tscdeadline(apic)) { 'else if' is slightly better, since it shows the reader the options are mutually exclusive. > + u64 tsc_now, tsc_target, tsc_delta, nsec; > + > + if (!apic->lapic_timer.tscdeadline) > + return; > + > + tsc_target = kvm_x86_ops-> > + guest_to_host_tsc(apic->lapic_timer.tscdeadline); > + rdtscll(tsc_now); > + tsc_delta = tsc_target - tsc_now; This only works if we have a constant tsc, that's not true for large multiboard machines. Need to do this with irqs disabled as well (reading both 'now' and 'tsc_now' in the same critical section). > + if (tsc_delta< 0) > + tsc_delta = 0; > + > + nsec = tsc_delta * 1000000L / tsc_khz; > + hrtimer_start(&apic->lapic_timer.timer, > + ktime_add_ns(now, nsec), HRTIMER_MODE_ABS); > + } > } > > @@ -883,6 +936,28 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) > *---------------------------------------------------------------------- > */ > > +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) > +{ > + struct kvm_lapic *apic = vcpu->arch.apic; > + > + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) > + return 0; Why? > + > + return apic->lapic_timer.tscdeadline; > +} > + > +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) > +{ > + struct kvm_lapic *apic = vcpu->arch.apic; > + > + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) > + return; > + > + hrtimer_cancel(&apic->lapic_timer.timer); > + apic->lapic_timer.tscdeadline = data; > + start_apic_timer(apic); Shouldn't the msr value be updated even if we're outside tsc-deadline mode? > +} > + > /* > * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ > * ioctl. In this case the call-back should update internal vmx state to make > @@ -6270,6 +6278,16 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > } > } > } > + > + /* > + * Emulate Intel lapic tsc deadline timer even if host not support it. > + * Open CPUID.1.ECX[24] and use bit17/18 as timer mode mask. > + */ > + best = kvm_find_cpuid_entry(vcpu, 1, 0); > + if (best) { > + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); > + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3<< 17); > + } > } Should be in common code; there is nothing vmx specific about it (although it is Intel specific at present). -- error compiling committee.c: too many arguments to function