qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: "Liu, Jinsong" <jinsong.liu@intel.com>
Cc: "qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Subject: Re: [Qemu-devel] [PATCH] KVM: emulate lapic tsc deadline timer for hvm
Date: Mon, 22 Aug 2011 12:18:49 +0300	[thread overview]
Message-ID: <4E521EF9.7020404@redhat.com> (raw)
In-Reply-To: <BC00F5384FCFC9499AF06F92E8B78A9E24888339F4@shsmsx502.ccr.corp.intel.com>

On 08/17/2011 07:19 AM, Liu, Jinsong wrote:
>  From a9670ddff84080c56183e2d678189e100f891174 Mon Sep 17 00:00:00 2001
> From: Liu, Jinsong<jinsong.liu@intel.com>
> Date: Wed, 17 Aug 2011 11:36:28 +0800
> Subject: [PATCH] KVM: emulate lapic tsc deadline timer for hvm

kvm doesn't have hvm.

> This patch emulate lapic tsc deadline timer for hvm:
> Enumerate tsc deadline timer capacibility by CPUID;
> Enable tsc deadline timer mode by LAPIC MMIO;
> Start tsc deadline timer by MSR;

> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index 4258aac..28bcf48 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -120,6 +120,7 @@
>   #define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
>   #define X86_FEATURE_MOVBE	(4*32+22) /* MOVBE instruction */
>   #define X86_FEATURE_POPCNT      (4*32+23) /* POPCNT instruction */
> +#define X86_FEATURE_TSC_DEADLINE_TIMER    (4*32+24) /* Tsc deadline timer */
>   #define X86_FEATURE_AES		(4*32+25) /* AES instructions */
>   #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
>   #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 307e3cf..28f7128 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -635,6 +635,7 @@ struct kvm_x86_ops {
>   	int (*check_intercept)(struct kvm_vcpu *vcpu,
>   			       struct x86_instruction_info *info,
>   			       enum x86_intercept_stage stage);
> +	u64 (*guest_to_host_tsc)(u64 guest_tsc);
>   };

Please put this near the other tsc functions.  Add a comment explaining 
what value is returned under nested virtualization.

Please add the svm callback implementation.

>
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -229,6 +229,8 @@
>   #define MSR_IA32_APICBASE_ENABLE	(1<<11)
>   #define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
>
> +#define MSR_IA32_TSCDEADLINE		0x000006e0
> +
>   #define MSR_IA32_UCODE_WRITE		0x00000079
>   #define MSR_IA32_UCODE_REV		0x0000008b
>   

Need to add to msrs_to_save so live migration works.

>
> @@ -665,28 +682,30 @@ static void update_divide_count(struct kvm_lapic *apic)
>   static void start_apic_timer(struct kvm_lapic *apic)
>   {
>   	ktime_t now = apic->lapic_timer.timer.base->get_time();
> -
> -	apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
> -		    APIC_BUS_CYCLE_NS * apic->divide_count;
>   	atomic_set(&apic->lapic_timer.pending, 0);
>
> -	if (!apic->lapic_timer.period)
> -		return;
> -	/*
> -	 * Do not allow the guest to program periodic timers with small
> -	 * interval, since the hrtimers are not throttled by the host
> -	 * scheduler.
> -	 */
> -	if (apic_lvtt_period(apic)) {
> -		if (apic->lapic_timer.period<  NSEC_PER_MSEC/2)
> -			apic->lapic_timer.period = NSEC_PER_MSEC/2;
> -	}
> +	/* lapic timer in oneshot or peroidic mode */
> +	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
> +		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
> +			    * APIC_BUS_CYCLE_NS * apic->divide_count;
>
> -	hrtimer_start(&apic->lapic_timer.timer,
> -		      ktime_add_ns(now, apic->lapic_timer.period),
> -		      HRTIMER_MODE_ABS);
> +		if (!apic->lapic_timer.period)
> +			return;
> +		/*
> +		 * Do not allow the guest to program periodic timers with small
> +		 * interval, since the hrtimers are not throttled by the host
> +		 * scheduler.
> +		 */
> +		if (apic_lvtt_period(apic)) {
> +			if (apic->lapic_timer.period<  NSEC_PER_MSEC/2)
> +				apic->lapic_timer.period = NSEC_PER_MSEC/2;
> +		}
>
> -	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
> +		hrtimer_start(&apic->lapic_timer.timer,
> +			      ktime_add_ns(now, apic->lapic_timer.period),
> +			      HRTIMER_MODE_ABS);
> +
> +		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
>   			   PRIx64 ", "
>   			   "timer initial count 0x%x, period %lldns, "
>   			   "expire @ 0x%016" PRIx64 ".\n", __func__,
> @@ -695,6 +714,26 @@ static void start_apic_timer(struct kvm_lapic *apic)
>   			   apic->lapic_timer.period,
>   			   ktime_to_ns(ktime_add_ns(now,
>   					apic->lapic_timer.period)));
> +	}
> +
> +	/* lapic timer in tsc deadline mode */
> +	if (apic_lvtt_tscdeadline(apic)) {

'else if' is slightly better, since it shows the reader the options are 
mutually exclusive.

> +		u64 tsc_now, tsc_target, tsc_delta, nsec;
> +
> +		if (!apic->lapic_timer.tscdeadline)
> +			return;
> +
> +		tsc_target = kvm_x86_ops->
> +			guest_to_host_tsc(apic->lapic_timer.tscdeadline);
> +		rdtscll(tsc_now);
> +		tsc_delta = tsc_target - tsc_now;

This only works if we have a constant tsc, that's not true for large 
multiboard machines.  Need to do this with irqs disabled as well 
(reading both 'now' and 'tsc_now' in the same critical section).

> +		if (tsc_delta<  0)
> +			tsc_delta = 0;
> +
> +		nsec = tsc_delta * 1000000L / tsc_khz;
> +		hrtimer_start(&apic->lapic_timer.timer,
> +			ktime_add_ns(now, nsec), HRTIMER_MODE_ABS);
> +	}
>   }
>
> @@ -883,6 +936,28 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
>    *----------------------------------------------------------------------
>    */
>
> +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
> +		return 0;

Why?

> +
> +	return apic->lapic_timer.tscdeadline;
> +}
> +
> +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
> +		return;
> +
> +	hrtimer_cancel(&apic->lapic_timer.timer);
> +	apic->lapic_timer.tscdeadline = data;
> +	start_apic_timer(apic);

Shouldn't the msr value be updated even if we're outside tsc-deadline mode?

> +}
> +


>   /*
>    * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
>    * ioctl. In this case the call-back should update internal vmx state to make
> @@ -6270,6 +6278,16 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>   			}
>   		}
>   	}
> +
> +	/*
> +	 * Emulate Intel lapic tsc deadline timer even if host not support it.
> +	 * Open CPUID.1.ECX[24] and use bit17/18 as timer mode mask.
> +	 */
> +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
> +	if (best) {
> +		best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
> +		vcpu->arch.apic->lapic_timer.timer_mode_mask = (3<<  17);
> +	}
>   }

Should be in common code; there is nothing vmx specific about it 
(although it is Intel specific at present).


-- 
error compiling committee.c: too many arguments to function

  reply	other threads:[~2011-08-22  9:18 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-08-17  4:19 [Qemu-devel] [PATCH] KVM: emulate lapic tsc deadline timer for hvm Liu, Jinsong
2011-08-22  9:18 ` Avi Kivity [this message]
2011-08-23 10:47   ` Marcelo Tosatti
2011-08-29  6:47     ` Tian, Kevin
2011-09-06 11:21     ` Liu, Jinsong
2011-09-08 17:12     ` Liu, Jinsong
2011-09-09 12:56       ` Marcelo Tosatti
2011-09-09 18:11         ` Liu, Jinsong
2011-09-09 18:36           ` Marcelo Tosatti
2011-09-09 18:47             ` Liu, Jinsong
2011-09-09 18:21         ` Liu, Jinsong
2011-09-06 11:18   ` Liu, Jinsong
2011-09-06 11:26     ` Avi Kivity
2011-09-07 16:45   ` Liu, Jinsong
2011-09-07 17:06     ` Avi Kivity
2011-09-07 17:33       ` Liu, Jinsong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E521EF9.7020404@redhat.com \
    --to=avi@redhat.com \
    --cc=jinsong.liu@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).