public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v3 0/2]  Hyper-V timers
@ 2013-12-08 11:33 Vadim Rozenfeld
  2013-12-08 11:33 ` [RFC PATCH v3 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
  2013-12-08 11:33 ` [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment Vadim Rozenfeld
  0 siblings, 2 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2013-12-08 11:33 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, pl, pbonzini, Vadim Rozenfeld

This RFC series adds support for two Hyper-V timer services -
a per-partition reference time counter, and a  partition reference
time enlightenment.

Vadim Rozenfeld (2):
  add support for Hyper-V reference time counter
  add support for Hyper-V partition reference time enlightenment

 arch/x86/include/asm/kvm_host.h    |  3 ++
 arch/x86/include/uapi/asm/hyperv.h | 13 ++++++++
 arch/x86/kvm/x86.c                 | 64 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h           |  1 +
 4 files changed, 80 insertions(+), 1 deletion(-)

-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-08 11:33 [RFC PATCH v3 0/2] Hyper-V timers Vadim Rozenfeld
@ 2013-12-08 11:33 ` Vadim Rozenfeld
  2013-12-09 14:23   ` Paolo Bonzini
  2013-12-11 18:53   ` Marcelo Tosatti
  2013-12-08 11:33 ` [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment Vadim Rozenfeld
  1 sibling, 2 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2013-12-08 11:33 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, pl, pbonzini, Vadim Rozenfeld

Signed-off: Peter Lieven <pl@dlh.net>
Signed-off: Gleb Natapov <gleb@redhat.com>
Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>

v1 -> v2
1. mark TSC page dirty as suggested by 
    Eric Northup <digitaleric@google.com> and Gleb
2. disable local irq when calling get_kernel_ns, 
    as it was done by Peter Lieven <pl@dlhnet.de>
3. move check for TSC page enable from second patch
    to this one.

---
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
 arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h           |  1 +
 4 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d783..2fd0753 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,8 @@ struct kvm_arch {
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
+	u64 hv_ref_count;
+	u64 hv_tsc_page;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c01..462efe7 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
 
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+
 /*
  * There is a single feature flag that signifies the presence of the MSR
  * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
+#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
+
 #define HV_PROCESSOR_POWER_STATE_C0		0
 #define HV_PROCESSOR_POWER_STATE_C1		1
 #define HV_PROCESSOR_POWER_STATE_C2		2
@@ -210,4 +216,11 @@
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+typedef struct _HV_REFERENCE_TSC_PAGE {
+	__u32 tsc_sequence;
+	__u32 res1;
+	__u64 tsc_scale;
+	__s64 tsc_offset;
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21ef1ba..5e4e495a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
 		r = true;
 		break;
 	}
@@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (__copy_to_user((void __user *)addr, instructions, 4))
 			return 1;
 		kvm->arch.hv_hypercall = data;
+		local_irq_disable();
+		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		unsigned long addr;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+		tsc_ref.tsc_sequence = 0;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
+			kvm->arch.hv_tsc_page = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, data >>
+			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		kvm->arch.hv_tsc_page = data;
 		break;
 	}
 	default:
@@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_HYPERCALL:
 		data = kvm->arch.hv_hypercall;
 		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		u64 now_ns;
+		local_irq_disable();
+		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
+		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = kvm->arch.hv_tsc_page;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
 #endif
+	case KVM_CAP_HYPERV_TIME:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 902f124..686c1ca 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_SPAPR_MULTITCE 94
 #define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_HYPERV_TIME 96
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-08 11:33 [RFC PATCH v3 0/2] Hyper-V timers Vadim Rozenfeld
  2013-12-08 11:33 ` [RFC PATCH v3 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
@ 2013-12-08 11:33 ` Vadim Rozenfeld
  2013-12-09 14:32   ` Paolo Bonzini
  2013-12-11 19:27   ` Marcelo Tosatti
  1 sibling, 2 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2013-12-08 11:33 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, pl, pbonzini, Vadim Rozenfeld

The following patch allows to activate a partition reference
time enlightenment that is based on the host platform's support
for an Invariant Time Stamp Counter (iTSC).

v2 -> v3
Handle TSC sequence, scale, and offest changing during migration.

---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 29 +++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2fd0753..81fdff0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -607,6 +607,7 @@ struct kvm_arch {
 	u64 hv_hypercall;
 	u64 hv_ref_count;
 	u64 hv_tsc_page;
+	u64 hv_ref_time;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e4e495a..cb6766a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			break;
 		}
 		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		addr = gfn_to_hva(kvm, data >>
-			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+		addr = gfn_to_hva(kvm, gfn);
 		if (kvm_is_error_hva(addr))
 			return 1;
+		tsc_ref.tsc_sequence =
+			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
+		tsc_ref.tsc_scale =
+			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
+		tsc_ref.tsc_offset = 0;
 		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
 			return 1;
 		mark_page_dirty(kvm, gfn);
 		kvm->arch.hv_tsc_page = data;
+		kvm->arch.hv_ref_count = 0;
 		break;
 	}
 	default:
@@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		local_irq_enable();
 		kvm->arch.kvmclock_offset = delta;
 		kvm_gen_update_masterclock(kvm);
+
+		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+			HV_REFERENCE_TSC_PAGE* tsc_ref;
+			u64 curr_time;
+			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
+				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+			tsc_ref->tsc_sequence =
+				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
+			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
+			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
+				tsc_ref->tsc_offset;
+			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
+		}
 		break;
 	}
 	case KVM_GET_CLOCK: {
@@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
 			goto out;
 		r = 0;
+		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+			HV_REFERENCE_TSC_PAGE* tsc_ref;
+			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
+				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+			kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * 
+				native_read_tsc()) >> 32) + tsc_ref->tsc_offset;
+		}
 		break;
 	}
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-08 11:33 ` [RFC PATCH v3 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
@ 2013-12-09 14:23   ` Paolo Bonzini
  2013-12-10 10:46     ` Vadim Rozenfeld
  2013-12-11 18:53   ` Marcelo Tosatti
  1 sibling, 1 reply; 37+ messages in thread
From: Paolo Bonzini @ 2013-12-09 14:23 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, mtosatti, pl

Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
> 
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ae5d783..2fd0753 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -605,6 +605,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b8f1c01..462efe7 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -28,6 +28,9 @@
>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /*
>   * There is a single feature flag that signifies the presence of the MSR
>   * that can be used to retrieve both the local APIC Timer frequency as
> @@ -198,6 +201,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -210,4 +216,11 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 tsc_sequence;
> +	__u32 res1;
> +	__u64 tsc_scale;
> +	__s64 tsc_offset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 21ef1ba..5e4e495a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,

You need to bump KVM_SAVE_MSRS_BEGIN.

>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		local_irq_enable();

Please add a patch that moves these four lines from KVM_GET_CLOCK and
KVM_SET_CLOCK

                local_irq_disable();
                now_ns = get_kernel_ns();
                delta = user_ns.clock - now_ns;
                local_irq_enable();
                kvm->arch.kvmclock_offset = delta;
                kvm_gen_update_masterclock(kvm);

                local_irq_disable();
                now_ns = get_kernel_ns();
                user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
                local_irq_enable();

For example u64 kvm_get_clock_ns(struct kvm *) and void
kvm_set_clock_ns(struct kvm *, u64).  You can then use the
kvm_get_clock_ns function in this patch.

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.tsc_sequence = 0;

Please zero it with memset.  You're leaking values from the stack to the
guest.

> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();

Another possible user of kvm_get_clock_ns.

The patch should be good with these changes.

Paolo

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;
>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
>  #endif
> +	case KVM_CAP_HYPERV_TIME:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 902f124..686c1ca 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_ARM_EL1_32BIT 93
>  #define KVM_CAP_SPAPR_MULTITCE 94
>  #define KVM_CAP_EXT_EMUL_CPUID 95
> +#define KVM_CAP_HYPERV_TIME 96
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-08 11:33 ` [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment Vadim Rozenfeld
@ 2013-12-09 14:32   ` Paolo Bonzini
  2013-12-10 11:23     ` Vadim Rozenfeld
  2013-12-11 19:27   ` Marcelo Tosatti
  1 sibling, 1 reply; 37+ messages in thread
From: Paolo Bonzini @ 2013-12-09 14:32 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, mtosatti, pl

Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> +		tsc_ref.tsc_sequence =
> +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> +		tsc_ref.tsc_scale =
> +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.tsc_offset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
>  		kvm->arch.hv_tsc_page = data;
> +		kvm->arch.hv_ref_count = 0;
>  		break;
>  	}
>  	default:
> @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		local_irq_enable();
>  		kvm->arch.kvmclock_offset = delta;
>  		kvm_gen_update_masterclock(kvm);
> +
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> +			u64 curr_time;
> +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +			tsc_ref->tsc_sequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;

Why shouldn't this be vcpu->arch.virtual_tsc_khz?

> +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> +				tsc_ref->tsc_offset;
> +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> +		}

The difference in setting tsc_ref->tsc_scale is the only important
change between the two occurrences.  If you can avoid that difference
and you move this to a separate function, you can reuse that new
function in set_msr_hyperv_pw as well.

Also, kvm_set_tsc_khz should recompute the reference page's values as
well, so you'd have three uses.

Paolo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-09 14:23   ` Paolo Bonzini
@ 2013-12-10 10:46     ` Vadim Rozenfeld
  0 siblings, 0 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2013-12-10 10:46 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, mtosatti, pl

On Mon, 2013-12-09 at 15:23 +0100, Paolo Bonzini wrote:
> Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 54 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..2fd0753 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ef1ba..5e4e495a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> 
> You need to bump KVM_SAVE_MSRS_BEGIN.
> 
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		local_irq_enable();
> 
> Please add a patch that moves these four lines from KVM_GET_CLOCK and
> KVM_SET_CLOCK
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 delta = user_ns.clock - now_ns;
>                 local_irq_enable();
>                 kvm->arch.kvmclock_offset = delta;
>                 kvm_gen_update_masterclock(kvm);
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
>                 local_irq_enable();
> 
> For example u64 kvm_get_clock_ns(struct kvm *) and void
> kvm_set_clock_ns(struct kvm *, u64).  You can then use the
> kvm_get_clock_ns function in this patch.
> 

OK.

> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC: {
> > +		u64 gfn;
> > +		unsigned long addr;
> > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > +		tsc_ref.tsc_sequence = 0;
> 
> Please zero it with memset.  You're leaking values from the stack to the
> guest.

I can do it, but it is probably not necessary, mostly because guest
allocates one page from nonpaged pool and uses it exclusively for
vTSC purpose only, and host only interested in three values, located at
the beginning of this page.    

> 
> > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> > +			kvm->arch.hv_tsc_page = data;
> > +			break;
> > +		}
> > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > +		addr = gfn_to_hva(kvm, data >>
> > +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +		if (kvm_is_error_hva(addr))
> > +			return 1;
> > +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > +			return 1;
> > +		mark_page_dirty(kvm, gfn);
> > +		kvm->arch.hv_tsc_page = data;
> >  		break;
> >  	}
> >  	default:
> > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  	case HV_X64_MSR_HYPERCALL:
> >  		data = kvm->arch.hv_hypercall;
> >  		break;
> > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > +		u64 now_ns;
> > +		local_irq_disable();
> > +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> > +		local_irq_enable();
> 
> Another possible user of kvm_get_clock_ns.
> 
> The patch should be good with these changes.

Thanks,
Vadim.
> 
> Paolo
> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +		data = kvm->arch.hv_tsc_page;
> > +		break;
> >  	default:
> >  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> >  		return 1;
> > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >  	case KVM_CAP_PCI_2_3:
> >  #endif
> > +	case KVM_CAP_HYPERV_TIME:
> >  		r = 1;
> >  		break;
> >  	case KVM_CAP_COALESCED_MMIO:
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 902f124..686c1ca 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_ARM_EL1_32BIT 93
> >  #define KVM_CAP_SPAPR_MULTITCE 94
> >  #define KVM_CAP_EXT_EMUL_CPUID 95
> > +#define KVM_CAP_HYPERV_TIME 96
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > 
> 



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-09 14:32   ` Paolo Bonzini
@ 2013-12-10 11:23     ` Vadim Rozenfeld
  2013-12-10 16:52       ` Paolo Bonzini
  2013-12-11 19:28       ` Marcelo Tosatti
  0 siblings, 2 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2013-12-10 11:23 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, mtosatti, pl

On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote:
> Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> > +		tsc_ref.tsc_sequence =
> > +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > +		tsc_ref.tsc_scale =
> > +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > +		tsc_ref.tsc_offset = 0;
> >  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> >  			return 1;
> >  		mark_page_dirty(kvm, gfn);
> >  		kvm->arch.hv_tsc_page = data;
> > +		kvm->arch.hv_ref_count = 0;
> >  		break;
> >  	}
> >  	default:
> > @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >  		local_irq_enable();
> >  		kvm->arch.kvmclock_offset = delta;
> >  		kvm_gen_update_masterclock(kvm);
> > +
> > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> > +			u64 curr_time;
> > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +			tsc_ref->tsc_sequence =
> > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> 
> Why shouldn't this be vcpu->arch.virtual_tsc_khz?
Yeah, I was thinking about that, but we need a vcpu instance for this.

> 
> > +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> > +				tsc_ref->tsc_offset;
> > +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> > +		}
> 
> The difference in setting tsc_ref->tsc_scale is the only important
> change between the two occurrences.  If you can avoid that difference
> and you move this to a separate function, you can reuse that new
> function in set_msr_hyperv_pw as well.

Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
partition creation time and KVM_SET_CLOCK which happens during resume 
after partition pause? If so - there are several differences, where
the offset calculation probably is the most important one.

Vadim.

> 
> Also, kvm_set_tsc_khz should recompute the reference page's values as
> well, so you'd have three uses.
> 
> Paolo



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-10 11:23     ` Vadim Rozenfeld
@ 2013-12-10 16:52       ` Paolo Bonzini
  2013-12-11 10:58         ` Vadim Rozenfeld
  2013-12-11 19:28       ` Marcelo Tosatti
  1 sibling, 1 reply; 37+ messages in thread
From: Paolo Bonzini @ 2013-12-10 16:52 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, mtosatti, pl

Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto:
> > > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> > > +			u64 curr_time;
> > > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> > > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > > +			tsc_ref->tsc_sequence =
> > > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > > +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> > 
> > Why shouldn't this be vcpu->arch.virtual_tsc_khz?
> 
> Yeah, I was thinking about that, but we need a vcpu instance for this.

You can perhaps store the value from vcpu->arch.virtual_tsc_khz to 
kvm->arch when the MSR is first written?

> Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
> partition creation time and KVM_SET_CLOCK which happens during resume 
> after partition pause? If so - there are several differences, where
> the offset calculation probably is the most important one.

The offset and frequence are the only differences.

+			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
+				tsc_ref->tsc_offset;
+			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;

Why do you need kvm->arch.hv_ref_time at all?  Can you just use
"get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count"?
Then the same code can set tsc_ref->tsc_offset in both cases.

In fact, it's not clear to me what hv_ref_time is for, and how it
is different from 

By the way, a small nit:

> 
> +		tsc_ref.tsc_sequence =
> +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> +		tsc_ref.tsc_scale =
> +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.tsc_offset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
>  		kvm->arch.hv_tsc_page = data;
> +		kvm->arch.hv_ref_count = 0;
>  		break;

This setting of kvm->arch.hv_ref_count belongs in the previous patch.

Paolo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-10 16:52       ` Paolo Bonzini
@ 2013-12-11 10:58         ` Vadim Rozenfeld
  2013-12-11 12:28           ` Paolo Bonzini
  0 siblings, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2013-12-11 10:58 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, mtosatti, pl

On Tue, 2013-12-10 at 17:52 +0100, Paolo Bonzini wrote:
> Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto:
> > > > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > > > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> > > > +			u64 curr_time;
> > > > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> > > > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > > > +			tsc_ref->tsc_sequence =
> > > > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > > > +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> > > 
> > > Why shouldn't this be vcpu->arch.virtual_tsc_khz?
> > 
> > Yeah, I was thinking about that, but we need a vcpu instance for this.
> 
> You can perhaps store the value from vcpu->arch.virtual_tsc_khz to 
> kvm->arch when the MSR is first written?
> 
> > Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
> > partition creation time and KVM_SET_CLOCK which happens during resume 
> > after partition pause? If so - there are several differences, where
> > the offset calculation probably is the most important one.
> 
> The offset and frequence are the only differences.
> 
> +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> +				tsc_ref->tsc_offset;
> +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> 
> Why do you need kvm->arch.hv_ref_time at all?  Can you just use
> "get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count"?
> Then the same code can set tsc_ref->tsc_offset in both cases.
> 
> In fact, it's not clear to me what hv_ref_time is for, and how it
> is different from 

OK, let me explain how it works.
Hyper-V allows guest to use invariant TSC provided by host as a time
stamp source (KeQueryPerformanceCounter). Guest is calling rdtsc and
normalizing it to 10MHz frequency, it is why we need "tsc_scale".
"tsc_offset" is needed for migration or pause/resume cycles.
When we pause a VM, we need to save the current vTSC value
("hv_ref_time"), which is rdtsc * tsc_scale + tsc_offset.
Then, during resume, we need to recalculate the new tsc_scale
as well as the new tsc_offset value. 
tsc_offset = old(saved) vTSC - new vTSC

So maybe hv_ref_time is not a good name, but we use it 
for keeping the old vTSC value, saved before stopping VM.

Vadim.

> 
> By the way, a small nit:
> 
> > 
> > +		tsc_ref.tsc_sequence =
> > +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > +		tsc_ref.tsc_scale =
> > +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > +		tsc_ref.tsc_offset = 0;
> >  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> >  			return 1;
> >  		mark_page_dirty(kvm, gfn);
> >  		kvm->arch.hv_tsc_page = data;
> > +		kvm->arch.hv_ref_count = 0;
> >  		break;
> 
> This setting of kvm->arch.hv_ref_count belongs in the previous patch.
> 
> Paolo



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-11 10:58         ` Vadim Rozenfeld
@ 2013-12-11 12:28           ` Paolo Bonzini
  0 siblings, 0 replies; 37+ messages in thread
From: Paolo Bonzini @ 2013-12-11 12:28 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, mtosatti, pl

Il 11/12/2013 11:58, Vadim Rozenfeld ha scritto:
>> > +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
>> > +				tsc_ref->tsc_offset;
>> > +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
>> > 
>> > Why do you need kvm->arch.hv_ref_time at all?  Can you just use
>> > "get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count"?
>> > Then the same code can set tsc_ref->tsc_offset in both cases.
>> > 
>> > In fact, it's not clear to me what hv_ref_time is for, and how it
>> > is different from 
> OK, let me explain how it works.
> Hyper-V allows guest to use invariant TSC provided by host as a time
> stamp source (KeQueryPerformanceCounter). Guest is calling rdtsc and
> normalizing it to 10MHz frequency, it is why we need "tsc_scale".
> "tsc_offset" is needed for migration or pause/resume cycles.
> When we pause a VM, we need to save the current vTSC value
> ("hv_ref_time"), which is rdtsc * tsc_scale + tsc_offset.
> Then, during resume, we need to recalculate the new tsc_scale
> as well as the new tsc_offset value. 
> tsc_offset = old(saved) vTSC - new vTSC

In practice "save" means KVM_GET_CLOCK, and "restore" means
KVM_SET_CLOCK, right?

> So maybe hv_ref_time is not a good name, but we use it 
> for keeping the old vTSC value, saved before stopping VM.

Ok, this was roughly my understanding as well.

My understanding is also that (((tsc_ref->tsc_scale >> 32) *
native_read_tsc()) >> 32) + tsc_ref->tsc_offset returns exactly the same
value as HV_X64_MSR_TIME_REF_COUNT.  Thus we do not need
kvm->arch.hv_ref_time.  We can use the value of
HV_X64_MSR_TIME_REF_COUNT, which is "(get_kernel_ns() +
kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count) / 100", to compute
tsc_offset, like this:

  curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32);
  tsc_ref->tsc_offset = get_hv_x64_msr_time_ref_count() - curr_time;

This code can be applied always: when the TSC page is initialized and
when KVM_SET_CLOCK is called.  You do not need to do anything for
KVM_GET_CLOCK.

Paolo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-08 11:33 ` [RFC PATCH v3 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
  2013-12-09 14:23   ` Paolo Bonzini
@ 2013-12-11 18:53   ` Marcelo Tosatti
  2013-12-11 18:59     ` Marcelo Tosatti
                       ` (2 more replies)
  1 sibling, 3 replies; 37+ messages in thread
From: Marcelo Tosatti @ 2013-12-11 18:53 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, pl, pbonzini

On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
> 
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ae5d783..2fd0753 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -605,6 +605,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b8f1c01..462efe7 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -28,6 +28,9 @@
>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /*
>   * There is a single feature flag that signifies the presence of the MSR
>   * that can be used to retrieve both the local APIC Timer frequency as
> @@ -198,6 +201,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -210,4 +216,11 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 tsc_sequence;
> +	__u32 res1;
> +	__u64 tsc_scale;
> +	__s64 tsc_offset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 21ef1ba..5e4e495a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		local_irq_enable();

Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
starts counting?

No need to store kvmclock_offset in hv_ref_count? (moreover
the name is weird, better name would be "hv_ref_start_time".

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.tsc_sequence = 0;
> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();

No need for irq disable/enable pairs.

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;

Does it return non-zero data even if not enabled?

>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
>  #endif
> +	case KVM_CAP_HYPERV_TIME:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 902f124..686c1ca 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_ARM_EL1_32BIT 93
>  #define KVM_CAP_SPAPR_MULTITCE 94
>  #define KVM_CAP_EXT_EMUL_CPUID 95
> +#define KVM_CAP_HYPERV_TIME 96
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> -- 
> 1.8.1.4

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-11 18:53   ` Marcelo Tosatti
@ 2013-12-11 18:59     ` Marcelo Tosatti
  2013-12-12  9:33       ` Paolo Bonzini
  2014-01-02 16:52       ` Peter Lieven
  2014-01-02 13:15     ` Peter Lieven
  2014-01-13 12:10     ` Vadim Rozenfeld
  2 siblings, 2 replies; 37+ messages in thread
From: Marcelo Tosatti @ 2013-12-11 18:59 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, pl, pbonzini

On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 54 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..2fd0753 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ef1ba..5e4e495a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		local_irq_enable();
> 
> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> starts counting?
> 
> No need to store kvmclock_offset in hv_ref_count? (moreover
> the name is weird, better name would be "hv_ref_start_time".

Just add kvmclock_offset when reading the values (otherwise you have a
"stale copy" of kvmclock_offset in hv_ref_count).


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-08 11:33 ` [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment Vadim Rozenfeld
  2013-12-09 14:32   ` Paolo Bonzini
@ 2013-12-11 19:27   ` Marcelo Tosatti
  2013-12-12  9:34     ` Paolo Bonzini
  2014-01-14  4:11     ` Vadim Rozenfeld
  1 sibling, 2 replies; 37+ messages in thread
From: Marcelo Tosatti @ 2013-12-11 19:27 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, pl, pbonzini

On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference
> time enlightenment that is based on the host platform's support
> for an Invariant Time Stamp Counter (iTSC).
> 
> v2 -> v3
> Handle TSC sequence, scale, and offest changing during migration.
> 
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/x86.c              | 29 +++++++++++++++++++++++++++--
>  2 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2fd0753..81fdff0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -607,6 +607,7 @@ struct kvm_arch {
>  	u64 hv_hypercall;
>  	u64 hv_ref_count;
>  	u64 hv_tsc_page;
> +	u64 hv_ref_time;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5e4e495a..cb6766a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  			break;
>  		}
>  		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		addr = gfn_to_hva(kvm, data >>
> -			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		addr = gfn_to_hva(kvm, gfn);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		tsc_ref.tsc_sequence =
> +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> +		tsc_ref.tsc_scale =
> +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.tsc_offset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
>  		kvm->arch.hv_tsc_page = data;
> +		kvm->arch.hv_ref_count = 0;
>  		break;
>  	}
>  	default:
> @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		local_irq_enable();
>  		kvm->arch.kvmclock_offset = delta;
>  		kvm_gen_update_masterclock(kvm);
> +
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> +			u64 curr_time;
> +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +			tsc_ref->tsc_sequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> +				tsc_ref->tsc_offset;
> +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> +		}
>  		break;
>  	}
>  	case KVM_GET_CLOCK: {
> @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
>  			goto out;
>  		r = 0;
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

kvm_read_guest_cached.

> +			kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * 
> +				native_read_tsc()) >> 32) + tsc_ref->tsc_offset;

Why native_read_tsc and not ->read_l1_tsc?

It is easier to trust on the host to check reliability of the TSC: if
it uses TSC clocksource, then the TSCs are stable. So could condition
exposing the TSC ref page when ka->use_master_clock=1, see kvm_guest_time_update.
And hook into pvclock_gtod_notify.

So in addition to X86_FEATURE_CONSTANT_TSC, check
ka->use_master_clock=1



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-10 11:23     ` Vadim Rozenfeld
  2013-12-10 16:52       ` Paolo Bonzini
@ 2013-12-11 19:28       ` Marcelo Tosatti
  1 sibling, 0 replies; 37+ messages in thread
From: Marcelo Tosatti @ 2013-12-11 19:28 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Paolo Bonzini, kvm, pl

On Tue, Dec 10, 2013 at 10:23:17PM +1100, Vadim Rozenfeld wrote:
> On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote:
> > Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> > > +		tsc_ref.tsc_sequence =
> > > +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > > +		tsc_ref.tsc_scale =
> > > +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > > +		tsc_ref.tsc_offset = 0;
> > >  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > >  			return 1;
> > >  		mark_page_dirty(kvm, gfn);
> > >  		kvm->arch.hv_tsc_page = data;
> > > +		kvm->arch.hv_ref_count = 0;
> > >  		break;
> > >  	}
> > >  	default:
> > > @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > >  		local_irq_enable();
> > >  		kvm->arch.kvmclock_offset = delta;
> > >  		kvm_gen_update_masterclock(kvm);
> > > +
> > > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> > > +			u64 curr_time;
> > > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> > > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > > +			tsc_ref->tsc_sequence =
> > > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > > +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> > 
> > Why shouldn't this be vcpu->arch.virtual_tsc_khz?
> Yeah, I was thinking about that, but we need a vcpu instance for this.

Move it to kvm_guest_time_update time (which is necessary anyway for the
pvclock gtod notifier changes etc).

> > > +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> > > +				tsc_ref->tsc_offset;
> > > +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> > > +		}
> > 
> > The difference in setting tsc_ref->tsc_scale is the only important
> > change between the two occurrences.  If you can avoid that difference
> > and you move this to a separate function, you can reuse that new
> > function in set_msr_hyperv_pw as well.
> 
> Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
> partition creation time and KVM_SET_CLOCK which happens during resume 
> after partition pause? If so - there are several differences, where
> the offset calculation probably is the most important one.
> 
> Vadim.
> 
> > 
> > Also, kvm_set_tsc_khz should recompute the reference page's values as
> > well, so you'd have three uses.
> > 
> > Paolo
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-11 18:59     ` Marcelo Tosatti
@ 2013-12-12  9:33       ` Paolo Bonzini
  2014-01-02 16:52       ` Peter Lieven
  1 sibling, 0 replies; 37+ messages in thread
From: Paolo Bonzini @ 2013-12-12  9:33 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pl

Il 11/12/2013 19:59, Marcelo Tosatti ha scritto:
> > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> > starts counting?
> > 
> > No need to store kvmclock_offset in hv_ref_count? (moreover
> > the name is weird, better name would be "hv_ref_start_time".
> 
> Just add kvmclock_offset when reading the values (otherwise you have a
> "stale copy" of kvmclock_offset in hv_ref_count).

As mentioned in my review, v4 will probably read kvmclock to provide the
reference time counter.  So all the complexity of kvmclock_offset will
be hidden behind a function.

Paolo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-11 19:27   ` Marcelo Tosatti
@ 2013-12-12  9:34     ` Paolo Bonzini
  2014-01-14  4:11     ` Vadim Rozenfeld
  1 sibling, 0 replies; 37+ messages in thread
From: Paolo Bonzini @ 2013-12-12  9:34 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pl

Il 11/12/2013 20:27, Marcelo Tosatti ha scritto:
>> > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
>> > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
>> > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
>> > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> kvm_read_guest_cached.
> 
>> > +			kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * 
>> > +				native_read_tsc()) >> 32) + tsc_ref->tsc_offset;
> Why native_read_tsc and not ->read_l1_tsc?
> 
> It is easier to trust on the host to check reliability of the TSC: if
> it uses TSC clocksource, then the TSCs are stable. So could condition
> exposing the TSC ref page when ka->use_master_clock=1, see kvm_guest_time_update.
> And hook into pvclock_gtod_notify.
> 
> So in addition to X86_FEATURE_CONSTANT_TSC, check
> ka->use_master_clock=1

FWIW, I agree with all these comments from Marcelo.

Paolo


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-11 18:53   ` Marcelo Tosatti
  2013-12-11 18:59     ` Marcelo Tosatti
@ 2014-01-02 13:15     ` Peter Lieven
  2014-01-02 13:57       ` Marcelo Tosatti
  2014-01-13 12:10     ` Vadim Rozenfeld
  2 siblings, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-02 13:15 UTC (permalink / raw)
  To: Marcelo Tosatti, Vadim Rozenfeld; +Cc: kvm, pbonzini

Am 11.12.2013 19:53, schrieb Marcelo Tosatti:
> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>> Signed-off: Peter Lieven <pl@dlh.net>
>> Signed-off: Gleb Natapov <gleb@redhat.com>
>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>
>> v1 -> v2
>> 1. mark TSC page dirty as suggested by 
>>     Eric Northup <digitaleric@google.com> and Gleb
>> 2. disable local irq when calling get_kernel_ns, 
>>     as it was done by Peter Lieven <pl@dlhnet.de>
>> 3. move check for TSC page enable from second patch
>>     to this one.
>>
>> ---
>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>  include/uapi/linux/kvm.h           |  1 +
>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index ae5d783..2fd0753 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>  	/* fields used by HYPER-V emulation */
>>  	u64 hv_guest_os_id;
>>  	u64 hv_hypercall;
>> +	u64 hv_ref_count;
>> +	u64 hv_tsc_page;
>>  
>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>  	int audit_point;
>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>> index b8f1c01..462efe7 100644
>> --- a/arch/x86/include/uapi/asm/hyperv.h
>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>> @@ -28,6 +28,9 @@
>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>  
>> +/* A partition's reference time stamp counter (TSC) page */
>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>> +
>>  /*
>>   * There is a single feature flag that signifies the presence of the MSR
>>   * that can be used to retrieve both the local APIC Timer frequency as
>> @@ -198,6 +201,9 @@
>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>  
>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>> +
>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>> @@ -210,4 +216,11 @@
>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>  
>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>> +	__u32 tsc_sequence;
>> +	__u32 res1;
>> +	__u64 tsc_scale;
>> +	__s64 tsc_offset;
>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>> +
>>  #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 21ef1ba..5e4e495a 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>  static u32 msrs_to_save[] = {
>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>  	MSR_KVM_PV_EOI_EN,
>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>  	switch (msr) {
>>  	case HV_X64_MSR_GUEST_OS_ID:
>>  	case HV_X64_MSR_HYPERCALL:
>> +	case HV_X64_MSR_REFERENCE_TSC:
>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>  		r = true;
>>  		break;
>>  	}
>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>  			return 1;
>>  		kvm->arch.hv_hypercall = data;
>> +		local_irq_disable();
>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>> +		local_irq_enable();
> 
> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> starts counting?
> 
> No need to store kvmclock_offset in hv_ref_count? (moreover
> the name is weird, better name would be "hv_ref_start_time".
> 
>> +		break;
>> +	}
>> +	case HV_X64_MSR_REFERENCE_TSC: {
>> +		u64 gfn;
>> +		unsigned long addr;
>> +		HV_REFERENCE_TSC_PAGE tsc_ref;
>> +		tsc_ref.tsc_sequence = 0;
>> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>> +			kvm->arch.hv_tsc_page = data;
>> +			break;
>> +		}
>> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +		addr = gfn_to_hva(kvm, data >>
>> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>> +		if (kvm_is_error_hva(addr))
>> +			return 1;
>> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>> +			return 1;
>> +		mark_page_dirty(kvm, gfn);
>> +		kvm->arch.hv_tsc_page = data;
>>  		break;
>>  	}
>>  	default:
>> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>  	case HV_X64_MSR_HYPERCALL:
>>  		data = kvm->arch.hv_hypercall;
>>  		break;
>> +	case HV_X64_MSR_TIME_REF_COUNT: {
>> +		u64 now_ns;
>> +		local_irq_disable();
>> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
>> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
>> +		local_irq_enable();
> 
> No need for irq disable/enable pairs.

KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right?

Peter

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-02 13:15     ` Peter Lieven
@ 2014-01-02 13:57       ` Marcelo Tosatti
  2014-01-02 16:08         ` Peter Lieven
  0 siblings, 1 reply; 37+ messages in thread
From: Marcelo Tosatti @ 2014-01-02 13:57 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Vadim Rozenfeld, kvm, pbonzini

On Thu, Jan 02, 2014 at 02:15:48PM +0100, Peter Lieven wrote:
> Am 11.12.2013 19:53, schrieb Marcelo Tosatti:
> > On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >> Signed-off: Peter Lieven <pl@dlh.net>
> >> Signed-off: Gleb Natapov <gleb@redhat.com>
> >> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>
> >> v1 -> v2
> >> 1. mark TSC page dirty as suggested by 
> >>     Eric Northup <digitaleric@google.com> and Gleb
> >> 2. disable local irq when calling get_kernel_ns, 
> >>     as it was done by Peter Lieven <pl@dlhnet.de>
> >> 3. move check for TSC page enable from second patch
> >>     to this one.
> >>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h    |  2 ++
> >>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>  include/uapi/linux/kvm.h           |  1 +
> >>  4 files changed, 54 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >> index ae5d783..2fd0753 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>  	/* fields used by HYPER-V emulation */
> >>  	u64 hv_guest_os_id;
> >>  	u64 hv_hypercall;
> >> +	u64 hv_ref_count;
> >> +	u64 hv_tsc_page;
> >>  
> >>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>  	int audit_point;
> >> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >> index b8f1c01..462efe7 100644
> >> --- a/arch/x86/include/uapi/asm/hyperv.h
> >> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >> @@ -28,6 +28,9 @@
> >>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>  
> >> +/* A partition's reference time stamp counter (TSC) page */
> >> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >> +
> >>  /*
> >>   * There is a single feature flag that signifies the presence of the MSR
> >>   * that can be used to retrieve both the local APIC Timer frequency as
> >> @@ -198,6 +201,9 @@
> >>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>  
> >> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >> +
> >>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >> @@ -210,4 +216,11 @@
> >>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>  
> >> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >> +	__u32 tsc_sequence;
> >> +	__u32 res1;
> >> +	__u64 tsc_scale;
> >> +	__s64 tsc_offset;
> >> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >> +
> >>  #endif
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 21ef1ba..5e4e495a 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>  static u32 msrs_to_save[] = {
> >>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>  	MSR_KVM_PV_EOI_EN,
> >>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>  	switch (msr) {
> >>  	case HV_X64_MSR_GUEST_OS_ID:
> >>  	case HV_X64_MSR_HYPERCALL:
> >> +	case HV_X64_MSR_REFERENCE_TSC:
> >> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>  		r = true;
> >>  		break;
> >>  	}
> >> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>  			return 1;
> >>  		kvm->arch.hv_hypercall = data;
> >> +		local_irq_disable();
> >> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >> +		local_irq_enable();
> > 
> > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> > starts counting?
> > 
> > No need to store kvmclock_offset in hv_ref_count? (moreover
> > the name is weird, better name would be "hv_ref_start_time".
> > 
> >> +		break;
> >> +	}
> >> +	case HV_X64_MSR_REFERENCE_TSC: {
> >> +		u64 gfn;
> >> +		unsigned long addr;
> >> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> >> +		tsc_ref.tsc_sequence = 0;
> >> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> >> +			kvm->arch.hv_tsc_page = data;
> >> +			break;
> >> +		}
> >> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> >> +		addr = gfn_to_hva(kvm, data >>
> >> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> >> +		if (kvm_is_error_hva(addr))
> >> +			return 1;
> >> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> >> +			return 1;
> >> +		mark_page_dirty(kvm, gfn);
> >> +		kvm->arch.hv_tsc_page = data;
> >>  		break;
> >>  	}
> >>  	default:
> >> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >>  	case HV_X64_MSR_HYPERCALL:
> >>  		data = kvm->arch.hv_hypercall;
> >>  		break;
> >> +	case HV_X64_MSR_TIME_REF_COUNT: {
> >> +		u64 now_ns;
> >> +		local_irq_disable();
> >> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> >> +		local_irq_enable();
> > 
> > No need for irq disable/enable pairs.
> 
> KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right?
> 
> Peter


                local_irq_disable();
                now_ns = get_kernel_ns();
                delta = user_ns.clock - now_ns;
                local_irq_enable();

Not using irq disable/enable pairs. The subtraction is not dependant on
any particular time.

                local_irq_disable();
                now_ns = get_kernel_ns();
                local_irq_enable();
                delta = user_ns.clock - now_ns;

Any interrupt that would affect the value of get_kernel_ns(), would
have a similar effect before the interrupts are disabled. So the 
disable/enable pair achieves nothing in practice. It was copied from
kvm_guest_time_update.





^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-02 13:57       ` Marcelo Tosatti
@ 2014-01-02 16:08         ` Peter Lieven
  2014-01-02 20:05           ` Marcelo Tosatti
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-02 16:08 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pbonzini

Am 02.01.2014 14:57, schrieb Marcelo Tosatti:
> On Thu, Jan 02, 2014 at 02:15:48PM +0100, Peter Lieven wrote:
>> Am 11.12.2013 19:53, schrieb Marcelo Tosatti:
>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>
>>>> v1 -> v2
>>>> 1. mark TSC page dirty as suggested by 
>>>>     Eric Northup <digitaleric@google.com> and Gleb
>>>> 2. disable local irq when calling get_kernel_ns, 
>>>>     as it was done by Peter Lieven <pl@dlhnet.de>
>>>> 3. move check for TSC page enable from second patch
>>>>     to this one.
>>>>
>>>> ---
>>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>  include/uapi/linux/kvm.h           |  1 +
>>>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>> index ae5d783..2fd0753 100644
>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>  	/* fields used by HYPER-V emulation */
>>>>  	u64 hv_guest_os_id;
>>>>  	u64 hv_hypercall;
>>>> +	u64 hv_ref_count;
>>>> +	u64 hv_tsc_page;
>>>>  
>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>  	int audit_point;
>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>> index b8f1c01..462efe7 100644
>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>> @@ -28,6 +28,9 @@
>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>  
>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>> +
>>>>  /*
>>>>   * There is a single feature flag that signifies the presence of the MSR
>>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>>> @@ -198,6 +201,9 @@
>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>  
>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>> +
>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>>> @@ -210,4 +216,11 @@
>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>  
>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>> +	__u32 tsc_sequence;
>>>> +	__u32 res1;
>>>> +	__u64 tsc_scale;
>>>> +	__s64 tsc_offset;
>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>> +
>>>>  #endif
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 21ef1ba..5e4e495a 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>  static u32 msrs_to_save[] = {
>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>  	MSR_KVM_PV_EOI_EN,
>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>  	switch (msr) {
>>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>>  	case HV_X64_MSR_HYPERCALL:
>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>  		r = true;
>>>>  		break;
>>>>  	}
>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>  			return 1;
>>>>  		kvm->arch.hv_hypercall = data;
>>>> +		local_irq_disable();
>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>> +		local_irq_enable();
>>>
>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>> starts counting?
>>>
>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>> the name is weird, better name would be "hv_ref_start_time".
>>>
>>>> +		break;
>>>> +	}
>>>> +	case HV_X64_MSR_REFERENCE_TSC: {
>>>> +		u64 gfn;
>>>> +		unsigned long addr;
>>>> +		HV_REFERENCE_TSC_PAGE tsc_ref;
>>>> +		tsc_ref.tsc_sequence = 0;
>>>> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>>>> +			kvm->arch.hv_tsc_page = data;
>>>> +			break;
>>>> +		}
>>>> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>>>> +		addr = gfn_to_hva(kvm, data >>
>>>> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>>>> +		if (kvm_is_error_hva(addr))
>>>> +			return 1;
>>>> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>>>> +			return 1;
>>>> +		mark_page_dirty(kvm, gfn);
>>>> +		kvm->arch.hv_tsc_page = data;
>>>>  		break;
>>>>  	}
>>>>  	default:
>>>> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>>>  	case HV_X64_MSR_HYPERCALL:
>>>>  		data = kvm->arch.hv_hypercall;
>>>>  		break;
>>>> +	case HV_X64_MSR_TIME_REF_COUNT: {
>>>> +		u64 now_ns;
>>>> +		local_irq_disable();
>>>> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
>>>> +		local_irq_enable();
>>>
>>> No need for irq disable/enable pairs.
>>
>> KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right?
>>
>> Peter
> 
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 delta = user_ns.clock - now_ns;
>                 local_irq_enable();
> 
> Not using irq disable/enable pairs. The subtraction is not dependant on
> any particular time.
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 local_irq_enable();
>                 delta = user_ns.clock - now_ns;
> 
> Any interrupt that would affect the value of get_kernel_ns(), would
> have a similar effect before the interrupts are disabled. So the 
> disable/enable pair achieves nothing in practice. It was copied from
> kvm_guest_time_update.
> 
> 

Thanks for clarifying. What about get_kernel_ns() can't this be interrupted?

Peter



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-11 18:59     ` Marcelo Tosatti
  2013-12-12  9:33       ` Paolo Bonzini
@ 2014-01-02 16:52       ` Peter Lieven
  2014-01-07  9:36         ` Vadim Rozenfeld
  1 sibling, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-02 16:52 UTC (permalink / raw)
  To: Marcelo Tosatti, Vadim Rozenfeld; +Cc: kvm, pbonzini

Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>> Signed-off: Peter Lieven <pl@dlh.net>
>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>
>>> v1 -> v2
>>> 1. mark TSC page dirty as suggested by 
>>>     Eric Northup <digitaleric@google.com> and Gleb
>>> 2. disable local irq when calling get_kernel_ns, 
>>>     as it was done by Peter Lieven <pl@dlhnet.de>
>>> 3. move check for TSC page enable from second patch
>>>     to this one.
>>>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>  include/uapi/linux/kvm.h           |  1 +
>>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index ae5d783..2fd0753 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>  	/* fields used by HYPER-V emulation */
>>>  	u64 hv_guest_os_id;
>>>  	u64 hv_hypercall;
>>> +	u64 hv_ref_count;
>>> +	u64 hv_tsc_page;
>>>  
>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>  	int audit_point;
>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>> index b8f1c01..462efe7 100644
>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>> @@ -28,6 +28,9 @@
>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>  
>>> +/* A partition's reference time stamp counter (TSC) page */
>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>> +
>>>  /*
>>>   * There is a single feature flag that signifies the presence of the MSR
>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>> @@ -198,6 +201,9 @@
>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>  
>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>> +
>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>> @@ -210,4 +216,11 @@
>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>  
>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>> +	__u32 tsc_sequence;
>>> +	__u32 res1;
>>> +	__u64 tsc_scale;
>>> +	__s64 tsc_offset;
>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>> +
>>>  #endif
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 21ef1ba..5e4e495a 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>  static u32 msrs_to_save[] = {
>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>  	MSR_KVM_PV_EOI_EN,
>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>  	switch (msr) {
>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>  	case HV_X64_MSR_HYPERCALL:
>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>  		r = true;
>>>  		break;
>>>  	}
>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>  			return 1;
>>>  		kvm->arch.hv_hypercall = data;
>>> +		local_irq_disable();
>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>> +		local_irq_enable();
>>
>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>> starts counting?
>>
>> No need to store kvmclock_offset in hv_ref_count? (moreover
>> the name is weird, better name would be "hv_ref_start_time".
> 
> Just add kvmclock_offset when reading the values (otherwise you have a
> "stale copy" of kvmclock_offset in hv_ref_count).
> 

After some experiments I think we do no need kvm->arch.hv_ref_count at all.

I was debugging some weird clockjump issues and I think the problem is that after live migration
kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
hypercall was set up this can lead to series jumps.

So I would suggest to completely drop kvm->arch.hv_ref_count.

And use simply this in get_msr_hyperv_pw().

        case HV_X64_MSR_TIME_REF_COUNT: {
                data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
                break;
        }

It seems that get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vServer uptime
in nanoseconds which starts counting at 0.

Peter



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-02 16:08         ` Peter Lieven
@ 2014-01-02 20:05           ` Marcelo Tosatti
  0 siblings, 0 replies; 37+ messages in thread
From: Marcelo Tosatti @ 2014-01-02 20:05 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Vadim Rozenfeld, kvm, pbonzini

On Thu, Jan 02, 2014 at 05:08:07PM +0100, Peter Lieven wrote:
> > Not using irq disable/enable pairs. The subtraction is not dependant on
> > any particular time.
> > 
> >                 local_irq_disable();
> >                 now_ns = get_kernel_ns();
> >                 local_irq_enable();
> >                 delta = user_ns.clock - now_ns;
> > 
> > Any interrupt that would affect the value of get_kernel_ns(), would
> > have a similar effect before the interrupts are disabled. So the 
> > disable/enable pair achieves nothing in practice. It was copied from
> > kvm_guest_time_update.
> > 
> > 
> 
> Thanks for clarifying. What about get_kernel_ns() can't this be interrupted?
> 
> Peter

It can handle interrupts. 

Disabling interrupts there is used when a
{rdtsc, system_time=get_kernel_ns()} tuple is wanted, to reduce
the physical time difference between execution of rdtsc and get_kernel_ns().



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-02 16:52       ` Peter Lieven
@ 2014-01-07  9:36         ` Vadim Rozenfeld
  2014-01-07 17:52           ` Peter Lieven
  0 siblings, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-07  9:36 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> > On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>> Signed-off: Peter Lieven <pl@dlh.net>
> >>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>
> >>> v1 -> v2
> >>> 1. mark TSC page dirty as suggested by 
> >>>     Eric Northup <digitaleric@google.com> and Gleb
> >>> 2. disable local irq when calling get_kernel_ns, 
> >>>     as it was done by Peter Lieven <pl@dlhnet.de>
> >>> 3. move check for TSC page enable from second patch
> >>>     to this one.
> >>>
> >>> ---
> >>>  arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>  include/uapi/linux/kvm.h           |  1 +
> >>>  4 files changed, 54 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>> index ae5d783..2fd0753 100644
> >>> --- a/arch/x86/include/asm/kvm_host.h
> >>> +++ b/arch/x86/include/asm/kvm_host.h
> >>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>  	/* fields used by HYPER-V emulation */
> >>>  	u64 hv_guest_os_id;
> >>>  	u64 hv_hypercall;
> >>> +	u64 hv_ref_count;
> >>> +	u64 hv_tsc_page;
> >>>  
> >>>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>  	int audit_point;
> >>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>> index b8f1c01..462efe7 100644
> >>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>> @@ -28,6 +28,9 @@
> >>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>  
> >>> +/* A partition's reference time stamp counter (TSC) page */
> >>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>> +
> >>>  /*
> >>>   * There is a single feature flag that signifies the presence of the MSR
> >>>   * that can be used to retrieve both the local APIC Timer frequency as
> >>> @@ -198,6 +201,9 @@
> >>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>  
> >>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>> +
> >>>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >>> @@ -210,4 +216,11 @@
> >>>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>  
> >>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>> +	__u32 tsc_sequence;
> >>> +	__u32 res1;
> >>> +	__u64 tsc_scale;
> >>> +	__s64 tsc_offset;
> >>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>> +
> >>>  #endif
> >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>> index 21ef1ba..5e4e495a 100644
> >>> --- a/arch/x86/kvm/x86.c
> >>> +++ b/arch/x86/kvm/x86.c
> >>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>  static u32 msrs_to_save[] = {
> >>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>  	MSR_KVM_PV_EOI_EN,
> >>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>  	switch (msr) {
> >>>  	case HV_X64_MSR_GUEST_OS_ID:
> >>>  	case HV_X64_MSR_HYPERCALL:
> >>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>  		r = true;
> >>>  		break;
> >>>  	}
> >>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>  			return 1;
> >>>  		kvm->arch.hv_hypercall = data;
> >>> +		local_irq_disable();
> >>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>> +		local_irq_enable();
> >>
> >> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >> starts counting?
> >>
> >> No need to store kvmclock_offset in hv_ref_count? (moreover
> >> the name is weird, better name would be "hv_ref_start_time".
> > 
> > Just add kvmclock_offset when reading the values (otherwise you have a
> > "stale copy" of kvmclock_offset in hv_ref_count).
> > 
> 
> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> 
> I was debugging some weird clockjump issues and I think the problem is that after live migration
> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> hypercall was set up this can lead to series jumps.
> 
> So I would suggest to completely drop kvm->arch.hv_ref_count.
> 
> And use simply this in get_msr_hyperv_pw().
> 
>         case HV_X64_MSR_TIME_REF_COUNT: {
>                 data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>                 break;
>         }
> 

Agreed. It should work as long as we rely on kvmclock_offset.
Vadim.

> It seems that get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vServer uptime
> in nanoseconds which starts counting at 0.
> 
> Peter
> 
> 



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-07  9:36         ` Vadim Rozenfeld
@ 2014-01-07 17:52           ` Peter Lieven
  2014-01-08  9:40             ` Vadim Rozenfeld
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-07 17:52 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, pbonzini

Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>
>>>>> v1 -> v2
>>>>> 1. mark TSC page dirty as suggested by 
>>>>>     Eric Northup <digitaleric@google.com> and Gleb
>>>>> 2. disable local irq when calling get_kernel_ns, 
>>>>>     as it was done by Peter Lieven <pl@dlhnet.de>
>>>>> 3. move check for TSC page enable from second patch
>>>>>     to this one.
>>>>>
>>>>> ---
>>>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>  include/uapi/linux/kvm.h           |  1 +
>>>>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>> index ae5d783..2fd0753 100644
>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>  	/* fields used by HYPER-V emulation */
>>>>>  	u64 hv_guest_os_id;
>>>>>  	u64 hv_hypercall;
>>>>> +	u64 hv_ref_count;
>>>>> +	u64 hv_tsc_page;
>>>>>  
>>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>  	int audit_point;
>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>> index b8f1c01..462efe7 100644
>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>> @@ -28,6 +28,9 @@
>>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>  
>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>> +
>>>>>  /*
>>>>>   * There is a single feature flag that signifies the presence of the MSR
>>>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>>>> @@ -198,6 +201,9 @@
>>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>  
>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>> +
>>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>> @@ -210,4 +216,11 @@
>>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>  
>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>> +	__u32 tsc_sequence;
>>>>> +	__u32 res1;
>>>>> +	__u64 tsc_scale;
>>>>> +	__s64 tsc_offset;
>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>> +
>>>>>  #endif
>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>> index 21ef1ba..5e4e495a 100644
>>>>> --- a/arch/x86/kvm/x86.c
>>>>> +++ b/arch/x86/kvm/x86.c
>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>  static u32 msrs_to_save[] = {
>>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>  	MSR_KVM_PV_EOI_EN,
>>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>  	switch (msr) {
>>>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>>>  	case HV_X64_MSR_HYPERCALL:
>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>  		r = true;
>>>>>  		break;
>>>>>  	}
>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>  			return 1;
>>>>>  		kvm->arch.hv_hypercall = data;
>>>>> +		local_irq_disable();
>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>> +		local_irq_enable();
>>>>
>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>> starts counting?
>>>>
>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>> the name is weird, better name would be "hv_ref_start_time".
>>>
>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>
>>
>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>
>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>> hypercall was set up this can lead to series jumps.
>>
>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>
>> And use simply this in get_msr_hyperv_pw().
>>
>>         case HV_X64_MSR_TIME_REF_COUNT: {
>>                 data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>                 break;
>>         }
>>
> 
> Agreed. It should work as long as we rely on kvmclock_offset.
> Vadim.

I think we can rely on kvmclock_offset. Had you had a chance to do further testing
during the weekend?

Peter

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-07 17:52           ` Peter Lieven
@ 2014-01-08  9:40             ` Vadim Rozenfeld
  2014-01-08 10:15               ` Peter Lieven
  0 siblings, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-08  9:40 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> > On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>
> >>>>> v1 -> v2
> >>>>> 1. mark TSC page dirty as suggested by 
> >>>>>     Eric Northup <digitaleric@google.com> and Gleb
> >>>>> 2. disable local irq when calling get_kernel_ns, 
> >>>>>     as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>> 3. move check for TSC page enable from second patch
> >>>>>     to this one.
> >>>>>
> >>>>> ---
> >>>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>  include/uapi/linux/kvm.h           |  1 +
> >>>>>  4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>> index ae5d783..2fd0753 100644
> >>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>  	/* fields used by HYPER-V emulation */
> >>>>>  	u64 hv_guest_os_id;
> >>>>>  	u64 hv_hypercall;
> >>>>> +	u64 hv_ref_count;
> >>>>> +	u64 hv_tsc_page;
> >>>>>  
> >>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>  	int audit_point;
> >>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>> index b8f1c01..462efe7 100644
> >>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>> @@ -28,6 +28,9 @@
> >>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>  
> >>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>> +
> >>>>>  /*
> >>>>>   * There is a single feature flag that signifies the presence of the MSR
> >>>>>   * that can be used to retrieve both the local APIC Timer frequency as
> >>>>> @@ -198,6 +201,9 @@
> >>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>  
> >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>> +
> >>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>> @@ -210,4 +216,11 @@
> >>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>  
> >>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>> +	__u32 tsc_sequence;
> >>>>> +	__u32 res1;
> >>>>> +	__u64 tsc_scale;
> >>>>> +	__s64 tsc_offset;
> >>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>> +
> >>>>>  #endif
> >>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>> index 21ef1ba..5e4e495a 100644
> >>>>> --- a/arch/x86/kvm/x86.c
> >>>>> +++ b/arch/x86/kvm/x86.c
> >>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>  static u32 msrs_to_save[] = {
> >>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>  	MSR_KVM_PV_EOI_EN,
> >>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>  	switch (msr) {
> >>>>>  	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>  	case HV_X64_MSR_HYPERCALL:
> >>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>  		r = true;
> >>>>>  		break;
> >>>>>  	}
> >>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>  			return 1;
> >>>>>  		kvm->arch.hv_hypercall = data;
> >>>>> +		local_irq_disable();
> >>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>> +		local_irq_enable();
> >>>>
> >>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>> starts counting?
> >>>>
> >>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>> the name is weird, better name would be "hv_ref_start_time".
> >>>
> >>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>
> >>
> >> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>
> >> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >> hypercall was set up this can lead to series jumps.
> >>
> >> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>
> >> And use simply this in get_msr_hyperv_pw().
> >>
> >>         case HV_X64_MSR_TIME_REF_COUNT: {
> >>                 data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>                 break;
> >>         }
> >>
> > 
> > Agreed. It should work as long as we rely on kvmclock_offset.
> > Vadim.
> 
> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> during the weekend?

Yes, and still testing. 

There is still some inconsistency in the value returned by
QueryPerformanceCounter during migration.

Vadim. 
> 
> Peter
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08  9:40             ` Vadim Rozenfeld
@ 2014-01-08 10:15               ` Peter Lieven
  2014-01-08 10:44                 ` Vadim Rozenfeld
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-08 10:15 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, pbonzini

On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>
>>>>>>> v1 -> v2
>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>      Eric Northup <digitaleric@google.com> and Gleb
>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>      as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>      to this one.
>>>>>>>
>>>>>>> ---
>>>>>>>   arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>   arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>   arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>   include/uapi/linux/kvm.h           |  1 +
>>>>>>>   4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>> index ae5d783..2fd0753 100644
>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>   	/* fields used by HYPER-V emulation */
>>>>>>>   	u64 hv_guest_os_id;
>>>>>>>   	u64 hv_hypercall;
>>>>>>> +	u64 hv_ref_count;
>>>>>>> +	u64 hv_tsc_page;
>>>>>>>
>>>>>>>   	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>   	int audit_point;
>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>> index b8f1c01..462efe7 100644
>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>   /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>   #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>
>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>> +
>>>>>>>   /*
>>>>>>>    * There is a single feature flag that signifies the presence of the MSR
>>>>>>>    * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>   #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>   		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>
>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>> +
>>>>>>>   #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>   #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>   #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>   #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>   #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>
>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>> +	__u32 tsc_sequence;
>>>>>>> +	__u32 res1;
>>>>>>> +	__u64 tsc_scale;
>>>>>>> +	__s64 tsc_offset;
>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>> +
>>>>>>>   #endif
>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>   static u32 msrs_to_save[] = {
>>>>>>>   	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>   	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>   	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>   	MSR_KVM_PV_EOI_EN,
>>>>>>>   	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>   	switch (msr) {
>>>>>>>   	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>   	case HV_X64_MSR_HYPERCALL:
>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>   		r = true;
>>>>>>>   		break;
>>>>>>>   	}
>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>   			return 1;
>>>>>>>   		kvm->arch.hv_hypercall = data;
>>>>>>> +		local_irq_disable();
>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>> +		local_irq_enable();
>>>>>>
>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>> starts counting?
>>>>>>
>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>
>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>
>>>>
>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>
>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>> hypercall was set up this can lead to series jumps.
>>>>
>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>
>>>> And use simply this in get_msr_hyperv_pw().
>>>>
>>>>          case HV_X64_MSR_TIME_REF_COUNT: {
>>>>                  data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>                  break;
>>>>          }
>>>>
>>>
>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>> Vadim.
>>
>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>> during the weekend?
>
> Yes, and still testing.
>
> There is still some inconsistency in the value returned by
> QueryPerformanceCounter during migration.

With my proposal?

Peter


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 10:15               ` Peter Lieven
@ 2014-01-08 10:44                 ` Vadim Rozenfeld
  2014-01-08 11:48                   ` Peter Lieven
  0 siblings, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-08 10:44 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> > On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>
> >>>>>>> v1 -> v2
> >>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>      Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>      as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>      to this one.
> >>>>>>>
> >>>>>>> ---
> >>>>>>>   arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>   arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>   arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>   include/uapi/linux/kvm.h           |  1 +
> >>>>>>>   4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>
> >>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>> index ae5d783..2fd0753 100644
> >>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>   	/* fields used by HYPER-V emulation */
> >>>>>>>   	u64 hv_guest_os_id;
> >>>>>>>   	u64 hv_hypercall;
> >>>>>>> +	u64 hv_ref_count;
> >>>>>>> +	u64 hv_tsc_page;
> >>>>>>>
> >>>>>>>   	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>   	int audit_point;
> >>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>> index b8f1c01..462efe7 100644
> >>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>   /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>   #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>
> >>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>> +
> >>>>>>>   /*
> >>>>>>>    * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>    * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>   #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>   		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>
> >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>> +
> >>>>>>>   #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>   #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>   #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>   #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>   #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>
> >>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>> +	__u32 tsc_sequence;
> >>>>>>> +	__u32 res1;
> >>>>>>> +	__u64 tsc_scale;
> >>>>>>> +	__s64 tsc_offset;
> >>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>> +
> >>>>>>>   #endif
> >>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>   static u32 msrs_to_save[] = {
> >>>>>>>   	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>   	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>   	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>   	MSR_KVM_PV_EOI_EN,
> >>>>>>>   	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>   	switch (msr) {
> >>>>>>>   	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>   	case HV_X64_MSR_HYPERCALL:
> >>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>   		r = true;
> >>>>>>>   		break;
> >>>>>>>   	}
> >>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>   			return 1;
> >>>>>>>   		kvm->arch.hv_hypercall = data;
> >>>>>>> +		local_irq_disable();
> >>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>> +		local_irq_enable();
> >>>>>>
> >>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>> starts counting?
> >>>>>>
> >>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>
> >>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>
> >>>>
> >>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>
> >>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>> hypercall was set up this can lead to series jumps.
> >>>>
> >>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>
> >>>> And use simply this in get_msr_hyperv_pw().
> >>>>
> >>>>          case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>                  data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>                  break;
> >>>>          }
> >>>>
> >>>
> >>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>> Vadim.
> >>
> >> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >> during the weekend?
> >
> > Yes, and still testing.
> >
> > There is still some inconsistency in the value returned by
> > QueryPerformanceCounter during migration.
> 
> With my proposal?
> 

Right. 
Please see below QTC vs. NTP time stamp
before and after migration:

C:\timedrift\objchk_win7_x86\i386>timedrift.exe
quantum = 15
QPC     NTP     Delta
1311    1307    4
2621    2618    3
3932    3931    1
.......
.......
38018   38014   4
Unable to wait for NTP reply from the SNTP server, GetLastError returns
7279088
142210  40113   102097
143458  41357   102101
.......
.......
156500  54615   101885

NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
mSec)

Vadim.

> Peter
> 



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 10:44                 ` Vadim Rozenfeld
@ 2014-01-08 11:48                   ` Peter Lieven
  2014-01-08 12:12                     ` Vadim Rozenfeld
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-08 11:48 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, pbonzini

On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>
>>>>>>>>> v1 -> v2
>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>       Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>       as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>       to this one.
>>>>>>>>>
>>>>>>>>> ---
>>>>>>>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>    arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>    arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>    include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>    4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>    	/* fields used by HYPER-V emulation */
>>>>>>>>>    	u64 hv_guest_os_id;
>>>>>>>>>    	u64 hv_hypercall;
>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>
>>>>>>>>>    	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>    	int audit_point;
>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>    /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>    #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>
>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>> +
>>>>>>>>>    /*
>>>>>>>>>     * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>     * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>    #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>    		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>
>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>> +
>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>    #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>    #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>
>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>> +	__u32 res1;
>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>> +
>>>>>>>>>    #endif
>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>    static u32 msrs_to_save[] = {
>>>>>>>>>    	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>    	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>    	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>    	MSR_KVM_PV_EOI_EN,
>>>>>>>>>    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>    	switch (msr) {
>>>>>>>>>    	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>    	case HV_X64_MSR_HYPERCALL:
>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>    		r = true;
>>>>>>>>>    		break;
>>>>>>>>>    	}
>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>    			return 1;
>>>>>>>>>    		kvm->arch.hv_hypercall = data;
>>>>>>>>> +		local_irq_disable();
>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>> +		local_irq_enable();
>>>>>>>>
>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>> starts counting?
>>>>>>>>
>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>
>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>
>>>>>>
>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>
>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>
>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>
>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>
>>>>>>           case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>                   data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>                   break;
>>>>>>           }
>>>>>>
>>>>>
>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>> Vadim.
>>>>
>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>> during the weekend?
>>>
>>> Yes, and still testing.
>>>
>>> There is still some inconsistency in the value returned by
>>> QueryPerformanceCounter during migration.
>>
>> With my proposal?
>>
>
> Right.
> Please see below QTC vs. NTP time stamp
> before and after migration:
>
> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> quantum = 15
> QPC     NTP     Delta
> 1311    1307    4
> 2621    2618    3
> 3932    3931    1
> .......
> .......
> 38018   38014   4
> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> 7279088
> 142210  40113   102097
> 143458  41357   102101
> .......
> .......
> 156500  54615   101885
>
> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> mSec)

How long was the downtime when you migrated the vServer?

I have done similar tests and have not seen this...
Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.

Peter


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 11:48                   ` Peter Lieven
@ 2014-01-08 12:12                     ` Vadim Rozenfeld
  2014-01-08 14:54                       ` Peter Lieven
  0 siblings, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-08 12:12 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> > On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>
> >>>>>>>>> v1 -> v2
> >>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>       Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>       as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>       to this one.
> >>>>>>>>>
> >>>>>>>>> ---
> >>>>>>>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>    arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>    arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>    include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>    4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>
> >>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>    	/* fields used by HYPER-V emulation */
> >>>>>>>>>    	u64 hv_guest_os_id;
> >>>>>>>>>    	u64 hv_hypercall;
> >>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>
> >>>>>>>>>    	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>    	int audit_point;
> >>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>    /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>    #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>
> >>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>> +
> >>>>>>>>>    /*
> >>>>>>>>>     * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>     * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>    #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>    		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>
> >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>> +
> >>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>    #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>    #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>
> >>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>> +	__u32 res1;
> >>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>> +
> >>>>>>>>>    #endif
> >>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>    static u32 msrs_to_save[] = {
> >>>>>>>>>    	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>    	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>    	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>    	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>    	switch (msr) {
> >>>>>>>>>    	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>    	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>    		r = true;
> >>>>>>>>>    		break;
> >>>>>>>>>    	}
> >>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>    			return 1;
> >>>>>>>>>    		kvm->arch.hv_hypercall = data;
> >>>>>>>>> +		local_irq_disable();
> >>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>> +		local_irq_enable();
> >>>>>>>>
> >>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>> starts counting?
> >>>>>>>>
> >>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>
> >>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>
> >>>>>>
> >>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>
> >>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>
> >>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>
> >>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>
> >>>>>>           case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>                   data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>                   break;
> >>>>>>           }
> >>>>>>
> >>>>>
> >>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>> Vadim.
> >>>>
> >>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>> during the weekend?
> >>>
> >>> Yes, and still testing.
> >>>
> >>> There is still some inconsistency in the value returned by
> >>> QueryPerformanceCounter during migration.
> >>
> >> With my proposal?
> >>
> >
> > Right.
> > Please see below QTC vs. NTP time stamp
> > before and after migration:
> >
> > C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> > quantum = 15
> > QPC     NTP     Delta
> > 1311    1307    4
> > 2621    2618    3
> > 3932    3931    1
> > .......
> > .......
> > 38018   38014   4
> > Unable to wait for NTP reply from the SNTP server, GetLastError returns
> > 7279088
> > 142210  40113   102097
> > 143458  41357   102101
> > .......
> > .......
> > 156500  54615   101885
> >
> > NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> > while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> > mSec)
> 
> How long was the downtime when you migrated the vServer?
I use Win7-32 for testing.
> 
> I have done similar tests and have not seen this...
> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
Yes, I also test it for reference counter.

        case HV_X64_MSR_TIME_REF_COUNT: {
//              u64 now_ns;
//              local_irq_disable();
//              now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
//              data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
//              local_irq_enable();
                data = div_u64(get_kernel_ns() +
kvm->arch.kvmclock_offset, 100);
                break;
        }


> 
> Peter
> 



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 12:12                     ` Vadim Rozenfeld
@ 2014-01-08 14:54                       ` Peter Lieven
  2014-01-08 20:08                         ` Vadim Rozenfeld
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Lieven @ 2014-01-08 14:54 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, pbonzini

On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>>>
>>>>>>>>>>> v1 -> v2
>>>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>>>        to this one.
>>>>>>>>>>>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
>>>>>>>>>>>     	u64 hv_guest_os_id;
>>>>>>>>>>>     	u64 hv_hypercall;
>>>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>>>
>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>>>     	int audit_point;
>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>>>
>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>>>> +
>>>>>>>>>>>     /*
>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>>>
>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>>>> +
>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>>>
>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>>>> +	__u32 res1;
>>>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>>>> +
>>>>>>>>>>>     #endif
>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>>>     static u32 msrs_to_save[] = {
>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>>>     	switch (msr) {
>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>>>     		r = true;
>>>>>>>>>>>     		break;
>>>>>>>>>>>     	}
>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>>>     			return 1;
>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
>>>>>>>>>>> +		local_irq_disable();
>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>>>> +		local_irq_enable();
>>>>>>>>>>
>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>>>> starts counting?
>>>>>>>>>>
>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>>>
>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>>>
>>>>>>>>
>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>>>
>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>>>
>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>>>
>>>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>>>
>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>>>                    break;
>>>>>>>>            }
>>>>>>>>
>>>>>>>
>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>>>> Vadim.
>>>>>>
>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>>>> during the weekend?
>>>>>
>>>>> Yes, and still testing.
>>>>>
>>>>> There is still some inconsistency in the value returned by
>>>>> QueryPerformanceCounter during migration.
>>>>
>>>> With my proposal?
>>>>
>>>
>>> Right.
>>> Please see below QTC vs. NTP time stamp
>>> before and after migration:
>>>
>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
>>> quantum = 15
>>> QPC     NTP     Delta
>>> 1311    1307    4
>>> 2621    2618    3
>>> 3932    3931    1
>>> .......
>>> .......
>>> 38018   38014   4
>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
>>> 7279088
>>> 142210  40113   102097
>>> 143458  41357   102101
>>> .......
>>> .......
>>> 156500  54615   101885
>>>
>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
>>> mSec)
>>
>> How long was the downtime when you migrated the vServer?
> I use Win7-32 for testing.
>>
>> I have done similar tests and have not seen this...
>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> Yes, I also test it for reference counter.

Can you use these parameters for testing:

-cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
-rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet

If you look in the HMP at "info migrate" after the migration what does the downtime info say?

Peter


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 14:54                       ` Peter Lieven
@ 2014-01-08 20:08                         ` Vadim Rozenfeld
  2014-01-08 22:20                           ` Peter Lieven
  0 siblings, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-08 20:08 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> > On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> >> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> >>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>>>
> >>>>>>>>>>> v1 -> v2
> >>>>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>>>        to this one.
> >>>>>>>>>>>
> >>>>>>>>>>> ---
> >>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>>>
> >>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>>>     	/* fields used by HYPER-V emulation */
> >>>>>>>>>>>     	u64 hv_guest_os_id;
> >>>>>>>>>>>     	u64 hv_hypercall;
> >>>>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>>>
> >>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>>>     	int audit_point;
> >>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>>>
> >>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>>>> +
> >>>>>>>>>>>     /*
> >>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>>>
> >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>>>> +
> >>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>>>
> >>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>>>> +	__u32 res1;
> >>>>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>>>> +
> >>>>>>>>>>>     #endif
> >>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>>>     static u32 msrs_to_save[] = {
> >>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>>>     	switch (msr) {
> >>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>>>     		r = true;
> >>>>>>>>>>>     		break;
> >>>>>>>>>>>     	}
> >>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>>>     			return 1;
> >>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
> >>>>>>>>>>> +		local_irq_disable();
> >>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>>>> +		local_irq_enable();
> >>>>>>>>>>
> >>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>>>> starts counting?
> >>>>>>>>>>
> >>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>>>
> >>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>>>
> >>>>>>>>
> >>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>>>
> >>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>>>
> >>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>>>
> >>>>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>>>
> >>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>>>                    break;
> >>>>>>>>            }
> >>>>>>>>
> >>>>>>>
> >>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>>>> Vadim.
> >>>>>>
> >>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>>>> during the weekend?
> >>>>>
> >>>>> Yes, and still testing.
> >>>>>
> >>>>> There is still some inconsistency in the value returned by
> >>>>> QueryPerformanceCounter during migration.
> >>>>
> >>>> With my proposal?
> >>>>
> >>>
> >>> Right.
> >>> Please see below QTC vs. NTP time stamp
> >>> before and after migration:
> >>>
> >>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> >>> quantum = 15
> >>> QPC     NTP     Delta
> >>> 1311    1307    4
> >>> 2621    2618    3
> >>> 3932    3931    1
> >>> .......
> >>> .......
> >>> 38018   38014   4
> >>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> >>> 7279088
> >>> 142210  40113   102097
> >>> 143458  41357   102101
> >>> .......
> >>> .......
> >>> 156500  54615   101885
> >>>
> >>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> >>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> >>> mSec)
> >>
> >> How long was the downtime when you migrated the vServer?
> > I use Win7-32 for testing.
> >>
> >> I have done similar tests and have not seen this...
> >> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> > Yes, I also test it for reference counter.
> 
> Can you use these parameters for testing:
> 
> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet

My settings are:
-cpu qemu64,
+x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
-smp 4

> 
> If you look in the HMP at "info migrate" after the migration what does the downtime info say?

downtime: 4 milliseconds

> 
> Peter
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 20:08                         ` Vadim Rozenfeld
@ 2014-01-08 22:20                           ` Peter Lieven
  2014-01-09 11:10                             ` Vadim Rozenfeld
  2014-01-12 12:08                             ` Vadim Rozenfeld
  0 siblings, 2 replies; 37+ messages in thread
From: Peter Lieven @ 2014-01-08 22:20 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, pbonzini

Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
> On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
>> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
>>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
>>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
>>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>>>>>
>>>>>>>>>>>>> v1 -> v2
>>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>>>>>        to this one.
>>>>>>>>>>>>>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
>>>>>>>>>>>>>     	u64 hv_guest_os_id;
>>>>>>>>>>>>>     	u64 hv_hypercall;
>>>>>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>>>>>
>>>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>>>>>     	int audit_point;
>>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>>>>>
>>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>>>>>> +
>>>>>>>>>>>>>     /*
>>>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>>>>>
>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>>>>>> +
>>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>>>>>
>>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>>>>>> +	__u32 res1;
>>>>>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>>>>>> +
>>>>>>>>>>>>>     #endif
>>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>>>>>     static u32 msrs_to_save[] = {
>>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
>>>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>>>>>     	switch (msr) {
>>>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
>>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>>>>>     		r = true;
>>>>>>>>>>>>>     		break;
>>>>>>>>>>>>>     	}
>>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>>>>>     			return 1;
>>>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
>>>>>>>>>>>>> +		local_irq_disable();
>>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>>>>>> +		local_irq_enable();
>>>>>>>>>>>>
>>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>>>>>> starts counting?
>>>>>>>>>>>>
>>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>>>>>
>>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>>>>>
>>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>>>>>
>>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>>>>>
>>>>>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>>>>>
>>>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>>>>>                    break;
>>>>>>>>>>            }
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>>>>>> Vadim.
>>>>>>>>
>>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>>>>>> during the weekend?
>>>>>>>
>>>>>>> Yes, and still testing.
>>>>>>>
>>>>>>> There is still some inconsistency in the value returned by
>>>>>>> QueryPerformanceCounter during migration.
>>>>>>
>>>>>> With my proposal?
>>>>>>
>>>>>
>>>>> Right.
>>>>> Please see below QTC vs. NTP time stamp
>>>>> before and after migration:
>>>>>
>>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
>>>>> quantum = 15
>>>>> QPC     NTP     Delta
>>>>> 1311    1307    4
>>>>> 2621    2618    3
>>>>> 3932    3931    1
>>>>> .......
>>>>> .......
>>>>> 38018   38014   4
>>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
>>>>> 7279088
>>>>> 142210  40113   102097
>>>>> 143458  41357   102101
>>>>> .......
>>>>> .......
>>>>> 156500  54615   101885
>>>>>
>>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
>>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
>>>>> mSec)
>>>>
>>>> How long was the downtime when you migrated the vServer?
>>> I use Win7-32 for testing.
>>>>
>>>> I have done similar tests and have not seen this...
>>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
>>> Yes, I also test it for reference counter.
>>
>> Can you use these parameters for testing:
>>
>> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
>> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
> 
> My settings are:
> -cpu qemu64,
> +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
> -smp 4

do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?

Peter

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 22:20                           ` Peter Lieven
@ 2014-01-09 11:10                             ` Vadim Rozenfeld
  2014-01-12 12:08                             ` Vadim Rozenfeld
  1 sibling, 0 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-09 11:10 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote:
> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
> > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
> >> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> v1 -> v2
> >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>>>>>        to this one.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ---
> >>>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
> >>>>>>>>>>>>>     	u64 hv_guest_os_id;
> >>>>>>>>>>>>>     	u64 hv_hypercall;
> >>>>>>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>>>>>     	int audit_point;
> >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     /*
> >>>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>>>>>> +	__u32 res1;
> >>>>>>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #endif
> >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>>>>>     static u32 msrs_to_save[] = {
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>>>>>     	switch (msr) {
> >>>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>>>>>     		r = true;
> >>>>>>>>>>>>>     		break;
> >>>>>>>>>>>>>     	}
> >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>>>>>     			return 1;
> >>>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
> >>>>>>>>>>>>> +		local_irq_disable();
> >>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>>>>>> +		local_irq_enable();
> >>>>>>>>>>>>
> >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>>>>>> starts counting?
> >>>>>>>>>>>>
> >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>>>>>
> >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>>>>>
> >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>>>>>
> >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>>>>>
> >>>>>>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>>>>>
> >>>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>>>>>                    break;
> >>>>>>>>>>            }
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>>>>>> Vadim.
> >>>>>>>>
> >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>>>>>> during the weekend?
> >>>>>>>
> >>>>>>> Yes, and still testing.
> >>>>>>>
> >>>>>>> There is still some inconsistency in the value returned by
> >>>>>>> QueryPerformanceCounter during migration.
> >>>>>>
> >>>>>> With my proposal?
> >>>>>>
> >>>>>
> >>>>> Right.
> >>>>> Please see below QTC vs. NTP time stamp
> >>>>> before and after migration:
> >>>>>
> >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> >>>>> quantum = 15
> >>>>> QPC     NTP     Delta
> >>>>> 1311    1307    4
> >>>>> 2621    2618    3
> >>>>> 3932    3931    1
> >>>>> .......
> >>>>> .......
> >>>>> 38018   38014   4
> >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> >>>>> 7279088
> >>>>> 142210  40113   102097
> >>>>> 143458  41357   102101
> >>>>> .......
> >>>>> .......
> >>>>> 156500  54615   101885
> >>>>>
> >>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> >>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> >>>>> mSec)
> >>>>
> >>>> How long was the downtime when you migrated the vServer?
> >>> I use Win7-32 for testing.
> >>>>
> >>>> I have done similar tests and have not seen this...
> >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> >>> Yes, I also test it for reference counter.
> >>
> >> Can you use these parameters for testing:
> >>
> >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
> >> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
> > 
> > My settings are:
> > -cpu qemu64,
> > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
> > -smp 4
> 
> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?

Yes, almost the same results. I will try to re-check it on a different
machine tomorrow.
Best regards,
Vadim.

> 
> Peter
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-08 22:20                           ` Peter Lieven
  2014-01-09 11:10                             ` Vadim Rozenfeld
@ 2014-01-12 12:08                             ` Vadim Rozenfeld
  2014-01-12 20:35                               ` Peter Lieven
  1 sibling, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-12 12:08 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, pbonzini

On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote:
> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
> > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
> >> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> v1 -> v2
> >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>>>>>        to this one.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ---
> >>>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
> >>>>>>>>>>>>>     	u64 hv_guest_os_id;
> >>>>>>>>>>>>>     	u64 hv_hypercall;
> >>>>>>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>>>>>     	int audit_point;
> >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     /*
> >>>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>>>>>> +	__u32 res1;
> >>>>>>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #endif
> >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>>>>>     static u32 msrs_to_save[] = {
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>>>>>     	switch (msr) {
> >>>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>>>>>     		r = true;
> >>>>>>>>>>>>>     		break;
> >>>>>>>>>>>>>     	}
> >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>>>>>     			return 1;
> >>>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
> >>>>>>>>>>>>> +		local_irq_disable();
> >>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>>>>>> +		local_irq_enable();
> >>>>>>>>>>>>
> >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>>>>>> starts counting?
> >>>>>>>>>>>>
> >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>>>>>
> >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>>>>>
> >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>>>>>
> >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>>>>>
> >>>>>>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>>>>>
> >>>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>>>>>                    break;
> >>>>>>>>>>            }
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>>>>>> Vadim.
> >>>>>>>>
> >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>>>>>> during the weekend?
> >>>>>>>
> >>>>>>> Yes, and still testing.

It seems to be working fine. However, I would to keep hv_ref_count as is
for the following reason: 
" A partition's reference time counter is initialized to zero when the
partition is created."
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
28v=vs.85%29.aspx

Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR
write handler) can be treated as such event. In this case  hv_ref_count
is no more than just an offset, which should be subtracted from the
current time (  get_kernel_ns() + kvm->arch.kvmclock_offset ) when
handling HV_X64_MSR_TIME_REF_COUNT MSR read request.

Best regards,
Vadim.

> >>>>>>>
> >>>>>>> There is still some inconsistency in the value returned by
> >>>>>>> QueryPerformanceCounter during migration.
> >>>>>>
> >>>>>> With my proposal?
> >>>>>>
> >>>>>
> >>>>> Right.
> >>>>> Please see below QTC vs. NTP time stamp
> >>>>> before and after migration:
> >>>>>
> >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> >>>>> quantum = 15
> >>>>> QPC     NTP     Delta
> >>>>> 1311    1307    4
> >>>>> 2621    2618    3
> >>>>> 3932    3931    1
> >>>>> .......
> >>>>> .......
> >>>>> 38018   38014   4
> >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> >>>>> 7279088
> >>>>> 142210  40113   102097
> >>>>> 143458  41357   102101
> >>>>> .......
> >>>>> .......
> >>>>> 156500  54615   101885
> >>>>>
> >>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> >>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> >>>>> mSec)
> >>>>
> >>>> How long was the downtime when you migrated the vServer?
> >>> I use Win7-32 for testing.
> >>>>
> >>>> I have done similar tests and have not seen this...
> >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> >>> Yes, I also test it for reference counter.
> >>
> >> Can you use these parameters for testing:
> >>
> >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
> >> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
> > 
> > My settings are:
> > -cpu qemu64,
> > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
> > -smp 4
> 
> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?
> 
> Peter



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2014-01-12 12:08                             ` Vadim Rozenfeld
@ 2014-01-12 20:35                               ` Peter Lieven
  0 siblings, 0 replies; 37+ messages in thread
From: Peter Lieven @ 2014-01-12 20:35 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, pbonzini


Am 12.01.2014 um 13:08 schrieb Vadim Rozenfeld <vrozenfe@redhat.com>:

> On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote:
>> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
>>> On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
>>>> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
>>>>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
>>>>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
>>>>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>>>>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>>>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> v1 -> v2
>>>>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>>>>>>>       Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>>>>>>>       as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>>>>>>>       to this one.
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>>>>>>>    arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>>>>>>>    arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>>>>>>    include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>>>>>>>    4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>>>>>>>    	/* fields used by HYPER-V emulation */
>>>>>>>>>>>>>>>    	u64 hv_guest_os_id;
>>>>>>>>>>>>>>>    	u64 hv_hypercall;
>>>>>>>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>    	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>>>>>>>    	int audit_point;
>>>>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>>>>>>>    /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>>>>>>>    #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>    /*
>>>>>>>>>>>>>>>     * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>>>>>>>     * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>>>>>>>    #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>>>>>>>    		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>>>>>>>    #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>>>>>>>    #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>>>>>>>> +	__u32 res1;
>>>>>>>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>    #endif
>>>>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>>>>>>>    static u32 msrs_to_save[] = {
>>>>>>>>>>>>>>>    	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>>>>>>>    	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>>>>>>>    	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>>>>>>>    	MSR_KVM_PV_EOI_EN,
>>>>>>>>>>>>>>>    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>>>>>>>    	switch (msr) {
>>>>>>>>>>>>>>>    	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>>>>>>>    	case HV_X64_MSR_HYPERCALL:
>>>>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>>>>>>>    		r = true;
>>>>>>>>>>>>>>>    		break;
>>>>>>>>>>>>>>>    	}
>>>>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>>>>>>>    			return 1;
>>>>>>>>>>>>>>>    		kvm->arch.hv_hypercall = data;
>>>>>>>>>>>>>>> +		local_irq_disable();
>>>>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>>>>>>>> +		local_irq_enable();
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>>>>>>>> starts counting?
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>>>>>>> 
>>>>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>>>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>>>>>>> 
>>>>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>>>>>>> 
>>>>>>>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>>>>>>> 
>>>>>>>>>>>>           case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>>>>>>>                   data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>>>>>>>                   break;
>>>>>>>>>>>>           }
>>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>>>>>>>> Vadim.
>>>>>>>>>> 
>>>>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>>>>>>>> during the weekend?
>>>>>>>>> 
>>>>>>>>> Yes, and still testing.
> 
> It seems to be working fine. However, I would to keep hv_ref_count as is
> for the following reason: 
> " A partition's reference time counter is initialized to zero when the
> partition is created."
> http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
> 28v=vs.85%29.aspx
> 
> Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR
> write handler) can be treated as such event. In this case  hv_ref_count
> is no more than just an offset, which should be subtracted from the
> current time (  get_kernel_ns() + kvm->arch.kvmclock_offset ) when
> handling HV_X64_MSR_TIME_REF_COUNT MSR read request.

my 2 cents:

a) if you treat the start of the vm as partition create event get_kernel_ns() + kvm->arch.kvmclock_offset
is exactly the vm life time in nanoseconds.

b) afaik currently the value of hv_ref_count is not restored after a live migration on the destination. this
needs to be implemented for proper function. if we go for dropping this offset completely we can drop
the requirement of restoring hv_ref_count as well.

Peter

> 
> Best regards,
> Vadim.
> 
>>>>>>>>> 
>>>>>>>>> There is still some inconsistency in the value returned by
>>>>>>>>> QueryPerformanceCounter during migration.
>>>>>>>> 
>>>>>>>> With my proposal?
>>>>>>>> 
>>>>>>> 
>>>>>>> Right.
>>>>>>> Please see below QTC vs. NTP time stamp
>>>>>>> before and after migration:
>>>>>>> 
>>>>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
>>>>>>> quantum = 15
>>>>>>> QPC     NTP     Delta
>>>>>>> 1311    1307    4
>>>>>>> 2621    2618    3
>>>>>>> 3932    3931    1
>>>>>>> .......
>>>>>>> .......
>>>>>>> 38018   38014   4
>>>>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
>>>>>>> 7279088
>>>>>>> 142210  40113   102097
>>>>>>> 143458  41357   102101
>>>>>>> .......
>>>>>>> .......
>>>>>>> 156500  54615   101885
>>>>>>> 
>>>>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
>>>>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
>>>>>>> mSec)
>>>>>> 
>>>>>> How long was the downtime when you migrated the vServer?
>>>>> I use Win7-32 for testing.
>>>>>> 
>>>>>> I have done similar tests and have not seen this...
>>>>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
>>>>> Yes, I also test it for reference counter.
>>>> 
>>>> Can you use these parameters for testing:
>>>> 
>>>> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
>>>> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
>>> 
>>> My settings are:
>>> -cpu qemu64,
>>> +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
>>> -smp 4
>> 
>> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?
>> 
>> Peter
> 
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 1/2] add support for Hyper-V reference time counter
  2013-12-11 18:53   ` Marcelo Tosatti
  2013-12-11 18:59     ` Marcelo Tosatti
  2014-01-02 13:15     ` Peter Lieven
@ 2014-01-13 12:10     ` Vadim Rozenfeld
  2 siblings, 0 replies; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-13 12:10 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, pl, pbonzini

On Wed, 2013-12-11 at 16:53 -0200, Marcelo Tosatti wrote:
> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 54 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..2fd0753 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ef1ba..5e4e495a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		local_irq_enable();
> 
> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> starts counting?

Sorry for delay in reply

Access to HV_X64_MSR_HYPERCALL happens during execution KiSystemStartup
function code, which I think can be treated as a partition create time. 
 
> 
> No need to store kvmclock_offset in hv_ref_count? (moreover
> the name is weird, better name would be "hv_ref_start_time".
> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC: {
> > +		u64 gfn;
> > +		unsigned long addr;
> > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > +		tsc_ref.tsc_sequence = 0;
> > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> > +			kvm->arch.hv_tsc_page = data;
> > +			break;
> > +		}
> > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > +		addr = gfn_to_hva(kvm, data >>
> > +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +		if (kvm_is_error_hva(addr))
> > +			return 1;
> > +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > +			return 1;
> > +		mark_page_dirty(kvm, gfn);
> > +		kvm->arch.hv_tsc_page = data;
> >  		break;
> >  	}
> >  	default:
> > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  	case HV_X64_MSR_HYPERCALL:
> >  		data = kvm->arch.hv_hypercall;
> >  		break;
> > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > +		u64 now_ns;
> > +		local_irq_disable();
> > +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> > +		local_irq_enable();
> 
> No need for irq disable/enable pairs.
> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +		data = kvm->arch.hv_tsc_page;
> > +		break;
> 
> Does it return non-zero data even if not enabled?

kernel reads HV_X64_MSR_REFERENCE_TSC MSR only once during hyperv
initialization and checks checks enable bit
 
> 
> >  	default:
> >  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> >  		return 1;
> > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >  	case KVM_CAP_PCI_2_3:
> >  #endif
> > +	case KVM_CAP_HYPERV_TIME:
> >  		r = 1;
> >  		break;
> >  	case KVM_CAP_COALESCED_MMIO:
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 902f124..686c1ca 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_ARM_EL1_32BIT 93
> >  #define KVM_CAP_SPAPR_MULTITCE 94
> >  #define KVM_CAP_EXT_EMUL_CPUID 95
> > +#define KVM_CAP_HYPERV_TIME 96
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > -- 
> > 1.8.1.4
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2013-12-11 19:27   ` Marcelo Tosatti
  2013-12-12  9:34     ` Paolo Bonzini
@ 2014-01-14  4:11     ` Vadim Rozenfeld
  2014-01-14 13:54       ` Marcelo Tosatti
  1 sibling, 1 reply; 37+ messages in thread
From: Vadim Rozenfeld @ 2014-01-14  4:11 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, pl, pbonzini



----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, pl@dlhnet.de, pbonzini@redhat.com
Sent: Thursday, December 12, 2013 6:27:00 AM
Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference
> time enlightenment that is based on the host platform's support
> for an Invariant Time Stamp Counter (iTSC).
> 
> v2 -> v3
> Handle TSC sequence, scale, and offest changing during migration.
> 
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/x86.c              | 29 +++++++++++++++++++++++++++--
>  2 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2fd0753..81fdff0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -607,6 +607,7 @@ struct kvm_arch {
>  	u64 hv_hypercall;
>  	u64 hv_ref_count;
>  	u64 hv_tsc_page;
> +	u64 hv_ref_time;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5e4e495a..cb6766a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  			break;
>  		}
>  		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		addr = gfn_to_hva(kvm, data >>
> -			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		addr = gfn_to_hva(kvm, gfn);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		tsc_ref.tsc_sequence =
> +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> +		tsc_ref.tsc_scale =
> +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.tsc_offset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
>  		kvm->arch.hv_tsc_page = data;
> +		kvm->arch.hv_ref_count = 0;
>  		break;
>  	}
>  	default:
> @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		local_irq_enable();
>  		kvm->arch.kvmclock_offset = delta;
>  		kvm_gen_update_masterclock(kvm);
> +
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> +			u64 curr_time;
> +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +			tsc_ref->tsc_sequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> +				tsc_ref->tsc_offset;
> +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> +		}
>  		break;
>  	}
>  	case KVM_GET_CLOCK: {
> @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
>  			goto out;
>  		r = 0;
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

kvm_read_guest_cached.

> +			kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * 
> +				native_read_tsc()) >> 32) + tsc_ref->tsc_offset;

Why native_read_tsc and not ->read_l1_tsc?

[VR]
Is it possible to get pointer to the vcpu instance at this point?
Thanks,
Vadim. 

It is easier to trust on the host to check reliability of the TSC: if
it uses TSC clocksource, then the TSCs are stable. So could condition
exposing the TSC ref page when ka->use_master_clock=1, see kvm_guest_time_update.
And hook into pvclock_gtod_notify.

So in addition to X86_FEATURE_CONSTANT_TSC, check
ka->use_master_clock=1



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
  2014-01-14  4:11     ` Vadim Rozenfeld
@ 2014-01-14 13:54       ` Marcelo Tosatti
  0 siblings, 0 replies; 37+ messages in thread
From: Marcelo Tosatti @ 2014-01-14 13:54 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, pl, pbonzini

On Mon, Jan 13, 2014 at 11:11:40PM -0500, Vadim Rozenfeld wrote:
> 
> 
> ----- Original Message -----
> From: "Marcelo Tosatti" <mtosatti@redhat.com>
> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> Cc: kvm@vger.kernel.org, pl@dlhnet.de, pbonzini@redhat.com
> Sent: Thursday, December 12, 2013 6:27:00 AM
> Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
> 
> On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
> > The following patch allows to activate a partition reference
> > time enlightenment that is based on the host platform's support
> > for an Invariant Time Stamp Counter (iTSC).
> > 
> > v2 -> v3
> > Handle TSC sequence, scale, and offest changing during migration.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h |  1 +
> >  arch/x86/kvm/x86.c              | 29 +++++++++++++++++++++++++++--
> >  2 files changed, 28 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 2fd0753..81fdff0 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -607,6 +607,7 @@ struct kvm_arch {
> >  	u64 hv_hypercall;
> >  	u64 hv_ref_count;
> >  	u64 hv_tsc_page;
> > +	u64 hv_ref_time;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 5e4e495a..cb6766a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  			break;
> >  		}
> >  		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > -		addr = gfn_to_hva(kvm, data >>
> > -			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +		addr = gfn_to_hva(kvm, gfn);
> >  		if (kvm_is_error_hva(addr))
> >  			return 1;
> > +		tsc_ref.tsc_sequence =
> > +			boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > +		tsc_ref.tsc_scale =
> > +			((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > +		tsc_ref.tsc_offset = 0;
> >  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> >  			return 1;
> >  		mark_page_dirty(kvm, gfn);
> >  		kvm->arch.hv_tsc_page = data;
> > +		kvm->arch.hv_ref_count = 0;
> >  		break;
> >  	}
> >  	default:
> > @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >  		local_irq_enable();
> >  		kvm->arch.kvmclock_offset = delta;
> >  		kvm_gen_update_masterclock(kvm);
> > +
> > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> > +			u64 curr_time;
> > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
> > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +			tsc_ref->tsc_sequence =
> > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > +			tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> > +			curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + 
> > +				tsc_ref->tsc_offset;
> > +			tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> > +		}
> >  		break;
> >  	}
> >  	case KVM_GET_CLOCK: {
> > @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >  		if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
> >  			goto out;
> >  		r = 0;
> > +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > +			HV_REFERENCE_TSC_PAGE* tsc_ref;
> > +			tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > +				kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> 
> kvm_read_guest_cached.
> 
> > +			kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * 
> > +				native_read_tsc()) >> 32) + tsc_ref->tsc_offset;
> 
> Why native_read_tsc and not ->read_l1_tsc?
> 
> [VR]
> Is it possible to get pointer to the vcpu instance at this point?

See the suggestion to move this code to kvm_guest_time_update.


> Thanks,
> Vadim. 
> 
> It is easier to trust on the host to check reliability of the TSC: if
> it uses TSC clocksource, then the TSCs are stable. So could condition
> exposing the TSC ref page when ka->use_master_clock=1, see kvm_guest_time_update.
> And hook into pvclock_gtod_notify.
> 
> So in addition to X86_FEATURE_CONSTANT_TSC, check
> ka->use_master_clock=1
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2014-01-14 15:49 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-12-08 11:33 [RFC PATCH v3 0/2] Hyper-V timers Vadim Rozenfeld
2013-12-08 11:33 ` [RFC PATCH v3 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
2013-12-09 14:23   ` Paolo Bonzini
2013-12-10 10:46     ` Vadim Rozenfeld
2013-12-11 18:53   ` Marcelo Tosatti
2013-12-11 18:59     ` Marcelo Tosatti
2013-12-12  9:33       ` Paolo Bonzini
2014-01-02 16:52       ` Peter Lieven
2014-01-07  9:36         ` Vadim Rozenfeld
2014-01-07 17:52           ` Peter Lieven
2014-01-08  9:40             ` Vadim Rozenfeld
2014-01-08 10:15               ` Peter Lieven
2014-01-08 10:44                 ` Vadim Rozenfeld
2014-01-08 11:48                   ` Peter Lieven
2014-01-08 12:12                     ` Vadim Rozenfeld
2014-01-08 14:54                       ` Peter Lieven
2014-01-08 20:08                         ` Vadim Rozenfeld
2014-01-08 22:20                           ` Peter Lieven
2014-01-09 11:10                             ` Vadim Rozenfeld
2014-01-12 12:08                             ` Vadim Rozenfeld
2014-01-12 20:35                               ` Peter Lieven
2014-01-02 13:15     ` Peter Lieven
2014-01-02 13:57       ` Marcelo Tosatti
2014-01-02 16:08         ` Peter Lieven
2014-01-02 20:05           ` Marcelo Tosatti
2014-01-13 12:10     ` Vadim Rozenfeld
2013-12-08 11:33 ` [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment Vadim Rozenfeld
2013-12-09 14:32   ` Paolo Bonzini
2013-12-10 11:23     ` Vadim Rozenfeld
2013-12-10 16:52       ` Paolo Bonzini
2013-12-11 10:58         ` Vadim Rozenfeld
2013-12-11 12:28           ` Paolo Bonzini
2013-12-11 19:28       ` Marcelo Tosatti
2013-12-11 19:27   ` Marcelo Tosatti
2013-12-12  9:34     ` Paolo Bonzini
2014-01-14  4:11     ` Vadim Rozenfeld
2014-01-14 13:54       ` Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox