kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org
Cc: johnstul@us.ibm.com, jeremy@goop.org, glommer@parallels.com,
	zamsden@gmail.com, gleb@redhat.com, avi@redhat.com,
	pbonzini@redhat.com, Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 08/16] KVM: x86: introduce facility to support vsyscall pvclock, via MSR
Date: Wed, 31 Oct 2012 20:47:04 -0200	[thread overview]
Message-ID: <20121031224824.199331603@redhat.com> (raw)
In-Reply-To: 20121031224656.417434866@redhat.com

[-- Attachment #1: 11-host-add-userspace-time-msr --]
[-- Type: text/plain, Size: 9580 bytes --]

Allow a guest to register a second location for the VCPU time info
structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
This is intended to allow the guest kernel to map this information
into a usermode accessible page, so that usermode can efficiently
calculate system time from the TSC without having to make a syscall.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: vsyscall/arch/x86/include/asm/kvm_para.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_para.h
+++ vsyscall/arch/x86/include/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_USERSPACE_CLOCKSOURCE 7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -39,6 +40,7 @@
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_USERSPACE_TIME      0x4b564d05
 
 struct kvm_steal_time {
 	__u64 steal;
Index: vsyscall/Documentation/virtual/kvm/msr.txt
===================================================================
--- vsyscall.orig/Documentation/virtual/kvm/msr.txt
+++ vsyscall/Documentation/virtual/kvm/msr.txt
@@ -125,6 +125,22 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
 	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
 	leaf prior to usage.
 
+MSR_KVM_USERSPACE_TIME:  0x4b564d05
+
+Allow a guest to register a second location for the VCPU time info
+structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
+This is intended to allow the guest kernel to map this information
+into a usermode accessible page, so that usermode can efficiently
+calculate system time from the TSC without having to make a syscall.
+
+Relationship with master copy (MSR_KVM_SYSTEM_TIME_NEW):
+
+- This MSR must be enabled only when the master is enabled.
+- Disabling updates to the master automatically disables
+updates for this copy.
+
+Availability of this MSR must be checked via bit 7 in 0x40000001 cpuid
+leaf prior to usage.
 
 MSR_KVM_WALL_CLOCK:  0x11
 
Index: vsyscall/arch/x86/include/asm/kvm_host.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_host.h
+++ vsyscall/arch/x86/include/asm/kvm_host.h
@@ -415,10 +415,13 @@ struct kvm_vcpu_arch {
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
+	gpa_t uspace_time;
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
+	unsigned int uspace_time_offset;
 	struct page *time_page;
+	struct page *uspace_time_page;
 	/* set guest stopped flag in pvclock flags field */
 	bool pvclock_set_guest_stopped_request;
 
Index: vsyscall/arch/x86/kvm/x86.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/x86.c
+++ vsyscall/arch/x86/kvm/x86.c
@@ -809,13 +809,13 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	10
+#define KVM_SAVE_MSRS_BEGIN	11
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-	MSR_KVM_PV_EOI_EN,
+	MSR_KVM_PV_EOI_EN, MSR_KVM_USERSPACE_TIME,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1135,16 +1135,43 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+static void kvm_write_pvtime(struct kvm_vcpu *v, struct page *page,
+			     unsigned int offset_in_page, gpa_t gpa)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+	void *shared_kaddr;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
+	u8 pvclock_flags;
+
+	shared_kaddr = kmap_atomic(page);
+
+	guest_hv_clock = shared_kaddr + offset_in_page;
+
+	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	vcpu->hv_clock.flags = pvclock_flags;
+
+	memcpy(shared_kaddr + offset_in_page, &vcpu->hv_clock,
+	       sizeof(vcpu->hv_clock));
+
+	kunmap_atomic(shared_kaddr);
+
+	mark_page_dirty(v->kvm, gpa >> PAGE_SHIFT);
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
 	unsigned long flags;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
-	void *shared_kaddr;
 	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
 	u64 tsc_timestamp;
-	struct pvclock_vcpu_time_info *guest_hv_clock;
-	u8 pvclock_flags;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -1235,26 +1262,11 @@ static int kvm_guest_time_update(struct 
 	 */
 	vcpu->hv_clock.version += 2;
 
-	shared_kaddr = kmap_atomic(vcpu->time_page);
-
-	guest_hv_clock = shared_kaddr + vcpu->time_offset;
-
-	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
-	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ 	kvm_write_pvtime(v, vcpu->time_page, vcpu->time_offset, vcpu->time);
+ 	if (vcpu->uspace_time_page)
+ 		kvm_write_pvtime(v, vcpu->uspace_time_page,
+ 				 vcpu->uspace_time_offset, vcpu->uspace_time);
 
-	if (vcpu->pvclock_set_guest_stopped_request) {
-		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-		vcpu->pvclock_set_guest_stopped_request = false;
-	}
-
-	vcpu->hv_clock.flags = pvclock_flags;
-
-	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-	       sizeof(vcpu->hv_clock));
-
-	kunmap_atomic(shared_kaddr);
-
-	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 	return 0;
 }
 
@@ -1549,6 +1561,15 @@ static void kvmclock_reset(struct kvm_vc
 	}
 }
 
+static void kvmclock_uspace_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.uspace_time = 0;
+	if (vcpu->arch.uspace_time_page) {
+		kvm_release_page_dirty(vcpu->arch.uspace_time_page);
+		vcpu->arch.uspace_time_page = NULL;
+	}
+}
+
 static void accumulate_steal_time(struct kvm_vcpu *vcpu)
 {
 	u64 delta;
@@ -1639,6 +1660,31 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		vcpu->kvm->arch.wall_clock = data;
 		kvm_write_wall_clock(vcpu->kvm, data);
 		break;
+	case MSR_KVM_USERSPACE_TIME: {
+		kvmclock_uspace_reset(vcpu);
+
+		if (!vcpu->arch.time_page && (data & 1))
+			return 1;
+
+		vcpu->arch.uspace_time = data;
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+		/* we verify if the enable bit is set... */
+		if (!(data & 1))
+			break;
+
+		/* ...but clean it before doing the actual write */
+		vcpu->arch.uspace_time_offset = data & ~(PAGE_MASK | 1);
+
+		vcpu->arch.uspace_time_page = gfn_to_page(vcpu->kvm,
+							  data >> PAGE_SHIFT);
+
+		if (is_error_page(vcpu->arch.uspace_time_page)) {
+			kvm_release_page_clean(vcpu->arch.uspace_time_page);
+			vcpu->arch.uspace_time_page = NULL;
+		}
+		break;
+	}
 	case MSR_KVM_SYSTEM_TIME_NEW:
 	case MSR_KVM_SYSTEM_TIME: {
 		kvmclock_reset(vcpu);
@@ -1647,8 +1693,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
 		/* we verify if the enable bit is set... */
-		if (!(data & 1))
+		if (!(data & 1)) {
+			kvmclock_uspace_reset(vcpu);
 			break;
+		}
 
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
@@ -1656,8 +1704,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-		if (is_error_page(vcpu->arch.time_page))
+		if (is_error_page(vcpu->arch.time_page)) {
 			vcpu->arch.time_page = NULL;
+			kvmclock_uspace_reset(vcpu);
+		}
 
 		break;
 	}
@@ -2010,6 +2060,9 @@ int kvm_get_msr_common(struct kvm_vcpu *
 	case MSR_KVM_SYSTEM_TIME_NEW:
 		data = vcpu->arch.time;
 		break;
+	case MSR_KVM_USERSPACE_TIME:
+		data = vcpu->arch.uspace_time;
+		break;
 	case MSR_KVM_ASYNC_PF_EN:
 		data = vcpu->arch.apf.msr_val;
 		break;
@@ -2195,6 +2248,7 @@ int kvm_dev_ioctl_check_extension(long e
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_IRQFD_RESAMPLE:
+	case KVM_CAP_USERSPACE_CLOCKSOURCE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -6017,6 +6071,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	kvmclock_uspace_reset(vcpu);
 	kvmclock_reset(vcpu);
 
 	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
Index: vsyscall/arch/x86/kvm/cpuid.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/cpuid.c
+++ vsyscall/arch/x86/kvm/cpuid.c
@@ -411,7 +411,9 @@ static int do_cpuid_ent(struct kvm_cpuid
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 			     (1 << KVM_FEATURE_ASYNC_PF) |
 			     (1 << KVM_FEATURE_PV_EOI) |
-			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+			     (1 << KVM_FEATURE_USERSPACE_CLOCKSOURCE);
+
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
Index: vsyscall/include/uapi/linux/kvm.h
===================================================================
--- vsyscall.orig/include/uapi/linux/kvm.h
+++ vsyscall/include/uapi/linux/kvm.h
@@ -626,6 +626,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_READONLY_MEM 81
 #endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_USERSPACE_CLOCKSOURCE 83
 
 #ifdef KVM_CAP_IRQ_ROUTING
 



  parent reply	other threads:[~2012-10-31 22:47 UTC|newest]

Thread overview: 94+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-24 13:13 [patch 00/18] pvclock vsyscall support + KVM hypervisor support (v2) Marcelo Tosatti
2012-10-24 13:13 ` [patch 01/18] KVM: x86: retain pvclock guest stopped bit in guest memory Marcelo Tosatti
2012-10-24 13:13 ` [patch 02/18] x86: pvclock: make sure rdtsc doesnt speculate out of region Marcelo Tosatti
2012-10-24 13:13 ` [patch 03/18] x86: pvclock: remove pvclock_shadow_time Marcelo Tosatti
2012-10-30  9:23   ` Avi Kivity
2012-10-30  9:24     ` Avi Kivity
2012-10-24 13:13 ` [patch 04/18] x86: pvclock: create helper for pvclock data retrieval Marcelo Tosatti
2012-10-24 13:13 ` [patch 05/18] x86: pvclock: fix flags usage race Marcelo Tosatti
2012-10-24 13:13 ` [patch 06/18] x86: pvclock: introduce helper to read flags Marcelo Tosatti
2012-10-24 13:13 ` [patch 07/18] sched: add notifier for cross-cpu migrations Marcelo Tosatti
2012-10-24 13:13 ` [patch 08/18] x86: pvclock: generic pvclock vsyscall initialization Marcelo Tosatti
2012-10-29 14:18   ` Glauber Costa
2012-10-29 14:54     ` Marcelo Tosatti
2012-10-29 17:46       ` Jeremy Fitzhardinge
2012-10-29 14:39   ` Glauber Costa
2012-10-24 13:13 ` [patch 09/18] KVM: x86: introduce facility to support vsyscall pvclock, via MSR Marcelo Tosatti
2012-10-29 14:45   ` Glauber Costa
2012-10-29 17:44     ` Jeremy Fitzhardinge
2012-10-29 18:40       ` Marcelo Tosatti
2012-10-30  7:41         ` Glauber Costa
2012-10-30  9:39         ` Avi Kivity
2012-10-31  3:12           ` Marcelo Tosatti
2012-11-02 10:21             ` Glauber Costa
2012-10-30  7:38       ` Glauber Costa
2012-10-24 13:13 ` [patch 10/18] x86: kvm guest: pvclock vsyscall support Marcelo Tosatti
2012-10-24 13:13 ` [patch 11/18] x86: vsyscall: pass mode to gettime backend Marcelo Tosatti
2012-10-29 14:47   ` Glauber Costa
2012-10-29 18:41     ` Marcelo Tosatti
2012-10-30  7:42       ` Glauber Costa
2012-10-24 13:13 ` [patch 12/18] x86: vdso: pvclock gettime support Marcelo Tosatti
2012-10-29 14:59   ` Glauber Costa
2012-10-29 18:42     ` Marcelo Tosatti
2012-10-30  7:49       ` Glauber Costa
2012-10-31  3:16         ` Marcelo Tosatti
2012-10-24 13:13 ` [patch 13/18] KVM: x86: pass host_tsc to read_l1_tsc Marcelo Tosatti
2012-10-29 15:04   ` Glauber Costa
2012-10-29 18:45     ` Marcelo Tosatti
2012-10-30  7:55       ` Glauber Costa
2012-10-24 13:13 ` [patch 14/18] time: export time information for KVM pvclock Marcelo Tosatti
2012-11-10  1:02   ` John Stultz
2012-11-13 21:07     ` Marcelo Tosatti
2012-10-24 13:13 ` [patch 15/18] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag Marcelo Tosatti
2012-10-30  8:34   ` Glauber Costa
2012-10-31  3:19     ` [patch 15/18] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag\ Marcelo Tosatti
2012-10-24 13:13 ` [patch 16/18] KVM: x86: notifier for clocksource changes Marcelo Tosatti
2012-10-24 13:13 ` [patch 17/18] KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization Marcelo Tosatti
2012-10-24 13:13 ` [patch 18/18] KVM: x86: require matched TSC offsets for master clock Marcelo Tosatti
2012-10-31 22:46 ` [patch 00/16] pvclock vsyscall support + KVM hypervisor support (v3) Marcelo Tosatti
2012-10-31 22:46   ` [patch 01/16] KVM: x86: retain pvclock guest stopped bit in guest memory Marcelo Tosatti
2012-11-01 10:39     ` Gleb Natapov
2012-11-01 20:51       ` Marcelo Tosatti
2012-11-01 13:44     ` Glauber Costa
2012-10-31 22:46   ` [patch 02/16] x86: pvclock: make sure rdtsc doesnt speculate out of region Marcelo Tosatti
2012-11-01 11:48     ` Gleb Natapov
2012-11-01 13:49       ` Glauber Costa
2012-11-01 13:51         ` Gleb Natapov
2012-11-01 20:56         ` Marcelo Tosatti
2012-11-01 22:13           ` Gleb Natapov
2012-11-01 22:21             ` Marcelo Tosatti
2012-11-02  6:02               ` Gleb Natapov
2012-10-31 22:46   ` [patch 03/16] x86: pvclock: remove pvclock_shadow_time Marcelo Tosatti
2012-11-01 13:52     ` Glauber Costa
2012-10-31 22:47   ` [patch 04/16] x86: pvclock: create helper for pvclock data retrieval Marcelo Tosatti
2012-11-01 14:04     ` Glauber Costa
2012-11-01 20:57       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 05/16] x86: pvclock: introduce helper to read flags Marcelo Tosatti
2012-11-01 14:07     ` Glauber Costa
2012-11-01 21:08       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 06/16] sched: add notifier for cross-cpu migrations Marcelo Tosatti
2012-11-01 14:08     ` Glauber Costa
2012-10-31 22:47   ` [patch 07/16] x86: pvclock: generic pvclock vsyscall initialization Marcelo Tosatti
2012-11-01 14:19     ` Glauber Costa
2012-10-31 22:47   ` Marcelo Tosatti [this message]
2012-11-01 14:28     ` [patch 08/16] KVM: x86: introduce facility to support vsyscall pvclock, via MSR Glauber Costa
2012-11-01 21:39       ` Marcelo Tosatti
2012-11-02 10:23         ` Glauber Costa
2012-11-02 13:00           ` Marcelo Tosatti
2012-11-05  8:03             ` Glauber Costa
2012-10-31 22:47   ` [patch 09/16] x86: kvm guest: pvclock vsyscall support Marcelo Tosatti
2012-11-02  9:42     ` Glauber Costa
2012-11-05  8:35       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 10/16] x86: vdso: pvclock gettime support Marcelo Tosatti
2012-11-01 14:41     ` Glauber Costa
2012-11-01 21:42       ` Marcelo Tosatti
2012-11-02  0:33         ` Marcelo Tosatti
2012-11-02 10:25           ` Glauber Costa
2012-11-14 10:42     ` Gleb Natapov
2012-11-14 22:42       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 11/16] KVM: x86: pass host_tsc to read_l1_tsc Marcelo Tosatti
2012-10-31 22:47   ` [patch 12/16] time: export time information for KVM pvclock Marcelo Tosatti
2012-10-31 22:47   ` [patch 13/16] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag Marcelo Tosatti
2012-10-31 22:47   ` [patch 14/16] KVM: x86: notifier for clocksource changes Marcelo Tosatti
2012-10-31 22:47   ` [patch 15/16] KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization Marcelo Tosatti
2012-10-31 22:47   ` [patch 16/16] KVM: x86: require matched TSC offsets for master clock Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121031224824.199331603@redhat.com \
    --to=mtosatti@redhat.com \
    --cc=avi@redhat.com \
    --cc=gleb@redhat.com \
    --cc=glommer@parallels.com \
    --cc=jeremy@goop.org \
    --cc=johnstul@us.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=zamsden@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).