All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org
Cc: johnstul@us.ibm.com, jeremy@goop.org, glommer@parallels.com,
	zamsden@gmail.com, gleb@redhat.com, avi@redhat.com,
	pbonzini@redhat.com, Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 09/18] KVM: x86: introduce facility to support vsyscall pvclock, via MSR
Date: Wed, 24 Oct 2012 11:13:49 -0200	[thread overview]
Message-ID: <20121024131621.707068244@redhat.com> (raw)
In-Reply-To: 20121024131340.742340256@redhat.com

[-- Attachment #1: 11-host-add-userspace-time-msr --]
[-- Type: text/plain, Size: 9580 bytes --]

Allow a guest to register a second location for the VCPU time info
structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
This is intended to allow the guest kernel to map this information
into a usermode accessible page, so that usermode can efficiently
calculate system time from the TSC without having to make a syscall.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: vsyscall/arch/x86/include/asm/kvm_para.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_para.h
+++ vsyscall/arch/x86/include/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_USERSPACE_CLOCKSOURCE 7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -39,6 +40,7 @@
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_USERSPACE_TIME      0x4b564d05
 
 struct kvm_steal_time {
 	__u64 steal;
Index: vsyscall/Documentation/virtual/kvm/msr.txt
===================================================================
--- vsyscall.orig/Documentation/virtual/kvm/msr.txt
+++ vsyscall/Documentation/virtual/kvm/msr.txt
@@ -125,6 +125,22 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
 	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
 	leaf prior to usage.
 
+MSR_KVM_USERSPACE_TIME:  0x4b564d05
+
+Allow a guest to register a second location for the VCPU time info
+structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
+This is intended to allow the guest kernel to map this information
+into a usermode accessible page, so that usermode can efficiently
+calculate system time from the TSC without having to make a syscall.
+
+Relationship with master copy (MSR_KVM_SYSTEM_TIME_NEW):
+
+- This MSR must be enabled only when the master is enabled.
+- Disabling updates to the master automatically disables
+updates for this copy.
+
+Availability of this MSR must be checked via bit 7 in 0x40000001 cpuid
+leaf prior to usage.
 
 MSR_KVM_WALL_CLOCK:  0x11
 
Index: vsyscall/arch/x86/include/asm/kvm_host.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_host.h
+++ vsyscall/arch/x86/include/asm/kvm_host.h
@@ -415,10 +415,13 @@ struct kvm_vcpu_arch {
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
+	gpa_t uspace_time;
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
+	unsigned int uspace_time_offset;
 	struct page *time_page;
+	struct page *uspace_time_page;
 	/* set guest stopped flag in pvclock flags field */
 	bool pvclock_set_guest_stopped_request;
 
Index: vsyscall/arch/x86/kvm/x86.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/x86.c
+++ vsyscall/arch/x86/kvm/x86.c
@@ -809,13 +809,13 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	10
+#define KVM_SAVE_MSRS_BEGIN	11
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-	MSR_KVM_PV_EOI_EN,
+	MSR_KVM_PV_EOI_EN, MSR_KVM_USERSPACE_TIME,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1135,16 +1135,43 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+static void kvm_write_pvtime(struct kvm_vcpu *v, struct page *page,
+			     unsigned int offset_in_page, gpa_t gpa)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+	void *shared_kaddr;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
+	u8 pvclock_flags;
+
+	shared_kaddr = kmap_atomic(page);
+
+	guest_hv_clock = shared_kaddr + offset_in_page;
+
+	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	vcpu->hv_clock.flags = pvclock_flags;
+
+	memcpy(shared_kaddr + offset_in_page, &vcpu->hv_clock,
+	       sizeof(vcpu->hv_clock));
+
+	kunmap_atomic(shared_kaddr);
+
+	mark_page_dirty(v->kvm, gpa >> PAGE_SHIFT);
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
 	unsigned long flags;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
-	void *shared_kaddr;
 	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
 	u64 tsc_timestamp;
-	struct pvclock_vcpu_time_info *guest_hv_clock;
-	u8 pvclock_flags;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -1235,26 +1262,11 @@ static int kvm_guest_time_update(struct 
 	 */
 	vcpu->hv_clock.version += 2;
 
-	shared_kaddr = kmap_atomic(vcpu->time_page);
-
-	guest_hv_clock = shared_kaddr + vcpu->time_offset;
-
-	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
-	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ 	kvm_write_pvtime(v, vcpu->time_page, vcpu->time_offset, vcpu->time);
+ 	if (vcpu->uspace_time_page)
+ 		kvm_write_pvtime(v, vcpu->uspace_time_page,
+ 				 vcpu->uspace_time_offset, vcpu->uspace_time);
 
-	if (vcpu->pvclock_set_guest_stopped_request) {
-		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-		vcpu->pvclock_set_guest_stopped_request = false;
-	}
-
-	vcpu->hv_clock.flags = pvclock_flags;
-
-	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-	       sizeof(vcpu->hv_clock));
-
-	kunmap_atomic(shared_kaddr);
-
-	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 	return 0;
 }
 
@@ -1549,6 +1561,15 @@ static void kvmclock_reset(struct kvm_vc
 	}
 }
 
+static void kvmclock_uspace_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.uspace_time = 0;
+	if (vcpu->arch.uspace_time_page) {
+		kvm_release_page_dirty(vcpu->arch.uspace_time_page);
+		vcpu->arch.uspace_time_page = NULL;
+	}
+}
+
 static void accumulate_steal_time(struct kvm_vcpu *vcpu)
 {
 	u64 delta;
@@ -1639,6 +1660,31 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		vcpu->kvm->arch.wall_clock = data;
 		kvm_write_wall_clock(vcpu->kvm, data);
 		break;
+	case MSR_KVM_USERSPACE_TIME: {
+		kvmclock_uspace_reset(vcpu);
+
+		if (!vcpu->arch.time_page && (data & 1))
+			return 1;
+
+		vcpu->arch.uspace_time = data;
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+		/* we verify if the enable bit is set... */
+		if (!(data & 1))
+			break;
+
+		/* ...but clean it before doing the actual write */
+		vcpu->arch.uspace_time_offset = data & ~(PAGE_MASK | 1);
+
+		vcpu->arch.uspace_time_page = gfn_to_page(vcpu->kvm,
+							  data >> PAGE_SHIFT);
+
+		if (is_error_page(vcpu->arch.uspace_time_page)) {
+			kvm_release_page_clean(vcpu->arch.uspace_time_page);
+			vcpu->arch.uspace_time_page = NULL;
+		}
+		break;
+	}
 	case MSR_KVM_SYSTEM_TIME_NEW:
 	case MSR_KVM_SYSTEM_TIME: {
 		kvmclock_reset(vcpu);
@@ -1647,8 +1693,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
 		/* we verify if the enable bit is set... */
-		if (!(data & 1))
+		if (!(data & 1)) {
+			kvmclock_uspace_reset(vcpu);
 			break;
+		}
 
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
@@ -1656,8 +1704,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-		if (is_error_page(vcpu->arch.time_page))
+		if (is_error_page(vcpu->arch.time_page)) {
 			vcpu->arch.time_page = NULL;
+			kvmclock_uspace_reset(vcpu);
+		}
 
 		break;
 	}
@@ -2010,6 +2060,9 @@ int kvm_get_msr_common(struct kvm_vcpu *
 	case MSR_KVM_SYSTEM_TIME_NEW:
 		data = vcpu->arch.time;
 		break;
+	case MSR_KVM_USERSPACE_TIME:
+		data = vcpu->arch.uspace_time;
+		break;
 	case MSR_KVM_ASYNC_PF_EN:
 		data = vcpu->arch.apf.msr_val;
 		break;
@@ -2195,6 +2248,7 @@ int kvm_dev_ioctl_check_extension(long e
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_IRQFD_RESAMPLE:
+	case KVM_CAP_USERSPACE_CLOCKSOURCE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -6017,6 +6071,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	kvmclock_uspace_reset(vcpu);
 	kvmclock_reset(vcpu);
 
 	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
Index: vsyscall/arch/x86/kvm/cpuid.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/cpuid.c
+++ vsyscall/arch/x86/kvm/cpuid.c
@@ -411,7 +411,9 @@ static int do_cpuid_ent(struct kvm_cpuid
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 			     (1 << KVM_FEATURE_ASYNC_PF) |
 			     (1 << KVM_FEATURE_PV_EOI) |
-			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+			     (1 << KVM_FEATURE_USERSPACE_CLOCKSOURCE);
+
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
Index: vsyscall/include/uapi/linux/kvm.h
===================================================================
--- vsyscall.orig/include/uapi/linux/kvm.h
+++ vsyscall/include/uapi/linux/kvm.h
@@ -626,6 +626,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_READONLY_MEM 81
 #endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_USERSPACE_CLOCKSOURCE 83
 
 #ifdef KVM_CAP_IRQ_ROUTING
 



  parent reply	other threads:[~2012-10-24 13:16 UTC|newest]

Thread overview: 94+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-24 13:13 [patch 00/18] pvclock vsyscall support + KVM hypervisor support (v2) Marcelo Tosatti
2012-10-24 13:13 ` [patch 01/18] KVM: x86: retain pvclock guest stopped bit in guest memory Marcelo Tosatti
2012-10-24 13:13 ` [patch 02/18] x86: pvclock: make sure rdtsc doesnt speculate out of region Marcelo Tosatti
2012-10-24 13:13 ` [patch 03/18] x86: pvclock: remove pvclock_shadow_time Marcelo Tosatti
2012-10-30  9:23   ` Avi Kivity
2012-10-30  9:24     ` Avi Kivity
2012-10-24 13:13 ` [patch 04/18] x86: pvclock: create helper for pvclock data retrieval Marcelo Tosatti
2012-10-24 13:13 ` [patch 05/18] x86: pvclock: fix flags usage race Marcelo Tosatti
2012-10-24 13:13 ` [patch 06/18] x86: pvclock: introduce helper to read flags Marcelo Tosatti
2012-10-24 13:13 ` [patch 07/18] sched: add notifier for cross-cpu migrations Marcelo Tosatti
2012-10-24 13:13 ` [patch 08/18] x86: pvclock: generic pvclock vsyscall initialization Marcelo Tosatti
2012-10-29 14:18   ` Glauber Costa
2012-10-29 14:54     ` Marcelo Tosatti
2012-10-29 17:46       ` Jeremy Fitzhardinge
2012-10-29 14:39   ` Glauber Costa
2012-10-24 13:13 ` Marcelo Tosatti [this message]
2012-10-29 14:45   ` [patch 09/18] KVM: x86: introduce facility to support vsyscall pvclock, via MSR Glauber Costa
2012-10-29 17:44     ` Jeremy Fitzhardinge
2012-10-29 18:40       ` Marcelo Tosatti
2012-10-30  7:41         ` Glauber Costa
2012-10-30  9:39         ` Avi Kivity
2012-10-31  3:12           ` Marcelo Tosatti
2012-11-02 10:21             ` Glauber Costa
2012-10-30  7:38       ` Glauber Costa
2012-10-24 13:13 ` [patch 10/18] x86: kvm guest: pvclock vsyscall support Marcelo Tosatti
2012-10-24 13:13 ` [patch 11/18] x86: vsyscall: pass mode to gettime backend Marcelo Tosatti
2012-10-29 14:47   ` Glauber Costa
2012-10-29 18:41     ` Marcelo Tosatti
2012-10-30  7:42       ` Glauber Costa
2012-10-24 13:13 ` [patch 12/18] x86: vdso: pvclock gettime support Marcelo Tosatti
2012-10-29 14:59   ` Glauber Costa
2012-10-29 18:42     ` Marcelo Tosatti
2012-10-30  7:49       ` Glauber Costa
2012-10-31  3:16         ` Marcelo Tosatti
2012-10-24 13:13 ` [patch 13/18] KVM: x86: pass host_tsc to read_l1_tsc Marcelo Tosatti
2012-10-29 15:04   ` Glauber Costa
2012-10-29 18:45     ` Marcelo Tosatti
2012-10-30  7:55       ` Glauber Costa
2012-10-24 13:13 ` [patch 14/18] time: export time information for KVM pvclock Marcelo Tosatti
2012-11-10  1:02   ` John Stultz
2012-11-13 21:07     ` Marcelo Tosatti
2012-10-24 13:13 ` [patch 15/18] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag Marcelo Tosatti
2012-10-30  8:34   ` Glauber Costa
2012-10-31  3:19     ` [patch 15/18] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag\ Marcelo Tosatti
2012-10-24 13:13 ` [patch 16/18] KVM: x86: notifier for clocksource changes Marcelo Tosatti
2012-10-24 13:13 ` [patch 17/18] KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization Marcelo Tosatti
2012-10-24 13:13 ` [patch 18/18] KVM: x86: require matched TSC offsets for master clock Marcelo Tosatti
2012-10-31 22:46 ` [patch 00/16] pvclock vsyscall support + KVM hypervisor support (v3) Marcelo Tosatti
2012-10-31 22:46   ` [patch 01/16] KVM: x86: retain pvclock guest stopped bit in guest memory Marcelo Tosatti
2012-11-01 10:39     ` Gleb Natapov
2012-11-01 20:51       ` Marcelo Tosatti
2012-11-01 13:44     ` Glauber Costa
2012-10-31 22:46   ` [patch 02/16] x86: pvclock: make sure rdtsc doesnt speculate out of region Marcelo Tosatti
2012-11-01 11:48     ` Gleb Natapov
2012-11-01 13:49       ` Glauber Costa
2012-11-01 13:51         ` Gleb Natapov
2012-11-01 20:56         ` Marcelo Tosatti
2012-11-01 22:13           ` Gleb Natapov
2012-11-01 22:21             ` Marcelo Tosatti
2012-11-02  6:02               ` Gleb Natapov
2012-10-31 22:46   ` [patch 03/16] x86: pvclock: remove pvclock_shadow_time Marcelo Tosatti
2012-11-01 13:52     ` Glauber Costa
2012-10-31 22:47   ` [patch 04/16] x86: pvclock: create helper for pvclock data retrieval Marcelo Tosatti
2012-11-01 14:04     ` Glauber Costa
2012-11-01 20:57       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 05/16] x86: pvclock: introduce helper to read flags Marcelo Tosatti
2012-11-01 14:07     ` Glauber Costa
2012-11-01 21:08       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 06/16] sched: add notifier for cross-cpu migrations Marcelo Tosatti
2012-11-01 14:08     ` Glauber Costa
2012-10-31 22:47   ` [patch 07/16] x86: pvclock: generic pvclock vsyscall initialization Marcelo Tosatti
2012-11-01 14:19     ` Glauber Costa
2012-10-31 22:47   ` [patch 08/16] KVM: x86: introduce facility to support vsyscall pvclock, via MSR Marcelo Tosatti
2012-11-01 14:28     ` Glauber Costa
2012-11-01 21:39       ` Marcelo Tosatti
2012-11-02 10:23         ` Glauber Costa
2012-11-02 13:00           ` Marcelo Tosatti
2012-11-05  8:03             ` Glauber Costa
2012-10-31 22:47   ` [patch 09/16] x86: kvm guest: pvclock vsyscall support Marcelo Tosatti
2012-11-02  9:42     ` Glauber Costa
2012-11-05  8:35       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 10/16] x86: vdso: pvclock gettime support Marcelo Tosatti
2012-11-01 14:41     ` Glauber Costa
2012-11-01 21:42       ` Marcelo Tosatti
2012-11-02  0:33         ` Marcelo Tosatti
2012-11-02 10:25           ` Glauber Costa
2012-11-14 10:42     ` Gleb Natapov
2012-11-14 22:42       ` Marcelo Tosatti
2012-10-31 22:47   ` [patch 11/16] KVM: x86: pass host_tsc to read_l1_tsc Marcelo Tosatti
2012-10-31 22:47   ` [patch 12/16] time: export time information for KVM pvclock Marcelo Tosatti
2012-10-31 22:47   ` [patch 13/16] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag Marcelo Tosatti
2012-10-31 22:47   ` [patch 14/16] KVM: x86: notifier for clocksource changes Marcelo Tosatti
2012-10-31 22:47   ` [patch 15/16] KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization Marcelo Tosatti
2012-10-31 22:47   ` [patch 16/16] KVM: x86: require matched TSC offsets for master clock Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121024131621.707068244@redhat.com \
    --to=mtosatti@redhat.com \
    --cc=avi@redhat.com \
    --cc=gleb@redhat.com \
    --cc=glommer@parallels.com \
    --cc=jeremy@goop.org \
    --cc=johnstul@us.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=zamsden@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.