From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org
Cc: johnstul@us.ibm.com, jeremy@goop.org, glommer@parallels.com,
zamsden@gmail.com, gleb@redhat.com, avi@redhat.com,
pbonzini@redhat.com, Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 08/16] KVM: x86: introduce facility to support vsyscall pvclock, via MSR
Date: Wed, 31 Oct 2012 20:47:04 -0200 [thread overview]
Message-ID: <20121031224824.199331603@redhat.com> (raw)
In-Reply-To: 20121031224656.417434866@redhat.com
[-- Attachment #1: 11-host-add-userspace-time-msr --]
[-- Type: text/plain, Size: 9580 bytes --]
Allow a guest to register a second location for the VCPU time info
structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
This is intended to allow the guest kernel to map this information
into a usermode accessible page, so that usermode can efficiently
calculate system time from the TSC without having to make a syscall.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: vsyscall/arch/x86/include/asm/kvm_para.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_para.h
+++ vsyscall/arch/x86/include/asm/kvm_para.h
@@ -23,6 +23,7 @@
#define KVM_FEATURE_ASYNC_PF 4
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
+#define KVM_FEATURE_USERSPACE_CLOCKSOURCE 7
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -39,6 +40,7 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_USERSPACE_TIME 0x4b564d05
struct kvm_steal_time {
__u64 steal;
Index: vsyscall/Documentation/virtual/kvm/msr.txt
===================================================================
--- vsyscall.orig/Documentation/virtual/kvm/msr.txt
+++ vsyscall/Documentation/virtual/kvm/msr.txt
@@ -125,6 +125,22 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
leaf prior to usage.
+MSR_KVM_USERSPACE_TIME: 0x4b564d05
+
+Allow a guest to register a second location for the VCPU time info
+structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
+This is intended to allow the guest kernel to map this information
+into a usermode accessible page, so that usermode can efficiently
+calculate system time from the TSC without having to make a syscall.
+
+Relationship with master copy (MSR_KVM_SYSTEM_TIME_NEW):
+
+- This MSR may be enabled only while the master copy is enabled; a
+  write with the enable bit set is rejected otherwise.
+- Disabling updates to the master automatically disables
+updates for this copy.
+
+Availability of this MSR must be checked via bit 7 in 0x40000001 cpuid
+leaf prior to usage.
MSR_KVM_WALL_CLOCK: 0x11
Index: vsyscall/arch/x86/include/asm/kvm_host.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_host.h
+++ vsyscall/arch/x86/include/asm/kvm_host.h
@@ -415,10 +415,13 @@ struct kvm_vcpu_arch {
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
gpa_t time;
+ gpa_t uspace_time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
unsigned int time_offset;
+ unsigned int uspace_time_offset;
struct page *time_page;
+ struct page *uspace_time_page;
/* set guest stopped flag in pvclock flags field */
bool pvclock_set_guest_stopped_request;
Index: vsyscall/arch/x86/kvm/x86.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/x86.c
+++ vsyscall/arch/x86/kvm/x86.c
@@ -809,13 +809,13 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
* kvm-specific. Those are put in the beginning of the list.
*/
-#define KVM_SAVE_MSRS_BEGIN 10
+#define KVM_SAVE_MSRS_BEGIN 11
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
- MSR_KVM_PV_EOI_EN,
+ MSR_KVM_PV_EOI_EN, MSR_KVM_USERSPACE_TIME,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -1135,16 +1135,43 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
EXPORT_SYMBOL_GPL(kvm_write_tsc);
+static void kvm_write_pvtime(struct kvm_vcpu *v, struct page *page,
+ unsigned int offset_in_page, gpa_t gpa)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+ void *shared_kaddr;
+ struct pvclock_vcpu_time_info *guest_hv_clock;
+ u8 pvclock_flags;
+ /*
+  * Publish v->arch.hv_clock to one guest location: the structure that
+  * starts offset_in_page bytes into page. gpa is used only to mark its
+  * guest frame dirty at the end.
+  */
+ shared_kaddr = kmap_atomic(page);
+
+ guest_hv_clock = shared_kaddr + offset_in_page;
+
+ /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+ pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ /*
+  * A pending stop request is consumed by the first call that sees it.
+  * NOTE(review): when a second location is written in the same update,
+  * that copy carries the bit only if its own flags already had it set;
+  * confirm this is intended.
+  */
+ if (vcpu->pvclock_set_guest_stopped_request) {
+ pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+ vcpu->pvclock_set_guest_stopped_request = false;
+ }
+
+ vcpu->hv_clock.flags = pvclock_flags;
+
+ memcpy(shared_kaddr + offset_in_page, &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
+
+ kunmap_atomic(shared_kaddr);
+
+ mark_page_dirty(v->kvm, gpa >> PAGE_SHIFT);
+}
+
static int kvm_guest_time_update(struct kvm_vcpu *v)
{
unsigned long flags;
struct kvm_vcpu_arch *vcpu = &v->arch;
- void *shared_kaddr;
unsigned long this_tsc_khz;
s64 kernel_ns, max_kernel_ns;
u64 tsc_timestamp;
- struct pvclock_vcpu_time_info *guest_hv_clock;
- u8 pvclock_flags;
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
@@ -1235,26 +1262,11 @@ static int kvm_guest_time_update(struct
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page);
-
- guest_hv_clock = shared_kaddr + vcpu->time_offset;
-
- /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ kvm_write_pvtime(v, vcpu->time_page, vcpu->time_offset, vcpu->time);
+ if (vcpu->uspace_time_page)
+ kvm_write_pvtime(v, vcpu->uspace_time_page,
+ vcpu->uspace_time_offset, vcpu->uspace_time);
- if (vcpu->pvclock_set_guest_stopped_request) {
- pvclock_flags |= PVCLOCK_GUEST_STOPPED;
- vcpu->pvclock_set_guest_stopped_request = false;
- }
-
- vcpu->hv_clock.flags = pvclock_flags;
-
- memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
-
- kunmap_atomic(shared_kaddr);
-
- mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
return 0;
}
@@ -1549,6 +1561,15 @@ static void kvmclock_reset(struct kvm_vc
}
}
+static void kvmclock_uspace_reset(struct kvm_vcpu *vcpu)
+{ /* Unregister the userspace time location, releasing its page if any. */
+ vcpu->arch.uspace_time = 0; /* MSR_KVM_USERSPACE_TIME now reads as 0 */
+ if (vcpu->arch.uspace_time_page) {
+ kvm_release_page_dirty(vcpu->arch.uspace_time_page);
+ vcpu->arch.uspace_time_page = NULL; /* no more second-copy updates */
+ }
+}
+
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
{
u64 delta;
@@ -1639,6 +1660,31 @@ int kvm_set_msr_common(struct kvm_vcpu *
vcpu->kvm->arch.wall_clock = data;
kvm_write_wall_clock(vcpu->kvm, data);
break;
+ case MSR_KVM_USERSPACE_TIME: {
+ kvmclock_uspace_reset(vcpu);
+
+ if (!vcpu->arch.time_page && (data & 1))
+ return 1;
+
+ vcpu->arch.uspace_time = data;
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+ /* we verify if the enable bit is set... */
+ if (!(data & 1))
+ break;
+
+ /* ...but clean it before doing the actual write */
+ vcpu->arch.uspace_time_offset = data & ~(PAGE_MASK | 1);
+
+ vcpu->arch.uspace_time_page = gfn_to_page(vcpu->kvm,
+ data >> PAGE_SHIFT);
+
+ if (is_error_page(vcpu->arch.uspace_time_page)) {
+ kvm_release_page_clean(vcpu->arch.uspace_time_page);
+ vcpu->arch.uspace_time_page = NULL;
+ }
+ break;
+ }
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
kvmclock_reset(vcpu);
@@ -1647,8 +1693,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
/* we verify if the enable bit is set... */
- if (!(data & 1))
+ if (!(data & 1)) {
+ kvmclock_uspace_reset(vcpu);
break;
+ }
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
@@ -1656,8 +1704,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
vcpu->arch.time_page =
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
- if (is_error_page(vcpu->arch.time_page))
+ if (is_error_page(vcpu->arch.time_page)) {
vcpu->arch.time_page = NULL;
+ kvmclock_uspace_reset(vcpu);
+ }
break;
}
@@ -2010,6 +2060,9 @@ int kvm_get_msr_common(struct kvm_vcpu *
case MSR_KVM_SYSTEM_TIME_NEW:
data = vcpu->arch.time;
break;
+ case MSR_KVM_USERSPACE_TIME:
+ data = vcpu->arch.uspace_time;
+ break;
case MSR_KVM_ASYNC_PF_EN:
data = vcpu->arch.apf.msr_val;
break;
@@ -2195,6 +2248,7 @@ int kvm_dev_ioctl_check_extension(long e
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_IRQFD_RESAMPLE:
+ case KVM_CAP_USERSPACE_CLOCKSOURCE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -6017,6 +6071,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
+ kvmclock_uspace_reset(vcpu);
kvmclock_reset(vcpu);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
Index: vsyscall/arch/x86/kvm/cpuid.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/cpuid.c
+++ vsyscall/arch/x86/kvm/cpuid.c
@@ -411,7 +411,9 @@ static int do_cpuid_ent(struct kvm_cpuid
(1 << KVM_FEATURE_CLOCKSOURCE2) |
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_PV_EOI) |
- (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+ (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+ (1 << KVM_FEATURE_USERSPACE_CLOCKSOURCE);
+
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
Index: vsyscall/include/uapi/linux/kvm.h
===================================================================
--- vsyscall.orig/include/uapi/linux/kvm.h
+++ vsyscall/include/uapi/linux/kvm.h
@@ -626,6 +626,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_READONLY_MEM 81
#endif
#define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_USERSPACE_CLOCKSOURCE 83
#ifdef KVM_CAP_IRQ_ROUTING
next prev parent reply other threads:[~2012-10-31 22:47 UTC|newest]
Thread overview: 94+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-10-24 13:13 [patch 00/18] pvclock vsyscall support + KVM hypervisor support (v2) Marcelo Tosatti
2012-10-24 13:13 ` [patch 01/18] KVM: x86: retain pvclock guest stopped bit in guest memory Marcelo Tosatti
2012-10-24 13:13 ` [patch 02/18] x86: pvclock: make sure rdtsc doesnt speculate out of region Marcelo Tosatti
2012-10-24 13:13 ` [patch 03/18] x86: pvclock: remove pvclock_shadow_time Marcelo Tosatti
2012-10-30 9:23 ` Avi Kivity
2012-10-30 9:24 ` Avi Kivity
2012-10-24 13:13 ` [patch 04/18] x86: pvclock: create helper for pvclock data retrieval Marcelo Tosatti
2012-10-24 13:13 ` [patch 05/18] x86: pvclock: fix flags usage race Marcelo Tosatti
2012-10-24 13:13 ` [patch 06/18] x86: pvclock: introduce helper to read flags Marcelo Tosatti
2012-10-24 13:13 ` [patch 07/18] sched: add notifier for cross-cpu migrations Marcelo Tosatti
2012-10-24 13:13 ` [patch 08/18] x86: pvclock: generic pvclock vsyscall initialization Marcelo Tosatti
2012-10-29 14:18 ` Glauber Costa
2012-10-29 14:54 ` Marcelo Tosatti
2012-10-29 17:46 ` Jeremy Fitzhardinge
2012-10-29 14:39 ` Glauber Costa
2012-10-24 13:13 ` [patch 09/18] KVM: x86: introduce facility to support vsyscall pvclock, via MSR Marcelo Tosatti
2012-10-29 14:45 ` Glauber Costa
2012-10-29 17:44 ` Jeremy Fitzhardinge
2012-10-29 18:40 ` Marcelo Tosatti
2012-10-30 7:41 ` Glauber Costa
2012-10-30 9:39 ` Avi Kivity
2012-10-31 3:12 ` Marcelo Tosatti
2012-11-02 10:21 ` Glauber Costa
2012-10-30 7:38 ` Glauber Costa
2012-10-24 13:13 ` [patch 10/18] x86: kvm guest: pvclock vsyscall support Marcelo Tosatti
2012-10-24 13:13 ` [patch 11/18] x86: vsyscall: pass mode to gettime backend Marcelo Tosatti
2012-10-29 14:47 ` Glauber Costa
2012-10-29 18:41 ` Marcelo Tosatti
2012-10-30 7:42 ` Glauber Costa
2012-10-24 13:13 ` [patch 12/18] x86: vdso: pvclock gettime support Marcelo Tosatti
2012-10-29 14:59 ` Glauber Costa
2012-10-29 18:42 ` Marcelo Tosatti
2012-10-30 7:49 ` Glauber Costa
2012-10-31 3:16 ` Marcelo Tosatti
2012-10-24 13:13 ` [patch 13/18] KVM: x86: pass host_tsc to read_l1_tsc Marcelo Tosatti
2012-10-29 15:04 ` Glauber Costa
2012-10-29 18:45 ` Marcelo Tosatti
2012-10-30 7:55 ` Glauber Costa
2012-10-24 13:13 ` [patch 14/18] time: export time information for KVM pvclock Marcelo Tosatti
2012-11-10 1:02 ` John Stultz
2012-11-13 21:07 ` Marcelo Tosatti
2012-10-24 13:13 ` [patch 15/18] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag Marcelo Tosatti
2012-10-30 8:34 ` Glauber Costa
2012-10-31 3:19 ` [patch 15/18] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag\ Marcelo Tosatti
2012-10-24 13:13 ` [patch 16/18] KVM: x86: notifier for clocksource changes Marcelo Tosatti
2012-10-24 13:13 ` [patch 17/18] KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization Marcelo Tosatti
2012-10-24 13:13 ` [patch 18/18] KVM: x86: require matched TSC offsets for master clock Marcelo Tosatti
2012-10-31 22:46 ` [patch 00/16] pvclock vsyscall support + KVM hypervisor support (v3) Marcelo Tosatti
2012-10-31 22:46 ` [patch 01/16] KVM: x86: retain pvclock guest stopped bit in guest memory Marcelo Tosatti
2012-11-01 10:39 ` Gleb Natapov
2012-11-01 20:51 ` Marcelo Tosatti
2012-11-01 13:44 ` Glauber Costa
2012-10-31 22:46 ` [patch 02/16] x86: pvclock: make sure rdtsc doesnt speculate out of region Marcelo Tosatti
2012-11-01 11:48 ` Gleb Natapov
2012-11-01 13:49 ` Glauber Costa
2012-11-01 13:51 ` Gleb Natapov
2012-11-01 20:56 ` Marcelo Tosatti
2012-11-01 22:13 ` Gleb Natapov
2012-11-01 22:21 ` Marcelo Tosatti
2012-11-02 6:02 ` Gleb Natapov
2012-10-31 22:46 ` [patch 03/16] x86: pvclock: remove pvclock_shadow_time Marcelo Tosatti
2012-11-01 13:52 ` Glauber Costa
2012-10-31 22:47 ` [patch 04/16] x86: pvclock: create helper for pvclock data retrieval Marcelo Tosatti
2012-11-01 14:04 ` Glauber Costa
2012-11-01 20:57 ` Marcelo Tosatti
2012-10-31 22:47 ` [patch 05/16] x86: pvclock: introduce helper to read flags Marcelo Tosatti
2012-11-01 14:07 ` Glauber Costa
2012-11-01 21:08 ` Marcelo Tosatti
2012-10-31 22:47 ` [patch 06/16] sched: add notifier for cross-cpu migrations Marcelo Tosatti
2012-11-01 14:08 ` Glauber Costa
2012-10-31 22:47 ` [patch 07/16] x86: pvclock: generic pvclock vsyscall initialization Marcelo Tosatti
2012-11-01 14:19 ` Glauber Costa
2012-10-31 22:47 ` Marcelo Tosatti [this message]
2012-11-01 14:28 ` [patch 08/16] KVM: x86: introduce facility to support vsyscall pvclock, via MSR Glauber Costa
2012-11-01 21:39 ` Marcelo Tosatti
2012-11-02 10:23 ` Glauber Costa
2012-11-02 13:00 ` Marcelo Tosatti
2012-11-05 8:03 ` Glauber Costa
2012-10-31 22:47 ` [patch 09/16] x86: kvm guest: pvclock vsyscall support Marcelo Tosatti
2012-11-02 9:42 ` Glauber Costa
2012-11-05 8:35 ` Marcelo Tosatti
2012-10-31 22:47 ` [patch 10/16] x86: vdso: pvclock gettime support Marcelo Tosatti
2012-11-01 14:41 ` Glauber Costa
2012-11-01 21:42 ` Marcelo Tosatti
2012-11-02 0:33 ` Marcelo Tosatti
2012-11-02 10:25 ` Glauber Costa
2012-11-14 10:42 ` Gleb Natapov
2012-11-14 22:42 ` Marcelo Tosatti
2012-10-31 22:47 ` [patch 11/16] KVM: x86: pass host_tsc to read_l1_tsc Marcelo Tosatti
2012-10-31 22:47 ` [patch 12/16] time: export time information for KVM pvclock Marcelo Tosatti
2012-10-31 22:47 ` [patch 13/16] KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag Marcelo Tosatti
2012-10-31 22:47 ` [patch 14/16] KVM: x86: notifier for clocksource changes Marcelo Tosatti
2012-10-31 22:47 ` [patch 15/16] KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization Marcelo Tosatti
2012-10-31 22:47 ` [patch 16/16] KVM: x86: require matched TSC offsets for master clock Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20121031224824.199331603@redhat.com \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=gleb@redhat.com \
--cc=glommer@parallels.com \
--cc=jeremy@goop.org \
--cc=johnstul@us.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=zamsden@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).