All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Allow guest to register a secondary vcpu_time_info structure
@ 2009-10-06  0:10 Jeremy Fitzhardinge
  2009-10-06  7:10 ` Keir Fraser
  2009-10-06  9:26 ` Jan Beulich
  0 siblings, 2 replies; 5+ messages in thread
From: Jeremy Fitzhardinge @ 2009-10-06  0:10 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Dan Magenheimer, Xen-devel

Allow a guest to register a second location for the VCPU time info
structure for each vcpu.  This is intended to allow the guest kernel to
map this information into a usermode accessible page, so that usermode
can efficiently calculate system time from the TSC without having to
make a syscall.

The second vcpu_time_info structure is updated by copy, rather than
being a shared page between the guest and Xen.  It is not directly
updated by copy; instead, Xen preserves and increments the existing
version number in place.  This allows the guest to also update the
version number (useful to indicate vcpu context switches to usermode). 
This assumes that the guest will only ever update the structure for a
given vcpu on that vcpu (as Xen does, so there are never any cross-cpu
accesses).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff -r 6472342c8ab0 -r 1dc86d83b352 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Wed Sep 30 08:51:21 2009 +0100
+++ b/xen/arch/x86/domain.c	Fri Oct 02 17:01:26 2009 -0700
@@ -964,6 +964,24 @@
         break;
     }
 
+    case VCPUOP_register_vcpu_time_memory_area:
+    {
+        struct vcpu_register_time_memory_area area;
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+            break;
+
+        if ( !guest_handle_okay(area.addr.h, 1) )
+            break;
+
+        rc = 0;
+        v->time_info_guest = area.addr.h;
+
+        __update_vcpu_system_time(v, 1);
+
+        break;
+    }
+
     case VCPUOP_get_physid:
     {
         struct vcpu_get_physid cpu_id;
diff -r 6472342c8ab0 -r 1dc86d83b352 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Wed Sep 30 08:51:21 2009 +0100
+++ b/xen/arch/x86/time.c	Fri Oct 02 17:01:26 2009 -0700
@@ -22,6 +22,7 @@
 #include <xen/irq.h>
 #include <xen/softirq.h>
 #include <xen/keyhandler.h>
+#include <xen/guest_access.h>
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/mpspec.h>
@@ -826,7 +827,31 @@
     (*version)++;
 }
 
-void update_vcpu_system_time(struct vcpu *v)
+static void update_guest_time_info(struct vcpu *v, struct vcpu_time_info *u)
+{
+    struct vcpu_time_info info;
+    u32 version;
+
+    if ( guest_handle_is_null(v->time_info_guest) )
+        return;
+
+    if ( __copy_from_guest(&info, v->time_info_guest, 1) )
+        return;
+
+    /*
+     * Update the guest copy of the time info.  We need to make sure
+     * we update the guest's version of the version number rather than
+     * use a verbtim copy of the master one, because the guest may
+     * update the version for its own purposes.
+     */
+    version = info.version;
+    info = *u;
+    info.version = (version + 2) & ~1;
+
+    __copy_to_guest(v->time_info_guest, &info, 1);
+}
+
+void __update_vcpu_system_time(struct vcpu *v, int force)
 {
     struct cpu_time       *t;
     struct vcpu_time_info *u;
@@ -837,7 +862,7 @@
     t = &this_cpu(cpu_time);
     u = &vcpu_info(v, time);
 
-    if ( u->tsc_timestamp == t->local_tsc_stamp )
+    if ( !force && u->tsc_timestamp == t->local_tsc_stamp )
         return;
 
     version_update_begin(&u->version);
@@ -848,6 +873,13 @@
     u->tsc_shift         = (s8)t->tsc_scale.shift;
 
     version_update_end(&u->version);
+
+    update_guest_time_info(v, u);
+}
+
+void update_vcpu_system_time(struct vcpu *v)
+{
+    __update_vcpu_system_time(v, 0);
 }
 
 void update_domain_wallclock_time(struct domain *d)
diff -r 6472342c8ab0 -r 1dc86d83b352 xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h	Wed Sep 30 08:51:21 2009 +0100
+++ b/xen/include/public/vcpu.h	Fri Oct 02 17:01:26 2009 -0700
@@ -202,6 +202,49 @@
 #define xen_vcpu_physid_to_x86_acpiid(physid) \
     ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32)))
 
+/* 
+ * Register a memory location to get a secondary copy of the vcpu time
+ * parameters.  The master copy still exists as part of the vcpu
+ * shared memory area, and this secondary copy is updated whenever the
+ * master copy is updated.
+ *
+ * The intent is that this copy may be mapped (RO) into userspace so
+ * that usermode can compute system time using the time info and the
+ * tsc.  Usermode will see an array of vcpu_time_info structures, one
+ * for each vcpu, and choose the right one by an existing mechanism
+ * which allows it to get the current vcpu number (such as via a
+ * segment limit).  It can then apply the normal algorithm to compute
+ * system time from the tsc.
+ *
+ * However, because usermode threads are subject to two levels of
+ * scheduling (kernel scheduling of threads to vcpus, and Xen
+ * scheduling of vcpus to pcpus), we must make sure that the thread
+ * knows it has had a race with either (or both) of these two events.
+ * To allow this, the guest kernel updates the time_info version
+ * number when the vcpu does a context switch, so that usermode will
+ * always see a version number change when the parameters need to be
+ * revalidated.  Xen makes sure that it always updates the guest's
+ * version rather than overwriting it.  (It assumes that a vcpu will
+ * always update its own version number, so there are no cross-cpu
+ * synchronization issues; the only concern is that if the guest
+ * kernel gets preempted by Xen it doesn't revert the version number
+ * to an older value.)
+ *
+ * @extra_arg == pointer to vcpu_register_time_info_memory_area structure.
+ */
+#define VCPUOP_register_vcpu_time_memory_area   13
+
+DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t);
+struct vcpu_register_time_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_time_info_t) h;
+        struct vcpu_time_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff -r 6472342c8ab0 -r 1dc86d83b352 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Wed Sep 30 08:51:21 2009 +0100
+++ b/xen/include/xen/sched.h	Fri Oct 02 17:01:26 2009 -0700
@@ -102,6 +102,9 @@
     } runstate_guest; /* guest address */
 #endif
 
+    /* A secondary copy of the vcpu time info */
+    XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
diff -r 6472342c8ab0 -r 1dc86d83b352 xen/include/xen/time.h
--- a/xen/include/xen/time.h	Wed Sep 30 08:51:21 2009 +0100
+++ b/xen/include/xen/time.h	Fri Oct 02 17:01:26 2009 -0700
@@ -54,6 +54,7 @@
 #define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
 
 extern void update_vcpu_system_time(struct vcpu *v);
+extern void __update_vcpu_system_time(struct vcpu *v, int force);
 extern void update_domain_wallclock_time(struct domain *d);
 
 extern void do_settime(

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Allow guest to register a secondary vcpu_time_info structure
  2009-10-06  0:10 [PATCH] Allow guest to register a secondary vcpu_time_info structure Jeremy Fitzhardinge
@ 2009-10-06  7:10 ` Keir Fraser
  2009-10-06 16:50   ` Jeremy Fitzhardinge
  2009-10-06  9:26 ` Jan Beulich
  1 sibling, 1 reply; 5+ messages in thread
From: Keir Fraser @ 2009-10-06  7:10 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Dan Magenheimer, Xen-devel

On 06/10/2009 01:10, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:

> The second vcpu_time_info structure is updated by copy, rather than
> being a shared page between the guest and Xen.  It is not directly
> updated by copy; instead, Xen preserves and increments the existing
> version number in place.  This allows the guest to also update the
> version number (useful to indicate vcpu context switches to usermode).
> This assumes that the guest will only ever update the structure for a
> given vcpu on that vcpu (as Xen does, so there are never any cross-cpu
> accesses).

How do you feel about the 32-bit version number? We have space to easily
expand it to 64 bits, and now would be the time to do it while we're
extending the ABI.

 -- Keir

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Allow guest to register a secondary vcpu_time_info structure
  2009-10-06  0:10 [PATCH] Allow guest to register a secondary vcpu_time_info structure Jeremy Fitzhardinge
  2009-10-06  7:10 ` Keir Fraser
@ 2009-10-06  9:26 ` Jan Beulich
  2009-10-06 16:50   ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2009-10-06  9:26 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Dan Magenheimer, Xen-devel, Keir Fraser

>>> Jeremy Fitzhardinge <jeremy@goop.org> 06.10.09 02:10 >>>
>-void update_vcpu_system_time(struct vcpu *v)
>+static void update_guest_time_info(struct vcpu *v, struct vcpu_time_info *u)
>+{
>+    struct vcpu_time_info info;
>+    u32 version;
>+
>+    if ( guest_handle_is_null(v->time_info_guest) )
>+        return;
>+
>+    if ( __copy_from_guest(&info, v->time_info_guest, 1) )
>+        return;

I'd suggest using __copy_field_from_guest() here to get just the
version member, after copying *u into info.

>+
>+    /*
>+     * Update the guest copy of the time info.  We need to make sure
>+     * we update the guest's version of the version number rather than
>+     * use a verbtim copy of the master one, because the guest may
>+     * update the version for its own purposes.
>+     */
>+    version = info.version;
>+    info = *u;
>+    info.version = (version + 2) & ~1;
>+
>+    __copy_to_guest(v->time_info_guest, &info, 1);
>+}

Jan

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Allow guest to register a secondary vcpu_time_info structure
  2009-10-06  7:10 ` Keir Fraser
@ 2009-10-06 16:50   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 5+ messages in thread
From: Jeremy Fitzhardinge @ 2009-10-06 16:50 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Dan Magenheimer, Xen-devel

On 10/06/09 00:10, Keir Fraser wrote:
> On 06/10/2009 01:10, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:
>
>   
>> The second vcpu_time_info structure is updated by copy, rather than
>> being a shared page between the guest and Xen.  It is not directly
>> updated by copy; instead, Xen preserves and increments the existing
>> version number in place.  This allows the guest to also update the
>> version number (useful to indicate vcpu context switches to usermode).
>> This assumes that the guest will only ever update the structure for a
>> given vcpu on that vcpu (as Xen does, so there are never any cross-cpu
>> accesses).
>>     
> How do you feel about the 32-bit version number? We have space to easily
> expand it to 64 bits, and now would be the time to do it while we're
> extending the ABI.
>   

I think 32 bit (well, 31) is fine:

The version number doesn't increment that often, so I think it would
take a long time to wrap.  The only failure mode is if the version wraps
and gets an aliased value between the "get version" and "re-test
version" phases of the algorithm, which is only a few tens of
instructions at most.  I would think it unlikely the version increments
once in that period, let alone 2^31 times.  As far as I know, nobody is
keeping long-term copies of the time info.

(One unlikely scenario is that you have usermode in the middle of a
vsyscall gettimeofday when the domain gets suspended/migrated and it is
suspended for some long period of time.  But this is OK because the
version gets saved along with the rest of the domain and incremented
from there.  I'm not sure if the same applies to the shared_info
version, but that's less of a concern because the kernel will never
preempt itself doing a clocksource_read to suspend.)

Aside from that, this isn't really an ABI extension.  The registration
of the secondary vcpu_time_info is a new ABI, but from there it is
identical to the existing pvclock ABI; I use the same code for both the
kernel and in the vsyscall page, so making them different would be
awkward (of course I could change the kernel to use the "secondary" time
values too, but it would still need to support old hypervisors so it
wouldn't save anything).  KVM is also using this ABI, so I wouldn't want
to introduce an arbitrary ABI change.

Also, a >32 bit version field would be hard for 32 bit guests to deal
with; it's not easy to either read or update its value in a single
instruction. (32 bit Xen too, but I guess we're less concerned about that.)

    J

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Allow guest to register a secondary  vcpu_time_info structure
  2009-10-06  9:26 ` Jan Beulich
@ 2009-10-06 16:50   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 5+ messages in thread
From: Jeremy Fitzhardinge @ 2009-10-06 16:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Dan Magenheimer, Xen-devel, Keir Fraser

On 10/06/09 02:26, Jan Beulich wrote:
>>>> Jeremy Fitzhardinge <jeremy@goop.org> 06.10.09 02:10 >>>
>>>>         
>> -void update_vcpu_system_time(struct vcpu *v)
>> +static void update_guest_time_info(struct vcpu *v, struct vcpu_time_info *u)
>> +{
>> +    struct vcpu_time_info info;
>> +    u32 version;
>> +
>> +    if ( guest_handle_is_null(v->time_info_guest) )
>> +        return;
>> +
>> +    if ( __copy_from_guest(&info, v->time_info_guest, 1) )
>> +        return;
>>     
> I'd suggest using __copy_field_from_guest() here to get just the
> version member, after copying *u into info.
>   

Good idea.  I'll update the patch.

    J

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-10-06 16:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-06  0:10 [PATCH] Allow guest to register a secondary vcpu_time_info structure Jeremy Fitzhardinge
2009-10-06  7:10 ` Keir Fraser
2009-10-06 16:50   ` Jeremy Fitzhardinge
2009-10-06  9:26 ` Jan Beulich
2009-10-06 16:50   ` Jeremy Fitzhardinge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.