From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH] x86: don't drop guest visible state updates when 64-bit PV guest is in user mode Date: Tue, 21 Jan 2014 16:55:57 +0000 Message-ID: <52DEA69D.4010102@citrix.com> References: <52DEA16B02000078001156E8@nat28.tlf.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="===============2147995466282644967==" Return-path: Received: from mail6.bemta4.messagelabs.com ([85.158.143.247]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1W5ecN-0006Ee-K7 for xen-devel@lists.xenproject.org; Tue, 21 Jan 2014 16:56:03 +0000 In-Reply-To: <52DEA16B02000078001156E8@nat28.tlf.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich , xen-devel Cc: Keir Fraser List-Id: xen-devel@lists.xenproject.org --===============2147995466282644967== Content-Type: multipart/alternative; boundary="------------000303050600060402060603" --------------000303050600060402060603 Content-Type: text/plain; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit On 21/01/2014 15:33, Jan Beulich wrote: > Since 64-bit PV uses separate kernel and user mode page tables, kernel > addresses (as usually provided via VCPUOP_register_runstate_memory_area > and possibly via VCPUOP_register_vcpu_time_memory_area) aren't > necessarily accessible when the respective updating occurs. Add logic > for toggle_guest_mode() to take care of this (if necessary) the next > time the vCPU switches to kernel mode. > > Signed-off-by: Jan Beulich > > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -1323,10 +1323,10 @@ static void paravirt_ctxt_switch_to(stru > } > > /* Update per-VCPU guest runstate shared memory area (if registered). */ > -static void update_runstate_area(struct vcpu *v) > +bool_t update_runstate_area(const struct vcpu *v) Can you adjust the comment to indicate what the return value means. The logic is quite opaque, but I believe it is "true if the runstate has been suitably updated". > { > if ( guest_handle_is_null(runstate_guest(v)) ) > - return; > + return 1; > > if ( has_32bit_shinfo(v->domain) ) > { > @@ -1334,10 +1334,18 @@ static void update_runstate_area(struct > > XLAT_vcpu_runstate_info(&info, &v->runstate); > __copy_to_guest(v->runstate_guest.compat, &info, 1); > - return; > + return 1; > } > > - __copy_to_guest(runstate_guest(v), &v->runstate, 1); > + return __copy_to_guest(runstate_guest(v), &v->runstate, 1) != > + sizeof(v->runstate); > +} > + > +static void _update_runstate_area(struct vcpu *v) > +{ > + if ( !update_runstate_area(v) && is_pv_vcpu(v) && > + !(v->arch.flags & TF_kernel_mode) ) > + v->arch.pv_vcpu.need_update_runstate_area = 1; > } > > static inline int need_full_gdt(struct vcpu *v) > @@ -1443,8 +1451,8 @@ void context_switch(struct vcpu *prev, s > flush_tlb_mask(&dirty_mask); > } > > - if (prev != next) > - update_runstate_area(prev); > + if ( prev != next ) > + _update_runstate_area(prev); > > if ( is_hvm_vcpu(prev) ) > { > @@ -1497,8 +1505,8 @@ void context_switch(struct vcpu *prev, s > > context_saved(prev); > > - if (prev != next) > - update_runstate_area(next); > + if ( prev != next ) > + _update_runstate_area(next); > > /* Ensure that the vcpu has an up-to-date time base. */ > update_vcpu_system_time(next); > --- a/xen/arch/x86/time.c > +++ b/xen/arch/x86/time.c > @@ -740,7 +740,6 @@ static void __update_vcpu_system_time(st > { > struct cpu_time *t; > struct vcpu_time_info *u, _u; > - XEN_GUEST_HANDLE(vcpu_time_info_t) user_u; > struct domain *d = v->domain; > s_time_t tsc_stamp = 0; > > @@ -805,19 +804,31 @@ static void __update_vcpu_system_time(st > /* 3. Update guest kernel version. */ > u->version = version_update_end(u->version); > > - user_u = v->arch.time_info_guest; > - if ( !guest_handle_is_null(user_u) ) > - { > - /* 1. Update userspace version. */ > - __copy_field_to_guest(user_u, &_u, version); > - wmb(); > - /* 2. Update all other userspavce fields. */ > - __copy_to_guest(user_u, &_u, 1); > - wmb(); > - /* 3. Update userspace version. */ > - _u.version = version_update_end(_u.version); > - __copy_field_to_guest(user_u, &_u, version); > - } > + if ( !update_secondary_system_time(v, &_u) && is_pv_domain(d) && > + !is_pv_32bit_domain(d) && !(v->arch.flags & TF_kernel_mode) ) > + v->arch.pv_vcpu.pending_system_time = _u; > +} > + > +bool_t update_secondary_system_time(const struct vcpu *v, > + struct vcpu_time_info *u) > +{ > + XEN_GUEST_HANDLE(vcpu_time_info_t) user_u = v->arch.time_info_guest; > + > + if ( guest_handle_is_null(user_u) ) > + return 1; > + > + /* 1. Update userspace version. */ > + if ( __copy_field_to_guest(user_u, u, version) == sizeof(u->version) ) > + return 0; > + wmb(); > + /* 2. Update all other userspace fields. */ > + __copy_to_guest(user_u, u, 1); > + wmb(); > + /* 3. Update userspace version. */ > + u->version = version_update_end(u->version); > + __copy_field_to_guest(user_u, u, version); > + > + return 1; > } > > void update_vcpu_system_time(struct vcpu *v) > --- a/xen/arch/x86/x86_64/traps.c > +++ b/xen/arch/x86/x86_64/traps.c > @@ -266,6 +266,18 @@ void toggle_guest_mode(struct vcpu *v) > #else > write_ptbase(v); > #endif > + > + if ( !(v->arch.flags & TF_kernel_mode) ) > + return; > + > + if ( v->arch.pv_vcpu.need_update_runstate_area && > + update_runstate_area(v) ) > + v->arch.pv_vcpu.need_update_runstate_area = 0; > + > + if ( v->arch.pv_vcpu.pending_system_time.version && > + update_secondary_system_time(v, > + &v->arch.pv_vcpu.pending_system_time) ) > + v->arch.pv_vcpu.pending_system_time.version = 0; What would happen now if a guest kernel loads a faulting address for its runstate info (or edits its pagetables so a previously good address now faults)? It appears as if we could retry writing to the same bad address each time we try to toggle into kernel mode. Given that we know we are running on the correct set of pagetables, I think both of these pending variable can be unconditionally cleared, whether or not update{runstate_area,secondary_system_time} succeed or fail. ~Andrew > } > > unsigned long do_iret(void) > --- a/xen/include/asm-x86/domain.h > +++ b/xen/include/asm-x86/domain.h > @@ -373,6 +373,10 @@ struct pv_vcpu > /* Current LDT details. */ > unsigned long shadow_ldt_mapcnt; > spinlock_t shadow_ldt_lock; > + > + /* Deferred VA-based update state. */ > + bool_t need_update_runstate_area; > + struct vcpu_time_info pending_system_time; > }; > > struct arch_vcpu > @@ -445,6 +449,10 @@ struct arch_vcpu > #define hvm_vmx hvm_vcpu.u.vmx > #define hvm_svm hvm_vcpu.u.svm > > +bool_t update_runstate_area(const struct vcpu *); > +bool_t update_secondary_system_time(const struct vcpu *, > + struct vcpu_time_info *); > + > void vcpu_show_execution_state(struct vcpu *); > void vcpu_show_registers(const struct vcpu *); > > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel --------------000303050600060402060603 Content-Type: text/html; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit
On 21/01/2014 15:33, Jan Beulich wrote:
Since 64-bit PV uses separate kernel and user mode page tables, kernel
addresses (as usually provided via VCPUOP_register_runstate_memory_area
and possibly via VCPUOP_register_vcpu_time_memory_area) aren't
necessarily accessible when the respective updating occurs. Add logic
for toggle_guest_mode() to take care of this (if necessary) the next
time the vCPU switches to kernel mode.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1323,10 +1323,10 @@ static void paravirt_ctxt_switch_to(stru
 }
 
 /* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+bool_t update_runstate_area(const struct vcpu *v)

Can you adjust the comment to indicate what the return value means.  The logic is quite opaque, but I believe it is "true if the runstate has been suitably updated".

 {
     if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+        return 1;
 
     if ( has_32bit_shinfo(v->domain) )
     {
@@ -1334,10 +1334,18 @@ static void update_runstate_area(struct 
 
         XLAT_vcpu_runstate_info(&info, &v->runstate);
         __copy_to_guest(v->runstate_guest.compat, &info, 1);
-        return;
+        return 1;
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    return __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
+           sizeof(v->runstate);
+}
+
+static void _update_runstate_area(struct vcpu *v)
+{
+    if ( !update_runstate_area(v) && is_pv_vcpu(v) &&
+         !(v->arch.flags & TF_kernel_mode) )
+        v->arch.pv_vcpu.need_update_runstate_area = 1;
 }
 
 static inline int need_full_gdt(struct vcpu *v)
@@ -1443,8 +1451,8 @@ void context_switch(struct vcpu *prev, s
         flush_tlb_mask(&dirty_mask);
     }
 
-    if (prev != next)
-        update_runstate_area(prev);
+    if ( prev != next )
+        _update_runstate_area(prev);
 
     if ( is_hvm_vcpu(prev) )
     {
@@ -1497,8 +1505,8 @@ void context_switch(struct vcpu *prev, s
 
     context_saved(prev);
 
-    if (prev != next)
-        update_runstate_area(next);
+    if ( prev != next )
+        _update_runstate_area(next);
 
     /* Ensure that the vcpu has an up-to-date time base. */
     update_vcpu_system_time(next);
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -740,7 +740,6 @@ static void __update_vcpu_system_time(st
 {
     struct cpu_time       *t;
     struct vcpu_time_info *u, _u;
-    XEN_GUEST_HANDLE(vcpu_time_info_t) user_u;
     struct domain *d = v->domain;
     s_time_t tsc_stamp = 0;
 
@@ -805,19 +804,31 @@ static void __update_vcpu_system_time(st
     /* 3. Update guest kernel version. */
     u->version = version_update_end(u->version);
 
-    user_u = v->arch.time_info_guest;
-    if ( !guest_handle_is_null(user_u) )
-    {
-        /* 1. Update userspace version. */
-        __copy_field_to_guest(user_u, &_u, version);
-        wmb();
-        /* 2. Update all other userspavce fields. */
-        __copy_to_guest(user_u, &_u, 1);
-        wmb();
-        /* 3. Update userspace version. */
-        _u.version = version_update_end(_u.version);
-        __copy_field_to_guest(user_u, &_u, version);
-    }
+    if ( !update_secondary_system_time(v, &_u) && is_pv_domain(d) &&
+         !is_pv_32bit_domain(d) && !(v->arch.flags & TF_kernel_mode) )
+        v->arch.pv_vcpu.pending_system_time = _u;
+}
+
+bool_t update_secondary_system_time(const struct vcpu *v,
+                                    struct vcpu_time_info *u)
+{
+    XEN_GUEST_HANDLE(vcpu_time_info_t) user_u = v->arch.time_info_guest;
+
+    if ( guest_handle_is_null(user_u) )
+        return 1;
+
+    /* 1. Update userspace version. */
+    if ( __copy_field_to_guest(user_u, u, version) == sizeof(u->version) )
+        return 0;
+    wmb();
+    /* 2. Update all other userspace fields. */
+    __copy_to_guest(user_u, u, 1);
+    wmb();
+    /* 3. Update userspace version. */
+    u->version = version_update_end(u->version);
+    __copy_field_to_guest(user_u, u, version);
+
+    return 1;
 }
 
 void update_vcpu_system_time(struct vcpu *v)
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -266,6 +266,18 @@ void toggle_guest_mode(struct vcpu *v)
 #else
     write_ptbase(v);
 #endif
+
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        return;
+
+    if ( v->arch.pv_vcpu.need_update_runstate_area &&
+         update_runstate_area(v) )
+        v->arch.pv_vcpu.need_update_runstate_area = 0;
+
+    if ( v->arch.pv_vcpu.pending_system_time.version &&
+         update_secondary_system_time(v,
+                                      &v->arch.pv_vcpu.pending_system_time) )
+        v->arch.pv_vcpu.pending_system_time.version = 0;

What would happen now if a guest kernel loads a faulting address for its runstate info (or edits its pagetables so a previously good address now faults)?  It appears as if we could retry writing to the same bad address each time we try to toggle into kernel mode.

Given that we know we are running on the correct set of pagetables, I think both of these pending variable can be unconditionally cleared, whether or not update{runstate_area,secondary_system_time} succeed or fail.

~Andrew

 }
 
 unsigned long do_iret(void)
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -373,6 +373,10 @@ struct pv_vcpu
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
+
+    /* Deferred VA-based update state. */
+    bool_t need_update_runstate_area;
+    struct vcpu_time_info pending_system_time;
 };
 
 struct arch_vcpu
@@ -445,6 +449,10 @@ struct arch_vcpu
 #define hvm_vmx         hvm_vcpu.u.vmx
 #define hvm_svm         hvm_vcpu.u.svm
 
+bool_t update_runstate_area(const struct vcpu *);
+bool_t update_secondary_system_time(const struct vcpu *,
+                                    struct vcpu_time_info *);
+
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);
 




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

--------------000303050600060402060603-- --===============2147995466282644967== Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel --===============2147995466282644967==--