* [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
@ 2014-10-10 9:07 Paul Durrant
2014-10-10 13:01 ` Jan Beulich
2014-10-13 10:14 ` Andrew Cooper
0 siblings, 2 replies; 5+ messages in thread
From: Paul Durrant @ 2014-10-10 9:07 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Keir Fraser, Jan Beulich
In XenServer system test it has become apparent that versions of Windows
that make use of the time reference counter enlightenment cannot cope with
large jumps forward in the value read from the MSR. Specifically,
suspending a very large domain took approx. 45 minutes to complete and
when the domain was resumed it was discovered that the WMI (Windows
Management Instrumentation) service had hung.
This patch adds code to freeze the value of the time reference counter
on domain pause and 'thaw' it on domain unpause, but only if the domain is
not shutting down. The absolute value of the counter is then saved in the
viridian domain context record. This prevents the guest OS from experiencing
large jumps in the value of the MSR and has been shown to reliably fix
the problem with WMI.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
xen/arch/x86/hvm/viridian.c | 62 +++++++++++++++++++++++++-------
xen/common/domain.c | 41 +++++++++++++++------
xen/include/asm-x86/hvm/hvm.h | 9 ++++-
xen/include/asm-x86/hvm/viridian.h | 27 ++++++++++++++
xen/include/public/arch-x86/hvm/save.h | 1 +
5 files changed, 116 insertions(+), 24 deletions(-)
diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
index 6726168..7d55363 100644
--- a/xen/arch/x86/hvm/viridian.c
+++ b/xen/arch/x86/hvm/viridian.c
@@ -289,6 +289,39 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
return 1;
}
+static int64_t raw_trc_val(struct domain *d)
+{
+ uint64_t tsc;
+ struct time_scale tsc_to_ns;
+
+ tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
+
+ /* convert tsc to count of 100ns periods */
+ set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
+ return scale_delta(tsc, &tsc_to_ns) / 100ul;
+}
+
+void viridian_time_ref_count_freeze(struct domain *d)
+{
+ struct viridian_time_ref_count *trc;
+
+ trc = &d->arch.hvm_domain.viridian.time_ref_count;
+
+ if ( test_and_clear_bit(_TRC_running, &trc->flags) )
+ trc->val = raw_trc_val(d) + trc->off;
+}
+
+void viridian_time_ref_count_thaw(struct domain *d)
+{
+ struct viridian_time_ref_count *trc;
+
+ trc = &d->arch.hvm_domain.viridian.time_ref_count;
+
+ if ( !d->is_shutting_down &&
+ !test_and_set_bit(_TRC_running, &trc->flags) )
+ trc->off = (int64_t)trc->val - raw_trc_val(d);
+}
+
int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
{
struct vcpu *v = current;
@@ -348,18 +381,19 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
case VIRIDIAN_MSR_TIME_REF_COUNT:
{
- uint64_t tsc;
- struct time_scale tsc_to_ns;
+ struct viridian_time_ref_count *trc;
+
+ trc = &d->arch.hvm_domain.viridian.time_ref_count;
if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
return 0;
- perfc_incr(mshv_rdmsr_time_ref_count);
- tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
+ if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
+ printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT: accessed\n",
+ d->domain_id);
- /* convert tsc to count of 100ns periods */
- set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
- *val = scale_delta(tsc, &tsc_to_ns) / 100ul;
+ perfc_incr(mshv_rdmsr_time_ref_count);
+ *val = raw_trc_val(d) + trc->off;
break;
}
@@ -450,9 +484,10 @@ static int viridian_save_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
if ( !is_viridian_domain(d) )
return 0;
- ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
- ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
-
+ ctxt.time_ref_count = d->arch.hvm_domain.viridian.time_ref_count.val;
+ ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
+ ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
+
return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
}
@@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
{
struct hvm_viridian_domain_context ctxt;
- if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
+ if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
return -EINVAL;
- d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
- d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
+ d->arch.hvm_domain.viridian.time_ref_count.val = ctxt.time_ref_count;
+ d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
+ d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
return 0;
}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 190998c..af5dc82 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -43,6 +43,10 @@
#include <xen/trace.h>
#include <xen/tmem.h>
+#ifdef CONFIG_X86
+#include <asm/hvm/viridian.h>
+#endif
+
/* Linux config option: propageted to domain0 */
/* xen_processor_pmbits: xen control Cx, Px, ... */
unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
@@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8 reason)
v->paused_for_shutdown = 1;
}
+#ifdef CONFIG_X86
+ if ( has_viridian_time_ref_count(d) )
+ viridian_time_ref_count_freeze(d);
+#endif
+
__domain_finalise_shutdown(d);
spin_unlock(&d->shutdown_lock);
@@ -925,32 +934,44 @@ int vcpu_unpause_by_systemcontroller(struct vcpu *v)
return 0;
}
-void domain_pause(struct domain *d)
+void do_domain_pause(struct domain *d, bool_t nosync)
{
struct vcpu *v;
- ASSERT(d != current->domain);
-
atomic_inc(&d->pause_count);
for_each_vcpu( d, v )
- vcpu_sleep_sync(v);
+ if ( nosync )
+ vcpu_sleep_nosync(v);
+ else
+ vcpu_sleep_sync(v);
+
+#ifdef CONFIG_X86
+ if ( has_viridian_time_ref_count(d) )
+ viridian_time_ref_count_freeze(d);
+#endif
}
-void domain_pause_nosync(struct domain *d)
+void domain_pause(struct domain *d)
{
- struct vcpu *v;
-
- atomic_inc(&d->pause_count);
+ ASSERT(d != current->domain);
+ do_domain_pause(d, 0);
+}
- for_each_vcpu( d, v )
- vcpu_sleep_nosync(v);
+void domain_pause_nosync(struct domain *d)
+{
+ do_domain_pause(d, 1);
}
void domain_unpause(struct domain *d)
{
struct vcpu *v;
+#ifdef CONFIG_X86
+ if ( has_viridian_time_ref_count(d) )
+ viridian_time_ref_count_thaw(d);
+#endif
+
if ( atomic_dec_and_test(&d->pause_count) )
for_each_vcpu( d, v )
vcpu_wake(v);
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0d94c48..2dda908 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -340,12 +340,19 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
return hvm_funcs.get_shadow_gs_base(v);
}
+
+#define has_hvm_params(d) \
+ ((d)->arch.hvm_domain.params != NULL)
+
#define viridian_feature_mask(d) \
- ((d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
+ (has_hvm_params(d) && (d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
#define is_viridian_domain(d) \
(is_hvm_domain(d) && (viridian_feature_mask(d) & HVMPV_base_freq))
+#define has_viridian_time_ref_count(d) \
+ (is_viridian_domain(d) && (viridian_feature_mask(d) & HVMPV_time_ref_count))
+
void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx,
uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx);
diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm-x86/hvm/viridian.h
index 496da33..4cab2e8 100644
--- a/xen/include/asm-x86/hvm/viridian.h
+++ b/xen/include/asm-x86/hvm/viridian.h
@@ -48,10 +48,24 @@ union viridian_hypercall_gpa
} fields;
};
+struct viridian_time_ref_count
+{
+ unsigned long flags;
+
+#define _TRC_accessed 0
+#define TRC_accessed (1 << _TRC_accessed)
+#define _TRC_running 1
+#define TRC_running (1 << _TRC_running)
+
+ uint64_t val;
+ int64_t off;
+};
+
struct viridian_domain
{
union viridian_guest_os_id guest_os_id;
union viridian_hypercall_gpa hypercall_gpa;
+ struct viridian_time_ref_count time_ref_count;
};
int
@@ -75,4 +89,17 @@ rdmsr_viridian_regs(
int
viridian_hypercall(struct cpu_user_regs *regs);
+void viridian_time_ref_count_freeze(struct domain *d);
+void viridian_time_ref_count_thaw(struct domain *d);
+
#endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
index 16d85a3..88aab7e 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
struct hvm_viridian_domain_context {
uint64_t hypercall_gpa;
uint64_t guest_os_id;
+ uint64_t time_ref_count;
};
DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
2014-10-10 9:07 [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused Paul Durrant
@ 2014-10-10 13:01 ` Jan Beulich
2014-10-10 13:26 ` Paul Durrant
2014-10-13 10:14 ` Andrew Cooper
1 sibling, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2014-10-10 13:01 UTC (permalink / raw)
To: Paul Durrant; +Cc: Andrew Cooper, Keir Fraser, xen-devel
>>> On 10.10.14 at 11:07, <paul.durrant@citrix.com> wrote:
> @@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
> {
> struct hvm_viridian_domain_context ctxt;
>
> - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> return -EINVAL;
>
> - d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> - d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> + d->arch.hvm_domain.viridian.time_ref_count.val = ctxt.time_ref_count;
> + d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> + d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
Is loading zero here correct when migration comes from an older
hypervisor?
> @@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8 reason)
> v->paused_for_shutdown = 1;
> }
>
> +#ifdef CONFIG_X86
> + if ( has_viridian_time_ref_count(d) )
> + viridian_time_ref_count_freeze(d);
> +#endif
> +
So in the description you say "but only if the domain is not shutting
down" - how does the above change fit that? Or is that comment
ambiguous whether it refers to both the freezing and thawing or only
the thawing?
Jan
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
2014-10-10 13:01 ` Jan Beulich
@ 2014-10-10 13:26 ` Paul Durrant
0 siblings, 0 replies; 5+ messages in thread
From: Paul Durrant @ 2014-10-10 13:26 UTC (permalink / raw)
To: Jan Beulich; +Cc: Andrew Cooper, Keir (Xen.org), xen-devel@lists.xenproject.org
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 10 October 2014 14:02
> To: Paul Durrant
> Cc: Andrew Cooper; xen-devel@lists.xenproject.org; Keir (Xen.org)
> Subject: Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter
> when domain is paused
>
> >>> On 10.10.14 at 11:07, <paul.durrant@citrix.com> wrote:
> > @@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct
> domain *d, hvm_domain_context_t *h)
> > {
> > struct hvm_viridian_domain_context ctxt;
> >
> > - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> > + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> > return -EINVAL;
> >
> > - d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> > - d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> > + d->arch.hvm_domain.viridian.time_ref_count.val =
> ctxt.time_ref_count;
> > + d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> > + d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
>
> Is loading zero here correct when migration comes from an older
> hypervisor?
The enlightenment can only be enabled in 4.5 so, providing this patch goes into 4.5, there is no compatibility problem.
>
> > @@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8
> reason)
> > v->paused_for_shutdown = 1;
> > }
> >
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_freeze(d);
> > +#endif
> > +
>
> So in the description you say "but only if the domain is not shutting
> down" - how does the above change fit that? Or is that comment
> ambiguous whether it refers to both the freezing and thawing or only
> the thawing?
It only applies to thawing. Sorry for the ambiguity.
Paul
>
> Jan
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
2014-10-10 9:07 [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused Paul Durrant
2014-10-10 13:01 ` Jan Beulich
@ 2014-10-13 10:14 ` Andrew Cooper
2014-10-13 12:13 ` Paul Durrant
1 sibling, 1 reply; 5+ messages in thread
From: Andrew Cooper @ 2014-10-13 10:14 UTC (permalink / raw)
To: Paul Durrant, xen-devel; +Cc: Keir Fraser, Jan Beulich
On 10/10/14 10:07, Paul Durrant wrote:
> In XenServer system test it has become apparent that versions of Windows
> that make use of the time reference counter enlightenment cannot cope with
> large jumps forward in the value read from the MSR. Specifically,
> suspending a very large domain took approx. 45 minutes to complete and
> when the domain was resumed it was discovered that the WMI (Windows
> Management Instrumentation) service had hung.
It is possibly worth noting in the commit message that the OSes idea of
the suspend time happens at the SHEDOP_suspend time, while the VMs real
suspend time is after all the memory is complete, and the toolstack
calls DOMCTL_gethvmcontext.
For a large VM (or slow network/storage), this is a delta of 45 minutes.
>
> This patch adds code to freeze the value of the time reference counter
> on domain pause and 'thaw' it on domain unpause, but only if the domain is
> not shutting down. The absolute value of the counter is then saved in the
> viridian domain context record. This prevents the guest OS from experiencing
> large jumps in the value of the MSR and has been shown to reliably fix
> the problem with WMI.
>
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: Keir Fraser <keir@xen.org>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> xen/arch/x86/hvm/viridian.c | 62 +++++++++++++++++++++++++-------
> xen/common/domain.c | 41 +++++++++++++++------
> xen/include/asm-x86/hvm/hvm.h | 9 ++++-
> xen/include/asm-x86/hvm/viridian.h | 27 ++++++++++++++
> xen/include/public/arch-x86/hvm/save.h | 1 +
> 5 files changed, 116 insertions(+), 24 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
> index 6726168..7d55363 100644
> --- a/xen/arch/x86/hvm/viridian.c
> +++ b/xen/arch/x86/hvm/viridian.c
> @@ -289,6 +289,39 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
> return 1;
> }
>
> +static int64_t raw_trc_val(struct domain *d)
> +{
> + uint64_t tsc;
> + struct time_scale tsc_to_ns;
> +
> + tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
> +
> + /* convert tsc to count of 100ns periods */
> + set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
> + return scale_delta(tsc, &tsc_to_ns) / 100ul;
> +}
> +
> +void viridian_time_ref_count_freeze(struct domain *d)
> +{
> + struct viridian_time_ref_count *trc;
> +
> + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> +
> + if ( test_and_clear_bit(_TRC_running, &trc->flags) )
> + trc->val = raw_trc_val(d) + trc->off;
> +}
> +
> +void viridian_time_ref_count_thaw(struct domain *d)
> +{
> + struct viridian_time_ref_count *trc;
> +
> + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> +
> + if ( !d->is_shutting_down &&
> + !test_and_set_bit(_TRC_running, &trc->flags) )
> + trc->off = (int64_t)trc->val - raw_trc_val(d);
> +}
> +
> int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
> {
> struct vcpu *v = current;
> @@ -348,18 +381,19 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
>
> case VIRIDIAN_MSR_TIME_REF_COUNT:
> {
> - uint64_t tsc;
> - struct time_scale tsc_to_ns;
> + struct viridian_time_ref_count *trc;
> +
> + trc = &d->arch.hvm_domain.viridian.time_ref_count;
>
> if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
> return 0;
>
> - perfc_incr(mshv_rdmsr_time_ref_count);
> - tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
> + if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
> + printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT: accessed\n",
> + d->domain_id);
>
> - /* convert tsc to count of 100ns periods */
> - set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
> - *val = scale_delta(tsc, &tsc_to_ns) / 100ul;
> + perfc_incr(mshv_rdmsr_time_ref_count);
> + *val = raw_trc_val(d) + trc->off;
> break;
> }
>
> @@ -450,9 +484,10 @@ static int viridian_save_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
> if ( !is_viridian_domain(d) )
> return 0;
>
> - ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
> - ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
> -
> + ctxt.time_ref_count = d->arch.hvm_domain.viridian.time_ref_count.val;
> + ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
> + ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
> +
> return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
> }
>
> @@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
> {
> struct hvm_viridian_domain_context ctxt;
>
> - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> return -EINVAL;
>
> - d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> - d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> + d->arch.hvm_domain.viridian.time_ref_count.val = ctxt.time_ref_count;
> + d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> + d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
>
> return 0;
> }
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 190998c..af5dc82 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -43,6 +43,10 @@
> #include <xen/trace.h>
> #include <xen/tmem.h>
>
> +#ifdef CONFIG_X86
> +#include <asm/hvm/viridian.h>
> +#endif
> +
> /* Linux config option: propageted to domain0 */
> /* xen_processor_pmbits: xen control Cx, Px, ... */
> unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
> @@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8 reason)
> v->paused_for_shutdown = 1;
> }
>
> +#ifdef CONFIG_X86
> + if ( has_viridian_time_ref_count(d) )
> + viridian_time_ref_count_freeze(d);
> +#endif
> +
> __domain_finalise_shutdown(d);
>
> spin_unlock(&d->shutdown_lock);
> @@ -925,32 +934,44 @@ int vcpu_unpause_by_systemcontroller(struct vcpu *v)
> return 0;
> }
>
> -void domain_pause(struct domain *d)
> +void do_domain_pause(struct domain *d, bool_t nosync)
> {
> struct vcpu *v;
>
> - ASSERT(d != current->domain);
> -
> atomic_inc(&d->pause_count);
>
> for_each_vcpu( d, v )
> - vcpu_sleep_sync(v);
> + if ( nosync )
> + vcpu_sleep_nosync(v);
> + else
> + vcpu_sleep_sync(v);
> +
> +#ifdef CONFIG_X86
> + if ( has_viridian_time_ref_count(d) )
> + viridian_time_ref_count_freeze(d);
> +#endif
> }
>
> -void domain_pause_nosync(struct domain *d)
> +void domain_pause(struct domain *d)
> {
> - struct vcpu *v;
> -
> - atomic_inc(&d->pause_count);
> + ASSERT(d != current->domain);
> + do_domain_pause(d, 0);
> +}
>
> - for_each_vcpu( d, v )
> - vcpu_sleep_nosync(v);
> +void domain_pause_nosync(struct domain *d)
> +{
> + do_domain_pause(d, 1);
> }
All this fiddling with a boolean nosync parameter might be neater done
in the same style as I did with __domain_pause_by_systemcontroller(), by
passing a function pointer.
>
> void domain_unpause(struct domain *d)
> {
> struct vcpu *v;
>
> +#ifdef CONFIG_X86
> + if ( has_viridian_time_ref_count(d) )
> + viridian_time_ref_count_thaw(d);
> +#endif
> +
> if ( atomic_dec_and_test(&d->pause_count) )
> for_each_vcpu( d, v )
> vcpu_wake(v);
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 0d94c48..2dda908 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -340,12 +340,19 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
> return hvm_funcs.get_shadow_gs_base(v);
> }
>
> +
> +#define has_hvm_params(d) \
> + ((d)->arch.hvm_domain.params != NULL)
> +
> #define viridian_feature_mask(d) \
> - ((d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
> + (has_hvm_params(d) && (d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
The result of this expression is a boolean, not the viridian feature mask.
Furthermore, I don't think that has_hvm_params() is useful. Any HVM
domain will have params, and without an HVM check, looking into
d->arch.hvm_domain is invalid.
~Andrew
>
> #define is_viridian_domain(d) \
> (is_hvm_domain(d) && (viridian_feature_mask(d) & HVMPV_base_freq))
>
> +#define has_viridian_time_ref_count(d) \
> + (is_viridian_domain(d) && (viridian_feature_mask(d) & HVMPV_time_ref_count))
> +
> void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx,
> uint32_t *eax, uint32_t *ebx,
> uint32_t *ecx, uint32_t *edx);
> diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm-x86/hvm/viridian.h
> index 496da33..4cab2e8 100644
> --- a/xen/include/asm-x86/hvm/viridian.h
> +++ b/xen/include/asm-x86/hvm/viridian.h
> @@ -48,10 +48,24 @@ union viridian_hypercall_gpa
> } fields;
> };
>
> +struct viridian_time_ref_count
> +{
> + unsigned long flags;
> +
> +#define _TRC_accessed 0
> +#define TRC_accessed (1 << _TRC_accessed)
> +#define _TRC_running 1
> +#define TRC_running (1 << _TRC_running)
> +
> + uint64_t val;
> + int64_t off;
> +};
> +
> struct viridian_domain
> {
> union viridian_guest_os_id guest_os_id;
> union viridian_hypercall_gpa hypercall_gpa;
> + struct viridian_time_ref_count time_ref_count;
> };
>
> int
> @@ -75,4 +89,17 @@ rdmsr_viridian_regs(
> int
> viridian_hypercall(struct cpu_user_regs *regs);
>
> +void viridian_time_ref_count_freeze(struct domain *d);
> +void viridian_time_ref_count_thaw(struct domain *d);
> +
> #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
> index 16d85a3..88aab7e 100644
> --- a/xen/include/public/arch-x86/hvm/save.h
> +++ b/xen/include/public/arch-x86/hvm/save.h
> @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
> struct hvm_viridian_domain_context {
> uint64_t hypercall_gpa;
> uint64_t guest_os_id;
> + uint64_t time_ref_count;
> };
>
> DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
2014-10-13 10:14 ` Andrew Cooper
@ 2014-10-13 12:13 ` Paul Durrant
0 siblings, 0 replies; 5+ messages in thread
From: Paul Durrant @ 2014-10-13 12:13 UTC (permalink / raw)
To: Andrew Cooper, xen-devel@lists.xenproject.org; +Cc: Keir (Xen.org), Jan Beulich
> -----Original Message-----
> From: Andrew Cooper
> Sent: 13 October 2014 11:14
> To: Paul Durrant; xen-devel@lists.xenproject.org
> Cc: Keir (Xen.org); Jan Beulich
> Subject: Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter
> when domain is paused
>
> On 10/10/14 10:07, Paul Durrant wrote:
> > In XenServer system test it has become apparent that versions of Windows
> > that make use of the time reference counter enlightenment cannot cope
> with
> > large jumps forward in the value read from the MSR. Specifically,
> > suspending a very large domain took approx. 45 minutes to complete and
> > when the domain was resumed it was discovered that the WMI (Windows
> > Management Instrumentation) service had hung.
>
> It is possibly worth noting in the commit message that the OSes idea of
> the suspend time happens at the SHEDOP_suspend time, while the VMs real
> suspend time is after all the memory is complete, and the toolstack
> calls DOMCTL_gethvmcontext.
>
> For a large VM (or slow network/storage), this is a delta of 45 minutes.
>
Ok.
> >
> > This patch adds code to freeze the value of the time reference counter
> > on domain pause and 'thaw' it on domain unpause, but only if the domain is
> > not shutting down. The absolute value of the counter is then saved in the
> > viridian domain context record. This prevents the guest OS from
> experiencing
> > large jumps in the value of the MSR and has been shown to reliably fix
> > the problem with WMI.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Cc: Keir Fraser <keir@xen.org>
> > Cc: Jan Beulich <jbeulich@suse.com>
> > Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> > ---
> > xen/arch/x86/hvm/viridian.c | 62 +++++++++++++++++++++++++--
> -----
> > xen/common/domain.c | 41 +++++++++++++++------
> > xen/include/asm-x86/hvm/hvm.h | 9 ++++-
> > xen/include/asm-x86/hvm/viridian.h | 27 ++++++++++++++
> > xen/include/public/arch-x86/hvm/save.h | 1 +
> > 5 files changed, 116 insertions(+), 24 deletions(-)
> >
> > diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
> > index 6726168..7d55363 100644
> > --- a/xen/arch/x86/hvm/viridian.c
> > +++ b/xen/arch/x86/hvm/viridian.c
> > @@ -289,6 +289,39 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
> > return 1;
> > }
> >
> > +static int64_t raw_trc_val(struct domain *d)
> > +{
> > + uint64_t tsc;
> > + struct time_scale tsc_to_ns;
> > +
> > + tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
> > +
> > + /* convert tsc to count of 100ns periods */
> > + set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
> > + return scale_delta(tsc, &tsc_to_ns) / 100ul;
> > +}
> > +
> > +void viridian_time_ref_count_freeze(struct domain *d)
> > +{
> > + struct viridian_time_ref_count *trc;
> > +
> > + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> > +
> > + if ( test_and_clear_bit(_TRC_running, &trc->flags) )
> > + trc->val = raw_trc_val(d) + trc->off;
> > +}
> > +
> > +void viridian_time_ref_count_thaw(struct domain *d)
> > +{
> > + struct viridian_time_ref_count *trc;
> > +
> > + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> > +
> > + if ( !d->is_shutting_down &&
> > + !test_and_set_bit(_TRC_running, &trc->flags) )
> > + trc->off = (int64_t)trc->val - raw_trc_val(d);
> > +}
> > +
> > int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
> > {
> > struct vcpu *v = current;
> > @@ -348,18 +381,19 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t
> *val)
> >
> > case VIRIDIAN_MSR_TIME_REF_COUNT:
> > {
> > - uint64_t tsc;
> > - struct time_scale tsc_to_ns;
> > + struct viridian_time_ref_count *trc;
> > +
> > + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> >
> > if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
> > return 0;
> >
> > - perfc_incr(mshv_rdmsr_time_ref_count);
> > - tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
> > + if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
> > + printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT:
> accessed\n",
> > + d->domain_id);
> >
> > - /* convert tsc to count of 100ns periods */
> > - set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
> > - *val = scale_delta(tsc, &tsc_to_ns) / 100ul;
> > + perfc_incr(mshv_rdmsr_time_ref_count);
> > + *val = raw_trc_val(d) + trc->off;
> > break;
> > }
> >
> > @@ -450,9 +484,10 @@ static int viridian_save_domain_ctxt(struct domain
> *d, hvm_domain_context_t *h)
> > if ( !is_viridian_domain(d) )
> > return 0;
> >
> > - ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
> > - ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
> > -
> > + ctxt.time_ref_count = d-
> >arch.hvm_domain.viridian.time_ref_count.val;
> > + ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
> > + ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
> > +
> > return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
> > }
> >
> > @@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct
> domain *d, hvm_domain_context_t *h)
> > {
> > struct hvm_viridian_domain_context ctxt;
> >
> > - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> > + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> > return -EINVAL;
> >
> > - d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> > - d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> > + d->arch.hvm_domain.viridian.time_ref_count.val =
> ctxt.time_ref_count;
> > + d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> > + d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> >
> > return 0;
> > }
> > diff --git a/xen/common/domain.c b/xen/common/domain.c
> > index 190998c..af5dc82 100644
> > --- a/xen/common/domain.c
> > +++ b/xen/common/domain.c
> > @@ -43,6 +43,10 @@
> > #include <xen/trace.h>
> > #include <xen/tmem.h>
> >
> > +#ifdef CONFIG_X86
> > +#include <asm/hvm/viridian.h>
> > +#endif
> > +
> > /* Linux config option: propageted to domain0 */
> > /* xen_processor_pmbits: xen control Cx, Px, ... */
> > unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
> > @@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8
> reason)
> > v->paused_for_shutdown = 1;
> > }
> >
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_freeze(d);
> > +#endif
> > +
> > __domain_finalise_shutdown(d);
> >
> > spin_unlock(&d->shutdown_lock);
> > @@ -925,32 +934,44 @@ int vcpu_unpause_by_systemcontroller(struct
> vcpu *v)
> > return 0;
> > }
> >
> > -void domain_pause(struct domain *d)
> > +void do_domain_pause(struct domain *d, bool_t nosync)
> > {
> > struct vcpu *v;
> >
> > - ASSERT(d != current->domain);
> > -
> > atomic_inc(&d->pause_count);
> >
> > for_each_vcpu( d, v )
> > - vcpu_sleep_sync(v);
> > + if ( nosync )
> > + vcpu_sleep_nosync(v);
> > + else
> > + vcpu_sleep_sync(v);
> > +
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_freeze(d);
> > +#endif
> > }
> >
> > -void domain_pause_nosync(struct domain *d)
> > +void domain_pause(struct domain *d)
> > {
> > - struct vcpu *v;
> > -
> > - atomic_inc(&d->pause_count);
> > + ASSERT(d != current->domain);
> > + do_domain_pause(d, 0);
> > +}
> >
> > - for_each_vcpu( d, v )
> > - vcpu_sleep_nosync(v);
> > +void domain_pause_nosync(struct domain *d)
> > +{
> > + do_domain_pause(d, 1);
> > }
>
> All this fiddling with a boolean nosync parameter might be neater done
> in the same style as I did with __domain_pause_by_systemcontroller(), by
> passing a function pointer.
>
Yeah - probably would be neater. I was in two minds about that.
> >
> > void domain_unpause(struct domain *d)
> > {
> > struct vcpu *v;
> >
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_thaw(d);
> > +#endif
> > +
> > if ( atomic_dec_and_test(&d->pause_count) )
> > for_each_vcpu( d, v )
> > vcpu_wake(v);
> > diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-
> x86/hvm/hvm.h
> > index 0d94c48..2dda908 100644
> > --- a/xen/include/asm-x86/hvm/hvm.h
> > +++ b/xen/include/asm-x86/hvm/hvm.h
> > @@ -340,12 +340,19 @@ static inline unsigned long
> hvm_get_shadow_gs_base(struct vcpu *v)
> > return hvm_funcs.get_shadow_gs_base(v);
> > }
> >
> > +
> > +#define has_hvm_params(d) \
> > + ((d)->arch.hvm_domain.params != NULL)
> > +
> > #define viridian_feature_mask(d) \
> > - ((d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
> > + (has_hvm_params(d) && (d)-
> >arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
>
> The result of this expression is a boolean, not the viridian feature mask.
>
Dammit.
> Furthermore, I don't think that has_hvm_params() is useful. Any HVM
> domain will have params, and without an HVM check, looking into
> d->arch.hvm_domain is invalid.
>
Not true, which is why I made the change. domain_pause() is called on an HVM domain before the array is allocated.
Paul
> ~Andrew
>
> >
> > #define is_viridian_domain(d) \
> > (is_hvm_domain(d) && (viridian_feature_mask(d) &
> HVMPV_base_freq))
> >
> > +#define has_viridian_time_ref_count(d) \
> > + (is_viridian_domain(d) && (viridian_feature_mask(d) &
> HVMPV_time_ref_count))
> > +
> > void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx,
> > uint32_t *eax, uint32_t *ebx,
> > uint32_t *ecx, uint32_t *edx);
> > diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm-
> x86/hvm/viridian.h
> > index 496da33..4cab2e8 100644
> > --- a/xen/include/asm-x86/hvm/viridian.h
> > +++ b/xen/include/asm-x86/hvm/viridian.h
> > @@ -48,10 +48,24 @@ union viridian_hypercall_gpa
> > } fields;
> > };
> >
> > +struct viridian_time_ref_count
> > +{
> > + unsigned long flags;
> > +
> > +#define _TRC_accessed 0
> > +#define TRC_accessed (1 << _TRC_accessed)
> > +#define _TRC_running 1
> > +#define TRC_running (1 << _TRC_running)
> > +
> > + uint64_t val;
> > + int64_t off;
> > +};
> > +
> > struct viridian_domain
> > {
> > union viridian_guest_os_id guest_os_id;
> > union viridian_hypercall_gpa hypercall_gpa;
> > + struct viridian_time_ref_count time_ref_count;
> > };
> >
> > int
> > @@ -75,4 +89,17 @@ rdmsr_viridian_regs(
> > int
> > viridian_hypercall(struct cpu_user_regs *regs);
> >
> > +void viridian_time_ref_count_freeze(struct domain *d);
> > +void viridian_time_ref_count_thaw(struct domain *d);
> > +
> > #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 4
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/xen/include/public/arch-x86/hvm/save.h
> b/xen/include/public/arch-x86/hvm/save.h
> > index 16d85a3..88aab7e 100644
> > --- a/xen/include/public/arch-x86/hvm/save.h
> > +++ b/xen/include/public/arch-x86/hvm/save.h
> > @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
> > struct hvm_viridian_domain_context {
> > uint64_t hypercall_gpa;
> > uint64_t guest_os_id;
> > + uint64_t time_ref_count;
> > };
> >
> > DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct
> hvm_viridian_domain_context);
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-10-13 12:13 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-10-10 9:07 [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused Paul Durrant
2014-10-10 13:01 ` Jan Beulich
2014-10-10 13:26 ` Paul Durrant
2014-10-13 10:14 ` Andrew Cooper
2014-10-13 12:13 ` Paul Durrant
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.