* [PATCH 1/2] factor out cpu_khz to common code
2008-07-22 16:15 [PATCH 0/2] Paravirt loops per jiffy calculation Glauber Costa
@ 2008-07-22 16:15 ` Glauber Costa
2008-07-22 16:15 ` [PATCH 2/2] use paravirt function to calculate cpu khz Glauber Costa
2008-07-23 10:07 ` [PATCH 0/2] Paravirt loops per jiffy calculation Gerd Hoffmann
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Glauber Costa @ 2008-07-22 16:15 UTC (permalink / raw)
To: kvm; +Cc: glommer, avi, aliguori, mtosatti, virtualization
KVM intends to use paravirt code to calibrate khz. Xen
current code will do just fine. So as a first step, factor out
code to pvclock.c.
Signed-off-by: Glauber Costa <gcosta@redhat.com>
---
arch/x86/kernel/pvclock.c | 12 ++++++++++++
arch/x86/xen/time.c | 11 ++---------
include/asm-x86/pvclock.h | 1 +
3 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 05fbe9a..1c54b5f 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -97,6 +97,18 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
return dst->version;
}
+unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
+{
+ u64 tsc_khz = 1000000ULL << 32;
+
+ do_div(tsc_khz, src->tsc_to_system_mul);
+ if (src->tsc_shift < 0)
+ tsc_khz <<= -src->tsc_shift;
+ else
+ tsc_khz >>= src->tsc_shift;
+ return tsc_khz;
+}
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 685b774..1eb88fe 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -200,17 +200,10 @@ unsigned long long xen_sched_clock(void)
/* Get the TSC speed from Xen */
unsigned long xen_tsc_khz(void)
{
- u64 xen_khz = 1000000ULL << 32;
- const struct pvclock_vcpu_time_info *info =
+ struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
- do_div(xen_khz, info->tsc_to_system_mul);
- if (info->tsc_shift < 0)
- xen_khz <<= -info->tsc_shift;
- else
- xen_khz >>= info->tsc_shift;
-
- return xen_khz;
+ return pvclock_tsc_khz(info);
}
static cycle_t xen_clocksource_read(void)
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
index 85b1bba..28783bc 100644
--- a/include/asm-x86/pvclock.h
+++ b/include/asm-x86/pvclock.h
@@ -6,6 +6,7 @@
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
struct pvclock_vcpu_time_info *vcpu,
struct timespec *ts);
--
1.5.5.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 2/2] use paravirt function to calculate cpu khz
2008-07-22 16:15 ` [PATCH 1/2] factor out cpu_khz to common code Glauber Costa
@ 2008-07-22 16:15 ` Glauber Costa
0 siblings, 0 replies; 10+ messages in thread
From: Glauber Costa @ 2008-07-22 16:15 UTC (permalink / raw)
To: kvm; +Cc: glommer, avi, aliguori, mtosatti, virtualization
We're currently facing timing problems in guests that do
calibration under heavy load, and then the load vanishes.
This means we'll have a much lower lpj than we actually should,
and delays end up taking less time than they should, which is a
nasty bug.
Solution is to pass on the lpj value from host to guest, and have it
preset.
Signed-off-by: Glauber Costa <gcosta@redhat.com>
---
arch/x86/kernel/kvmclock.c | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d02def0..84db1bb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -78,6 +78,34 @@ static cycle_t kvm_clock_read(void)
return ret;
}
+/*
+ * If we don't do that, there is the possibility that the guest
+ * will calibrate under heavy load - thus, getting a lower lpj -
+ * and execute the delays themselves without load. This is wrong,
+ * because no delay loop can finish beforehand.
+ * Any heuristics is subject to fail, because ultimately, a large
+ * poll of guests can be running and trouble each other. So we preset
+ * lpj here
+ */
+static unsigned long kvm_get_tsc_khz(void)
+{
+ return preset_lpj;
+}
+
+static void kvm_get_preset_lpj(void)
+{
+ struct pvclock_vcpu_time_info *src;
+ unsigned long khz;
+ u64 lpj;
+
+ src = &per_cpu(hv_clock, 0);
+ khz = pvclock_tsc_khz(src);
+
+ lpj = ((u64)khz * 1000);
+ do_div(lpj, HZ);
+ preset_lpj = lpj;
+}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
@@ -153,6 +181,7 @@ void __init kvmclock_init(void)
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
+ pv_time_ops.get_tsc_khz = kvm_get_tsc_khz;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
#endif
@@ -163,6 +192,7 @@ void __init kvmclock_init(void)
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
+ kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
}
}
--
1.5.5.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 0/2] Paravirt loops per jiffy calculation
2008-07-22 16:15 [PATCH 0/2] Paravirt loops per jiffy calculation Glauber Costa
2008-07-22 16:15 ` [PATCH 1/2] factor out cpu_khz to common code Glauber Costa
@ 2008-07-23 10:07 ` Gerd Hoffmann
2008-07-23 13:35 ` Glauber Costa
2008-07-27 7:59 ` Avi Kivity
2008-07-28 14:47 ` [PATCH 0/2] Paravirt loops per jiffy Glauber Costa
3 siblings, 1 reply; 10+ messages in thread
From: Gerd Hoffmann @ 2008-07-23 10:07 UTC (permalink / raw)
To: Glauber Costa; +Cc: kvm, aliguori, virtualization
Hi,
> Turns out that only implementing a pv get_tsc_khz is not enough, since
> it will only do the right thing for cpu0, which is not what we desire.
>
> So we set preset_lpj. Recall this code is run before arch parameter setup,
> so if we pass lpj=xx in the command line, it'll still work.
Hmm, why kvm needs the preset_lpj thing and xen doesn't?
cheers,
Gerd
--
http://kraxel.fedorapeople.org/xenner/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/2] Paravirt loops per jiffy calculation
2008-07-23 10:07 ` [PATCH 0/2] Paravirt loops per jiffy calculation Gerd Hoffmann
@ 2008-07-23 13:35 ` Glauber Costa
0 siblings, 0 replies; 10+ messages in thread
From: Glauber Costa @ 2008-07-23 13:35 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: Glauber Costa, kvm, aliguori, virtualization
On Wed, Jul 23, 2008 at 7:07 AM, Gerd Hoffmann <kraxel@redhat.com> wrote:
> Hi,
>
>> Turns out that only implementing a pv get_tsc_khz is not enough, since
>> it will only do the right thing for cpu0, which is not what we desire.
>>
>> So we set preset_lpj. Recall this code is run before arch parameter setup,
>> so if we pass lpj=xx in the command line, it'll still work.
>
> Hmm, why kvm needs the preset_lpj thing and xen doesn't?
I asked myself the same question. But from the snippet in calibrate.c:
if (preset_lpj) {
loops_per_jiffy = preset_lpj;
printk(KERN_INFO
"Calibrating delay loop (skipped) preset value.. ");
} else if ((smp_processor_id() == 0) && lpj_fine) {
loops_per_jiffy = lpj_fine;
printk(KERN_INFO
"Calibrating delay loop (skipped), "
"value calculated using timer frequency.. ");
} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
printk(KERN_INFO
"Calibrating delay using timer specific routine.. ");
The third one is the one we want to skip. The second one, runs only for CPU0.
calibrate_delay_loop() itself, is called very early from smp_callin,
and tsc calibration only sets lpj_fine. So I don't see too much of an
option here.
> cheers,
> Gerd
>
> --
> http://kraxel.fedorapeople.org/xenner/
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
Glauber Costa.
"Free as in Freedom"
http://glommer.net
"The less confident you are, the more serious you have to act."
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/2] Paravirt loops per jiffy calculation
2008-07-22 16:15 [PATCH 0/2] Paravirt loops per jiffy calculation Glauber Costa
2008-07-22 16:15 ` [PATCH 1/2] factor out cpu_khz to common code Glauber Costa
2008-07-23 10:07 ` [PATCH 0/2] Paravirt loops per jiffy calculation Gerd Hoffmann
@ 2008-07-27 7:59 ` Avi Kivity
2008-07-28 14:47 ` [PATCH 0/2] Paravirt loops per jiffy Glauber Costa
3 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2008-07-27 7:59 UTC (permalink / raw)
To: Glauber Costa; +Cc: kvm, glommer, aliguori, mtosatti, virtualization
Glauber Costa wrote:
> Hey,
>
> Over the last weeks, there has been some discussion regarding paravirt
> lpj calculation for kvm. Here's my shot at that.
>
> KVM hypervisor already put the right value in our pvclock structures,
> as part of the xen compatibility efforts.
>
> So the first patch moves the respective xen code to pvclock.c (since
> we'll be doing the same calculation anyway), while the second, implements
> functions for kvm that makes use of it.
>
> Turns out that only implementing a pv get_tsc_khz is not enough, since
> it will only do the right thing for cpu0, which is not what we desire.
>
> So we set preset_lpj. Recall this code is run before arch parameter setup,
> so if we pass lpj=xx in the command line, it'll still work.
>
> Have a good (paravirtual) time!
>
>
I only have this email out of the patchset. Please resend.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 0/2] Paravirt loops per jiffy
2008-07-22 16:15 [PATCH 0/2] Paravirt loops per jiffy calculation Glauber Costa
` (2 preceding siblings ...)
2008-07-27 7:59 ` Avi Kivity
@ 2008-07-28 14:47 ` Glauber Costa
2008-07-28 14:47 ` [PATCH 1/2] factor out cpu_khz to common code Glauber Costa
2008-07-29 14:17 ` [PATCH 0/2] Paravirt loops per jiffy Avi Kivity
3 siblings, 2 replies; 10+ messages in thread
From: Glauber Costa @ 2008-07-28 14:47 UTC (permalink / raw)
To: kvm; +Cc: avi, aliguori, mtosatti, virtualization
Resending, as avi seem to have missed the patches.
^ permalink raw reply [flat|nested] 10+ messages in thread* [PATCH 1/2] factor out cpu_khz to common code
2008-07-28 14:47 ` [PATCH 0/2] Paravirt loops per jiffy Glauber Costa
@ 2008-07-28 14:47 ` Glauber Costa
2008-07-28 14:47 ` [PATCH 2/2] use paravirt function to calculate cpu khz Glauber Costa
2008-07-29 14:17 ` [PATCH 0/2] Paravirt loops per jiffy Avi Kivity
1 sibling, 1 reply; 10+ messages in thread
From: Glauber Costa @ 2008-07-28 14:47 UTC (permalink / raw)
To: kvm; +Cc: avi, aliguori, mtosatti, virtualization
KVM intends to use paravirt code to calibrate khz. Xen
current code will do just fine. So as a first step, factor out
code to pvclock.c.
Signed-off-by: Glauber Costa <gcosta@redhat.com>
---
arch/x86/kernel/pvclock.c | 12 ++++++++++++
arch/x86/xen/time.c | 11 ++---------
include/asm-x86/pvclock.h | 1 +
3 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 05fbe9a..1c54b5f 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -97,6 +97,18 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
return dst->version;
}
+unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
+{
+ u64 tsc_khz = 1000000ULL << 32;
+
+ do_div(tsc_khz, src->tsc_to_system_mul);
+ if (src->tsc_shift < 0)
+ tsc_khz <<= -src->tsc_shift;
+ else
+ tsc_khz >>= src->tsc_shift;
+ return tsc_khz;
+}
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 685b774..1eb88fe 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -200,17 +200,10 @@ unsigned long long xen_sched_clock(void)
/* Get the TSC speed from Xen */
unsigned long xen_tsc_khz(void)
{
- u64 xen_khz = 1000000ULL << 32;
- const struct pvclock_vcpu_time_info *info =
+ struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
- do_div(xen_khz, info->tsc_to_system_mul);
- if (info->tsc_shift < 0)
- xen_khz <<= -info->tsc_shift;
- else
- xen_khz >>= info->tsc_shift;
-
- return xen_khz;
+ return pvclock_tsc_khz(info);
}
static cycle_t xen_clocksource_read(void)
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
index 85b1bba..28783bc 100644
--- a/include/asm-x86/pvclock.h
+++ b/include/asm-x86/pvclock.h
@@ -6,6 +6,7 @@
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
struct pvclock_vcpu_time_info *vcpu,
struct timespec *ts);
--
1.5.5.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 2/2] use paravirt function to calculate cpu khz
2008-07-28 14:47 ` [PATCH 1/2] factor out cpu_khz to common code Glauber Costa
@ 2008-07-28 14:47 ` Glauber Costa
0 siblings, 0 replies; 10+ messages in thread
From: Glauber Costa @ 2008-07-28 14:47 UTC (permalink / raw)
To: kvm; +Cc: avi, aliguori, mtosatti, virtualization
We're currently facing timing problems in guests that do
calibration under heavy load, and then the load vanishes.
This means we'll have a much lower lpj than we actually should,
and delays end up taking less time than they should, which is a
nasty bug.
Solution is to pass on the lpj value from host to guest, and have it
preset.
Signed-off-by: Glauber Costa <gcosta@redhat.com>
---
arch/x86/kernel/kvmclock.c | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d02def0..84db1bb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -78,6 +78,34 @@ static cycle_t kvm_clock_read(void)
return ret;
}
+/*
+ * If we don't do that, there is the possibility that the guest
+ * will calibrate under heavy load - thus, getting a lower lpj -
+ * and execute the delays themselves without load. This is wrong,
+ * because no delay loop can finish beforehand.
+ * Any heuristics is subject to fail, because ultimately, a large
+ * poll of guests can be running and trouble each other. So we preset
+ * lpj here
+ */
+static unsigned long kvm_get_tsc_khz(void)
+{
+ return preset_lpj;
+}
+
+static void kvm_get_preset_lpj(void)
+{
+ struct pvclock_vcpu_time_info *src;
+ unsigned long khz;
+ u64 lpj;
+
+ src = &per_cpu(hv_clock, 0);
+ khz = pvclock_tsc_khz(src);
+
+ lpj = ((u64)khz * 1000);
+ do_div(lpj, HZ);
+ preset_lpj = lpj;
+}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
@@ -153,6 +181,7 @@ void __init kvmclock_init(void)
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
+ pv_time_ops.get_tsc_khz = kvm_get_tsc_khz;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
#endif
@@ -163,6 +192,7 @@ void __init kvmclock_init(void)
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
+ kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
}
}
--
1.5.5.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 0/2] Paravirt loops per jiffy
2008-07-28 14:47 ` [PATCH 0/2] Paravirt loops per jiffy Glauber Costa
2008-07-28 14:47 ` [PATCH 1/2] factor out cpu_khz to common code Glauber Costa
@ 2008-07-29 14:17 ` Avi Kivity
1 sibling, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2008-07-29 14:17 UTC (permalink / raw)
To: Glauber Costa; +Cc: kvm, aliguori, mtosatti, virtualization
Glauber Costa wrote:
> Resending, as avi seem to have missed the patches.
>
Applied, thanks.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 10+ messages in thread