From: Gleb Natapov
Subject: Re: [patch 10/16] x86: vdso: pvclock gettime support
Date: Wed, 14 Nov 2012 12:42:48 +0200
Message-ID: <20121114104248.GC13385@redhat.com>
References: <20121031224656.417434866@redhat.com> <20121031224824.293748067@redhat.com>
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Cc: kvm@vger.kernel.org, johnstul@us.ibm.com, jeremy@goop.org, glommer@parallels.com, zamsden@gmail.com, avi@redhat.com, pbonzini@redhat.com
To: Marcelo Tosatti
Content-Disposition: inline
In-Reply-To: <20121031224824.293748067@redhat.com>

On Wed, Oct 31, 2012 at 08:47:06PM -0200, Marcelo Tosatti wrote:
> Improve performance of time system calls when using Linux pvclock,
> by reading time info from fixmap visible copy of pvclock data.
> 
> Originally from Jeremy Fitzhardinge.
> 
> Signed-off-by: Marcelo Tosatti
> 
> Index: vsyscall/arch/x86/vdso/vclock_gettime.c
> ===================================================================
> --- vsyscall.orig/arch/x86/vdso/vclock_gettime.c
> +++ vsyscall/arch/x86/vdso/vclock_gettime.c
> @@ -22,6 +22,7 @@
>  #include <asm/hpet.h>
>  #include <asm/unistd.h>
>  #include <asm/io.h>
> +#include <asm/pvclock.h>
> 
>  #define gtod (&VVAR(vsyscall_gtod_data))
> 
> @@ -62,6 +63,70 @@ static notrace cycle_t vread_hpet(void)
>  	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
>  }
> 
> +#ifdef CONFIG_PARAVIRT_CLOCK
> +
> +static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
> +{
> +	const aligned_pvti_t *pvti_base;
> +	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
> +	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
> +
> +	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
> +
> +	pvti_base = (aligned_pvti_t *)__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
> +
> +	return &pvti_base[offset].info;
> +}
> +
> +static notrace cycle_t vread_pvclock(int *mode)
> +{
> +	const struct pvclock_vsyscall_time_info *pvti;
> +	cycle_t ret;
> +	u64 last;
> +	u32 version;
> +	u32 migrate_count;
> +	u8 flags;
> +	unsigned cpu, cpu1;
> +
> +
> +	/*
> +	 * When looping to get a consistent (time-info, tsc) pair, we
> +	 * also need to deal with the possibility we can switch vcpus,
> +	 * so make sure we always re-fetch time-info for the current vcpu.
> +	 */
> +	do {
> +		cpu = __getcpu() & VGETCPU_CPU_MASK;
> +		pvti = get_pvti(cpu);
> +
> +		migrate_count = pvti->migrate_count;
> +
> +		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
> +
> +		/*
> +		 * Test we're still on the cpu as well as the version.
> +		 * We could have been migrated just after the first
> +		 * vgetcpu but before fetching the version, so we
> +		 * wouldn't notice a version change.
> +		 */
> +		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
> +	} while (unlikely(cpu != cpu1 ||
> +			  (pvti->pvti.version & 1) ||
> +			  pvti->pvti.version != version ||
> +			  pvti->migrate_count != migrate_count));
> +
We can put the vcpu id into the higher bits of pvti.version. This will
save a couple of cycles by getting rid of the __getcpu() calls.
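
Something along these lines, perhaps (untested sketch, just to illustrate
the encoding; the 16/16 bit split, PVCLOCK_VERSION_BITS and the helper
names below are made up and do not exist in the patch):

/*
 * Illustration only: assume the host stamps the owning vcpu id into the
 * upper 16 bits of pvti->pvti.version on every update and keeps the
 * usual even/odd update sequence in the lower 16 bits.  These helpers
 * would sit next to vread_pvclock() in vclock_gettime.c.
 */
#define PVCLOCK_VERSION_BITS	16
#define PVCLOCK_VERSION_MASK	((1U << PVCLOCK_VERSION_BITS) - 1)

static notrace u32 pvti_version_seq(u32 version)
{
	/* Update counter: odd while the host is writing the record. */
	return version & PVCLOCK_VERSION_MASK;
}

static notrace unsigned int pvti_version_cpu(u32 version)
{
	/* vcpu id stamped by the host on the last update. */
	return version >> PVCLOCK_VERSION_BITS;
}

The retry condition could then test only (pvti_version_seq(version) & 1)
and pvti->pvti.version != version, with no second __getcpu(); but that
only catches a mid-read migration if the task migration notifier bumps
the sequence bits as well, so whether migrate_count can really go away
needs a closer look.
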
> +	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
> +		*mode = VCLOCK_NONE;
> +
> +	/* refer to tsc.c read_tsc() comment for rationale */
> +	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
> +
> +	if (likely(ret >= last))
> +		return ret;
> +
> +	return last;
> +}
> +#endif
> +
>  notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
>  {
>  	long ret;
> @@ -80,7 +145,7 @@ notrace static long vdso_fallback_gtod(s
>  }
> 
> 
> -notrace static inline u64 vgetsns(void)
> +notrace static inline u64 vgetsns(int *mode)
>  {
>  	long v;
>  	cycles_t cycles;
> @@ -88,6 +153,8 @@ notrace static inline u64 vgetsns(void)
>  		cycles = vread_tsc();
>  	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
>  		cycles = vread_hpet();
> +	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
> +		cycles = vread_pvclock(mode);
>  	else
>  		return 0;
>  	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
> @@ -107,7 +174,7 @@ notrace static int __always_inline do_re
>  		mode = gtod->clock.vclock_mode;
>  		ts->tv_sec = gtod->wall_time_sec;
>  		ns = gtod->wall_time_snsec;
> -		ns += vgetsns();
> +		ns += vgetsns(&mode);
>  		ns >>= gtod->clock.shift;
>  	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
> 
> @@ -127,7 +194,7 @@ notrace static int do_monotonic(struct t
>  		mode = gtod->clock.vclock_mode;
>  		ts->tv_sec = gtod->monotonic_time_sec;
>  		ns = gtod->monotonic_time_snsec;
> -		ns += vgetsns();
> +		ns += vgetsns(&mode);
>  		ns >>= gtod->clock.shift;
>  	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
>  	timespec_add_ns(ts, ns);
> Index: vsyscall/arch/x86/include/asm/vsyscall.h
> ===================================================================
> --- vsyscall.orig/arch/x86/include/asm/vsyscall.h
> +++ vsyscall/arch/x86/include/asm/vsyscall.h
> @@ -33,6 +33,23 @@ extern void map_vsyscall(void);
>   */
>  extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
> 
> +#define VGETCPU_CPU_MASK 0xfff
> +
> +static inline unsigned int __getcpu(void)
> +{
> +	unsigned int p;
> +
> +	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
> +		/* Load per CPU data from RDTSCP */
> +		native_read_tscp(&p);
> +	} else {
> +		/* Load per CPU data from GDT */
> +		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
> +	}
> +
> +	return p;
> +}
> +
>  #endif /* __KERNEL__ */
> 
>  #endif /* _ASM_X86_VSYSCALL_H */
> Index: vsyscall/arch/x86/vdso/vgetcpu.c
> ===================================================================
> --- vsyscall.orig/arch/x86/vdso/vgetcpu.c
> +++ vsyscall/arch/x86/vdso/vgetcpu.c
> @@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *n
>  {
>  	unsigned int p;
> 
> -	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
> -		/* Load per CPU data from RDTSCP */
> -		native_read_tscp(&p);
> -	} else {
> -		/* Load per CPU data from GDT */
> -		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
> -	}
> +	p = __getcpu();
> +
>  	if (cpu)
> -		*cpu = p & 0xfff;
> +		*cpu = p & VGETCPU_CPU_MASK;
>  	if (node)
>  		*node = p >> 12;
>  	return 0;
> 

--
			Gleb.