From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754177AbcLLKQR (ORCPT ); Mon, 12 Dec 2016 05:16:17 -0500 Received: from mail-wj0-f196.google.com ([209.85.210.196]:34503 "EHLO mail-wj0-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751266AbcLLKQQ (ORCPT ); Mon, 12 Dec 2016 05:16:16 -0500 Date: Mon, 12 Dec 2016 11:16:12 +0100 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Thomas Gleixner , "H. Peter Anvin" , Peter Zijlstra , Andrew Morton Subject: [GIT PULL] x86/platform changes for v4.10 Message-ID: <20161212101611.GA32039@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.24 (2015-08-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Linus, Please pull the latest x86-platform-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-platform-for-linus # HEAD: 80e9a4f21fd7ccce7e9b8439986fd028c9946dda x86/vmware: Add paravirt sched clock Two changes: - Implement various VMWare guest OS improvements/fixes (Alexey Makhalov) - Unexport a spurious export from the intel-mid platform driver (Lukas Wunner) Thanks, Ingo ------------------> Alexey Makhalov (4): x86/vmware: Read tsc_khz only once at boot time x86/vmware: Use tsc_khz value for calibrate_cpu() x86/vmware: Add basic paravirt ops support x86/vmware: Add paravirt sched clock Lukas Wunner (1): x86/platform/intel-mid: Unexport intel_mid_pci_set_power_state() Documentation/kernel-parameters.txt | 4 ++ arch/x86/kernel/cpu/vmware.c | 86 ++++++++++++++++++++++++++++++------- arch/x86/platform/intel-mid/pwr.c | 1 - 3 files changed, 74 insertions(+), 17 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 37babf91f2cb..b3b2ec00646f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2754,6 +2754,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page fault handling. + no-vmw-sched-clock + [X86,PV_OPS] Disable paravirtualized VMware scheduler + clock and use the default one. + no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting. steal time is computed, but won't influence scheduler behaviour diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 81160578b91a..cdbe38be28fd 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -24,10 +24,15 @@ #include #include #include +#include #include #include #include #include +#include + +#undef pr_fmt +#define pr_fmt(fmt) "vmware: " fmt #define CPUID_VMWARE_INFO_LEAF 0x40000000 #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 @@ -47,6 +52,8 @@ "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ "memory"); +static unsigned long vmware_tsc_khz __ro_after_init; + static inline int __vmware_platform(void) { uint32_t eax, ebx, ecx, edx; @@ -56,35 +63,80 @@ static inline int __vmware_platform(void) static unsigned long vmware_get_tsc_khz(void) { - uint64_t tsc_hz, lpj; - uint32_t eax, ebx, ecx, edx; + return vmware_tsc_khz; +} - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); +#ifdef CONFIG_PARAVIRT +static struct cyc2ns_data vmware_cyc2ns __ro_after_init; +static int vmw_sched_clock __initdata = 1; - tsc_hz = eax | (((uint64_t)ebx) << 32); - do_div(tsc_hz, 1000); - BUG_ON(tsc_hz >> 32); - pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", - (unsigned long) tsc_hz / 1000, - (unsigned long) tsc_hz % 1000); - - if (!preset_lpj) { - lpj = ((u64)tsc_hz * 1000); - do_div(lpj, HZ); - preset_lpj = lpj; - } +static __init int setup_vmw_sched_clock(char *s) +{ + vmw_sched_clock = 0; + return 0; +} +early_param("no-vmw-sched-clock", setup_vmw_sched_clock); - return tsc_hz; +static unsigned long long vmware_sched_clock(void) +{ + unsigned long long ns; + + ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul, + vmware_cyc2ns.cyc2ns_shift); + ns -= vmware_cyc2ns.cyc2ns_offset; + return ns; } +static void __init vmware_sched_clock_setup(void) +{ + struct cyc2ns_data *d = &vmware_cyc2ns; + unsigned long long tsc_now = rdtsc(); + + clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift, + vmware_tsc_khz, NSEC_PER_MSEC, 0); + d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul, + d->cyc2ns_shift); + + pv_time_ops.sched_clock = vmware_sched_clock; + pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset); +} + +static void __init vmware_paravirt_ops_setup(void) +{ + pv_info.name = "VMware hypervisor"; + pv_cpu_ops.io_delay = paravirt_nop; + + if (vmware_tsc_khz && vmw_sched_clock) + vmware_sched_clock_setup(); +} +#else +#define vmware_paravirt_ops_setup() do {} while (0) +#endif + static void __init vmware_platform_setup(void) { uint32_t eax, ebx, ecx, edx; + uint64_t lpj, tsc_khz; VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); if (ebx != UINT_MAX) { + lpj = tsc_khz = eax | (((uint64_t)ebx) << 32); + do_div(tsc_khz, 1000); + WARN_ON(tsc_khz >> 32); + pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", + (unsigned long) tsc_khz / 1000, + (unsigned long) tsc_khz % 1000); + + if (!preset_lpj) { + do_div(lpj, HZ); + preset_lpj = lpj; + } + + vmware_tsc_khz = tsc_khz; x86_platform.calibrate_tsc = vmware_get_tsc_khz; + x86_platform.calibrate_cpu = vmware_get_tsc_khz; + #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ lapic_timer_frequency = ecx / HZ; @@ -94,6 +146,8 @@ static void __init vmware_platform_setup(void) } else { pr_warn("Failed to get TSC freq from the hypervisor\n"); } + + vmware_paravirt_ops_setup(); } /* diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 5d3b45ad1c03..59628e4b5551 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -270,7 +270,6 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) return 0; } -EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); void intel_mid_pwr_power_off(void) {