From: tip-bot for Vitaly Kuznetsov <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, vkuznets@redhat.com,
haiyangz@microsoft.com, sthemmin@microsoft.com,
rostedt@goodmis.org, peterz@infradead.org, luto@kernel.org,
andy.shevchenko@gmail.com, torvalds@linux-foundation.org,
mingo@kernel.org, tglx@linutronix.de, hpa@zytor.com,
Jork.Loeser@microsoft.com, kys@microsoft.com,
sixiao@microsoft.com
Subject: [tip:x86/platform] x86/hyper-v: Use hypercall for remote TLB flush
Date: Thu, 10 Aug 2017 09:39:32 -0700 [thread overview]
Message-ID: <tip-88b46342eb037d35decda4d651cfee5216f4f822@git.kernel.org> (raw)
In-Reply-To: <20170802160921.21791-8-vkuznets@redhat.com>
Commit-ID: 88b46342eb037d35decda4d651cfee5216f4f822
Gitweb: http://git.kernel.org/tip/88b46342eb037d35decda4d651cfee5216f4f822
Author: Vitaly Kuznetsov <vkuznets@redhat.com>
AuthorDate: Wed, 2 Aug 2017 18:09:19 +0200
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 10 Aug 2017 16:50:23 +0200
x86/hyper-v: Use hypercall for remote TLB flush
Hyper-V host can suggest us to use hypercall for doing remote TLB flush,
this is supposed to work faster than IPIs.
Implementation details: to do HvFlushVirtualAddress{Space,List} hypercalls
we need to put the input somewhere in memory and we don't really want to
have memory allocation on each call so we pre-allocate per cpu memory areas
on boot.
pv_ops patching is happening very early so we need to separate
hyperv_setup_mmu_ops() and hyper_alloc_mmu().
It is possible and easy to implement local TLB flushing too and there is
even a hint for that. However, I don't see a room for optimization on the
host side as both hypercall and native tlb flush will result in vmexit. The
hint is also not set on modern Hyper-V versions.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jork Loeser <Jork.Loeser@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Simon Xiao <sixiao@microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: devel@linuxdriverproject.org
Link: http://lkml.kernel.org/r/20170802160921.21791-8-vkuznets@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/hv_init.c | 2 +
arch/x86/hyperv/mmu.c | 138 +++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/mshyperv.h | 3 +
arch/x86/include/uapi/asm/hyperv.h | 7 ++
arch/x86/kernel/cpu/mshyperv.c | 1 +
6 files changed, 152 insertions(+), 1 deletion(-)
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 171ae09..367a820 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1 @@
-obj-y := hv_init.o
+obj-y := hv_init.o mmu.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index e93b9a0..1a8eb55 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -140,6 +140,8 @@ void hyperv_init(void)
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hyper_alloc_mmu();
+
/*
* Register Hyper-V specific clocksource.
*/
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
new file mode 100644
index 0000000..9419a20
--- /dev/null
+++ b/arch/x86/hyperv/mmu.c
@@ -0,0 +1,138 @@
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
+#include <linux/hyperv.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/fpu/api.h>
+#include <asm/mshyperv.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_flush_pcpu {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+};
+
+/* Each gva in gva_list encodes up to 4096 pages to flush */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+static struct hv_flush_pcpu __percpu *pcpu_flush;
+
+/*
+ * Fills in gva_list starting from offset. Returns the number of items added.
+ */
+static inline int fill_gva_list(u64 gva_list[], int offset,
+ unsigned long start, unsigned long end)
+{
+ int gva_n = offset;
+ unsigned long cur = start, diff;
+
+ do {
+ diff = end > cur ? end - cur : 0;
+
+ gva_list[gva_n] = cur & PAGE_MASK;
+ /*
+ * Lower 12 bits encode the number of additional
+ * pages to flush (in addition to the 'cur' page).
+ */
+ if (diff >= HV_TLB_FLUSH_UNIT)
+ gva_list[gva_n] |= ~PAGE_MASK;
+ else if (diff)
+ gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+
+ cur += HV_TLB_FLUSH_UNIT;
+ gva_n++;
+
+ } while (cur < end);
+
+ return gva_n - offset;
+}
+
+static void hyperv_flush_tlb_others(const struct cpumask *cpus,
+ const struct flush_tlb_info *info)
+{
+ int cpu, vcpu, gva_n, max_gvas;
+ struct hv_flush_pcpu *flush;
+ u64 status = U64_MAX;
+ unsigned long flags;
+
+ if (!pcpu_flush || !hv_hypercall_pg)
+ goto do_native;
+
+ if (cpumask_empty(cpus))
+ return;
+
+ local_irq_save(flags);
+
+ flush = this_cpu_ptr(pcpu_flush);
+
+ if (info->mm) {
+ flush->address_space = virt_to_phys(info->mm->pgd);
+ flush->flags = 0;
+ } else {
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ }
+
+ flush->processor_mask = 0;
+ if (cpumask_equal(cpus, cpu_present_mask)) {
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ } else {
+ for_each_cpu(cpu, cpus) {
+ vcpu = hv_cpu_number_to_vp_number(cpu);
+ if (vcpu >= 64)
+ goto do_native;
+
+ __set_bit(vcpu, (unsigned long *)
+ &flush->processor_mask);
+ }
+ }
+
+ /*
+ * We can flush not more than max_gvas with one hypercall. Flush the
+ * whole address space if we were asked to do more.
+ */
+ max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+
+ if (info->end == TLB_FLUSH_ALL) {
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+ status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ flush, NULL);
+ } else if (info->end &&
+ ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+ status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ flush, NULL);
+ } else {
+ gva_n = fill_gva_list(flush->gva_list, 0,
+ info->start, info->end);
+ status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+ gva_n, 0, flush, NULL);
+ }
+
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ return;
+do_native:
+ native_flush_tlb_others(cpus, info);
+}
+
+void hyperv_setup_mmu_ops(void)
+{
+ if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) {
+ pr_info("Using hypercall for remote TLB flush\n");
+ pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ }
+}
+
+void hyper_alloc_mmu(void)
+{
+ if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)
+ pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+}
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index efd2f80..0d4b01c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -307,6 +307,8 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number)
}
void hyperv_init(void);
+void hyperv_setup_mmu_ops(void);
+void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
@@ -314,6 +316,7 @@ void hyperv_cleanup(void);
static inline void hyperv_init(void) {}
static inline bool hv_is_hypercall_page_setup(void) { return false; }
static inline void hyperv_cleanup(void) {}
+static inline void hyperv_setup_mmu_ops(void) {}
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 127ddad..a6fdd3b 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -242,6 +242,8 @@
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
/* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
@@ -259,6 +261,11 @@
#define HV_PROCESSOR_POWER_STATE_C2 2
#define HV_PROCESSOR_POWER_STATE_C3 3
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 70e717f..daefd67 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -249,6 +249,7 @@ static void __init ms_hyperv_init_platform(void)
* Setup the hook to get control post apic initialization.
*/
x86_platform.apic_post_init = hyperv_init;
+ hyperv_setup_mmu_ops();
#endif
}
next prev parent reply other threads:[~2017-08-10 16:44 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-02 16:09 [PATCH v10 0/9] Hyper-V: paravirtualized remote TLB flushing and hypercall improvements Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 1/9] x86/hyper-v: include hyperv/ only when CONFIG_HYPERV is set Vitaly Kuznetsov
2017-08-10 16:37 ` [tip:x86/platform] x86/hyper-v: Include " tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 2/9] x86/hyper-v: make hv_do_hypercall() inline Vitaly Kuznetsov
2017-08-10 16:37 ` [tip:x86/platform] x86/hyper-v: Make " tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 3/9] x86/hyper-v: fast hypercall implementation Vitaly Kuznetsov
2017-08-10 16:37 ` [tip:x86/platform] x86/hyper-v: Introduce " tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 4/9] hyper-v: use fast hypercall for HVCALL_SIGNAL_EVENT Vitaly Kuznetsov
2017-08-10 16:38 ` [tip:x86/platform] hyper-v: Use " tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 5/9] x86/hyper-v: implement rep hypercalls Vitaly Kuznetsov
2017-08-10 16:38 ` [tip:x86/platform] x86/hyper-v: Implement " tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 6/9] hyper-v: globalize vp_index Vitaly Kuznetsov
2017-08-10 16:39 ` [tip:x86/platform] hyper-v: Globalize vp_index tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 7/9] x86/hyper-v: use hypercall for remote TLB flush Vitaly Kuznetsov
2017-08-10 16:39 ` tip-bot for Vitaly Kuznetsov [this message]
2017-08-10 18:21 ` [tip:x86/platform] x86/hyper-v: Use " tip-bot for Vitaly Kuznetsov
2017-08-10 18:56 ` Peter Zijlstra
2017-08-10 18:59 ` KY Srinivasan
2017-08-10 19:08 ` Jork Loeser
2017-08-10 19:27 ` Peter Zijlstra
2017-08-11 1:15 ` Jork Loeser
2017-08-11 9:03 ` Peter Zijlstra
2017-08-11 11:29 ` Kirill A. Shutemov
2017-08-11 16:16 ` Linus Torvalds
2017-08-11 16:26 ` Peter Zijlstra
2017-08-14 13:20 ` Vitaly Kuznetsov
2017-08-16 16:42 ` Vitaly Kuznetsov
2017-08-16 21:41 ` Boris Ostrovsky
2017-08-17 7:58 ` Vitaly Kuznetsov
2017-08-11 9:23 ` Vitaly Kuznetsov
2017-08-11 10:56 ` Peter Zijlstra
2017-08-11 11:05 ` [Xen-devel] " Andrew Cooper
2017-08-11 12:07 ` Peter Zijlstra
2017-08-11 12:07 ` Peter Zijlstra
2017-08-16 0:02 ` [Xen-devel] " Steven Rostedt
2017-08-16 0:02 ` Steven Rostedt
2017-08-11 11:05 ` Andrew Cooper
2017-08-11 12:22 ` Juergen Gross
2017-08-11 12:22 ` Juergen Gross
2017-08-11 12:35 ` Peter Zijlstra
2017-08-11 12:46 ` Juergen Gross
2017-08-11 12:54 ` Peter Zijlstra
2017-08-11 13:07 ` Juergen Gross
2017-08-11 13:39 ` Peter Zijlstra
2017-08-11 13:39 ` Peter Zijlstra
2017-08-11 13:07 ` Juergen Gross
2017-08-11 12:54 ` Peter Zijlstra
2017-08-11 12:46 ` Juergen Gross
2017-08-11 12:35 ` Peter Zijlstra
2017-08-11 10:56 ` Peter Zijlstra
2017-08-02 16:09 ` [PATCH v10 8/9] x86/hyper-v: support extended CPU ranges for TLB flush hypercalls Vitaly Kuznetsov
2017-08-31 20:01 ` [tip:x86/platform] x86/hyper-v: Support " tip-bot for Vitaly Kuznetsov
2017-08-02 16:09 ` [PATCH v10 9/9] tracing/hyper-v: trace hyperv_mmu_flush_tlb_others() Vitaly Kuznetsov
2017-08-31 20:01 ` [tip:x86/platform] tracing/hyper-v: Trace hyperv_mmu_flush_tlb_others() tip-bot for Vitaly Kuznetsov
2017-08-10 11:58 ` [PATCH v10 0/9] Hyper-V: paravirtualized remote TLB flushing and hypercall improvements Vitaly Kuznetsov
2017-08-10 15:12 ` Ingo Molnar
2017-08-10 15:17 ` Vitaly Kuznetsov
2017-08-10 16:03 ` Ingo Molnar
2017-08-10 17:00 ` Vitaly Kuznetsov
2017-08-31 11:43 ` Vitaly Kuznetsov
2017-08-31 12:22 ` Ingo Molnar
2017-08-31 14:53 ` Vitaly Kuznetsov
2017-08-31 20:01 ` Ingo Molnar
2017-11-06 8:43 ` Wanpeng Li
2017-11-06 8:43 ` Wanpeng Li
2017-11-06 9:14 ` Vitaly Kuznetsov
2017-11-06 9:14 ` Vitaly Kuznetsov
2017-11-06 9:57 ` Wanpeng Li
2017-11-06 9:57 ` Wanpeng Li
2017-11-06 10:10 ` Vitaly Kuznetsov
2017-11-06 11:07 ` Wanpeng Li
2017-11-06 11:07 ` Wanpeng Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-88b46342eb037d35decda4d651cfee5216f4f822@git.kernel.org \
--to=tipbot@zytor.com \
--cc=Jork.Loeser@microsoft.com \
--cc=andy.shevchenko@gmail.com \
--cc=haiyangz@microsoft.com \
--cc=hpa@zytor.com \
--cc=kys@microsoft.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=sixiao@microsoft.com \
--cc=sthemmin@microsoft.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=vkuznets@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.