* [PATCH 0/2] Drivers: hv: Remove dependencies on guest page size
From: Himadri Pandya @ 2019-07-30 9:49 UTC (permalink / raw)
To: mikelley, kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa
Cc: x86, linux-hyperv, linux-kernel, Himadri Pandya
Hyper-V assumes page size to be 4KB. This might not be the case on ARM64
architecture. The first patch in this patchset introduces a hyper-v
specific function for allocating a zeroed page which can have a
different implementation on ARM64 to address the issue of different
guest and host page sizes. The second patch removes dependencies on
guest page size in vmbus by using hyper-v specific page symbol and
functions.
Himadri Pandya (2):
x86: hv: Add function to allocate zeroed page for Hyper-V
Drivers: hv: vmbus: Remove dependencies on guest page size
arch/x86/hyperv/hv_init.c | 8 ++++++++
arch/x86/include/asm/mshyperv.h | 1 +
drivers/hv/connection.c | 14 +++++++-------
drivers/hv/vmbus_drv.c | 6 +++---
4 files changed, 19 insertions(+), 10 deletions(-)
--
2.17.1
^ permalink raw reply
* [PATCH 1/2] x86: hv: Add function to allocate zeroed page for Hyper-V
From: Himadri Pandya @ 2019-07-30 9:49 UTC (permalink / raw)
To: mikelley, kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa
Cc: x86, linux-hyperv, linux-kernel, Himadri Pandya
In-Reply-To: <20190730094944.96007-1-himadri18.07@gmail.com>
Hyper-V assumes page size to be 4K. While this assumption holds true on
x86 architecture, it might not be true for ARM64 architecture. Hence
define hyper-v specific function to allocate a zeroed page which can
have a different implementation on ARM64 architecture to handle the
conflict between hyper-v's assumed page size and actual guest page size.
Signed-off-by: Himadri Pandya <himadri18.07@gmail.com>
---
arch/x86/hyperv/hv_init.c | 8 ++++++++
arch/x86/include/asm/mshyperv.h | 1 +
2 files changed, 9 insertions(+)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index d314cf1e15fd..2d0b9b2bddf7 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -45,6 +45,14 @@ void *hv_alloc_hyperv_page(void)
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
+void *hv_alloc_hyperv_zeroed_page(void)
+{
+ BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
+
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+}
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
+
void hv_free_hyperv_page(unsigned long addr)
{
free_page(addr);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f4138aeb4280..6b79515abb82 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -219,6 +219,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
void *hv_alloc_hyperv_page(void);
+void *hv_alloc_hyperv_zeroed_page(void);
void hv_free_hyperv_page(unsigned long addr);
void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
--
2.17.1
^ permalink raw reply related
* [PATCH 2/2] Drivers: hv: vmbus: Remove dependencies on guest page size
From: Himadri Pandya @ 2019-07-30 9:49 UTC (permalink / raw)
To: mikelley, kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa
Cc: x86, linux-hyperv, linux-kernel, Himadri Pandya
In-Reply-To: <20190730094944.96007-1-himadri18.07@gmail.com>
Hyper-V assumes page size to be 4K. This might not be the case for ARM64
architecture. Hence use hyper-v page size and page allocation function
to avoid conflicts between different host and guest page size on ARM64.
Signed-off-by: Himadri Pandya <himadri18.07@gmail.com>
---
drivers/hv/connection.c | 14 +++++++-------
drivers/hv/vmbus_drv.c | 6 +++---
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e15d4a0..dcb8f6a8c08c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -202,7 +202,7 @@ int vmbus_connect(void)
* abstraction stuff
*/
vmbus_connection.int_page =
- (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
+ (void *)hv_alloc_hyperv_zeroed_page();
if (vmbus_connection.int_page == NULL) {
ret = -ENOMEM;
goto cleanup;
@@ -211,14 +211,14 @@ int vmbus_connect(void)
vmbus_connection.recv_int_page = vmbus_connection.int_page;
vmbus_connection.send_int_page =
(void *)((unsigned long)vmbus_connection.int_page +
- (PAGE_SIZE >> 1));
+ (HV_HYP_PAGE_SIZE >> 1));
/*
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
- vmbus_connection.monitor_pages[0] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0);
- vmbus_connection.monitor_pages[1] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0);
+ vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
+ vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
@@ -291,12 +291,12 @@ void vmbus_disconnect(void)
destroy_workqueue(vmbus_connection.work_queue);
if (vmbus_connection.int_page) {
- free_pages((unsigned long)vmbus_connection.int_page, 0);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.int_page);
vmbus_connection.int_page = NULL;
}
- free_pages((unsigned long)vmbus_connection.monitor_pages[0], 0);
- free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages[1] = NULL;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc35290..2ee388a23c8f 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1186,7 +1186,7 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
- kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE,
+ kmsg_dump_get_buffer(dumper, true, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (bytes_written)
hyperv_report_panic_msg(panic_pa, bytes_written);
@@ -1290,7 +1290,7 @@ static int vmbus_bus_init(void)
*/
hv_get_crash_ctl(hyperv_crash_ctl);
if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
- hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
+ hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
if (hv_panic_page) {
ret = kmsg_dump_register(&hv_kmsg_dumper);
if (ret)
@@ -1319,7 +1319,7 @@ static int vmbus_bus_init(void)
hv_remove_vmbus_irq();
bus_unregister(&hv_bus);
- free_page((unsigned long)hv_panic_page);
+ hv_free_hyperv_page((unsigned long)hv_panic_page);
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
return ret;
--
2.17.1
^ permalink raw reply related
* Re: [PATCH 0/2] clocksource/Hyper-V: Add Hyper-V specific sched clock function
From: Tianyu Lan @ 2019-07-30 13:41 UTC (permalink / raw)
To: Vitaly Kuznetsov
Cc: Peter Zijlstra, Tianyu Lan, linux-arch, linux-hyperv,
linux-kernel@vger kernel org, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, KY Srinivasan, Haiyang Zhang,
Stephen Hemminger, Sasha Levin, Daniel Lezcano, Arnd Bergmann,
michael.h.kelley, ashal
In-Reply-To: <87wog1kpib.fsf@vitty.brq.redhat.com>
Hi Vitaly & Peter:
Thanks for your review.
On Mon, Jul 29, 2019 at 8:13 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Mon, Jul 29, 2019 at 12:59:26PM +0200, Vitaly Kuznetsov wrote:
> >> lantianyu1986@gmail.com writes:
> >>
> >> > From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> >> >
> >> > Hyper-V guests use the default native_sched_clock() in pv_ops.time.sched_clock
> >> > on x86. But native_sched_clock() directly uses the raw TSC value, which
> >> > can be discontinuous in a Hyper-V VM. Add the generic hv_setup_sched_clock()
> >> > to set the sched clock function appropriately. On x86, this sets
> >> > pv_ops.time.sched_clock to read the Hyper-V reference TSC value that is
> >> > scaled and adjusted to be continuous.
> >>
> >> Hypervisor can, in theory, disable TSC page and then we're forced to use
> >> MSR-based clocksource but using it as sched_clock() can be very slow,
> >> I'm afraid.
> >>
> >> On the other hand, what we have now is probably worse: TSC can,
> >> actually, jump backwards (e.g. on migration) and we're breaking the
> >> requirements for sched_clock().
> >
> > That (obviously) also breaks the requirements for using TSC as
> > clocksource.
> >
> > IOW, it breaks the entire purpose of having TSC in the first place.
>
> Currently, we mark raw TSC as unstable when running on Hyper-V (see
> 88c9281a9fba6), 'TSC page' (which is TSC * scale + offset) is being used
> instead. The problem is that 'TSC page' can be disabled by the
> hypervisor and in that case the only remaining clocksource is MSR-based
> (slow).
>
Yes, that will be slow if Hyper-V doesn't expose the hv TSC page and the
kernel uses the MSR-based clocksource. Each MSR read will trigger one VM-EXIT.
This also happens on other hypervisors (e.g., when KVM doesn't expose the KVM
clock). The hypervisor should take this into account when deciding which
clocksources to expose.
--
Best regards
Tianyu Lan
^ permalink raw reply
* RE: [PATCH v2] x86/hyper-v: Zero out the VP ASSIST PAGE to fix CPU offlining
From: Michael Kelley @ 2019-07-30 20:22 UTC (permalink / raw)
To: Dexuan Cui, Thomas Gleixner, vkuznets, Haiyang Zhang,
KY Srinivasan, Stephen Hemminger, Sasha Levin,
linux-hyperv@vger.kernel.org, Long Li, x86@kernel.org
Cc: Ingo Molnar, Borislav Petkov, H. Peter Anvin, jasowang@redhat.com,
driverdev-devel@linuxdriverproject.org,
linux-kernel@vger.kernel.org, apw@canonical.com,
marcelo.cerri@canonical.com, olaf@aepfle.de
In-Reply-To: <PU1P153MB0169B716A637FABF07433C04BFCB0@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>
From: Dexuan Cui <decui@microsoft.com> Sent: Thursday, July 18, 2019 8:23 PM
>
> The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
> 5.2.1 "GPA Overlay Pages" for the details) and here is an excerpt:
>
> "
> The hypervisor defines several special pages that "overlay" the guest's
> Guest Physical Addresses (GPA) space. Overlays are addressed GPA but are
> not included in the normal GPA map maintained internally by the hypervisor.
> Conceptually, they exist in a separate map that overlays the GPA map.
>
> If a page within the GPA space is overlaid, any SPA page mapped to the
> GPA page is effectively "obscured" and generally unreachable by the
> virtual processor through processor memory accesses.
>
> If an overlay page is disabled, the underlying GPA page is "uncovered",
> and an existing mapping becomes accessible to the guest.
> "
>
> SPA = System Physical Address = the final real physical address.
>
> When a CPU (e.g. CPU1) is being onlined, in hv_cpu_init(), we allocate the
> VP ASSIST PAGE and enable the EOI optimization for this CPU by writing the
> MSR HV_X64_MSR_VP_ASSIST_PAGE. From now on, hvp->apic_assist belongs to the
> special SPA page, and this CPU *always* uses hvp->apic_assist (which is
> shared with the hypervisor) to decide if it needs to write the EOI MSR.
>
> When a CPU (e.g. CPU1) is being offlined, on this CPU, we do:
> 1. in hv_cpu_die(), we disable the EOI optimization for this CPU, and from
> now on hvp->apic_assist belongs to the original "normal" SPA page;
> 2. we finish the remaining work of stopping this CPU;
> 3. this CPU is completely stopped.
>
> Between 1 and 3, this CPU can still receive interrupts (e.g. reschedule
> IPIs from CPU0, and Local APIC timer interrupts), and this CPU *must* write
> the EOI MSR for every interrupt received, otherwise the hypervisor may not
> deliver further interrupts, which may be needed to completely stop the CPU.
>
> So, after we disable the EOI optimization in hv_cpu_die(), we need to make
> sure hvp->apic_assist's bit0 is zero. The easiest way is we just zero out
> the page when it's allocated in hv_cpu_init().
>
> Note 1: after the "normal" SPA page is allocated and zeroed out, neither the
> hypervisor nor the guest writes into the page, so the page remains with
> zeros.
>
> Note 2: see Section 10.3.5 "EOI Assist" for the details of the EOI
> optimization. When the optimization is enabled, the guest can still write
> the EOI MSR register irrespective of the "No EOI required" value, though
> by doing so we can't benefit from the optimization.
>
> Fixes: ba696429d290 ("x86/hyper-v: Implement EOI assist")
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>
> v2: there is no code change. I just improved the comment and the changelog
> according to the discussion with tglx:
>
> https://lkml.org/lkml/2019/7/17/781
> https://lkml.org/lkml/2019/7/18/91
>
> arch/x86/hyperv/hv_init.c | 10 +++++++++-
> 1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 0e033ef11a9f..d26832cb38bb 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -60,8 +60,16 @@ static int hv_cpu_init(unsigned int cpu)
> if (!hv_vp_assist_page)
> return 0;
>
> + /*
> + * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
> + * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
> + * we always write the EOI MSR in hv_apic_eoi_write() *after* the
> + * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
> + * not be stopped in the case of CPU offlining and the VM will hang.
> + */
> if (!*hvp)
> - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
> + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
> + PAGE_KERNEL);
>
> if (*hvp) {
> u64 val;
> --
> 2.19.1
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
^ permalink raw reply
* RE: [PATCH 1/7] x86/hyper-v: Suspend/resume the hypercall page for hibernation
From: Michael Kelley @ 2019-07-30 22:18 UTC (permalink / raw)
To: Dexuan Cui, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1562650084-99874-2-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:29 PM
>
> This is needed for hibernation, e.g. when we resume the old kernel, we need
> to disable the "current" kernel's hypercall page and then resume the old
> kernel's.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> arch/x86/hyperv/hv_init.c | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 0e033ef..3005871 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -20,6 +20,7 @@
> #include <linux/hyperv.h>
> #include <linux/slab.h>
> #include <linux/cpuhotplug.h>
> +#include <linux/syscore_ops.h>
> #include <clocksource/hyperv_timer.h>
>
> void *hv_hypercall_pg;
> @@ -214,6 +215,34 @@ static int __init hv_pci_init(void)
> return 1;
> }
>
> +static int hv_suspend(void)
> +{
> + union hv_x64_msr_hypercall_contents hypercall_msr;
> +
> + /* Reset the hypercall page */
> + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> + hypercall_msr.enable = 0;
> + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> +
> + return 0;
> +}
> +
> +static void hv_resume(void)
> +{
> + union hv_x64_msr_hypercall_contents hypercall_msr;
> +
> + /* Re-enable the hypercall page */
> + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> + hypercall_msr.enable = 1;
> + hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
> + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> +}
> +
> +static struct syscore_ops hv_syscore_ops = {
> + .suspend = hv_suspend,
> + .resume = hv_resume,
> +};
> +
> /*
> * This function is to be invoked early in the boot sequence after the
> * hypervisor has been detected.
> @@ -294,6 +323,9 @@ void __init hyperv_init(void)
>
> /* Register Hyper-V specific clocksource */
> hv_init_clocksource();
> +
> + register_syscore_ops(&hv_syscore_ops);
> +
> return;
>
> remove_cpuhp_state:
> @@ -313,6 +345,8 @@ void hyperv_cleanup(void)
> {
> union hv_x64_msr_hypercall_contents hypercall_msr;
>
> + unregister_syscore_ops(&hv_syscore_ops);
> +
> /* Reset our OS id */
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
>
> --
> 1.8.3.1
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
^ permalink raw reply
* RE: [PATCH 2/7] clocksource/drivers: Suspend/resume Hyper-V clocksource for hibernation
From: Michael Kelley @ 2019-07-30 22:23 UTC (permalink / raw)
To: Dexuan Cui, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1562650084-99874-3-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:29 PM
>
> This is needed for hibernation, e.g. when we resume the old kernel, we need
> to disable the "current" kernel's TSC page and then resume the old kernel's.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> drivers/clocksource/hyperv_timer.c | 25 +++++++++++++++++++++++++
> 1 file changed, 25 insertions(+)
>
> diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
> index ba2c79e6..41c31a7 100644
> --- a/drivers/clocksource/hyperv_timer.c
> +++ b/drivers/clocksource/hyperv_timer.c
> @@ -237,12 +237,37 @@ static u64 read_hv_clock_tsc(struct clocksource *arg)
> return read_hv_sched_clock_tsc();
> }
>
> +static void suspend_hv_clock_tsc(struct clocksource *arg)
> +{
> + u64 tsc_msr;
> +
> + /* Disable the TSC page */
> + hv_get_reference_tsc(tsc_msr);
> + tsc_msr &= ~BIT_ULL(0);
> + hv_set_reference_tsc(tsc_msr);
> +}
> +
> +
> +static void resume_hv_clock_tsc(struct clocksource *arg)
> +{
> + phys_addr_t phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
> + u64 tsc_msr;
> +
> + /* Re-enable the TSC page */
> + hv_get_reference_tsc(tsc_msr);
> + tsc_msr &= GENMASK_ULL(11, 0);
> + tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
> + hv_set_reference_tsc(tsc_msr);
> +}
> +
> static struct clocksource hyperv_cs_tsc = {
> .name = "hyperv_clocksource_tsc_page",
> .rating = 400,
> .read = read_hv_clock_tsc,
> .mask = CLOCKSOURCE_MASK(64),
> .flags = CLOCK_SOURCE_IS_CONTINUOUS,
> + .suspend= suspend_hv_clock_tsc,
> + .resume = resume_hv_clock_tsc,
> };
> #endif
>
> --
> 1.8.3.1
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
^ permalink raw reply
* Re: [PATCH] hv: Use the correct style for SPDX License Identifier
From: Sasha Levin @ 2019-07-30 22:31 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: Nishad Kamdar, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
Joe Perches, Uwe Kleine-König, linux-hyperv, linux-kernel
In-Reply-To: <20190722140809.GA29862@kroah.com>
On Mon, Jul 22, 2019 at 04:08:09PM +0200, Greg Kroah-Hartman wrote:
>On Mon, Jul 22, 2019 at 07:01:17PM +0530, Nishad Kamdar wrote:
>> This patch corrects the SPDX License Identifier style
>> in the trace header file related to Microsoft Hyper-V
>> client drivers.
>> For C header files Documentation/process/license-rules.rst
>> mandates C-like comments (opposed to C source files where
>> C++ style should be used)
>>
>> Changes made by using a script provided by Joe Perches here:
>> https://lkml.org/lkml/2019/2/7/46
>>
>> Suggested-by: Joe Perches <joe@perches.com>
>> Signed-off-by: Nishad Kamdar <nishadkamdar@gmail.com>
>
>Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Queued up for hyperv-fixes, thanks!
--
Thanks,
Sasha
^ permalink raw reply
* RE: [PATCH 3/7] Drivers: hv: vmbus: Split hv_synic_init/cleanup into regs and timer settings
From: Michael Kelley @ 2019-07-30 22:35 UTC (permalink / raw)
To: Dexuan Cui, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1562650084-99874-4-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:29 PM
>
> There is only one functional change: the unnecessary check
> "if (sctrl.enable != 1) return -EFAULT;" is removed, because when we're in
> hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1.
>
> The new functions hv_synic_disable/enable_regs() will be used by a later patch
> to support hibernation.
Seems like this commit message doesn't really describe the main change.
How about:
Break out synic enable and disable operations into separate
hv_synic_disable_regs() and hv_synic_enable_regs() functions for use by a
later patch to support hibernation.
There is no functional change except the unnecessary check
"if (sctrl.enable != 1) return -EFAULT;" is removed, because when we're in
hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1.
Otherwise,
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> drivers/hv/hv.c | 66 ++++++++++++++++++++++++++---------------------
> drivers/hv/hyperv_vmbus.h | 2 ++
> 2 files changed, 39 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 6188fb7..fcc5279 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -154,7 +154,7 @@ void hv_synic_free(void)
> * retrieve the initialized message and event pages. Otherwise, we create and
> * initialize the message and event pages.
> */
> -int hv_synic_init(unsigned int cpu)
> +void hv_synic_enable_regs(unsigned int cpu)
> {
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
> @@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
> sctrl.enable = 1;
>
> hv_set_synic_state(sctrl.as_uint64);
> +}
> +
> +int hv_synic_init(unsigned int cpu)
> +{
> + hv_synic_enable_regs(cpu);
>
> hv_stimer_init(cpu);
>
> @@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
> /*
> * hv_synic_cleanup - Cleanup routine for hv_synic_init().
> */
> -int hv_synic_cleanup(unsigned int cpu)
> +void hv_synic_disable_regs(unsigned int cpu)
> {
> union hv_synic_sint shared_sint;
> union hv_synic_simp simp;
> union hv_synic_siefp siefp;
> union hv_synic_scontrol sctrl;
> +
> + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> +
> + shared_sint.masked = 1;
> +
> + /* Need to correctly cleanup in the case of SMP!!! */
> + /* Disable the interrupt */
> + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> +
> + hv_get_simp(simp.as_uint64);
> + simp.simp_enabled = 0;
> + simp.base_simp_gpa = 0;
> +
> + hv_set_simp(simp.as_uint64);
> +
> + hv_get_siefp(siefp.as_uint64);
> + siefp.siefp_enabled = 0;
> + siefp.base_siefp_gpa = 0;
> +
> + hv_set_siefp(siefp.as_uint64);
> +
> + /* Disable the global synic bit */
> + hv_get_synic_state(sctrl.as_uint64);
> + sctrl.enable = 0;
> + hv_set_synic_state(sctrl.as_uint64);
> +}
> +
> +int hv_synic_cleanup(unsigned int cpu)
> +{
> struct vmbus_channel *channel, *sc;
> bool channel_found = false;
> unsigned long flags;
>
> - hv_get_synic_state(sctrl.as_uint64);
> - if (sctrl.enable != 1)
> - return -EFAULT;
> -
> /*
> * Search for channels which are bound to the CPU we're about to
> * cleanup. In case we find one and vmbus is still connected we need to
> @@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
>
> hv_stimer_cleanup(cpu);
>
> - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> -
> - shared_sint.masked = 1;
> -
> - /* Need to correctly cleanup in the case of SMP!!! */
> - /* Disable the interrupt */
> - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> -
> - hv_get_simp(simp.as_uint64);
> - simp.simp_enabled = 0;
> - simp.base_simp_gpa = 0;
> -
> - hv_set_simp(simp.as_uint64);
> -
> - hv_get_siefp(siefp.as_uint64);
> - siefp.siefp_enabled = 0;
> - siefp.base_siefp_gpa = 0;
> -
> - hv_set_siefp(siefp.as_uint64);
> -
> - /* Disable the global synic bit */
> - sctrl.enable = 0;
> - hv_set_synic_state(sctrl.as_uint64);
> + hv_synic_disable_regs(cpu);
>
> return 0;
> }
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index 362e70e..26ea161 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -171,8 +171,10 @@ extern int hv_post_message(union hv_connection_id
> connection_id,
>
> extern void hv_synic_free(void);
>
> +extern void hv_synic_enable_regs(unsigned int cpu);
> extern int hv_synic_init(unsigned int cpu);
>
> +extern void hv_synic_disable_regs(unsigned int cpu);
> extern int hv_synic_cleanup(unsigned int cpu);
>
> /* Interface */
> --
> 1.8.3.1
^ permalink raw reply
* Re: [PATCH 1/2] hv_balloon: Use a static page for the balloon_up send buffer
From: Sasha Levin @ 2019-07-30 22:36 UTC (permalink / raw)
To: Dexuan Cui
Cc: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, Haiyang Zhang, KY Srinivasan,
linux-kernel@vger.kernel.org, Michael Kelley, Tianyu Lan,
olaf@aepfle.de, apw@canonical.com, jasowang@redhat.com, vkuznets,
marcelo.cerri@canonical.com
In-Reply-To: <1560537692-37400-1-git-send-email-decui@microsoft.com>
On Fri, Jun 14, 2019 at 06:42:17PM +0000, Dexuan Cui wrote:
>It's unnecessary to dynamically allocate the buffer.
>
>Signed-off-by: Dexuan Cui <decui@microsoft.com>
I've queued these up for hyperv-next, thanks!
--
Thanks,
Sasha
^ permalink raw reply
* RE: [PATCH 5/7] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Michael Kelley @ 2019-07-30 23:07 UTC (permalink / raw)
To: Dexuan Cui, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1562650084-99874-6-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:29 PM
>
> When the VM resumes, the host re-sends the offers. We should not add the
> offers to the global vmbus_connection.chn_list again.
>
> Added some debug code, in case the host screws up the exact info related to
> the offers.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> drivers/hv/channel_mgmt.c | 28 +++++++++++++++++++++++++++-
> 1 file changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index addcef5..a9aeeab 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -854,12 +854,38 @@ void vmbus_initiate_unload(bool crash)
> static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
> {
> struct vmbus_channel_offer_channel *offer;
> - struct vmbus_channel *newchannel;
> + struct vmbus_channel *oldchannel, *newchannel;
> + size_t offer_sz;
>
> offer = (struct vmbus_channel_offer_channel *)hdr;
>
> trace_vmbus_onoffer(offer);
>
> + mutex_lock(&vmbus_connection.channel_mutex);
> + oldchannel = relid2channel(offer->child_relid);
> + mutex_unlock(&vmbus_connection.channel_mutex);
> +
> + if (oldchannel != NULL) {
> + atomic_dec(&vmbus_connection.offer_in_progress);
> +
> + /*
> + * We're resuming from hibernation: we expect the host to send
> + * exactly the same offers that we had before the hibernation.
> + */
> + offer_sz = sizeof(*offer);
> + if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
> + return;
The offermsg contains "reserved" and "padding" fields. Does Hyper-V
guarantee that all these fields are the same in the new offer after resuming
from hibernation? Or should a less stringent check be made? For example,
I could imagine a newer version of Hyper-V allowing a VM that was
hibernated on an older version to be resumed. But one of the reserved fields
might be used in the newer version, and the comparison could fail
unnecessarily.
> +
> + pr_err("Mismatched offer from the host (relid=%d)!\n",
> + offer->child_relid);
> +
> + print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET, 4,
> + 4, &oldchannel->offermsg, offer_sz, false);
> + print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET, 4,
> + 4, offer, offer_sz, false);
The third argument to print_hex_dump() is the rowsize, which is documented as
having to be 16 or 32.
> + return;
> + }
> +
> /* Allocate the channel object and save this offer. */
> newchannel = alloc_channel();
> if (!newchannel) {
> --
> 1.8.3.1
^ permalink raw reply
* RE: [PATCH 3/7] Drivers: hv: vmbus: Split hv_synic_init/cleanup into regs and timer settings
From: Dexuan Cui @ 2019-07-30 23:18 UTC (permalink / raw)
To: Michael Kelley, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <MWHPR21MB0784D7CFED4961C5B3D310B4D7DC0@MWHPR21MB0784.namprd21.prod.outlook.com>
> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Tuesday, July 30, 2019 3:36 PM
>
> From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:29
> PM
> >
> > There is only one functional change: the unnecessary check
> > "if (sctrl.enable != 1) return -EFAULT;" is removed, because when we're in
> > hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1.
> >
> > The new functions hv_synic_disable/enable_regs() will be used by a later
> patch
> > to support hibernation.
>
> Seems like this commit message doesn't really describe the main change.
> How about:
>
> Break out synic enable and disable operations into separate
> hv_synic_disable_regs() and hv_synic_enable_regs() functions for use by a
> later patch to support hibernation.
>
> There is no functional change except the unnecessary check
> "if (sctrl.enable != 1) return -EFAULT;" is removed, because when we're in
> hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1.
>
> Otherwise,
>
> Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Thanks! I'll use your version as the changelog of v2. I'll change the
Subject accordingly.
Thanks,
-- Dexuan
^ permalink raw reply
* RE: [PATCH 6/7] Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
From: Michael Kelley @ 2019-07-30 23:25 UTC (permalink / raw)
To: Dexuan Cui, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1562650084-99874-7-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:30 PM
>
> This is needed when we resume the old kernel from the "current" kernel.
Perhaps a bit more descriptive commit message could be supplied?
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> drivers/hv/connection.c | 3 +--
> drivers/hv/hyperv_vmbus.h | 2 ++
> drivers/hv/vmbus_drv.c | 40 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 43 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 09829e1..806319c 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -59,8 +59,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
> }
> }
>
> -static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
> - __u32 version)
> +int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
> {
> int ret = 0;
> unsigned int cur_cpu;
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index 26ea161..e657197 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -274,6 +274,8 @@ struct vmbus_msginfo {
>
> extern struct vmbus_connection vmbus_connection;
>
> +int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
> +
> static inline void vmbus_send_interrupt(u32 relid)
> {
> sync_set_bit(relid, vmbus_connection.send_int_page);
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 1c2d935..1730e7b 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -2045,6 +2045,41 @@ static int vmbus_acpi_add(struct acpi_device *device)
> return ret_val;
> }
>
> +static int vmbus_bus_suspend(struct device *dev)
> +{
> + vmbus_initiate_unload(false);
> +
> + vmbus_connection.conn_state = DISCONNECTED;
> +
> + return 0;
> +}
> +
> +static int vmbus_bus_resume(struct device *dev)
> +{
> + struct vmbus_channel_msginfo *msginfo;
> + size_t msgsize;
> + int ret;
> +
> + msgsize = sizeof(*msginfo) +
> + sizeof(struct vmbus_channel_initiate_contact);
> +
> + msginfo = kzalloc(msgsize, GFP_KERNEL);
> +
> + if (msginfo == NULL)
> + return -ENOMEM;
> +
> + ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
I think this code answers my earlier question: Upon resume, we negotiate the same
VMbus protocol version we had at time of hibernation, even if running on a newer
version of Hyper-V that might support newer protocol versions. Hence the offer
messages should not have any previously reserved fields now being used. A
comment to this effect would be useful.
> +
> + kfree(msginfo);
> +
> + if (ret != 0)
> + return ret;
> +
> + vmbus_request_offers();
> +
> + return 0;
> +}
> +
> static const struct acpi_device_id vmbus_acpi_device_ids[] = {
> {"VMBUS", 0},
> {"VMBus", 0},
> @@ -2052,6 +2087,10 @@ static int vmbus_acpi_add(struct acpi_device *device)
> };
> MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
>
> +static const struct dev_pm_ops vmbus_bus_pm = {
> + SET_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
> +};
> +
> static struct acpi_driver vmbus_acpi_driver = {
> .name = "vmbus",
> .ids = vmbus_acpi_device_ids,
> @@ -2059,6 +2098,7 @@ static int vmbus_acpi_add(struct acpi_device *device)
> .add = vmbus_acpi_add,
> .remove = vmbus_acpi_remove,
> },
> + .drv.pm = &vmbus_bus_pm,
> };
>
> static void hv_kexec_handler(void)
> --
> 1.8.3.1
^ permalink raw reply
* RE: [PATCH 7/7] Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
From: Michael Kelley @ 2019-07-30 23:38 UTC (permalink / raw)
To: Dexuan Cui, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1562650084-99874-8-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:30 PM
>
> The high-level VSC drivers will implement device-specific callbacks.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> drivers/hv/vmbus_drv.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> include/linux/hyperv.h | 3 +++
> 2 files changed, 45 insertions(+)
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 1730e7b..e29e2171 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device)
> drv->shutdown(dev);
> }
>
> +/*
> + * vmbus_suspend - Suspend a vmbus device
> + */
> +static int vmbus_suspend(struct device *child_device)
> +{
> + struct hv_driver *drv;
> + struct hv_device *dev = device_to_hv_device(child_device);
> +
> + /* The device may not be attached yet */
> + if (!child_device->driver)
> + return 0;
> +
> + drv = drv_to_hv_drv(child_device->driver);
> + if (!drv->suspend)
> + return -EOPNOTSUPP;
> +
> + return drv->suspend(dev);
> +}
> +
> +/*
> + * vmbus_resume - Resume a vmbus device
> + */
> +static int vmbus_resume(struct device *child_device)
> +{
> + struct hv_driver *drv;
> + struct hv_device *dev = device_to_hv_device(child_device);
> +
> + /* The device may not be attached yet */
> + if (!child_device->driver)
> + return 0;
> +
> + drv = drv_to_hv_drv(child_device->driver);
> + if (!drv->resume)
> + return -EOPNOTSUPP;
> +
> + return drv->resume(dev);
> +}
>
> /*
> * vmbus_device_release - Final callback release of the vmbus child device
> @@ -926,6 +963,10 @@ static void vmbus_device_release(struct device *device)
> kfree(hv_dev);
> }
>
> +static const struct dev_pm_ops vmbus_pm = {
> + SET_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
> +};
> +
> /* The one and only one */
> static struct bus_type hv_bus = {
> .name = "vmbus",
> @@ -936,6 +977,7 @@ static void vmbus_device_release(struct device *device)
> .uevent = vmbus_uevent,
> .dev_groups = vmbus_dev_groups,
> .drv_groups = vmbus_drv_groups,
> + .pm = &vmbus_pm,
> };
>
> struct onmessage_work_context {
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 6256cc3..94443c4 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1149,6 +1149,9 @@ struct hv_driver {
> int (*remove)(struct hv_device *);
> void (*shutdown)(struct hv_device *);
>
> + int (*suspend)(struct hv_device *);
> + int (*resume)(struct hv_device *);
> +
> };
>
> /* Base device object */
> --
> 1.8.3.1
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
^ permalink raw reply
* RE: [PATCH 5/7] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Dexuan Cui @ 2019-07-31 0:01 UTC (permalink / raw)
To: Michael Kelley, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <MWHPR21MB078481A7C7F65E0297135D41D7DC0@MWHPR21MB0784.namprd21.prod.outlook.com>
> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Tuesday, July 30, 2019 4:07 PM
> > +
> > + if (oldchannel != NULL) {
> > + atomic_dec(&vmbus_connection.offer_in_progress);
> > +
> > + /*
> > + * We're resuming from hibernation: we expect the host to send
> > + * exactly the same offers that we had before the hibernation.
> > + */
> > + offer_sz = sizeof(*offer);
> > + if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
> > + return;
>
> The offermsg contains "reserved" and "padding" fields. Does Hyper-V
> guarantee that all these fields are the same in the new offer after resuming
> from hibernation?
Yes. I confirmed this with the Hyper-V team. The reserved/padding fields don't change
across hibernation. BTW, the fields are filled with zeros since they're not used.
> Or should a less stringent check be made? For example,
> I could imagine a newer version of Hyper-V allowing a VM that was
> hibernated on an older version to be resumed. But one of the reserved fields
> might be used in the newer version, and the comparison could fail
> unnecessarily.
Upon resume, the Linux VM always uses the same version that was in use when the
VM first booted up, before suspend, to re-negotiate with the host.
> > +
> > + pr_err("Mismatched offer from the host (relid=%d)!\n",
> > + offer->child_relid);
> > +
> > + print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
> 4,
> > + 4, &oldchannel->offermsg, offer_sz, false);
> > + print_hex_dump_debug("New vmbus offer: ",
> DUMP_PREFIX_OFFSET, 4,
> > + 4, offer, offer_sz, false);
>
> The third argument to print_hex_dump() is the rowsize and is specified as must
> be 16 or 32.
Thanks! I misunderstood the argument. I'll change it to 16.
Thanks,
-- Dexuan
^ permalink raw reply
* RE: [PATCH v3 4/9] x86/mm/tlb: Flush remote and local TLBs concurrently
From: Michael Kelley @ 2019-07-31 0:13 UTC (permalink / raw)
To: Nadav Amit, Andy Lutomirski, Dave Hansen
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Peter Zijlstra,
Thomas Gleixner, Ingo Molnar, KY Srinivasan, Haiyang Zhang,
Stephen Hemminger, Sasha Levin, Borislav Petkov, Juergen Gross,
Paolo Bonzini, Boris Ostrovsky, linux-hyperv@vger.kernel.org,
virtualization@lists.linux-foundation.org, kvm@vger.kernel.org,
xen-devel@lists.xenproject.org
In-Reply-To: <20190719005837.4150-5-namit@vmware.com>
From: Nadav Amit <namit@vmware.com> Sent: Thursday, July 18, 2019 5:59 PM
>
> To improve TLB shootdown performance, flush the remote and local TLBs
> concurrently. Introduce flush_tlb_multi() that does so. Introduce
> paravirtual versions of flush_tlb_multi() for KVM, Xen and hyper-v (Xen
> and hyper-v are only compile-tested).
>
> While the updated smp infrastructure is capable of running a function on
> a single local core, it is not optimized for this case. The multiple
> function calls and the indirect branch introduce some overhead, and
> might make local TLB flushes slower than they were before the recent
> changes.
>
> Before calling the SMP infrastructure, check if only a local TLB flush
> is needed to restore the lost performance in this common case. This
> requires checking mm_cpumask() one more time, but unless this mask is
> updated very frequently, this should not impact performance negatively.
>
> Cc: "K. Y. Srinivasan" <kys@microsoft.com>
> Cc: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: Sasha Levin <sashal@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: x86@kernel.org
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: linux-hyperv@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: virtualization@lists.linux-foundation.org
> Cc: kvm@vger.kernel.org
> Cc: xen-devel@lists.xenproject.org
> Signed-off-by: Nadav Amit <namit@vmware.com>
> ---
> arch/x86/hyperv/mmu.c | 10 +++---
> arch/x86/include/asm/paravirt.h | 6 ++--
> arch/x86/include/asm/paravirt_types.h | 4 +--
> arch/x86/include/asm/tlbflush.h | 8 ++---
> arch/x86/include/asm/trace/hyperv.h | 2 +-
> arch/x86/kernel/kvm.c | 11 +++++--
> arch/x86/kernel/paravirt.c | 2 +-
> arch/x86/mm/tlb.c | 47 ++++++++++++++++++---------
> arch/x86/xen/mmu_pv.c | 11 +++----
> include/trace/events/xen.h | 2 +-
> 10 files changed, 62 insertions(+), 41 deletions(-)
>
For the Hyper-V parts --
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
^ permalink raw reply
* RE: [PATCH 6/7] Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
From: Dexuan Cui @ 2019-07-31 0:16 UTC (permalink / raw)
To: Michael Kelley, linux-hyperv@vger.kernel.org,
gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org
In-Reply-To: <MWHPR21MB078461F2DB6FE0A6D0647B70D7DC0@MWHPR21MB0784.namprd21.prod.outlook.com>
> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Tuesday, July 30, 2019 4:26 PM
> From: Dexuan Cui <decui@microsoft.com> Sent: Monday, July 8, 2019 10:30
> PM
> >
> > This is needed when we resume the old kernel from the "current" kernel.
>
> Perhaps a bit more descriptive commit message could be supplied?
I'll use this as the new changelog:
Before Linux enters hibernation, it sends the CHANNELMSG_UNLOAD message to
the host so all the offers are gone. After hibernation, Linux needs to re-negotiate
with the host using the same vmbus protocol version (which was in use
before hibernation), and ask the host to re-offer the vmbus devices.
> > +
> > + ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
>
> I think this code answers my earlier question: Upon resume, we negotiate
> the same
> VMbus protocol version we had at time of hibernation, even if running on a
> newer
> version of Hyper-V that might support newer protocol versions. Hence the
> offer
> messages should not have any previously reserved fields now being used. A
> comment to this effect would be useful.
Ok, let me add a comment before the line.
I'll post a v2 of the patchset, including the "[PATCH 4/7]", which needs a small change.
Thanks,
-- Dexuan
^ permalink raw reply
* [PATCH v2 net] hv_sock: Fix hang when a connection is closed
From: Dexuan Cui @ 2019-07-31 1:25 UTC (permalink / raw)
To: Sunil Muthuswamy, David Miller, netdev@vger.kernel.org
Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
sashal@kernel.org, Michael Kelley, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, olaf@aepfle.de, apw@canonical.com,
jasowang@redhat.com, vkuznets, marcelo.cerri@canonical.com
There is a race condition for an established connection that is being closed
by the guest: the refcnt is 4 at the end of hvs_release() (Note: here the
'remove_sock' is false):
1 for the initial value;
1 for the sk being in the bound list;
1 for the sk being in the connected list;
1 for the delayed close_work.
After hvs_release() finishes, __vsock_release() -> sock_put(sk) *may*
decrease the refcnt to 3.
Concurrently, hvs_close_connection() runs in another thread:
calls vsock_remove_sock() to decrease the refcnt by 2;
calls sock_put() to decrease the refcnt to 0, and free the sk;
next, the "release_sock(sk)" may hang due to use-after-free.
In the above, after hvs_release() finishes, if hvs_close_connection() runs
faster than "__vsock_release() -> sock_put(sk)", then there is not any issue,
because at the beginning of hvs_close_connection(), the refcnt is still 4.
The issue can be resolved if an extra reference is taken when the
connection is established.
Fixes: a9eeb998c28d ("hv_sock: Add support for delayed close")
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: stable@vger.kernel.org
---
Changes in v2:
Changed the location of the sock_hold() call.
Updated the changelog accordingly.
Thanks Sunil for the suggestion!
With the proper kernel debugging options enabled, first a warning can
appear:
kworker/1:0/4467 is freeing memory ..., with a lock still held there!
stack backtrace:
Workqueue: events vmbus_onmessage_work [hv_vmbus]
Call Trace:
dump_stack+0x67/0x90
debug_check_no_locks_freed.cold.52+0x78/0x7d
slab_free_freelist_hook+0x85/0x140
kmem_cache_free+0xa5/0x380
__sk_destruct+0x150/0x260
hvs_close_connection+0x24/0x30 [hv_sock]
vmbus_onmessage_work+0x1d/0x30 [hv_vmbus]
process_one_work+0x241/0x600
worker_thread+0x3c/0x390
kthread+0x11b/0x140
ret_from_fork+0x24/0x30
and then the following release_sock(sk) can hang:
watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/1:0:4467]
...
irq event stamp: 62890
CPU: 1 PID: 4467 Comm: kworker/1:0 Tainted: G W 5.2.0+ #39
Workqueue: events vmbus_onmessage_work [hv_vmbus]
RIP: 0010:queued_spin_lock_slowpath+0x2b/0x1e0
...
Call Trace:
do_raw_spin_lock+0xab/0xb0
release_sock+0x19/0xb0
vmbus_onmessage_work+0x1d/0x30 [hv_vmbus]
process_one_work+0x241/0x600
worker_thread+0x3c/0x390
kthread+0x11b/0x140
ret_from_fork+0x24/0x30
net/vmw_vsock/hyperv_transport.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index f2084e3f7aa4..9d864ebeb7b3 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -312,6 +312,11 @@ static void hvs_close_connection(struct vmbus_channel *chan)
lock_sock(sk);
hvs_do_close_lock_held(vsock_sk(sk), true);
release_sock(sk);
+
+ /* Release the refcnt for the channel that's opened in
+ * hvs_open_connection().
+ */
+ sock_put(sk);
}
static void hvs_open_connection(struct vmbus_channel *chan)
@@ -407,6 +412,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
}
set_per_channel_state(chan, conn_from_host ? new : sk);
+
+ /* This reference will be dropped by hvs_close_connection(). */
+ sock_hold(conn_from_host ? new : sk);
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
/* Set the pending send size to max packet size to always get
--
2.19.1
^ permalink raw reply related
* [PATCH v2 0/7] Enhance the hv_vmbus driver to support hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
Hi,
This is the first patchset to enable hibernation when Linux runs on Hyper-V.
A second patchset to enhance the high-level VSC drivers (hv_netvsc,
hv_storvsc, etc.) for hibernation will be posted later. The second patchset
depends on this first patchset, so I hope this patchset can be accepted soon.
The changes in this patchset are mostly straightforward new code that only
runs when the VM enters hibernation, so IMHO it's pretty safe to have this
patchset, because the hibernation feature never worked for Linux VM running
on Hyper-V.
The patchset is rebased on the branch hyperv-next of
https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
and can also cleanly apply to Linus's tree.
Hi Greg, tglx,
I hope the patchset can go through Sasha's hyperv/linux.git tree, because
the changes belong to the hv code and they need to be together to work
properly.
Michael Kelley reviewed the v1 of the patchset, so I added his Reviewed-by
for patch 1, 2, 3 and 7. Michael, please review the other 3 patches again,
and give your Reviewed-by if the updated version is ok to you.
Changes in v2:
Patch 3: Improved the changelog and added a comment.
Patch 4: Remove the "hv_stimer_cleanup" in hv_synic_suspend(), because I
suppose https://lkml.org/lkml/2019/7/27/5 will be accepted. Also
improved changelog and the comment.
Patch 5: Fixed the third argument of print_hex_dump_debug(). Also improved
the changelog.
Patch 6: Improved the changelog and the comment. Added a check for the
'vmbus_proto_version' in vmbus_bus_resume().
Thanks,
Dexuan
Dexuan Cui (7):
x86/hyper-v: Suspend/resume the hypercall page for hibernation
clocksource/drivers: Suspend/resume Hyper-V clocksource for
hibernation
Drivers: hv: vmbus: Break out synic enable and disable operations
Drivers: hv: vmbus: Suspend/resume the synic for hibernation
Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for
hibernation
arch/x86/hyperv/hv_init.c | 34 +++++++++
drivers/clocksource/hyperv_timer.c | 25 +++++++
drivers/hv/channel_mgmt.c | 29 +++++++-
drivers/hv/connection.c | 3 +-
drivers/hv/hv.c | 66 ++++++++++--------
drivers/hv/hyperv_vmbus.h | 4 ++
drivers/hv/vmbus_drv.c | 138 +++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 3 +
8 files changed, 270 insertions(+), 32 deletions(-)
--
1.8.3.1
^ permalink raw reply
* [PATCH v2 1/7] x86/hyper-v: Suspend/resume the hypercall page for hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1564595464-56520-1-git-send-email-decui@microsoft.com>
This is needed for hibernation, e.g. when we resume the old kernel, we need
to disable the "current" kernel's hypercall page and then resume the old
kernel's.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
arch/x86/hyperv/hv_init.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0d25868..78e53d9 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -20,6 +20,7 @@
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
+#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
@@ -223,6 +224,34 @@ static int __init hv_pci_init(void)
return 1;
}
+static int hv_suspend(void)
+{
+ union hv_x64_msr_hypercall_contents hypercall_msr;
+
+ /* Reset the hypercall page */
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hypercall_msr.enable = 0;
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+ return 0;
+}
+
+static void hv_resume(void)
+{
+ union hv_x64_msr_hypercall_contents hypercall_msr;
+
+ /* Re-enable the hypercall page */
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hypercall_msr.enable = 1;
+ hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+}
+
+static struct syscore_ops hv_syscore_ops = {
+ .suspend = hv_suspend,
+ .resume = hv_resume,
+};
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -303,6 +332,9 @@ void __init hyperv_init(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
+
+ register_syscore_ops(&hv_syscore_ops);
+
return;
remove_cpuhp_state:
@@ -322,6 +354,8 @@ void hyperv_cleanup(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
+ unregister_syscore_ops(&hv_syscore_ops);
+
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 7/7] Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1564595464-56520-1-git-send-email-decui@microsoft.com>
The high-level VSC drivers will implement device-specific callbacks.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/hv/vmbus_drv.c | 42 ++++++++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 3 +++
2 files changed, 45 insertions(+)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ca6f4c8..337ecce 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->suspend)
+ return -EOPNOTSUPP;
+
+ return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->resume)
+ return -EOPNOTSUPP;
+
+ return drv->resume(dev);
+}
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -926,6 +963,10 @@ static void vmbus_device_release(struct device *device)
kfree(hv_dev);
}
+static const struct dev_pm_ops vmbus_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
/* The one and only one */
static struct bus_type hv_bus = {
.name = "vmbus",
@@ -936,6 +977,7 @@ static void vmbus_device_release(struct device *device)
.uevent = vmbus_uevent,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
+ .pm = &vmbus_pm,
};
struct onmessage_work_context {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..94443c4 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1149,6 +1149,9 @@ struct hv_driver {
int (*remove)(struct hv_device *);
void (*shutdown)(struct hv_device *);
+ int (*suspend)(struct hv_device *);
+ int (*resume)(struct hv_device *);
+
};
/* Base device object */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 4/7] Drivers: hv: vmbus: Suspend/resume the synic for hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1564595464-56520-1-git-send-email-decui@microsoft.com>
This is needed when we resume the old kernel from the "current" kernel.
Note: when hv_synic_suspend() and hv_synic_resume() run, all the
non-boot CPUs have been offlined, and interrupts are disabled on CPU0.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc..2ef375c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -30,6 +30,7 @@
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
@@ -2086,6 +2087,47 @@ static void hv_crash_handler(struct pt_regs *regs)
hyperv_cleanup();
};
+static int hv_synic_suspend(void)
+{
+ /*
+ * When we reach here, all the non-boot CPUs have been offlined, and
+ * the stimers on them have been unbound in hv_synic_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device().
+ *
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+ * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+ * because the interrupts remain disabled between syscore_suspend()
+ * and syscore_resume(): see create_image() and resume_target_kernel();
+ * 2) the stimer on CPU0 is automatically disabled later by
+ * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+ * would be triggered if we call clockevents_unbind_device(), which
+ * may sleep, in an interrupts-disabled context. So, we intentionally
+ * don't call hv_stimer_cleanup(0) here.
+ */
+
+ hv_synic_disable_regs(0);
+
+ return 0;
+}
+
+static void hv_synic_resume(void)
+{
+ hv_synic_enable_regs(0);
+
+ /*
+ * Note: we don't need to call hv_stimer_init(0), because the timer
+ * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+ * automatically re-enabled in timekeeping_resume().
+ */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+ .suspend = hv_synic_suspend,
+ .resume = hv_synic_resume,
+};
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -2116,6 +2158,8 @@ static int __init hv_acpi_init(void)
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
+ register_syscore_ops(&hv_synic_syscore_ops);
+
return 0;
cleanup:
@@ -2128,6 +2172,8 @@ static void __exit vmbus_exit(void)
{
int cpu;
+ unregister_syscore_ops(&hv_synic_syscore_ops);
+
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 6/7] Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1564595464-56520-1-git-send-email-decui@microsoft.com>
Before Linux enters hibernation, it sends the CHANNELMSG_UNLOAD message to
the host so all the offers are gone. After hibernation, Linux needs to
re-negotiate with the host using the same vmbus protocol version (which
was in use before hibernation), and ask the host to re-offer the vmbus
devices.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/connection.c | 3 +--
drivers/hv/hyperv_vmbus.h | 2 ++
drivers/hv/vmbus_drv.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e1..806319c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -59,8 +59,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
}
}
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
- __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
unsigned int cur_cpu;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 26ea161..e657197 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -274,6 +274,8 @@ struct vmbus_msginfo {
extern struct vmbus_connection vmbus_connection;
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
static inline void vmbus_send_interrupt(u32 relid)
{
sync_set_bit(relid, vmbus_connection.send_int_page);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2ef375c..ca6f4c8 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2043,6 +2043,51 @@ static int vmbus_acpi_add(struct acpi_device *device)
return ret_val;
}
+/*
+ * Bus-level suspend callback.
+ *
+ * Calls vmbus_initiate_unload() with crash == false and afterwards marks
+ * the connection as DISCONNECTED. The 'dev' argument is not used.
+ *
+ * Always returns 0.
+ */
+static int vmbus_bus_suspend(struct device *dev)
+{
+ vmbus_initiate_unload(false);
+
+ vmbus_connection.conn_state = DISCONNECTED;
+
+ return 0;
+}
+
+/*
+ * Bus-level resume callback: renegotiate the vmbus connection with the
+ * host and ask it to re-offer the vmbus devices.
+ *
+ * Only 'vmbus_proto_version', i.e. the version that was in use before
+ * hibernation, is tried; no other version is attempted. The 'dev'
+ * argument is not used.
+ *
+ * Return: 0 on success, -EINVAL if no valid protocol version is recorded,
+ * -ENOMEM if the message buffer cannot be allocated, otherwise the
+ * non-zero value returned by vmbus_negotiate_version().
+ */
+static int vmbus_bus_resume(struct device *dev)
+{
+	struct vmbus_channel_msginfo *info;
+	size_t len;
+	int err;
+
+	if (vmbus_proto_version == VERSION_INVAL ||
+	    vmbus_proto_version == 0) {
+		pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+		return -EINVAL;
+	}
+
+	/* Room for the msginfo header plus one initiate-contact message. */
+	len = sizeof(struct vmbus_channel_msginfo) +
+	      sizeof(struct vmbus_channel_initiate_contact);
+
+	info = kzalloc(len, GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	/* The buffer is only needed for the duration of the negotiation. */
+	err = vmbus_negotiate_version(info, vmbus_proto_version);
+	kfree(info);
+	if (err)
+		return err;
+
+	/* Connection is back: have the host send the offers again. */
+	vmbus_request_offers();
+
+	return 0;
+}
+
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
@@ -2050,6 +2095,10 @@ static int vmbus_acpi_add(struct acpi_device *device)
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+/*
+ * Power-management operations for the vmbus ACPI driver (hooked up through
+ * the driver's .drv.pm field): vmbus_bus_suspend() and vmbus_bus_resume()
+ * are installed via SET_SYSTEM_SLEEP_PM_OPS().
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
static struct acpi_driver vmbus_acpi_driver = {
.name = "vmbus",
.ids = vmbus_acpi_device_ids,
@@ -2057,6 +2106,7 @@ static int vmbus_acpi_add(struct acpi_device *device)
.add = vmbus_acpi_add,
.remove = vmbus_acpi_remove,
},
+ .drv.pm = &vmbus_bus_pm,
};
static void hv_kexec_handler(void)
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 5/7] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1564595464-56520-1-git-send-email-decui@microsoft.com>
When the VM resumes, the host re-sends the offers. We should not add the
offers to the global vmbus_connection.chn_list again.
Also add some debug output in case the host sends an offer whose contents
do not exactly match the offer we recorded before hibernation.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/channel_mgmt.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index addcef5..165f125 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -854,12 +854,39 @@ void vmbus_initiate_unload(bool crash)
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_offer_channel *offer;
- struct vmbus_channel *newchannel;
+ struct vmbus_channel *oldchannel, *newchannel;
+ size_t offer_sz;
offer = (struct vmbus_channel_offer_channel *)hdr;
trace_vmbus_onoffer(offer);
+ mutex_lock(&vmbus_connection.channel_mutex);
+ oldchannel = relid2channel(offer->child_relid);
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ if (oldchannel != NULL) {
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
+ /*
+ * We're resuming from hibernation: we expect the host to send
+ * exactly the same offers that we had before the hibernation.
+ */
+ offer_sz = sizeof(*offer);
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
+ return;
+
+ pr_err("Mismatched offer from the host (relid=%d)!\n",
+ offer->child_relid);
+
+ print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, &oldchannel->offermsg, offer_sz,
+ false);
+ print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, offer, offer_sz, false);
+ return;
+ }
+
/* Allocate the channel object and save this offer. */
newchannel = alloc_channel();
if (!newchannel) {
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 2/7] clocksource/drivers: Suspend/resume Hyper-V clocksource for hibernation
From: Dexuan Cui @ 2019-07-31 17:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1564595464-56520-1-git-send-email-decui@microsoft.com>
This is needed for hibernation: when we resume the old kernel, we need to
disable the "current" kernel's TSC page and then re-enable the old kernel's
TSC page.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/clocksource/hyperv_timer.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index ba2c79e6..41c31a7 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -237,12 +237,37 @@ static u64 read_hv_clock_tsc(struct clocksource *arg)
return read_hv_sched_clock_tsc();
}
+/*
+ * Clocksource suspend hook: disable the TSC page by clearing bit 0 of the
+ * reference-TSC MSR. All other bits of the MSR are written back unchanged.
+ */
+static void suspend_hv_clock_tsc(struct clocksource *arg)
+{
+	u64 msr_val;
+
+	/* Read-modify-write so that only bit 0 changes. */
+	hv_get_reference_tsc(msr_val);
+	msr_val &= ~BIT_ULL(0);
+	hv_set_reference_tsc(msr_val);
+}
+
+
+/*
+ * Clocksource resume hook: re-enable the TSC page by writing the physical
+ * address of 'tsc_pg' together with bit 0 into the reference-TSC MSR.
+ */
+static void resume_hv_clock_tsc(struct clocksource *arg)
+{
+	phys_addr_t pg_pa;
+	u64 msr_val;
+
+	pg_pa = page_to_phys(vmalloc_to_page(tsc_pg));
+
+	/*
+	 * Keep only bits 11:0 of the current MSR value, then OR in bit 0
+	 * and the page's physical address.
+	 */
+	hv_get_reference_tsc(msr_val);
+	msr_val = (msr_val & GENMASK_ULL(11, 0)) | BIT_ULL(0) | (u64)pg_pa;
+	hv_set_reference_tsc(msr_val);
+}
+
static struct clocksource hyperv_cs_tsc = {
.name = "hyperv_clocksource_tsc_page",
.rating = 400,
.read = read_hv_clock_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .suspend= suspend_hv_clock_tsc,
+ .resume = resume_hv_clock_tsc,
};
#endif
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox