* Re: [PATCH 2/4] mshv: Introduce hv_deposit_memory helper functions
From: Mukesh R @ 2026-01-24 0:33 UTC (permalink / raw)
To: Stanislav Kinsburskii, kys, haiyangz, wei.liu, decui, longli
Cc: linux-hyperv, linux-kernel
In-Reply-To: <176913212322.89165.12915292926444353627.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>
On 1/22/26 17:35, Stanislav Kinsburskii wrote:
> Introduce hv_deposit_memory_node() and hv_deposit_memory() helper
> functions to handle memory deposition with proper error handling.
>
> The new hv_deposit_memory_node() function takes the hypervisor status
> as a parameter and validates it before depositing pages. It checks for
> HV_STATUS_INSUFFICIENT_MEMORY specifically and returns an error for
> unexpected status codes.
>
> This is a precursor patch to new out-of-memory error codes support.
> No functional changes intended.
>
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
> drivers/hv/hv_proc.c | 22 ++++++++++++++++++++--
> drivers/hv/mshv_root_hv_call.c | 25 +++++++++----------------
> drivers/hv/mshv_root_main.c | 3 +--
> include/asm-generic/mshyperv.h | 10 ++++++++++
> 4 files changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 80c66d1c74d5..c0c2bfc80d77 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -110,6 +110,23 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> }
> EXPORT_SYMBOL_GPL(hv_call_deposit_pages);
>
> +int hv_deposit_memory_node(int node, u64 partition_id,
> + u64 hv_status)
> +{
> + u32 num_pages;
> +
> + switch (hv_result(hv_status)) {
> + case HV_STATUS_INSUFFICIENT_MEMORY:
> + num_pages = 1;
> + break;
> + default:
> + hv_status_err(hv_status, "Unexpected!\n");
> + return -ENOMEM;
> + }
> + return hv_call_deposit_pages(node, partition_id, num_pages);
> +}
> +EXPORT_SYMBOL_GPL(hv_deposit_memory_node);
> +
Different hypercalls may want to deposit a different number of pages in one
shot. As the feature evolves and page sizes get mixed, we'd almost certainly
need that flexibility. So, imo, either we just don't do this for now, or we
add a num_pages parameter to be passed down.
Thanks,
-Mukesh
> bool hv_result_oom(u64 status)
> {
> switch (hv_result(status)) {
> @@ -155,7 +172,8 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
> }
> break;
> }
> - ret = hv_call_deposit_pages(node, hv_current_partition_id, 1);
> + ret = hv_deposit_memory_node(node, hv_current_partition_id,
> + status);
> } while (!ret);
>
> return ret;
> @@ -197,7 +215,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
> }
> break;
> }
> - ret = hv_call_deposit_pages(node, partition_id, 1);
> + ret = hv_deposit_memory_node(node, partition_id, status);
>
> } while (!ret);
>
> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> index 58c5cbf2e567..06f2bac8039d 100644
> --- a/drivers/hv/mshv_root_hv_call.c
> +++ b/drivers/hv/mshv_root_hv_call.c
> @@ -123,8 +123,7 @@ int hv_call_create_partition(u64 flags,
> break;
> }
> local_irq_restore(irq_flags);
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - hv_current_partition_id, 1);
> + ret = hv_deposit_memory(hv_current_partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -151,7 +150,7 @@ int hv_call_initialize_partition(u64 partition_id)
> ret = hv_result_to_errno(status);
> break;
> }
> - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
> + ret = hv_deposit_memory(partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -465,8 +464,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
> }
> local_irq_restore(flags);
>
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - partition_id, 1);
> + ret = hv_deposit_memory(partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -525,8 +523,7 @@ int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
> }
> local_irq_restore(flags);
>
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - partition_id, 1);
> + ret = hv_deposit_memory(partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -573,7 +570,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
>
> local_irq_restore(flags);
>
> - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
> + ret = hv_deposit_memory(partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -722,8 +719,7 @@ hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> ret = hv_result_to_errno(status);
> break;
> }
> - ret = hv_call_deposit_pages(NUMA_NO_NODE, port_partition_id, 1);
> -
> + ret = hv_deposit_memory(port_partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -776,8 +772,7 @@ hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
> ret = hv_result_to_errno(status);
> break;
> }
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - connection_partition_id, 1);
> + ret = hv_deposit_memory(connection_partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -848,8 +843,7 @@ static int hv_call_map_stats_page2(enum hv_stats_object_type type,
> break;
> }
>
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - hv_current_partition_id, 1);
> + ret = hv_deposit_memory(hv_current_partition_id, status);
> } while (!ret);
>
> return ret;
> @@ -885,8 +879,7 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type,
> return ret;
> }
>
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - hv_current_partition_id, 1);
> + ret = hv_deposit_memory(hv_current_partition_id, status);
> if (ret)
> return ret;
> } while (!ret);
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index f4697497f83e..5fc572e31cd7 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -264,8 +264,7 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
> if (!hv_result_oom(status))
> ret = hv_result_to_errno(status);
> else
> - ret = hv_call_deposit_pages(NUMA_NO_NODE,
> - pt_id, 1);
> + ret = hv_deposit_memory(pt_id, status);
> } while (!ret);
>
> args.status = hv_result(status);
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index b73352a7fc9e..c8e8976839f8 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -344,6 +344,7 @@ static inline bool hv_parent_partition(void)
> }
>
> bool hv_result_oom(u64 status);
> +int hv_deposit_memory_node(int node, u64 partition_id, u64 status);
> int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
> int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
> int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> @@ -353,6 +354,10 @@ static inline bool hv_root_partition(void) { return false; }
> static inline bool hv_l1vh_partition(void) { return false; }
> static inline bool hv_parent_partition(void) { return false; }
> static inline bool hv_result_oom(u64 status) { return false; }
> +static inline int hv_deposit_memory_node(int node, u64 partition_id, u64 status)
> +{
> + return -EOPNOTSUPP;
> +}
> static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> {
> return -EOPNOTSUPP;
> @@ -367,6 +372,11 @@ static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u3
> }
> #endif /* CONFIG_MSHV_ROOT */
>
> +static inline int hv_deposit_memory(u64 partition_id, u64 status)
> +{
> + return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
> +}
> +
> #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
> u8 __init get_vtl(void);
> #else
>
>
^ permalink raw reply
* Re: [PATCH 1/4] mshv: Introduce hv_result_oom() helper function
From: Mukesh R @ 2026-01-24 0:31 UTC (permalink / raw)
To: Stanislav Kinsburskii, kys, haiyangz, wei.liu, decui, longli
Cc: linux-hyperv, linux-kernel
In-Reply-To: <176913211358.89165.15502151782362191256.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>
On 1/22/26 17:35, Stanislav Kinsburskii wrote:
> Replace direct comparisons of hv_result(status) against
> HV_STATUS_INSUFFICIENT_MEMORY with a new hv_result_oom() helper function.
> This improves code readability and provides a consistent and extendable
> interface for checking out-of-memory conditions in hypercall results.
>
> No functional changes intended.
>
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
> drivers/hv/hv_proc.c | 14 ++++++++++++--
> drivers/hv/mshv_root_hv_call.c | 20 ++++++++++----------
> drivers/hv/mshv_root_main.c | 2 +-
> include/asm-generic/mshyperv.h | 3 +++
> 4 files changed, 26 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index fbb4eb3901bb..80c66d1c74d5 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -110,6 +110,16 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> }
> EXPORT_SYMBOL_GPL(hv_call_deposit_pages);
>
> +bool hv_result_oom(u64 status)
> +{
> + switch (hv_result(status)) {
> + case HV_STATUS_INSUFFICIENT_MEMORY:
> + return true;
> + }
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(hv_result_oom);
I had mentioned this during internal review previously, so forgive me
for repeating. I don't think using the _oom suffix is a good idea. Firstly,
the system is not out of memory; the hypervisor will continue to work
perfectly, it is just that the particular hypercall needs a bit more RAM to
succeed. Secondly, and more importantly, "oom" has come to mean a very
specific event in Linux, and as such reusing it for something totally
different is unnecessary. For example, another maintainer working on oom who
happens to see this, and is not familiar with Hyper-V, may get totally
confused and waste time unnecessarily.
It can easily be renamed: hv_result_insuff_mem, or hv_result_enomem,
or hv_result_deposit_ram, etc. There are many options.
Thanks,
-Mukesh
.... deleted ...
^ permalink raw reply
* Re: [PATCH] mshv: Make MSHV mutually exclusive with KEXEC
From: Mukesh R @ 2026-01-24 0:16 UTC (permalink / raw)
To: Stanislav Kinsburskii, kys, haiyangz, wei.liu, decui, longli
Cc: linux-hyperv, linux-kernel
In-Reply-To: <176920684805.250171.6817228088359793537.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>
On 1/23/26 14:20, Stanislav Kinsburskii wrote:
> The MSHV driver deposits kernel-allocated pages to the hypervisor during
> runtime and never withdraws them. This creates a fundamental incompatibility
> with KEXEC, as these deposited pages remain unavailable to the new kernel
> loaded via KEXEC, leading to potential system crashes upon kernel accessing
> hypervisor deposited pages.
>
> Make MSHV mutually exclusive with KEXEC until proper page lifecycle
> management is implemented.
>
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
> drivers/hv/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index 7937ac0cbd0f..cfd4501db0fa 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -74,6 +74,7 @@ config MSHV_ROOT
> # e.g. When withdrawing memory, the hypervisor gives back 4k pages in
> # no particular order, making it impossible to reassemble larger pages
> depends on PAGE_SIZE_4KB
> + depends on !KEXEC
> select EVENTFD
> select VIRT_XFER_TO_GUEST_WORK
> select HMM_MIRROR
>
>
Will this affect CRASH kexec? I see a few CONFIG_CRASH_DUMP references in
kexec.c, implying that crash dump might be involved. Or did you test kdump
and it was fine?
Thanks,
-Mukesh
^ permalink raw reply
* Re: [PATCH v4 6/7] mshv: Add data for printing stats page counters
From: Nuno Das Neves @ 2026-01-24 0:13 UTC (permalink / raw)
To: Stanislav Kinsburskii
Cc: Michael Kelley, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, kys@microsoft.com,
haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com,
longli@microsoft.com, prapal@linux.microsoft.com,
mrathor@linux.microsoft.com, paekkaladevi@linux.microsoft.com
In-Reply-To: <aXP2s7V7u6aScDHv@skinsburskii.localdomain>
On 1/23/2026 2:31 PM, Stanislav Kinsburskii wrote:
> On Fri, Jan 23, 2026 at 11:04:52AM -0800, Nuno Das Neves wrote:
>> On 1/23/2026 9:09 AM, Michael Kelley wrote:
>>> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
>>>>
>>>> Introduce hv_counters.c, containing static data corresponding to
>>>> HV_*_COUNTER enums in the hypervisor source. Defining the enum
>>>> members as an array instead makes more sense, since it will be
>>>> iterated over to print counter information to debugfs.
>>>
>>> I would have expected the filename to be mshv_counters.c, so that the association
>>> with the MS hypervisor is clear. And the file is inextricably linked to mshv_debugfs.c,
>>> which of course has the "mshv_" prefix. Or is there some thinking I'm not aware of
>>> for using the "hv_" prefix?
>>>
>> Good question - I originally thought of using hv_ because the definitions inside are
>> part of the hypervisor ABI, and hence also have the hv_ prefix.
>>
>> However you have a good point, and I'm not opposed to changing it.
>>
>> Maybe to just be super explicit: "mshv_debugfs_counters.c" ?
>>
>
> This is redundant from my POV.
> If these counters are only used by mshv_debugfs.c, then should rather be
> a part of this file.
> What was the reason to move them elsewhere?
>
Just a matter of taste - so there aren't ~450 lines of definitions at the beginning of
mshv_debugfs.c. But I'm not fussed. If you think it's better to just prepend the
definitions to mshv_debugfs.c, then that's an easy change.
Nuno
> Thanks,
> Stanislav
>
>>> Also, I see in Patch 7 of this series that hv_counters.c is #included as a .c file
>>> in mshv_debugfs.c. Is there a reason for doing the #include instead of adding
>>> hv_counters.c to the Makefile and building it on its own? You would need to
>>> add a handful of extern statements to mshv_root.h so that the tables are
>>> referenceable from mshv_debugfs.c. But that would seem to be the more
>>> normal way of doing things. #including a .c file is unusual.
>>>
>>
>> Yes...I thought I could avoid noise in mshv_root.h and the Makefile, since it's
>> only relevant for mshv_debugfs.c. However I could see this file (whether as .c or
>> .h) being misused and included elsewhere inadvertently, which would duplicate the
>> tables, so maybe doing it the normal way is a better idea, even if mshv_debugfs.c
>> is likely the only user.
>>
>>> See one more comment on the last line of this patch ...
>>>
<snip>
^ permalink raw reply
* Re: [PATCH] mshv: Make MSHV mutually exclusive with KEXEC
From: Nuno Das Neves @ 2026-01-24 0:09 UTC (permalink / raw)
To: Stanislav Kinsburskii, kys, haiyangz, wei.liu, decui, longli
Cc: linux-hyperv, linux-kernel
In-Reply-To: <176920684805.250171.6817228088359793537.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>
On 1/23/2026 2:20 PM, Stanislav Kinsburskii wrote:
> The MSHV driver deposits kernel-allocated pages to the hypervisor during
> runtime and never withdraws them. This creates a fundamental incompatibility
> with KEXEC, as these deposited pages remain unavailable to the new kernel
> loaded via KEXEC, leading to potential system crashes upon kernel accessing
> hypervisor deposited pages.
>
> Make MSHV mutually exclusive with KEXEC until proper page lifecycle
> management is implemented.
>
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
> drivers/hv/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index 7937ac0cbd0f..cfd4501db0fa 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -74,6 +74,7 @@ config MSHV_ROOT
> # e.g. When withdrawing memory, the hypervisor gives back 4k pages in
> # no particular order, making it impossible to reassemble larger pages
> depends on PAGE_SIZE_4KB
> + depends on !KEXEC
> select EVENTFD
> select VIRT_XFER_TO_GUEST_WORK
> select HMM_MIRROR
>
>
Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
^ permalink raw reply
* Re: [PATCH v4 6/7] mshv: Add data for printing stats page counters
From: Stanislav Kinsburskii @ 2026-01-23 22:31 UTC (permalink / raw)
To: Nuno Das Neves
Cc: Michael Kelley, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, kys@microsoft.com,
haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com,
longli@microsoft.com, prapal@linux.microsoft.com,
mrathor@linux.microsoft.com, paekkaladevi@linux.microsoft.com
In-Reply-To: <2ea6f13f-ac2e-4ed7-9f2c-6c079cb25b85@linux.microsoft.com>
On Fri, Jan 23, 2026 at 11:04:52AM -0800, Nuno Das Neves wrote:
> On 1/23/2026 9:09 AM, Michael Kelley wrote:
> > From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
> >>
> >> Introduce hv_counters.c, containing static data corresponding to
> >> HV_*_COUNTER enums in the hypervisor source. Defining the enum
> >> members as an array instead makes more sense, since it will be
> >> iterated over to print counter information to debugfs.
> >
> > I would have expected the filename to be mshv_counters.c, so that the association
> > with the MS hypervisor is clear. And the file is inextricably linked to mshv_debugfs.c,
> > which of course has the "mshv_" prefix. Or is there some thinking I'm not aware of
> > for using the "hv_" prefix?
> >
> Good question - I originally thought of using hv_ because the definitions inside are
> part of the hypervisor ABI, and hence also have the hv_ prefix.
>
> However you have a good point, and I'm not opposed to changing it.
>
> Maybe to just be super explicit: "mshv_debugfs_counters.c" ?
>
This is redundant from my POV.
If these counters are only used by mshv_debugfs.c, then they should rather be
a part of that file.
What was the reason to move them elsewhere?
Thanks,
Stanislav
> > Also, I see in Patch 7 of this series that hv_counters.c is #included as a .c file
> > in mshv_debugfs.c. Is there a reason for doing the #include instead of adding
> > hv_counters.c to the Makefile and building it on its own? You would need to
> > add a handful of extern statements to mshv_root.h so that the tables are
> > referenceable from mshv_debugfs.c. But that would seem to be the more
> > normal way of doing things. #including a .c file is unusual.
> >
>
> Yes...I thought I could avoid noise in mshv_root.h and the Makefile, since it's
> only relevant for mshv_debugfs.c. However I could see this file (whether as .c or
> .h) being misused and included elsewhere inadvertently, which would duplicate the
> tables, so maybe doing it the normal way is a better idea, even if mshv_debugfs.c
> is likely the only user.
>
> > See one more comment on the last line of this patch ...
> >
> >>
> >> Include hypervisor, logical processor, partition, and virtual
> >> processor counters.
> >>
> >> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> >> ---
> >> drivers/hv/hv_counters.c | 488 +++++++++++++++++++++++++++++++++++++++
> >> 1 file changed, 488 insertions(+)
> >> create mode 100644 drivers/hv/hv_counters.c
> >>
> >> diff --git a/drivers/hv/hv_counters.c b/drivers/hv/hv_counters.c
> >> new file mode 100644
> >> index 000000000000..a8e07e72cc29
> >> --- /dev/null
> >> +++ b/drivers/hv/hv_counters.c
> >> @@ -0,0 +1,488 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/*
> >> + * Copyright (c) 2026, Microsoft Corporation.
> >> + *
> >> + * Data for printing stats page counters via debugfs.
> >> + *
> >> + * Authors: Microsoft Linux virtualization team
> >> + */
> >> +
> >> +struct hv_counter_entry {
> >> + char *name;
> >> + int idx;
> >> +};
> >> +
> >> +/* HV_HYPERVISOR_COUNTER */
> >> +static struct hv_counter_entry hv_hypervisor_counters[] = {
> >> + { "HvLogicalProcessors", 1 },
> >> + { "HvPartitions", 2 },
> >> + { "HvTotalPages", 3 },
> >> + { "HvVirtualProcessors", 4 },
> >> + { "HvMonitoredNotifications", 5 },
> >> + { "HvModernStandbyEntries", 6 },
> >> + { "HvPlatformIdleTransitions", 7 },
> >> + { "HvHypervisorStartupCost", 8 },
> >> +
> >> + { "HvIOSpacePages", 10 },
> >> + { "HvNonEssentialPagesForDump", 11 },
> >> + { "HvSubsumedPages", 12 },
> >> +};
> >> +
> >> +/* HV_CPU_COUNTER */
> >> +static struct hv_counter_entry hv_lp_counters[] = {
> >> + { "LpGlobalTime", 1 },
> >> + { "LpTotalRunTime", 2 },
> >> + { "LpHypervisorRunTime", 3 },
> >> + { "LpHardwareInterrupts", 4 },
> >> + { "LpContextSwitches", 5 },
> >> + { "LpInterProcessorInterrupts", 6 },
> >> + { "LpSchedulerInterrupts", 7 },
> >> + { "LpTimerInterrupts", 8 },
> >> + { "LpInterProcessorInterruptsSent", 9 },
> >> + { "LpProcessorHalts", 10 },
> >> + { "LpMonitorTransitionCost", 11 },
> >> + { "LpContextSwitchTime", 12 },
> >> + { "LpC1TransitionsCount", 13 },
> >> + { "LpC1RunTime", 14 },
> >> + { "LpC2TransitionsCount", 15 },
> >> + { "LpC2RunTime", 16 },
> >> + { "LpC3TransitionsCount", 17 },
> >> + { "LpC3RunTime", 18 },
> >> + { "LpRootVpIndex", 19 },
> >> + { "LpIdleSequenceNumber", 20 },
> >> + { "LpGlobalTscCount", 21 },
> >> + { "LpActiveTscCount", 22 },
> >> + { "LpIdleAccumulation", 23 },
> >> + { "LpReferenceCycleCount0", 24 },
> >> + { "LpActualCycleCount0", 25 },
> >> + { "LpReferenceCycleCount1", 26 },
> >> + { "LpActualCycleCount1", 27 },
> >> + { "LpProximityDomainId", 28 },
> >> + { "LpPostedInterruptNotifications", 29 },
> >> + { "LpBranchPredictorFlushes", 30 },
> >> +#if IS_ENABLED(CONFIG_X86_64)
> >> + { "LpL1DataCacheFlushes", 31 },
> >> + { "LpImmediateL1DataCacheFlushes", 32 },
> >> + { "LpMbFlushes", 33 },
> >> + { "LpCounterRefreshSequenceNumber", 34 },
> >> + { "LpCounterRefreshReferenceTime", 35 },
> >> + { "LpIdleAccumulationSnapshot", 36 },
> >> + { "LpActiveTscCountSnapshot", 37 },
> >> + { "LpHwpRequestContextSwitches", 38 },
> >> + { "LpPlaceholder1", 39 },
> >> + { "LpPlaceholder2", 40 },
> >> + { "LpPlaceholder3", 41 },
> >> + { "LpPlaceholder4", 42 },
> >> + { "LpPlaceholder5", 43 },
> >> + { "LpPlaceholder6", 44 },
> >> + { "LpPlaceholder7", 45 },
> >> + { "LpPlaceholder8", 46 },
> >> + { "LpPlaceholder9", 47 },
> >> + { "LpSchLocalRunListSize", 48 },
> >> + { "LpReserveGroupId", 49 },
> >> + { "LpRunningPriority", 50 },
> >> + { "LpPerfmonInterruptCount", 51 },
> >> +#elif IS_ENABLED(CONFIG_ARM64)
> >> + { "LpCounterRefreshSequenceNumber", 31 },
> >> + { "LpCounterRefreshReferenceTime", 32 },
> >> + { "LpIdleAccumulationSnapshot", 33 },
> >> + { "LpActiveTscCountSnapshot", 34 },
> >> + { "LpHwpRequestContextSwitches", 35 },
> >> + { "LpPlaceholder2", 36 },
> >> + { "LpPlaceholder3", 37 },
> >> + { "LpPlaceholder4", 38 },
> >> + { "LpPlaceholder5", 39 },
> >> + { "LpPlaceholder6", 40 },
> >> + { "LpPlaceholder7", 41 },
> >> + { "LpPlaceholder8", 42 },
> >> + { "LpPlaceholder9", 43 },
> >> + { "LpSchLocalRunListSize", 44 },
> >> + { "LpReserveGroupId", 45 },
> >> + { "LpRunningPriority", 46 },
> >> +#endif
> >> +};
> >> +
> >> +/* HV_PROCESS_COUNTER */
> >> +static struct hv_counter_entry hv_partition_counters[] = {
> >> + { "PtVirtualProcessors", 1 },
> >> +
> >> + { "PtTlbSize", 3 },
> >> + { "PtAddressSpaces", 4 },
> >> + { "PtDepositedPages", 5 },
> >> + { "PtGpaPages", 6 },
> >> + { "PtGpaSpaceModifications", 7 },
> >> + { "PtVirtualTlbFlushEntires", 8 },
> >> + { "PtRecommendedTlbSize", 9 },
> >> + { "PtGpaPages4K", 10 },
> >> + { "PtGpaPages2M", 11 },
> >> + { "PtGpaPages1G", 12 },
> >> + { "PtGpaPages512G", 13 },
> >> + { "PtDevicePages4K", 14 },
> >> + { "PtDevicePages2M", 15 },
> >> + { "PtDevicePages1G", 16 },
> >> + { "PtDevicePages512G", 17 },
> >> + { "PtAttachedDevices", 18 },
> >> + { "PtDeviceInterruptMappings", 19 },
> >> + { "PtIoTlbFlushes", 20 },
> >> + { "PtIoTlbFlushCost", 21 },
> >> + { "PtDeviceInterruptErrors", 22 },
> >> + { "PtDeviceDmaErrors", 23 },
> >> + { "PtDeviceInterruptThrottleEvents", 24 },
> >> + { "PtSkippedTimerTicks", 25 },
> >> + { "PtPartitionId", 26 },
> >> +#if IS_ENABLED(CONFIG_X86_64)
> >> + { "PtNestedTlbSize", 27 },
> >> + { "PtRecommendedNestedTlbSize", 28 },
> >> + { "PtNestedTlbFreeListSize", 29 },
> >> + { "PtNestedTlbTrimmedPages", 30 },
> >> + { "PtPagesShattered", 31 },
> >> + { "PtPagesRecombined", 32 },
> >> + { "PtHwpRequestValue", 33 },
> >> + { "PtAutoSuspendEnableTime", 34 },
> >> + { "PtAutoSuspendTriggerTime", 35 },
> >> + { "PtAutoSuspendDisableTime", 36 },
> >> + { "PtPlaceholder1", 37 },
> >> + { "PtPlaceholder2", 38 },
> >> + { "PtPlaceholder3", 39 },
> >> + { "PtPlaceholder4", 40 },
> >> + { "PtPlaceholder5", 41 },
> >> + { "PtPlaceholder6", 42 },
> >> + { "PtPlaceholder7", 43 },
> >> + { "PtPlaceholder8", 44 },
> >> + { "PtHypervisorStateTransferGeneration", 45 },
> >> + { "PtNumberofActiveChildPartitions", 46 },
> >> +#elif IS_ENABLED(CONFIG_ARM64)
> >> + { "PtHwpRequestValue", 27 },
> >> + { "PtAutoSuspendEnableTime", 28 },
> >> + { "PtAutoSuspendTriggerTime", 29 },
> >> + { "PtAutoSuspendDisableTime", 30 },
> >> + { "PtPlaceholder1", 31 },
> >> + { "PtPlaceholder2", 32 },
> >> + { "PtPlaceholder3", 33 },
> >> + { "PtPlaceholder4", 34 },
> >> + { "PtPlaceholder5", 35 },
> >> + { "PtPlaceholder6", 36 },
> >> + { "PtPlaceholder7", 37 },
> >> + { "PtPlaceholder8", 38 },
> >> + { "PtHypervisorStateTransferGeneration", 39 },
> >> + { "PtNumberofActiveChildPartitions", 40 },
> >> +#endif
> >> +};
> >> +
> >> +/* HV_THREAD_COUNTER */
> >> +static struct hv_counter_entry hv_vp_counters[] = {
> >> + { "VpTotalRunTime", 1 },
> >> + { "VpHypervisorRunTime", 2 },
> >> + { "VpRemoteNodeRunTime", 3 },
> >> + { "VpNormalizedRunTime", 4 },
> >> + { "VpIdealCpu", 5 },
> >> +
> >> + { "VpHypercallsCount", 7 },
> >> + { "VpHypercallsTime", 8 },
> >> +#if IS_ENABLED(CONFIG_X86_64)
> >> + { "VpPageInvalidationsCount", 9 },
> >> + { "VpPageInvalidationsTime", 10 },
> >> + { "VpControlRegisterAccessesCount", 11 },
> >> + { "VpControlRegisterAccessesTime", 12 },
> >> + { "VpIoInstructionsCount", 13 },
> >> + { "VpIoInstructionsTime", 14 },
> >> + { "VpHltInstructionsCount", 15 },
> >> + { "VpHltInstructionsTime", 16 },
> >> + { "VpMwaitInstructionsCount", 17 },
> >> + { "VpMwaitInstructionsTime", 18 },
> >> + { "VpCpuidInstructionsCount", 19 },
> >> + { "VpCpuidInstructionsTime", 20 },
> >> + { "VpMsrAccessesCount", 21 },
> >> + { "VpMsrAccessesTime", 22 },
> >> + { "VpOtherInterceptsCount", 23 },
> >> + { "VpOtherInterceptsTime", 24 },
> >> + { "VpExternalInterruptsCount", 25 },
> >> + { "VpExternalInterruptsTime", 26 },
> >> + { "VpPendingInterruptsCount", 27 },
> >> + { "VpPendingInterruptsTime", 28 },
> >> + { "VpEmulatedInstructionsCount", 29 },
> >> + { "VpEmulatedInstructionsTime", 30 },
> >> + { "VpDebugRegisterAccessesCount", 31 },
> >> + { "VpDebugRegisterAccessesTime", 32 },
> >> + { "VpPageFaultInterceptsCount", 33 },
> >> + { "VpPageFaultInterceptsTime", 34 },
> >> + { "VpGuestPageTableMaps", 35 },
> >> + { "VpLargePageTlbFills", 36 },
> >> + { "VpSmallPageTlbFills", 37 },
> >> + { "VpReflectedGuestPageFaults", 38 },
> >> + { "VpApicMmioAccesses", 39 },
> >> + { "VpIoInterceptMessages", 40 },
> >> + { "VpMemoryInterceptMessages", 41 },
> >> + { "VpApicEoiAccesses", 42 },
> >> + { "VpOtherMessages", 43 },
> >> + { "VpPageTableAllocations", 44 },
> >> + { "VpLogicalProcessorMigrations", 45 },
> >> + { "VpAddressSpaceEvictions", 46 },
> >> + { "VpAddressSpaceSwitches", 47 },
> >> + { "VpAddressDomainFlushes", 48 },
> >> + { "VpAddressSpaceFlushes", 49 },
> >> + { "VpGlobalGvaRangeFlushes", 50 },
> >> + { "VpLocalGvaRangeFlushes", 51 },
> >> + { "VpPageTableEvictions", 52 },
> >> + { "VpPageTableReclamations", 53 },
> >> + { "VpPageTableResets", 54 },
> >> + { "VpPageTableValidations", 55 },
> >> + { "VpApicTprAccesses", 56 },
> >> + { "VpPageTableWriteIntercepts", 57 },
> >> + { "VpSyntheticInterrupts", 58 },
> >> + { "VpVirtualInterrupts", 59 },
> >> + { "VpApicIpisSent", 60 },
> >> + { "VpApicSelfIpisSent", 61 },
> >> + { "VpGpaSpaceHypercalls", 62 },
> >> + { "VpLogicalProcessorHypercalls", 63 },
> >> + { "VpLongSpinWaitHypercalls", 64 },
> >> + { "VpOtherHypercalls", 65 },
> >> + { "VpSyntheticInterruptHypercalls", 66 },
> >> + { "VpVirtualInterruptHypercalls", 67 },
> >> + { "VpVirtualMmuHypercalls", 68 },
> >> + { "VpVirtualProcessorHypercalls", 69 },
> >> + { "VpHardwareInterrupts", 70 },
> >> + { "VpNestedPageFaultInterceptsCount", 71 },
> >> + { "VpNestedPageFaultInterceptsTime", 72 },
> >> + { "VpPageScans", 73 },
> >> + { "VpLogicalProcessorDispatches", 74 },
> >> + { "VpWaitingForCpuTime", 75 },
> >> + { "VpExtendedHypercalls", 76 },
> >> + { "VpExtendedHypercallInterceptMessages", 77 },
> >> + { "VpMbecNestedPageTableSwitches", 78 },
> >> + { "VpOtherReflectedGuestExceptions", 79 },
> >> + { "VpGlobalIoTlbFlushes", 80 },
> >> + { "VpGlobalIoTlbFlushCost", 81 },
> >> + { "VpLocalIoTlbFlushes", 82 },
> >> + { "VpLocalIoTlbFlushCost", 83 },
> >> + { "VpHypercallsForwardedCount", 84 },
> >> + { "VpHypercallsForwardingTime", 85 },
> >> + { "VpPageInvalidationsForwardedCount", 86 },
> >> + { "VpPageInvalidationsForwardingTime", 87 },
> >> + { "VpControlRegisterAccessesForwardedCount", 88 },
> >> + { "VpControlRegisterAccessesForwardingTime", 89 },
> >> + { "VpIoInstructionsForwardedCount", 90 },
> >> + { "VpIoInstructionsForwardingTime", 91 },
> >> + { "VpHltInstructionsForwardedCount", 92 },
> >> + { "VpHltInstructionsForwardingTime", 93 },
> >> + { "VpMwaitInstructionsForwardedCount", 94 },
> >> + { "VpMwaitInstructionsForwardingTime", 95 },
> >> + { "VpCpuidInstructionsForwardedCount", 96 },
> >> + { "VpCpuidInstructionsForwardingTime", 97 },
> >> + { "VpMsrAccessesForwardedCount", 98 },
> >> + { "VpMsrAccessesForwardingTime", 99 },
> >> + { "VpOtherInterceptsForwardedCount", 100 },
> >> + { "VpOtherInterceptsForwardingTime", 101 },
> >> + { "VpExternalInterruptsForwardedCount", 102 },
> >> + { "VpExternalInterruptsForwardingTime", 103 },
> >> + { "VpPendingInterruptsForwardedCount", 104 },
> >> + { "VpPendingInterruptsForwardingTime", 105 },
> >> + { "VpEmulatedInstructionsForwardedCount", 106 },
> >> + { "VpEmulatedInstructionsForwardingTime", 107 },
> >> + { "VpDebugRegisterAccessesForwardedCount", 108 },
> >> + { "VpDebugRegisterAccessesForwardingTime", 109 },
> >> + { "VpPageFaultInterceptsForwardedCount", 110 },
> >> + { "VpPageFaultInterceptsForwardingTime", 111 },
> >> + { "VpVmclearEmulationCount", 112 },
> >> + { "VpVmclearEmulationTime", 113 },
> >> + { "VpVmptrldEmulationCount", 114 },
> >> + { "VpVmptrldEmulationTime", 115 },
> >> + { "VpVmptrstEmulationCount", 116 },
> >> + { "VpVmptrstEmulationTime", 117 },
> >> + { "VpVmreadEmulationCount", 118 },
> >> + { "VpVmreadEmulationTime", 119 },
> >> + { "VpVmwriteEmulationCount", 120 },
> >> + { "VpVmwriteEmulationTime", 121 },
> >> + { "VpVmxoffEmulationCount", 122 },
> >> + { "VpVmxoffEmulationTime", 123 },
> >> + { "VpVmxonEmulationCount", 124 },
> >> + { "VpVmxonEmulationTime", 125 },
> >> + { "VpNestedVMEntriesCount", 126 },
> >> + { "VpNestedVMEntriesTime", 127 },
> >> + { "VpNestedSLATSoftPageFaultsCount", 128 },
> >> + { "VpNestedSLATSoftPageFaultsTime", 129 },
> >> + { "VpNestedSLATHardPageFaultsCount", 130 },
> >> + { "VpNestedSLATHardPageFaultsTime", 131 },
> >> + { "VpInvEptAllContextEmulationCount", 132 },
> >> + { "VpInvEptAllContextEmulationTime", 133 },
> >> + { "VpInvEptSingleContextEmulationCount", 134 },
> >> + { "VpInvEptSingleContextEmulationTime", 135 },
> >> + { "VpInvVpidAllContextEmulationCount", 136 },
> >> + { "VpInvVpidAllContextEmulationTime", 137 },
> >> + { "VpInvVpidSingleContextEmulationCount", 138 },
> >> + { "VpInvVpidSingleContextEmulationTime", 139 },
> >> + { "VpInvVpidSingleAddressEmulationCount", 140 },
> >> + { "VpInvVpidSingleAddressEmulationTime", 141 },
> >> + { "VpNestedTlbPageTableReclamations", 142 },
> >> + { "VpNestedTlbPageTableEvictions", 143 },
> >> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 144 },
> >> + { "VpFlushGuestPhysicalAddressListHypercalls", 145 },
> >> + { "VpPostedInterruptNotifications", 146 },
> >> + { "VpPostedInterruptScans", 147 },
> >> + { "VpTotalCoreRunTime", 148 },
> >> + { "VpMaximumRunTime", 149 },
> >> + { "VpHwpRequestContextSwitches", 150 },
> >> + { "VpWaitingForCpuTimeBucket0", 151 },
> >> + { "VpWaitingForCpuTimeBucket1", 152 },
> >> + { "VpWaitingForCpuTimeBucket2", 153 },
> >> + { "VpWaitingForCpuTimeBucket3", 154 },
> >> + { "VpWaitingForCpuTimeBucket4", 155 },
> >> + { "VpWaitingForCpuTimeBucket5", 156 },
> >> + { "VpWaitingForCpuTimeBucket6", 157 },
> >> + { "VpVmloadEmulationCount", 158 },
> >> + { "VpVmloadEmulationTime", 159 },
> >> + { "VpVmsaveEmulationCount", 160 },
> >> + { "VpVmsaveEmulationTime", 161 },
> >> + { "VpGifInstructionEmulationCount", 162 },
> >> + { "VpGifInstructionEmulationTime", 163 },
> >> + { "VpEmulatedErrataSvmInstructions", 164 },
> >> + { "VpPlaceholder1", 165 },
> >> + { "VpPlaceholder2", 166 },
> >> + { "VpPlaceholder3", 167 },
> >> + { "VpPlaceholder4", 168 },
> >> + { "VpPlaceholder5", 169 },
> >> + { "VpPlaceholder6", 170 },
> >> + { "VpPlaceholder7", 171 },
> >> + { "VpPlaceholder8", 172 },
> >> + { "VpContentionTime", 173 },
> >> + { "VpWakeUpTime", 174 },
> >> + { "VpSchedulingPriority", 175 },
> >> + { "VpRdpmcInstructionsCount", 176 },
> >> + { "VpRdpmcInstructionsTime", 177 },
> >> + { "VpPerfmonPmuMsrAccessesCount", 178 },
> >> + { "VpPerfmonLbrMsrAccessesCount", 179 },
> >> + { "VpPerfmonIptMsrAccessesCount", 180 },
> >> + { "VpPerfmonInterruptCount", 181 },
> >> + { "VpVtl1DispatchCount", 182 },
> >> + { "VpVtl2DispatchCount", 183 },
> >> + { "VpVtl2DispatchBucket0", 184 },
> >> + { "VpVtl2DispatchBucket1", 185 },
> >> + { "VpVtl2DispatchBucket2", 186 },
> >> + { "VpVtl2DispatchBucket3", 187 },
> >> + { "VpVtl2DispatchBucket4", 188 },
> >> + { "VpVtl2DispatchBucket5", 189 },
> >> + { "VpVtl2DispatchBucket6", 190 },
> >> + { "VpVtl1RunTime", 191 },
> >> + { "VpVtl2RunTime", 192 },
> >> + { "VpIommuHypercalls", 193 },
> >> + { "VpCpuGroupHypercalls", 194 },
> >> + { "VpVsmHypercalls", 195 },
> >> + { "VpEventLogHypercalls", 196 },
> >> + { "VpDeviceDomainHypercalls", 197 },
> >> + { "VpDepositHypercalls", 198 },
> >> + { "VpSvmHypercalls", 199 },
> >> + { "VpBusLockAcquisitionCount", 200 },
> >> + { "VpLoadAvg", 201 },
> >> + { "VpRootDispatchThreadBlocked", 202 },
> >> + { "VpIdleCpuTime", 203 },
> >> + { "VpWaitingForCpuTimeBucket7", 204 },
> >> + { "VpWaitingForCpuTimeBucket8", 205 },
> >> + { "VpWaitingForCpuTimeBucket9", 206 },
> >> + { "VpWaitingForCpuTimeBucket10", 207 },
> >> + { "VpWaitingForCpuTimeBucket11", 208 },
> >> + { "VpWaitingForCpuTimeBucket12", 209 },
> >> + { "VpHierarchicalSuspendTime", 210 },
> >> + { "VpExpressSchedulingAttempts", 211 },
> >> + { "VpExpressSchedulingCount", 212 },
> >> + { "VpBusLockAcquisitionTime", 213 },
> >> +#elif IS_ENABLED(CONFIG_ARM64)
> >> + { "VpSysRegAccessesCount", 9 },
> >> + { "VpSysRegAccessesTime", 10 },
> >> + { "VpSmcInstructionsCount", 11 },
> >> + { "VpSmcInstructionsTime", 12 },
> >> + { "VpOtherInterceptsCount", 13 },
> >> + { "VpOtherInterceptsTime", 14 },
> >> + { "VpExternalInterruptsCount", 15 },
> >> + { "VpExternalInterruptsTime", 16 },
> >> + { "VpPendingInterruptsCount", 17 },
> >> + { "VpPendingInterruptsTime", 18 },
> >> + { "VpGuestPageTableMaps", 19 },
> >> + { "VpLargePageTlbFills", 20 },
> >> + { "VpSmallPageTlbFills", 21 },
> >> + { "VpReflectedGuestPageFaults", 22 },
> >> + { "VpMemoryInterceptMessages", 23 },
> >> + { "VpOtherMessages", 24 },
> >> + { "VpLogicalProcessorMigrations", 25 },
> >> + { "VpAddressDomainFlushes", 26 },
> >> + { "VpAddressSpaceFlushes", 27 },
> >> + { "VpSyntheticInterrupts", 28 },
> >> + { "VpVirtualInterrupts", 29 },
> >> + { "VpApicSelfIpisSent", 30 },
> >> + { "VpGpaSpaceHypercalls", 31 },
> >> + { "VpLogicalProcessorHypercalls", 32 },
> >> + { "VpLongSpinWaitHypercalls", 33 },
> >> + { "VpOtherHypercalls", 34 },
> >> + { "VpSyntheticInterruptHypercalls", 35 },
> >> + { "VpVirtualInterruptHypercalls", 36 },
> >> + { "VpVirtualMmuHypercalls", 37 },
> >> + { "VpVirtualProcessorHypercalls", 38 },
> >> + { "VpHardwareInterrupts", 39 },
> >> + { "VpNestedPageFaultInterceptsCount", 40 },
> >> + { "VpNestedPageFaultInterceptsTime", 41 },
> >> + { "VpLogicalProcessorDispatches", 42 },
> >> + { "VpWaitingForCpuTime", 43 },
> >> + { "VpExtendedHypercalls", 44 },
> >> + { "VpExtendedHypercallInterceptMessages", 45 },
> >> + { "VpMbecNestedPageTableSwitches", 46 },
> >> + { "VpOtherReflectedGuestExceptions", 47 },
> >> + { "VpGlobalIoTlbFlushes", 48 },
> >> + { "VpGlobalIoTlbFlushCost", 49 },
> >> + { "VpLocalIoTlbFlushes", 50 },
> >> + { "VpLocalIoTlbFlushCost", 51 },
> >> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 52 },
> >> + { "VpFlushGuestPhysicalAddressListHypercalls", 53 },
> >> + { "VpPostedInterruptNotifications", 54 },
> >> + { "VpPostedInterruptScans", 55 },
> >> + { "VpTotalCoreRunTime", 56 },
> >> + { "VpMaximumRunTime", 57 },
> >> + { "VpWaitingForCpuTimeBucket0", 58 },
> >> + { "VpWaitingForCpuTimeBucket1", 59 },
> >> + { "VpWaitingForCpuTimeBucket2", 60 },
> >> + { "VpWaitingForCpuTimeBucket3", 61 },
> >> + { "VpWaitingForCpuTimeBucket4", 62 },
> >> + { "VpWaitingForCpuTimeBucket5", 63 },
> >> + { "VpWaitingForCpuTimeBucket6", 64 },
> >> + { "VpHwpRequestContextSwitches", 65 },
> >> + { "VpPlaceholder2", 66 },
> >> + { "VpPlaceholder3", 67 },
> >> + { "VpPlaceholder4", 68 },
> >> + { "VpPlaceholder5", 69 },
> >> + { "VpPlaceholder6", 70 },
> >> + { "VpPlaceholder7", 71 },
> >> + { "VpPlaceholder8", 72 },
> >> + { "VpContentionTime", 73 },
> >> + { "VpWakeUpTime", 74 },
> >> + { "VpSchedulingPriority", 75 },
> >> + { "VpVtl1DispatchCount", 76 },
> >> + { "VpVtl2DispatchCount", 77 },
> >> + { "VpVtl2DispatchBucket0", 78 },
> >> + { "VpVtl2DispatchBucket1", 79 },
> >> + { "VpVtl2DispatchBucket2", 80 },
> >> + { "VpVtl2DispatchBucket3", 81 },
> >> + { "VpVtl2DispatchBucket4", 82 },
> >> + { "VpVtl2DispatchBucket5", 83 },
> >> + { "VpVtl2DispatchBucket6", 84 },
> >> + { "VpVtl1RunTime", 85 },
> >> + { "VpVtl2RunTime", 86 },
> >> + { "VpIommuHypercalls", 87 },
> >> + { "VpCpuGroupHypercalls", 88 },
> >> + { "VpVsmHypercalls", 89 },
> >> + { "VpEventLogHypercalls", 90 },
> >> + { "VpDeviceDomainHypercalls", 91 },
> >> + { "VpDepositHypercalls", 92 },
> >> + { "VpSvmHypercalls", 93 },
> >> + { "VpLoadAvg", 94 },
> >> + { "VpRootDispatchThreadBlocked", 95 },
> >> + { "VpIdleCpuTime", 96 },
> >> + { "VpWaitingForCpuTimeBucket7", 97 },
> >> + { "VpWaitingForCpuTimeBucket8", 98 },
> >> + { "VpWaitingForCpuTimeBucket9", 99 },
> >> + { "VpWaitingForCpuTimeBucket10", 100 },
> >> + { "VpWaitingForCpuTimeBucket11", 101 },
> >> + { "VpWaitingForCpuTimeBucket12", 102 },
> >> + { "VpHierarchicalSuspendTime", 103 },
> >> + { "VpExpressSchedulingAttempts", 104 },
> >> + { "VpExpressSchedulingCount", 105 },
> >> +#endif
> >> +};
> >> +
> >
> > The patch puts a blank line at the end of the new hv_counters.c file. When using
> > "git am" to apply this patch, I get this warning:
> >
> > .git/rebase-apply/patch:499: new blank line at EOF.
> > +
> > warning: 1 line adds whitespace errors.
> >
> > Line 499 is that blank line at the end of the new file. If I modify the patch to remove
> > the adding of the blank line, "git am" will apply the patch with no warning. This
> > should probably be fixed.
> >
> Thanks for pointing that out, I'll fix it!
>
> > Michael
^ permalink raw reply
* Re: [PATCH v4 6/7] mshv: Add data for printing stats page counters
From: Stanislav Kinsburskii @ 2026-01-23 22:28 UTC (permalink / raw)
To: Nuno Das Neves
Cc: linux-hyperv, linux-kernel, mhklinux, kys, haiyangz, wei.liu,
decui, longli, prapal, mrathor, paekkaladevi
In-Reply-To: <3ecdc642-75de-4622-a010-dc6edc78137c@linux.microsoft.com>
On Thu, Jan 22, 2026 at 10:21:17AM -0800, Nuno Das Neves wrote:
> On 1/21/2026 5:18 PM, Stanislav Kinsburskii wrote:
> > On Wed, Jan 21, 2026 at 01:46:22PM -0800, Nuno Das Neves wrote:
> >> Introduce hv_counters.c, containing static data corresponding to
> >> HV_*_COUNTER enums in the hypervisor source. Defining the enum
> >> members as an array instead makes more sense, since it will be
> >> iterated over to print counter information to debugfs.
> >>
> >> Include hypervisor, logical processor, partition, and virtual
> >> processor counters.
> >>
> >> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> >> ---
> >> drivers/hv/hv_counters.c | 488 +++++++++++++++++++++++++++++++++++++++
> >> 1 file changed, 488 insertions(+)
> >> create mode 100644 drivers/hv/hv_counters.c
> >>
> >> diff --git a/drivers/hv/hv_counters.c b/drivers/hv/hv_counters.c
> >> new file mode 100644
> >> index 000000000000..a8e07e72cc29
> >> --- /dev/null
> >> +++ b/drivers/hv/hv_counters.c
> >> @@ -0,0 +1,488 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/*
> >> + * Copyright (c) 2026, Microsoft Corporation.
> >> + *
> >> + * Data for printing stats page counters via debugfs.
> >> + *
> >> + * Authors: Microsoft Linux virtualization team
> >> + */
> >> +
> >> +struct hv_counter_entry {
> >> + char *name;
> >> + int idx;
> >> +};
> >
> > This structure looks redundant to me mostly because of the "idx".
> > > It looks like what you need here is an array of pointers to strings, like
> > below:
> >
> > static const char *hv_hypervisor_counters[] = {
> > NULL, /* 0 is unused */
> > "HvLogicalProcessors",
> > "HvPartitions",
> > "HvTotalPages",
> > "HvVirtualProcessors",
> > "HvMonitoredNotifications",
> > "HvModernStandbyEntries",
> > "HvPlatformIdleTransitions",
> > "HvHypervisorStartupCost",
> > NULL, /* 9 is unused */
> > "HvIOSpacePages",
> > ...
> > };
> >
> > which can be iterated like this:
> >
> > for (idx = 0; idx < ARRAY_SIZE(hv_hypervisor_counters); idx++) {
> > const char *name = hv_hypervisor_counters[idx];
> > if (!name)
> > continue;
> > /* print */
> > ...
> > }
> >
> > What do you think?
>
> It's an elegant option, given the values are almost uniformly
> tightly packed. It also saves a fair bit of space - around 2.5Kb.
>
> For my taste, I do like being able to visually verify the
> correctness of any given member. That way whenever I look at it, I
> don't have to blindly trust that the list was previously set up
> correctly, or count the lines to check if a given value is correct.
> Not a big deal, but it does introduce some friction.
>
> We could also use a designated initializer list:
>
> static const char *hv_hypervisor_counters[] = {
> [1] = "HvLogicalProcessors",
> [2] = "HvPartitions",
> [3] = "HvTotalPages",
> [4] = "HvVirtualProcessors",
> [5] = "HvMonitoredNotifications",
> [6] = "HvModernStandbyEntries",
> [7] = "HvPlatformIdleTransitions",
> [8] = "HvHypervisorStartupCost",
>
> [10] = "HvIOSpacePages",
> ...
> };
>
> The indices are explicit, so it's easy to visually verify that any
> particular part of the list is correct. It's functionally identical
> to your approach, so it saves the same amount of space, and the
> explicit NULLs are unnecessary so it's more straightforward to
> transform from the Windows source in case of any gaps that are
> harder to notice later on in the list.
>
> How does that sound?
>
Fine by me.
Thanks,
Stanislav
> Nuno
>
> >
> > Thanks,
> > Stanislav
> >
> >> +
> >> +/* HV_HYPERVISOR_COUNTER */
> >> +static struct hv_counter_entry hv_hypervisor_counters[] = {
> >> + { "HvLogicalProcessors", 1 },
> >> + { "HvPartitions", 2 },
> >> + { "HvTotalPages", 3 },
> >> + { "HvVirtualProcessors", 4 },
> >> + { "HvMonitoredNotifications", 5 },
> >> + { "HvModernStandbyEntries", 6 },
> >> + { "HvPlatformIdleTransitions", 7 },
> >> + { "HvHypervisorStartupCost", 8 },
> >> +
> >> + { "HvIOSpacePages", 10 },
> >> + { "HvNonEssentialPagesForDump", 11 },
> >> + { "HvSubsumedPages", 12 },
> >> +};
> >> +
> >> +/* HV_CPU_COUNTER */
> >> +static struct hv_counter_entry hv_lp_counters[] = {
> >> + { "LpGlobalTime", 1 },
> >> + { "LpTotalRunTime", 2 },
> >> + { "LpHypervisorRunTime", 3 },
> >> + { "LpHardwareInterrupts", 4 },
> >> + { "LpContextSwitches", 5 },
> >> + { "LpInterProcessorInterrupts", 6 },
> >> + { "LpSchedulerInterrupts", 7 },
> >> + { "LpTimerInterrupts", 8 },
> >> + { "LpInterProcessorInterruptsSent", 9 },
> >> + { "LpProcessorHalts", 10 },
> >> + { "LpMonitorTransitionCost", 11 },
> >> + { "LpContextSwitchTime", 12 },
> >> + { "LpC1TransitionsCount", 13 },
> >> + { "LpC1RunTime", 14 },
> >> + { "LpC2TransitionsCount", 15 },
> >> + { "LpC2RunTime", 16 },
> >> + { "LpC3TransitionsCount", 17 },
> >> + { "LpC3RunTime", 18 },
> >> + { "LpRootVpIndex", 19 },
> >> + { "LpIdleSequenceNumber", 20 },
> >> + { "LpGlobalTscCount", 21 },
> >> + { "LpActiveTscCount", 22 },
> >> + { "LpIdleAccumulation", 23 },
> >> + { "LpReferenceCycleCount0", 24 },
> >> + { "LpActualCycleCount0", 25 },
> >> + { "LpReferenceCycleCount1", 26 },
> >> + { "LpActualCycleCount1", 27 },
> >> + { "LpProximityDomainId", 28 },
> >> + { "LpPostedInterruptNotifications", 29 },
> >> + { "LpBranchPredictorFlushes", 30 },
> >> +#if IS_ENABLED(CONFIG_X86_64)
> >> + { "LpL1DataCacheFlushes", 31 },
> >> + { "LpImmediateL1DataCacheFlushes", 32 },
> >> + { "LpMbFlushes", 33 },
> >> + { "LpCounterRefreshSequenceNumber", 34 },
> >> + { "LpCounterRefreshReferenceTime", 35 },
> >> + { "LpIdleAccumulationSnapshot", 36 },
> >> + { "LpActiveTscCountSnapshot", 37 },
> >> + { "LpHwpRequestContextSwitches", 38 },
> >> + { "LpPlaceholder1", 39 },
> >> + { "LpPlaceholder2", 40 },
> >> + { "LpPlaceholder3", 41 },
> >> + { "LpPlaceholder4", 42 },
> >> + { "LpPlaceholder5", 43 },
> >> + { "LpPlaceholder6", 44 },
> >> + { "LpPlaceholder7", 45 },
> >> + { "LpPlaceholder8", 46 },
> >> + { "LpPlaceholder9", 47 },
> >> + { "LpSchLocalRunListSize", 48 },
> >> + { "LpReserveGroupId", 49 },
> >> + { "LpRunningPriority", 50 },
> >> + { "LpPerfmonInterruptCount", 51 },
> >> +#elif IS_ENABLED(CONFIG_ARM64)
> >> + { "LpCounterRefreshSequenceNumber", 31 },
> >> + { "LpCounterRefreshReferenceTime", 32 },
> >> + { "LpIdleAccumulationSnapshot", 33 },
> >> + { "LpActiveTscCountSnapshot", 34 },
> >> + { "LpHwpRequestContextSwitches", 35 },
> >> + { "LpPlaceholder2", 36 },
> >> + { "LpPlaceholder3", 37 },
> >> + { "LpPlaceholder4", 38 },
> >> + { "LpPlaceholder5", 39 },
> >> + { "LpPlaceholder6", 40 },
> >> + { "LpPlaceholder7", 41 },
> >> + { "LpPlaceholder8", 42 },
> >> + { "LpPlaceholder9", 43 },
> >> + { "LpSchLocalRunListSize", 44 },
> >> + { "LpReserveGroupId", 45 },
> >> + { "LpRunningPriority", 46 },
> >> +#endif
> >> +};
> >> +
> >> +/* HV_PROCESS_COUNTER */
> >> +static struct hv_counter_entry hv_partition_counters[] = {
> >> + { "PtVirtualProcessors", 1 },
> >> +
> >> + { "PtTlbSize", 3 },
> >> + { "PtAddressSpaces", 4 },
> >> + { "PtDepositedPages", 5 },
> >> + { "PtGpaPages", 6 },
> >> + { "PtGpaSpaceModifications", 7 },
> >> + { "PtVirtualTlbFlushEntires", 8 },
> >> + { "PtRecommendedTlbSize", 9 },
> >> + { "PtGpaPages4K", 10 },
> >> + { "PtGpaPages2M", 11 },
> >> + { "PtGpaPages1G", 12 },
> >> + { "PtGpaPages512G", 13 },
> >> + { "PtDevicePages4K", 14 },
> >> + { "PtDevicePages2M", 15 },
> >> + { "PtDevicePages1G", 16 },
> >> + { "PtDevicePages512G", 17 },
> >> + { "PtAttachedDevices", 18 },
> >> + { "PtDeviceInterruptMappings", 19 },
> >> + { "PtIoTlbFlushes", 20 },
> >> + { "PtIoTlbFlushCost", 21 },
> >> + { "PtDeviceInterruptErrors", 22 },
> >> + { "PtDeviceDmaErrors", 23 },
> >> + { "PtDeviceInterruptThrottleEvents", 24 },
> >> + { "PtSkippedTimerTicks", 25 },
> >> + { "PtPartitionId", 26 },
> >> +#if IS_ENABLED(CONFIG_X86_64)
> >> + { "PtNestedTlbSize", 27 },
> >> + { "PtRecommendedNestedTlbSize", 28 },
> >> + { "PtNestedTlbFreeListSize", 29 },
> >> + { "PtNestedTlbTrimmedPages", 30 },
> >> + { "PtPagesShattered", 31 },
> >> + { "PtPagesRecombined", 32 },
> >> + { "PtHwpRequestValue", 33 },
> >> + { "PtAutoSuspendEnableTime", 34 },
> >> + { "PtAutoSuspendTriggerTime", 35 },
> >> + { "PtAutoSuspendDisableTime", 36 },
> >> + { "PtPlaceholder1", 37 },
> >> + { "PtPlaceholder2", 38 },
> >> + { "PtPlaceholder3", 39 },
> >> + { "PtPlaceholder4", 40 },
> >> + { "PtPlaceholder5", 41 },
> >> + { "PtPlaceholder6", 42 },
> >> + { "PtPlaceholder7", 43 },
> >> + { "PtPlaceholder8", 44 },
> >> + { "PtHypervisorStateTransferGeneration", 45 },
> >> + { "PtNumberofActiveChildPartitions", 46 },
> >> +#elif IS_ENABLED(CONFIG_ARM64)
> >> + { "PtHwpRequestValue", 27 },
> >> + { "PtAutoSuspendEnableTime", 28 },
> >> + { "PtAutoSuspendTriggerTime", 29 },
> >> + { "PtAutoSuspendDisableTime", 30 },
> >> + { "PtPlaceholder1", 31 },
> >> + { "PtPlaceholder2", 32 },
> >> + { "PtPlaceholder3", 33 },
> >> + { "PtPlaceholder4", 34 },
> >> + { "PtPlaceholder5", 35 },
> >> + { "PtPlaceholder6", 36 },
> >> + { "PtPlaceholder7", 37 },
> >> + { "PtPlaceholder8", 38 },
> >> + { "PtHypervisorStateTransferGeneration", 39 },
> >> + { "PtNumberofActiveChildPartitions", 40 },
> >> +#endif
> >> +};
> >> +
> >> +/* HV_THREAD_COUNTER */
> >> +static struct hv_counter_entry hv_vp_counters[] = {
> >> + { "VpTotalRunTime", 1 },
> >> + { "VpHypervisorRunTime", 2 },
> >> + { "VpRemoteNodeRunTime", 3 },
> >> + { "VpNormalizedRunTime", 4 },
> >> + { "VpIdealCpu", 5 },
> >> +
> >> + { "VpHypercallsCount", 7 },
> >> + { "VpHypercallsTime", 8 },
> >> +#if IS_ENABLED(CONFIG_X86_64)
> >> + { "VpPageInvalidationsCount", 9 },
> >> + { "VpPageInvalidationsTime", 10 },
> >> + { "VpControlRegisterAccessesCount", 11 },
> >> + { "VpControlRegisterAccessesTime", 12 },
> >> + { "VpIoInstructionsCount", 13 },
> >> + { "VpIoInstructionsTime", 14 },
> >> + { "VpHltInstructionsCount", 15 },
> >> + { "VpHltInstructionsTime", 16 },
> >> + { "VpMwaitInstructionsCount", 17 },
> >> + { "VpMwaitInstructionsTime", 18 },
> >> + { "VpCpuidInstructionsCount", 19 },
> >> + { "VpCpuidInstructionsTime", 20 },
> >> + { "VpMsrAccessesCount", 21 },
> >> + { "VpMsrAccessesTime", 22 },
> >> + { "VpOtherInterceptsCount", 23 },
> >> + { "VpOtherInterceptsTime", 24 },
> >> + { "VpExternalInterruptsCount", 25 },
> >> + { "VpExternalInterruptsTime", 26 },
> >> + { "VpPendingInterruptsCount", 27 },
> >> + { "VpPendingInterruptsTime", 28 },
> >> + { "VpEmulatedInstructionsCount", 29 },
> >> + { "VpEmulatedInstructionsTime", 30 },
> >> + { "VpDebugRegisterAccessesCount", 31 },
> >> + { "VpDebugRegisterAccessesTime", 32 },
> >> + { "VpPageFaultInterceptsCount", 33 },
> >> + { "VpPageFaultInterceptsTime", 34 },
> >> + { "VpGuestPageTableMaps", 35 },
> >> + { "VpLargePageTlbFills", 36 },
> >> + { "VpSmallPageTlbFills", 37 },
> >> + { "VpReflectedGuestPageFaults", 38 },
> >> + { "VpApicMmioAccesses", 39 },
> >> + { "VpIoInterceptMessages", 40 },
> >> + { "VpMemoryInterceptMessages", 41 },
> >> + { "VpApicEoiAccesses", 42 },
> >> + { "VpOtherMessages", 43 },
> >> + { "VpPageTableAllocations", 44 },
> >> + { "VpLogicalProcessorMigrations", 45 },
> >> + { "VpAddressSpaceEvictions", 46 },
> >> + { "VpAddressSpaceSwitches", 47 },
> >> + { "VpAddressDomainFlushes", 48 },
> >> + { "VpAddressSpaceFlushes", 49 },
> >> + { "VpGlobalGvaRangeFlushes", 50 },
> >> + { "VpLocalGvaRangeFlushes", 51 },
> >> + { "VpPageTableEvictions", 52 },
> >> + { "VpPageTableReclamations", 53 },
> >> + { "VpPageTableResets", 54 },
> >> + { "VpPageTableValidations", 55 },
> >> + { "VpApicTprAccesses", 56 },
> >> + { "VpPageTableWriteIntercepts", 57 },
> >> + { "VpSyntheticInterrupts", 58 },
> >> + { "VpVirtualInterrupts", 59 },
> >> + { "VpApicIpisSent", 60 },
> >> + { "VpApicSelfIpisSent", 61 },
> >> + { "VpGpaSpaceHypercalls", 62 },
> >> + { "VpLogicalProcessorHypercalls", 63 },
> >> + { "VpLongSpinWaitHypercalls", 64 },
> >> + { "VpOtherHypercalls", 65 },
> >> + { "VpSyntheticInterruptHypercalls", 66 },
> >> + { "VpVirtualInterruptHypercalls", 67 },
> >> + { "VpVirtualMmuHypercalls", 68 },
> >> + { "VpVirtualProcessorHypercalls", 69 },
> >> + { "VpHardwareInterrupts", 70 },
> >> + { "VpNestedPageFaultInterceptsCount", 71 },
> >> + { "VpNestedPageFaultInterceptsTime", 72 },
> >> + { "VpPageScans", 73 },
> >> + { "VpLogicalProcessorDispatches", 74 },
> >> + { "VpWaitingForCpuTime", 75 },
> >> + { "VpExtendedHypercalls", 76 },
> >> + { "VpExtendedHypercallInterceptMessages", 77 },
> >> + { "VpMbecNestedPageTableSwitches", 78 },
> >> + { "VpOtherReflectedGuestExceptions", 79 },
> >> + { "VpGlobalIoTlbFlushes", 80 },
> >> + { "VpGlobalIoTlbFlushCost", 81 },
> >> + { "VpLocalIoTlbFlushes", 82 },
> >> + { "VpLocalIoTlbFlushCost", 83 },
> >> + { "VpHypercallsForwardedCount", 84 },
> >> + { "VpHypercallsForwardingTime", 85 },
> >> + { "VpPageInvalidationsForwardedCount", 86 },
> >> + { "VpPageInvalidationsForwardingTime", 87 },
> >> + { "VpControlRegisterAccessesForwardedCount", 88 },
> >> + { "VpControlRegisterAccessesForwardingTime", 89 },
> >> + { "VpIoInstructionsForwardedCount", 90 },
> >> + { "VpIoInstructionsForwardingTime", 91 },
> >> + { "VpHltInstructionsForwardedCount", 92 },
> >> + { "VpHltInstructionsForwardingTime", 93 },
> >> + { "VpMwaitInstructionsForwardedCount", 94 },
> >> + { "VpMwaitInstructionsForwardingTime", 95 },
> >> + { "VpCpuidInstructionsForwardedCount", 96 },
> >> + { "VpCpuidInstructionsForwardingTime", 97 },
> >> + { "VpMsrAccessesForwardedCount", 98 },
> >> + { "VpMsrAccessesForwardingTime", 99 },
> >> + { "VpOtherInterceptsForwardedCount", 100 },
> >> + { "VpOtherInterceptsForwardingTime", 101 },
> >> + { "VpExternalInterruptsForwardedCount", 102 },
> >> + { "VpExternalInterruptsForwardingTime", 103 },
> >> + { "VpPendingInterruptsForwardedCount", 104 },
> >> + { "VpPendingInterruptsForwardingTime", 105 },
> >> + { "VpEmulatedInstructionsForwardedCount", 106 },
> >> + { "VpEmulatedInstructionsForwardingTime", 107 },
> >> + { "VpDebugRegisterAccessesForwardedCount", 108 },
> >> + { "VpDebugRegisterAccessesForwardingTime", 109 },
> >> + { "VpPageFaultInterceptsForwardedCount", 110 },
> >> + { "VpPageFaultInterceptsForwardingTime", 111 },
> >> + { "VpVmclearEmulationCount", 112 },
> >> + { "VpVmclearEmulationTime", 113 },
> >> + { "VpVmptrldEmulationCount", 114 },
> >> + { "VpVmptrldEmulationTime", 115 },
> >> + { "VpVmptrstEmulationCount", 116 },
> >> + { "VpVmptrstEmulationTime", 117 },
> >> + { "VpVmreadEmulationCount", 118 },
> >> + { "VpVmreadEmulationTime", 119 },
> >> + { "VpVmwriteEmulationCount", 120 },
> >> + { "VpVmwriteEmulationTime", 121 },
> >> + { "VpVmxoffEmulationCount", 122 },
> >> + { "VpVmxoffEmulationTime", 123 },
> >> + { "VpVmxonEmulationCount", 124 },
> >> + { "VpVmxonEmulationTime", 125 },
> >> + { "VpNestedVMEntriesCount", 126 },
> >> + { "VpNestedVMEntriesTime", 127 },
> >> + { "VpNestedSLATSoftPageFaultsCount", 128 },
> >> + { "VpNestedSLATSoftPageFaultsTime", 129 },
> >> + { "VpNestedSLATHardPageFaultsCount", 130 },
> >> + { "VpNestedSLATHardPageFaultsTime", 131 },
> >> + { "VpInvEptAllContextEmulationCount", 132 },
> >> + { "VpInvEptAllContextEmulationTime", 133 },
> >> + { "VpInvEptSingleContextEmulationCount", 134 },
> >> + { "VpInvEptSingleContextEmulationTime", 135 },
> >> + { "VpInvVpidAllContextEmulationCount", 136 },
> >> + { "VpInvVpidAllContextEmulationTime", 137 },
> >> + { "VpInvVpidSingleContextEmulationCount", 138 },
> >> + { "VpInvVpidSingleContextEmulationTime", 139 },
> >> + { "VpInvVpidSingleAddressEmulationCount", 140 },
> >> + { "VpInvVpidSingleAddressEmulationTime", 141 },
> >> + { "VpNestedTlbPageTableReclamations", 142 },
> >> + { "VpNestedTlbPageTableEvictions", 143 },
> >> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 144 },
> >> + { "VpFlushGuestPhysicalAddressListHypercalls", 145 },
> >> + { "VpPostedInterruptNotifications", 146 },
> >> + { "VpPostedInterruptScans", 147 },
> >> + { "VpTotalCoreRunTime", 148 },
> >> + { "VpMaximumRunTime", 149 },
> >> + { "VpHwpRequestContextSwitches", 150 },
> >> + { "VpWaitingForCpuTimeBucket0", 151 },
> >> + { "VpWaitingForCpuTimeBucket1", 152 },
> >> + { "VpWaitingForCpuTimeBucket2", 153 },
> >> + { "VpWaitingForCpuTimeBucket3", 154 },
> >> + { "VpWaitingForCpuTimeBucket4", 155 },
> >> + { "VpWaitingForCpuTimeBucket5", 156 },
> >> + { "VpWaitingForCpuTimeBucket6", 157 },
> >> + { "VpVmloadEmulationCount", 158 },
> >> + { "VpVmloadEmulationTime", 159 },
> >> + { "VpVmsaveEmulationCount", 160 },
> >> + { "VpVmsaveEmulationTime", 161 },
> >> + { "VpGifInstructionEmulationCount", 162 },
> >> + { "VpGifInstructionEmulationTime", 163 },
> >> + { "VpEmulatedErrataSvmInstructions", 164 },
> >> + { "VpPlaceholder1", 165 },
> >> + { "VpPlaceholder2", 166 },
> >> + { "VpPlaceholder3", 167 },
> >> + { "VpPlaceholder4", 168 },
> >> + { "VpPlaceholder5", 169 },
> >> + { "VpPlaceholder6", 170 },
> >> + { "VpPlaceholder7", 171 },
> >> + { "VpPlaceholder8", 172 },
> >> + { "VpContentionTime", 173 },
> >> + { "VpWakeUpTime", 174 },
> >> + { "VpSchedulingPriority", 175 },
> >> + { "VpRdpmcInstructionsCount", 176 },
> >> + { "VpRdpmcInstructionsTime", 177 },
> >> + { "VpPerfmonPmuMsrAccessesCount", 178 },
> >> + { "VpPerfmonLbrMsrAccessesCount", 179 },
> >> + { "VpPerfmonIptMsrAccessesCount", 180 },
> >> + { "VpPerfmonInterruptCount", 181 },
> >> + { "VpVtl1DispatchCount", 182 },
> >> + { "VpVtl2DispatchCount", 183 },
> >> + { "VpVtl2DispatchBucket0", 184 },
> >> + { "VpVtl2DispatchBucket1", 185 },
> >> + { "VpVtl2DispatchBucket2", 186 },
> >> + { "VpVtl2DispatchBucket3", 187 },
> >> + { "VpVtl2DispatchBucket4", 188 },
> >> + { "VpVtl2DispatchBucket5", 189 },
> >> + { "VpVtl2DispatchBucket6", 190 },
> >> + { "VpVtl1RunTime", 191 },
> >> + { "VpVtl2RunTime", 192 },
> >> + { "VpIommuHypercalls", 193 },
> >> + { "VpCpuGroupHypercalls", 194 },
> >> + { "VpVsmHypercalls", 195 },
> >> + { "VpEventLogHypercalls", 196 },
> >> + { "VpDeviceDomainHypercalls", 197 },
> >> + { "VpDepositHypercalls", 198 },
> >> + { "VpSvmHypercalls", 199 },
> >> + { "VpBusLockAcquisitionCount", 200 },
> >> + { "VpLoadAvg", 201 },
> >> + { "VpRootDispatchThreadBlocked", 202 },
> >> + { "VpIdleCpuTime", 203 },
> >> + { "VpWaitingForCpuTimeBucket7", 204 },
> >> + { "VpWaitingForCpuTimeBucket8", 205 },
> >> + { "VpWaitingForCpuTimeBucket9", 206 },
> >> + { "VpWaitingForCpuTimeBucket10", 207 },
> >> + { "VpWaitingForCpuTimeBucket11", 208 },
> >> + { "VpWaitingForCpuTimeBucket12", 209 },
> >> + { "VpHierarchicalSuspendTime", 210 },
> >> + { "VpExpressSchedulingAttempts", 211 },
> >> + { "VpExpressSchedulingCount", 212 },
> >> + { "VpBusLockAcquisitionTime", 213 },
> >> +#elif IS_ENABLED(CONFIG_ARM64)
> >> + { "VpSysRegAccessesCount", 9 },
> >> + { "VpSysRegAccessesTime", 10 },
> >> + { "VpSmcInstructionsCount", 11 },
> >> + { "VpSmcInstructionsTime", 12 },
> >> + { "VpOtherInterceptsCount", 13 },
> >> + { "VpOtherInterceptsTime", 14 },
> >> + { "VpExternalInterruptsCount", 15 },
> >> + { "VpExternalInterruptsTime", 16 },
> >> + { "VpPendingInterruptsCount", 17 },
> >> + { "VpPendingInterruptsTime", 18 },
> >> + { "VpGuestPageTableMaps", 19 },
> >> + { "VpLargePageTlbFills", 20 },
> >> + { "VpSmallPageTlbFills", 21 },
> >> + { "VpReflectedGuestPageFaults", 22 },
> >> + { "VpMemoryInterceptMessages", 23 },
> >> + { "VpOtherMessages", 24 },
> >> + { "VpLogicalProcessorMigrations", 25 },
> >> + { "VpAddressDomainFlushes", 26 },
> >> + { "VpAddressSpaceFlushes", 27 },
> >> + { "VpSyntheticInterrupts", 28 },
> >> + { "VpVirtualInterrupts", 29 },
> >> + { "VpApicSelfIpisSent", 30 },
> >> + { "VpGpaSpaceHypercalls", 31 },
> >> + { "VpLogicalProcessorHypercalls", 32 },
> >> + { "VpLongSpinWaitHypercalls", 33 },
> >> + { "VpOtherHypercalls", 34 },
> >> + { "VpSyntheticInterruptHypercalls", 35 },
> >> + { "VpVirtualInterruptHypercalls", 36 },
> >> + { "VpVirtualMmuHypercalls", 37 },
> >> + { "VpVirtualProcessorHypercalls", 38 },
> >> + { "VpHardwareInterrupts", 39 },
> >> + { "VpNestedPageFaultInterceptsCount", 40 },
> >> + { "VpNestedPageFaultInterceptsTime", 41 },
> >> + { "VpLogicalProcessorDispatches", 42 },
> >> + { "VpWaitingForCpuTime", 43 },
> >> + { "VpExtendedHypercalls", 44 },
> >> + { "VpExtendedHypercallInterceptMessages", 45 },
> >> + { "VpMbecNestedPageTableSwitches", 46 },
> >> + { "VpOtherReflectedGuestExceptions", 47 },
> >> + { "VpGlobalIoTlbFlushes", 48 },
> >> + { "VpGlobalIoTlbFlushCost", 49 },
> >> + { "VpLocalIoTlbFlushes", 50 },
> >> + { "VpLocalIoTlbFlushCost", 51 },
> >> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 52 },
> >> + { "VpFlushGuestPhysicalAddressListHypercalls", 53 },
> >> + { "VpPostedInterruptNotifications", 54 },
> >> + { "VpPostedInterruptScans", 55 },
> >> + { "VpTotalCoreRunTime", 56 },
> >> + { "VpMaximumRunTime", 57 },
> >> + { "VpWaitingForCpuTimeBucket0", 58 },
> >> + { "VpWaitingForCpuTimeBucket1", 59 },
> >> + { "VpWaitingForCpuTimeBucket2", 60 },
> >> + { "VpWaitingForCpuTimeBucket3", 61 },
> >> + { "VpWaitingForCpuTimeBucket4", 62 },
> >> + { "VpWaitingForCpuTimeBucket5", 63 },
> >> + { "VpWaitingForCpuTimeBucket6", 64 },
> >> + { "VpHwpRequestContextSwitches", 65 },
> >> + { "VpPlaceholder2", 66 },
> >> + { "VpPlaceholder3", 67 },
> >> + { "VpPlaceholder4", 68 },
> >> + { "VpPlaceholder5", 69 },
> >> + { "VpPlaceholder6", 70 },
> >> + { "VpPlaceholder7", 71 },
> >> + { "VpPlaceholder8", 72 },
> >> + { "VpContentionTime", 73 },
> >> + { "VpWakeUpTime", 74 },
> >> + { "VpSchedulingPriority", 75 },
> >> + { "VpVtl1DispatchCount", 76 },
> >> + { "VpVtl2DispatchCount", 77 },
> >> + { "VpVtl2DispatchBucket0", 78 },
> >> + { "VpVtl2DispatchBucket1", 79 },
> >> + { "VpVtl2DispatchBucket2", 80 },
> >> + { "VpVtl2DispatchBucket3", 81 },
> >> + { "VpVtl2DispatchBucket4", 82 },
> >> + { "VpVtl2DispatchBucket5", 83 },
> >> + { "VpVtl2DispatchBucket6", 84 },
> >> + { "VpVtl1RunTime", 85 },
> >> + { "VpVtl2RunTime", 86 },
> >> + { "VpIommuHypercalls", 87 },
> >> + { "VpCpuGroupHypercalls", 88 },
> >> + { "VpVsmHypercalls", 89 },
> >> + { "VpEventLogHypercalls", 90 },
> >> + { "VpDeviceDomainHypercalls", 91 },
> >> + { "VpDepositHypercalls", 92 },
> >> + { "VpSvmHypercalls", 93 },
> >> + { "VpLoadAvg", 94 },
> >> + { "VpRootDispatchThreadBlocked", 95 },
> >> + { "VpIdleCpuTime", 96 },
> >> + { "VpWaitingForCpuTimeBucket7", 97 },
> >> + { "VpWaitingForCpuTimeBucket8", 98 },
> >> + { "VpWaitingForCpuTimeBucket9", 99 },
> >> + { "VpWaitingForCpuTimeBucket10", 100 },
> >> + { "VpWaitingForCpuTimeBucket11", 101 },
> >> + { "VpWaitingForCpuTimeBucket12", 102 },
> >> + { "VpHierarchicalSuspendTime", 103 },
> >> + { "VpExpressSchedulingAttempts", 104 },
> >> + { "VpExpressSchedulingCount", 105 },
> >> +#endif
> >> +};
> >> +
> >> --
> >> 2.34.1
^ permalink raw reply
* [PATCH] mshv: Make MSHV mutually exclusive with KEXEC
From: Stanislav Kinsburskii @ 2026-01-23 22:20 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
The MSHV driver deposits kernel-allocated pages to the hypervisor during
runtime and never withdraws them. This creates a fundamental incompatibility
with KEXEC, as these deposited pages remain unavailable to the new kernel
loaded via KEXEC, leading to potential system crashes when that kernel accesses
hypervisor-deposited pages.
Make MSHV mutually exclusive with KEXEC until proper page lifecycle
management is implemented.
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
drivers/hv/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 7937ac0cbd0f..cfd4501db0fa 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -74,6 +74,7 @@ config MSHV_ROOT
# e.g. When withdrawing memory, the hypervisor gives back 4k pages in
# no particular order, making it impossible to reassemble larger pages
depends on PAGE_SIZE_4KB
+ depends on !KEXEC
select EVENTFD
select VIRT_XFER_TO_GUEST_WORK
select HMM_MIRROR
^ permalink raw reply related
* Re: [PATCH v4 7/7] mshv: Add debugfs to view hypervisor statistics
From: Nuno Das Neves @ 2026-01-23 21:11 UTC (permalink / raw)
To: Michael Kelley, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, skinsburskii@linux.microsoft.com
Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, longli@microsoft.com,
prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
paekkaladevi@linux.microsoft.com, Jinank Jain
In-Reply-To: <SN6PR02MB415765A8221B8F6270ACEF1DD494A@SN6PR02MB4157.namprd02.prod.outlook.com>
On 1/23/2026 9:09 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
>>
>> Introduce a debugfs interface to expose root and child partition stats
>> when running with mshv_root.
>>
>> Create a debugfs directory "mshv" containing 'stats' files organized by
>> type and id. A stats file contains a number of counters depending on
>> its type. e.g. an excerpt from a VP stats file:
>>
>> TotalRunTime : 1997602722
>> HypervisorRunTime : 649671371
>> RemoteNodeRunTime : 0
>> NormalizedRunTime : 1997602721
>> IdealCpu : 0
>> HypercallsCount : 1708169
>> HypercallsTime : 111914774
>> PageInvalidationsCount : 0
>> PageInvalidationsTime : 0
>>
>> On a root partition with some active child partitions, the entire
>> directory structure may look like:
>>
>> mshv/
>> stats # hypervisor stats
>> lp/ # logical processors
>> 0/ # LP id
>> stats # LP 0 stats
>> 1/
>> 2/
>> 3/
>> partition/ # partition stats
>> 1/ # root partition id
>> stats # root partition stats
>> vp/ # root virtual processors
>> 0/ # root VP id
>> stats # root VP 0 stats
>> 1/
>> 2/
>> 3/
>> 42/ # child partition id
>> stats # child partition stats
>> vp/ # child VPs
>> 0/ # child VP id
>> stats # child VP 0 stats
>> 1/
>> 43/
>> 55/
>>
>> On L1VH, some stats are not present as it does not own the hardware
>> like the root partition does:
>> - The hypervisor and lp stats are not present
>> - L1VH's partition directory is named "self" because it can't get its
>> own id
>> - Some of L1VH's partition and VP stats fields are not populated, because
>> it can't map its own HV_STATS_AREA_PARENT page.
>>
>> Co-developed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
>> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
>> Co-developed-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>> Co-developed-by: Mukesh Rathor <mrathor@linux.microsoft.com>
>> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
>> Co-developed-by: Purna Pavan Chandra Aekkaladevi
>> <paekkaladevi@linux.microsoft.com>
>> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
>> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
>> ---
>> drivers/hv/Makefile | 1 +
>> drivers/hv/hv_counters.c | 1 +
>> drivers/hv/hv_synic.c | 177 +++++++++
>
> This new file hv_synic.c seems to be spurious. It looks like you unintentionally
> picked up this new file from the build tree where you were creating the patches
> for this series.
>
Oh, that's embarrassing! Yes, it's a half-baked, unrelated work-in-progress...
Please ignore!
<snip>
>> diff --git a/drivers/hv/mshv_debugfs.c b/drivers/hv/mshv_debugfs.c
>> new file mode 100644
>> index 000000000000..72eb0ae44e4b
>> --- /dev/null
>> +++ b/drivers/hv/mshv_debugfs.c
>> @@ -0,0 +1,703 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2026, Microsoft Corporation.
>> + *
>> + * The /sys/kernel/debug/mshv directory contents.
>> + * Contains various statistics data, provided by the hypervisor.
>> + *
>> + * Authors: Microsoft Linux virtualization team
>> + */
>> +
>> +#include <linux/debugfs.h>
>> +#include <linux/stringify.h>
>> +#include <asm/mshyperv.h>
>> +#include <linux/slab.h>
>> +
>> +#include "mshv.h"
>> +#include "mshv_root.h"
>> +
>> +#include "hv_counters.c"
>> +
>> +#define U32_BUF_SZ 11
>> +#define U64_BUF_SZ 21
>> +#define NUM_STATS_AREAS (HV_STATS_AREA_PARENT + 1)
>
> This is sort of weak in that it doesn't really guard against
> changes in the enum that defines HV_STATS_AREA_PARENT.
> It would work if it were defined as part of the enum, but then
> you are changing the code coming from the Windows world,
> which I know is a different problem.
>
> The enum is part of the hypervisor ABI and hence isn't likely to
> change, but it still feels funny to define NUM_STATS_AREAS like
> this. I would suggest dropping this and just using
> HV_STATS_AREA_COUNT for the memory allocations even
> though doing so will allocate space for a stats area pointer
> that isn't used by this code. It's only a few bytes.
>
That would work, but then I'd want to have a comment explaining
that the decision is intentional, otherwise I think it's just as
confusing to have unexplained wasted space.
Alternatively, the usage of SELF and PARENT (but not INTERNAL)
could be made explicit by a compile-time check, and a comment to
clarify:
/* Only support SELF and PARENT areas */
#define NUM_STATS_AREAS 2
static_assert(HV_STATS_AREA_SELF == 0 && HV_STATS_AREA_PARENT == 1,
"SELF and PARENT areas must be usable as indices into an array of size NUM_STATS_AREAS");
>> +
>> +static struct dentry *mshv_debugfs;
>> +static struct dentry *mshv_debugfs_partition;
>> +static struct dentry *mshv_debugfs_lp;
>> +static struct dentry **parent_vp_stats;
>> +static struct dentry *parent_partition_stats;
>> +
>> +static u64 mshv_lps_count;
>> +static struct hv_stats_page **mshv_lps_stats;
>> +
>> +static int lp_stats_show(struct seq_file *m, void *v)
>> +{
>> + const struct hv_stats_page *stats = m->private;
>> + struct hv_counter_entry *entry = hv_lp_counters;
>> + int i;
>> +
>> + for (i = 0; i < ARRAY_SIZE(hv_lp_counters); i++, entry++)
>> + seq_printf(m, "%-29s: %llu\n", entry->name,
>> + stats->data[entry->idx]);
>> +
>> + return 0;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(lp_stats);
>> +
>> +static void mshv_lp_stats_unmap(u32 lp_index)
>> +{
>> + union hv_stats_object_identity identity = {
>> + .lp.lp_index = lp_index,
>> + .lp.stats_area_type = HV_STATS_AREA_SELF,
>> + };
>> + int err;
>> +
>> + err = hv_unmap_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR,
>> + mshv_lps_stats[lp_index], &identity);
>> + if (err)
>> + pr_err("%s: failed to unmap logical processor %u stats, err: %d\n",
>> + __func__, lp_index, err);
>
> Perhaps set mshv_lps_stats[lp_index] to NULL? I don't think it's actually
> required, but similar code later in this file sets some pointers to NULL
> just as good hygiene.
>
Good idea, I'll do that.
>> +}
>> +
<snip>
>> +
>> +static int __init mshv_debugfs_lp_create(struct dentry *parent)
>> +{
>> + struct dentry *lp_dir;
>> + int err, lp_index;
>> +
>> + mshv_lps_stats = kcalloc(mshv_lps_count,
>> + sizeof(*mshv_lps_stats),
>> + GFP_KERNEL_ACCOUNT);
>> +
>> + if (!mshv_lps_stats)
>> + return -ENOMEM;
>> +
>> + lp_dir = debugfs_create_dir("lp", parent);
>> + if (IS_ERR(lp_dir)) {
>> + err = PTR_ERR(lp_dir);
>> + goto free_lp_stats;
>> + }
>> +
>> + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) {
>> + err = lp_debugfs_create(lp_index, lp_dir);
>> + if (err)
>> + goto remove_debugfs_lps;
>> + }
>> +
>> + mshv_debugfs_lp = lp_dir;
>> +
>> + return 0;
>> +
>> +remove_debugfs_lps:
>> + for (lp_index -= 1; lp_index >= 0; lp_index--)
>> + mshv_lp_stats_unmap(lp_index);
>> + debugfs_remove_recursive(lp_dir);
>> +free_lp_stats:
>> + kfree(mshv_lps_stats);
>
> Set mshv_lps_stats to NULL?
>
Agreed, thanks.
>> +
>> + return err;
>> +}
>> +
<snip>
>> +
>> +static void mshv_debugfs_parent_partition_remove(void)
>> +{
>> + int idx;
>> +
>> + for_each_online_cpu(idx)
>> + parent_vp_debugfs_remove(idx,
>
> The first parameter here ("idx") should be translated through the
> hv_vp_index[] array like is done in mshv_debugfs_parent_partition_create().
>
Ok, thanks
>> + parent_vp_stats[idx]);
>> +
>> + partition_debugfs_remove(hv_current_partition_id,
>> + parent_partition_stats);
>> + kfree(parent_vp_stats);
>> + parent_vp_stats = NULL;
>> + parent_partition_stats = NULL;
>> +
>
> Extra blank line.
>
Ack
>> +}
>> +
>> +static int __init parent_vp_debugfs_create(u32 vp_index,
>> + struct dentry **vp_stats_ptr,
>> + struct dentry *parent)
>> +{
>> + struct hv_stats_page **pstats;
>> + int err;
>> +
>> + pstats = kcalloc(2, sizeof(struct hv_stats_page *), GFP_KERNEL_ACCOUNT);
>
> Another case of using "2" that should be changed.
>
Ack
>> + if (!pstats)
>> + return -ENOMEM;
>> +
>> + err = mshv_vp_stats_map(hv_current_partition_id, vp_index, pstats);
>> + if (err)
>> + goto cleanup;
>> +
>> + err = vp_debugfs_create(hv_current_partition_id, vp_index, pstats,
>> + vp_stats_ptr, parent);
>> + if (err)
>> + goto unmap_vp_stats;
>> +
>> + return 0;
>> +
>> +unmap_vp_stats:
>> + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats);
>> +cleanup:
>> + kfree(pstats);
>> + return err;
>> +}
>> +
>> +static int __init mshv_debugfs_parent_partition_create(void)
>> +{
>> + struct dentry *vp_dir;
>> + int err, idx, i;
>> +
>> + mshv_debugfs_partition = debugfs_create_dir("partition",
>> + mshv_debugfs);
>> + if (IS_ERR(mshv_debugfs_partition))
>> + return PTR_ERR(mshv_debugfs_partition);
>> +
>> + err = partition_debugfs_create(hv_current_partition_id,
>> + &vp_dir,
>> + &parent_partition_stats,
>> + mshv_debugfs_partition);
>> + if (err)
>> + goto remove_debugfs_partition;
>> +
>> + parent_vp_stats = kcalloc(num_possible_cpus(),
>
> num_possible_cpus() should not be used to allocate an array that is
> then indexed by the Linux CPU number. Use nr_cpu_ids instead when
> allocating the array. See commit 16b18fdf6bc7 for the full explanation.
> As explained in that commit message, using num_possible_cpus()
> doesn't break things now, but it might in the future.
>
Thanks, will do
>> + sizeof(*parent_vp_stats),
>> + GFP_KERNEL);
>> + if (!parent_vp_stats) {
>> + err = -ENOMEM;
>> + goto remove_debugfs_partition;
>> + }
>> +
>> + for_each_online_cpu(idx) {
>> + err = parent_vp_debugfs_create(hv_vp_index[idx],
>> + &parent_vp_stats[idx],
>> + vp_dir);
>> + if (err)
>> + goto remove_debugfs_partition_vp;
>> + }
>> +
>> + return 0;
>> +
>> +remove_debugfs_partition_vp:
>> + for_each_online_cpu(i) {
>> + if (i >= idx)
>> + break;
>> + parent_vp_debugfs_remove(i, parent_vp_stats[i]);
>> + }
>> + partition_debugfs_remove(hv_current_partition_id,
>> + parent_partition_stats);
>> +
>> + kfree(parent_vp_stats);
>> + parent_vp_stats = NULL;
>> + parent_partition_stats = NULL;
>> +
>> +remove_debugfs_partition:
>> + debugfs_remove_recursive(mshv_debugfs_partition);
>> + mshv_debugfs_partition = NULL;
>> + return err;
>> +}
>> +
>> +static int hv_stats_show(struct seq_file *m, void *v)
>> +{
>> + const struct hv_stats_page *stats = m->private;
>> + struct hv_counter_entry *entry = hv_hypervisor_counters;
>> + int i;
>> +
>> + for (i = 0; i < ARRAY_SIZE(hv_hypervisor_counters); i++, entry++)
>> + seq_printf(m, "%-25s: %llu\n", entry->name,
>> + stats->data[entry->idx]);
>> +
>> + return 0;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(hv_stats);
>> +
>> +static void mshv_hv_stats_unmap(void)
>> +{
>> + union hv_stats_object_identity identity = {
>> + .hv.stats_area_type = HV_STATS_AREA_SELF,
>> + };
>> + int err;
>> +
>> + err = hv_unmap_stats_page(HV_STATS_OBJECT_HYPERVISOR, NULL, &identity);
>> + if (err)
>> + pr_err("%s: failed to unmap hypervisor stats: %d\n",
>> + __func__, err);
>> +}
>> +
>> +static void * __init mshv_hv_stats_map(void)
>> +{
>> + union hv_stats_object_identity identity = {
>> + .hv.stats_area_type = HV_STATS_AREA_SELF,
>> + };
>> + struct hv_stats_page *stats;
>> + int err;
>> +
>> + err = hv_map_stats_page(HV_STATS_OBJECT_HYPERVISOR, &identity, &stats);
>> + if (err) {
>> + pr_err("%s: failed to map hypervisor stats: %d\n",
>> + __func__, err);
>> + return ERR_PTR(err);
>> + }
>> + return stats;
>> +}
>> +
>> +static int __init mshv_debugfs_hv_stats_create(struct dentry *parent)
>> +{
>> + struct dentry *dentry;
>> + u64 *stats;
>> + int err;
>> +
>> + stats = mshv_hv_stats_map();
>> + if (IS_ERR(stats))
>> + return PTR_ERR(stats);
>> +
>> + dentry = debugfs_create_file("stats", 0400, parent,
>> + stats, &hv_stats_fops);
>> + if (IS_ERR(dentry)) {
>> + err = PTR_ERR(dentry);
>> + pr_err("%s: failed to create hypervisor stats dentry: %d\n",
>> + __func__, err);
>> + goto unmap_hv_stats;
>> + }
>> +
>> + mshv_lps_count = num_present_cpus();
>
> This method of setting mshv_lps_count, and the iteration through the lp_index
> in mshv_debugfs_lp_create() and mshv_debugfs_lp_remove(), seems risky. The
> lp_index gets passed to the hypervisor, so it must be the hypervisor's concept
> of the lp_index. Is that always guaranteed to be the same as Linux's numbering
> of the present CPUs? There may be edge cases where it is not. For example, what
> if Linux in the root partition were booted with the "nosmt" kernel boot option,
> such that Linux ignores all the 2nd hyper-threads in a core? Could that create
> a numbering mismatch?
>
Ah, this previously read the LP count from the hypervisor stats page
(HvLogicalProcessors). I removed that enum and thought this would be a reasonable
way to get the number of LPs, but I think I'm mistaken.
For context, there is a fix to how LP and VP numbers are assigned in
hv_smp_prepare_cpus(), but it's part of a future patchset. That fix ensures the
LP indices are dense. The code looks like:
/* create dense LPs from 0-N for all apicids */
i = next_smallest_apicid(apicids, 0);
for (lpidx = 1; i != INT_MAX; lpidx++) {
node = __apicid_to_node[i];
if (node == NUMA_NO_NODE)
node = 0;
/* params: node num, lp index, apic id */
ret = hv_call_add_logical_proc(node, lpidx, i);
BUG_ON(ret);
i = next_smallest_apicid(apicids, i);
}
/* create a VP for each present CPU */
lpidx = 1; /* skip BSP cpu 0 */
for_each_present_cpu(i) {
if (i == 0)
continue;
/* params: node num, domid, vp index, lp index */
ret = hv_call_create_vp(numa_cpu_node(i),
hv_current_partition_id, lpidx, lpidx);
BUG_ON(ret);
lpidx++;
}
For what it's worth, with that fix^ I tested with "nosmt" and things worked as I
would expect: All LPs were displayed in debugfs, but every second LP was not in
use by Linux, as evidenced by e.g. the number of timer interrupts not going up:
LpTimerInterrupts : 1
Also, only every second VP was created (0, 2, 4, 6...) since the others aren't
in the present mask at boot.
> Note that for vp_index, we have the hv_vp_index[] array for translating from
> Linux's concept of a CPU number to Hyper-V's concept of vp_index. For
> example, mshv_debugfs_parent_partition_create() correctly goes through
> this translation. And presumably when the VMM code does the
> MSHV_CREATE_VP ioctl, it is passing in a hypervisor vp_index.
>
> Everything may work fine "as is" for the moment, but the lp functions here
> are still conflating the hypervisor's LP numbering with Linux's CPU numbering,
> and that seems like a recipe for trouble somewhere down the road. I'm
> not sure how the hypervisor interprets the "lp_index" part of the identity
> argument passed to a hypercall, so I'm not sure what the fix is.
>
The simplest thing for now might be to bring back that enum value
HvLogicalProcessors just for this one usage. I'll admit I'm not familiar with
all the nuances here so there are still probably edge cases here.
>> +
>> + return 0;
>> +
>> +unmap_hv_stats:
>> + mshv_hv_stats_unmap();
>> + return err;
>> +}
>> +
<snip>
^ permalink raw reply
* RE: [PATCH] PCI: hv: Allocate MMIO from above 4GB for the config window
From: Dexuan Cui @ 2026-01-23 20:21 UTC (permalink / raw)
To: Michael Kelley, Matthew Ruffell
Cc: bhelgaas@google.com, Haiyang Zhang, Jake Oshins,
kwilczynski@kernel.org, KY Srinivasan,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, Long Li, lpieralisi@kernel.org,
mani@kernel.org, robh@kernel.org, stable@vger.kernel.org,
wei.liu@kernel.org
In-Reply-To: <SN6PR02MB415759DBA9428256D379841AD494A@SN6PR02MB4157.namprd02.prod.outlook.com>
Thank you for all the good input! I'll do more research and report back.
^ permalink raw reply
* RE: [PATCH v4 6/7] mshv: Add data for printing stats page counters
From: Michael Kelley @ 2026-01-23 19:10 UTC (permalink / raw)
To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, skinsburskii@linux.microsoft.com
Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, longli@microsoft.com,
prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
paekkaladevi@linux.microsoft.com
In-Reply-To: <2ea6f13f-ac2e-4ed7-9f2c-6c079cb25b85@linux.microsoft.com>
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, January 23, 2026 11:05 AM
>
> On 1/23/2026 9:09 AM, Michael Kelley wrote:
> > From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
> >>
> >> Introduce hv_counters.c, containing static data corresponding to
> >> HV_*_COUNTER enums in the hypervisor source. Defining the enum
> >> members as an array instead makes more sense, since it will be
> >> iterated over to print counter information to debugfs.
> >
> > I would have expected the filename to be mshv_counters.c, so that the association
> > with the MS hypervisor is clear. And the file is inextricably linked to mshv_debugfs.c,
> > which of course has the "mshv_" prefix. Or is there some thinking I'm not aware of
> > for using the "hv_" prefix?
> >
> Good question - I originally thought of using hv_ because the definitions inside are
> part of the hypervisor ABI, and hence also have the hv_ prefix.
>
> However you have a good point, and I'm not opposed to changing it.
>
> Maybe to just be super explicit: "mshv_debugfs_counters.c" ?
That sounds good to me.
Michael
^ permalink raw reply
* Re: [PATCH v4 6/7] mshv: Add data for printing stats page counters
From: Nuno Das Neves @ 2026-01-23 19:04 UTC (permalink / raw)
To: Michael Kelley, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, skinsburskii@linux.microsoft.com
Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, longli@microsoft.com,
prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
paekkaladevi@linux.microsoft.com
In-Reply-To: <SN6PR02MB41572B2CC3494BE6BC737424D494A@SN6PR02MB4157.namprd02.prod.outlook.com>
On 1/23/2026 9:09 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
>>
>> Introduce hv_counters.c, containing static data corresponding to
>> HV_*_COUNTER enums in the hypervisor source. Defining the enum
>> members as an array instead makes more sense, since it will be
>> iterated over to print counter information to debugfs.
>
> I would have expected the filename to be mshv_counters.c, so that the association
> with the MS hypervisor is clear. And the file is inextricably linked to mshv_debugfs.c,
> which of course has the "mshv_" prefix. Or is there some thinking I'm not aware of
> for using the "hv_" prefix?
>
Good question - I originally thought of using hv_ because the definitions inside are
part of the hypervisor ABI, and hence also have the hv_ prefix.
However you have a good point, and I'm not opposed to changing it.
Maybe to just be super explicit: "mshv_debugfs_counters.c" ?
> Also, I see in Patch 7 of this series that hv_counters.c is #included as a .c file
> in mshv_debugfs.c. Is there a reason for doing the #include instead of adding
> hv_counters.c to the Makefile and building it on its own? You would need to
> add a handful of extern statements to mshv_root.h so that the tables are
> referenceable from mshv_debugfs.c. But that would seem to be the more
> normal way of doing things. #including a .c file is unusual.
>
Yes...I thought I could avoid noise in mshv_root.h and the Makefile, since it's
only relevant for mshv_debugfs.c. However I could see this file (whether as .c or
.h) being misused and included elsewhere inadvertently, which would duplicate the
tables, so maybe doing it the normal way is a better idea, even if mshv_debugfs.c
is likely the only user.
> See one more comment on the last line of this patch ...
>
>>
>> Include hypervisor, logical processor, partition, and virtual
>> processor counters.
>>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>> drivers/hv/hv_counters.c | 488 +++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 488 insertions(+)
>> create mode 100644 drivers/hv/hv_counters.c
>>
>> diff --git a/drivers/hv/hv_counters.c b/drivers/hv/hv_counters.c
>> new file mode 100644
>> index 000000000000..a8e07e72cc29
>> --- /dev/null
>> +++ b/drivers/hv/hv_counters.c
>> @@ -0,0 +1,488 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2026, Microsoft Corporation.
>> + *
>> + * Data for printing stats page counters via debugfs.
>> + *
>> + * Authors: Microsoft Linux virtualization team
>> + */
>> +
>> +struct hv_counter_entry {
>> + char *name;
>> + int idx;
>> +};
>> +
>> +/* HV_HYPERVISOR_COUNTER */
>> +static struct hv_counter_entry hv_hypervisor_counters[] = {
>> + { "HvLogicalProcessors", 1 },
>> + { "HvPartitions", 2 },
>> + { "HvTotalPages", 3 },
>> + { "HvVirtualProcessors", 4 },
>> + { "HvMonitoredNotifications", 5 },
>> + { "HvModernStandbyEntries", 6 },
>> + { "HvPlatformIdleTransitions", 7 },
>> + { "HvHypervisorStartupCost", 8 },
>> +
>> + { "HvIOSpacePages", 10 },
>> + { "HvNonEssentialPagesForDump", 11 },
>> + { "HvSubsumedPages", 12 },
>> +};
>> +
>> +/* HV_CPU_COUNTER */
>> +static struct hv_counter_entry hv_lp_counters[] = {
>> + { "LpGlobalTime", 1 },
>> + { "LpTotalRunTime", 2 },
>> + { "LpHypervisorRunTime", 3 },
>> + { "LpHardwareInterrupts", 4 },
>> + { "LpContextSwitches", 5 },
>> + { "LpInterProcessorInterrupts", 6 },
>> + { "LpSchedulerInterrupts", 7 },
>> + { "LpTimerInterrupts", 8 },
>> + { "LpInterProcessorInterruptsSent", 9 },
>> + { "LpProcessorHalts", 10 },
>> + { "LpMonitorTransitionCost", 11 },
>> + { "LpContextSwitchTime", 12 },
>> + { "LpC1TransitionsCount", 13 },
>> + { "LpC1RunTime", 14 },
>> + { "LpC2TransitionsCount", 15 },
>> + { "LpC2RunTime", 16 },
>> + { "LpC3TransitionsCount", 17 },
>> + { "LpC3RunTime", 18 },
>> + { "LpRootVpIndex", 19 },
>> + { "LpIdleSequenceNumber", 20 },
>> + { "LpGlobalTscCount", 21 },
>> + { "LpActiveTscCount", 22 },
>> + { "LpIdleAccumulation", 23 },
>> + { "LpReferenceCycleCount0", 24 },
>> + { "LpActualCycleCount0", 25 },
>> + { "LpReferenceCycleCount1", 26 },
>> + { "LpActualCycleCount1", 27 },
>> + { "LpProximityDomainId", 28 },
>> + { "LpPostedInterruptNotifications", 29 },
>> + { "LpBranchPredictorFlushes", 30 },
>> +#if IS_ENABLED(CONFIG_X86_64)
>> + { "LpL1DataCacheFlushes", 31 },
>> + { "LpImmediateL1DataCacheFlushes", 32 },
>> + { "LpMbFlushes", 33 },
>> + { "LpCounterRefreshSequenceNumber", 34 },
>> + { "LpCounterRefreshReferenceTime", 35 },
>> + { "LpIdleAccumulationSnapshot", 36 },
>> + { "LpActiveTscCountSnapshot", 37 },
>> + { "LpHwpRequestContextSwitches", 38 },
>> + { "LpPlaceholder1", 39 },
>> + { "LpPlaceholder2", 40 },
>> + { "LpPlaceholder3", 41 },
>> + { "LpPlaceholder4", 42 },
>> + { "LpPlaceholder5", 43 },
>> + { "LpPlaceholder6", 44 },
>> + { "LpPlaceholder7", 45 },
>> + { "LpPlaceholder8", 46 },
>> + { "LpPlaceholder9", 47 },
>> + { "LpSchLocalRunListSize", 48 },
>> + { "LpReserveGroupId", 49 },
>> + { "LpRunningPriority", 50 },
>> + { "LpPerfmonInterruptCount", 51 },
>> +#elif IS_ENABLED(CONFIG_ARM64)
>> + { "LpCounterRefreshSequenceNumber", 31 },
>> + { "LpCounterRefreshReferenceTime", 32 },
>> + { "LpIdleAccumulationSnapshot", 33 },
>> + { "LpActiveTscCountSnapshot", 34 },
>> + { "LpHwpRequestContextSwitches", 35 },
>> + { "LpPlaceholder2", 36 },
>> + { "LpPlaceholder3", 37 },
>> + { "LpPlaceholder4", 38 },
>> + { "LpPlaceholder5", 39 },
>> + { "LpPlaceholder6", 40 },
>> + { "LpPlaceholder7", 41 },
>> + { "LpPlaceholder8", 42 },
>> + { "LpPlaceholder9", 43 },
>> + { "LpSchLocalRunListSize", 44 },
>> + { "LpReserveGroupId", 45 },
>> + { "LpRunningPriority", 46 },
>> +#endif
>> +};
>> +
>> +/* HV_PROCESS_COUNTER */
>> +static struct hv_counter_entry hv_partition_counters[] = {
>> + { "PtVirtualProcessors", 1 },
>> +
>> + { "PtTlbSize", 3 },
>> + { "PtAddressSpaces", 4 },
>> + { "PtDepositedPages", 5 },
>> + { "PtGpaPages", 6 },
>> + { "PtGpaSpaceModifications", 7 },
>> + { "PtVirtualTlbFlushEntires", 8 },
>> + { "PtRecommendedTlbSize", 9 },
>> + { "PtGpaPages4K", 10 },
>> + { "PtGpaPages2M", 11 },
>> + { "PtGpaPages1G", 12 },
>> + { "PtGpaPages512G", 13 },
>> + { "PtDevicePages4K", 14 },
>> + { "PtDevicePages2M", 15 },
>> + { "PtDevicePages1G", 16 },
>> + { "PtDevicePages512G", 17 },
>> + { "PtAttachedDevices", 18 },
>> + { "PtDeviceInterruptMappings", 19 },
>> + { "PtIoTlbFlushes", 20 },
>> + { "PtIoTlbFlushCost", 21 },
>> + { "PtDeviceInterruptErrors", 22 },
>> + { "PtDeviceDmaErrors", 23 },
>> + { "PtDeviceInterruptThrottleEvents", 24 },
>> + { "PtSkippedTimerTicks", 25 },
>> + { "PtPartitionId", 26 },
>> +#if IS_ENABLED(CONFIG_X86_64)
>> + { "PtNestedTlbSize", 27 },
>> + { "PtRecommendedNestedTlbSize", 28 },
>> + { "PtNestedTlbFreeListSize", 29 },
>> + { "PtNestedTlbTrimmedPages", 30 },
>> + { "PtPagesShattered", 31 },
>> + { "PtPagesRecombined", 32 },
>> + { "PtHwpRequestValue", 33 },
>> + { "PtAutoSuspendEnableTime", 34 },
>> + { "PtAutoSuspendTriggerTime", 35 },
>> + { "PtAutoSuspendDisableTime", 36 },
>> + { "PtPlaceholder1", 37 },
>> + { "PtPlaceholder2", 38 },
>> + { "PtPlaceholder3", 39 },
>> + { "PtPlaceholder4", 40 },
>> + { "PtPlaceholder5", 41 },
>> + { "PtPlaceholder6", 42 },
>> + { "PtPlaceholder7", 43 },
>> + { "PtPlaceholder8", 44 },
>> + { "PtHypervisorStateTransferGeneration", 45 },
>> + { "PtNumberofActiveChildPartitions", 46 },
>> +#elif IS_ENABLED(CONFIG_ARM64)
>> + { "PtHwpRequestValue", 27 },
>> + { "PtAutoSuspendEnableTime", 28 },
>> + { "PtAutoSuspendTriggerTime", 29 },
>> + { "PtAutoSuspendDisableTime", 30 },
>> + { "PtPlaceholder1", 31 },
>> + { "PtPlaceholder2", 32 },
>> + { "PtPlaceholder3", 33 },
>> + { "PtPlaceholder4", 34 },
>> + { "PtPlaceholder5", 35 },
>> + { "PtPlaceholder6", 36 },
>> + { "PtPlaceholder7", 37 },
>> + { "PtPlaceholder8", 38 },
>> + { "PtHypervisorStateTransferGeneration", 39 },
>> + { "PtNumberofActiveChildPartitions", 40 },
>> +#endif
>> +};
>> +
>> +/* HV_THREAD_COUNTER */
>> +static struct hv_counter_entry hv_vp_counters[] = {
>> + { "VpTotalRunTime", 1 },
>> + { "VpHypervisorRunTime", 2 },
>> + { "VpRemoteNodeRunTime", 3 },
>> + { "VpNormalizedRunTime", 4 },
>> + { "VpIdealCpu", 5 },
>> +
>> + { "VpHypercallsCount", 7 },
>> + { "VpHypercallsTime", 8 },
>> +#if IS_ENABLED(CONFIG_X86_64)
>> + { "VpPageInvalidationsCount", 9 },
>> + { "VpPageInvalidationsTime", 10 },
>> + { "VpControlRegisterAccessesCount", 11 },
>> + { "VpControlRegisterAccessesTime", 12 },
>> + { "VpIoInstructionsCount", 13 },
>> + { "VpIoInstructionsTime", 14 },
>> + { "VpHltInstructionsCount", 15 },
>> + { "VpHltInstructionsTime", 16 },
>> + { "VpMwaitInstructionsCount", 17 },
>> + { "VpMwaitInstructionsTime", 18 },
>> + { "VpCpuidInstructionsCount", 19 },
>> + { "VpCpuidInstructionsTime", 20 },
>> + { "VpMsrAccessesCount", 21 },
>> + { "VpMsrAccessesTime", 22 },
>> + { "VpOtherInterceptsCount", 23 },
>> + { "VpOtherInterceptsTime", 24 },
>> + { "VpExternalInterruptsCount", 25 },
>> + { "VpExternalInterruptsTime", 26 },
>> + { "VpPendingInterruptsCount", 27 },
>> + { "VpPendingInterruptsTime", 28 },
>> + { "VpEmulatedInstructionsCount", 29 },
>> + { "VpEmulatedInstructionsTime", 30 },
>> + { "VpDebugRegisterAccessesCount", 31 },
>> + { "VpDebugRegisterAccessesTime", 32 },
>> + { "VpPageFaultInterceptsCount", 33 },
>> + { "VpPageFaultInterceptsTime", 34 },
>> + { "VpGuestPageTableMaps", 35 },
>> + { "VpLargePageTlbFills", 36 },
>> + { "VpSmallPageTlbFills", 37 },
>> + { "VpReflectedGuestPageFaults", 38 },
>> + { "VpApicMmioAccesses", 39 },
>> + { "VpIoInterceptMessages", 40 },
>> + { "VpMemoryInterceptMessages", 41 },
>> + { "VpApicEoiAccesses", 42 },
>> + { "VpOtherMessages", 43 },
>> + { "VpPageTableAllocations", 44 },
>> + { "VpLogicalProcessorMigrations", 45 },
>> + { "VpAddressSpaceEvictions", 46 },
>> + { "VpAddressSpaceSwitches", 47 },
>> + { "VpAddressDomainFlushes", 48 },
>> + { "VpAddressSpaceFlushes", 49 },
>> + { "VpGlobalGvaRangeFlushes", 50 },
>> + { "VpLocalGvaRangeFlushes", 51 },
>> + { "VpPageTableEvictions", 52 },
>> + { "VpPageTableReclamations", 53 },
>> + { "VpPageTableResets", 54 },
>> + { "VpPageTableValidations", 55 },
>> + { "VpApicTprAccesses", 56 },
>> + { "VpPageTableWriteIntercepts", 57 },
>> + { "VpSyntheticInterrupts", 58 },
>> + { "VpVirtualInterrupts", 59 },
>> + { "VpApicIpisSent", 60 },
>> + { "VpApicSelfIpisSent", 61 },
>> + { "VpGpaSpaceHypercalls", 62 },
>> + { "VpLogicalProcessorHypercalls", 63 },
>> + { "VpLongSpinWaitHypercalls", 64 },
>> + { "VpOtherHypercalls", 65 },
>> + { "VpSyntheticInterruptHypercalls", 66 },
>> + { "VpVirtualInterruptHypercalls", 67 },
>> + { "VpVirtualMmuHypercalls", 68 },
>> + { "VpVirtualProcessorHypercalls", 69 },
>> + { "VpHardwareInterrupts", 70 },
>> + { "VpNestedPageFaultInterceptsCount", 71 },
>> + { "VpNestedPageFaultInterceptsTime", 72 },
>> + { "VpPageScans", 73 },
>> + { "VpLogicalProcessorDispatches", 74 },
>> + { "VpWaitingForCpuTime", 75 },
>> + { "VpExtendedHypercalls", 76 },
>> + { "VpExtendedHypercallInterceptMessages", 77 },
>> + { "VpMbecNestedPageTableSwitches", 78 },
>> + { "VpOtherReflectedGuestExceptions", 79 },
>> + { "VpGlobalIoTlbFlushes", 80 },
>> + { "VpGlobalIoTlbFlushCost", 81 },
>> + { "VpLocalIoTlbFlushes", 82 },
>> + { "VpLocalIoTlbFlushCost", 83 },
>> + { "VpHypercallsForwardedCount", 84 },
>> + { "VpHypercallsForwardingTime", 85 },
>> + { "VpPageInvalidationsForwardedCount", 86 },
>> + { "VpPageInvalidationsForwardingTime", 87 },
>> + { "VpControlRegisterAccessesForwardedCount", 88 },
>> + { "VpControlRegisterAccessesForwardingTime", 89 },
>> + { "VpIoInstructionsForwardedCount", 90 },
>> + { "VpIoInstructionsForwardingTime", 91 },
>> + { "VpHltInstructionsForwardedCount", 92 },
>> + { "VpHltInstructionsForwardingTime", 93 },
>> + { "VpMwaitInstructionsForwardedCount", 94 },
>> + { "VpMwaitInstructionsForwardingTime", 95 },
>> + { "VpCpuidInstructionsForwardedCount", 96 },
>> + { "VpCpuidInstructionsForwardingTime", 97 },
>> + { "VpMsrAccessesForwardedCount", 98 },
>> + { "VpMsrAccessesForwardingTime", 99 },
>> + { "VpOtherInterceptsForwardedCount", 100 },
>> + { "VpOtherInterceptsForwardingTime", 101 },
>> + { "VpExternalInterruptsForwardedCount", 102 },
>> + { "VpExternalInterruptsForwardingTime", 103 },
>> + { "VpPendingInterruptsForwardedCount", 104 },
>> + { "VpPendingInterruptsForwardingTime", 105 },
>> + { "VpEmulatedInstructionsForwardedCount", 106 },
>> + { "VpEmulatedInstructionsForwardingTime", 107 },
>> + { "VpDebugRegisterAccessesForwardedCount", 108 },
>> + { "VpDebugRegisterAccessesForwardingTime", 109 },
>> + { "VpPageFaultInterceptsForwardedCount", 110 },
>> + { "VpPageFaultInterceptsForwardingTime", 111 },
>> + { "VpVmclearEmulationCount", 112 },
>> + { "VpVmclearEmulationTime", 113 },
>> + { "VpVmptrldEmulationCount", 114 },
>> + { "VpVmptrldEmulationTime", 115 },
>> + { "VpVmptrstEmulationCount", 116 },
>> + { "VpVmptrstEmulationTime", 117 },
>> + { "VpVmreadEmulationCount", 118 },
>> + { "VpVmreadEmulationTime", 119 },
>> + { "VpVmwriteEmulationCount", 120 },
>> + { "VpVmwriteEmulationTime", 121 },
>> + { "VpVmxoffEmulationCount", 122 },
>> + { "VpVmxoffEmulationTime", 123 },
>> + { "VpVmxonEmulationCount", 124 },
>> + { "VpVmxonEmulationTime", 125 },
>> + { "VpNestedVMEntriesCount", 126 },
>> + { "VpNestedVMEntriesTime", 127 },
>> + { "VpNestedSLATSoftPageFaultsCount", 128 },
>> + { "VpNestedSLATSoftPageFaultsTime", 129 },
>> + { "VpNestedSLATHardPageFaultsCount", 130 },
>> + { "VpNestedSLATHardPageFaultsTime", 131 },
>> + { "VpInvEptAllContextEmulationCount", 132 },
>> + { "VpInvEptAllContextEmulationTime", 133 },
>> + { "VpInvEptSingleContextEmulationCount", 134 },
>> + { "VpInvEptSingleContextEmulationTime", 135 },
>> + { "VpInvVpidAllContextEmulationCount", 136 },
>> + { "VpInvVpidAllContextEmulationTime", 137 },
>> + { "VpInvVpidSingleContextEmulationCount", 138 },
>> + { "VpInvVpidSingleContextEmulationTime", 139 },
>> + { "VpInvVpidSingleAddressEmulationCount", 140 },
>> + { "VpInvVpidSingleAddressEmulationTime", 141 },
>> + { "VpNestedTlbPageTableReclamations", 142 },
>> + { "VpNestedTlbPageTableEvictions", 143 },
>> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 144 },
>> + { "VpFlushGuestPhysicalAddressListHypercalls", 145 },
>> + { "VpPostedInterruptNotifications", 146 },
>> + { "VpPostedInterruptScans", 147 },
>> + { "VpTotalCoreRunTime", 148 },
>> + { "VpMaximumRunTime", 149 },
>> + { "VpHwpRequestContextSwitches", 150 },
>> + { "VpWaitingForCpuTimeBucket0", 151 },
>> + { "VpWaitingForCpuTimeBucket1", 152 },
>> + { "VpWaitingForCpuTimeBucket2", 153 },
>> + { "VpWaitingForCpuTimeBucket3", 154 },
>> + { "VpWaitingForCpuTimeBucket4", 155 },
>> + { "VpWaitingForCpuTimeBucket5", 156 },
>> + { "VpWaitingForCpuTimeBucket6", 157 },
>> + { "VpVmloadEmulationCount", 158 },
>> + { "VpVmloadEmulationTime", 159 },
>> + { "VpVmsaveEmulationCount", 160 },
>> + { "VpVmsaveEmulationTime", 161 },
>> + { "VpGifInstructionEmulationCount", 162 },
>> + { "VpGifInstructionEmulationTime", 163 },
>> + { "VpEmulatedErrataSvmInstructions", 164 },
>> + { "VpPlaceholder1", 165 },
>> + { "VpPlaceholder2", 166 },
>> + { "VpPlaceholder3", 167 },
>> + { "VpPlaceholder4", 168 },
>> + { "VpPlaceholder5", 169 },
>> + { "VpPlaceholder6", 170 },
>> + { "VpPlaceholder7", 171 },
>> + { "VpPlaceholder8", 172 },
>> + { "VpContentionTime", 173 },
>> + { "VpWakeUpTime", 174 },
>> + { "VpSchedulingPriority", 175 },
>> + { "VpRdpmcInstructionsCount", 176 },
>> + { "VpRdpmcInstructionsTime", 177 },
>> + { "VpPerfmonPmuMsrAccessesCount", 178 },
>> + { "VpPerfmonLbrMsrAccessesCount", 179 },
>> + { "VpPerfmonIptMsrAccessesCount", 180 },
>> + { "VpPerfmonInterruptCount", 181 },
>> + { "VpVtl1DispatchCount", 182 },
>> + { "VpVtl2DispatchCount", 183 },
>> + { "VpVtl2DispatchBucket0", 184 },
>> + { "VpVtl2DispatchBucket1", 185 },
>> + { "VpVtl2DispatchBucket2", 186 },
>> + { "VpVtl2DispatchBucket3", 187 },
>> + { "VpVtl2DispatchBucket4", 188 },
>> + { "VpVtl2DispatchBucket5", 189 },
>> + { "VpVtl2DispatchBucket6", 190 },
>> + { "VpVtl1RunTime", 191 },
>> + { "VpVtl2RunTime", 192 },
>> + { "VpIommuHypercalls", 193 },
>> + { "VpCpuGroupHypercalls", 194 },
>> + { "VpVsmHypercalls", 195 },
>> + { "VpEventLogHypercalls", 196 },
>> + { "VpDeviceDomainHypercalls", 197 },
>> + { "VpDepositHypercalls", 198 },
>> + { "VpSvmHypercalls", 199 },
>> + { "VpBusLockAcquisitionCount", 200 },
>> + { "VpLoadAvg", 201 },
>> + { "VpRootDispatchThreadBlocked", 202 },
>> + { "VpIdleCpuTime", 203 },
>> + { "VpWaitingForCpuTimeBucket7", 204 },
>> + { "VpWaitingForCpuTimeBucket8", 205 },
>> + { "VpWaitingForCpuTimeBucket9", 206 },
>> + { "VpWaitingForCpuTimeBucket10", 207 },
>> + { "VpWaitingForCpuTimeBucket11", 208 },
>> + { "VpWaitingForCpuTimeBucket12", 209 },
>> + { "VpHierarchicalSuspendTime", 210 },
>> + { "VpExpressSchedulingAttempts", 211 },
>> + { "VpExpressSchedulingCount", 212 },
>> + { "VpBusLockAcquisitionTime", 213 },
>> +#elif IS_ENABLED(CONFIG_ARM64)
>> + { "VpSysRegAccessesCount", 9 },
>> + { "VpSysRegAccessesTime", 10 },
>> + { "VpSmcInstructionsCount", 11 },
>> + { "VpSmcInstructionsTime", 12 },
>> + { "VpOtherInterceptsCount", 13 },
>> + { "VpOtherInterceptsTime", 14 },
>> + { "VpExternalInterruptsCount", 15 },
>> + { "VpExternalInterruptsTime", 16 },
>> + { "VpPendingInterruptsCount", 17 },
>> + { "VpPendingInterruptsTime", 18 },
>> + { "VpGuestPageTableMaps", 19 },
>> + { "VpLargePageTlbFills", 20 },
>> + { "VpSmallPageTlbFills", 21 },
>> + { "VpReflectedGuestPageFaults", 22 },
>> + { "VpMemoryInterceptMessages", 23 },
>> + { "VpOtherMessages", 24 },
>> + { "VpLogicalProcessorMigrations", 25 },
>> + { "VpAddressDomainFlushes", 26 },
>> + { "VpAddressSpaceFlushes", 27 },
>> + { "VpSyntheticInterrupts", 28 },
>> + { "VpVirtualInterrupts", 29 },
>> + { "VpApicSelfIpisSent", 30 },
>> + { "VpGpaSpaceHypercalls", 31 },
>> + { "VpLogicalProcessorHypercalls", 32 },
>> + { "VpLongSpinWaitHypercalls", 33 },
>> + { "VpOtherHypercalls", 34 },
>> + { "VpSyntheticInterruptHypercalls", 35 },
>> + { "VpVirtualInterruptHypercalls", 36 },
>> + { "VpVirtualMmuHypercalls", 37 },
>> + { "VpVirtualProcessorHypercalls", 38 },
>> + { "VpHardwareInterrupts", 39 },
>> + { "VpNestedPageFaultInterceptsCount", 40 },
>> + { "VpNestedPageFaultInterceptsTime", 41 },
>> + { "VpLogicalProcessorDispatches", 42 },
>> + { "VpWaitingForCpuTime", 43 },
>> + { "VpExtendedHypercalls", 44 },
>> + { "VpExtendedHypercallInterceptMessages", 45 },
>> + { "VpMbecNestedPageTableSwitches", 46 },
>> + { "VpOtherReflectedGuestExceptions", 47 },
>> + { "VpGlobalIoTlbFlushes", 48 },
>> + { "VpGlobalIoTlbFlushCost", 49 },
>> + { "VpLocalIoTlbFlushes", 50 },
>> + { "VpLocalIoTlbFlushCost", 51 },
>> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 52 },
>> + { "VpFlushGuestPhysicalAddressListHypercalls", 53 },
>> + { "VpPostedInterruptNotifications", 54 },
>> + { "VpPostedInterruptScans", 55 },
>> + { "VpTotalCoreRunTime", 56 },
>> + { "VpMaximumRunTime", 57 },
>> + { "VpWaitingForCpuTimeBucket0", 58 },
>> + { "VpWaitingForCpuTimeBucket1", 59 },
>> + { "VpWaitingForCpuTimeBucket2", 60 },
>> + { "VpWaitingForCpuTimeBucket3", 61 },
>> + { "VpWaitingForCpuTimeBucket4", 62 },
>> + { "VpWaitingForCpuTimeBucket5", 63 },
>> + { "VpWaitingForCpuTimeBucket6", 64 },
>> + { "VpHwpRequestContextSwitches", 65 },
>> + { "VpPlaceholder2", 66 },
>> + { "VpPlaceholder3", 67 },
>> + { "VpPlaceholder4", 68 },
>> + { "VpPlaceholder5", 69 },
>> + { "VpPlaceholder6", 70 },
>> + { "VpPlaceholder7", 71 },
>> + { "VpPlaceholder8", 72 },
>> + { "VpContentionTime", 73 },
>> + { "VpWakeUpTime", 74 },
>> + { "VpSchedulingPriority", 75 },
>> + { "VpVtl1DispatchCount", 76 },
>> + { "VpVtl2DispatchCount", 77 },
>> + { "VpVtl2DispatchBucket0", 78 },
>> + { "VpVtl2DispatchBucket1", 79 },
>> + { "VpVtl2DispatchBucket2", 80 },
>> + { "VpVtl2DispatchBucket3", 81 },
>> + { "VpVtl2DispatchBucket4", 82 },
>> + { "VpVtl2DispatchBucket5", 83 },
>> + { "VpVtl2DispatchBucket6", 84 },
>> + { "VpVtl1RunTime", 85 },
>> + { "VpVtl2RunTime", 86 },
>> + { "VpIommuHypercalls", 87 },
>> + { "VpCpuGroupHypercalls", 88 },
>> + { "VpVsmHypercalls", 89 },
>> + { "VpEventLogHypercalls", 90 },
>> + { "VpDeviceDomainHypercalls", 91 },
>> + { "VpDepositHypercalls", 92 },
>> + { "VpSvmHypercalls", 93 },
>> + { "VpLoadAvg", 94 },
>> + { "VpRootDispatchThreadBlocked", 95 },
>> + { "VpIdleCpuTime", 96 },
>> + { "VpWaitingForCpuTimeBucket7", 97 },
>> + { "VpWaitingForCpuTimeBucket8", 98 },
>> + { "VpWaitingForCpuTimeBucket9", 99 },
>> + { "VpWaitingForCpuTimeBucket10", 100 },
>> + { "VpWaitingForCpuTimeBucket11", 101 },
>> + { "VpWaitingForCpuTimeBucket12", 102 },
>> + { "VpHierarchicalSuspendTime", 103 },
>> + { "VpExpressSchedulingAttempts", 104 },
>> + { "VpExpressSchedulingCount", 105 },
>> +#endif
>> +};
>> +
>
> The patch puts a blank line at the end of the new hv_counters.c file. When using
> "git am" to apply this patch, I get this warning:
>
> .git/rebase-apply/patch:499: new blank line at EOF.
> +
> warning: 1 line adds whitespace errors.
>
> Line 499 is that blank line at the end of the new file. If I modify the patch to remove
> the adding of the blank line, "git am" will apply the patch with no warning. This
> should probably be fixed.
>
Thanks for pointing that out, I'll fix it!
> Michael
^ permalink raw reply
* Re: [PATCH net-next v2 0/9] net: convert drivers to .get_rx_ring_count (last part)
From: patchwork-bot+netdevbpf @ 2026-01-23 19:00 UTC (permalink / raw)
To: Breno Leitao
Cc: ajit.khaparde, sriharsha.basavapatna, somnath.kotur,
andrew+netdev, davem, edumazet, kuba, pabeni, irusskikh, horms,
kys, haiyangz, wei.liu, decui, longli, alexanderduyck,
kernel-team, ecree.xilinx, brett.creeley, netdev, linux-kernel,
oss-drivers, linux-hyperv, linux-net-drivers, sbhatta
In-Reply-To: <20260122-grxring_big_v4-v2-0-94dbe4dcaa10@debian.org>
Hello:
This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:
On Thu, 22 Jan 2026 10:40:12 -0800 you wrote:
> Commit 84eaf4359c36 ("net: ethtool: add get_rx_ring_count callback to
> optimize RX ring queries") added specific support for GRXRINGS callback,
> simplifying .get_rxnfc.
>
> Remove the handling of GRXRINGS in .get_rxnfc() by moving it to the new
> .get_rx_ring_count().
>
> [...]
Here is the summary with links:
- [net-next,v2,1/9] net: benet: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/8f2a880d652e
- [net-next,v2,2/9] net: atlantic: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/489a5b81abbc
- [net-next,v2,3/9] net: nfp: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/46f4ad55605e
- [net-next,v2,4/9] net: mana: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/3eb722571835
- [net-next,v2,5/9] net: fbnic: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/ea28b02da84c
- [net-next,v2,6/9] net: ionic: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/8bd5cee9891a
- [net-next,v2,7/9] net: sfc: efx: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/67f16fba554f
- [net-next,v2,8/9] net: sfc: siena: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/c9e4688b2ee2
- [net-next,v2,9/9] net: sfc: falcon: convert to use .get_rx_ring_count
https://git.kernel.org/netdev/net-next/c/f584347c1a2b
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH v0 14/15] mshv: Remove mapping of mmio space during map user ioctl
From: Nuno Das Neves @ 2026-01-23 18:34 UTC (permalink / raw)
To: Mukesh R, linux-kernel, linux-hyperv, linux-arm-kernel, iommu,
linux-pci, linux-arch
Cc: kys, haiyangz, wei.liu, decui, longli, catalin.marinas, will,
tglx, mingo, bp, dave.hansen, hpa, joro, lpieralisi, kwilczynski,
mani, robh, bhelgaas, arnd, mhklinux, romank
In-Reply-To: <20260120064230.3602565-15-mrathor@linux.microsoft.com>
On 1/19/2026 10:42 PM, Mukesh R wrote:
> From: Mukesh Rathor <mrathor@linux.microsoft.com>
>
> VFIO no longer puts the mmio pfn in vma->vm_pgoff. So, remove code
> that is using it to map mmio space. It is broken and will cause
> panic.
What is the reason for having this as a separate commit from patch 15?
It seems like removing this code and adding the mmio intercept
handling could be done in one patch.
>
> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
> ---
> drivers/hv/mshv_root_main.c | 20 ++++----------------
> 1 file changed, 4 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 27313419828d..03f3aa9f5541 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1258,16 +1258,8 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region)
> }
>
> /*
> - * This maps two things: guest RAM and for pci passthru mmio space.
> - *
> - * mmio:
> - * - vfio overloads vm_pgoff to store the mmio start pfn/spa.
> - * - Two things need to happen for mapping mmio range:
> - * 1. mapped in the uaddr so VMM can access it.
> - * 2. mapped in the hwpt (gfn <-> mmio phys addr) so guest can access it.
> - *
> - * This function takes care of the second. The first one is managed by vfio,
> - * and hence is taken care of via vfio_pci_mmap_fault().
> + * This is called for both user ram and mmio space. The mmio space is not
> + * mapped here, but later during intercept.
> */
> static long
> mshv_map_user_memory(struct mshv_partition *partition,
> @@ -1276,7 +1268,6 @@ mshv_map_user_memory(struct mshv_partition *partition,
> struct mshv_mem_region *region;
> struct vm_area_struct *vma;
> bool is_mmio;
> - ulong mmio_pfn;
> long ret;
>
> if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP) ||
> @@ -1286,7 +1277,6 @@ mshv_map_user_memory(struct mshv_partition *partition,
> mmap_read_lock(current->mm);
> vma = vma_lookup(current->mm, mem.userspace_addr);
> is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0;
> - mmio_pfn = is_mmio ? vma->vm_pgoff : 0;
> mmap_read_unlock(current->mm);
>
> if (!vma)
> @@ -1313,10 +1303,8 @@ mshv_map_user_memory(struct mshv_partition *partition,
> HV_MAP_GPA_NO_ACCESS, NULL);
> break;
> case MSHV_REGION_TYPE_MMIO:
> - ret = hv_call_map_mmio_pages(partition->pt_id,
> - region->start_gfn,
> - mmio_pfn,
> - region->nr_pages);
> + /* mmio mappings are handled later during intercepts */
> + ret = 0;
> break;
> }
>
^ permalink raw reply
* Re: [PATCH v0 06/15] mshv: Implement mshv bridge device for VFIO
From: Nuno Das Neves @ 2026-01-23 18:32 UTC (permalink / raw)
To: Mukesh R, linux-kernel, linux-hyperv, linux-arm-kernel, iommu,
linux-pci, linux-arch
Cc: kys, haiyangz, wei.liu, decui, longli, catalin.marinas, will,
tglx, mingo, bp, dave.hansen, hpa, joro, lpieralisi, kwilczynski,
mani, robh, bhelgaas, arnd, mhklinux, romank
In-Reply-To: <20260120064230.3602565-7-mrathor@linux.microsoft.com>
On 1/19/2026 10:42 PM, Mukesh R wrote:
> From: Mukesh Rathor <mrathor@linux.microsoft.com>
>
> Add a new file to implement VFIO-MSHV bridge pseudo device. These
> functions are called in the VFIO framework, and credits to kvm/vfio.c
> as this file was adapted from it.
>
> Original author: Wei Liu <wei.liu@kernel.org>
> (Slightly modified from the original version).
>
> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
Since the code is very similar to Wei's original commit, I'd recommend
doing it this way:
1. Change the commit author to Wei, using git commit --amend --author=
and
2. Put his signed-off line before yours:
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
This shows he is the author of the commit but you ported it.
If you feel you changed it enough that it should be considered
co-authored, you can instead keep your authorship of the commit and
put:
Co-developed-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
> ---
> drivers/hv/Makefile | 3 +-
> drivers/hv/mshv_vfio.c | 210 +++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 212 insertions(+), 1 deletion(-)
> create mode 100644 drivers/hv/mshv_vfio.c
>
> diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> index a49f93c2d245..eae003c4cb8f 100644
> --- a/drivers/hv/Makefile
> +++ b/drivers/hv/Makefile
> @@ -14,7 +14,8 @@ hv_vmbus-y := vmbus_drv.o \
> hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
> hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
> mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
> - mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
> + mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o \
> + mshv_vfio.o
> mshv_vtl-y := mshv_vtl_main.o
>
> # Code that must be built-in
> diff --git a/drivers/hv/mshv_vfio.c b/drivers/hv/mshv_vfio.c
> new file mode 100644
> index 000000000000..6ea4d99a3bd2
> --- /dev/null
> +++ b/drivers/hv/mshv_vfio.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * VFIO-MSHV bridge pseudo device
> + *
> + * Heavily inspired by the VFIO-KVM bridge pseudo device.
> + */
> +#include <linux/errno.h>
> +#include <linux/file.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/slab.h>
> +#include <linux/vfio.h>
> +
> +#include "mshv.h"
> +#include "mshv_root.h"
> +
> +struct mshv_vfio_file {
> + struct list_head node;
> + struct file *file; /* list of struct mshv_vfio_file */
> +};
> +
> +struct mshv_vfio {
> + struct list_head file_list;
> + struct mutex lock;
> +};
> +
> +static bool mshv_vfio_file_is_valid(struct file *file)
> +{
> + bool (*fn)(struct file *file);
> + bool ret;
> +
> + fn = symbol_get(vfio_file_is_valid);
> + if (!fn)
> + return false;
> +
> + ret = fn(file);
> +
> + symbol_put(vfio_file_is_valid);
> +
> + return ret;
> +}
> +
> +static long mshv_vfio_file_add(struct mshv_device *mshvdev, unsigned int fd)
> +{
> + struct mshv_vfio *mshv_vfio = mshvdev->device_private;
> + struct mshv_vfio_file *mvf;
> + struct file *filp;
> + long ret = 0;
> +
> + filp = fget(fd);
> + if (!filp)
> + return -EBADF;
> +
> + /* Ensure the FD is a vfio FD. */
> + if (!mshv_vfio_file_is_valid(filp)) {
> + ret = -EINVAL;
> + goto out_fput;
> + }
> +
> + mutex_lock(&mshv_vfio->lock);
> +
> + list_for_each_entry(mvf, &mshv_vfio->file_list, node) {
> + if (mvf->file == filp) {
> + ret = -EEXIST;
> + goto out_unlock;
> + }
> + }
> +
> + mvf = kzalloc(sizeof(*mvf), GFP_KERNEL_ACCOUNT);
> + if (!mvf) {
> + ret = -ENOMEM;
> + goto out_unlock;
> + }
> +
> + mvf->file = get_file(filp);
> + list_add_tail(&mvf->node, &mshv_vfio->file_list);
> +
> +out_unlock:
> + mutex_unlock(&mshv_vfio->lock);
> +out_fput:
> + fput(filp);
> + return ret;
> +}
> +
> +static long mshv_vfio_file_del(struct mshv_device *mshvdev, unsigned int fd)
> +{
> + struct mshv_vfio *mshv_vfio = mshvdev->device_private;
> + struct mshv_vfio_file *mvf;
> + long ret;
> +
> + CLASS(fd, f)(fd);
> +
> + if (fd_empty(f))
> + return -EBADF;
> +
> + ret = -ENOENT;
> + mutex_lock(&mshv_vfio->lock);
> +
> + list_for_each_entry(mvf, &mshv_vfio->file_list, node) {
> + if (mvf->file != fd_file(f))
> + continue;
> +
> + list_del(&mvf->node);
> + fput(mvf->file);
> + kfree(mvf);
> + ret = 0;
> + break;
> + }
> +
> + mutex_unlock(&mshv_vfio->lock);
> + return ret;
> +}
> +
> +static long mshv_vfio_set_file(struct mshv_device *mshvdev, long attr,
> + void __user *arg)
> +{
> + int32_t __user *argp = arg;
> + int32_t fd;
> +
> + switch (attr) {
> + case MSHV_DEV_VFIO_FILE_ADD:
> + if (get_user(fd, argp))
> + return -EFAULT;
> + return mshv_vfio_file_add(mshvdev, fd);
> +
> + case MSHV_DEV_VFIO_FILE_DEL:
> + if (get_user(fd, argp))
> + return -EFAULT;
> + return mshv_vfio_file_del(mshvdev, fd);
> + }
> +
> + return -ENXIO;
> +}
> +
> +static long mshv_vfio_set_attr(struct mshv_device *mshvdev,
> + struct mshv_device_attr *attr)
> +{
> + switch (attr->group) {
> + case MSHV_DEV_VFIO_FILE:
> + return mshv_vfio_set_file(mshvdev, attr->attr,
> + u64_to_user_ptr(attr->addr));
> + }
> +
> + return -ENXIO;
> +}
> +
> +static long mshv_vfio_has_attr(struct mshv_device *mshvdev,
> + struct mshv_device_attr *attr)
> +{
> + switch (attr->group) {
> + case MSHV_DEV_VFIO_FILE:
> + switch (attr->attr) {
> + case MSHV_DEV_VFIO_FILE_ADD:
> + case MSHV_DEV_VFIO_FILE_DEL:
> + return 0;
> + }
> +
> + break;
> + }
> +
> + return -ENXIO;
> +}
> +
> +static long mshv_vfio_create_device(struct mshv_device *mshvdev, u32 type)
> +{
> + struct mshv_device *tmp;
> + struct mshv_vfio *mshv_vfio;
> +
> + /* Only one VFIO "device" per VM */
> + hlist_for_each_entry(tmp, &mshvdev->device_pt->pt_devices,
> + device_ptnode)
> + if (tmp->device_ops == &mshv_vfio_device_ops)
> + return -EBUSY;
> +
> + mshv_vfio = kzalloc(sizeof(*mshv_vfio), GFP_KERNEL_ACCOUNT);
> + if (mshv_vfio == NULL)
> + return -ENOMEM;
> +
> + INIT_LIST_HEAD(&mshv_vfio->file_list);
> + mutex_init(&mshv_vfio->lock);
> +
> + mshvdev->device_private = mshv_vfio;
> +
> + return 0;
> +}
> +
> +/* This is called from mshv_device_fop_release() */
> +static void mshv_vfio_release_device(struct mshv_device *mshvdev)
> +{
> + struct mshv_vfio *mv = mshvdev->device_private;
> + struct mshv_vfio_file *mvf, *tmp;
> +
> + list_for_each_entry_safe(mvf, tmp, &mv->file_list, node) {
> + fput(mvf->file);
> + list_del(&mvf->node);
> + kfree(mvf);
> + }
> +
> + kfree(mv);
> + kfree(mshvdev);
> +}
> +
> +struct mshv_device_ops mshv_vfio_device_ops = {
> + .device_name = "mshv-vfio",
> + .device_create = mshv_vfio_create_device,
> + .device_release = mshv_vfio_release_device,
> + .device_set_attr = mshv_vfio_set_attr,
> + .device_has_attr = mshv_vfio_has_attr,
> +};
^ permalink raw reply
* RE: [PATCH] PCI: hv: Allocate MMIO from above 4GB for the config window
From: Michael Kelley @ 2026-01-23 18:28 UTC (permalink / raw)
To: Michael Kelley, Matthew Ruffell
Cc: DECUI@microsoft.com, bhelgaas@google.com, haiyangz@microsoft.com,
jakeo@microsoft.com, kwilczynski@kernel.org, kys@microsoft.com,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, longli@microsoft.com,
lpieralisi@kernel.org, mani@kernel.org, robh@kernel.org,
stable@vger.kernel.org, wei.liu@kernel.org
In-Reply-To: <SN6PR02MB41573CD2EA6CD82A0C238F66D494A@SN6PR02MB4157.namprd02.prod.outlook.com>
From: Michael Kelley <mhklinux@outlook.com> Sent: Thursday, January 22, 2026 10:39 PM
>
> From: Matthew Ruffell <matthew.ruffell@canonical.com> Sent: Thursday, January 22, 2026 9:39 PM
> >
> > Hi Michael,
> >
> > > > I wonder if commit a41e0ab394e4 broke the initialization of screen_info in the
> > > > kdump kernel. Or perhaps there is now a rev-lock between the kernel with this
> > > > commit and a new version of the user space kexec command.
> >
> > a41e0ab394e4 isn't a mainline commit. Can you please mention the commit subject
> > so I can have a read.
>
> It's this patch:
>
> https://lore.kernel.org/lkml/20251126160854.553077-5-tzimmermann@suse.de/
>
> which is in linux-next, but not yet in mainline. Since you are dealing with older
> kernels, it's not the culprit.
>
> >
> > > > There's a parameter to the kexec() command that governs whether it uses the
> > > > kexec_file_load() system call or the kexec_load() system call.
> > > > I wonder if that parameter makes a difference in the problem described for this
> > > > patch.
> >
> > Yes, it does indeed make a difference. I have been debugging this the past few
> > days, and my colleague Melissa noticed that the problem reproduces when secure
> > boot is disabled, but it does not reproduce when secure boot is enabled.
> > Additionally, it reproduces on jammy, but not noble. It turns out that
> > kexec-tools on jammy defaults to kexec_load() when secure boot is disabled,
> > and when enabled, it instead uses kexec_file_load(). On noble, it defaults to
> > first trying kexec_file_load() before falling back to kexec_load(), so the
> > issue does not reproduce.
>
> This is good info, and definitely a clue. So to be clear, the problem repros
> only when kexec_load() is used. With kexec_file_load(), it does not repro. Is that
> right? I saw a similar distinction when working on commit 304386373007,
> though in the opposite direction!
>
> >
> > > > > /*
> > > > > * Set up a region of MMIO space to use for accessing configuration
> > > > > - * space.
> > > > > + * space. Use the high MMIO range to not conflict with the hyperv_drm
> > > > > + * driver (which normally gets MMIO from the low MMIO range) in the
> > > > > + * kdump kernel of a Gen2 VM, which fails to reserve the framebuffer
> > > > > + * MMIO range in vmbus_reserve_fb() due to screen_info.lfb_base being
> > > > > + * zero in the kdump kernel.
> > > > > */
> > > > > - ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, 0, -1,
> > > > > + ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, SZ_4G, -1,
> > > > > PCI_CONFIG_MMIO_LENGTH, 0x1000, false);
> > > > > if (ret)
> > > > > return ret;
> > > > > --
> >
> > Thank you for the patch Dexuan.
> >
> > This patch fixes the problem on Ubuntu 5.15, and 6.8 based kernels
> > booting V6 instance types on Azure with Gen 2 images.
>
> Are you seeing the problem on x86/64 or arm64 instances in Azure?
> "V6 instance types" could be either, I think, but I'm guessing you
> are on x86/64.
>
> And just to confirm: are you seeing the problem with the
> Hyper-V DRM driver, or the Hyper-V FB driver? This patch mentions
> the DRM driver, so I assume that's the problematic config.
>
> >
> > Tested-by: Matthew Ruffell <matthew.ruffell@canonical.com>
>
> While this patch may solve the observed problem, I'm interested in
> understanding the root cause of why vmbus_reserve_fb() is seeing
> screen_info.lfb_base set to zero. It may be next week before I can
> take a look, and I may need to follow up with you on more details of the
> scenario to reproduce the problem.
One more thought here: Is commit 96959283a58d relevant? The
commit message describes a scenario where vmbus_reserve_fb()
doesn't do anything because CONFIG_SYSFB is not set. Looking at
the code for vmbus_reserve_fb(), the fact that it does nothing might
imply that screen_info.lfb_base is 0. But when CONFIG_SYSFB is not set,
screen_info.lfb_base is just ignored, with the same result. This behavior
started with the 6.7 kernel due to commit a07b50d80ab6.
Note that commit 96959283a58d has a follow-on to correct a
problem when CONFIG_EFI is not set. See commit 7b89a44b2e8c.
If there's a reason to backport 96959283a58d, also get
7b89a44b2e8c.
Michael
^ permalink raw reply
* Re: [PATCH v0 05/15] mshv: Declarations and definitions for VFIO-MSHV bridge device
From: Nuno Das Neves @ 2026-01-23 18:25 UTC (permalink / raw)
To: Mukesh R, linux-kernel, linux-hyperv, linux-arm-kernel, iommu,
linux-pci, linux-arch
Cc: kys, haiyangz, wei.liu, decui, longli, catalin.marinas, will,
tglx, mingo, bp, dave.hansen, hpa, joro, lpieralisi, kwilczynski,
mani, robh, bhelgaas, arnd, mhklinux, romank
In-Reply-To: <20260120064230.3602565-6-mrathor@linux.microsoft.com>
On 1/19/2026 10:42 PM, Mukesh R wrote:
> From: Mukesh Rathor <mrathor@linux.microsoft.com>
>
> Add data structs needed by the subsequent patch that introduces a new
> module to implement VFIO-MSHV pseudo device.
>
> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
> ---
> drivers/hv/mshv_root.h | 23 +++++++++++++++++++++++
> include/uapi/linux/mshv.h | 31 +++++++++++++++++++++++++++++++
> 2 files changed, 54 insertions(+)
>
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index c3753b009fd8..42e1da1d545b 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -220,6 +220,29 @@ struct port_table_info {
> };
> };
>
> +struct mshv_device {
> + const struct mshv_device_ops *device_ops;
> + struct mshv_partition *device_pt;
> + void *device_private;
> + struct hlist_node device_ptnode;
> +};
> +
> +struct mshv_device_ops {
> + const char *device_name;
> + long (*device_create)(struct mshv_device *dev, u32 type);
> + void (*device_release)(struct mshv_device *dev);
> + long (*device_set_attr)(struct mshv_device *dev,
> + struct mshv_device_attr *attr);
> + long (*device_has_attr)(struct mshv_device *dev,
> + struct mshv_device_attr *attr);
> +};
> +
> +extern struct mshv_device_ops mshv_vfio_device_ops;
> +int mshv_vfio_ops_init(void);
> +void mshv_vfio_ops_exit(void);
> +long mshv_partition_ioctl_create_device(struct mshv_partition *partition,
> + void __user *user_args);
> +
> int mshv_update_routing_table(struct mshv_partition *partition,
> const struct mshv_user_irq_entry *entries,
> unsigned int numents);
> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
> index dee3ece28ce5..b7b10f9e2896 100644
> --- a/include/uapi/linux/mshv.h
> +++ b/include/uapi/linux/mshv.h
> @@ -252,6 +252,7 @@ struct mshv_root_hvcall {
> #define MSHV_GET_GPAP_ACCESS_BITMAP _IOWR(MSHV_IOCTL, 0x06, struct mshv_gpap_access_bitmap)
> /* Generic hypercall */
> #define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
> +#define MSHV_CREATE_DEVICE _IOWR(MSHV_IOCTL, 0x08, struct mshv_create_device)
>
With this commit, the IOCTL number is exposed to userspace but it doesn't work.
Ideally the IOCTL number should be added in the commit where it becomes usable.
> /*
> ********************************
> @@ -402,4 +403,34 @@ struct mshv_sint_mask {
> /* hv_hvcall device */
> #define MSHV_HVCALL_SETUP _IOW(MSHV_IOCTL, 0x1E, struct mshv_vtl_hvcall_setup)
> #define MSHV_HVCALL _IOWR(MSHV_IOCTL, 0x1F, struct mshv_vtl_hvcall)
> +
> +/* device passthru */
> +#define MSHV_CREATE_DEVICE_TEST 1
> +
> +enum {
> + MSHV_DEV_TYPE_VFIO,
> + MSHV_DEV_TYPE_MAX,
> +};
> +
> +struct mshv_create_device {
> + __u32 type; /* in: MSHV_DEV_TYPE_xxx */
> + __u32 fd; /* out: device handle */
> + __u32 flags; /* in: MSHV_CREATE_DEVICE_xxx */
> +};
> +
> +#define MSHV_DEV_VFIO_FILE 1
> +#define MSHV_DEV_VFIO_FILE_ADD 1
> +#define MSHV_DEV_VFIO_FILE_DEL 2
> +
> +struct mshv_device_attr {
> + __u32 flags; /* no flags currently defined */
> + __u32 group; /* device-defined */
> + __u64 attr; /* group-defined */
> + __u64 addr; /* userspace address of attr data */
> +};
> +
> +/* Device fds created with MSHV_CREATE_DEVICE */
> +#define MSHV_SET_DEVICE_ATTR _IOW(MSHV_IOCTL, 0x00, struct mshv_device_attr)
> +#define MSHV_HAS_DEVICE_ATTR _IOW(MSHV_IOCTL, 0x01, struct mshv_device_attr)
> +
> #endif
^ permalink raw reply
* Re: [PATCH v0 04/15] mshv: Provide a way to get partition id if running in a VMM process
From: Nuno Das Neves @ 2026-01-23 18:23 UTC (permalink / raw)
To: Mukesh R, linux-kernel, linux-hyperv, linux-arm-kernel, iommu,
linux-pci, linux-arch
Cc: kys, haiyangz, wei.liu, decui, longli, catalin.marinas, will,
tglx, mingo, bp, dave.hansen, hpa, joro, lpieralisi, kwilczynski,
mani, robh, bhelgaas, arnd, mhklinux, romank
In-Reply-To: <20260120064230.3602565-5-mrathor@linux.microsoft.com>
On 1/19/2026 10:42 PM, Mukesh R wrote:
> From: Mukesh Rathor <mrathor@linux.microsoft.com>
>
> Many PCI passthru related hypercalls require partition id of the target
> guest. Guests are actually managed by MSHV driver and the partition id
> is only maintained there. Add a field in the partition struct in MSHV
> driver to save the tgid of the VMM process creating the partition,
> and add a function there to retrieve partition id if valid VMM tgid.
>
> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
> ---
> drivers/hv/mshv_root.h | 1 +
> drivers/hv/mshv_root_main.c | 35 +++++++++++++++++++++++++++-------
> include/asm-generic/mshyperv.h | 1 +
> 3 files changed, 30 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index 3c1d88b36741..c3753b009fd8 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -134,6 +134,7 @@ struct mshv_partition {
>
> struct mshv_girq_routing_table __rcu *pt_girq_tbl;
> u64 isolation_type;
> + pid_t pt_vmm_tgid;
> bool import_completed;
> bool pt_initialized;
> };
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 1134a82c7881..83c7bad269a0 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1823,6 +1823,20 @@ mshv_partition_release(struct inode *inode, struct file *filp)
> return 0;
> }
>
> +/* Given a process tgid, return partition id if it is a VMM process */
> +u64 mshv_pid_to_partid(pid_t tgid)
> +{
> + struct mshv_partition *pt;
> + int i;
> +
> + hash_for_each_rcu(mshv_root.pt_htable, i, pt, pt_hnode)
> + if (pt->pt_vmm_tgid == tgid)
> + return pt->pt_id;
> +
> + return HV_PARTITION_ID_INVALID;
> +}
> +EXPORT_SYMBOL_GPL(mshv_pid_to_partid);
> +
> static int
> add_partition(struct mshv_partition *partition)
> {
> @@ -1987,13 +2001,20 @@ mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> goto delete_partition;
>
> ret = mshv_init_async_handler(partition);
> - if (!ret) {
> - ret = FD_ADD(O_CLOEXEC, anon_inode_getfile("mshv_partition",
> - &mshv_partition_fops,
> - partition, O_RDWR));
> - if (ret >= 0)
> - return ret;
> - }
> + if (ret)
> + goto rem_partition;
> +
> + ret = FD_ADD(O_CLOEXEC, anon_inode_getfile("mshv_partition",
> + &mshv_partition_fops,
> + partition, O_RDWR));
> + if (ret < 0)
> + goto rem_partition;
> +
> + partition->pt_vmm_tgid = current->tgid;
> +
> + return ret;
> +
> +rem_partition:
> remove_partition(partition);
> delete_partition:
> hv_call_delete_partition(partition->pt_id);
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index ecedab554c80..e46a38916e76 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -211,6 +211,7 @@ void __init ms_hyperv_late_init(void);
> int hv_common_cpu_init(unsigned int cpu);
> int hv_common_cpu_die(unsigned int cpu);
> void hv_identify_partition_type(void);
> +u64 mshv_pid_to_partid(pid_t tgid);
This should go inside the #if IS_ENABLED(CONFIG_MSHV_ROOT) section.
>
> /**
> * hv_cpu_number_to_vp_number() - Map CPU to VP.
^ permalink raw reply
* RE: [PATCH v4 7/7] mshv: Add debugfs to view hypervisor statistics
From: Michael Kelley @ 2026-01-23 17:09 UTC (permalink / raw)
To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, skinsburskii@linux.microsoft.com
Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, longli@microsoft.com,
prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
paekkaladevi@linux.microsoft.com, Jinank Jain
In-Reply-To: <20260121214623.76374-8-nunodasneves@linux.microsoft.com>
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
>
> Introduce a debugfs interface to expose root and child partition stats
> when running with mshv_root.
>
> Create a debugfs directory "mshv" containing 'stats' files organized by
> type and id. A stats file contains a number of counters depending on
> its type. e.g. an excerpt from a VP stats file:
>
> TotalRunTime : 1997602722
> HypervisorRunTime : 649671371
> RemoteNodeRunTime : 0
> NormalizedRunTime : 1997602721
> IdealCpu : 0
> HypercallsCount : 1708169
> HypercallsTime : 111914774
> PageInvalidationsCount : 0
> PageInvalidationsTime : 0
>
> On a root partition with some active child partitions, the entire
> directory structure may look like:
>
> mshv/
> stats # hypervisor stats
> lp/ # logical processors
> 0/ # LP id
> stats # LP 0 stats
> 1/
> 2/
> 3/
> partition/ # partition stats
> 1/ # root partition id
> stats # root partition stats
> vp/ # root virtual processors
> 0/ # root VP id
> stats # root VP 0 stats
> 1/
> 2/
> 3/
> 42/ # child partition id
> stats # child partition stats
> vp/ # child VPs
> 0/ # child VP id
> stats # child VP 0 stats
> 1/
> 43/
> 55/
>
> On L1VH, some stats are not present as it does not own the hardware
> like the root partition does:
> - The hypervisor and lp stats are not present
> - L1VH's partition directory is named "self" because it can't get its
> own id
> - Some of L1VH's partition and VP stats fields are not populated, because
> it can't map its own HV_STATS_AREA_PARENT page.
>
> Co-developed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> Co-developed-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> Co-developed-by: Mukesh Rathor <mrathor@linux.microsoft.com>
> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
> Co-developed-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
> drivers/hv/Makefile | 1 +
> drivers/hv/hv_counters.c | 1 +
> drivers/hv/hv_synic.c | 177 +++++++++
This new file hv_synic.c seems to be spurious. It looks like you unintentionally
picked up this new file from the build tree where you were creating the patches
for this series.
> drivers/hv/mshv_debugfs.c | 703 ++++++++++++++++++++++++++++++++++++
> drivers/hv/mshv_root.h | 34 ++
> drivers/hv/mshv_root_main.c | 26 +-
> 6 files changed, 940 insertions(+), 2 deletions(-)
> create mode 100644 drivers/hv/hv_synic.c
> create mode 100644 drivers/hv/mshv_debugfs.c
>
> diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> index a49f93c2d245..2593711c3628 100644
> --- a/drivers/hv/Makefile
> +++ b/drivers/hv/Makefile
> @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
> hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
> mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
> mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
> +mshv_root-$(CONFIG_DEBUG_FS) += mshv_debugfs.o
> mshv_vtl-y := mshv_vtl_main.o
>
> # Code that must be built-in
> diff --git a/drivers/hv/hv_counters.c b/drivers/hv/hv_counters.c
> index a8e07e72cc29..45ff3d663e56 100644
> --- a/drivers/hv/hv_counters.c
> +++ b/drivers/hv/hv_counters.c
> @@ -3,6 +3,7 @@
> * Copyright (c) 2026, Microsoft Corporation.
> *
> * Data for printing stats page counters via debugfs.
> + * Included directly in mshv_debugfs.c.
> *
> * Authors: Microsoft Linux virtualization team
> */
> diff --git a/drivers/hv/hv_synic.c b/drivers/hv/hv_synic.c
> new file mode 100644
> index 000000000000..cc81d78887f2
> --- /dev/null
> +++ b/drivers/hv/hv_synic.c
> @@ -0,0 +1,177 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2025, Microsoft Corporation.
> + *
> + * Authors: Microsoft Linux virtualization team
> + */
> +
> +/*
> + root l1vh vtl
> +vmbus
> +
> +guest
> +vmbus, nothing else
> +
> +vtl
> +mshv_vtl uses intercept SINT, VTL2_VMBUS_SINT_INDEX (7, not in hvgdk_mini lol)
> +vmbus
> +
> +bm root
> +mshv_root, no vmbus
> +
> +nested root
> +mshv_root uses L1
> +vmbus uses L0 (NESTED regs)
> +
> +l1vh
> +mshv_root and vmbus use same regs
> +
> +*/
> +
> +struct hv_synic_page {
> + u64 msr;
> + void *ptr;
> + struct kref refcount;
> +};
> +
> +void *hv_get_synic_page(u32 msr) {
> + struct hv_synic_page *page_obj;
> + page_obj = kmalloc
> +}
> +
> +
> +#define HV_SYNIC_PAGE_STRUCT(type, name) \
> +struct
> +
> +/* UGH */
> +struct hv_percpu_synic_cxt {
> + struct {
> + struct hv_message_page *ptr;
> + refcount_t pt_ref_count;
> + } hv_simp;
> + struct hv_message_page *hv_simp;
> + struct hv_synic_event_flags_page *hv_siefp;
> + struct hv_synic_event_ring_page *hv_sierp;
> +};
> +
> +int hv_setup_sint(u32 sint_msr)
> +{
> + union hv_synic_sint sint;
> +
> + // TODO validate sint_msr
> +
> + sint.as_uint64 = hv_get_msr(sint_msr);
> + sint.vector = vmbus_interrupt;
> + sint.masked = false;
> + sint.auto_eoi = hv_recommend_using_aeoi();
> +
> + hv_set_msr(sint_msr, sint.as_uint64);
> +
> + return 0;
> +}
> +
> +void *hv_setup_synic_page(u32 msr)
> +{
> + void *addr;
> + struct hv_synic_page synic_page;
> +
> + // TODO validate msr
> +
> + synic_page.as_uint64 = hv_get_msr(msr);
> + synic_page.enabled = 1;
> +
> + if (ms_hyperv.paravisor_present || hv_root_partition()) {
> + /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> + u64 base = (synic_page.gpa << HV_HYP_PAGE_SHIFT) &
> + ~ms_hyperv.shared_gpa_boundary;
> + addr = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
> + if (!addr) {
> + pr_err("%s: Fail to map synic page from %#x.\n",
> + __func__, msr);
> + return NULL;
> + }
> + } else {
> + addr = (void *)__get_free_page(GFP_KERNEL);
> + if (!page)
> + return NULL;
> +
> + memset(page, 0, PAGE_SIZE);
> + synic_page.gpa = virt_to_phys(addr) >> HV_HYP_PAGE_SHIFT;
> + }
> + hv_set_msr(msr, synic_page.as_uint64);
> +
> + return addr;
> +}
> +
> +/*
> + * hv_hyp_synic_enable_regs - Initialize the Synthetic Interrupt Controller
> + * with the hypervisor.
> + */
> +void hv_hyp_synic_enable_regs(unsigned int cpu)
> +{
> + struct hv_per_cpu_context *hv_cpu =
> + per_cpu_ptr(hv_context.cpu_context, cpu);
> + union hv_synic_simp simp;
> + union hv_synic_siefp siefp;
> + union hv_synic_sint shared_sint;
> +
> + /* Setup the Synic's message page with the hypervisor. */
> + simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
> + simp.simp_enabled = 1;
> +
> + if (ms_hyperv.paravisor_present || hv_root_partition()) {
> + /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> + u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
> + ~ms_hyperv.shared_gpa_boundary;
> + hv_cpu->hyp_synic_message_page =
> + (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
> + if (!hv_cpu->hyp_synic_message_page)
> + pr_err("Fail to map synic message page.\n");
> + } else {
> + simp.base_simp_gpa = virt_to_phys(hv_cpu->hyp_synic_message_page)
> + >> HV_HYP_PAGE_SHIFT;
> + }
> +
> + hv_set_msr(HV_MSR_SIMP, simp.as_uint64);
> +
> + /* Setup the Synic's event page with the hypervisor. */
> + siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
> + siefp.siefp_enabled = 1;
> +
> + if (ms_hyperv.paravisor_present || hv_root_partition()) {
> + /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> + u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
> + ~ms_hyperv.shared_gpa_boundary;
> + hv_cpu->hyp_synic_event_page =
> + (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
> + if (!hv_cpu->hyp_synic_event_page)
> + pr_err("Fail to map synic event page.\n");
> + } else {
> + siefp.base_siefp_gpa = virt_to_phys(hv_cpu->hyp_synic_event_page)
> + >> HV_HYP_PAGE_SHIFT;
> + }
> +
> + hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64);
> + hv_enable_coco_interrupt(cpu, vmbus_interrupt, true);
> +
> + /* Setup the shared SINT. */
> + if (vmbus_irq != -1)
> + enable_percpu_irq(vmbus_irq, 0);
> + shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);
> +
> + shared_sint.vector = vmbus_interrupt;
> + shared_sint.masked = false;
> + shared_sint.auto_eoi = hv_recommend_using_aeoi();
> + hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> +}
> +
> +static void hv_hyp_synic_enable_interrupts(void)
> +{
> + union hv_synic_scontrol sctrl;
> +
> + /* Enable the global synic bit */
> + sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
> + sctrl.enable = 1;
> +
> + hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
> +}
> diff --git a/drivers/hv/mshv_debugfs.c b/drivers/hv/mshv_debugfs.c
> new file mode 100644
> index 000000000000..72eb0ae44e4b
> --- /dev/null
> +++ b/drivers/hv/mshv_debugfs.c
> @@ -0,0 +1,703 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2026, Microsoft Corporation.
> + *
> + * The /sys/kernel/debug/mshv directory contents.
> + * Contains various statistics data, provided by the hypervisor.
> + *
> + * Authors: Microsoft Linux virtualization team
> + */
> +
> +#include <linux/debugfs.h>
> +#include <linux/stringify.h>
> +#include <asm/mshyperv.h>
> +#include <linux/slab.h>
> +
> +#include "mshv.h"
> +#include "mshv_root.h"
> +
> +#include "hv_counters.c"
> +
> +#define U32_BUF_SZ 11
> +#define U64_BUF_SZ 21
> +#define NUM_STATS_AREAS (HV_STATS_AREA_PARENT + 1)
This is sort of weak in that it doesn't really guard against
changes in the enum that defines HV_STATS_AREA_PARENT.
It would work if it were defined as part of the enum, but then
you are changing the code coming from the Windows world,
which I know is a different problem.
The enum is part of the hypervisor ABI and hence isn't likely to
change, but it still feels funny to define NUM_STATS_AREAS like
this. I would suggest dropping this and just using
HV_STATS_AREA_COUNT for the memory allocations even
though doing so will allocate space for a stats area pointer
that isn't used by this code. It's only a few bytes.
> +
> +static struct dentry *mshv_debugfs;
> +static struct dentry *mshv_debugfs_partition;
> +static struct dentry *mshv_debugfs_lp;
> +static struct dentry **parent_vp_stats;
> +static struct dentry *parent_partition_stats;
> +
> +static u64 mshv_lps_count;
> +static struct hv_stats_page **mshv_lps_stats;
> +
> +static int lp_stats_show(struct seq_file *m, void *v)
> +{
> + const struct hv_stats_page *stats = m->private;
> + struct hv_counter_entry *entry = hv_lp_counters;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(hv_lp_counters); i++, entry++)
> + seq_printf(m, "%-29s: %llu\n", entry->name,
> + stats->data[entry->idx]);
> +
> + return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(lp_stats);
> +
> +static void mshv_lp_stats_unmap(u32 lp_index)
> +{
> + union hv_stats_object_identity identity = {
> + .lp.lp_index = lp_index,
> + .lp.stats_area_type = HV_STATS_AREA_SELF,
> + };
> + int err;
> +
> + err = hv_unmap_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR,
> + mshv_lps_stats[lp_index], &identity);
> + if (err)
> + pr_err("%s: failed to unmap logical processor %u stats, err: %d\n",
> + __func__, lp_index, err);
Perhaps set mshv_lps_stats[lp_index] to NULL? I don't think it's actually
required, but similar code later in this file sets some pointers to NULL
just as good hygiene.
> +}
> +
> +static struct hv_stats_page * __init mshv_lp_stats_map(u32 lp_index)
> +{
> + union hv_stats_object_identity identity = {
> + .lp.lp_index = lp_index,
> + .lp.stats_area_type = HV_STATS_AREA_SELF,
> + };
> + struct hv_stats_page *stats;
> + int err;
> +
> + err = hv_map_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, &identity,
> + &stats);
> + if (err) {
> + pr_err("%s: failed to map logical processor %u stats, err: %d\n",
> + __func__, lp_index, err);
> + return ERR_PTR(err);
> + }
> + mshv_lps_stats[lp_index] = stats;
> +
> + return stats;
> +}
> +
> +static struct hv_stats_page * __init lp_debugfs_stats_create(u32 lp_index,
> + struct dentry *parent)
> +{
> + struct dentry *dentry;
> + struct hv_stats_page *stats;
> +
> + stats = mshv_lp_stats_map(lp_index);
> + if (IS_ERR(stats))
> + return stats;
> +
> + dentry = debugfs_create_file("stats", 0400, parent,
> + stats, &lp_stats_fops);
> + if (IS_ERR(dentry)) {
> + mshv_lp_stats_unmap(lp_index);
> + return ERR_CAST(dentry);
> + }
> + return stats;
> +}
> +
> +static int __init lp_debugfs_create(u32 lp_index, struct dentry *parent)
> +{
> + struct dentry *idx;
> + char lp_idx_str[U32_BUF_SZ];
> + struct hv_stats_page *stats;
> + int err;
> +
> + sprintf(lp_idx_str, "%u", lp_index);
> +
> + idx = debugfs_create_dir(lp_idx_str, parent);
> + if (IS_ERR(idx))
> + return PTR_ERR(idx);
> +
> + stats = lp_debugfs_stats_create(lp_index, idx);
> + if (IS_ERR(stats)) {
> + err = PTR_ERR(stats);
> + goto remove_debugfs_lp_idx;
> + }
> +
> + return 0;
> +
> +remove_debugfs_lp_idx:
> + debugfs_remove_recursive(idx);
> + return err;
> +}
> +
> +static void mshv_debugfs_lp_remove(void)
> +{
> + int lp_index;
> +
> + debugfs_remove_recursive(mshv_debugfs_lp);
> +
> + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++)
> + mshv_lp_stats_unmap(lp_index);
> +
> + kfree(mshv_lps_stats);
> + mshv_lps_stats = NULL;
> +}
> +
> +static int __init mshv_debugfs_lp_create(struct dentry *parent)
> +{
> + struct dentry *lp_dir;
> + int err, lp_index;
> +
> + mshv_lps_stats = kcalloc(mshv_lps_count,
> + sizeof(*mshv_lps_stats),
> + GFP_KERNEL_ACCOUNT);
> +
> + if (!mshv_lps_stats)
> + return -ENOMEM;
> +
> + lp_dir = debugfs_create_dir("lp", parent);
> + if (IS_ERR(lp_dir)) {
> + err = PTR_ERR(lp_dir);
> + goto free_lp_stats;
> + }
> +
> + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) {
> + err = lp_debugfs_create(lp_index, lp_dir);
> + if (err)
> + goto remove_debugfs_lps;
> + }
> +
> + mshv_debugfs_lp = lp_dir;
> +
> + return 0;
> +
> +remove_debugfs_lps:
> + for (lp_index -= 1; lp_index >= 0; lp_index--)
> + mshv_lp_stats_unmap(lp_index);
> + debugfs_remove_recursive(lp_dir);
> +free_lp_stats:
> + kfree(mshv_lps_stats);
Set mshv_lps_stats to NULL?
> +
> + return err;
> +}
> +
> +static int vp_stats_show(struct seq_file *m, void *v)
> +{
> + const struct hv_stats_page **pstats = m->private;
> + struct hv_counter_entry *entry = hv_vp_counters;
> + int i;
> +
> + /*
> + * For VP and partition stats, there may be two stats areas mapped,
> + * SELF and PARENT. These refer to the privilege level of the data in
> + * each page. Some fields may be 0 in SELF and nonzero in PARENT, or
> + * vice versa.
> + *
> + * Hence, prioritize printing from the PARENT page (more privileged
> + * data), but use the value from the SELF page if the PARENT value is
> + * 0.
> + */
> +
> + for (i = 0; i < ARRAY_SIZE(hv_vp_counters); i++, entry++) {
> + u64 parent_val = pstats[HV_STATS_AREA_PARENT]->data[entry->idx];
> + u64 self_val = pstats[HV_STATS_AREA_SELF]->data[entry->idx];
> +
> + seq_printf(m, "%-43s: %llu\n", entry->name,
> + parent_val ? parent_val : self_val);
> + }
> +
> + return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(vp_stats);
> +
> +static void vp_debugfs_remove(struct dentry *vp_stats)
> +{
> + debugfs_remove_recursive(vp_stats->d_parent);
> +}
> +
> +static int vp_debugfs_create(u64 partition_id, u32 vp_index,
> + struct hv_stats_page **pstats,
> + struct dentry **vp_stats_ptr,
> + struct dentry *parent)
> +{
> + struct dentry *vp_idx_dir, *d;
> + char vp_idx_str[U32_BUF_SZ];
> + int err;
> +
> + sprintf(vp_idx_str, "%u", vp_index);
> +
> + vp_idx_dir = debugfs_create_dir(vp_idx_str, parent);
> + if (IS_ERR(vp_idx_dir))
> + return PTR_ERR(vp_idx_dir);
> +
> + d = debugfs_create_file("stats", 0400, vp_idx_dir,
> + pstats, &vp_stats_fops);
> + if (IS_ERR(d)) {
> + err = PTR_ERR(d);
> + goto remove_debugfs_vp_idx;
> + }
> +
> + *vp_stats_ptr = d;
> +
> + return 0;
> +
> +remove_debugfs_vp_idx:
> + debugfs_remove_recursive(vp_idx_dir);
> + return err;
> +}
> +
> +static int partition_stats_show(struct seq_file *m, void *v)
> +{
> + const struct hv_stats_page **pstats = m->private;
> + struct hv_counter_entry *entry = hv_partition_counters;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(hv_partition_counters); i++, entry++) {
> + u64 parent_val = pstats[HV_STATS_AREA_PARENT]->data[entry->idx];
> + u64 self_val = pstats[HV_STATS_AREA_SELF]->data[entry->idx];
> +
> + seq_printf(m, "%-32s: %llu\n", entry->name,
> + parent_val ? parent_val : self_val);
> + }
> +
> + return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(partition_stats);
> +
> +static void mshv_partition_stats_unmap(u64 partition_id,
> + struct hv_stats_page *stats_page,
> + enum hv_stats_area_type stats_area_type)
> +{
> + union hv_stats_object_identity identity = {
> + .partition.partition_id = partition_id,
> + .partition.stats_area_type = stats_area_type,
> + };
> + int err;
> +
> + err = hv_unmap_stats_page(HV_STATS_OBJECT_PARTITION, stats_page,
> + &identity);
> + if (err)
> + pr_err("%s: failed to unmap partition %lld %s stats, err: %d\n",
> + __func__, partition_id,
> + (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent",
> + err);
> +}
> +
> +static struct hv_stats_page *mshv_partition_stats_map(u64 partition_id,
> + enum hv_stats_area_type stats_area_type)
> +{
> + union hv_stats_object_identity identity = {
> + .partition.partition_id = partition_id,
> + .partition.stats_area_type = stats_area_type,
> + };
> + struct hv_stats_page *stats;
> + int err;
> +
> + err = hv_map_stats_page(HV_STATS_OBJECT_PARTITION, &identity, &stats);
> + if (err) {
> + pr_err("%s: failed to map partition %lld %s stats, err: %d\n",
> + __func__, partition_id,
> + (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent",
> + err);
> + return ERR_PTR(err);
> + }
> + return stats;
> +}
> +
> +static int mshv_debugfs_partition_stats_create(u64 partition_id,
> + struct dentry **partition_stats_ptr,
> + struct dentry *parent)
> +{
> + struct dentry *dentry;
> + struct hv_stats_page **pstats;
> + int err;
> +
> + pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *),
> + GFP_KERNEL_ACCOUNT);
> + if (!pstats)
> + return -ENOMEM;
> +
> + pstats[HV_STATS_AREA_SELF] = mshv_partition_stats_map(partition_id,
> + HV_STATS_AREA_SELF);
> + if (IS_ERR(pstats[HV_STATS_AREA_SELF])) {
> + err = PTR_ERR(pstats[HV_STATS_AREA_SELF]);
> + goto cleanup;
> + }
> +
> + /*
> + * L1VH partition cannot access its partition stats in parent area.
> + */
> + if (is_l1vh_parent(partition_id)) {
> + pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF];
> + } else {
> + pstats[HV_STATS_AREA_PARENT] = mshv_partition_stats_map(partition_id,
> + HV_STATS_AREA_PARENT);
> + if (IS_ERR(pstats[HV_STATS_AREA_PARENT])) {
> + err = PTR_ERR(pstats[HV_STATS_AREA_PARENT]);
> + goto unmap_self;
> + }
> + if (!pstats[HV_STATS_AREA_PARENT])
> + pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF];
> + }
> +
> + dentry = debugfs_create_file("stats", 0400, parent,
> + pstats, &partition_stats_fops);
> + if (IS_ERR(dentry)) {
> + err = PTR_ERR(dentry);
> + goto unmap_partition_stats;
> + }
> +
> + *partition_stats_ptr = dentry;
> + return 0;
> +
> +unmap_partition_stats:
> + if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF])
> + mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_PARENT],
> + HV_STATS_AREA_PARENT);
> +unmap_self:
> + mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_SELF],
> + HV_STATS_AREA_SELF);
> +cleanup:
> + kfree(pstats);
> + return err;
> +}
> +
> +static void partition_debugfs_remove(u64 partition_id, struct dentry *dentry)
> +{
> + struct hv_stats_page **pstats = NULL;
> +
> + pstats = dentry->d_inode->i_private;
> +
> + debugfs_remove_recursive(dentry->d_parent);
> +
> + if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF]) {
> + mshv_partition_stats_unmap(partition_id,
> + pstats[HV_STATS_AREA_PARENT],
> + HV_STATS_AREA_PARENT);
> + }
> +
> + mshv_partition_stats_unmap(partition_id,
> + pstats[HV_STATS_AREA_SELF],
> + HV_STATS_AREA_SELF);
> +
> + kfree(pstats);
> +}
> +
> +static int partition_debugfs_create(u64 partition_id,
> + struct dentry **vp_dir_ptr,
> + struct dentry **partition_stats_ptr,
> + struct dentry *parent)
> +{
> + char part_id_str[U64_BUF_SZ];
> + struct dentry *part_id_dir, *vp_dir;
> + int err;
> +
> + if (is_l1vh_parent(partition_id))
> + sprintf(part_id_str, "self");
> + else
> + sprintf(part_id_str, "%llu", partition_id);
> +
> + part_id_dir = debugfs_create_dir(part_id_str, parent);
> + if (IS_ERR(part_id_dir))
> + return PTR_ERR(part_id_dir);
> +
> + vp_dir = debugfs_create_dir("vp", part_id_dir);
> + if (IS_ERR(vp_dir)) {
> + err = PTR_ERR(vp_dir);
> + goto remove_debugfs_partition_id;
> + }
> +
> + err = mshv_debugfs_partition_stats_create(partition_id,
> + partition_stats_ptr,
> + part_id_dir);
> + if (err)
> + goto remove_debugfs_partition_id;
> +
> + *vp_dir_ptr = vp_dir;
> +
> + return 0;
> +
> +remove_debugfs_partition_id:
> + debugfs_remove_recursive(part_id_dir);
> + return err;
> +}
> +
> +static void parent_vp_debugfs_remove(u32 vp_index,
> + struct dentry *vp_stats_ptr)
> +{
> + struct hv_stats_page **pstats;
> +
> + pstats = vp_stats_ptr->d_inode->i_private;
> + vp_debugfs_remove(vp_stats_ptr);
> + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats);
> + kfree(pstats);
> +}
> +
> +static void mshv_debugfs_parent_partition_remove(void)
> +{
> + int idx;
> +
> + for_each_online_cpu(idx)
> + parent_vp_debugfs_remove(idx,
The first parameter here ("idx") should be translated through the
hv_vp_index[] array like is done in mshv_debugfs_parent_partition_create().
> + parent_vp_stats[idx]);
> +
> + partition_debugfs_remove(hv_current_partition_id,
> + parent_partition_stats);
> + kfree(parent_vp_stats);
> + parent_vp_stats = NULL;
> + parent_partition_stats = NULL;
> +
Extra blank line.
> +}
> +
> +static int __init parent_vp_debugfs_create(u32 vp_index,
> + struct dentry **vp_stats_ptr,
> + struct dentry *parent)
> +{
> + struct hv_stats_page **pstats;
> + int err;
> +
> + pstats = kcalloc(2, sizeof(struct hv_stats_page *), GFP_KERNEL_ACCOUNT);
Another case of using "2" that should be changed.
> + if (!pstats)
> + return -ENOMEM;
> +
> + err = mshv_vp_stats_map(hv_current_partition_id, vp_index, pstats);
> + if (err)
> + goto cleanup;
> +
> + err = vp_debugfs_create(hv_current_partition_id, vp_index, pstats,
> + vp_stats_ptr, parent);
> + if (err)
> + goto unmap_vp_stats;
> +
> + return 0;
> +
> +unmap_vp_stats:
> + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats);
> +cleanup:
> + kfree(pstats);
> + return err;
> +}
> +
> +static int __init mshv_debugfs_parent_partition_create(void)
> +{
> + struct dentry *vp_dir;
> + int err, idx, i;
> +
> + mshv_debugfs_partition = debugfs_create_dir("partition",
> + mshv_debugfs);
> + if (IS_ERR(mshv_debugfs_partition))
> + return PTR_ERR(mshv_debugfs_partition);
> +
> + err = partition_debugfs_create(hv_current_partition_id,
> + &vp_dir,
> + &parent_partition_stats,
> + mshv_debugfs_partition);
> + if (err)
> + goto remove_debugfs_partition;
> +
> + parent_vp_stats = kcalloc(num_possible_cpus(),
num_possible_cpus() should not be used to allocate an array that is
then indexed by the Linux CPU number. Use nr_cpu_ids instead when
allocating the array. See commit 16b18fdf6bc7 for the full explanation.
As explained in that commit message, using num_possible_cpus()
doesn't break things now, but it might in the future.
> + sizeof(*parent_vp_stats),
> + GFP_KERNEL);
> + if (!parent_vp_stats) {
> + err = -ENOMEM;
> + goto remove_debugfs_partition;
> + }
> +
> + for_each_online_cpu(idx) {
> + err = parent_vp_debugfs_create(hv_vp_index[idx],
> + &parent_vp_stats[idx],
> + vp_dir);
> + if (err)
> + goto remove_debugfs_partition_vp;
> + }
> +
> + return 0;
> +
> +remove_debugfs_partition_vp:
> + for_each_online_cpu(i) {
> + if (i >= idx)
> + break;
> + parent_vp_debugfs_remove(i, parent_vp_stats[i]);
> + }
> + partition_debugfs_remove(hv_current_partition_id,
> + parent_partition_stats);
> +
> + kfree(parent_vp_stats);
> + parent_vp_stats = NULL;
> + parent_partition_stats = NULL;
> +
> +remove_debugfs_partition:
> + debugfs_remove_recursive(mshv_debugfs_partition);
> + mshv_debugfs_partition = NULL;
> + return err;
> +}
> +
> +static int hv_stats_show(struct seq_file *m, void *v)
> +{
> + const struct hv_stats_page *stats = m->private;
> + struct hv_counter_entry *entry = hv_hypervisor_counters;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(hv_hypervisor_counters); i++, entry++)
> + seq_printf(m, "%-25s: %llu\n", entry->name,
> + stats->data[entry->idx]);
> +
> + return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(hv_stats);
> +
> +static void mshv_hv_stats_unmap(void)
> +{
> + union hv_stats_object_identity identity = {
> + .hv.stats_area_type = HV_STATS_AREA_SELF,
> + };
> + int err;
> +
> + err = hv_unmap_stats_page(HV_STATS_OBJECT_HYPERVISOR, NULL, &identity);
> + if (err)
> + pr_err("%s: failed to unmap hypervisor stats: %d\n",
> + __func__, err);
> +}
> +
> +static void * __init mshv_hv_stats_map(void)
> +{
> + union hv_stats_object_identity identity = {
> + .hv.stats_area_type = HV_STATS_AREA_SELF,
> + };
> + struct hv_stats_page *stats;
> + int err;
> +
> + err = hv_map_stats_page(HV_STATS_OBJECT_HYPERVISOR, &identity, &stats);
> + if (err) {
> + pr_err("%s: failed to map hypervisor stats: %d\n",
> + __func__, err);
> + return ERR_PTR(err);
> + }
> + return stats;
> +}
> +
> +static int __init mshv_debugfs_hv_stats_create(struct dentry *parent)
> +{
> + struct dentry *dentry;
> + u64 *stats;
> + int err;
> +
> + stats = mshv_hv_stats_map();
> + if (IS_ERR(stats))
> + return PTR_ERR(stats);
> +
> + dentry = debugfs_create_file("stats", 0400, parent,
> + stats, &hv_stats_fops);
> + if (IS_ERR(dentry)) {
> + err = PTR_ERR(dentry);
> + pr_err("%s: failed to create hypervisor stats dentry: %d\n",
> + __func__, err);
> + goto unmap_hv_stats;
> + }
> +
> + mshv_lps_count = num_present_cpus();
This method of setting mshv_lps_count, and the iteration through the lp_index
in mshv_debugfs_lp_create() and mshv_debugfs_lp_remove(), seems risky. The
lp_index gets passed to the hypervisor, so it must be the hypervisor's concept
of the lp_index. Is that always guaranteed to be the same as Linux's numbering
of the present CPUs? There may be edge cases where it is not. For example, what
if Linux in the root partition were booted with the "nosmt" kernel boot option,
such that Linux ignores all the 2nd hyper-threads in a core? Could that create
a numbering mismatch?
Note that for vp_index, we have the hv_vp_index[] array for translating from
Linux's concept of a CPU number to Hyper-V's concept of vp_index. For
example, mshv_debugfs_parent_partition_create() correctly goes through
this translation. And presumably when the VMM code does the
MSHV_CREATE_VP ioctl, it is passing in a hypervisor vp_index.
Everything may work fine "as is" for the moment, but the lp functions here
are still conflating the hypervisor's LP numbering with Linux's CPU numbering,
and that seems like a recipe for trouble somewhere down the road. I'm
not sure how the hypervisor interprets the "lp_index" part of the identity
argument passed to a hypercall, so I'm not sure what the fix is.
> +
> + return 0;
> +
> +unmap_hv_stats:
> + mshv_hv_stats_unmap();
> + return err;
> +}
> +
> +int mshv_debugfs_vp_create(struct mshv_vp *vp)
> +{
> + struct mshv_partition *p = vp->vp_partition;
> +
> + if (!mshv_debugfs)
> + return 0;
> +
> + return vp_debugfs_create(p->pt_id, vp->vp_index,
> + vp->vp_stats_pages,
> + &vp->vp_stats_dentry,
> + p->pt_vp_dentry);
> +}
> +
> +void mshv_debugfs_vp_remove(struct mshv_vp *vp)
> +{
> + if (!mshv_debugfs)
> + return;
> +
> + vp_debugfs_remove(vp->vp_stats_dentry);
> +}
> +
> +int mshv_debugfs_partition_create(struct mshv_partition *partition)
> +{
> + int err;
> +
> + if (!mshv_debugfs)
> + return 0;
> +
> + err = partition_debugfs_create(partition->pt_id,
> + &partition->pt_vp_dentry,
> + &partition->pt_stats_dentry,
> + mshv_debugfs_partition);
> + if (err)
> + return err;
> +
> + return 0;
> +}
> +
> +void mshv_debugfs_partition_remove(struct mshv_partition *partition)
> +{
> + if (!mshv_debugfs)
> + return;
> +
> + partition_debugfs_remove(partition->pt_id,
> + partition->pt_stats_dentry);
> +}
> +
> +int __init mshv_debugfs_init(void)
> +{
> + int err;
> +
> + mshv_debugfs = debugfs_create_dir("mshv", NULL);
> + if (IS_ERR(mshv_debugfs)) {
> + pr_err("%s: failed to create debugfs directory\n", __func__);
> + return PTR_ERR(mshv_debugfs);
> + }
> +
> + if (hv_root_partition()) {
> + err = mshv_debugfs_hv_stats_create(mshv_debugfs);
> + if (err)
> + goto remove_mshv_dir;
> +
> + err = mshv_debugfs_lp_create(mshv_debugfs);
> + if (err)
> + goto unmap_hv_stats;
> + }
> +
> + err = mshv_debugfs_parent_partition_create();
> + if (err)
> + goto unmap_lp_stats;
> +
> + return 0;
> +
> +unmap_lp_stats:
> + if (hv_root_partition()) {
> + mshv_debugfs_lp_remove();
> + mshv_debugfs_lp = NULL;
> + }
> +unmap_hv_stats:
> + if (hv_root_partition())
> + mshv_hv_stats_unmap();
> +remove_mshv_dir:
> + debugfs_remove_recursive(mshv_debugfs);
> + mshv_debugfs = NULL;
> + return err;
> +}
> +
> +void mshv_debugfs_exit(void)
> +{
> + mshv_debugfs_parent_partition_remove();
> +
> + if (hv_root_partition()) {
> + mshv_debugfs_lp_remove();
> + mshv_debugfs_lp = NULL;
> + mshv_hv_stats_unmap();
> + }
> +
> + debugfs_remove_recursive(mshv_debugfs);
> + mshv_debugfs = NULL;
> + mshv_debugfs_partition = NULL;
> +}
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index e4912b0618fa..7332d9af8373 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -52,6 +52,9 @@ struct mshv_vp {
> unsigned int kicked_by_hv;
> wait_queue_head_t vp_suspend_queue;
> } run;
> +#if IS_ENABLED(CONFIG_DEBUG_FS)
> + struct dentry *vp_stats_dentry;
> +#endif
> };
>
> #define vp_fmt(fmt) "p%lluvp%u: " fmt
> @@ -136,6 +139,10 @@ struct mshv_partition {
> u64 isolation_type;
> bool import_completed;
> bool pt_initialized;
> +#if IS_ENABLED(CONFIG_DEBUG_FS)
> + struct dentry *pt_stats_dentry;
> + struct dentry *pt_vp_dentry;
> +#endif
> };
>
> #define pt_fmt(fmt) "p%llu: " fmt
> @@ -327,6 +334,33 @@ int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
> int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
> void *property_value, size_t property_value_sz);
>
> +#if IS_ENABLED(CONFIG_DEBUG_FS)
> +int __init mshv_debugfs_init(void);
> +void mshv_debugfs_exit(void);
> +
> +int mshv_debugfs_partition_create(struct mshv_partition *partition);
> +void mshv_debugfs_partition_remove(struct mshv_partition *partition);
> +int mshv_debugfs_vp_create(struct mshv_vp *vp);
> +void mshv_debugfs_vp_remove(struct mshv_vp *vp);
> +#else
> +static inline int __init mshv_debugfs_init(void)
> +{
> + return 0;
> +}
> +static inline void mshv_debugfs_exit(void) { }
> +
> +static inline int mshv_debugfs_partition_create(struct mshv_partition *partition)
> +{
> + return 0;
> +}
> +static inline void mshv_debugfs_partition_remove(struct mshv_partition *partition) { }
> +static inline int mshv_debugfs_vp_create(struct mshv_vp *vp)
> +{
> + return 0;
> +}
> +static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { }
> +#endif
> +
> extern struct mshv_root mshv_root;
> extern enum hv_scheduler_type hv_scheduler_type;
> extern u8 * __percpu *hv_synic_eventring_tail;
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 12825666e21b..f4654fb8cd23 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1096,6 +1096,10 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
>
> memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages));
>
> + ret = mshv_debugfs_vp_create(vp);
> + if (ret)
> + goto put_partition;
> +
> /*
> * Keep anon_inode_getfd last: it installs fd in the file struct and
> * thus makes the state accessible in user space.
> @@ -1103,7 +1107,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
> ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp,
> O_RDWR | O_CLOEXEC);
> if (ret < 0)
> - goto put_partition;
> + goto remove_debugfs_vp;
>
> /* already exclusive with the partition mutex for all ioctls */
> partition->pt_vp_count++;
> @@ -1111,6 +1115,8 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
>
> return ret;
>
> +remove_debugfs_vp:
> + mshv_debugfs_vp_remove(vp);
> put_partition:
> mshv_partition_put(partition);
> free_vp:
> @@ -1553,10 +1559,16 @@ mshv_partition_ioctl_initialize(struct mshv_partition *partition)
> if (ret)
> goto withdraw_mem;
>
> + ret = mshv_debugfs_partition_create(partition);
> + if (ret)
> + goto finalize_partition;
> +
> partition->pt_initialized = true;
>
> return 0;
>
> +finalize_partition:
> + hv_call_finalize_partition(partition->pt_id);
> withdraw_mem:
> hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id);
>
> @@ -1736,6 +1748,7 @@ static void destroy_partition(struct mshv_partition *partition)
> if (!vp)
> continue;
>
> + mshv_debugfs_vp_remove(vp);
> mshv_vp_stats_unmap(partition->pt_id, vp->vp_index,
> vp->vp_stats_pages);
>
> @@ -1769,6 +1782,8 @@ static void destroy_partition(struct mshv_partition *partition)
> partition->pt_vp_array[i] = NULL;
> }
>
> + mshv_debugfs_partition_remove(partition);
> +
> /* Deallocates and unmaps everything including vcpus, GPA mappings etc */
> hv_call_finalize_partition(partition->pt_id);
>
> @@ -2314,10 +2329,14 @@ static int __init mshv_parent_partition_init(void)
>
> mshv_init_vmm_caps(dev);
>
> - ret = mshv_irqfd_wq_init();
> + ret = mshv_debugfs_init();
> if (ret)
> goto exit_partition;
>
> + ret = mshv_irqfd_wq_init();
> + if (ret)
> + goto exit_debugfs;
> +
> spin_lock_init(&mshv_root.pt_ht_lock);
> hash_init(mshv_root.pt_htable);
>
> @@ -2325,6 +2344,8 @@ static int __init mshv_parent_partition_init(void)
>
> return 0;
>
> +exit_debugfs:
> + mshv_debugfs_exit();
> exit_partition:
> if (hv_root_partition())
> mshv_root_partition_exit();
> @@ -2341,6 +2362,7 @@ static void __exit mshv_parent_partition_exit(void)
> {
> hv_setup_mshv_handler(NULL);
> mshv_port_table_fini();
> + mshv_debugfs_exit();
> misc_deregister(&mshv_dev);
> mshv_irqfd_wq_cleanup();
> if (hv_root_partition())
> --
> 2.34.1
^ permalink raw reply
* RE: [PATCH v4 6/7] mshv: Add data for printing stats page counters
From: Michael Kelley @ 2026-01-23 17:09 UTC (permalink / raw)
To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, skinsburskii@linux.microsoft.com
Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, longli@microsoft.com,
prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
paekkaladevi@linux.microsoft.com
In-Reply-To: <20260121214623.76374-7-nunodasneves@linux.microsoft.com>
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, January 21, 2026 1:46 PM
>
> Introduce hv_counters.c, containing static data corresponding to
> HV_*_COUNTER enums in the hypervisor source. Defining the enum
> members as an array instead makes more sense, since it will be
> iterated over to print counter information to debugfs.
I would have expected the filename to be mshv_counters.c, so that the association
with the MS hypervisor is clear. And the file is inextricably linked to mshv_debugfs.c,
which of course has the "mshv_" prefix. Or is there some thinking I'm not aware of
for using the "hv_" prefix?
Also, I see in Patch 7 of this series that hv_counters.c is #included as a .c file
in mshv_debugfs.c. Is there a reason for doing the #include instead of adding
hv_counters.c to the Makefile and building it on its own? You would need to
add a handful of extern statements to mshv_root.h so that the tables are
referenceable from mshv_debugfs.c. But that would seem to be the more
normal way of doing things. #including a .c file is unusual.
See one more comment on the last line of this patch ...
>
> Include hypervisor, logical processor, partition, and virtual
> processor counters.
>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
> drivers/hv/hv_counters.c | 488 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 488 insertions(+)
> create mode 100644 drivers/hv/hv_counters.c
>
> diff --git a/drivers/hv/hv_counters.c b/drivers/hv/hv_counters.c
> new file mode 100644
> index 000000000000..a8e07e72cc29
> --- /dev/null
> +++ b/drivers/hv/hv_counters.c
> @@ -0,0 +1,488 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2026, Microsoft Corporation.
> + *
> + * Data for printing stats page counters via debugfs.
> + *
> + * Authors: Microsoft Linux virtualization team
> + */
> +
> +struct hv_counter_entry {
> + char *name;
> + int idx;
> +};
> +
> +/* HV_HYPERVISOR_COUNTER */
> +static struct hv_counter_entry hv_hypervisor_counters[] = {
> + { "HvLogicalProcessors", 1 },
> + { "HvPartitions", 2 },
> + { "HvTotalPages", 3 },
> + { "HvVirtualProcessors", 4 },
> + { "HvMonitoredNotifications", 5 },
> + { "HvModernStandbyEntries", 6 },
> + { "HvPlatformIdleTransitions", 7 },
> + { "HvHypervisorStartupCost", 8 },
> +
> + { "HvIOSpacePages", 10 },
> + { "HvNonEssentialPagesForDump", 11 },
> + { "HvSubsumedPages", 12 },
> +};
> +
> +/* HV_CPU_COUNTER */
> +static struct hv_counter_entry hv_lp_counters[] = {
> + { "LpGlobalTime", 1 },
> + { "LpTotalRunTime", 2 },
> + { "LpHypervisorRunTime", 3 },
> + { "LpHardwareInterrupts", 4 },
> + { "LpContextSwitches", 5 },
> + { "LpInterProcessorInterrupts", 6 },
> + { "LpSchedulerInterrupts", 7 },
> + { "LpTimerInterrupts", 8 },
> + { "LpInterProcessorInterruptsSent", 9 },
> + { "LpProcessorHalts", 10 },
> + { "LpMonitorTransitionCost", 11 },
> + { "LpContextSwitchTime", 12 },
> + { "LpC1TransitionsCount", 13 },
> + { "LpC1RunTime", 14 },
> + { "LpC2TransitionsCount", 15 },
> + { "LpC2RunTime", 16 },
> + { "LpC3TransitionsCount", 17 },
> + { "LpC3RunTime", 18 },
> + { "LpRootVpIndex", 19 },
> + { "LpIdleSequenceNumber", 20 },
> + { "LpGlobalTscCount", 21 },
> + { "LpActiveTscCount", 22 },
> + { "LpIdleAccumulation", 23 },
> + { "LpReferenceCycleCount0", 24 },
> + { "LpActualCycleCount0", 25 },
> + { "LpReferenceCycleCount1", 26 },
> + { "LpActualCycleCount1", 27 },
> + { "LpProximityDomainId", 28 },
> + { "LpPostedInterruptNotifications", 29 },
> + { "LpBranchPredictorFlushes", 30 },
> +#if IS_ENABLED(CONFIG_X86_64)
> + { "LpL1DataCacheFlushes", 31 },
> + { "LpImmediateL1DataCacheFlushes", 32 },
> + { "LpMbFlushes", 33 },
> + { "LpCounterRefreshSequenceNumber", 34 },
> + { "LpCounterRefreshReferenceTime", 35 },
> + { "LpIdleAccumulationSnapshot", 36 },
> + { "LpActiveTscCountSnapshot", 37 },
> + { "LpHwpRequestContextSwitches", 38 },
> + { "LpPlaceholder1", 39 },
> + { "LpPlaceholder2", 40 },
> + { "LpPlaceholder3", 41 },
> + { "LpPlaceholder4", 42 },
> + { "LpPlaceholder5", 43 },
> + { "LpPlaceholder6", 44 },
> + { "LpPlaceholder7", 45 },
> + { "LpPlaceholder8", 46 },
> + { "LpPlaceholder9", 47 },
> + { "LpSchLocalRunListSize", 48 },
> + { "LpReserveGroupId", 49 },
> + { "LpRunningPriority", 50 },
> + { "LpPerfmonInterruptCount", 51 },
> +#elif IS_ENABLED(CONFIG_ARM64)
> + { "LpCounterRefreshSequenceNumber", 31 },
> + { "LpCounterRefreshReferenceTime", 32 },
> + { "LpIdleAccumulationSnapshot", 33 },
> + { "LpActiveTscCountSnapshot", 34 },
> + { "LpHwpRequestContextSwitches", 35 },
> + { "LpPlaceholder2", 36 },
> + { "LpPlaceholder3", 37 },
> + { "LpPlaceholder4", 38 },
> + { "LpPlaceholder5", 39 },
> + { "LpPlaceholder6", 40 },
> + { "LpPlaceholder7", 41 },
> + { "LpPlaceholder8", 42 },
> + { "LpPlaceholder9", 43 },
> + { "LpSchLocalRunListSize", 44 },
> + { "LpReserveGroupId", 45 },
> + { "LpRunningPriority", 46 },
> +#endif
> +};
> +
> +/* HV_PROCESS_COUNTER */
> +static struct hv_counter_entry hv_partition_counters[] = {
> + { "PtVirtualProcessors", 1 },
> +
> + { "PtTlbSize", 3 },
> + { "PtAddressSpaces", 4 },
> + { "PtDepositedPages", 5 },
> + { "PtGpaPages", 6 },
> + { "PtGpaSpaceModifications", 7 },
> + { "PtVirtualTlbFlushEntires", 8 },
> + { "PtRecommendedTlbSize", 9 },
> + { "PtGpaPages4K", 10 },
> + { "PtGpaPages2M", 11 },
> + { "PtGpaPages1G", 12 },
> + { "PtGpaPages512G", 13 },
> + { "PtDevicePages4K", 14 },
> + { "PtDevicePages2M", 15 },
> + { "PtDevicePages1G", 16 },
> + { "PtDevicePages512G", 17 },
> + { "PtAttachedDevices", 18 },
> + { "PtDeviceInterruptMappings", 19 },
> + { "PtIoTlbFlushes", 20 },
> + { "PtIoTlbFlushCost", 21 },
> + { "PtDeviceInterruptErrors", 22 },
> + { "PtDeviceDmaErrors", 23 },
> + { "PtDeviceInterruptThrottleEvents", 24 },
> + { "PtSkippedTimerTicks", 25 },
> + { "PtPartitionId", 26 },
> +#if IS_ENABLED(CONFIG_X86_64)
> + { "PtNestedTlbSize", 27 },
> + { "PtRecommendedNestedTlbSize", 28 },
> + { "PtNestedTlbFreeListSize", 29 },
> + { "PtNestedTlbTrimmedPages", 30 },
> + { "PtPagesShattered", 31 },
> + { "PtPagesRecombined", 32 },
> + { "PtHwpRequestValue", 33 },
> + { "PtAutoSuspendEnableTime", 34 },
> + { "PtAutoSuspendTriggerTime", 35 },
> + { "PtAutoSuspendDisableTime", 36 },
> + { "PtPlaceholder1", 37 },
> + { "PtPlaceholder2", 38 },
> + { "PtPlaceholder3", 39 },
> + { "PtPlaceholder4", 40 },
> + { "PtPlaceholder5", 41 },
> + { "PtPlaceholder6", 42 },
> + { "PtPlaceholder7", 43 },
> + { "PtPlaceholder8", 44 },
> + { "PtHypervisorStateTransferGeneration", 45 },
> + { "PtNumberofActiveChildPartitions", 46 },
> +#elif IS_ENABLED(CONFIG_ARM64)
> + { "PtHwpRequestValue", 27 },
> + { "PtAutoSuspendEnableTime", 28 },
> + { "PtAutoSuspendTriggerTime", 29 },
> + { "PtAutoSuspendDisableTime", 30 },
> + { "PtPlaceholder1", 31 },
> + { "PtPlaceholder2", 32 },
> + { "PtPlaceholder3", 33 },
> + { "PtPlaceholder4", 34 },
> + { "PtPlaceholder5", 35 },
> + { "PtPlaceholder6", 36 },
> + { "PtPlaceholder7", 37 },
> + { "PtPlaceholder8", 38 },
> + { "PtHypervisorStateTransferGeneration", 39 },
> + { "PtNumberofActiveChildPartitions", 40 },
> +#endif
> +};
> +
> +/* HV_THREAD_COUNTER */
> +static struct hv_counter_entry hv_vp_counters[] = {
> + { "VpTotalRunTime", 1 },
> + { "VpHypervisorRunTime", 2 },
> + { "VpRemoteNodeRunTime", 3 },
> + { "VpNormalizedRunTime", 4 },
> + { "VpIdealCpu", 5 },
> +
> + { "VpHypercallsCount", 7 },
> + { "VpHypercallsTime", 8 },
> +#if IS_ENABLED(CONFIG_X86_64)
> + { "VpPageInvalidationsCount", 9 },
> + { "VpPageInvalidationsTime", 10 },
> + { "VpControlRegisterAccessesCount", 11 },
> + { "VpControlRegisterAccessesTime", 12 },
> + { "VpIoInstructionsCount", 13 },
> + { "VpIoInstructionsTime", 14 },
> + { "VpHltInstructionsCount", 15 },
> + { "VpHltInstructionsTime", 16 },
> + { "VpMwaitInstructionsCount", 17 },
> + { "VpMwaitInstructionsTime", 18 },
> + { "VpCpuidInstructionsCount", 19 },
> + { "VpCpuidInstructionsTime", 20 },
> + { "VpMsrAccessesCount", 21 },
> + { "VpMsrAccessesTime", 22 },
> + { "VpOtherInterceptsCount", 23 },
> + { "VpOtherInterceptsTime", 24 },
> + { "VpExternalInterruptsCount", 25 },
> + { "VpExternalInterruptsTime", 26 },
> + { "VpPendingInterruptsCount", 27 },
> + { "VpPendingInterruptsTime", 28 },
> + { "VpEmulatedInstructionsCount", 29 },
> + { "VpEmulatedInstructionsTime", 30 },
> + { "VpDebugRegisterAccessesCount", 31 },
> + { "VpDebugRegisterAccessesTime", 32 },
> + { "VpPageFaultInterceptsCount", 33 },
> + { "VpPageFaultInterceptsTime", 34 },
> + { "VpGuestPageTableMaps", 35 },
> + { "VpLargePageTlbFills", 36 },
> + { "VpSmallPageTlbFills", 37 },
> + { "VpReflectedGuestPageFaults", 38 },
> + { "VpApicMmioAccesses", 39 },
> + { "VpIoInterceptMessages", 40 },
> + { "VpMemoryInterceptMessages", 41 },
> + { "VpApicEoiAccesses", 42 },
> + { "VpOtherMessages", 43 },
> + { "VpPageTableAllocations", 44 },
> + { "VpLogicalProcessorMigrations", 45 },
> + { "VpAddressSpaceEvictions", 46 },
> + { "VpAddressSpaceSwitches", 47 },
> + { "VpAddressDomainFlushes", 48 },
> + { "VpAddressSpaceFlushes", 49 },
> + { "VpGlobalGvaRangeFlushes", 50 },
> + { "VpLocalGvaRangeFlushes", 51 },
> + { "VpPageTableEvictions", 52 },
> + { "VpPageTableReclamations", 53 },
> + { "VpPageTableResets", 54 },
> + { "VpPageTableValidations", 55 },
> + { "VpApicTprAccesses", 56 },
> + { "VpPageTableWriteIntercepts", 57 },
> + { "VpSyntheticInterrupts", 58 },
> + { "VpVirtualInterrupts", 59 },
> + { "VpApicIpisSent", 60 },
> + { "VpApicSelfIpisSent", 61 },
> + { "VpGpaSpaceHypercalls", 62 },
> + { "VpLogicalProcessorHypercalls", 63 },
> + { "VpLongSpinWaitHypercalls", 64 },
> + { "VpOtherHypercalls", 65 },
> + { "VpSyntheticInterruptHypercalls", 66 },
> + { "VpVirtualInterruptHypercalls", 67 },
> + { "VpVirtualMmuHypercalls", 68 },
> + { "VpVirtualProcessorHypercalls", 69 },
> + { "VpHardwareInterrupts", 70 },
> + { "VpNestedPageFaultInterceptsCount", 71 },
> + { "VpNestedPageFaultInterceptsTime", 72 },
> + { "VpPageScans", 73 },
> + { "VpLogicalProcessorDispatches", 74 },
> + { "VpWaitingForCpuTime", 75 },
> + { "VpExtendedHypercalls", 76 },
> + { "VpExtendedHypercallInterceptMessages", 77 },
> + { "VpMbecNestedPageTableSwitches", 78 },
> + { "VpOtherReflectedGuestExceptions", 79 },
> + { "VpGlobalIoTlbFlushes", 80 },
> + { "VpGlobalIoTlbFlushCost", 81 },
> + { "VpLocalIoTlbFlushes", 82 },
> + { "VpLocalIoTlbFlushCost", 83 },
> + { "VpHypercallsForwardedCount", 84 },
> + { "VpHypercallsForwardingTime", 85 },
> + { "VpPageInvalidationsForwardedCount", 86 },
> + { "VpPageInvalidationsForwardingTime", 87 },
> + { "VpControlRegisterAccessesForwardedCount", 88 },
> + { "VpControlRegisterAccessesForwardingTime", 89 },
> + { "VpIoInstructionsForwardedCount", 90 },
> + { "VpIoInstructionsForwardingTime", 91 },
> + { "VpHltInstructionsForwardedCount", 92 },
> + { "VpHltInstructionsForwardingTime", 93 },
> + { "VpMwaitInstructionsForwardedCount", 94 },
> + { "VpMwaitInstructionsForwardingTime", 95 },
> + { "VpCpuidInstructionsForwardedCount", 96 },
> + { "VpCpuidInstructionsForwardingTime", 97 },
> + { "VpMsrAccessesForwardedCount", 98 },
> + { "VpMsrAccessesForwardingTime", 99 },
> + { "VpOtherInterceptsForwardedCount", 100 },
> + { "VpOtherInterceptsForwardingTime", 101 },
> + { "VpExternalInterruptsForwardedCount", 102 },
> + { "VpExternalInterruptsForwardingTime", 103 },
> + { "VpPendingInterruptsForwardedCount", 104 },
> + { "VpPendingInterruptsForwardingTime", 105 },
> + { "VpEmulatedInstructionsForwardedCount", 106 },
> + { "VpEmulatedInstructionsForwardingTime", 107 },
> + { "VpDebugRegisterAccessesForwardedCount", 108 },
> + { "VpDebugRegisterAccessesForwardingTime", 109 },
> + { "VpPageFaultInterceptsForwardedCount", 110 },
> + { "VpPageFaultInterceptsForwardingTime", 111 },
> + { "VpVmclearEmulationCount", 112 },
> + { "VpVmclearEmulationTime", 113 },
> + { "VpVmptrldEmulationCount", 114 },
> + { "VpVmptrldEmulationTime", 115 },
> + { "VpVmptrstEmulationCount", 116 },
> + { "VpVmptrstEmulationTime", 117 },
> + { "VpVmreadEmulationCount", 118 },
> + { "VpVmreadEmulationTime", 119 },
> + { "VpVmwriteEmulationCount", 120 },
> + { "VpVmwriteEmulationTime", 121 },
> + { "VpVmxoffEmulationCount", 122 },
> + { "VpVmxoffEmulationTime", 123 },
> + { "VpVmxonEmulationCount", 124 },
> + { "VpVmxonEmulationTime", 125 },
> + { "VpNestedVMEntriesCount", 126 },
> + { "VpNestedVMEntriesTime", 127 },
> + { "VpNestedSLATSoftPageFaultsCount", 128 },
> + { "VpNestedSLATSoftPageFaultsTime", 129 },
> + { "VpNestedSLATHardPageFaultsCount", 130 },
> + { "VpNestedSLATHardPageFaultsTime", 131 },
> + { "VpInvEptAllContextEmulationCount", 132 },
> + { "VpInvEptAllContextEmulationTime", 133 },
> + { "VpInvEptSingleContextEmulationCount", 134 },
> + { "VpInvEptSingleContextEmulationTime", 135 },
> + { "VpInvVpidAllContextEmulationCount", 136 },
> + { "VpInvVpidAllContextEmulationTime", 137 },
> + { "VpInvVpidSingleContextEmulationCount", 138 },
> + { "VpInvVpidSingleContextEmulationTime", 139 },
> + { "VpInvVpidSingleAddressEmulationCount", 140 },
> + { "VpInvVpidSingleAddressEmulationTime", 141 },
> + { "VpNestedTlbPageTableReclamations", 142 },
> + { "VpNestedTlbPageTableEvictions", 143 },
> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 144 },
> + { "VpFlushGuestPhysicalAddressListHypercalls", 145 },
> + { "VpPostedInterruptNotifications", 146 },
> + { "VpPostedInterruptScans", 147 },
> + { "VpTotalCoreRunTime", 148 },
> + { "VpMaximumRunTime", 149 },
> + { "VpHwpRequestContextSwitches", 150 },
> + { "VpWaitingForCpuTimeBucket0", 151 },
> + { "VpWaitingForCpuTimeBucket1", 152 },
> + { "VpWaitingForCpuTimeBucket2", 153 },
> + { "VpWaitingForCpuTimeBucket3", 154 },
> + { "VpWaitingForCpuTimeBucket4", 155 },
> + { "VpWaitingForCpuTimeBucket5", 156 },
> + { "VpWaitingForCpuTimeBucket6", 157 },
> + { "VpVmloadEmulationCount", 158 },
> + { "VpVmloadEmulationTime", 159 },
> + { "VpVmsaveEmulationCount", 160 },
> + { "VpVmsaveEmulationTime", 161 },
> + { "VpGifInstructionEmulationCount", 162 },
> + { "VpGifInstructionEmulationTime", 163 },
> + { "VpEmulatedErrataSvmInstructions", 164 },
> + { "VpPlaceholder1", 165 },
> + { "VpPlaceholder2", 166 },
> + { "VpPlaceholder3", 167 },
> + { "VpPlaceholder4", 168 },
> + { "VpPlaceholder5", 169 },
> + { "VpPlaceholder6", 170 },
> + { "VpPlaceholder7", 171 },
> + { "VpPlaceholder8", 172 },
> + { "VpContentionTime", 173 },
> + { "VpWakeUpTime", 174 },
> + { "VpSchedulingPriority", 175 },
> + { "VpRdpmcInstructionsCount", 176 },
> + { "VpRdpmcInstructionsTime", 177 },
> + { "VpPerfmonPmuMsrAccessesCount", 178 },
> + { "VpPerfmonLbrMsrAccessesCount", 179 },
> + { "VpPerfmonIptMsrAccessesCount", 180 },
> + { "VpPerfmonInterruptCount", 181 },
> + { "VpVtl1DispatchCount", 182 },
> + { "VpVtl2DispatchCount", 183 },
> + { "VpVtl2DispatchBucket0", 184 },
> + { "VpVtl2DispatchBucket1", 185 },
> + { "VpVtl2DispatchBucket2", 186 },
> + { "VpVtl2DispatchBucket3", 187 },
> + { "VpVtl2DispatchBucket4", 188 },
> + { "VpVtl2DispatchBucket5", 189 },
> + { "VpVtl2DispatchBucket6", 190 },
> + { "VpVtl1RunTime", 191 },
> + { "VpVtl2RunTime", 192 },
> + { "VpIommuHypercalls", 193 },
> + { "VpCpuGroupHypercalls", 194 },
> + { "VpVsmHypercalls", 195 },
> + { "VpEventLogHypercalls", 196 },
> + { "VpDeviceDomainHypercalls", 197 },
> + { "VpDepositHypercalls", 198 },
> + { "VpSvmHypercalls", 199 },
> + { "VpBusLockAcquisitionCount", 200 },
> + { "VpLoadAvg", 201 },
> + { "VpRootDispatchThreadBlocked", 202 },
> + { "VpIdleCpuTime", 203 },
> + { "VpWaitingForCpuTimeBucket7", 204 },
> + { "VpWaitingForCpuTimeBucket8", 205 },
> + { "VpWaitingForCpuTimeBucket9", 206 },
> + { "VpWaitingForCpuTimeBucket10", 207 },
> + { "VpWaitingForCpuTimeBucket11", 208 },
> + { "VpWaitingForCpuTimeBucket12", 209 },
> + { "VpHierarchicalSuspendTime", 210 },
> + { "VpExpressSchedulingAttempts", 211 },
> + { "VpExpressSchedulingCount", 212 },
> + { "VpBusLockAcquisitionTime", 213 },
> +#elif IS_ENABLED(CONFIG_ARM64)
> + { "VpSysRegAccessesCount", 9 },
> + { "VpSysRegAccessesTime", 10 },
> + { "VpSmcInstructionsCount", 11 },
> + { "VpSmcInstructionsTime", 12 },
> + { "VpOtherInterceptsCount", 13 },
> + { "VpOtherInterceptsTime", 14 },
> + { "VpExternalInterruptsCount", 15 },
> + { "VpExternalInterruptsTime", 16 },
> + { "VpPendingInterruptsCount", 17 },
> + { "VpPendingInterruptsTime", 18 },
> + { "VpGuestPageTableMaps", 19 },
> + { "VpLargePageTlbFills", 20 },
> + { "VpSmallPageTlbFills", 21 },
> + { "VpReflectedGuestPageFaults", 22 },
> + { "VpMemoryInterceptMessages", 23 },
> + { "VpOtherMessages", 24 },
> + { "VpLogicalProcessorMigrations", 25 },
> + { "VpAddressDomainFlushes", 26 },
> + { "VpAddressSpaceFlushes", 27 },
> + { "VpSyntheticInterrupts", 28 },
> + { "VpVirtualInterrupts", 29 },
> + { "VpApicSelfIpisSent", 30 },
> + { "VpGpaSpaceHypercalls", 31 },
> + { "VpLogicalProcessorHypercalls", 32 },
> + { "VpLongSpinWaitHypercalls", 33 },
> + { "VpOtherHypercalls", 34 },
> + { "VpSyntheticInterruptHypercalls", 35 },
> + { "VpVirtualInterruptHypercalls", 36 },
> + { "VpVirtualMmuHypercalls", 37 },
> + { "VpVirtualProcessorHypercalls", 38 },
> + { "VpHardwareInterrupts", 39 },
> + { "VpNestedPageFaultInterceptsCount", 40 },
> + { "VpNestedPageFaultInterceptsTime", 41 },
> + { "VpLogicalProcessorDispatches", 42 },
> + { "VpWaitingForCpuTime", 43 },
> + { "VpExtendedHypercalls", 44 },
> + { "VpExtendedHypercallInterceptMessages", 45 },
> + { "VpMbecNestedPageTableSwitches", 46 },
> + { "VpOtherReflectedGuestExceptions", 47 },
> + { "VpGlobalIoTlbFlushes", 48 },
> + { "VpGlobalIoTlbFlushCost", 49 },
> + { "VpLocalIoTlbFlushes", 50 },
> + { "VpLocalIoTlbFlushCost", 51 },
> + { "VpFlushGuestPhysicalAddressSpaceHypercalls", 52 },
> + { "VpFlushGuestPhysicalAddressListHypercalls", 53 },
> + { "VpPostedInterruptNotifications", 54 },
> + { "VpPostedInterruptScans", 55 },
> + { "VpTotalCoreRunTime", 56 },
> + { "VpMaximumRunTime", 57 },
> + { "VpWaitingForCpuTimeBucket0", 58 },
> + { "VpWaitingForCpuTimeBucket1", 59 },
> + { "VpWaitingForCpuTimeBucket2", 60 },
> + { "VpWaitingForCpuTimeBucket3", 61 },
> + { "VpWaitingForCpuTimeBucket4", 62 },
> + { "VpWaitingForCpuTimeBucket5", 63 },
> + { "VpWaitingForCpuTimeBucket6", 64 },
> + { "VpHwpRequestContextSwitches", 65 },
> + { "VpPlaceholder2", 66 },
> + { "VpPlaceholder3", 67 },
> + { "VpPlaceholder4", 68 },
> + { "VpPlaceholder5", 69 },
> + { "VpPlaceholder6", 70 },
> + { "VpPlaceholder7", 71 },
> + { "VpPlaceholder8", 72 },
> + { "VpContentionTime", 73 },
> + { "VpWakeUpTime", 74 },
> + { "VpSchedulingPriority", 75 },
> + { "VpVtl1DispatchCount", 76 },
> + { "VpVtl2DispatchCount", 77 },
> + { "VpVtl2DispatchBucket0", 78 },
> + { "VpVtl2DispatchBucket1", 79 },
> + { "VpVtl2DispatchBucket2", 80 },
> + { "VpVtl2DispatchBucket3", 81 },
> + { "VpVtl2DispatchBucket4", 82 },
> + { "VpVtl2DispatchBucket5", 83 },
> + { "VpVtl2DispatchBucket6", 84 },
> + { "VpVtl1RunTime", 85 },
> + { "VpVtl2RunTime", 86 },
> + { "VpIommuHypercalls", 87 },
> + { "VpCpuGroupHypercalls", 88 },
> + { "VpVsmHypercalls", 89 },
> + { "VpEventLogHypercalls", 90 },
> + { "VpDeviceDomainHypercalls", 91 },
> + { "VpDepositHypercalls", 92 },
> + { "VpSvmHypercalls", 93 },
> + { "VpLoadAvg", 94 },
> + { "VpRootDispatchThreadBlocked", 95 },
> + { "VpIdleCpuTime", 96 },
> + { "VpWaitingForCpuTimeBucket7", 97 },
> + { "VpWaitingForCpuTimeBucket8", 98 },
> + { "VpWaitingForCpuTimeBucket9", 99 },
> + { "VpWaitingForCpuTimeBucket10", 100 },
> + { "VpWaitingForCpuTimeBucket11", 101 },
> + { "VpWaitingForCpuTimeBucket12", 102 },
> + { "VpHierarchicalSuspendTime", 103 },
> + { "VpExpressSchedulingAttempts", 104 },
> + { "VpExpressSchedulingCount", 105 },
> +#endif
> +};
> +
The patch puts a blank line at the end of the new hv_counters.c file. When using
"git am" to apply this patch, I get this warning:
.git/rebase-apply/patch:499: new blank line at EOF.
+
warning: 1 line adds whitespace errors.
Line 499 is that blank line at the end of the new file. If I modify the patch so that
it no longer adds the blank line, "git am" applies the patch with no warning. This
should probably be fixed.
Michael
^ permalink raw reply
* RE: [PATCH] PCI: hv: Allocate MMIO from above 4GB for the config window
From: Michael Kelley @ 2026-01-23 6:39 UTC (permalink / raw)
To: Matthew Ruffell
Cc: DECUI@microsoft.com, bhelgaas@google.com, haiyangz@microsoft.com,
jakeo@microsoft.com, kwilczynski@kernel.org, kys@microsoft.com,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, longli@microsoft.com,
lpieralisi@kernel.org, mani@kernel.org, robh@kernel.org,
stable@vger.kernel.org, wei.liu@kernel.org
In-Reply-To: <20260123053909.95584-1-matthew.ruffell@canonical.com>
From: Matthew Ruffell <matthew.ruffell@canonical.com> Sent: Thursday, January 22, 2026 9:39 PM
>
> Hi Michael,
>
> > > I wonder if commit a41e0ab394e4 broke the initialization of screen_info in the
> > > kdump kernel. Or perhaps there is now a rev-lock between the kernel with this
> > > commit and a new version of the user space kexec command.
>
> a41e0ab394e4 isn't a mainline commit. Can you please mention the commit subject
> so I can have a read.
It's this patch:
https://lore.kernel.org/lkml/20251126160854.553077-5-tzimmermann@suse.de/
which is in linux-next, but not yet in mainline. Since you are dealing with older
kernels, it's not the culprit.
>
> > > There's a parameter to the kexec() command that governs whether it uses the
> > > kexec_file_load() system call or the kexec_load() system call.
> > > I wonder if that parameter makes a difference in the problem described for this
> > > patch.
>
> Yes, it does indeed make a difference. I have been debugging this the past few
> days, and my colleague Melissa noticed that the problem reproduces when secure
> boot is disabled, but it does not reproduce when secure boot is enabled.
> Additionally, it reproduces on jammy, but not noble. It turns out that
> kexec-tools on jammy defaults to kexec_load() when secure boot is disabled,
> and when enabled, it instead uses kexec_file_load(). On noble, it defaults to
> first trying kexec_file_load() before falling back to kexec_load(), so the
> issue does not reproduce.
This is good info, and definitely a clue. So to be clear, the problem repros
only when kexec_load() is used. With kexec_file_load(), it does not repro. Is that
right? I saw a similar distinction when working on commit 304386373007,
though in the opposite direction!
>
> > > > /*
> > > > * Set up a region of MMIO space to use for accessing configuration
> > > > - * space.
> > > > + * space. Use the high MMIO range to not conflict with the hyperv_drm
> > > > + * driver (which normally gets MMIO from the low MMIO range) in the
> > > > + * kdump kernel of a Gen2 VM, which fails to reserve the framebuffer
> > > > + * MMIO range in vmbus_reserve_fb() due to screen_info.lfb_base being
> > > > + * zero in the kdump kernel.
> > > > */
> > > > - ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, 0, -1,
> > > > + ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, SZ_4G, -1,
> > > > PCI_CONFIG_MMIO_LENGTH, 0x1000, false);
> > > > if (ret)
> > > > return ret;
> > > > --
>
> Thank you for the patch Dexuan.
>
> This patch fixes the problem on Ubuntu 5.15, and 6.8 based kernels
> booting V6 instance types on Azure with Gen 2 images.
Are you seeing the problem on x86/64 or arm64 instances in Azure?
"V6 instance types" could be either, I think, but I'm guessing you
are on x86/64.
And just to confirm: are you seeing the problem with the
Hyper-V DRM driver, or the Hyper-V FB driver? This patch mentions
the DRM driver, so I assume that's the problematic config.
>
> Tested-by: Matthew Ruffell <matthew.ruffell@canonical.com>
While this patch may solve the observed problem, I'm interested in
understanding the root cause of why vmbus_reserve_fb() is seeing
screen_info.lfb_base set to zero. It may be next week before I can
take a look, and I may need to follow up with you on more details of the
scenario to reproduce the problem.
Michael
^ permalink raw reply
* RE: [PATCH] PCI: hv: Allocate MMIO from above 4GB for the config window
From: Matthew Ruffell @ 2026-01-23 5:39 UTC (permalink / raw)
To: mhklinux
Cc: DECUI, bhelgaas, haiyangz, jakeo, kwilczynski, kys, linux-hyperv,
linux-kernel, linux-pci, longli, lpieralisi, mani, robh, stable,
wei.liu
In-Reply-To: <SN6PR02MB4157545DAFDCCE0028439DB2D497A@SN6PR02MB4157.namprd02.prod.outlook.com>
Hi Michael,
> > I wonder if commit a41e0ab394e4 broke the initialization of screen_info in the
> > kdump kernel. Or perhaps there is now a rev-lock between the kernel with this
> > commit and a new version of the user space kexec command.
a41e0ab394e4 isn't a mainline commit. Can you please mention the commit subject
so I can have a read.
> > There's a parameter to the kexec() command that governs whether it uses the
> > kexec_file_load() system call or the kexec_load() system call.
> > I wonder if that parameter makes a difference in the problem described for this
> > patch.
Yes, it does indeed make a difference. I have been debugging this the past few
days, and my colleague Melissa noticed that the problem reproduces when secure
boot is disabled, but it does not reproduce when secure boot is enabled.
Additionally, it reproduces on jammy, but not noble. It turns out that
kexec-tools on jammy defaults to kexec_load() when secure boot is disabled,
and when enabled, it instead uses kexec_file_load(). On noble, it defaults to
first trying kexec_file_load() before falling back to kexec_load(), so the
issue does not reproduce.
> > > /*
> > > * Set up a region of MMIO space to use for accessing configuration
> > > - * space.
> > > + * space. Use the high MMIO range to not conflict with the hyperv_drm
> > > + * driver (which normally gets MMIO from the low MMIO range) in the
> > > + * kdump kernel of a Gen2 VM, which fails to reserve the framebuffer
> > > + * MMIO range in vmbus_reserve_fb() due to screen_info.lfb_base being
> > > + * zero in the kdump kernel.
> > > */
> > > - ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, 0, -1,
> > > + ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, SZ_4G, -1,
> > > PCI_CONFIG_MMIO_LENGTH, 0x1000, false);
> > > if (ret)
> > > return ret;
> > > --
Thank you for the patch Dexuan.
This patch fixes the problem on Ubuntu 5.15, and 6.8 based kernels
booting V6 instance types on Azure with Gen 2 images.
Tested-by: Matthew Ruffell <matthew.ruffell@canonical.com>
Thanks,
Matthew
^ permalink raw reply
* Re: [PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
From: Jakub Kicinski @ 2026-01-23 2:07 UTC (permalink / raw)
To: Erni Sri Satya Vennela
Cc: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
edumazet, pabeni, leon, kotaranov, shradhagupta, yury.norov,
dipayanroy, shirazsaleem, ssengar, gargaditya, linux-hyperv,
netdev, linux-kernel
In-Reply-To: <aXJhzi58GqLKtui4@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net>
On Thu, 22 Jan 2026 09:43:42 -0800 Erni Sri Satya Vennela wrote:
> On Wed, Jan 21, 2026 at 08:14:12PM -0800, Jakub Kicinski wrote:
> > On Tue, 20 Jan 2026 22:56:55 -0800 Erni Sri Satya Vennela wrote:
> > > Enhance MANA driver logging to provide better visibility into
> > > hardware configuration and error states during driver initialization
> > > and runtime operations.
> >
> > > + dev_info(gc->dev, "Max Resources: msix_usable=%u max_queues=%u\n",
> > > + gc->num_msix_usable, gc->max_num_queues);
> >
> > > + dev_info(dev, "Device Config: max_vports=%u adapter_mtu=%u bm_hostmode=%u\n",
> > > + *max_num_vports, gc->adapter_mtu, *bm_hostmode);
> >
> > IIUC in networking we try to follow the mantra that if the system is
> > functioning correctly there should be no logs. You can expose the debug
> > info via ethtool, devlink, debugfs etc. Take your pick.
>
> We discussed this internally and noted that customers often cannot
> reliably reproduce the VM issue. In such cases, the only evidence
> available is the dmesg logs captured during the incident. Asking them to
> re-enable debug options later is not practical, since the problem may
> not occur again. Similarly, exposing the information via ethtool,
> devlink, or debugfs is less effective because the data is transient and
> lost after a reboot. As these messages are printed only once during
> initialization, and not repeated during runtime or driver load/unload,
> we decided to keep them at info level to aid troubleshooting without
> adding noise.
You will have to build proper support tooling like every single vendor
before you. Presumably you can also log from the hypervisor side which
makes your life so much easier than supporting real HW. Yet, real
NICs don't spew random trash to the logs all the time. SMH. Respectfully,
next time y'all "discuss things internally" start with the question of
what makes your case special :|
^ permalink raw reply
* [PATCH 4/4] mshv: Handle insufficient root memory hypervisor statuses
From: Stanislav Kinsburskii @ 2026-01-23 1:35 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <176913164914.89165.5792608454600292463.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>
When creating guest partition objects, the hypervisor may fail to
allocate root partition pages and return an insufficient memory status.
In this case, deposit memory using the root partition ID instead.
Note: This error should never occur in a guest of L1VH partition context.
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
drivers/hv/hv_common.c | 2 +
drivers/hv/hv_proc.c | 14 ++++++++++
include/hyperv/hvgdk_mini.h | 58 ++++++++++++++++++++++---------------------
3 files changed, 46 insertions(+), 28 deletions(-)
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index c7f63c9de503..cab0d1733607 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -792,6 +792,8 @@ static const struct hv_status_info hv_status_infos[] = {
_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY, -ENOMEM),
+ _STATUS_INFO(HV_STATUS_INSUFFICIENT_ROOT_MEMORY, -ENOMEM),
+ _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL),
_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL),
_STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO),
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index ac21e16f9348..89870c1b0087 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -122,6 +122,18 @@ int hv_deposit_memory_node(int node, u64 partition_id,
case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
break;
+
+ case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
+ num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
+ fallthrough;
+ case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
+ if (!hv_root_partition()) {
+ hv_status_err(hv_status, "Unexpected root memory deposit\n");
+ return -ENOMEM;
+ }
+ partition_id = HV_PARTITION_ID_SELF;
+ break;
+
default:
hv_status_err(hv_status, "Unexpected!\n");
return -ENOMEM;
@@ -135,6 +147,8 @@ bool hv_result_oom(u64 status)
switch (hv_result(status)) {
case HV_STATUS_INSUFFICIENT_MEMORY:
case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
+ case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
+ case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
return true;
}
return false;
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 70f22ef44948..5b74a857ef43 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -14,34 +14,36 @@ struct hv_u128 {
} __packed;
/* NOTE: when adding below, update hv_result_to_string() */
-#define HV_STATUS_SUCCESS 0x0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
-#define HV_STATUS_INVALID_ALIGNMENT 0x4
-#define HV_STATUS_INVALID_PARAMETER 0x5
-#define HV_STATUS_ACCESS_DENIED 0x6
-#define HV_STATUS_INVALID_PARTITION_STATE 0x7
-#define HV_STATUS_OPERATION_DENIED 0x8
-#define HV_STATUS_UNKNOWN_PROPERTY 0x9
-#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA
-#define HV_STATUS_INSUFFICIENT_MEMORY 0xB
-#define HV_STATUS_INVALID_PARTITION_ID 0xD
-#define HV_STATUS_INVALID_VP_INDEX 0xE
-#define HV_STATUS_NOT_FOUND 0x10
-#define HV_STATUS_INVALID_PORT_ID 0x11
-#define HV_STATUS_INVALID_CONNECTION_ID 0x12
-#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13
-#define HV_STATUS_NOT_ACKNOWLEDGED 0x14
-#define HV_STATUS_INVALID_VP_STATE 0x15
-#define HV_STATUS_NO_RESOURCES 0x1D
-#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20
-#define HV_STATUS_INVALID_LP_INDEX 0x41
-#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
-#define HV_STATUS_OPERATION_FAILED 0x71
-#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75
-#define HV_STATUS_TIME_OUT 0x78
-#define HV_STATUS_CALL_PENDING 0x79
-#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
+#define HV_STATUS_SUCCESS 0x0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
+#define HV_STATUS_INVALID_ALIGNMENT 0x4
+#define HV_STATUS_INVALID_PARAMETER 0x5
+#define HV_STATUS_ACCESS_DENIED 0x6
+#define HV_STATUS_INVALID_PARTITION_STATE 0x7
+#define HV_STATUS_OPERATION_DENIED 0x8
+#define HV_STATUS_UNKNOWN_PROPERTY 0x9
+#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA
+#define HV_STATUS_INSUFFICIENT_MEMORY 0xB
+#define HV_STATUS_INVALID_PARTITION_ID 0xD
+#define HV_STATUS_INVALID_VP_INDEX 0xE
+#define HV_STATUS_NOT_FOUND 0x10
+#define HV_STATUS_INVALID_PORT_ID 0x11
+#define HV_STATUS_INVALID_CONNECTION_ID 0x12
+#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13
+#define HV_STATUS_NOT_ACKNOWLEDGED 0x14
+#define HV_STATUS_INVALID_VP_STATE 0x15
+#define HV_STATUS_NO_RESOURCES 0x1D
+#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20
+#define HV_STATUS_INVALID_LP_INDEX 0x41
+#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
+#define HV_STATUS_OPERATION_FAILED 0x71
+#define HV_STATUS_INSUFFICIENT_ROOT_MEMORY 0x73
+#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75
+#define HV_STATUS_TIME_OUT 0x78
+#define HV_STATUS_CALL_PENDING 0x79
+#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY 0x83
+#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
/*
* The Hyper-V TimeRefCount register and the TSC
^ permalink raw reply related
* [PATCH 3/4] mshv: Handle insufficient contiguous memory hypervisor status
From: Stanislav Kinsburskii @ 2026-01-23 1:35 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <176913164914.89165.5792608454600292463.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>
The HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY status indicates that the
hypervisor lacks sufficient contiguous memory for its internal allocations.
When this status is encountered, allocate and deposit
HV_MAX_CONTIGUOUS_ALLOCATION_PAGES contiguous pages to the hypervisor.
HV_MAX_CONTIGUOUS_ALLOCATION_PAGES is defined in the hypervisor headers; a
deposit of this size will always satisfy the hypervisor's requirements.
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
drivers/hv/hv_common.c | 1 +
drivers/hv/hv_proc.c | 4 ++++
include/hyperv/hvgdk_mini.h | 1 +
include/hyperv/hvhdk_mini.h | 2 ++
4 files changed, 8 insertions(+)
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 0a3ab7efed46..c7f63c9de503 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -791,6 +791,7 @@ static const struct hv_status_info hv_status_infos[] = {
_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO),
_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM),
+ _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL),
_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL),
_STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO),
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index c0c2bfc80d77..ac21e16f9348 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -119,6 +119,9 @@ int hv_deposit_memory_node(int node, u64 partition_id,
case HV_STATUS_INSUFFICIENT_MEMORY:
num_pages = 1;
break;
+ case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
+ num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
+ break;
default:
hv_status_err(hv_status, "Unexpected!\n");
return -ENOMEM;
@@ -131,6 +134,7 @@ bool hv_result_oom(u64 status)
{
switch (hv_result(status)) {
case HV_STATUS_INSUFFICIENT_MEMORY:
+ case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
return true;
}
return false;
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 04b18d0e37af..70f22ef44948 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -38,6 +38,7 @@ struct hv_u128 {
#define HV_STATUS_INVALID_LP_INDEX 0x41
#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
#define HV_STATUS_OPERATION_FAILED 0x71
+#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75
#define HV_STATUS_TIME_OUT 0x78
#define HV_STATUS_CALL_PENDING 0x79
#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index 0f7178fa88a8..c5cfe13fae57 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -7,6 +7,8 @@
#include "hvgdk_mini.h"
+#define HV_MAX_CONTIGUOUS_ALLOCATION_PAGES 8
+
/*
* Doorbell connection_info flags.
*/
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox