Linux-HyperV List
 help / color / mirror / Atom feed
* Re: [PATCH 08/11] Drivers: hv: mshv_vtl: Move register page config to arch-specific files
From: Naman Jain @ 2026-04-20 15:23 UTC (permalink / raw)
  To: Michael Kelley, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <SN6PR02MB4157CF364DA2C0CC657A6DCBD450A@SN6PR02MB4157.namprd02.prod.outlook.com>



On 4/1/2026 10:28 PM, Michael Kelley wrote:
> From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
>>
>> Move mshv_vtl_configure_reg_page() implementation from
>> drivers/hv/mshv_vtl_main.c to arch-specific files:
>> - arch/x86/hyperv/hv_vtl.c: full implementation with register page setup
>> - arch/arm64/hyperv/hv_vtl.c: stub implementation (unsupported)
>>
>> Move common type definitions to include/asm-generic/mshyperv.h:
>> - struct mshv_vtl_per_cpu
>> - union hv_synic_overlay_page_msr
>>
>> Move hv_call_get_vp_registers() and hv_call_set_vp_registers()
>> declarations to include/asm-generic/mshyperv.h since these functions
>> are used by multiple modules.
>>
>> While at it, remove the unnecessary stub implementations in #else
>> case for mshv_vtl_return* functions in arch/x86/include/asm/mshyperv.h.
> 
> Seems like this patch is doing multiple things. The reg page configuration
> changes are more substantial and should probably be in a patch by
> themselves. The other changes are more trivial and maybe are OK
> grouped into a single patch, but you could also consider breaking them
> out.

I will split this patch into 3 patches.

> 
>>
>> This is essential for adding support for ARM64 in MSHV_VTL.
>>
>> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
>> ---
>>   arch/arm64/hyperv/hv_vtl.c        |  8 +++++
>>   arch/arm64/include/asm/mshyperv.h |  3 ++
>>   arch/x86/hyperv/hv_vtl.c          | 32 ++++++++++++++++++++
>>   arch/x86/include/asm/mshyperv.h   |  7 ++---
>>   drivers/hv/mshv.h                 |  8 -----
>>   drivers/hv/mshv_vtl_main.c        | 49 +++----------------------------
>>   include/asm-generic/mshyperv.h    | 42 ++++++++++++++++++++++++++
>>   7 files changed, 92 insertions(+), 57 deletions(-)
>>
>> diff --git a/arch/arm64/hyperv/hv_vtl.c b/arch/arm64/hyperv/hv_vtl.c
>> index 66318672c242..d699138427c1 100644
>> --- a/arch/arm64/hyperv/hv_vtl.c
>> +++ b/arch/arm64/hyperv/hv_vtl.c
>> @@ -10,6 +10,7 @@
>>   #include <asm/boot.h>
>>   #include <asm/mshyperv.h>
>>   #include <asm/cpu_ops.h>
>> +#include <linux/export.h>
>>
>>   void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
>>   {
>> @@ -142,3 +143,10 @@ void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
>>   		"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
>>   }
>>   EXPORT_SYMBOL(mshv_vtl_return_call);
>> +
>> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
>> +{
>> +	pr_debug("Register page not supported on ARM64\n");
>> +	return false;
>> +}
>> +EXPORT_SYMBOL_GPL(hv_vtl_configure_reg_page);
>> diff --git a/arch/arm64/include/asm/mshyperv.h
>> b/arch/arm64/include/asm/mshyperv.h
>> index de7f3a41a8ea..36803f0386cc 100644
>> --- a/arch/arm64/include/asm/mshyperv.h
>> +++ b/arch/arm64/include/asm/mshyperv.h
>> @@ -61,6 +61,8 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg)
>>   				ARM_SMCCC_OWNER_VENDOR_HYP,	\
>>   				HV_SMCCC_FUNC_NUMBER)
>>
>> +struct mshv_vtl_per_cpu;
>> +
>>   struct mshv_vtl_cpu_context {
>>   /*
>>    * NOTE: x18 is managed by the hypervisor. It won't be reloaded from this array.
>> @@ -82,6 +84,7 @@ static inline int hv_vtl_get_set_reg(struct hv_register_assoc *regs,
>> bool set, u
>>   }
>>
>>   void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
>> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu);
> 
> I think this declaration could be added in asm-generic/mshyperv.h so that it
> is shared by x86 and arm64. That also obviates the need for the forward
> ref to struct mshv_vtl_per_cpu that you've added here.

Acked.

> 
>>   #endif
>>
>>   #include <asm-generic/mshyperv.h>
>> diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
>> index 72a0bb4ae0c7..ede290985d41 100644
>> --- a/arch/x86/hyperv/hv_vtl.c
>> +++ b/arch/x86/hyperv/hv_vtl.c
>> @@ -20,6 +20,7 @@
>>   #include <uapi/asm/mtrr.h>
>>   #include <asm/debugreg.h>
>>   #include <linux/export.h>
>> +#include <linux/hyperv.h>
>>   #include <../kernel/smpboot.h>
>>   #include "../../kernel/fpu/legacy.h"
>>
>> @@ -259,6 +260,37 @@ int __init hv_vtl_early_init(void)
>>   	return 0;
>>   }
>>
>> +static const union hv_input_vtl input_vtl_zero;
>> +
>> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
>> +{
>> +	struct hv_register_assoc reg_assoc = {};
>> +	union hv_synic_overlay_page_msr overlay = {};
>> +	struct page *reg_page;
>> +
>> +	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
>> +	if (!reg_page) {
>> +		WARN(1, "failed to allocate register page\n");
>> +		return false;
>> +	}
>> +
>> +	overlay.enabled = 1;
>> +	overlay.pfn = page_to_hvpfn(reg_page);
>> +	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
>> +	reg_assoc.value.reg64 = overlay.as_uint64;
>> +
>> +	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
>> +				     1, input_vtl_zero, &reg_assoc)) {
>> +		WARN(1, "failed to setup register page\n");
>> +		__free_page(reg_page);
>> +		return false;
>> +	}
>> +
>> +	per_cpu->reg_page = reg_page;
>> +	return true;
> 
> As Sashiko AI noted, the memory allocated for the reg_page never gets freed.

These are present in existing code, I'll address them in a separate series.

> 
>> +}
>> +EXPORT_SYMBOL_GPL(hv_vtl_configure_reg_page);
>> +
>>   DEFINE_STATIC_CALL_NULL(__mshv_vtl_return_hypercall, void (*)(void));
>>
>>   void mshv_vtl_return_call_init(u64 vtl_return_offset)
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index d5355a5b7517..d592fea49cdb 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -271,6 +271,8 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg) {
>> return 0; }
>>   static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; }
>>   #endif /* CONFIG_HYPERV */
>>
>> +struct mshv_vtl_per_cpu;
>> +
>>   struct mshv_vtl_cpu_context {
>>   	union {
>>   		struct {
>> @@ -305,13 +307,10 @@ void mshv_vtl_return_call_init(u64 vtl_return_offset);
>>   void mshv_vtl_return_hypercall(void);
>>   void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
>>   int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u64 shared);
>> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu);
> 
> Same as for arm64. Add a shared declaration in asm-generic/mshyperv.h.

Ditto.

> 
>>   #else
>>   static inline void __init hv_vtl_init_platform(void) {}
>>   static inline int __init hv_vtl_early_init(void) { return 0; }
>> -static inline void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
>> -static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {}
>> -static inline void mshv_vtl_return_hypercall(void) {}
>> -static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
>>   #endif
>>
>>   #include <asm-generic/mshyperv.h>
>> diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
>> index d4813df92b9c..0fcb7f9ba6a9 100644
>> --- a/drivers/hv/mshv.h
>> +++ b/drivers/hv/mshv.h
>> @@ -14,14 +14,6 @@
>>   	memchr_inv(&((STRUCT).MEMBER), \
>>   		   0, sizeof_field(typeof(STRUCT), MEMBER))
>>
>> -int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
>> -			     union hv_input_vtl input_vtl,
>> -			     struct hv_register_assoc *registers);
>> -
>> -int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
>> -			     union hv_input_vtl input_vtl,
>> -			     struct hv_register_assoc *registers);
>> -
>>   int hv_call_get_partition_property(u64 partition_id, u64 property_code,
>>   				   u64 *property_value);
>>
>> diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
>> index 91517b45d526..c79d24317b8e 100644
>> --- a/drivers/hv/mshv_vtl_main.c
>> +++ b/drivers/hv/mshv_vtl_main.c
>> @@ -78,21 +78,6 @@ struct mshv_vtl {
>>   	u64 id;
>>   };
>>
>> -struct mshv_vtl_per_cpu {
>> -	struct mshv_vtl_run *run;
>> -	struct page *reg_page;
>> -};
>> -
>> -/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
>> -union hv_synic_overlay_page_msr {
>> -	u64 as_uint64;
>> -	struct {
>> -		u64 enabled: 1;
>> -		u64 reserved: 11;
>> -		u64 pfn: 52;
>> -	} __packed;
>> -};
>> -
>>   static struct mutex mshv_vtl_poll_file_lock;
>>   static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
>>   static union hv_register_vsm_capabilities mshv_vsm_capabilities;
>> @@ -201,34 +186,6 @@ static struct page *mshv_vtl_cpu_reg_page(int cpu)
>>   	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
>>   }
>>
>> -static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
>> -{
>> -	struct hv_register_assoc reg_assoc = {};
>> -	union hv_synic_overlay_page_msr overlay = {};
>> -	struct page *reg_page;
>> -
>> -	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
>> -	if (!reg_page) {
>> -		WARN(1, "failed to allocate register page\n");
>> -		return;
>> -	}
>> -
>> -	overlay.enabled = 1;
>> -	overlay.pfn = page_to_hvpfn(reg_page);
>> -	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
>> -	reg_assoc.value.reg64 = overlay.as_uint64;
>> -
>> -	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
>> -				     1, input_vtl_zero, &reg_assoc)) {
>> -		WARN(1, "failed to setup register page\n");
>> -		__free_page(reg_page);
>> -		return;
>> -	}
>> -
>> -	per_cpu->reg_page = reg_page;
>> -	mshv_has_reg_page = true;
>> -}
>> -
>>   static void mshv_vtl_synic_enable_regs(unsigned int cpu)
>>   {
>>   	union hv_synic_sint sint;
>> @@ -329,8 +286,10 @@ static int mshv_vtl_alloc_context(unsigned int cpu)
>>   	if (!per_cpu->run)
>>   		return -ENOMEM;
>>
>> -	if (mshv_vsm_capabilities.intercept_page_available)
>> -		mshv_vtl_configure_reg_page(per_cpu);
>> +	if (mshv_vsm_capabilities.intercept_page_available) {
>> +		if (hv_vtl_configure_reg_page(per_cpu))
>> +			mshv_has_reg_page = true;
> 
> As Sashiko AI noted, it doesn't work to use the global mshv_has_reg_page
> to indicate the success of configuring the reg page, which is a per-cpu
> operation. But this bug existed before this patch set, so maybe it should
> be fixed as a preliminary patch.

Acked. Will address them in a separate series.

> 
>> +	}
>>
>>   	mshv_vtl_synic_enable_regs(cpu);
>>
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index b147a12085e4..b53fcc071596 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -383,8 +383,50 @@ static inline int hv_deposit_memory(u64 partition_id, u64 status)
>>   	return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
>>   }
>>
>> +#if IS_ENABLED(CONFIG_MSHV_ROOT) || IS_ENABLED(CONFIG_MSHV_VTL)
>> +int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
>> +			     union hv_input_vtl input_vtl,
>> +			     struct hv_register_assoc *registers);
>> +
>> +int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
>> +			     union hv_input_vtl input_vtl,
>> +			     struct hv_register_assoc *registers);
>> +#else
>> +static inline int hv_call_get_vp_registers(u32 vp_index, u64 partition_id,
>> +					   u16 count,
>> +					   union hv_input_vtl input_vtl,
>> +					   struct hv_register_assoc *registers)
>> +{
>> +	return -EOPNOTSUPP;
>> +}
>> +
>> +static inline int hv_call_set_vp_registers(u32 vp_index, u64 partition_id,
>> +					   u16 count,
>> +					   union hv_input_vtl input_vtl,
>> +					   struct hv_register_assoc *registers)
>> +{
>> +	return -EOPNOTSUPP;
>> +}
>> +#endif /* CONFIG_MSHV_ROOT || CONFIG_MSHV_VTL */
>> +
>>   #define HV_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
>> +
>>   #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
>> +struct mshv_vtl_per_cpu {
>> +	struct mshv_vtl_run *run;
>> +	struct page *reg_page;
>> +};
>> +
>> +/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
>> +union hv_synic_overlay_page_msr {
>> +	u64 as_uint64;
>> +	struct {
>> +		u64 enabled: 1;
>> +		u64 reserved: 11;
>> +		u64 pfn: 52;
>> +	} __packed;
>> +};
>> +
>>   u8 __init get_vtl(void);
>>   #else
>>   static inline u8 get_vtl(void) { return 0; }
>> --
>> 2.43.0
>>
> 
> Sashiko AI noted another existing bug in mshv_vtl_init(), which is that
> the error path does kfree(mem_dev) when it should do
> put_device(mem_dev).  See the comment in the header of
> device_initialize().


To avoid this series bloating up, I am thinking of taking up these fixes 
in a separate series.

Regards,
Naman

^ permalink raw reply

* Re: [PATCH 02/11] Drivers: hv: Move hv_vp_assist_page to common files
From: Naman Jain @ 2026-04-20 15:22 UTC (permalink / raw)
  To: Michael Kelley, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <SN6PR02MB415790977DA40BAD0822DA54D450A@SN6PR02MB4157.namprd02.prod.outlook.com>



On 4/1/2026 10:25 PM, Michael Kelley wrote:
> From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
>>
>> Move the logic to initialize and export hv_vp_assist_page from x86
>> architecture code to Hyper-V common code to allow it to be used for
>> upcoming arm64 support in MSHV_VTL driver.
>> Note: This change also improves error handling - if VP assist page
>> allocation fails, hyperv_init() now returns early instead of
>> continuing with partial initialization.
>>
>> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
>> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
>> ---
>>   arch/x86/hyperv/hv_init.c      | 88 +---------------------------------
>>   drivers/hv/hv_common.c         | 88 ++++++++++++++++++++++++++++++++++
>>   include/asm-generic/mshyperv.h |  4 ++
>>   include/hyperv/hvgdk_mini.h    |  2 +
>>   4 files changed, 95 insertions(+), 87 deletions(-)
>>
>> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
>> index 323adc93f2dc..75a98b5e451b 100644
>> --- a/arch/x86/hyperv/hv_init.c
>> +++ b/arch/x86/hyperv/hv_init.c
>> @@ -81,9 +81,6 @@ union hv_ghcb * __percpu *hv_ghcb_pg;
>>   /* Storage to save the hypercall page temporarily for hibernation */
>>   static void *hv_hypercall_pg_saved;
>>
>> -struct hv_vp_assist_page **hv_vp_assist_page;
>> -EXPORT_SYMBOL_GPL(hv_vp_assist_page);
>> -
>>   static int hyperv_init_ghcb(void)
>>   {
>>   	u64 ghcb_gpa;
>> @@ -117,59 +114,12 @@ static int hyperv_init_ghcb(void)
>>
>>   static int hv_cpu_init(unsigned int cpu)
>>   {
>> -	union hv_vp_assist_msr_contents msr = { 0 };
>> -	struct hv_vp_assist_page **hvp;
>>   	int ret;
>>
>>   	ret = hv_common_cpu_init(cpu);
>>   	if (ret)
>>   		return ret;
>>
>> -	if (!hv_vp_assist_page)
>> -		return 0;
>> -
>> -	hvp = &hv_vp_assist_page[cpu];
>> -	if (hv_root_partition()) {
>> -		/*
>> -		 * For root partition we get the hypervisor provided VP assist
>> -		 * page, instead of allocating a new page.
>> -		 */
>> -		rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
>> -		*hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
>> -				PAGE_SIZE, MEMREMAP_WB);
>> -	} else {
>> -		/*
>> -		 * The VP assist page is an "overlay" page (see Hyper-V TLFS's
>> -		 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
>> -		 * out to make sure we always write the EOI MSR in
>> -		 * hv_apic_eoi_write() *after* the EOI optimization is disabled
>> -		 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
>> -		 * case of CPU offlining and the VM will hang.
>> -		 */
>> -		if (!*hvp) {
>> -			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
>> -
>> -			/*
>> -			 * Hyper-V should never specify a VM that is a Confidential
>> -			 * VM and also running in the root partition. Root partition
>> -			 * is blocked to run in Confidential VM. So only decrypt assist
>> -			 * page in non-root partition here.
>> -			 */
>> -			if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
>> -				WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
>> -				memset(*hvp, 0, PAGE_SIZE);
>> -			}
>> -		}
>> -
>> -		if (*hvp)
>> -			msr.pfn = vmalloc_to_pfn(*hvp);
>> -
>> -	}
>> -	if (!WARN_ON(!(*hvp))) {
>> -		msr.enable = 1;
>> -		wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
>> -	}
>> -
>>   	/* Allow Hyper-V stimer vector to be injected from Hypervisor. */
>>   	if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
>>   		apic_update_vector(cpu, HYPERV_STIMER0_VECTOR, true);
>> @@ -286,23 +236,6 @@ static int hv_cpu_die(unsigned int cpu)
>>
>>   	hv_common_cpu_die(cpu);
>>
>> -	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
>> -		union hv_vp_assist_msr_contents msr = { 0 };
>> -		if (hv_root_partition()) {
>> -			/*
>> -			 * For root partition the VP assist page is mapped to
>> -			 * hypervisor provided page, and thus we unmap the
>> -			 * page here and nullify it, so that in future we have
>> -			 * correct page address mapped in hv_cpu_init.
>> -			 */
>> -			memunmap(hv_vp_assist_page[cpu]);
>> -			hv_vp_assist_page[cpu] = NULL;
>> -			rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
>> -			msr.enable = 0;
>> -		}
>> -		wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
>> -	}
>> -
>>   	if (hv_reenlightenment_cb == NULL)
>>   		return 0;
>>
>> @@ -460,21 +393,6 @@ void __init hyperv_init(void)
>>   	if (hv_common_init())
>>   		return;
>>
>> -	/*
>> -	 * The VP assist page is useless to a TDX guest: the only use we
>> -	 * would have for it is lazy EOI, which can not be used with TDX.
>> -	 */
>> -	if (hv_isolation_type_tdx())
>> -		hv_vp_assist_page = NULL;
>> -	else
>> -		hv_vp_assist_page = kzalloc_objs(*hv_vp_assist_page, nr_cpu_ids);
>> -	if (!hv_vp_assist_page) {
>> -		ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
>> -
>> -		if (!hv_isolation_type_tdx())
>> -			goto common_free;
>> -	}
>> -
>>   	if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
>>   		/* Negotiate GHCB Version. */
>>   		if (!hv_ghcb_negotiate_protocol())
>> @@ -483,7 +401,7 @@ void __init hyperv_init(void)
>>
>>   		hv_ghcb_pg = alloc_percpu(union hv_ghcb *);
>>   		if (!hv_ghcb_pg)
>> -			goto free_vp_assist_page;
>> +			goto free_ghcb_page;
>>   	}
>>
>>   	cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
>> @@ -613,10 +531,6 @@ void __init hyperv_init(void)
>>   	cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE);
>>   free_ghcb_page:
>>   	free_percpu(hv_ghcb_pg);
>> -free_vp_assist_page:
>> -	kfree(hv_vp_assist_page);
>> -	hv_vp_assist_page = NULL;
>> -common_free:
>>   	hv_common_free();
>>   }
>>
>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 6b67ac616789..d1ebc0ebd08f 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -28,7 +28,9 @@
>>   #include <linux/slab.h>
>>   #include <linux/dma-map-ops.h>
>>   #include <linux/set_memory.h>
>> +#include <linux/vmalloc.h>
>>   #include <hyperv/hvhdk.h>
>> +#include <hyperv/hvgdk.h>
>>   #include <asm/mshyperv.h>
> 
> Need to add
> 
> #include <linux/io.h>
> 
> because of the memremap() and related calls that have been added.
> io.h is probably being #include'd indirectly, but it is better to #include
> it directly.
> 

Acked.

>>
>>   u64 hv_current_partition_id = HV_PARTITION_ID_SELF;
>> @@ -78,6 +80,8 @@ static struct ctl_table_header *hv_ctl_table_hdr;
>>   u8 * __percpu *hv_synic_eventring_tail;
>>   EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
>>
>> +struct hv_vp_assist_page **hv_vp_assist_page;
>> +EXPORT_SYMBOL_GPL(hv_vp_assist_page);
>>   /*
>>    * Hyper-V specific initialization and shutdown code that is
>>    * common across all architectures.  Called from architecture
>> @@ -92,6 +96,9 @@ void __init hv_common_free(void)
>>   	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
>>   		hv_kmsg_dump_unregister();
>>
>> +	kfree(hv_vp_assist_page);
>> +	hv_vp_assist_page = NULL;
>> +
>>   	kfree(hv_vp_index);
>>   	hv_vp_index = NULL;
>>
>> @@ -394,6 +401,23 @@ int __init hv_common_init(void)
>>   	for (i = 0; i < nr_cpu_ids; i++)
>>   		hv_vp_index[i] = VP_INVAL;
>>
>> +	/*
>> +	 * The VP assist page is useless to a TDX guest: the only use we
>> +	 * would have for it is lazy EOI, which can not be used with TDX.
>> +	 */
>> +	if (hv_isolation_type_tdx()) {
>> +		hv_vp_assist_page = NULL;
>> +	} else {
>> +		hv_vp_assist_page = kzalloc_objs(*hv_vp_assist_page, nr_cpu_ids);
>> +		if (!hv_vp_assist_page) {
>> +#ifdef CONFIG_X86_64
>> +			ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
>> +#endif
>> +			hv_common_free();
>> +			return -ENOMEM;
> 
> Given that "failure to allocate memory" now returns an error that is
> essentially fatal to hyperv_init(), is it still necessary to clear the flag in
> ms_hyperv.hints?  I'd love to see that #ifdef go away. It's the only
> #ifdef in hv_common.c, and I had worked hard in the past to avoid
> such #ifdef's. :-)

Yes, this particular block can be removed, and I will remove it in v2.
The other thing pointed out in Sashiko's AI review was the need for this
#ifdef block in the TDX case, after setting hv_vp_assist_page to NULL,
to maintain parity with the existing code. For that reason, I will need
to add it back there.

> 
>> +		}
>> +	}
>> +
>>   	return 0;
>>   }
>>
>> @@ -471,6 +495,8 @@ void __init ms_hyperv_late_init(void)
>>
>>   int hv_common_cpu_init(unsigned int cpu)
>>   {
>> +	union hv_vp_assist_msr_contents msr = { 0 };
>> +	struct hv_vp_assist_page **hvp;
>>   	void **inputarg, **outputarg;
>>   	u8 **synic_eventring_tail;
>>   	u64 msr_vp_index;
>> @@ -542,6 +568,50 @@ int hv_common_cpu_init(unsigned int cpu)
>>   			ret = -ENOMEM;
> 
> The Sashiko AI comment here about a bug when ret is set to -ENOMEM
> seems valid to me.
> 

I'm planning to simply "return -ENOMEM" here.

>>   	}
>>
>> +	if (!hv_vp_assist_page)
>> +		return ret;
>> +
>> +	hvp = &hv_vp_assist_page[cpu];
>> +	if (hv_root_partition()) {
>> +		/*
>> +		 * For root partition we get the hypervisor provided VP assist
>> +		 * page, instead of allocating a new page.
>> +		 */
>> +		msr.as_uint64 = hv_get_msr(HV_SYN_REG_VP_ASSIST_PAGE);
>> +		*hvp = memremap(msr.pfn << HV_VP_ASSIST_PAGE_ADDRESS_SHIFT,
>> +				PAGE_SIZE, MEMREMAP_WB);
> 
> The Sashiko AI comment about potentially memremap'ing 64K instead of 4K can
> be ignored. We know that the root partition can only run with a 4K page size,
> and that is enforced in drivers/hv/Kconfig.
>

I am thinking of adding this config dependency (PAGE_SIZE_4KB) to
MSHV_VTL as well, in the Kconfig patch in this series. We also use
only a 4KB page size. This should address all of the PAGE_SIZE issues
that Sashiko raised. I am also replacing PAGE_SIZE with HV_HYP_PAGE_SIZE
in all places. Hope that is fine?

> HV_VP_ASSIST_PAGE_ADDRESS_SHIFT is defined in asm-generic/mshyperv.h.
> But there is also HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT in hvgdk_mini.h.
> Is there a clean way to eliminate the duplication?

Although both architectures use the same value (12), I was hesitant to
use the x64 register definition for ARM64. I will move the arch-based
definition of HV_VP_ASSIST_PAGE_ADDRESS_SHIFT to hvgdk_mini.h and remove
it from asm-generic/mshyperv.h.

> 
>> +	} else {
>> +		/*
>> +		 * The VP assist page is an "overlay" page (see Hyper-V TLFS's
>> +		 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
>> +		 * out to make sure we always write the EOI MSR in
>> +		 * hv_apic_eoi_write() *after* the EOI optimization is disabled
>> +		 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
>> +		 * case of CPU offlining and the VM will hang.
>> +		 */
> 
> Somewhere in the comment above, I'd suggest adding a short "on x86/x64"
> qualifier, as the comment doesn't apply on arm64 since it doesn't support
> the AutoEOI optimization.  Maybe "Here it must be zeroed out to make sure
> that on x86/x64 we always write the EOI MSR in ....".

Acked. I will add it.

> 
>> +		if (!*hvp) {
>> +			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
> 
> The Sashiko AI comment about using "flags" instead of GFP_KERNEL seems valid.

Acked.

> 
>> +
>> +			/*
>> +			 * Hyper-V should never specify a VM that is a Confidential
>> +			 * VM and also running in the root partition. Root partition
>> +			 * is blocked to run in Confidential VM. So only decrypt assist
>> +			 * page in non-root partition here.
>> +			 */
>> +			if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
>> +				WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
>> +				memset(*hvp, 0, PAGE_SIZE);
>> +			}
>> +		}
>> +
>> +		if (*hvp)
>> +			msr.pfn = vmalloc_to_pfn(*hvp);
> 
> The Sashiko AI comment about page size here seems valid. But what are the rules
> about arm64 page sizes that are supported for VTL2, and how does they relate
> to VTL0 allowing 4K, 16K, and 64K page size? What combinations are allowed?
> For example, can a VTL2 built with 4K page size run with a VTL0 built with
> 64K page size? It would be nice to have the rules recorded somewhere in a
> code comment, but I'm not sure of the best place.
> 

VTL2 uses a 4K page size only. This can be enforced with a Kconfig change
in the next version. If and when support for other page sizes is added on
ARM64 for MSHV_VTL, this restriction can be removed.

As for the page sizes supported by the VTL0 kernel, the page size used in
VTL2 has no impact on them.


> But regardless of the rules, I'd suggest future-proofing by using
> "page_to_hvpfn(vmalloc_to_page(*hvp))" so that the PFN generated is always
> in terms of 4K page size as the Hyper-V host expects.

Acked. Will try this and make the changes.

> 
>> +	}
>> +	if (!WARN_ON(!(*hvp))) {
>> +		msr.enable = 1;
>> +		hv_set_msr(HV_SYN_REG_VP_ASSIST_PAGE, msr.as_uint64);
>> +	}
>> +
>>   	return ret;
>>   }
>>
>> @@ -566,6 +636,24 @@ int hv_common_cpu_die(unsigned int cpu)
>>   		*synic_eventring_tail = NULL;
>>   	}
>>
>> +	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
>> +		union hv_vp_assist_msr_contents msr = { 0 };
>> +
>> +		if (hv_root_partition()) {
>> +			/*
>> +			 * For root partition the VP assist page is mapped to
>> +			 * hypervisor provided page, and thus we unmap the
>> +			 * page here and nullify it, so that in future we have
>> +			 * correct page address mapped in hv_cpu_init.
>> +			 */
>> +			memunmap(hv_vp_assist_page[cpu]);
>> +			hv_vp_assist_page[cpu] = NULL;
>> +			msr.as_uint64 = hv_get_msr(HV_SYN_REG_VP_ASSIST_PAGE);
>> +			msr.enable = 0;
>> +		}
>> +		hv_set_msr(HV_SYN_REG_VP_ASSIST_PAGE, msr.as_uint64);
>> +	}
>> +
>>   	return 0;
>>   }
>>
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index d37b68238c97..108f135d4fd9 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -25,6 +25,7 @@
>>   #include <linux/nmi.h>
>>   #include <asm/ptrace.h>
>>   #include <hyperv/hvhdk.h>
>> +#include <hyperv/hvgdk.h>
>>
>>   #define VTPM_BASE_ADDRESS 0xfed40000
>>
>> @@ -299,6 +300,8 @@ do { \
>>   #define hv_status_debug(status, fmt, ...) \
>>   	hv_status_printk(debug, status, fmt, ##__VA_ARGS__)
>>
>> +extern struct hv_vp_assist_page **hv_vp_assist_page;
> 
> This "extern" statement is added here so it is visible to both x86/x64 and arm64.
> And that's correct.
> 
> But there is still some VP assist page stuff that has been left in the arch/x86
> version of mshyperv.h.  That other stuff, including the inline function
> hv_get_vp_assist_page(), should also be moved to asm-generic/mshyperv.h.
> Given that the VP assist page support is now fully generic and not x86/x64
> specific, it shouldn't occur anywhere in the arch/x86 version of mshyperv.h.

Will move the remaining code.

> 
>> +
>>   const char *hv_result_to_string(u64 hv_status);
>>   int hv_result_to_errno(u64 status);
>>   void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>> @@ -377,6 +380,7 @@ static inline int hv_deposit_memory(u64 partition_id, u64 status)
>>   	return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
>>   }
>>
>> +#define HV_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
>>   #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
>>   u8 __init get_vtl(void);
>>   #else
>> diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
>> index 056ef7b6b360..be697ddb211a 100644
>> --- a/include/hyperv/hvgdk_mini.h
>> +++ b/include/hyperv/hvgdk_mini.h
>> @@ -149,6 +149,7 @@ struct hv_u128 {
>>   #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
>>   #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
>>   		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>> +#define HV_SYN_REG_VP_ASSIST_PAGE              (HV_X64_MSR_VP_ASSIST_PAGE)
>>
>>   /* Hyper-V Enlightened VMCS version mask in nested features CPUID */
>>   #define HV_X64_ENLIGHTENED_VMCS_VERSION		0xff
>> @@ -1185,6 +1186,7 @@ enum hv_register_name {
>>
>>   #define HV_MSR_STIMER0_CONFIG	(HV_REGISTER_STIMER0_CONFIG)
>>   #define HV_MSR_STIMER0_COUNT	(HV_REGISTER_STIMER0_COUNT)
>> +#define HV_SYN_REG_VP_ASSIST_PAGE    (HV_REGISTER_VP_ASSIST_PAGE)
> 
> This defines a new register name prefix "HV_SYN_REG_" that isn't used
> anywhere else. The prefixes for Hyper-V register names are already complex
> to account to x86/x64 and arm64 differences, and the fact the x86/x64 has
> synthetic MSRs, while arm64 does not. So introducing another prefix is
> undesirable. Couldn't this just be HV_MSR_VP_ASSIST_PAGE using the
> same structure as HV_MSR_STIMER0_COUNT (for example)?
>

Will rename it to HV_MSR_VP_ASSIST_PAGE in all places.

>>
>>   #endif /* CONFIG_ARM64 */
>>
>> --
>> 2.43.0
>>


Thank you so much for thoroughly reviewing this, Michael.

Regards,
Naman

^ permalink raw reply

* [PATCH AUTOSEL 6.18] PCI: hv: Set default NUMA node to 0 for devices without affinity info
From: Sasha Levin @ 2026-04-20 13:21 UTC (permalink / raw)
  To: patches, stable
  Cc: Long Li, Michael Kelley, Wei Liu, Sasha Levin, kys, haiyangz,
	decui, lpieralisi, kwilczynski, mani, bhelgaas, mikelley,
	linux-hyperv, linux-pci, linux-kernel
In-Reply-To: <20260420132314.1023554-1-sashal@kernel.org>

From: Long Li <longli@microsoft.com>

[ Upstream commit 7b3b1e5a87b2f5e35c52b5386d7c327be869454f ]

When hv_pci_assign_numa_node() processes a device that does not have
HV_PCI_DEVICE_FLAG_NUMA_AFFINITY set or has an out-of-range
virtual_numa_node, the device NUMA node is left unset. On x86_64,
the uninitialized default happens to be 0, but on ARM64 it is
NUMA_NO_NODE (-1).

Tests show that when no NUMA information is available from the Hyper-V
host, devices perform best when assigned to node 0. With NUMA_NO_NODE
the kernel may spread work across NUMA nodes, which degrades
performance on Hyper-V, particularly for high-throughput devices like
MANA.

Always set the device NUMA node to 0 before the conditional NUMA
affinity check, so that devices get a performant default when the host
provides no NUMA information, and behavior is consistent on both
x86_64 and ARM64.

Fixes: 999dd956d838 ("PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2")
Signed-off-by: Long Li <longli@microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---

LLM Generated explanations, may be completely bogus:

Error: Failed to generate final synthesis

 drivers/pci/controller/pci-hyperv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 146b43981b278..28b1572974879 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2486,6 +2486,14 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
 		if (!hv_dev)
 			continue;
 
+		/*
+		 * If the Hyper-V host doesn't provide a NUMA node for the
+		 * device, default to node 0. With NUMA_NO_NODE the kernel
+		 * may spread work across NUMA nodes, which degrades
+		 * performance on Hyper-V.
+		 */
+		set_dev_node(&dev->dev, 0);
+
 		if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY &&
 		    hv_dev->desc.virtual_numa_node < num_possible_nodes())
 			/*
-- 
2.53.0


^ permalink raw reply related

* [PATCH AUTOSEL 7.0-6.18] net: mana: hardening: Validate adapter_mtu from MANA_QUERY_DEV_CONFIG
From: Sasha Levin @ 2026-04-20 13:17 UTC (permalink / raw)
  To: patches, stable
  Cc: Erni Sri Satya Vennela, Jakub Kicinski, Sasha Levin, kys,
	haiyangz, wei.liu, decui, longli, andrew+netdev, davem, edumazet,
	pabeni, linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260420132314.1023554-1-sashal@kernel.org>

From: Erni Sri Satya Vennela <ernis@linux.microsoft.com>

[ Upstream commit d7709812e13d06132ddae3d21540472ea5cb11c5 ]

As a part of MANA hardening for CVM, validate the adapter_mtu value
returned from the MANA_QUERY_DEV_CONFIG HWC command.

The adapter_mtu value is used to compute ndev->max_mtu via:
gc->adapter_mtu - ETH_HLEN. If hardware returns a bogus adapter_mtu
smaller than ETH_HLEN (e.g. 0), the unsigned subtraction wraps to a
huge value, silently allowing oversized MTU settings.

Add a validation check to reject adapter_mtu values below
ETH_MIN_MTU + ETH_HLEN, returning -EPROTO to fail the device
configuration early with a clear error message.

Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
Link: https://patch.msgid.link/20260326173101.2010514-1-ernis@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---

LLM Generated explanations, may be completely bogus:

This confirms the integer underflow. Now let me complete the analysis.

---

## PHASE 1: COMMIT MESSAGE FORENSICS

**Step 1.1: Subject Line**
- Subsystem: `net: mana:` (Microsoft Azure Network Adapter driver)
- Action: "hardening: Validate" - input validation / defensive check
- Summary: Validates `adapter_mtu` from hardware config query to prevent
  integer underflow

**Step 1.2: Tags**
- `Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>` -
  author, Microsoft employee, regular MANA contributor (9+ commits)
- `Link: https://patch.msgid.link/20260326173101.2010514-1-
  ernis@linux.microsoft.com` - single patch (not part of a series,
  1-of-1)
- `Signed-off-by: Jakub Kicinski <kuba@kernel.org>` - netdev maintainer
  accepted the patch
- No Fixes: tag (expected for candidates under review)
- No Reported-by tag
- No Cc: stable tag

**Step 1.3: Body Text**
- Bug: `adapter_mtu` value from hardware can be bogus (< ETH_HLEN = 14).
  The subtraction `gc->adapter_mtu - ETH_HLEN` used to compute
  `ndev->max_mtu` wraps to a huge value (~4GB), silently allowing
  oversized MTU settings.
- Context: Part of CVM (Confidential VM) hardening where the hypervisor
  is less trusted.
- Fix: Reject values below `ETH_MIN_MTU + ETH_HLEN` (82 bytes) with
  `-EPROTO`.

**Step 1.4: Hidden Bug Fix Detection**
- Though labeled "hardening," this IS a real bug fix: it prevents a
  concrete integer underflow that leads to incorrect max_mtu. The bug
  mechanism is clear and the consequences (allowing oversized MTU
  settings) are real.

## PHASE 2: DIFF ANALYSIS

**Step 2.1: Inventory**
- Files: `drivers/net/ethernet/microsoft/mana/mana_en.c` (+8/-2 net, ~6
  lines of logic)
- Function modified: `mana_query_device_cfg()`
- Scope: Single-file, single-function, surgical fix

**Step 2.2: Code Flow Change**
- Before: `resp.adapter_mtu` was accepted unconditionally when
  msg_version >= GDMA_MESSAGE_V2
- After: Validates `resp.adapter_mtu >= ETH_MIN_MTU + ETH_HLEN` (82)
  before accepting; returns `-EPROTO` on failure
- The else branch and brace additions are purely cosmetic (adding braces
  to existing if/else)

**Step 2.3: Bug Mechanism**
- Category: Integer underflow / input validation bug
- Mechanism: `gc->adapter_mtu` (u16, could be 0) used in `ndev->max_mtu
  = gc->adapter_mtu - ETH_HLEN`. If adapter_mtu < 14, the result wraps
  to ~4GB as unsigned int.
- Confirmed via two usage sites:
  - `mana_en.c:3349`: `ndev->max_mtu = gc->adapter_mtu - ETH_HLEN`
  - `mana_bpf.c:242`: `ndev->max_mtu = gc->adapter_mtu - ETH_HLEN`

**Step 2.4: Fix Quality**
- Obviously correct: simple bounds check with a clear threshold
- Minimal: 6 lines of logic change
- No regression risk: only rejects values that would cause incorrect
  behavior anyway
- Clean: well-contained, single function

## PHASE 3: GIT HISTORY INVESTIGATION

**Step 3.1: Blame**
- The `adapter_mtu` field assignment was introduced in commit
  `80f6215b450eb8` ("net: mana: Add support for jumbo frame", Haiyang
  Zhang, 2023-04-12)
- This commit was first included in `v6.4-rc1`
- The vulnerable code has been present since v6.4

**Step 3.2: No Fixes: tag to follow**

**Step 3.3: File History**
- The file has active development with multiple fixes applied. No
  conflicting changes to the `mana_query_device_cfg()` function recently
  aside from commit `290e5d3c49f687` which added GDMA_MESSAGE_V3
  handling.

**Step 3.4: Author**
- Erni Sri Satya Vennela is a regular MANA contributor with 9+ commits
  to the driver, all from `@linux.microsoft.com`. The author is part of
  the Microsoft team maintaining this driver.

**Step 3.5: Dependencies**
- This is a standalone patch (1-of-1, not part of a series)
- Uses only existing constants (`ETH_MIN_MTU`, `ETH_HLEN`) which exist
  in all kernel versions
- The GDMA_MESSAGE_V2 check already exists in stable trees since v6.4

## PHASE 4: MAILING LIST RESEARCH

**Step 4.1-4.5:** b4 dig failed to find the thread. Lore is behind an
anti-scraping wall. However, the patch was accepted by netdev maintainer
Jakub Kicinski (signed-off-by), which indicates it passed netdev review.
The Link tag confirms it was a single-patch submission.

## PHASE 5: CODE SEMANTIC ANALYSIS

**Step 5.1: Functions Modified**
- `mana_query_device_cfg()` - device configuration query during probe

**Step 5.2: Callers**
- Called from `mana_probe_port()` -> `mana_query_device_cfg()` during
  device initialization
- This is the main probe path for all MANA network interfaces in Azure
  VMs

**Step 5.3: Downstream Impact**
- `gc->adapter_mtu` is used in two places to compute `ndev->max_mtu`:
  - `mana_en.c:3349` during probe
  - `mana_bpf.c:242` when XDP is detached
- Both perform `gc->adapter_mtu - ETH_HLEN` without checking for
  underflow

**Step 5.4: Reachability**
- This code is reached during every MANA device probe in Azure VMs -
  very common path for Azure users

## PHASE 6: STABLE TREE ANALYSIS

**Step 6.1: Buggy Code in Stable Trees**
- `adapter_mtu` was added in v6.4-rc1 via commit `80f6215b450eb8`
- Present in stable trees: 6.6.y, 6.12.y, 7.0.y
- NOT present in: 6.1.y, 5.15.y, 5.10.y (pre-dates adapter_mtu feature)

**Step 6.2: Backport Complications**
- Note: the current 7.0 tree has `resp.hdr.response.msg_version` (from
  commit `290e5d3c49f687`) while older stable trees may have
  `resp.hdr.resp.msg_version`. The diff may need minor adjustment for
  6.6.y.
- The validation logic itself is self-contained and trivially adaptable.

**Step 6.3: No related fixes already in stable.**

## PHASE 7: SUBSYSTEM AND MAINTAINER CONTEXT

**Step 7.1: Subsystem**
- `drivers/net/ethernet/microsoft/mana/` - MANA network driver for Azure
  VMs
- Criticality: IMPORTANT - widely used in Azure cloud infrastructure
  (millions of VMs)

**Step 7.2: Activity**
- Actively maintained with regular fixes. The author and team are
  Microsoft employees dedicated to this driver.

## PHASE 8: IMPACT AND RISK ASSESSMENT

**Step 8.1: Who is Affected**
- All Azure VM users running MANA driver (very large population)
- Especially CVM (Confidential VM) users where the hypervisor is less
  trusted

**Step 8.2: Trigger Conditions**
- Triggered when hardware/hypervisor returns `adapter_mtu < 82` in the
  config query response
- In CVM scenarios: malicious hypervisor could deliberately trigger this
- In non-CVM: unlikely but possible with firmware bugs

**Step 8.3: Failure Mode Severity**
- Integer underflow causes `max_mtu` to be set to ~4GB
- This silently allows setting huge MTU values that the hardware cannot
  support
- Could lead to packet corruption, buffer overflows in TX path, or
  device malfunction
- Severity: HIGH (potential for data corruption or security issue,
  especially in CVM)

**Step 8.4: Risk-Benefit Ratio**
- BENEFIT: Prevents integer underflow and incorrect device
  configuration. HIGH for CVM users, MEDIUM for regular Azure users.
- RISK: VERY LOW - only adds a bounds check on the initialization path.
  Cannot cause regression because it only rejects values that would
  cause broken behavior.

## PHASE 9: FINAL SYNTHESIS

**Step 9.1: Evidence Summary**

FOR backporting:
- Fixes a concrete integer underflow bug (adapter_mtu - ETH_HLEN wraps
  to ~4GB)
- Small, surgical fix (6 lines of logic)
- Obviously correct bounds check
- No regression risk
- Accepted by netdev maintainer
- Author is regular driver contributor
- Affects widely-used Azure MANA driver
- Security-relevant in CVM environments

AGAINST backporting:
- Labeled as "hardening" rather than "fix"
- No user reports of this being triggered in practice
- Trigger requires malicious or buggy firmware
- May need minor adjustment for older stable trees (response field name)

**Step 9.2: Stable Rules Checklist**
1. Obviously correct and tested? **YES** - simple bounds check, accepted
   by netdev maintainer
2. Fixes a real bug? **YES** - integer underflow leading to incorrect
   max_mtu
3. Important issue? **YES** - incorrect MTU can cause device
   malfunction; security issue in CVM
4. Small and contained? **YES** - 6 lines, single function, single file
5. No new features or APIs? **CORRECT** - no new features
6. Can apply to stable? **YES** - may need trivial adjustment for
   response field name in 6.6.y

**Step 9.3: Exception Categories**
- Not a standard exception category, but fits the pattern of input
  validation fixes that prevent integer overflow/underflow.

**Step 9.4: Decision**
The fix prevents a concrete integer underflow that causes `max_mtu` to
be set to ~4GB when hardware returns an invalid adapter_mtu. The fix is
minimal, obviously correct, and has zero regression risk. It is relevant
for Azure CVM security and defensive against firmware bugs.

## Verification

- [Phase 1] Parsed tags: Signed-off-by from author (Microsoft) and
  netdev maintainer Jakub Kicinski; Link to single-patch submission
- [Phase 2] Diff analysis: +6 lines of validation in
  `mana_query_device_cfg()`, checks `resp.adapter_mtu >= ETH_MIN_MTU +
  ETH_HLEN` (82)
- [Phase 2] Integer underflow verified: adapter_mtu=0 ->
  max_mtu=4294967282 (~4GB) via Python simulation
- [Phase 3] git blame: adapter_mtu code introduced in commit
  `80f6215b450eb8` (v6.4-rc1, 2023-04-12)
- [Phase 3] git describe --contains: confirmed in v6.4-rc1
- [Phase 3] Author has 9+ commits to MANA driver, regular contributor
- [Phase 4] b4 dig failed to find thread (timeout); lore blocked by
  anti-bot measures
- [Phase 5] Callers: `mana_query_device_cfg()` called from probe path;
  `gc->adapter_mtu - ETH_HLEN` used at mana_en.c:3349 and mana_bpf.c:242
- [Phase 5] Both usage sites perform unsigned subtraction without bounds
  check
- [Phase 6] Buggy code exists in stable trees 6.6.y+ (since v6.4-rc1)
- [Phase 6] Standalone fix, may need minor field name adjustment for
  older trees
- [Phase 7] MANA driver widely used in Azure (IMPORTANT criticality)
- [Phase 8] Failure mode: max_mtu set to ~4GB, allowing oversized MTU;
  severity HIGH
- [Phase 8] Risk: VERY LOW (only rejects clearly invalid values)

**YES**

 drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 09a53c9775455..7589ead7efdb6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1214,10 +1214,16 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
 
 	*max_num_vports = resp.max_num_vports;
 
-	if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2)
+	if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) {
+		if (resp.adapter_mtu < ETH_MIN_MTU + ETH_HLEN) {
+			dev_err(dev, "Adapter MTU too small: %u\n",
+				resp.adapter_mtu);
+			return -EPROTO;
+		}
 		gc->adapter_mtu = resp.adapter_mtu;
-	else
+	} else {
 		gc->adapter_mtu = ETH_FRAME_LEN;
+	}
 
 	if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3)
 		*bm_hostmode = resp.bm_hostmode;
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v4 5/5] net: mana: Fix EQ leak in mana_remove on NULL port
From: Erni Sri Satya Vennela @ 2026-04-20 12:47 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ernis, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260420124741.1056179-1-ernis@linux.microsoft.com>

In mana_remove(), when a NULL port is encountered in the port iteration
loop, 'goto out' skips the mana_destroy_eq(ac) call, leaking the event
queues allocated earlier by mana_create_eq().

This can happen when mana_probe_port() fails for port 0, leaving
ac->ports[0] as NULL. On driver unload or error cleanup, mana_remove()
hits the NULL entry and jumps past mana_destroy_eq().

Change 'goto out' to 'break' so the for-loop exits normally and
mana_destroy_eq() is always reached. Remove the now-unreferenced out:
label.

Fixes: 1e2d0824a9c3 ("net: mana: Add support for EQ sharing")
Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
---
Changes in v4:
* No change
Changes in v3:
* Update Fixes tag to appropriate commit id.
Changes in v2:
* Apply the patch in net instead of net-next.
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39b18577fb51..98e2fcc797ca 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -3752,7 +3752,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
 		if (!ndev) {
 			if (i == 0)
 				dev_err(dev, "No net device to remove\n");
-			goto out;
+			break;
 		}
 
 		apc = netdev_priv(ndev);
@@ -3783,7 +3783,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
 	}
 
 	mana_destroy_eq(ac);
-out:
+
 	if (ac->per_port_queue_reset_wq) {
 		destroy_workqueue(ac->per_port_queue_reset_wq);
 		ac->per_port_queue_reset_wq = NULL;
-- 
2.34.1


^ permalink raw reply related

* [PATCH net v4 4/5] net: mana: Don't overwrite port probe error with add_adev result
From: Erni Sri Satya Vennela @ 2026-04-20 12:47 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ernis, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260420124741.1056179-1-ernis@linux.microsoft.com>

In mana_probe(), if mana_probe_port() fails for any port, the error
is stored in 'err' and the loop breaks. However, the subsequent
unconditional 'err = add_adev(gd, "eth")' overwrites this error.
If add_adev() succeeds, mana_probe() returns success despite ports
being left in a partially initialized state (ac->ports[i] == NULL).

Only call add_adev() when there is no prior error, so the probe
correctly fails and triggers mana_remove() cleanup.

Fixes: a69839d4327d ("net: mana: Add support for auxiliary device")
Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
---
Changes in v4:
* Update Fixes tag to a69839d4327d.
Changes in v3:
*  Fix inaccurate comments.
Changes in v2:
* Apply the patch in net instead of net-next.
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index ce1b7ec46a27..39b18577fb51 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -3680,10 +3680,9 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 	if (!resuming) {
 		for (i = 0; i < ac->num_ports; i++) {
 			err = mana_probe_port(ac, i, &ac->ports[i]);
-			/* we log the port for which the probe failed and stop
-			 * probes for subsequent ports.
-			 * Note that we keep running ports, for which the probes
-			 * were successful, unless add_adev fails too
+			/* Log the port for which the probe failed, stop probing
+			 * subsequent ports, and skip add_adev.
+			 * mana_remove() will clean up already-probed ports.
 			 */
 			if (err) {
 				dev_err(dev, "Probe Failed for port %d\n", i);
@@ -3697,10 +3696,9 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 			enable_work(&apc->queue_reset_work);
 			err = mana_attach(ac->ports[i]);
 			rtnl_unlock();
-			/* we log the port for which the attach failed and stop
-			 * attach for subsequent ports
-			 * Note that we keep running ports, for which the attach
-			 * were successful, unless add_adev fails too
+			/* Log the port for which the attach failed, stop
+			 * attaching subsequent ports, and skip add_adev.
+			 * mana_remove() will clean up already-attached ports.
 			 */
 			if (err) {
 				dev_err(dev, "Attach Failed for port %d\n", i);
@@ -3709,7 +3707,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 		}
 	}
 
-	err = add_adev(gd, "eth");
+	if (!err)
+		err = add_adev(gd, "eth");
 
 	schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH net v4 3/5] net: mana: Guard mana_remove against double invocation
From: Erni Sri Satya Vennela @ 2026-04-20 12:47 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ernis, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260420124741.1056179-1-ernis@linux.microsoft.com>

If PM resume fails (e.g., mana_attach() returns an error), mana_probe()
calls mana_remove(), which tears down the device and sets
gd->gdma_context = NULL and gd->driver_data = NULL.

However, a failed resume callback does not automatically unbind the
driver. When the device is eventually unbound, mana_remove() is invoked
a second time. Without a NULL check, it dereferences gc->dev with
gc == NULL, causing a kernel panic.

Add an early return if gdma_context or driver_data is NULL so the second
invocation is harmless. Move the dev = gc->dev assignment after the
guard so it cannot dereference NULL.

Fixes: 635096a86edb ("net: mana: Support hibernation and kexec")
Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
---
Changes in v4:
* Update Fixes tag to 635096a86edb
Changes in v3:
* Add this patch to the patchset
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 468ed60a8a00..ce1b7ec46a27 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -3731,11 +3731,16 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
 	struct gdma_context *gc = gd->gdma_context;
 	struct mana_context *ac = gd->driver_data;
 	struct mana_port_context *apc;
-	struct device *dev = gc->dev;
+	struct device *dev;
 	struct net_device *ndev;
 	int err;
 	int i;
 
+	if (!gc || !ac)
+		return;
+
+	dev = gc->dev;
+
 	disable_work_sync(&ac->link_change_work);
 	cancel_delayed_work_sync(&ac->gf_stats_work);
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH net v4 2/5] net: mana: Init gf_stats_work before potential error paths in probe
From: Erni Sri Satya Vennela @ 2026-04-20 12:47 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ernis, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260420124741.1056179-1-ernis@linux.microsoft.com>

Move INIT_DELAYED_WORK(gf_stats_work) to before mana_create_eq(),
while keeping schedule_delayed_work() at its original location.

Previously, if any function between mana_create_eq() and the
INIT_DELAYED_WORK call failed, mana_probe() would call mana_remove()
which unconditionally calls cancel_delayed_work_sync(gf_stats_work)
on the still-uninitialized work struct. This can trigger a WARN_ON
in __flush_work() or debug object warnings with
CONFIG_DEBUG_OBJECTS_WORK enabled.

Fixes: be4f1d67ec56 ("net: mana: Add standard counter rx_missed_errors")
Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
---
Changes in v3,v4:
* No change.
Changes in v2:
* Apply the patch in net instead of net-next.
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index e3e4b6de6668..468ed60a8a00 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -3635,6 +3635,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 		INIT_WORK(&ac->link_change_work, mana_link_state_handle);
 	}
 
+	INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler);
+
 	err = mana_create_eq(ac);
 	if (err) {
 		dev_err(dev, "Failed to create EQs: %d\n", err);
@@ -3709,7 +3711,6 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 
 	err = add_adev(gd, "eth");
 
-	INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler);
 	schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
 
 out:
-- 
2.34.1


^ permalink raw reply related

* [PATCH net v4 1/5] net: mana: Init link_change_work before potential error paths in probe
From: Erni Sri Satya Vennela @ 2026-04-20 12:47 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ernis, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260420124741.1056179-1-ernis@linux.microsoft.com>

Move INIT_WORK(link_change_work) to right after the mana_context
allocation, before any error path that could reach mana_remove().

Previously, if mana_create_eq() or mana_query_device_cfg() failed,
mana_probe() would jump to the error path which calls mana_remove().
mana_remove() unconditionally calls disable_work_sync(link_change_work),
but the work struct had not been initialized yet. This can trigger
a WARN_ON in __flush_work() or debug object warnings with
CONFIG_DEBUG_OBJECTS_WORK enabled.

Fixes: 54133f9b4b53 ("net: mana: Support HW link state events")
Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
---
Changes in v3,v4:
* No change.
Changes in v2:
* Apply the patch in net instead of net-next.
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 6302432b9bf6..e3e4b6de6668 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -3631,6 +3631,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 
 		ac->gdma_dev = gd;
 		gd->driver_data = ac;
+
+		INIT_WORK(&ac->link_change_work, mana_link_state_handle);
 	}
 
 	err = mana_create_eq(ac);
@@ -3648,8 +3650,6 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
 
 	if (!resuming) {
 		ac->num_ports = num_ports;
-
-		INIT_WORK(&ac->link_change_work, mana_link_state_handle);
 	} else {
 		if (ac->num_ports != num_ports) {
 			dev_err(dev, "The number of vPorts changed: %d->%d\n",
-- 
2.34.1


^ permalink raw reply related

* [PATCH net v4 0/5] net: mana: Fix probe/remove error path bugs
From: Erni Sri Satya Vennela @ 2026-04-20 12:47 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ernis, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel

Fix five bugs in mana_probe()/mana_remove() error handling that can
cause warnings on uninitialized work structs, NULL pointer dereferences,
masked errors, and resource leaks when early probe steps fail.

Patches 1-2 move work struct initialization (link_change_work and
gf_stats_work) to before any error path that could trigger
mana_remove(), preventing WARN_ON in __flush_work() or debug object
warnings when sync cancellation runs on uninitialized work structs.

Patch 3 guards mana_remove() against double invocation. If PM resume
fails, mana_probe() calls mana_remove() which sets gdma_context and
driver_data to NULL. A failed resume does not unbind the driver, so
when the device is eventually unbound, mana_remove() is called again
and dereferences NULL, causing a kernel panic. An early return on
NULL gdma_context or driver_data makes the second call harmless.

Patch 4 prevents add_adev() from overwriting a port probe error,
which could leave the driver in a broken state with NULL ports while
reporting success.

Patch 5 changes 'goto out' to 'break' in mana_remove()'s port loop
so that mana_destroy_eq() is always reached, preventing EQ leaks when
a NULL port is encountered.
---
Changes in v4:
* Correct Fixes tag from ca9c54d2d6a5 to 635096a86edb
* Correct Fixes tag from ced82fce77e9 to a69839d4327d
Changes in v3:
* Add patch 3: net: mana: Guard mana_remove against double invocation.
* Fix inaccurate comments.
* Correct Fixes tag from ca9c54d2d6a5 to 1e2d0824a9c3.
Changes in v2:
* Apply the patchset in net instead of net-next.
---

Erni Sri Satya Vennela (5):
  net: mana: Init link_change_work before potential error paths in probe
  net: mana: Init gf_stats_work before potential error paths in probe
  net: mana: Guard mana_remove against double invocation
  net: mana: Don't overwrite port probe error with add_adev result
  net: mana: Fix EQ leak in mana_remove on NULL port

 drivers/net/ethernet/microsoft/mana/mana_en.c | 35 +++++++++++--------
 1 file changed, 20 insertions(+), 15 deletions(-)

-- 
2.34.1


^ permalink raw reply

* Re: [PATCH net v3 3/5] net: mana: Guard mana_remove against double invocation
From: Erni Sri Satya Vennela @ 2026-04-20 12:45 UTC (permalink / raw)
  To: Simon Horman
  Cc: haiyangz, davem, longli, ssengar, linux-kernel, kuba, kotaranov,
	stephen, andrew+netdev, kys, wei.liu, shirazsaleem, kees, pabeni,
	linux-hyperv, edumazet, decui, leon, gargaditya, dipayanroy,
	netdev, shacharr
In-Reply-To: <20260417140953.218832-1-horms@kernel.org>

On Fri, Apr 17, 2026 at 03:09:53PM +0100, Simon Horman wrote:
> This is an AI-generated review of your patch. The human sending this
> email has considered the AI review valid, or at least plausible.
> 
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: mana: Guard mana_remove against double invocation
> 
> If PM resume fails (e.g., mana_attach() returns an error), mana_probe()
> calls mana_remove(), which tears down the device and sets
> gd->gdma_context = NULL and gd->driver_data = NULL. However, a failed
> resume callback does not automatically unbind the driver. When the device
> is eventually unbound, mana_remove() is invoked a second time. Without a
> NULL check, it dereferences gc->dev with gc == NULL, causing a kernel
> panic. Add an early return if gdma_context or driver_data is NULL so the
> second invocation is harmless.
> 
> > Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
> 
> The Fixes: tag points to ca9c54d2d6a5, which introduced mana_remove()
> without NULL checks. However, the exploitable double invocation scenario
> was actually introduced by commit 635096a86edb ("net: mana: Support
> hibernation and kexec"), which added suspend/resume support and made
> mana_probe() call mana_remove() on error.
> 
> Should the Fixes: tag be:
> 
> Fixes: 635096a86edb ("net: mana: Support hibernation and kexec")
> 
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > index 9d095a6fb56c..2ab7c89e2fed 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > @@ -3685,11 +3685,16 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
> >  	struct gdma_context *gc = gd->gdma_context;
> >  	struct mana_context *ac = gd->driver_data;
> >  	struct mana_port_context *apc;
> > -	struct device *dev = gc->dev;
> > +	struct device *dev;
> >  	struct net_device *ndev;
> >  	int err;
> >  	int i;
> >
> > +	if (!gc || !ac)
> > +		return;
> > +
> > +	dev = gc->dev;
> > +
> >  	disable_work_sync(&ac->link_change_work);
> >  	cancel_delayed_work_sync(&ac->gf_stats_work);
Thank you for the correction, Simon.
I'll make this change in the next version.

- Vennela

^ permalink raw reply

* Re: [PATCH net v3 4/5] net: mana: Don't overwrite port probe error with add_adev result
From: Erni Sri Satya Vennela @ 2026-04-20 12:45 UTC (permalink / raw)
  To: Simon Horman
  Cc: haiyangz, davem, longli, ssengar, linux-kernel, kuba, kotaranov,
	stephen, andrew+netdev, kys, wei.liu, shirazsaleem, kees, pabeni,
	linux-hyperv, edumazet, decui, leon, gargaditya, dipayanroy,
	netdev, shacharr
In-Reply-To: <20260417141014.218936-1-horms@kernel.org>

On Fri, Apr 17, 2026 at 03:10:14PM +0100, Simon Horman wrote:
> This is an AI-generated review of your patch. The human sending this
> email has considered the AI review valid, or at least plausible.
> 
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: mana: Don't overwrite port probe error with add_adev result
> 
> In mana_probe(), if mana_probe_port() fails for any port, the error
> is stored in 'err' and the loop breaks. However, the subsequent
> unconditional 'err = add_adev(gd, "eth")' overwrites this error.
> If add_adev() succeeds, mana_probe() returns success despite ports
> being left in a partially initialized state (ac->ports[i] == NULL).
> Only call add_adev() when there is no prior error, so the probe
> correctly fails and triggers mana_remove() cleanup.
> 
> > In mana_probe(), if mana_probe_port() fails for any port, the error
> > is stored in 'err' and the loop breaks. However, the subsequent
> > unconditional 'err = add_adev(gd, "eth")' overwrites this error.
> > If add_adev() succeeds, mana_probe() returns success despite ports
> > being left in a partially initialized state (ac->ports[i] == NULL).
> >
> > Only call add_adev() when there is no prior error, so the probe
> > correctly fails and triggers mana_remove() cleanup.
> >
> > Fixes: ced82fce77e9 ("net: mana: Probe rdma device in mana driver")
> 
> The existing Fixes: tag points to commit ced82fce77e9 ("net: mana: Probe
> rdma device in mana driver"), but was the bug actually introduced there?
> 
> Looking at the commit history, commit a69839d4327d ("net: mana: Add support
> for auxiliary device") added the unconditional 'err = add_adev(gd);' call
> that overwrites the error from mana_probe_port(). Commit ced82fce77e9 only
> modified the add_adev signature from add_adev(gd) to add_adev(gd, "eth")
> but did not introduce the buggy pattern.
> 
> Should the Fixes: tag be:
>     Fixes: a69839d4327d ("net: mana: Add support for auxiliary device")

Thank you for the correction, Simon.
I'll make this change in the next version.

- Vennela

^ permalink raw reply

* Re: [PATCH v2] Drivers: hv: mshv: fix integer overflow in memory region overlap check
From: Junrui Luo @ 2026-04-20  5:17 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: vdso@mailbox.org, K. Y. Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Nuno Das Neves, Anirudh Rayabharam,
	Mukesh Rathor, Muminul Islam, Praveen K Paladugu, Jinank Jain,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
	Yuhao Jiang, stable@vger.kernel.org
In-Reply-To: <1644495552.14476.1776103846016@app.mailbox.org>

Hi Stanislav,

Gentle ping on this. Does this approach work for you?

Thanks,
Junrui Luo

^ permalink raw reply

* Re: [PATCH v0 07/15] mshv: Add ioctl support for MSHV-VFIO bridge device
From: Mukesh R @ 2026-04-18  0:20 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: linux-kernel, linux-hyperv, linux-arm-kernel, iommu, linux-pci,
	linux-arch, kys, haiyangz, wei.liu, decui, longli,
	catalin.marinas, will, tglx, mingo, bp, dave.hansen, hpa, joro,
	lpieralisi, kwilczynski, mani, robh, bhelgaas, arnd, nunodasneves,
	mhklinux, romank
In-Reply-To: <aW-pw7GlQdFv-lf5@skinsburskii.localdomain>

On 1/20/26 08:13, Stanislav Kinsburskii wrote:
> On Mon, Jan 19, 2026 at 10:42:22PM -0800, Mukesh R wrote:
>> From: Mukesh Rathor <mrathor@linux.microsoft.com>
>>
>> Add ioctl support for creating MSHV devices for a partition. At
>> present only VFIO device types are supported, but more could be
>> added. At a high level, a partition ioctl to create device verifies
>> it is of type VFIO and does some setup for bridge code in mshv_vfio.c.
>> Adapted from KVM device ioctls.
>>
>> Credits: Original author: Wei Liu <wei.liu@kernel.org>
>> NB: Slightly modified from the original version.
>>
>> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
>> ---
>>   drivers/hv/mshv_root_main.c | 126 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 126 insertions(+)
>>
>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
>> index 83c7bad269a0..27313419828d 100644
>> --- a/drivers/hv/mshv_root_main.c
>> +++ b/drivers/hv/mshv_root_main.c
>> @@ -1551,6 +1551,129 @@ mshv_partition_ioctl_initialize(struct mshv_partition *partition)
>>   	return ret;
>>   }
>>   
>> +static long mshv_device_attr_ioctl(struct mshv_device *mshv_dev, int cmd,
>> +				   ulong uarg)
>> +{
>> +	struct mshv_device_attr attr;
>> +	const struct mshv_device_ops *devops = mshv_dev->device_ops;
>> +
>> +	if (copy_from_user(&attr, (void __user *)uarg, sizeof(attr)))
>> +		return -EFAULT;
>> +
>> +	switch (cmd) {
>> +	case MSHV_SET_DEVICE_ATTR:
>> +		if (devops->device_set_attr)
>> +			return devops->device_set_attr(mshv_dev, &attr);
>> +		break;
>> +	case MSHV_HAS_DEVICE_ATTR:
>> +		if (devops->device_has_attr)
>> +			return devops->device_has_attr(mshv_dev, &attr);
>> +		break;
>> +	}
>> +
>> +	return -EPERM;
>> +}
>> +
>> +static long mshv_device_fop_ioctl(struct file *filp, unsigned int cmd,
>> +				  ulong uarg)
>> +{
>> +	struct mshv_device *mshv_dev = filp->private_data;
>> +
>> +	switch (cmd) {
>> +	case MSHV_SET_DEVICE_ATTR:
>> +	case MSHV_HAS_DEVICE_ATTR:
>> +		return mshv_device_attr_ioctl(mshv_dev, cmd, uarg);
>> +	}
>> +
>> +	return -ENOTTY;
>> +}
>> +
>> +static int mshv_device_fop_release(struct inode *inode, struct file *filp)
>> +{
>> +	struct mshv_device *mshv_dev = filp->private_data;
>> +	struct mshv_partition *partition = mshv_dev->device_pt;
>> +
>> +	if (mshv_dev->device_ops->device_release) {
>> +		mutex_lock(&partition->pt_mutex);
>> +		hlist_del(&mshv_dev->device_ptnode);
>> +		mshv_dev->device_ops->device_release(mshv_dev);
>> +		mutex_unlock(&partition->pt_mutex);
>> +	}
>> +
>> +	mshv_partition_put(partition);
>> +	return 0;
>> +}
>> +
>> +static const struct file_operations mshv_device_fops = {
>> +	.owner = THIS_MODULE,
>> +	.unlocked_ioctl = mshv_device_fop_ioctl,
>> +	.release = mshv_device_fop_release,
>> +};
>> +
>> +long mshv_partition_ioctl_create_device(struct mshv_partition *partition,
>> +					void __user *uarg)
>> +{
>> +	long rc;
>> +	struct mshv_create_device devargk;
>> +	struct mshv_device *mshv_dev;
>> +	const struct mshv_device_ops *vfio_ops;
>> +	int type;
>> +
>> +	if (copy_from_user(&devargk, uarg, sizeof(devargk))) {
>> +		rc = -EFAULT;
>> +		goto out;
>> +	}
>> +
>> +	/* At present, only VFIO is supported */
>> +	if (devargk.type != MSHV_DEV_TYPE_VFIO) {
>> +		rc = -ENODEV;
>> +		goto out;
>> +	}
>> +
>> +	if (devargk.flags & MSHV_CREATE_DEVICE_TEST) {
>> +		rc = 0;
>> +		goto out;
>> +	}
>> +
>> +	mshv_dev = kzalloc(sizeof(*mshv_dev), GFP_KERNEL_ACCOUNT);
>> +	if (mshv_dev == NULL) {
>> +		rc = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	vfio_ops = &mshv_vfio_device_ops;
>> +	mshv_dev->device_ops = vfio_ops;
>> +	mshv_dev->device_pt = partition;
>> +
>> +	rc = vfio_ops->device_create(mshv_dev, type);
>> +	if (rc < 0) {
>> +		kfree(mshv_dev);
>> +		goto out;
>> +	}
>> +
>> +	hlist_add_head(&mshv_dev->device_ptnode, &partition->pt_devices);
>> +
>> +	mshv_partition_get(partition);
>> +	rc = anon_inode_getfd(vfio_ops->device_name, &mshv_device_fops,
>> +			      mshv_dev, O_RDWR | O_CLOEXEC);
>> +	if (rc < 0) {
>> +		mshv_partition_put(partition);
>> +		hlist_del(&mshv_dev->device_ptnode);
>> +		vfio_ops->device_release(mshv_dev);
>> +		goto out;
>> +	}
>> +
>> +	devargk.fd = rc;
>> +	rc = 0;
>> +
>> +	if (copy_to_user(uarg, &devargk, sizeof(devargk))) {
> 
> Shouldn't the partition be put here?

No. anon_inode_getfd was successful and so it installed the fd already.
As a result the cleanup will happen in the file op release.

Thanks,
-Mukesh

> Thanks,
> Stanislav
> 
>> +		rc = -EFAULT;
>> +		goto out;
>> +	}
>> +out:
>> +	return rc;
>> +}
>> +
>>   static long
>>   mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>>   {
>> @@ -1587,6 +1710,9 @@ mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>>   	case MSHV_ROOT_HVCALL:
>>   		ret = mshv_ioctl_passthru_hvcall(partition, true, uarg);
>>   		break;
>> +	case MSHV_CREATE_DEVICE:
>> +		ret = mshv_partition_ioctl_create_device(partition, uarg);
>> +		break;
>>   	default:
>>   		ret = -ENOTTY;
>>   	}
>> -- 
>> 2.51.2.vfs.0.1
>>


^ permalink raw reply

* Re: [PATCH] Drivers: hv: vmbus: Improve the logc of reserving fb_mmio on Gen2 VMs
From: Krister Johansen @ 2026-04-17 20:24 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: kys, haiyangz, wei.liu, longli, linux-hyperv, linux-kernel,
	mhklinux, matthew.ruffell, stable
In-Reply-To: <20260416183529.838321-1-decui@microsoft.com>

On Thu, Apr 16, 2026 at 11:35:29AM -0700, Dexuan Cui wrote:
> If vmbus_reserve_fb() in the kdump kernel fails to properly reserve the
> framebuffer MMIO range due to a Gen2 VM's screen.lfb_base being zero [1],
> there is an MMIO conflict between the drivers hyperv_drm and pci-hyperv.
> This is especially an issue if pci-hyperv is built-in and hyperv_drm is
> built as a module. Consequently, the kdump kernel fails to detect PCI
> devices via pci-hyperv, and may fail to mount the root file system,
> which may reside in a NVMe disk.
> 
> On Gen2 VMs, if the screen.lfb_base is 0 in the kdump kernel, fall
> back to the low MMIO base, which should be equal to the framebuffer
> MMIO base (Tested on x64 Windows Server 2016, and on x64 and ARM64 Windows
> Server 2025 and on Azure) [2]. In the first kernel, screen.lfb_base
> is not 0; if the user specifies a high resolution, it's not enough to
> only reserve 8MB: in this case, reserve half of the space below 4GB, but
> cap the reservation to 128MB, which is the required framebuffer size of
> the highest resolution 7680*4320 supported by Hyper-V.
> 
> Add the cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) check, because a CoCo
> VM (i.e. Confidential VM) on Hyper-V doesn't have any framebuffer
> device, so there is no need to reserve any MMIO for it.
> 
> While at it, fix the comparison "end > VTPM_BASE_ADDRESS" by changing
> the > to >=. Here the 'end' is an inclusive end (typically, it's
> 0xFFFF_FFFF).
> 
> [1] https://lore.kernel.org/all/SA1PR21MB692176C1BC53BFC9EAE5CF8EBF51A@SA1PR21MB6921.namprd21.prod.outlook.com/
> [2] https://lore.kernel.org/all/SA1PR21MB69218F955B62DFF62E3E88D2BF222@SA1PR21MB6921.namprd21.prod.outlook.com/
> 
> Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
> CC: stable@vger.kernel.org
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/vmbus_drv.c | 30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
 
Thanks for the updated patch.  I tested this on the arm64 instances that
had been failing and was able to confirm that without it present the
failure still occurred, but with the new patch networking was able to
attach correctly in the dump environment and kdumps were successful.

Tested-by: Krister Johansen <kjlx@templeofstupid.com>

-K

^ permalink raw reply

* Re: [PATCH] Drivers: hv: vmbus: Improve the logc of reserving fb_mmio on Gen2 VMs
From: Hardik Garg @ 2026-04-17 18:19 UTC (permalink / raw)
  To: Dexuan Cui, kys, haiyangz, wei.liu, longli, linux-hyperv,
	linux-kernel, mhklinux, matthew.ruffell, johansen
  Cc: stable
In-Reply-To: <20260416183529.838321-1-decui@microsoft.com>



On 4/16/2026 11:35 AM, Dexuan Cui wrote:
> If vmbus_reserve_fb() in the kdump kernel fails to properly reserve the
> framebuffer MMIO range due to a Gen2 VM's screen.lfb_base being zero [1],
> there is an MMIO conflict between the drivers hyperv_drm and pci-hyperv.
> This is especially an issue if pci-hyperv is built-in and hyperv_drm is
> built as a module. Consequently, the kdump kernel fails to detect PCI
> devices via pci-hyperv, and may fail to mount the root file system,
> which may reside in a NVMe disk.
> 
> On Gen2 VMs, if the screen.lfb_base is 0 in the kdump kernel, fall
> back to the low MMIO base, which should be equal to the framebuffer
> MMIO base (Tested on x64 Windows Server 2016, and on x64 and ARM64 Windows
> Server 2025 and on Azure) [2]. In the first kernel, screen.lfb_base
> is not 0; if the user specifies a high resolution, it's not enough to
> only reserve 8MB: in this case, reserve half of the space below 4GB, but
> cap the reservation to 128MB, which is the required framebuffer size of
> the highest resolution 7680*4320 supported by Hyper-V.
> 
> Add the cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) check, because a CoCo
> VM (i.e. Confidential VM) on Hyper-V doesn't have any framebuffer
> device, so there is no need to reserve any MMIO for it.
> 
> While at it, fix the comparison "end > VTPM_BASE_ADDRESS" by changing
> the > to >=. Here the 'end' is an inclusive end (typically, it's
> 0xFFFF_FFFF).
> 
> [1] https://lore.kernel.org/all/SA1PR21MB692176C1BC53BFC9EAE5CF8EBF51A@SA1PR21MB6921.namprd21.prod.outlook.com/
> [2] https://lore.kernel.org/all/SA1PR21MB69218F955B62DFF62E3E88D2BF222@SA1PR21MB6921.namprd21.prod.outlook.com/
> 
> Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
> CC: stable@vger.kernel.org
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/vmbus_drv.c | 30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
Reviewed-by: Hardik Garg <hargar@linux.microsoft.com>




Thanks,
Hardik

^ permalink raw reply

* Re: [PATCH net v3 4/5] net: mana: Don't overwrite port probe error with add_adev result
From: Simon Horman @ 2026-04-17 14:10 UTC (permalink / raw)
  To: ernis
  Cc: Simon Horman, haiyangz, davem, longli, ssengar, linux-kernel,
	kuba, kotaranov, stephen, andrew+netdev, kys, wei.liu,
	shirazsaleem, kees, pabeni, linux-hyperv, edumazet, decui, leon,
	gargaditya, dipayanroy, netdev, shacharr
In-Reply-To: <20260415080944.732901-5-ernis@linux.microsoft.com>

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.

For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: mana: Don't overwrite port probe error with add_adev result

In mana_probe(), if mana_probe_port() fails for any port, the error
is stored in 'err' and the loop breaks. However, the subsequent
unconditional 'err = add_adev(gd, "eth")' overwrites this error.
If add_adev() succeeds, mana_probe() returns success despite ports
being left in a partially initialized state (ac->ports[i] == NULL).
Only call add_adev() when there is no prior error, so the probe
correctly fails and triggers mana_remove() cleanup.

> In mana_probe(), if mana_probe_port() fails for any port, the error
> is stored in 'err' and the loop breaks. However, the subsequent
> unconditional 'err = add_adev(gd, "eth")' overwrites this error.
> If add_adev() succeeds, mana_probe() returns success despite ports
> being left in a partially initialized state (ac->ports[i] == NULL).
>
> Only call add_adev() when there is no prior error, so the probe
> correctly fails and triggers mana_remove() cleanup.
>
> Fixes: ced82fce77e9 ("net: mana: Probe rdma device in mana driver")

The existing Fixes: tag points to commit ced82fce77e9 ("net: mana: Probe
rdma device in mana driver"), but was the bug actually introduced there?

Looking at the commit history, commit a69839d4327d ("net: mana: Add support
for auxiliary device") added the unconditional 'err = add_adev(gd);' call
that overwrites the error from mana_probe_port(). Commit ced82fce77e9 only
modified the add_adev signature from add_adev(gd) to add_adev(gd, "eth")
but did not introduce the buggy pattern.

Should the Fixes: tag be:
    Fixes: a69839d4327d ("net: mana: Add support for auxiliary device")

^ permalink raw reply

* Re: [PATCH net v3 3/5] net: mana: Guard mana_remove against double invocation
From: Simon Horman @ 2026-04-17 14:09 UTC (permalink / raw)
  To: ernis
  Cc: Simon Horman, haiyangz, davem, longli, ssengar, linux-kernel,
	kuba, kotaranov, stephen, andrew+netdev, kys, wei.liu,
	shirazsaleem, kees, pabeni, linux-hyperv, edumazet, decui, leon,
	gargaditya, dipayanroy, netdev, shacharr
In-Reply-To: <20260415080944.732901-4-ernis@linux.microsoft.com>

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.

For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: mana: Guard mana_remove against double invocation

If PM resume fails (e.g., mana_attach() returns an error), mana_probe()
calls mana_remove(), which tears down the device and sets
gd->gdma_context = NULL and gd->driver_data = NULL. However, a failed
resume callback does not automatically unbind the driver. When the device
is eventually unbound, mana_remove() is invoked a second time. Without a
NULL check, it dereferences gc->dev with gc == NULL, causing a kernel
panic. Add an early return if gdma_context or driver_data is NULL so the
second invocation is harmless.

> Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")

The Fixes: tag points to ca9c54d2d6a5, which introduced mana_remove()
without NULL checks. However, the exploitable double invocation scenario
was actually introduced by commit 635096a86edb ("net: mana: Support
hibernation and kexec"), which added suspend/resume support and made
mana_probe() call mana_remove() on error.

Should the Fixes: tag be:

Fixes: 635096a86edb ("net: mana: Support hibernation and kexec")

> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 9d095a6fb56c..2ab7c89e2fed 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -3685,11 +3685,16 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
>  	struct gdma_context *gc = gd->gdma_context;
>  	struct mana_context *ac = gd->driver_data;
>  	struct mana_port_context *apc;
> -	struct device *dev = gc->dev;
> +	struct device *dev;
>  	struct net_device *ndev;
>  	int err;
>  	int i;
>
> +	if (!gc || !ac)
> +		return;
> +
> +	dev = gc->dev;
> +
>  	disable_work_sync(&ac->link_change_work);
>  	cancel_delayed_work_sync(&ac->gf_stats_work);

^ permalink raw reply

* Re: [PATCH net v3 2/5] net: mana: Init gf_stats_work before potential error paths in probe
From: Simon Horman @ 2026-04-17 14:08 UTC (permalink / raw)
  To: Erni Sri Satya Vennela
  Cc: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260415080944.732901-3-ernis@linux.microsoft.com>

On Wed, Apr 15, 2026 at 01:09:38AM -0700, Erni Sri Satya Vennela wrote:
> Move INIT_DELAYED_WORK(gf_stats_work) to before mana_create_eq(),
> while keeping schedule_delayed_work() at its original location.
> 
> Previously, if any function between mana_create_eq() and the
> INIT_DELAYED_WORK call failed, mana_probe() would call mana_remove()
> which unconditionally calls cancel_delayed_work_sync(gf_stats_work)
> on the not-yet-initialized work struct. This can trigger a warning
> in __flush_work() or debug object warnings with
> CONFIG_DEBUG_OBJECTS_WORK enabled.
> 
> Fixes: be4f1d67ec56 ("net: mana: Add standard counter rx_missed_errors")
> Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
> ---
> Changes in v3:
> * No change
> Changes in v2:
> * Apply the patch in net instead of net-next.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* Re: [PATCH net v3 1/5] net: mana: Init link_change_work before potential error paths in probe
From: Simon Horman @ 2026-04-17 14:08 UTC (permalink / raw)
  To: Erni Sri Satya Vennela
  Cc: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, ssengar, dipayanroy, gargaditya,
	shirazsaleem, kees, kotaranov, leon, shacharr, stephen,
	linux-hyperv, netdev, linux-kernel
In-Reply-To: <20260415080944.732901-2-ernis@linux.microsoft.com>

On Wed, Apr 15, 2026 at 01:09:37AM -0700, Erni Sri Satya Vennela wrote:
> Move INIT_WORK(link_change_work) to right after the mana_context
> allocation, before any error path that could reach mana_remove().
> 
> Previously, if mana_create_eq() or mana_query_device_cfg() failed,
> mana_probe() would jump to the error path which calls mana_remove().
> mana_remove() unconditionally calls disable_work_sync(link_change_work),
> but the work struct had not been initialized yet. This can trigger
> a warning in __flush_work() or debug object warnings with
> CONFIG_DEBUG_OBJECTS_WORK enabled.
> 
> Fixes: 54133f9b4b53 ("net: mana: Support HW link state events")
> Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
> ---
> Changes in v3:
> * No change.
> Changes in v2:
> * Apply the patch in net instead of net-next.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* Re: [PATCH net v2] hv_sock: Report EOF instead of -EIO for FIN
From: Stefano Garzarella @ 2026-04-17  8:11 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: kys, haiyangz, wei.liu, longli, davem, edumazet, kuba, pabeni,
	horms, niuxuewei.nxw, linux-hyperv, virtualization, netdev,
	linux-kernel, stable, Ben Hillis, Mitchell Levy
In-Reply-To: <20260416191433.840637-1-decui@microsoft.com>

On Thu, Apr 16, 2026 at 12:14:33PM -0700, Dexuan Cui wrote:
>Commit f0c5827d07cb unluckily causes a regression for the FIN packet,
>and the final read syscall gets an error rather than 0.
>
>Ideally, we would want to fix hvs_channel_readable_payload() so that it
>could return 0 in the FIN scenario, but it's not good for the hv_sock
>driver to use the VMBus ringbuffer's cached priv_read_index, which is
>internal data in the VMBus driver.
>
>Fix the regression in hv_sock by returning 0 rather than -EIO.
>
>Fixes: f0c5827d07cb ("hv_sock: Return the readable bytes in hvs_stream_has_data()")
>Cc: stable@vger.kernel.org
>Reported-by: Ben Hillis <Ben.Hillis@microsoft.com>
>Reported-by: Mitchell Levy <levymitchell0@gmail.com>
>Signed-off-by: Dexuan Cui <decui@microsoft.com>
>---
>
>Changes since v1:
>    Removed the local variable 'need_refill' to make the code more
>    readable. Stefano, thanks!

Thanks for the fix!

>
>    No other change.
>
> net/vmw_vsock/hyperv_transport.c | 20 ++++++++++++++++----
> 1 file changed, 16 insertions(+), 4 deletions(-)

Acked-by: Stefano Garzarella <sgarzare@redhat.com>


^ permalink raw reply

* Re: [PATCH net-next 0/2] net: mana: Avoid queue struct allocation failure under memory fragmentation
From: Jakub Kicinski @ 2026-04-17  2:08 UTC (permalink / raw)
  To: Aditya Garg
  Cc: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, pabeni, kotaranov, horms, ssengar, jacob.e.keller,
	dipayanroy, ernis, shirazsaleem, kees, sbhatta, leitao, netdev,
	linux-hyperv, linux-kernel, linux-rdma, bpf, gargaditya
In-Reply-To: <20260414151456.687506-1-gargaditya@linux.microsoft.com>

On Tue, 14 Apr 2026 08:13:28 -0700 Aditya Garg wrote:
> The MANA driver can fail to load on systems with high memory
> utilization because several allocations in the queue setup paths
> require large physically contiguous blocks via kmalloc. Under memory
> fragmentation these high-order allocations may fail, preventing the
> driver from creating queues at probe time or when reconfiguring
> channels, ring parameters or MTU at runtime.

## Form letter - net-next-closed

We have already submitted our pull request with net-next material for v7.1,
and therefore net-next is closed for new drivers, features, code refactoring
and optimizations. We are currently accepting bug fixes only.

Please repost when net-next reopens after Apr 27th.

RFC patches sent for review only are obviously welcome at any time.

See: https://www.kernel.org/doc/html/next/process/maintainer-netdev.html#development-cycle
-- 
pw-bot: defer
pv-bot: closed

^ permalink raw reply

* RE: [PATCH] Drivers: hv: vmbus: Improve the logc of reserving fb_mmio on Gen2 VMs
From: Dexuan Cui @ 2026-04-16 19:58 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org,
	Long Li, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, mhklinux@outlook.com,
	matthew.ruffell@canonical.com, johansen@templeofstupid.com
  Cc: stable@vger.kernel.org
In-Reply-To: <20260416183529.838321-1-decui@microsoft.com>

> Subject: [PATCH] Drivers: hv: vmbus: Improve the logc of reserving fb_mmio on
> Gen2 VMs

Sorry for the typo in the subject -- the "logc" should be "logic". If this is the only
issue, I guess Wei can fix it for me :-)


^ permalink raw reply

* RE: [EXTERNAL] Re: [PATCH net] hv_sock: Report EOF instead of -EIO for FIN
From: Dexuan Cui @ 2026-04-16 19:30 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org, Long Li,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, horms@kernel.org, niuxuewei.nxw@antgroup.com,
	linux-hyperv@vger.kernel.org, virtualization@lists.linux.dev,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	stable@vger.kernel.org, Ben Hillis, Mitchell Levy
In-Reply-To: <SA1PR21MB6921C57E27E17305E56BC0F9BF222@SA1PR21MB6921.namprd21.prod.outlook.com>

> From: Dexuan Cui
> Sent: Wednesday, April 15, 2026 9:56 AM
> To: 'Stefano Garzarella' <sgarzare@redhat.com>
> > ...
> > Can we drop `need_refill` entirely and just check `hvs->recv_desc` here?
> 
> OK. Will post v2 later today.
> 
> > Mainly because now the comment we are adding is confusing me about what
> > `need_refill` means.
> >
> > The rest LGTM.
> >
> > Thanks,
> > Stefano

Hi Stefano, I just posted v2 here:
https://lore.kernel.org/linux-hyperv/20260416191433.840637-1-decui@microsoft.com/T/#u

^ permalink raw reply

* [PATCH net v2] hv_sock: Report EOF instead of -EIO for FIN
From: Dexuan Cui @ 2026-04-16 19:14 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli, sgarzare, davem, edumazet,
	kuba, pabeni, horms, niuxuewei.nxw, linux-hyperv, virtualization,
	netdev, linux-kernel
  Cc: stable, Ben Hillis, Mitchell Levy

Commit f0c5827d07cb unluckily causes a regression for the FIN packet,
and the final read syscall gets an error rather than 0.

Ideally, we would want to fix hvs_channel_readable_payload() so that it
could return 0 in the FIN scenario, but it's not good for the hv_sock
driver to use the VMBus ringbuffer's cached priv_read_index, which is
internal data in the VMBus driver.

Fix the regression in hv_sock by returning 0 rather than -EIO.

Fixes: f0c5827d07cb ("hv_sock: Return the readable bytes in hvs_stream_has_data()")
Cc: stable@vger.kernel.org
Reported-by: Ben Hillis <Ben.Hillis@microsoft.com>
Reported-by: Mitchell Levy <levymitchell0@gmail.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---

Changes since v1:
    Removed the local variable 'need_refill' to make the code more 
    readable. Stefano, thanks!

    No other change.

 net/vmw_vsock/hyperv_transport.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 069386a74557..e5ee7aa14d0c 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -694,7 +694,6 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
 static s64 hvs_stream_has_data(struct vsock_sock *vsk)
 {
 	struct hvsock *hvs = vsk->trans;
-	bool need_refill;
 	s64 ret;
 
 	if (hvs->recv_data_len > 0)
@@ -702,9 +701,22 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk)
 
 	switch (hvs_channel_readable_payload(hvs->chan)) {
 	case 1:
-		need_refill = !hvs->recv_desc;
-		if (!need_refill)
-			return -EIO;
+		if (hvs->recv_desc) {
+			/* Here hvs->recv_data_len is 0, so hvs->recv_desc must
+			 * be NULL unless it points to the 0-byte-payload FIN
+			 * packet: see hvs_update_recv_data().
+			 *
+			 * Here all the payload has been dequeued, but
+			 * hvs_channel_readable_payload() still returns 1,
+			 * because the VMBus ringbuffer's read_index is not
+			 * updated for the FIN packet: hvs_stream_dequeue() ->
+			 * hv_pkt_iter_next() updates the cached priv_read_index
+			 * but has no opportunity to update the read_index in
+			 * hv_pkt_iter_close() as hvs_stream_has_data() returns
+			 * 0 for the FIN packet, so it won't get dequeued.
+			 */
+			return 0;
+		}
 
 		hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
 		if (!hvs->recv_desc)
-- 
2.49.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox