public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] mshv: Extend create partition ioctl to support cpu features
@ 2025-11-11 23:19 Nuno Das Neves
  2025-11-12 16:27 ` Michael Kelley
  2025-11-17 23:58 ` Stanislav Kinsburskii
  0 siblings, 2 replies; 7+ messages in thread
From: Nuno Das Neves @ 2025-11-11 23:19 UTC (permalink / raw)
  To: linux-hyperv, linux-kernel, wei.liu, mhklinux
  Cc: kys, haiyangz, decui, longli, skinsburskii, prapal, mrathor,
	muislam, anrayabh, Jinank Jain, Nuno Das Neves

From: Muminul Islam <muislam@microsoft.com>

The existing mshv create partition ioctl does not provide a way to
specify which cpu features are enabled in the guest. Instead, it
attempts to enable all features and those that are not supported are
silently disabled by the hypervisor.

This was done to reduce unnecessary complexity and is sufficient for
many cases. However, new scenarios require fine-grained control over
these features.

Define a new mshv_create_partition_v2 structure which supports
passing the disabled processor and xsave feature bits through to the
create partition hypercall directly.

Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
the new structure. If unset, the original mshv_create_partition struct
is used, with the old behavior of enabling all features.

Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
Signed-off-by: Muminul Islam <muislam@microsoft.com>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
Changes in v4:
- Change BIT() to BIT_ULL() [Michael Kelley]
- Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
  that number to never change. In future, additional processor banks
  will be settable as 'early' partition properties. Remove redundant
  code that set default values for unspecified banks [Michael Kelley]
- Set xsave features to 0 on arm64 [Michael Kelley]
- Add clarifying comments in a few places

Changes in v3:
- Remove the new cpu features definitions in hvhdk.h, and retain the
  old behavior of enabling all features for the old struct. For the v2
  struct, still disable unspecified feature banks, since that makes it
  robust to future extensions.
- Amend comments and commit message to reflect the above
- Fix unused variable on arm64 [kernel test robot]

Changes in v2:
- Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
- Fix compilation issue on arm64 [kernel test robot]
---
 drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
 include/uapi/linux/mshv.h   |  34 +++++++++++
 2 files changed, 126 insertions(+), 21 deletions(-)

diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index d542a0143bb8..9f9438289b60 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
 	return 0;
 }
 
-static long
-mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
+static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
+	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
+
+static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
+					struct hv_partition_creation_properties *cr_props,
+					union hv_partition_isolation_properties *isol_props)
 {
-	struct mshv_create_partition args;
-	u64 creation_flags;
-	struct hv_partition_creation_properties creation_properties = {};
-	union hv_partition_isolation_properties isolation_properties = {};
-	struct mshv_partition *partition;
-	struct file *file;
-	int fd;
-	long ret;
+	int i;
+	struct mshv_create_partition_v2 args;
+	union hv_partition_processor_features *disabled_procs;
+	union hv_partition_processor_xsave_features *disabled_xsave;
 
-	if (copy_from_user(&args, user_arg, sizeof(args)))
+	/* First, copy v1 struct in case user is on previous versions */
+	if (copy_from_user(&args, user_arg,
+			   sizeof(struct mshv_create_partition)))
 		return -EFAULT;
 
 	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
 	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
 		return -EINVAL;
 
+	disabled_procs = &cr_props->disabled_processor_features;
+	disabled_xsave = &cr_props->disabled_processor_xsave_features;
+
+	/* Check if user provided newer struct with feature fields */
+	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
+		if (copy_from_user(&args, user_arg, sizeof(args)))
+			return -EFAULT;
+
+		if (args.pt_num_cpu_fbanks != MSHV_NUM_CPU_FEATURES_BANKS ||
+		    mshv_field_nonzero(args, pt_rsvd) ||
+		    mshv_field_nonzero(args, pt_rsvd1))
+			return -EINVAL;
+
+		/*
+		 * Note this assumes MSHV_NUM_CPU_FEATURES_BANKS will never
+		 * change and equals HV_PARTITION_PROCESSOR_FEATURES_BANKS
+		 * (i.e. 2).
+		 *
+		 * Further banks (index >= 2) will be modifiable as 'early'
+		 * properties via the set partition property hypercall.
+		 */
+		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
+			disabled_procs->as_uint64[i] = args.pt_cpu_fbanks[i];
+
+#if IS_ENABLED(CONFIG_X86_64)
+		disabled_xsave->as_uint64 = args.pt_disabled_xsave;
+#else
+		/*
+		 * In practice this field is ignored on arm64, but safer to
+		 * zero it in case it is ever used.
+		 */
+		disabled_xsave->as_uint64 = 0;
+
+		if (mshv_field_nonzero(args, pt_rsvd2))
+			return -EINVAL;
+#endif
+	} else {
+		/*
+		 * v1 behavior: try to enable everything. The hypervisor will
+		 * disable features that are not supported. The banks can be
+		 * queried via the get partition property hypercall.
+		 */
+		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
+			disabled_procs->as_uint64[i] = 0;
+
+		disabled_xsave->as_uint64 = 0;
+	}
+
 	/* Only support EXO partitions */
-	creation_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
-			 HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
+	*pt_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
+		    HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
+
+	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_LAPIC))
+		*pt_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
+	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_X2APIC))
+		*pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
+	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES))
+		*pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
 
-	if (args.pt_flags & BIT(MSHV_PT_BIT_LAPIC))
-		creation_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
-	if (args.pt_flags & BIT(MSHV_PT_BIT_X2APIC))
-		creation_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
-	if (args.pt_flags & BIT(MSHV_PT_BIT_GPA_SUPER_PAGES))
-		creation_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
+	isol_props->as_uint64 = 0;
 
 	switch (args.pt_isolation) {
 	case MSHV_PT_ISOLATION_NONE:
-		isolation_properties.isolation_type =
-			HV_PARTITION_ISOLATION_TYPE_NONE;
+		isol_props->isolation_type = HV_PARTITION_ISOLATION_TYPE_NONE;
 		break;
 	}
 
+	return 0;
+}
+
+static long
+mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
+{
+	u64 creation_flags;
+	struct hv_partition_creation_properties creation_properties;
+	union hv_partition_isolation_properties isolation_properties;
+	struct mshv_partition *partition;
+	struct file *file;
+	int fd;
+	long ret;
+
+	ret = mshv_ioctl_process_pt_flags(user_arg, &creation_flags,
+					  &creation_properties,
+					  &isolation_properties);
+	if (ret)
+		return ret;
+
 	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
 	if (!partition)
 		return -ENOMEM;
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index 876bfe4e4227..cf904f3aa201 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -26,6 +26,7 @@ enum {
 	MSHV_PT_BIT_LAPIC,
 	MSHV_PT_BIT_X2APIC,
 	MSHV_PT_BIT_GPA_SUPER_PAGES,
+	MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES,
 	MSHV_PT_BIT_COUNT,
 };
 
@@ -41,6 +42,8 @@ enum {
  * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
  * @pt_isolation: MSHV_PT_ISOLATION_*
  *
+ * This is the initial/v1 version for backward compatibility.
+ *
  * Returns a file descriptor to act as a handle to a guest partition.
  * At this point the partition is not yet initialized in the hypervisor.
  * Some operations must be done with the partition in this state, e.g. setting
@@ -52,6 +55,37 @@ struct mshv_create_partition {
 	__u64 pt_isolation;
 };
 
+#define MSHV_NUM_CPU_FEATURES_BANKS 2
+
+/**
+ * struct mshv_create_partition_v2
+ *
+ * This is extended version of the above initial MSHV_CREATE_PARTITION
+ * ioctl and allows for following additional parameters:
+ *
+ * @pt_num_cpu_fbanks: Must be set to MSHV_NUM_CPU_FEATURES_BANKS.
+ * @pt_cpu_fbanks: Disabled processor feature banks array.
+ * @pt_disabled_xsave: Disabled xsave feature bits.
+ *
+ * pt_cpu_fbanks and pt_disabled_xsave are passed through as-is to the create
+ * partition hypercall.
+ *
+ * Returns : same as above original mshv_create_partition
+ */
+struct mshv_create_partition_v2 {
+	__u64 pt_flags;
+	__u64 pt_isolation;
+	__u16 pt_num_cpu_fbanks;
+	__u8  pt_rsvd[6];		/* MBZ */
+	__u64 pt_cpu_fbanks[MSHV_NUM_CPU_FEATURES_BANKS];
+	__u64 pt_rsvd1[2];		/* MBZ */
+#if defined(__x86_64__)
+	__u64 pt_disabled_xsave;
+#else
+	__u64 pt_rsvd2;			/* MBZ */
+#endif
+} __packed;
+
 /* /dev/mshv */
 #define MSHV_CREATE_PARTITION	_IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [PATCH v4] mshv: Extend create partition ioctl to support cpu features
  2025-11-11 23:19 [PATCH v4] mshv: Extend create partition ioctl to support cpu features Nuno Das Neves
@ 2025-11-12 16:27 ` Michael Kelley
  2025-11-13 18:47   ` Wei Liu
  2025-11-17 23:58 ` Stanislav Kinsburskii
  1 sibling, 1 reply; 7+ messages in thread
From: Michael Kelley @ 2025-11-12 16:27 UTC (permalink / raw)
  To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, wei.liu@kernel.org
  Cc: kys@microsoft.com, haiyangz@microsoft.com, decui@microsoft.com,
	longli@microsoft.com, skinsburskii@linux.microsoft.com,
	prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
	muislam@microsoft.com, anrayabh@linux.microsoft.com, Jinank Jain

From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Tuesday, November 11, 2025 3:20 PM
> 
> The existing mshv create partition ioctl does not provide a way to
> specify which cpu features are enabled in the guest. Instead, it
> attempts to enable all features and those that are not supported are
> silently disabled by the hypervisor.
> 
> This was done to reduce unnecessary complexity and is sufficient for
> many cases. However, new scenarios require fine-grained control over
> these features.
> 
> Define a new mshv_create_partition_v2 structure which supports
> passing the disabled processor and xsave feature bits through to the
> create partition hypercall directly.
> 
> Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
> the new structure. If unset, the original mshv_create_partition struct
> is used, with the old behavior of enabling all features.
> 
> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
> Signed-off-by: Muminul Islam <muislam@microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
> Changes in v4:
> - Change BIT() to BIT_ULL() [Michael Kelley]
> - Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
>   that number to never change. In future, additional processor banks
>   will be settable as 'early' partition properties. Remove redundant
>   code that set default values for unspecified banks [Michael Kelley]
> - Set xsave features to 0 on arm64 [Michael Kelley]
> - Add clarifying comments in a few places
> 
> Changes in v3:
> - Remove the new cpu features definitions in hvhdk.h, and retain the
>   old behavior of enabling all features for the old struct. For the v2
>   struct, still disable unspecified feature banks, since that makes it
>   robust to future extensions.
> - Amend comments and commit message to reflect the above
> - Fix unused variable on arm64 [kernel test robot]
> 
> Changes in v2:
> - Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
> - Fix compilation issue on arm64 [kernel test robot]
> ---
>  drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
>  include/uapi/linux/mshv.h   |  34 +++++++++++
>  2 files changed, 126 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index d542a0143bb8..9f9438289b60 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
>  	return 0;
>  }
> 
> -static long
> -mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> +static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
> +	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
> +
> +static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
> +					struct hv_partition_creation_properties *cr_props,
> +					union hv_partition_isolation_properties *isol_props)
>  {
> -	struct mshv_create_partition args;
> -	u64 creation_flags;
> -	struct hv_partition_creation_properties creation_properties = {};
> -	union hv_partition_isolation_properties isolation_properties = {};
> -	struct mshv_partition *partition;
> -	struct file *file;
> -	int fd;
> -	long ret;
> +	int i;
> +	struct mshv_create_partition_v2 args;
> +	union hv_partition_processor_features *disabled_procs;
> +	union hv_partition_processor_xsave_features *disabled_xsave;
> 
> -	if (copy_from_user(&args, user_arg, sizeof(args)))
> +	/* First, copy v1 struct in case user is on previous versions */
> +	if (copy_from_user(&args, user_arg,
> +			   sizeof(struct mshv_create_partition)))
>  		return -EFAULT;
> 
>  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
>  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
>  		return -EINVAL;
> 
> +	disabled_procs = &cr_props->disabled_processor_features;
> +	disabled_xsave = &cr_props->disabled_processor_xsave_features;
> +
> +	/* Check if user provided newer struct with feature fields */
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
> +		if (copy_from_user(&args, user_arg, sizeof(args)))
> +			return -EFAULT;

There's subtle issue here that I didn't notice previously. This second copy_from_user()
re-populates the first two fields of the "args" local variable. These two fields were
validated by code a few lines above. But there's no guarantee that a second read of
user space will get the same values. User space could have another thread that
changes the user space values between the two copy_from_user() calls, and thereby
sneak in some bogus values to be used further down in this function. Because of
this risk, there's a general rule for kernel code, which is to avoid multiple accesses to
the same user space values. There are places in the kernel where such double reads
would be an exploitable security hole.

The fix would be to validate the pt_flags and pt_isolation fields again, or to have the
second copy_from_user copy only the additional fields. But it's also the case that the
way the pt_flags and pt_isolation fields are used further down in this function,
nothing bad can happen if malicious user space should succeed in sneaking in some
bogus values.

Net, as currently coded, there's nothing that needs to be fixed. It would be more
robust to do one of the two fixes, if for no other reason than to acknowledge
awareness of the risk of reading user space twice. But I'm not going to insist
on a respin.

> +
> +		if (args.pt_num_cpu_fbanks != MSHV_NUM_CPU_FEATURES_BANKS ||
> +		    mshv_field_nonzero(args, pt_rsvd) ||
> +		    mshv_field_nonzero(args, pt_rsvd1))
> +			return -EINVAL;
> +
> +		/*
> +		 * Note this assumes MSHV_NUM_CPU_FEATURES_BANKS will never
> +		 * change and equals HV_PARTITION_PROCESSOR_FEATURES_BANKS
> +		 * (i.e. 2).
> +		 *
> +		 * Further banks (index >= 2) will be modifiable as 'early'
> +		 * properties via the set partition property hypercall.
> +		 */
> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
> +			disabled_procs->as_uint64[i] = args.pt_cpu_fbanks[i];
> +
> +#if IS_ENABLED(CONFIG_X86_64)
> +		disabled_xsave->as_uint64 = args.pt_disabled_xsave;
> +#else
> +		/*
> +		 * In practice this field is ignored on arm64, but safer to
> +		 * zero it in case it is ever used.
> +		 */
> +		disabled_xsave->as_uint64 = 0;
> +
> +		if (mshv_field_nonzero(args, pt_rsvd2))
> +			return -EINVAL;
> +#endif
> +	} else {
> +		/*
> +		 * v1 behavior: try to enable everything. The hypervisor will
> +		 * disable features that are not supported. The banks can be
> +		 * queried via the get partition property hypercall.
> +		 */
> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
> +			disabled_procs->as_uint64[i] = 0;
> +
> +		disabled_xsave->as_uint64 = 0;
> +	}
> +
>  	/* Only support EXO partitions */
> -	creation_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
> -			 HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
> +	*pt_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
> +			 HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
> +
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_LAPIC))
> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_X2APIC))
> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES))
> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
> 
> -	if (args.pt_flags & BIT(MSHV_PT_BIT_LAPIC))
> -		creation_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
> -	if (args.pt_flags & BIT(MSHV_PT_BIT_X2APIC))
> -		creation_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
> -	if (args.pt_flags & BIT(MSHV_PT_BIT_GPA_SUPER_PAGES))
> -		creation_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
> +	isol_props->as_uint64 = 0;
> 
>  	switch (args.pt_isolation) {
>  	case MSHV_PT_ISOLATION_NONE:
> -		isolation_properties.isolation_type =
> -			HV_PARTITION_ISOLATION_TYPE_NONE;
> +		isol_props->isolation_type = HV_PARTITION_ISOLATION_TYPE_NONE;
>  		break;
>  	}
> 
> +	return 0;
> +}
> +
> +static long
> +mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> +{
> +	u64 creation_flags;
> +	struct hv_partition_creation_properties creation_properties;
> +	union hv_partition_isolation_properties isolation_properties;
> +	struct mshv_partition *partition;
> +	struct file *file;
> +	int fd;
> +	long ret;
> +
> +	ret = mshv_ioctl_process_pt_flags(user_arg, &creation_flags,
> +					  &creation_properties,
> +					  &isolation_properties);
> +	if (ret)
> +		return ret;
> +
>  	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
>  	if (!partition)
>  		return -ENOMEM;
> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
> index 876bfe4e4227..cf904f3aa201 100644
> --- a/include/uapi/linux/mshv.h
> +++ b/include/uapi/linux/mshv.h
> @@ -26,6 +26,7 @@ enum {
>  	MSHV_PT_BIT_LAPIC,
>  	MSHV_PT_BIT_X2APIC,
>  	MSHV_PT_BIT_GPA_SUPER_PAGES,
> +	MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES,
>  	MSHV_PT_BIT_COUNT,
>  };
> 
> @@ -41,6 +42,8 @@ enum {
>   * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
>   * @pt_isolation: MSHV_PT_ISOLATION_*
>   *
> + * This is the initial/v1 version for backward compatibility.
> + *
>   * Returns a file descriptor to act as a handle to a guest partition.
>   * At this point the partition is not yet initialized in the hypervisor.
>   * Some operations must be done with the partition in this state, e.g. setting
> @@ -52,6 +55,37 @@ struct mshv_create_partition {
>  	__u64 pt_isolation;
>  };
> 
> +#define MSHV_NUM_CPU_FEATURES_BANKS 2
> +
> +/**
> + * struct mshv_create_partition_v2
> + *
> + * This is extended version of the above initial MSHV_CREATE_PARTITION
> + * ioctl and allows for following additional parameters:
> + *
> + * @pt_num_cpu_fbanks: Must be set to MSHV_NUM_CPU_FEATURES_BANKS.
> + * @pt_cpu_fbanks: Disabled processor feature banks array.
> + * @pt_disabled_xsave: Disabled xsave feature bits.
> + *
> + * pt_cpu_fbanks and pt_disabled_xsave are passed through as-is to the create
> + * partition hypercall.
> + *
> + * Returns : same as above original mshv_create_partition
> + */
> +struct mshv_create_partition_v2 {
> +	__u64 pt_flags;
> +	__u64 pt_isolation;
> +	__u16 pt_num_cpu_fbanks;
> +	__u8  pt_rsvd[6];		/* MBZ */
> +	__u64 pt_cpu_fbanks[MSHV_NUM_CPU_FEATURES_BANKS];
> +	__u64 pt_rsvd1[2];		/* MBZ */
> +#if defined(__x86_64__)
> +	__u64 pt_disabled_xsave;
> +#else
> +	__u64 pt_rsvd2;			/* MBZ */
> +#endif
> +} __packed;
> +
>  /* /dev/mshv */
>  #define MSHV_CREATE_PARTITION	_IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
> 
> --
> 2.34.1

Other than the double read of user space, LGTM.

Reviewed-by: Michael Kelley <mhklinux@outlook.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] mshv: Extend create partition ioctl to support cpu features
  2025-11-12 16:27 ` Michael Kelley
@ 2025-11-13 18:47   ` Wei Liu
  2025-11-13 19:11     ` Nuno Das Neves
  0 siblings, 1 reply; 7+ messages in thread
From: Wei Liu @ 2025-11-13 18:47 UTC (permalink / raw)
  To: Michael Kelley
  Cc: Nuno Das Neves, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, wei.liu@kernel.org,
	kys@microsoft.com, haiyangz@microsoft.com, decui@microsoft.com,
	longli@microsoft.com, skinsburskii@linux.microsoft.com,
	prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
	muislam@microsoft.com, anrayabh@linux.microsoft.com, Jinank Jain

On Wed, Nov 12, 2025 at 04:27:05PM +0000, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Tuesday, November 11, 2025 3:20 PM
> > 
> > The existing mshv create partition ioctl does not provide a way to
> > specify which cpu features are enabled in the guest. Instead, it
> > attempts to enable all features and those that are not supported are
> > silently disabled by the hypervisor.
> > 
> > This was done to reduce unnecessary complexity and is sufficient for
> > many cases. However, new scenarios require fine-grained control over
> > these features.
> > 
> > Define a new mshv_create_partition_v2 structure which supports
> > passing the disabled processor and xsave feature bits through to the
> > create partition hypercall directly.
> > 
> > Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
> > the new structure. If unset, the original mshv_create_partition struct
> > is used, with the old behavior of enabling all features.
> > 
> > Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
> > Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
> > Signed-off-by: Muminul Islam <muislam@microsoft.com>
> > Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> > ---
> > Changes in v4:
> > - Change BIT() to BIT_ULL() [Michael Kelley]
> > - Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
> >   that number to never change. In future, additional processor banks
> >   will be settable as 'early' partition properties. Remove redundant
> >   code that set default values for unspecified banks [Michael Kelley]
> > - Set xsave features to 0 on arm64 [Michael Kelley]
> > - Add clarifying comments in a few places
> > 
> > Changes in v3:
> > - Remove the new cpu features definitions in hvhdk.h, and retain the
> >   old behavior of enabling all features for the old struct. For the v2
> >   struct, still disable unspecified feature banks, since that makes it
> >   robust to future extensions.
> > - Amend comments and commit message to reflect the above
> > - Fix unused variable on arm64 [kernel test robot]
> > 
> > Changes in v2:
> > - Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
> > - Fix compilation issue on arm64 [kernel test robot]
> > ---
> >  drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
> >  include/uapi/linux/mshv.h   |  34 +++++++++++
> >  2 files changed, 126 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> > index d542a0143bb8..9f9438289b60 100644
> > --- a/drivers/hv/mshv_root_main.c
> > +++ b/drivers/hv/mshv_root_main.c
> > @@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
> >  	return 0;
> >  }
> > 
> > -static long
> > -mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> > +static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
> > +	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
> > +
> > +static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
> > +					struct hv_partition_creation_properties *cr_props,
> > +					union hv_partition_isolation_properties *isol_props)
> >  {
> > -	struct mshv_create_partition args;
> > -	u64 creation_flags;
> > -	struct hv_partition_creation_properties creation_properties = {};
> > -	union hv_partition_isolation_properties isolation_properties = {};
> > -	struct mshv_partition *partition;
> > -	struct file *file;
> > -	int fd;
> > -	long ret;
> > +	int i;
> > +	struct mshv_create_partition_v2 args;
> > +	union hv_partition_processor_features *disabled_procs;
> > +	union hv_partition_processor_xsave_features *disabled_xsave;
> > 
> > -	if (copy_from_user(&args, user_arg, sizeof(args)))
> > +	/* First, copy v1 struct in case user is on previous versions */
> > +	if (copy_from_user(&args, user_arg,
> > +			   sizeof(struct mshv_create_partition)))
> >  		return -EFAULT;
> > 
> >  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
> >  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
> >  		return -EINVAL;
> > 
> > +	disabled_procs = &cr_props->disabled_processor_features;
> > +	disabled_xsave = &cr_props->disabled_processor_xsave_features;
> > +
> > +	/* Check if user provided newer struct with feature fields */
> > +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
> > +		if (copy_from_user(&args, user_arg, sizeof(args)))
> > +			return -EFAULT;
> 
> There's subtle issue here that I didn't notice previously. This second copy_from_user()
> re-populates the first two fields of the "args" local variable. These two fields were
> validated by code a few lines above. But there's no guarantee that a second read of
> user space will get the same values. User space could have another thread that
> changes the user space values between the two copy_from_user() calls, and thereby
> sneak in some bogus values to be used further down in this function. Because of
> this risk, there's a general rule for kernel code, which is to avoid multiple accesses to
> the same user space values. There are places in the kernel where such double reads
> would be an exploitable security hole.
> 
> The fix would be to validate the pt_flags and pt_isolation fields again, or to have the
> second copy_from_user copy only the additional fields. But it's also the case that the
> way the pt_flags and pt_isolation fields are used further down in this function,
> nothing bad can happen if malicious user space should succeed in sneaking in some
> bogus values.
> 
> Net, as currently coded, there's nothing that needs to be fixed. It would be more
> robust to do one of the two fixes, if for no other reason than to acknowledge
> awareness of the risk of reading user space twice. But I'm not going to insist
> on a respin.

Nuno, I can commit this patch first. If you can post a diff later I can
squash it in.

	/* Re-validate fields after the second copy_from_user */
  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
  		return -EINVAL;

Perhaps something like this after the second copy_from_user()?

>> Other than the double read of user space, LGTM.
> 
> Reviewed-by: Michael Kelley <mhklinux@outlook.com>

Thank you for the detailed review!

Wei

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] mshv: Extend create partition ioctl to support cpu features
  2025-11-13 18:47   ` Wei Liu
@ 2025-11-13 19:11     ` Nuno Das Neves
  2025-11-13 19:44       ` Wei Liu
  0 siblings, 1 reply; 7+ messages in thread
From: Nuno Das Neves @ 2025-11-13 19:11 UTC (permalink / raw)
  To: Wei Liu, Michael Kelley
  Cc: linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
	kys@microsoft.com, haiyangz@microsoft.com, decui@microsoft.com,
	longli@microsoft.com, skinsburskii@linux.microsoft.com,
	prapal@linux.microsoft.com, mrathor@linux.microsoft.com,
	muislam@microsoft.com, anrayabh@linux.microsoft.com, Jinank Jain

On 11/13/2025 10:47 AM, Wei Liu wrote:
> On Wed, Nov 12, 2025 at 04:27:05PM +0000, Michael Kelley wrote:
>> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Tuesday, November 11, 2025 3:20 PM
>>>
>>> The existing mshv create partition ioctl does not provide a way to
>>> specify which cpu features are enabled in the guest. Instead, it
>>> attempts to enable all features and those that are not supported are
>>> silently disabled by the hypervisor.
>>>
>>> This was done to reduce unnecessary complexity and is sufficient for
>>> many cases. However, new scenarios require fine-grained control over
>>> these features.
>>>
>>> Define a new mshv_create_partition_v2 structure which supports
>>> passing the disabled processor and xsave feature bits through to the
>>> create partition hypercall directly.
>>>
>>> Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
>>> the new structure. If unset, the original mshv_create_partition struct
>>> is used, with the old behavior of enabling all features.
>>>
>>> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
>>> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
>>> Signed-off-by: Muminul Islam <muislam@microsoft.com>
>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>>> ---
>>> Changes in v4:
>>> - Change BIT() to BIT_ULL() [Michael Kelley]
>>> - Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
>>>   that number to never change. In future, additional processor banks
>>>   will be settable as 'early' partition properties. Remove redundant
>>>   code that set default values for unspecified banks [Michael Kelley]
>>> - Set xsave features to 0 on arm64 [Michael Kelley]
>>> - Add clarifying comments in a few places
>>>
>>> Changes in v3:
>>> - Remove the new cpu features definitions in hvhdk.h, and retain the
>>>   old behavior of enabling all features for the old struct. For the v2
>>>   struct, still disable unspecified feature banks, since that makes it
>>>   robust to future extensions.
>>> - Amend comments and commit message to reflect the above
>>> - Fix unused variable on arm64 [kernel test robot]
>>>
>>> Changes in v2:
>>> - Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
>>> - Fix compilation issue on arm64 [kernel test robot]
>>> ---
>>>  drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
>>>  include/uapi/linux/mshv.h   |  34 +++++++++++
>>>  2 files changed, 126 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
>>> index d542a0143bb8..9f9438289b60 100644
>>> --- a/drivers/hv/mshv_root_main.c
>>> +++ b/drivers/hv/mshv_root_main.c
>>> @@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
>>>  	return 0;
>>>  }
>>>
>>> -static long
>>> -mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
>>> +static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
>>> +	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
>>> +
>>> +static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
>>> +					struct hv_partition_creation_properties *cr_props,
>>> +					union hv_partition_isolation_properties *isol_props)
>>>  {
>>> -	struct mshv_create_partition args;
>>> -	u64 creation_flags;
>>> -	struct hv_partition_creation_properties creation_properties = {};
>>> -	union hv_partition_isolation_properties isolation_properties = {};
>>> -	struct mshv_partition *partition;
>>> -	struct file *file;
>>> -	int fd;
>>> -	long ret;
>>> +	int i;
>>> +	struct mshv_create_partition_v2 args;
>>> +	union hv_partition_processor_features *disabled_procs;
>>> +	union hv_partition_processor_xsave_features *disabled_xsave;
>>>
>>> -	if (copy_from_user(&args, user_arg, sizeof(args)))
>>> +	/* First, copy v1 struct in case user is on previous versions */
>>> +	if (copy_from_user(&args, user_arg,
>>> +			   sizeof(struct mshv_create_partition)))
>>>  		return -EFAULT;
>>>
>>>  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
>>>  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
>>>  		return -EINVAL;
>>>
>>> +	disabled_procs = &cr_props->disabled_processor_features;
>>> +	disabled_xsave = &cr_props->disabled_processor_xsave_features;
>>> +
>>> +	/* Check if user provided newer struct with feature fields */
>>> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
>>> +		if (copy_from_user(&args, user_arg, sizeof(args)))
>>> +			return -EFAULT;
>>
>> There's subtle issue here that I didn't notice previously. This second copy_from_user()
>> re-populates the first two fields of the "args" local variable. These two fields were
>> validated by code a few lines above. But there's no guarantee that a second read of
>> user space will get the same values. User space could have another thread that
>> changes the user space values between the two copy_from_user() calls, and thereby
>> sneak in some bogus values to be used further down in this function. Because of
>> this risk, there's a general rule for kernel code, which is to avoid multiple accesses to
>> the same user space values. There are places in the kernel where such double reads
>> would be an exploitable security hole.
>>

Good catch Michael! It's something I had read about once before long ago but had forgotten.
I wonder if there's some kind of automation that could warn about potential issues - i.e.
copy_from_user() on the same pointer twice.

>> The fix would be to validate the pt_flags and pt_isolation fields again, or to have the
>> second copy_from_user copy only the additional fields. But it's also the case that the
>> way the pt_flags and pt_isolation fields are used further down in this function,
>> nothing bad can happen if malicious user space should succeed in sneaking in some
>> bogus values.
>>
>> Net, as currently coded, there's nothing that needs to be fixed. It would be more
>> robust to do one of the two fixes, if for no other reason than to acknowledge
>> awareness of the risk of reading user space twice. But I'm not going to insist
>> on a respin.
> 
> Nuno, I can commit this patch first. If you can post a diff later I can
> squash it in.

It might be easier if I just spin a v5 today? I'll send it soon.

> 
> 	/* Re-validate fields after the second copy_from_user */
>   	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
>   	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
>   		return -EINVAL;
> 
> Perhaps something like this after the second copy_from_user()?
> 

Yes, that sounds fine. I thought about just copying the second part
of the struct but re-checking those fields looks like a simpler and
less error-prone way to me. 

Nuno

>>> Other than the double read of user space, LGTM.
>>
>> Reviewed-by: Michael Kelley <mhklinux@outlook.com>
> 
> Thank you for the detailed review!
> 
> Wei


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] mshv: Extend create partition ioctl to support cpu features
  2025-11-13 19:11     ` Nuno Das Neves
@ 2025-11-13 19:44       ` Wei Liu
  0 siblings, 0 replies; 7+ messages in thread
From: Wei Liu @ 2025-11-13 19:44 UTC (permalink / raw)
  To: Nuno Das Neves
  Cc: Wei Liu, Michael Kelley, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, kys@microsoft.com,
	haiyangz@microsoft.com, decui@microsoft.com, longli@microsoft.com,
	skinsburskii@linux.microsoft.com, prapal@linux.microsoft.com,
	mrathor@linux.microsoft.com, muislam@microsoft.com,
	anrayabh@linux.microsoft.com, Jinank Jain

On Thu, Nov 13, 2025 at 11:11:57AM -0800, Nuno Das Neves wrote:
> On 11/13/2025 10:47 AM, Wei Liu wrote:
> > On Wed, Nov 12, 2025 at 04:27:05PM +0000, Michael Kelley wrote:
> >> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Tuesday, November 11, 2025 3:20 PM
> >>>
> >>> The existing mshv create partition ioctl does not provide a way to
> >>> specify which cpu features are enabled in the guest. Instead, it
> >>> attempts to enable all features and those that are not supported are
> >>> silently disabled by the hypervisor.
> >>>
> >>> This was done to reduce unnecessary complexity and is sufficient for
> >>> many cases. However, new scenarios require fine-grained control over
> >>> these features.
> >>>
> >>> Define a new mshv_create_partition_v2 structure which supports
> >>> passing the disabled processor and xsave feature bits through to the
> >>> create partition hypercall directly.
> >>>
> >>> Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
> >>> the new structure. If unset, the original mshv_create_partition struct
> >>> is used, with the old behavior of enabling all features.
> >>>
> >>> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
> >>> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
> >>> Signed-off-by: Muminul Islam <muislam@microsoft.com>
> >>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> >>> ---
> >>> Changes in v4:
> >>> - Change BIT() to BIT_ULL() [Michael Kelley]
> >>> - Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
> >>>   that number to never change. In future, additional processor banks
> >>>   will be settable as 'early' partition properties. Remove redundant
> >>>   code that set default values for unspecified banks [Michael Kelley]
> >>> - Set xsave features to 0 on arm64 [Michael Kelley]
> >>> - Add clarifying comments in a few places
> >>>
> >>> Changes in v3:
> >>> - Remove the new cpu features definitions in hvhdk.h, and retain the
> >>>   old behavior of enabling all features for the old struct. For the v2
> >>>   struct, still disable unspecified feature banks, since that makes it
> >>>   robust to future extensions.
> >>> - Amend comments and commit message to reflect the above
> >>> - Fix unused variable on arm64 [kernel test robot]
> >>>
> >>> Changes in v2:
> >>> - Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
> >>> - Fix compilation issue on arm64 [kernel test robot]
> >>> ---
> >>>  drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
> >>>  include/uapi/linux/mshv.h   |  34 +++++++++++
> >>>  2 files changed, 126 insertions(+), 21 deletions(-)
> >>>
> >>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> >>> index d542a0143bb8..9f9438289b60 100644
> >>> --- a/drivers/hv/mshv_root_main.c
> >>> +++ b/drivers/hv/mshv_root_main.c
> >>> @@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
> >>>  	return 0;
> >>>  }
> >>>
> >>> -static long
> >>> -mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> >>> +static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
> >>> +	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
> >>> +
> >>> +static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
> >>> +					struct hv_partition_creation_properties *cr_props,
> >>> +					union hv_partition_isolation_properties *isol_props)
> >>>  {
> >>> -	struct mshv_create_partition args;
> >>> -	u64 creation_flags;
> >>> -	struct hv_partition_creation_properties creation_properties = {};
> >>> -	union hv_partition_isolation_properties isolation_properties = {};
> >>> -	struct mshv_partition *partition;
> >>> -	struct file *file;
> >>> -	int fd;
> >>> -	long ret;
> >>> +	int i;
> >>> +	struct mshv_create_partition_v2 args;
> >>> +	union hv_partition_processor_features *disabled_procs;
> >>> +	union hv_partition_processor_xsave_features *disabled_xsave;
> >>>
> >>> -	if (copy_from_user(&args, user_arg, sizeof(args)))
> >>> +	/* First, copy v1 struct in case user is on previous versions */
> >>> +	if (copy_from_user(&args, user_arg,
> >>> +			   sizeof(struct mshv_create_partition)))
> >>>  		return -EFAULT;
> >>>
> >>>  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
> >>>  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
> >>>  		return -EINVAL;
> >>>
> >>> +	disabled_procs = &cr_props->disabled_processor_features;
> >>> +	disabled_xsave = &cr_props->disabled_processor_xsave_features;
> >>> +
> >>> +	/* Check if user provided newer struct with feature fields */
> >>> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
> >>> +		if (copy_from_user(&args, user_arg, sizeof(args)))
> >>> +			return -EFAULT;
> >>
> >> There's subtle issue here that I didn't notice previously. This second copy_from_user()
> >> re-populates the first two fields of the "args" local variable. These two fields were
> >> validated by code a few lines above. But there's no guarantee that a second read of
> >> user space will get the same values. User space could have another thread that
> >> changes the user space values between the two copy_from_user() calls, and thereby
> >> sneak in some bogus values to be used further down in this function. Because of
> >> this risk, there's a general rule for kernel code, which is to avoid multiple accesses to
> >> the same user space values. There are places in the kernel where such double reads
> >> would be an exploitable security hole.
> >>
> 
> Good catch Michael! It's something I had read about once before long ago but had forgotten.
> I wonder if there's some kind of automation that could warn about potential issues - i.e.
> copy_from_user() on the same pointer twice.
> 
> >> The fix would be to validate the pt_flags and pt_isolation fields again, or to have the
> >> second copy_from_user copy only the additional fields. But it's also the case that the
> >> way the pt_flags and pt_isolation fields are used further down in this function,
> >> nothing bad can happen if malicious user space should succeed in sneaking in some
> >> bogus values.
> >>
> >> Net, as currently coded, there's nothing that needs to be fixed. It would be more
> >> robust to do one of the two fixes, if for no other reason than to acknowledge
> >> awareness of the risk of reading user space twice. But I'm not going to insist
> >> on a respin.
> > 
> > Nuno, I can commit this patch first. If you can post a diff later I can
> > squash it in.
> 
> It might be easier if I just spin a v5 today? I'll send it soon.

Yes, please resend. Thanks.

Wei

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] mshv: Extend create partition ioctl to support cpu features
  2025-11-11 23:19 [PATCH v4] mshv: Extend create partition ioctl to support cpu features Nuno Das Neves
  2025-11-12 16:27 ` Michael Kelley
@ 2025-11-17 23:58 ` Stanislav Kinsburskii
  2025-11-18 22:49   ` Nuno Das Neves
  1 sibling, 1 reply; 7+ messages in thread
From: Stanislav Kinsburskii @ 2025-11-17 23:58 UTC (permalink / raw)
  To: Nuno Das Neves
  Cc: linux-hyperv, linux-kernel, wei.liu, mhklinux, kys, haiyangz,
	decui, longli, prapal, mrathor, muislam, anrayabh, Jinank Jain

On Tue, Nov 11, 2025 at 03:19:54PM -0800, Nuno Das Neves wrote:
> From: Muminul Islam <muislam@microsoft.com>
> 
> The existing mshv create partition ioctl does not provide a way to
> specify which cpu features are enabled in the guest. Instead, it
> attempts to enable all features and those that are not supported are
> silently disabled by the hypervisor.
> 
> This was done to reduce unnecessary complexity and is sufficient for
> many cases. However, new scenarios require fine-grained control over
> these features.
> 
> Define a new mshv_create_partition_v2 structure which supports
> passing the disabled processor and xsave feature bits through to the
> create partition hypercall directly.
> 
> Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
> the new structure. If unset, the original mshv_create_partition struct
> is used, with the old behavior of enabling all features.
> 
> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
> Signed-off-by: Muminul Islam <muislam@microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
> Changes in v4:
> - Change BIT() to BIT_ULL() [Michael Kelley]
> - Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
>   that number to never change. In future, additional processor banks
>   will be settable as 'early' partition properties. Remove redundant
>   code that set default values for unspecified banks [Michael Kelley]
> - Set xsave features to 0 on arm64 [Michael Kelley]
> - Add clarifying comments in a few places
> 
> Changes in v3:
> - Remove the new cpu features definitions in hvhdk.h, and retain the
>   old behavior of enabling all features for the old struct. For the v2
>   struct, still disable unspecified feature banks, since that makes it
>   robust to future extensions.
> - Amend comments and commit message to reflect the above
> - Fix unused variable on arm64 [kernel test robot]
> 
> Changes in v2:
> - Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
> - Fix compilation issue on arm64 [kernel test robot]
> ---
>  drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
>  include/uapi/linux/mshv.h   |  34 +++++++++++
>  2 files changed, 126 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index d542a0143bb8..9f9438289b60 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
>  	return 0;
>  }
>  
> -static long
> -mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> +static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
> +	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
> +
> +static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
> +					struct hv_partition_creation_properties *cr_props,
> +					union hv_partition_isolation_properties *isol_props)
>  {
> -	struct mshv_create_partition args;
> -	u64 creation_flags;
> -	struct hv_partition_creation_properties creation_properties = {};
> -	union hv_partition_isolation_properties isolation_properties = {};
> -	struct mshv_partition *partition;
> -	struct file *file;
> -	int fd;
> -	long ret;
> +	int i;
> +	struct mshv_create_partition_v2 args;
> +	union hv_partition_processor_features *disabled_procs;
> +	union hv_partition_processor_xsave_features *disabled_xsave;
>  
> -	if (copy_from_user(&args, user_arg, sizeof(args)))
> +	/* First, copy v1 struct in case user is on previous versions */
> +	if (copy_from_user(&args, user_arg,
> +			   sizeof(struct mshv_create_partition)))
>  		return -EFAULT;
>  
>  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
>  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
>  		return -EINVAL;
>  
> +	disabled_procs = &cr_props->disabled_processor_features;
> +	disabled_xsave = &cr_props->disabled_processor_xsave_features;
> +
> +	/* Check if user provided newer struct with feature fields */
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
> +		if (copy_from_user(&args, user_arg, sizeof(args)))
> +			return -EFAULT;
> +
> +		if (args.pt_num_cpu_fbanks != MSHV_NUM_CPU_FEATURES_BANKS ||
> +		    mshv_field_nonzero(args, pt_rsvd) ||
> +		    mshv_field_nonzero(args, pt_rsvd1))
> +			return -EINVAL;
> +
> +		/*
> +		 * Note this assumes MSHV_NUM_CPU_FEATURES_BANKS will never
> +		 * change and equals HV_PARTITION_PROCESSOR_FEATURES_BANKS
> +		 * (i.e. 2).
> +		 *
> +		 * Further banks (index >= 2) will be modifiable as 'early'
> +		 * properties via the set partition property hypercall.
> +		 */
> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
> +			disabled_procs->as_uint64[i] = args.pt_cpu_fbanks[i];
> +
> +#if IS_ENABLED(CONFIG_X86_64)
> +		disabled_xsave->as_uint64 = args.pt_disabled_xsave;
> +#else
> +		/*
> +		 * In practice this field is ignored on arm64, but safer to
> +		 * zero it in case it is ever used.
> +		 */
> +		disabled_xsave->as_uint64 = 0;

Why not explicitly treat non-zero value as invalid here instead?
Isn't it always better than implicitly (and silently) zeroing it?

> +
> +		if (mshv_field_nonzero(args, pt_rsvd2))
> +			return -EINVAL;
> +#endif
> +	} else {
> +		/*
> +		 * v1 behavior: try to enable everything. The hypervisor will
> +		 * disable features that are not supported. The banks can be
> +		 * queried via the get partition property hypercall.
> +		 */
> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
> +			disabled_procs->as_uint64[i] = 0;
> +
> +		disabled_xsave->as_uint64 = 0;
> +	}
> +
>  	/* Only support EXO partitions */
> -	creation_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
> -			 HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
> +	*pt_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
> +		    HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
> +
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_LAPIC))
> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_X2APIC))
> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES))
> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
>  
> -	if (args.pt_flags & BIT(MSHV_PT_BIT_LAPIC))
> -		creation_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
> -	if (args.pt_flags & BIT(MSHV_PT_BIT_X2APIC))
> -		creation_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
> -	if (args.pt_flags & BIT(MSHV_PT_BIT_GPA_SUPER_PAGES))
> -		creation_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
> +	isol_props->as_uint64 = 0;

These properties are missing the nested one.
I'd recommend squeezing that change in this patch

Thanks,
Stanislav

>  
>  	switch (args.pt_isolation) {
>  	case MSHV_PT_ISOLATION_NONE:
> -		isolation_properties.isolation_type =
> -			HV_PARTITION_ISOLATION_TYPE_NONE;
> +		isol_props->isolation_type = HV_PARTITION_ISOLATION_TYPE_NONE;
>  		break;
>  	}
>  
> +	return 0;
> +}
> +
> +static long
> +mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
> +{
> +	u64 creation_flags;
> +	struct hv_partition_creation_properties creation_properties;
> +	union hv_partition_isolation_properties isolation_properties;
> +	struct mshv_partition *partition;
> +	struct file *file;
> +	int fd;
> +	long ret;
> +
> +	ret = mshv_ioctl_process_pt_flags(user_arg, &creation_flags,
> +					  &creation_properties,
> +					  &isolation_properties);
> +	if (ret)
> +		return ret;
> +
>  	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
>  	if (!partition)
>  		return -ENOMEM;
> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
> index 876bfe4e4227..cf904f3aa201 100644
> --- a/include/uapi/linux/mshv.h
> +++ b/include/uapi/linux/mshv.h
> @@ -26,6 +26,7 @@ enum {
>  	MSHV_PT_BIT_LAPIC,
>  	MSHV_PT_BIT_X2APIC,
>  	MSHV_PT_BIT_GPA_SUPER_PAGES,
> +	MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES,
>  	MSHV_PT_BIT_COUNT,
>  };
>  
> @@ -41,6 +42,8 @@ enum {
>   * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
>   * @pt_isolation: MSHV_PT_ISOLATION_*
>   *
> + * This is the initial/v1 version for backward compatibility.
> + *
>   * Returns a file descriptor to act as a handle to a guest partition.
>   * At this point the partition is not yet initialized in the hypervisor.
>   * Some operations must be done with the partition in this state, e.g. setting
> @@ -52,6 +55,37 @@ struct mshv_create_partition {
>  	__u64 pt_isolation;
>  };
>  
> +#define MSHV_NUM_CPU_FEATURES_BANKS 2
> +
> +/**
> + * struct mshv_create_partition_v2
> + *
> + * This is extended version of the above initial MSHV_CREATE_PARTITION
> + * ioctl and allows for following additional parameters:
> + *
> + * @pt_num_cpu_fbanks: Must be set to MSHV_NUM_CPU_FEATURES_BANKS.
> + * @pt_cpu_fbanks: Disabled processor feature banks array.
> + * @pt_disabled_xsave: Disabled xsave feature bits.
> + *
> + * pt_cpu_fbanks and pt_disabled_xsave are passed through as-is to the create
> + * partition hypercall.
> + *
> + * Returns : same as above original mshv_create_partition
> + */
> +struct mshv_create_partition_v2 {
> +	__u64 pt_flags;
> +	__u64 pt_isolation;
> +	__u16 pt_num_cpu_fbanks;
> +	__u8  pt_rsvd[6];		/* MBZ */
> +	__u64 pt_cpu_fbanks[MSHV_NUM_CPU_FEATURES_BANKS];
> +	__u64 pt_rsvd1[2];		/* MBZ */
> +#if defined(__x86_64__)
> +	__u64 pt_disabled_xsave;
> +#else
> +	__u64 pt_rsvd2;			/* MBZ */
> +#endif
> +} __packed;
> +
>  /* /dev/mshv */
>  #define MSHV_CREATE_PARTITION	_IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
>  
> -- 
> 2.34.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] mshv: Extend create partition ioctl to support cpu features
  2025-11-17 23:58 ` Stanislav Kinsburskii
@ 2025-11-18 22:49   ` Nuno Das Neves
  0 siblings, 0 replies; 7+ messages in thread
From: Nuno Das Neves @ 2025-11-18 22:49 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: linux-hyperv, linux-kernel, wei.liu, mhklinux, kys, haiyangz,
	decui, longli, prapal, mrathor, muislam, anrayabh, Jinank Jain

On 11/17/2025 3:58 PM, Stanislav Kinsburskii wrote:
> On Tue, Nov 11, 2025 at 03:19:54PM -0800, Nuno Das Neves wrote:
>> From: Muminul Islam <muislam@microsoft.com>
>>
>> The existing mshv create partition ioctl does not provide a way to
>> specify which cpu features are enabled in the guest. Instead, it
>> attempts to enable all features and those that are not supported are
>> silently disabled by the hypervisor.
>>
>> This was done to reduce unnecessary complexity and is sufficient for
>> many cases. However, new scenarios require fine-grained control over
>> these features.
>>
>> Define a new mshv_create_partition_v2 structure which supports
>> passing the disabled processor and xsave feature bits through to the
>> create partition hypercall directly.
>>
>> Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables
>> the new structure. If unset, the original mshv_create_partition struct
>> is used, with the old behavior of enabling all features.
>>
>> Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
>> Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
>> Signed-off-by: Muminul Islam <muislam@microsoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>> Changes in v4:
>> - Change BIT() to BIT_ULL() [Michael Kelley]
>> - Enforce pt_num_cpu_fbanks == MSHV_NUM_CPU_FEATURES_BANKS and expect
>>   that number to never change. In future, additional processor banks
>>   will be settable as 'early' partition properties. Remove redundant
>>   code that set default values for unspecified banks [Michael Kelley]
>> - Set xsave features to 0 on arm64 [Michael Kelley]
>> - Add clarifying comments in a few places
>>
>> Changes in v3:
>> - Remove the new cpu features definitions in hvhdk.h, and retain the
>>   old behavior of enabling all features for the old struct. For the v2
>>   struct, still disable unspecified feature banks, since that makes it
>>   robust to future extensions.
>> - Amend comments and commit message to reflect the above
>> - Fix unused variable on arm64 [kernel test robot]
>>
>> Changes in v2:
>> - Fix exposure of CONFIG_X86_64 to uapi [kernel test robot]
>> - Fix compilation issue on arm64 [kernel test robot]
>> ---
>>  drivers/hv/mshv_root_main.c | 113 +++++++++++++++++++++++++++++-------
>>  include/uapi/linux/mshv.h   |  34 +++++++++++
>>  2 files changed, 126 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
>> index d542a0143bb8..9f9438289b60 100644
>> --- a/drivers/hv/mshv_root_main.c
>> +++ b/drivers/hv/mshv_root_main.c
>> @@ -1900,43 +1900,114 @@ add_partition(struct mshv_partition *partition)
>>  	return 0;
>>  }
>>  
>> -static long
>> -mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
>> +static_assert(MSHV_NUM_CPU_FEATURES_BANKS ==
>> +	      HV_PARTITION_PROCESSOR_FEATURES_BANKS);
>> +
>> +static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
>> +					struct hv_partition_creation_properties *cr_props,
>> +					union hv_partition_isolation_properties *isol_props)
>>  {
>> -	struct mshv_create_partition args;
>> -	u64 creation_flags;
>> -	struct hv_partition_creation_properties creation_properties = {};
>> -	union hv_partition_isolation_properties isolation_properties = {};
>> -	struct mshv_partition *partition;
>> -	struct file *file;
>> -	int fd;
>> -	long ret;
>> +	int i;
>> +	struct mshv_create_partition_v2 args;
>> +	union hv_partition_processor_features *disabled_procs;
>> +	union hv_partition_processor_xsave_features *disabled_xsave;
>>  
>> -	if (copy_from_user(&args, user_arg, sizeof(args)))
>> +	/* First, copy v1 struct in case user is on previous versions */
>> +	if (copy_from_user(&args, user_arg,
>> +			   sizeof(struct mshv_create_partition)))
>>  		return -EFAULT;
>>  
>>  	if ((args.pt_flags & ~MSHV_PT_FLAGS_MASK) ||
>>  	    args.pt_isolation >= MSHV_PT_ISOLATION_COUNT)
>>  		return -EINVAL;
>>  
>> +	disabled_procs = &cr_props->disabled_processor_features;
>> +	disabled_xsave = &cr_props->disabled_processor_xsave_features;
>> +
>> +	/* Check if user provided newer struct with feature fields */
>> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES)) {
>> +		if (copy_from_user(&args, user_arg, sizeof(args)))
>> +			return -EFAULT;
>> +
>> +		if (args.pt_num_cpu_fbanks != MSHV_NUM_CPU_FEATURES_BANKS ||
>> +		    mshv_field_nonzero(args, pt_rsvd) ||
>> +		    mshv_field_nonzero(args, pt_rsvd1))
>> +			return -EINVAL;
>> +
>> +		/*
>> +		 * Note this assumes MSHV_NUM_CPU_FEATURES_BANKS will never
>> +		 * change and equals HV_PARTITION_PROCESSOR_FEATURES_BANKS
>> +		 * (i.e. 2).
>> +		 *
>> +		 * Further banks (index >= 2) will be modifiable as 'early'
>> +		 * properties via the set partition property hypercall.
>> +		 */
>> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
>> +			disabled_procs->as_uint64[i] = args.pt_cpu_fbanks[i];
>> +
>> +#if IS_ENABLED(CONFIG_X86_64)
>> +		disabled_xsave->as_uint64 = args.pt_disabled_xsave;
>> +#else
>> +		/*
>> +		 * In practice this field is ignored on arm64, but safer to
>> +		 * zero it in case it is ever used.
>> +		 */
>> +		disabled_xsave->as_uint64 = 0;
> 
> Why not explicitly treat non-zero value as invalid here instead?
> Isn't it always better than implicitly (and silently) zeroing it?
> 
This is setting the hypercall input field to zero, because it looks like
the hypervisor ignores it on arm64. That's not ideal but it isn't under
our control here so all we can do is zero it.

The ioctl argument which we *do* control lacks the xsave field on ARM64,
instead we have a reserved field which is checked below (pt_rsvd2).

>> +
>> +		if (mshv_field_nonzero(args, pt_rsvd2))
>> +			return -EINVAL;
>> +#endif
>> +	} else {
>> +		/*
>> +		 * v1 behavior: try to enable everything. The hypervisor will
>> +		 * disable features that are not supported. The banks can be
>> +		 * queried via the get partition property hypercall.
>> +		 */
>> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURES_BANKS; i++)
>> +			disabled_procs->as_uint64[i] = 0;
>> +
>> +		disabled_xsave->as_uint64 = 0;
>> +	}
>> +
>>  	/* Only support EXO partitions */
>> -	creation_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
>> -			 HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
>> +	*pt_flags = HV_PARTITION_CREATION_FLAG_EXO_PARTITION |
>> +		    HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED;
>> +
>> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_LAPIC))
>> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
>> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_X2APIC))
>> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
>> +	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES))
>> +		*pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
>>  
>> -	if (args.pt_flags & BIT(MSHV_PT_BIT_LAPIC))
>> -		creation_flags |= HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED;
>> -	if (args.pt_flags & BIT(MSHV_PT_BIT_X2APIC))
>> -		creation_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
>> -	if (args.pt_flags & BIT(MSHV_PT_BIT_GPA_SUPER_PAGES))
>> -		creation_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
>> +	isol_props->as_uint64 = 0;
> 
> These properties are missing the nested one.
> I'd recommend squeezing that change in this patch
>
The nested flag is not introduced yet. It should be in a separate patch.

FYI, there is a v5 of this patch I posted on November 13.

Nuno

> Thanks,
> Stanislav
> 
>>  
>>  	switch (args.pt_isolation) {
>>  	case MSHV_PT_ISOLATION_NONE:
>> -		isolation_properties.isolation_type =
>> -			HV_PARTITION_ISOLATION_TYPE_NONE;
>> +		isol_props->isolation_type = HV_PARTITION_ISOLATION_TYPE_NONE;
>>  		break;
>>  	}
>>  
>> +	return 0;
>> +}
>> +
>> +static long
>> +mshv_ioctl_create_partition(void __user *user_arg, struct device *module_dev)
>> +{
>> +	u64 creation_flags;
>> +	struct hv_partition_creation_properties creation_properties;
>> +	union hv_partition_isolation_properties isolation_properties;
>> +	struct mshv_partition *partition;
>> +	struct file *file;
>> +	int fd;
>> +	long ret;
>> +
>> +	ret = mshv_ioctl_process_pt_flags(user_arg, &creation_flags,
>> +					  &creation_properties,
>> +					  &isolation_properties);
>> +	if (ret)
>> +		return ret;
>> +
>>  	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
>>  	if (!partition)
>>  		return -ENOMEM;
>> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
>> index 876bfe4e4227..cf904f3aa201 100644
>> --- a/include/uapi/linux/mshv.h
>> +++ b/include/uapi/linux/mshv.h
>> @@ -26,6 +26,7 @@ enum {
>>  	MSHV_PT_BIT_LAPIC,
>>  	MSHV_PT_BIT_X2APIC,
>>  	MSHV_PT_BIT_GPA_SUPER_PAGES,
>> +	MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES,
>>  	MSHV_PT_BIT_COUNT,
>>  };
>>  
>> @@ -41,6 +42,8 @@ enum {
>>   * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
>>   * @pt_isolation: MSHV_PT_ISOLATION_*
>>   *
>> + * This is the initial/v1 version for backward compatibility.
>> + *
>>   * Returns a file descriptor to act as a handle to a guest partition.
>>   * At this point the partition is not yet initialized in the hypervisor.
>>   * Some operations must be done with the partition in this state, e.g. setting
>> @@ -52,6 +55,37 @@ struct mshv_create_partition {
>>  	__u64 pt_isolation;
>>  };
>>  
>> +#define MSHV_NUM_CPU_FEATURES_BANKS 2
>> +
>> +/**
>> + * struct mshv_create_partition_v2
>> + *
>> + * This is extended version of the above initial MSHV_CREATE_PARTITION
>> + * ioctl and allows for following additional parameters:
>> + *
>> + * @pt_num_cpu_fbanks: Must be set to MSHV_NUM_CPU_FEATURES_BANKS.
>> + * @pt_cpu_fbanks: Disabled processor feature banks array.
>> + * @pt_disabled_xsave: Disabled xsave feature bits.
>> + *
>> + * pt_cpu_fbanks and pt_disabled_xsave are passed through as-is to the create
>> + * partition hypercall.
>> + *
>> + * Returns : same as above original mshv_create_partition
>> + */
>> +struct mshv_create_partition_v2 {
>> +	__u64 pt_flags;
>> +	__u64 pt_isolation;
>> +	__u16 pt_num_cpu_fbanks;
>> +	__u8  pt_rsvd[6];		/* MBZ */
>> +	__u64 pt_cpu_fbanks[MSHV_NUM_CPU_FEATURES_BANKS];
>> +	__u64 pt_rsvd1[2];		/* MBZ */
>> +#if defined(__x86_64__)
>> +	__u64 pt_disabled_xsave;
>> +#else
>> +	__u64 pt_rsvd2;			/* MBZ */
>> +#endif
>> +} __packed;
>> +
>>  /* /dev/mshv */
>>  #define MSHV_CREATE_PARTITION	_IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
>>  
>> -- 
>> 2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-11-18 22:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-11 23:19 [PATCH v4] mshv: Extend create partition ioctl to support cpu features Nuno Das Neves
2025-11-12 16:27 ` Michael Kelley
2025-11-13 18:47   ` Wei Liu
2025-11-13 19:11     ` Nuno Das Neves
2025-11-13 19:44       ` Wei Liu
2025-11-17 23:58 ` Stanislav Kinsburskii
2025-11-18 22:49   ` Nuno Das Neves

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox