[PATCH] mshv: Fix deposit memory in MSHV_ROOT

Linux-HyperV List
 help / color / mirror / Atom feed

* [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL
@ 2025-10-16 19:53 Nuno Das Neves
  2025-10-17  1:12 ` Michael Kelley
  0 siblings, 1 reply; 6+ messages in thread
From: Nuno Das Neves @ 2025-10-16 19:53 UTC (permalink / raw)
  To: linux-hyperv, linux-kernel
  Cc: kys, haiyangz, wei.liu, decui, arnd, mhklinux, mrathor,
	skinsburskii, Nuno Das Neves

When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
-EAGAIN to userspace.

However, it's much easier and efficient if the driver simply deposits
memory on demand and immediately retries the hypercall as is done with
all the other hypercall helper functions.

But unlike those, in MSHV_ROOT_HVCALL the input is opaque to the
kernel. This is problematic for rep hypercalls, because the next part
of the input list can't be copied on each loop after depositing pages
(this was the original reason for returning -EAGAIN in this case).

Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
parameter. This solves the issue, allowing the deposit loop in
MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
partway through.

Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
 drivers/hv/mshv_root_main.c    | 52 ++++++++++++++++++++--------------
 include/asm-generic/mshyperv.h | 14 +++++++--
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index 9ae67c6e9f60..731ec8cbbd63 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -159,6 +159,7 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
 	unsigned int pages_order;
 	void *input_pg = NULL;
 	void *output_pg = NULL;
+	u16 reps_completed;
 
 	if (copy_from_user(&args, user_args, sizeof(args)))
 		return -EFAULT;
@@ -210,28 +211,35 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
 	 */
 	*(u64 *)input_pg = partition->pt_id;
 
-	if (args.reps)
-		status = hv_do_rep_hypercall(args.code, args.reps, 0,
-					     input_pg, output_pg);
-	else
-		status = hv_do_hypercall(args.code, input_pg, output_pg);
-
-	if (hv_result(status) == HV_STATUS_CALL_PENDING) {
-		if (is_async) {
-			mshv_async_hvcall_handler(partition, &status);
-		} else { /* Paranoia check. This shouldn't happen! */
-			ret = -EBADFD;
-			goto free_pages_out;
+	reps_completed = 0;
+	do {
+		if (args.reps) {
+			status = hv_do_rep_hypercall_ex(args.code, args.reps,
+							0, reps_completed,
+							input_pg, output_pg);
+			reps_completed = hv_repcomp(status);
+		} else {
+			status = hv_do_hypercall(args.code, input_pg, output_pg);
 		}
-	}
 
-	if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
-		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition->pt_id, 1);
-		if (!ret)
-			ret = -EAGAIN;
-	} else if (!hv_result_success(status)) {
-		ret = hv_result_to_errno(status);
-	}
+		if (hv_result(status) == HV_STATUS_CALL_PENDING) {
+			if (is_async) {
+				mshv_async_hvcall_handler(partition, &status);
+			} else { /* Paranoia check. This shouldn't happen! */
+				ret = -EBADFD;
+				goto free_pages_out;
+			}
+		}
+
+		if (hv_result_success(status))
+			break;
+
+		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
+			ret = hv_result_to_errno(status);
+		else
+			ret = hv_call_deposit_pages(NUMA_NO_NODE,
+						    partition->pt_id, 1);
+	} while (!ret);
 
 	/*
 	 * Always return the status and output data regardless of result.
@@ -240,11 +248,11 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
 	 * succeeded.
 	 */
 	args.status = hv_result(status);
-	args.reps = args.reps ? hv_repcomp(status) : 0;
+	args.reps = reps_completed;
 	if (copy_to_user(user_args, &args, sizeof(args)))
 		ret = -EFAULT;
 
-	if (output_pg &&
+	if (!ret && output_pg &&
 	    copy_to_user((void __user *)args.out_ptr, output_pg, args.out_sz))
 		ret = -EFAULT;
 
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index ebf458dbcf84..31a209f0e18f 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -128,8 +128,9 @@ static inline unsigned int hv_repcomp(u64 status)
  * Rep hypercalls. Callers of this functions are supposed to ensure that
  * rep_count and varhead_size comply with Hyper-V hypercall definition.
  */
-static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
-				      void *input, void *output)
+static inline u64 hv_do_rep_hypercall_ex(u16 code, u16 rep_count,
+					 u16 varhead_size, u16 rep_start,
+					 void *input, void *output)
 {
 	u64 control = code;
 	u64 status;
@@ -137,6 +138,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
 
 	control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
 	control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+	control |= (u64)rep_start << HV_HYPERCALL_REP_START_OFFSET;
 
 	do {
 		status = hv_do_hypercall(control, input, output);
@@ -154,6 +156,14 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
 	return status;
 }
 
+/* For the typical case where rep_start is 0 */
+static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
+				      void *input, void *output)
+{
+	return hv_do_rep_hypercall_ex(code, rep_count, varhead_size, 0,
+				      input, output);
+}
+
 /* Generate the guest OS identifier as described in the Hyper-V TLFS */
 static inline u64 hv_generate_guest_id(u64 kernel_version)
 {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* RE: [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL
  2025-10-16 19:53 [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL Nuno Das Neves
@ 2025-10-17  1:12 ` Michael Kelley
  2025-10-17 17:25   ` Nuno Das Neves
  0 siblings, 1 reply; 6+ messages in thread
From: Michael Kelley @ 2025-10-17  1:12 UTC (permalink / raw)
  To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org
  Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, arnd@arndb.de, mrathor@linux.microsoft.com,
	skinsburskii@linux.microsoft.com

From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Thursday, October 16, 2025 12:54 PM
> 
> When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
> HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
> -EAGAIN to userspace.
> 
> However, it's much easier and efficient if the driver simply deposits
> memory on demand and immediately retries the hypercall as is done with
> all the other hypercall helper functions.
> 
> But unlike those, in MSHV_ROOT_HVCALL the input is opaque to the
> kernel. This is problematic for rep hypercalls, because the next part
> of the input list can't be copied on each loop after depositing pages
> (this was the original reason for returning -EAGAIN in this case).
> 
> Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
> parameter. This solves the issue, allowing the deposit loop in
> MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
> partway through.

From reading the above, I'm pretty sure this code change is an
optimization that lets user space avoid having to deal with the
-EAGAIN result by resubmitting the ioctl with a different
starting point for a rep hypercall. As such, I'd suggest the patch
title should be "Improve deposit memory ...." (or something similar).
The word "Fix" makes it sound like a bug fix.

Or is user space code currently faulty in its handling of -EAGAIN, and
this really is an indirect bug fix to make things work? If so, do you
want a Fixes: tag so the change is backported?

> 
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/mshv_root_main.c    | 52 ++++++++++++++++++++--------------
>  include/asm-generic/mshyperv.h | 14 +++++++--
>  2 files changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 9ae67c6e9f60..731ec8cbbd63 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -159,6 +159,7 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
>  	unsigned int pages_order;
>  	void *input_pg = NULL;
>  	void *output_pg = NULL;
> +	u16 reps_completed;
> 
>  	if (copy_from_user(&args, user_args, sizeof(args)))
>  		return -EFAULT;
> @@ -210,28 +211,35 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
>  	 */
>  	*(u64 *)input_pg = partition->pt_id;
> 
> -	if (args.reps)
> -		status = hv_do_rep_hypercall(args.code, args.reps, 0,
> -					     input_pg, output_pg);
> -	else
> -		status = hv_do_hypercall(args.code, input_pg, output_pg);
> -
> -	if (hv_result(status) == HV_STATUS_CALL_PENDING) {
> -		if (is_async) {
> -			mshv_async_hvcall_handler(partition, &status);
> -		} else { /* Paranoia check. This shouldn't happen! */
> -			ret = -EBADFD;
> -			goto free_pages_out;
> +	reps_completed = 0;
> +	do {
> +		if (args.reps) {
> +			status = hv_do_rep_hypercall_ex(args.code, args.reps,
> +							0, reps_completed,
> +							input_pg, output_pg);
> +			reps_completed = hv_repcomp(status);
> +		} else {
> +			status = hv_do_hypercall(args.code, input_pg, output_pg);
>  		}
> -	}
> 
> -	if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
> -		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition->pt_id, 1);
> -		if (!ret)
> -			ret = -EAGAIN;
> -	} else if (!hv_result_success(status)) {
> -		ret = hv_result_to_errno(status);
> -	}
> +		if (hv_result(status) == HV_STATUS_CALL_PENDING) {
> +			if (is_async) {
> +				mshv_async_hvcall_handler(partition, &status);
> +			} else { /* Paranoia check. This shouldn't happen! */
> +				ret = -EBADFD;
> +				goto free_pages_out;
> +			}
> +		}
> +
> +		if (hv_result_success(status))
> +			break;
> +
> +		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
> +			ret = hv_result_to_errno(status);
> +		else
> +			ret = hv_call_deposit_pages(NUMA_NO_NODE,
> +						    partition->pt_id, 1);
> +	} while (!ret);
> 
>  	/*
>  	 * Always return the status and output data regardless of result.

This comment about always returning the output data is now incorrect.

> @@ -240,11 +248,11 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
>  	 * succeeded.
>  	 */
>  	args.status = hv_result(status);
> -	args.reps = args.reps ? hv_repcomp(status) : 0;
> +	args.reps = reps_completed;
>  	if (copy_to_user(user_args, &args, sizeof(args)))
>  		ret = -EFAULT;
> 
> -	if (output_pg &&
> +	if (!ret && output_pg &&
>  	    copy_to_user((void __user *)args.out_ptr, output_pg, args.out_sz))
>  		ret = -EFAULT;
> 
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index ebf458dbcf84..31a209f0e18f 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -128,8 +128,9 @@ static inline unsigned int hv_repcomp(u64 status)
>   * Rep hypercalls. Callers of this functions are supposed to ensure that
>   * rep_count and varhead_size comply with Hyper-V hypercall definition.

Nit: This comment could be updated to include the new "rep_start"
parameter.

>   */
> -static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
> -				      void *input, void *output)
> +static inline u64 hv_do_rep_hypercall_ex(u16 code, u16 rep_count,
> +					 u16 varhead_size, u16 rep_start,
> +					 void *input, void *output)
>  {
>  	u64 control = code;
>  	u64 status;
> @@ -137,6 +138,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
> 
>  	control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
>  	control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
> +	control |= (u64)rep_start << HV_HYPERCALL_REP_START_OFFSET;
> 
>  	do {
>  		status = hv_do_hypercall(control, input, output);
> @@ -154,6 +156,14 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
>  	return status;
>  }
> 
> +/* For the typical case where rep_start is 0 */
> +static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
> +				      void *input, void *output)
> +{
> +	return hv_do_rep_hypercall_ex(code, rep_count, varhead_size, 0,
> +				      input, output);
> +}
> +
>  /* Generate the guest OS identifier as described in the Hyper-V TLFS */
>  static inline u64 hv_generate_guest_id(u64 kernel_version)
>  {

Overall, this looks good to me. I don't see any issues with the code.

Michael

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL
  2025-10-17  1:12 ` Michael Kelley
@ 2025-10-17 17:25   ` Nuno Das Neves
  2025-10-17 17:33     ` Michael Kelley
  0 siblings, 1 reply; 6+ messages in thread
From: Nuno Das Neves @ 2025-10-17 17:25 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org
  Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, arnd@arndb.de, mrathor@linux.microsoft.com,
	skinsburskii@linux.microsoft.com

On 10/16/2025 6:12 PM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Thursday, October 16, 2025 12:54 PM
>>
>> When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
>> HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
>> -EAGAIN to userspace.
>>
>> However, it's much easier and efficient if the driver simply deposits
>> memory on demand and immediately retries the hypercall as is done with
>> all the other hypercall helper functions.
>>
>> But unlike those, in MSHV_ROOT_HVCALL the input is opaque to the
>> kernel. This is problematic for rep hypercalls, because the next part
>> of the input list can't be copied on each loop after depositing pages
>> (this was the original reason for returning -EAGAIN in this case).
>>
>> Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
>> parameter. This solves the issue, allowing the deposit loop in
>> MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
>> partway through.
> 
>>From reading the above, I'm pretty sure this code change is an
> optimization that lets user space avoid having to deal with the
> -EAGAIN result by resubmitting the ioctl with a different
> starting point for a rep hypercall. As such, I'd suggest the patch
> title should be "Improve deposit memory ...." (or something similar).
> The word "Fix" makes it sound like a bug fix.
> 
> Or is user space code currently faulty in its handling of -EAGAIN, and
> this really is an indirect bug fix to make things work? If so, do you
> want a Fixes: tag so the change is backported?
> 

It's the latter case, userspace doesn't handle it correctly, so I
consider it a fix more than just an improvement.

I'll add a Fixes: tag pointing back to the original /dev/mshv patch.

>>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>>  drivers/hv/mshv_root_main.c    | 52 ++++++++++++++++++++--------------
>>  include/asm-generic/mshyperv.h | 14 +++++++--
>>  2 files changed, 42 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
>> index 9ae67c6e9f60..731ec8cbbd63 100644
>> --- a/drivers/hv/mshv_root_main.c
>> +++ b/drivers/hv/mshv_root_main.c
>> @@ -159,6 +159,7 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
>>  	unsigned int pages_order;
>>  	void *input_pg = NULL;
>>  	void *output_pg = NULL;
>> +	u16 reps_completed;
>>
>>  	if (copy_from_user(&args, user_args, sizeof(args)))
>>  		return -EFAULT;
>> @@ -210,28 +211,35 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
>>  	 */
>>  	*(u64 *)input_pg = partition->pt_id;
>>
>> -	if (args.reps)
>> -		status = hv_do_rep_hypercall(args.code, args.reps, 0,
>> -					     input_pg, output_pg);
>> -	else
>> -		status = hv_do_hypercall(args.code, input_pg, output_pg);
>> -
>> -	if (hv_result(status) == HV_STATUS_CALL_PENDING) {
>> -		if (is_async) {
>> -			mshv_async_hvcall_handler(partition, &status);
>> -		} else { /* Paranoia check. This shouldn't happen! */
>> -			ret = -EBADFD;
>> -			goto free_pages_out;
>> +	reps_completed = 0;
>> +	do {
>> +		if (args.reps) {
>> +			status = hv_do_rep_hypercall_ex(args.code, args.reps,
>> +							0, reps_completed,
>> +							input_pg, output_pg);
>> +			reps_completed = hv_repcomp(status);
>> +		} else {
>> +			status = hv_do_hypercall(args.code, input_pg, output_pg);
>>  		}
>> -	}
>>
>> -	if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
>> -		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition->pt_id, 1);
>> -		if (!ret)
>> -			ret = -EAGAIN;
>> -	} else if (!hv_result_success(status)) {
>> -		ret = hv_result_to_errno(status);
>> -	}
>> +		if (hv_result(status) == HV_STATUS_CALL_PENDING) {
>> +			if (is_async) {
>> +				mshv_async_hvcall_handler(partition, &status);
>> +			} else { /* Paranoia check. This shouldn't happen! */
>> +				ret = -EBADFD;
>> +				goto free_pages_out;
>> +			}
>> +		}
>> +
>> +		if (hv_result_success(status))
>> +			break;
>> +
>> +		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
>> +			ret = hv_result_to_errno(status);
>> +		else
>> +			ret = hv_call_deposit_pages(NUMA_NO_NODE,
>> +						    partition->pt_id, 1);
>> +	} while (!ret);
>>
>>  	/*
>>  	 * Always return the status and output data regardless of result.
> 
> This comment about always returning the output data is now incorrect.
> 

Thanks, I'll fix it

>> @@ -240,11 +248,11 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
>>  	 * succeeded.
>>  	 */
>>  	args.status = hv_result(status);
>> -	args.reps = args.reps ? hv_repcomp(status) : 0;
>> +	args.reps = reps_completed;
>>  	if (copy_to_user(user_args, &args, sizeof(args)))
>>  		ret = -EFAULT;
>>
>> -	if (output_pg &&
>> +	if (!ret && output_pg &&
>>  	    copy_to_user((void __user *)args.out_ptr, output_pg, args.out_sz))
>>  		ret = -EFAULT;
>>
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index ebf458dbcf84..31a209f0e18f 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -128,8 +128,9 @@ static inline unsigned int hv_repcomp(u64 status)
>>   * Rep hypercalls. Callers of this functions are supposed to ensure that
>>   * rep_count and varhead_size comply with Hyper-V hypercall definition.
> 
> Nit: This comment could be updated to include the new "rep_start"
> parameter.
> 

Thanks, will add

>>   */
>> -static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
>> -				      void *input, void *output)
>> +static inline u64 hv_do_rep_hypercall_ex(u16 code, u16 rep_count,
>> +					 u16 varhead_size, u16 rep_start,
>> +					 void *input, void *output)
>>  {
>>  	u64 control = code;
>>  	u64 status;
>> @@ -137,6 +138,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
>>
>>  	control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
>>  	control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
>> +	control |= (u64)rep_start << HV_HYPERCALL_REP_START_OFFSET;
>>
>>  	do {
>>  		status = hv_do_hypercall(control, input, output);
>> @@ -154,6 +156,14 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
>>  	return status;
>>  }
>>
>> +/* For the typical case where rep_start is 0 */
>> +static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
>> +				      void *input, void *output)
>> +{
>> +	return hv_do_rep_hypercall_ex(code, rep_count, varhead_size, 0,
>> +				      input, output);
>> +}
>> +
>>  /* Generate the guest OS identifier as described in the Hyper-V TLFS */
>>  static inline u64 hv_generate_guest_id(u64 kernel_version)
>>  {
> 
> Overall, this looks good to me. I don't see any issues with the code.
> 
> Michael


^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL
  2025-10-17 17:25   ` Nuno Das Neves
@ 2025-10-17 17:33     ` Michael Kelley
  2025-10-17 17:55       ` Nuno Das Neves
  0 siblings, 1 reply; 6+ messages in thread
From: Michael Kelley @ 2025-10-17 17:33 UTC (permalink / raw)
  To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org
  Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, arnd@arndb.de, mrathor@linux.microsoft.com,
	skinsburskii@linux.microsoft.com

From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, October 17, 2025 10:26 AM
> 
> On 10/16/2025 6:12 PM, Michael Kelley wrote:
> > From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Thursday, October 16, 2025 12:54 PM
> >>
> >> When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
> >> HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
> >> -EAGAIN to userspace.
> >>
> >> However, it's much easier and efficient if the driver simply deposits
> >> memory on demand and immediately retries the hypercall as is done with
> >> all the other hypercall helper functions.
> >>
> >> But unlike those, in MSHV_ROOT_HVCALL the input is opaque to the
> >> kernel. This is problematic for rep hypercalls, because the next part
> >> of the input list can't be copied on each loop after depositing pages
> >> (this was the original reason for returning -EAGAIN in this case).
> >>
> >> Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
> >> parameter. This solves the issue, allowing the deposit loop in
> >> MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
> >> partway through.
> >
> >>From reading the above, I'm pretty sure this code change is an
> > optimization that lets user space avoid having to deal with the
> > -EAGAIN result by resubmitting the ioctl with a different
> > starting point for a rep hypercall. As such, I'd suggest the patch
> > title should be "Improve deposit memory ...." (or something similar).
> > The word "Fix" makes it sound like a bug fix.
> >
> > Or is user space code currently faulty in its handling of -EAGAIN, and
> > this really is an indirect bug fix to make things work? If so, do you
> > want a Fixes: tag so the change is backported?
> >
> 
> It's the latter case, userspace doesn't handle it correctly, so I
> consider it a fix more than just an improvement.

OK, thanks. You might want to tweak the commit message a bit
to clarify that this really is a bug fix and why. I was leaning
toward the wrong conclusion based on the current commit
message.

Michael

> 
> I'll add a Fixes: tag pointing back to the original /dev/mshv patch.
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL
  2025-10-17 17:33     ` Michael Kelley
@ 2025-10-17 17:55       ` Nuno Das Neves
  2025-10-17 18:09         ` Michael Kelley
  0 siblings, 1 reply; 6+ messages in thread
From: Nuno Das Neves @ 2025-10-17 17:55 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org
  Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, arnd@arndb.de, mrathor@linux.microsoft.com,
	skinsburskii@linux.microsoft.com

On 10/17/2025 10:33 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, October 17, 2025 10:26 AM
>>
>> On 10/16/2025 6:12 PM, Michael Kelley wrote:
>>> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Thursday, October 16, 2025 12:54 PM
>>>>
>>>> When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
>>>> HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
>>>> -EAGAIN to userspace.
>>>>
>>>> However, it's much easier and efficient if the driver simply deposits
>>>> memory on demand and immediately retries the hypercall as is done with
>>>> all the other hypercall helper functions.
>>>>
>>>> But unlike those, in MSHV_ROOT_HVCALL the input is opaque to the
>>>> kernel. This is problematic for rep hypercalls, because the next part
>>>> of the input list can't be copied on each loop after depositing pages
>>>> (this was the original reason for returning -EAGAIN in this case).
>>>>
>>>> Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
>>>> parameter. This solves the issue, allowing the deposit loop in
>>>> MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
>>>> partway through.
>>>
>>> >From reading the above, I'm pretty sure this code change is an
>>> optimization that lets user space avoid having to deal with the
>>> -EAGAIN result by resubmitting the ioctl with a different
>>> starting point for a rep hypercall. As such, I'd suggest the patch
>>> title should be "Improve deposit memory ...." (or something similar).
>>> The word "Fix" makes it sound like a bug fix.
>>>
>>> Or is user space code currently faulty in its handling of -EAGAIN, and
>>> this really is an indirect bug fix to make things work? If so, do you
>>> want a Fixes: tag so the change is backported?
>>>
>>
>> It's the latter case, userspace doesn't handle it correctly, so I
>> consider it a fix more than just an improvement.
> 
> OK, thanks. You might want to tweak the commit message a bit
> to clarify that this really is a bug fix and why. I was leaning
> toward the wrong conclusion based on the current commit
> message.
> 

Is this clearer?

"
mshv: Fix deposit memory in MSHV_ROOT_HVCALL

When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
-EAGAIN to userspace. The expectation is that the VMM will retry.

However, some VMM code in the wild doesn't do this and simply fails.
Rather than force the VMM to retry, change the ioctl to deposit
memory on demand and immediately retry the hypercall as is done with
all the other hypercall helper functions.

In addition to making the ioctl easier to use, removing the need for
multiple syscalls improves performance.

There is a complication: unlike the other hypercall helper functions,
in MSHV_ROOT_HVCALL the input is opaque to the kernel. This is
problematic for rep hypercalls, because the next part of the input
list can't be copied on each loop after depositing pages (this was
the original reason for returning -EAGAIN in this case).

Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
parameter. This solves the issue, allowing the deposit loop in
MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
partway through.
"

> Michael
> 
>>
>> I'll add a Fixes: tag pointing back to the original /dev/mshv patch.
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL
  2025-10-17 17:55       ` Nuno Das Neves
@ 2025-10-17 18:09         ` Michael Kelley
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Kelley @ 2025-10-17 18:09 UTC (permalink / raw)
  To: Nuno Das Neves, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org
  Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, arnd@arndb.de, mrathor@linux.microsoft.com,
	skinsburskii@linux.microsoft.com

From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, October 17, 2025 10:55 AM
> 
> On 10/17/2025 10:33 AM, Michael Kelley wrote:
> > From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, October 17, 2025 10:26 AM
> >>
> >> On 10/16/2025 6:12 PM, Michael Kelley wrote:
> >>> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Thursday, October 16, 2025 12:54 PM
> >>>>
> >>>> When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
> >>>> HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
> >>>> -EAGAIN to userspace.
> >>>>
> >>>> However, it's much easier and efficient if the driver simply deposits
> >>>> memory on demand and immediately retries the hypercall as is done with
> >>>> all the other hypercall helper functions.
> >>>>
> >>>> But unlike those, in MSHV_ROOT_HVCALL the input is opaque to the
> >>>> kernel. This is problematic for rep hypercalls, because the next part
> >>>> of the input list can't be copied on each loop after depositing pages
> >>>> (this was the original reason for returning -EAGAIN in this case).
> >>>>
> >>>> Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
> >>>> parameter. This solves the issue, allowing the deposit loop in
> >>>> MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
> >>>> partway through.
> >>>
> >>> >From reading the above, I'm pretty sure this code change is an
> >>> optimization that lets user space avoid having to deal with the
> >>> -EAGAIN result by resubmitting the ioctl with a different
> >>> starting point for a rep hypercall. As such, I'd suggest the patch
> >>> title should be "Improve deposit memory ...." (or something similar).
> >>> The word "Fix" makes it sound like a bug fix.
> >>>
> >>> Or is user space code currently faulty in its handling of -EAGAIN, and
> >>> this really is an indirect bug fix to make things work? If so, do you
> >>> want a Fixes: tag so the change is backported?
> >>>
> >>
> >> It's the latter case, userspace doesn't handle it correctly, so I
> >> consider it a fix more than just an improvement.
> >
> > OK, thanks. You might want to tweak the commit message a bit
> > to clarify that this really is a bug fix and why. I was leaning
> > toward the wrong conclusion based on the current commit
> > message.
> >
> 
> Is this clearer?
> 
> "
> mshv: Fix deposit memory in MSHV_ROOT_HVCALL
> 
> When the MSHV_ROOT_HVCALL ioctl is executing a hypercall, and gets
> HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
> -EAGAIN to userspace. The expectation is that the VMM will retry.
> 
> However, some VMM code in the wild doesn't do this and simply fails.
> Rather than force the VMM to retry, change the ioctl to deposit
> memory on demand and immediately retry the hypercall as is done with
> all the other hypercall helper functions.
> 
> In addition to making the ioctl easier to use, removing the need for
> multiple syscalls improves performance.
> 
> There is a complication: unlike the other hypercall helper functions,
> in MSHV_ROOT_HVCALL the input is opaque to the kernel. This is
> problematic for rep hypercalls, because the next part of the input
> list can't be copied on each loop after depositing pages (this was
> the original reason for returning -EAGAIN in this case).
> 
> Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
> parameter. This solves the issue, allowing the deposit loop in
> MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
> partway through.

Yes, that works for me.  Thanks.

Michael

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-10-17 18:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-16 19:53 [PATCH] mshv: Fix deposit memory in MSHV_ROOT_HVCALL Nuno Das Neves
2025-10-17  1:12 ` Michael Kelley
2025-10-17 17:25   ` Nuno Das Neves
2025-10-17 17:33     ` Michael Kelley
2025-10-17 17:55       ` Nuno Das Neves
2025-10-17 18:09         ` Michael Kelley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox