All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling
@ 2026-06-05 11:53 Arseniy Krasnov
  2026-06-05 15:08 ` David Laight
  0 siblings, 1 reply; 5+ messages in thread
From: Arseniy Krasnov @ 2026-06-05 11:53 UTC (permalink / raw)
  To: Stefan Hajnoczi, Stefano Garzarella, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Michael S. Tsirkin,
	Jason Wang, Bobby Eshleman, Xuan Zhuo, Eugenio Pérez,
	Simon Horman
  Cc: kvm, virtualization, netdev, linux-kernel, oxffffaa, rulkc,
	Arseniy Krasnov

Logically it was based on TCP implementation, so to make further
support easier, rewrite it in the TCP way.

Signed-off-by: Arseniy Krasnov <avkrasnov@rulkc.org>
---
 net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 2fd9eaaf5ca6..00caeeaa5590 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
 static int virtio_transport_fill_skb(struct sk_buff *skb,
 				     struct virtio_vsock_pkt_info *info,
 				     size_t len,
-				     bool zcopy)
+				     bool zcopy, struct ubuf_info *uarg)
 {
 	struct msghdr *msg = info->msg;
 
+	/* We have completion - attach it to 'skb'. */
+	skb_zcopy_set(skb, uarg, NULL);
+
 	if (zcopy)
 		return __zerocopy_sg_from_iter(msg, NULL, skb,
 					       &msg->msg_iter, len, NULL);
@@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
 						  u32 src_cid,
 						  u32 src_port,
 						  u32 dst_cid,
-						  u32 dst_port)
+						  u32 dst_port,
+						  struct ubuf_info *uarg)
 {
 	struct vsock_sock *vsk;
 	struct sk_buff *skb;
@@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
 	if (info->msg && payload_len > 0) {
 		int err;
 
-		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
+		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
 		if (err)
 			goto out;
 
@@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
 		return pkt_len;
 
-	if (info->msg) {
-		/* If zerocopy is not enabled by 'setsockopt()', we behave as
-		 * there is no MSG_ZEROCOPY flag set.
+	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
+		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
+		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
+		 * handling from 'tcp_sendmsg_locked()'.
 		 */
-		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
-			info->msg->msg_flags &= ~MSG_ZEROCOPY;
+		if (info->msg->msg_ubuf) {
+			uarg = info->msg->msg_ubuf;
+			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
+		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
+			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
+						    NULL, false);
+			if (!uarg) {
+				virtio_transport_put_credit(vvs, pkt_len);
+				return -ENOMEM;
+			}
 
-		if (info->msg->msg_flags & MSG_ZEROCOPY)
 			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
 
+			if (!can_zcopy)
+				uarg_to_msgzc(uarg)->zerocopy = 0;
+
+			have_uref = true;
+		}
+
+		/* 'can_zcopy' means that this transmission will be
+		 * in zerocopy way (e.g. using 'frags' array).
+		 */
 		if (can_zcopy)
 			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
 					    (MAX_SKB_FRAGS * PAGE_SIZE));
-
-		if (info->msg->msg_flags & MSG_ZEROCOPY &&
-		    info->op == VIRTIO_VSOCK_OP_RW) {
-			uarg = info->msg->msg_ubuf;
-
-			if (!uarg) {
-				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
-							    pkt_len, NULL, false);
-				if (!uarg) {
-					virtio_transport_put_credit(vvs, pkt_len);
-					return -ENOMEM;
-				}
-
-				if (!can_zcopy)
-					uarg_to_msgzc(uarg)->zerocopy = 0;
-
-				have_uref = true;
-			}
-		}
 	}
 
 	rest_len = pkt_len;
@@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 
 		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
 						 src_cid, src_port,
-						 dst_cid, dst_port);
+						 dst_cid, dst_port, uarg);
 		if (!skb) {
 			ret = -ENOMEM;
 			break;
 		}
 
-		skb_zcopy_set(skb, uarg, NULL);
-
 		virtio_transport_inc_tx_pkt(vvs, skb);
 
 		ret = t_ops->send_pkt(skb, info->net);
@@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
 					   le64_to_cpu(hdr->dst_cid),
 					   le32_to_cpu(hdr->dst_port),
 					   le64_to_cpu(hdr->src_cid),
-					   le32_to_cpu(hdr->src_port));
+					   le32_to_cpu(hdr->src_port), NULL);
 	if (!reply)
 		return -ENOMEM;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling
  2026-06-05 11:53 [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling Arseniy Krasnov
@ 2026-06-05 15:08 ` David Laight
  2026-06-08  8:10   ` Arseniy Krasnov
  0 siblings, 1 reply; 5+ messages in thread
From: David Laight @ 2026-06-05 15:08 UTC (permalink / raw)
  To: Arseniy Krasnov
  Cc: Stefan Hajnoczi, Stefano Garzarella, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Michael S. Tsirkin,
	Jason Wang, Bobby Eshleman, Xuan Zhuo, Eugenio Pérez,
	Simon Horman, kvm, virtualization, netdev, linux-kernel, oxffffaa,
	rulkc

On Fri,  5 Jun 2026 14:53:14 +0300
Arseniy Krasnov <avkrasnov@rulkc.org> wrote:

> Logically it was based on TCP implementation, so to make further
> support easier, rewrite it in the TCP way.
> 
> Signed-off-by: Arseniy Krasnov <avkrasnov@rulkc.org>
> ---
>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
>  1 file changed, 32 insertions(+), 32 deletions(-)
> 
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 2fd9eaaf5ca6..00caeeaa5590 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
>  static int virtio_transport_fill_skb(struct sk_buff *skb,
>  				     struct virtio_vsock_pkt_info *info,
>  				     size_t len,
> -				     bool zcopy)
> +				     bool zcopy, struct ubuf_info *uarg)
>  {
>  	struct msghdr *msg = info->msg;
>  
> +	/* We have completion - attach it to 'skb'. */
> +	skb_zcopy_set(skb, uarg, NULL);
> +
>  	if (zcopy)
>  		return __zerocopy_sg_from_iter(msg, NULL, skb,
>  					       &msg->msg_iter, len, NULL);
> @@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>  						  u32 src_cid,
>  						  u32 src_port,
>  						  u32 dst_cid,
> -						  u32 dst_port)
> +						  u32 dst_port,
> +						  struct ubuf_info *uarg)
>  {
>  	struct vsock_sock *vsk;
>  	struct sk_buff *skb;
> @@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>  	if (info->msg && payload_len > 0) {
>  		int err;
>  
> -		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
> +		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
>  		if (err)
>  			goto out;
>  
> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>  	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>  		return pkt_len;
>  
> -	if (info->msg) {
> -		/* If zerocopy is not enabled by 'setsockopt()', we behave as
> -		 * there is no MSG_ZEROCOPY flag set.
> +	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
> +		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
> +		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
> +		 * handling from 'tcp_sendmsg_locked()'.
>  		 */
> -		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
> -			info->msg->msg_flags &= ~MSG_ZEROCOPY;
> +		if (info->msg->msg_ubuf) {
> +			uarg = info->msg->msg_ubuf;
> +			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
> +		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
> +			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
> +						    NULL, false);
> +			if (!uarg) {
> +				virtio_transport_put_credit(vvs, pkt_len);
> +				return -ENOMEM;
> +			}
>  
> -		if (info->msg->msg_flags & MSG_ZEROCOPY)
>  			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>  
> +			if (!can_zcopy)
> +				uarg_to_msgzc(uarg)->zerocopy = 0;
> +
> +			have_uref = true;
> +		}
> +
> +		/* 'can_zcopy' means that this transmission will be
> +		 * in zerocopy way (e.g. using 'frags' array).
> +		 */

I've not looked at the tcp code, but the above doesn't look right.
I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
That would give the outer code a callback when the last skb is freed but
still copy the data.

I also don't see the point of calling msg_zerocopy_realloc() to get a
callback when the last skb is freed and then setting
	uarg_to_msgzc(uarg)->zerocopy = 0;
so that the callback doesn't actually do anything.
It isn't as though you 'find out' later on that you can't actually do
zerocopy.

>  		if (can_zcopy)
>  			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
>  					    (MAX_SKB_FRAGS * PAGE_SIZE));
> -
> -		if (info->msg->msg_flags & MSG_ZEROCOPY &&
> -		    info->op == VIRTIO_VSOCK_OP_RW) {
> -			uarg = info->msg->msg_ubuf;
> -
> -			if (!uarg) {
> -				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
> -							    pkt_len, NULL, false);
> -				if (!uarg) {
> -					virtio_transport_put_credit(vvs, pkt_len);
> -					return -ENOMEM;
> -				}
> -
> -				if (!can_zcopy)
> -					uarg_to_msgzc(uarg)->zerocopy = 0;
> -
> -				have_uref = true;
> -			}
> -		}
>  	}
>  
>  	rest_len = pkt_len;
> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>  
>  		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
>  						 src_cid, src_port,
> -						 dst_cid, dst_port);
> +						 dst_cid, dst_port, uarg);
>  		if (!skb) {
>  			ret = -ENOMEM;
>  			break;
>  		}
>  
> -		skb_zcopy_set(skb, uarg, NULL);

Aren't you passing uarg through two function calls instead of doing it here.
Doesn't even make it clearer what is going on.

-- David

> -
>  		virtio_transport_inc_tx_pkt(vvs, skb);
>  
>  		ret = t_ops->send_pkt(skb, info->net);
> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
>  					   le64_to_cpu(hdr->dst_cid),
>  					   le32_to_cpu(hdr->dst_port),
>  					   le64_to_cpu(hdr->src_cid),
> -					   le32_to_cpu(hdr->src_port));
> +					   le32_to_cpu(hdr->src_port), NULL);
>  	if (!reply)
>  		return -ENOMEM;
>  


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling
  2026-06-05 15:08 ` David Laight
@ 2026-06-08  8:10   ` Arseniy Krasnov
  2026-06-08  9:37     ` David Laight
  0 siblings, 1 reply; 5+ messages in thread
From: Arseniy Krasnov @ 2026-06-08  8:10 UTC (permalink / raw)
  To: David Laight
  Cc: Stefan Hajnoczi, Stefano Garzarella, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Michael S. Tsirkin,
	Jason Wang, Bobby Eshleman, Xuan Zhuo, Eugenio Pérez,
	Simon Horman, kvm, virtualization, netdev, linux-kernel, oxffffaa,
	rulkc


On 05/06/2026 18:08, David Laight wrote:
> On Fri,  5 Jun 2026 14:53:14 +0300
> Arseniy Krasnov <avkrasnov@rulkc.org> wrote:
>
>> Logically it was based on TCP implementation, so to make further
>> support easier, rewrite it in the TCP way.
>>
>> Signed-off-by: Arseniy Krasnov <avkrasnov@rulkc.org>
>> ---
>>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
>>  1 file changed, 32 insertions(+), 32 deletions(-)
>>
>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> index 2fd9eaaf5ca6..00caeeaa5590 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
>>  static int virtio_transport_fill_skb(struct sk_buff *skb,
>>  				     struct virtio_vsock_pkt_info *info,
>>  				     size_t len,
>> -				     bool zcopy)
>> +				     bool zcopy, struct ubuf_info *uarg)
>>  {
>>  	struct msghdr *msg = info->msg;
>>  
>> +	/* We have completion - attach it to 'skb'. */
>> +	skb_zcopy_set(skb, uarg, NULL);
>> +
>>  	if (zcopy)
>>  		return __zerocopy_sg_from_iter(msg, NULL, skb,
>>  					       &msg->msg_iter, len, NULL);
>> @@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>  						  u32 src_cid,
>>  						  u32 src_port,
>>  						  u32 dst_cid,
>> -						  u32 dst_port)
>> +						  u32 dst_port,
>> +						  struct ubuf_info *uarg)
>>  {
>>  	struct vsock_sock *vsk;
>>  	struct sk_buff *skb;
>> @@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>  	if (info->msg && payload_len > 0) {
>>  		int err;
>>  
>> -		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
>> +		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
>>  		if (err)
>>  			goto out;
>>  
>> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>  	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>  		return pkt_len;
>>  
>> -	if (info->msg) {
>> -		/* If zerocopy is not enabled by 'setsockopt()', we behave as
>> -		 * there is no MSG_ZEROCOPY flag set.
>> +	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
>> +		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
>> +		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
>> +		 * handling from 'tcp_sendmsg_locked()'.
>>  		 */
>> -		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
>> -			info->msg->msg_flags &= ~MSG_ZEROCOPY;
>> +		if (info->msg->msg_ubuf) {
>> +			uarg = info->msg->msg_ubuf;
>> +			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>> +		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
>> +			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
>> +						    NULL, false);
>> +			if (!uarg) {
>> +				virtio_transport_put_credit(vvs, pkt_len);
>> +				return -ENOMEM;
>> +			}
>>  
>> -		if (info->msg->msg_flags & MSG_ZEROCOPY)
>>  			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>>  
>> +			if (!can_zcopy)
>> +				uarg_to_msgzc(uarg)->zerocopy = 0;
>> +
>> +			have_uref = true;
>> +		}
>> +
>> +		/* 'can_zcopy' means that this transmission will be
>> +		 * in zerocopy way (e.g. using 'frags' array).
>> +		 */
> I've not looked at the tcp code, but the above doesn't look right.
> I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
> That would give the outer code a callback when the last skb is freed but
> still copy the data.

Hi, 

I guess case when 'msg->msg_ubuf' is non-NULL is special case today for io_uring MSG_ZEROCOPY implementation.
It was added here https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eb315a7d1396b1139fc7daea55f2d3191e8e7092
As I see implementation of its tests in tools/testing/selftests/net/io_uring_zerocopy_tx.c , it doesn't require setting SOCK_ZEROCOPY option for
socket, so for virtio vsock case I just copied same logic to maintain compatibility, because there is MSG_ZEROCOPY io_uring test for virtio/vsock.


>
> I also don't see the point of calling msg_zerocopy_realloc() to get a
> callback when the last skb is freed and then setting
> 	uarg_to_msgzc(uarg)->zerocopy = 0;
> so that the callback doesn't actually do anything.
> It isn't as though you 'find out' later on that you can't actually do
> zerocopy.


Sorry, what do You mean "last skb" ? In this code we first allocate uarg (allocate, because third arg is always NULL). Then in
loop we allocate sk_buffs, fill it with data and send. I mean first/last skb will be freed after uarg is already allocated and we
don't touch it. I think i didn't understand Your question here.


>
>>  		if (can_zcopy)
>>  			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
>>  					    (MAX_SKB_FRAGS * PAGE_SIZE));
>> -
>> -		if (info->msg->msg_flags & MSG_ZEROCOPY &&
>> -		    info->op == VIRTIO_VSOCK_OP_RW) {
>> -			uarg = info->msg->msg_ubuf;
>> -
>> -			if (!uarg) {
>> -				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
>> -							    pkt_len, NULL, false);
>> -				if (!uarg) {
>> -					virtio_transport_put_credit(vvs, pkt_len);
>> -					return -ENOMEM;
>> -				}
>> -
>> -				if (!can_zcopy)
>> -					uarg_to_msgzc(uarg)->zerocopy = 0;
>> -
>> -				have_uref = true;
>> -			}
>> -		}
>>  	}
>>  
>>  	rest_len = pkt_len;
>> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>  
>>  		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
>>  						 src_cid, src_port,
>> -						 dst_cid, dst_port);
>> +						 dst_cid, dst_port, uarg);
>>  		if (!skb) {
>>  			ret = -ENOMEM;
>>  			break;
>>  		}
>>  
>> -		skb_zcopy_set(skb, uarg, NULL);
> Aren't you passing uarg through two function calls instead of doing it here.
> Doesn't even make it clearer what is going on.


Agree, to simplify patch, uarg could be set earlier (without passing it to functions) I guess.

Thanks


>
> -- David
>
>> -
>>  		virtio_transport_inc_tx_pkt(vvs, skb);
>>  
>>  		ret = t_ops->send_pkt(skb, info->net);
>> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
>>  					   le64_to_cpu(hdr->dst_cid),
>>  					   le32_to_cpu(hdr->dst_port),
>>  					   le64_to_cpu(hdr->src_cid),
>> -					   le32_to_cpu(hdr->src_port));
>> +					   le32_to_cpu(hdr->src_port), NULL);
>>  	if (!reply)
>>  		return -ENOMEM;
>>  

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling
  2026-06-08  8:10   ` Arseniy Krasnov
@ 2026-06-08  9:37     ` David Laight
  2026-06-08 18:20       ` Arseniy Krasnov
  0 siblings, 1 reply; 5+ messages in thread
From: David Laight @ 2026-06-08  9:37 UTC (permalink / raw)
  To: Arseniy Krasnov
  Cc: Stefan Hajnoczi, Stefano Garzarella, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Michael S. Tsirkin,
	Jason Wang, Bobby Eshleman, Xuan Zhuo, Eugenio Pérez,
	Simon Horman, kvm, virtualization, netdev, linux-kernel, oxffffaa,
	rulkc

On Mon, 8 Jun 2026 11:10:24 +0300
Arseniy Krasnov <avkrasnov@rulkc.org> wrote:

> On 05/06/2026 18:08, David Laight wrote:
> > On Fri,  5 Jun 2026 14:53:14 +0300
> > Arseniy Krasnov <avkrasnov@rulkc.org> wrote:
> >  
> >> Logically it was based on TCP implementation, so to make further
> >> support easier, rewrite it in the TCP way.
> >>
> >> Signed-off-by: Arseniy Krasnov <avkrasnov@rulkc.org>
> >> ---
> >>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
> >>  1 file changed, 32 insertions(+), 32 deletions(-)
> >>
> >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >> index 2fd9eaaf5ca6..00caeeaa5590 100644
> >> --- a/net/vmw_vsock/virtio_transport_common.c
> >> +++ b/net/vmw_vsock/virtio_transport_common.c
> >> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
> >>  static int virtio_transport_fill_skb(struct sk_buff *skb,
> >>  				     struct virtio_vsock_pkt_info *info,
> >>  				     size_t len,
> >> -				     bool zcopy)
> >> +				     bool zcopy, struct ubuf_info *uarg)
> >>  {
> >>  	struct msghdr *msg = info->msg;
> >>  
> >> +	/* We have completion - attach it to 'skb'. */
> >> +	skb_zcopy_set(skb, uarg, NULL);
> >> +
> >>  	if (zcopy)
> >>  		return __zerocopy_sg_from_iter(msg, NULL, skb,
> >>  					       &msg->msg_iter, len, NULL);
> >> @@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
> >>  						  u32 src_cid,
> >>  						  u32 src_port,
> >>  						  u32 dst_cid,
> >> -						  u32 dst_port)
> >> +						  u32 dst_port,
> >> +						  struct ubuf_info *uarg)
> >>  {
> >>  	struct vsock_sock *vsk;
> >>  	struct sk_buff *skb;
> >> @@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
> >>  	if (info->msg && payload_len > 0) {
> >>  		int err;
> >>  
> >> -		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
> >> +		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
> >>  		if (err)
> >>  			goto out;
> >>  
> >> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >>  	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >>  		return pkt_len;
> >>  
> >> -	if (info->msg) {
> >> -		/* If zerocopy is not enabled by 'setsockopt()', we behave as
> >> -		 * there is no MSG_ZEROCOPY flag set.
> >> +	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
> >> +		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
> >> +		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
> >> +		 * handling from 'tcp_sendmsg_locked()'.
> >>  		 */
> >> -		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
> >> -			info->msg->msg_flags &= ~MSG_ZEROCOPY;
> >> +		if (info->msg->msg_ubuf) {
> >> +			uarg = info->msg->msg_ubuf;
> >> +			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
> >> +		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
> >> +			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
> >> +						    NULL, false);
> >> +			if (!uarg) {
> >> +				virtio_transport_put_credit(vvs, pkt_len);
> >> +				return -ENOMEM;
> >> +			}
> >>  
> >> -		if (info->msg->msg_flags & MSG_ZEROCOPY)
> >>  			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
> >>  
> >> +			if (!can_zcopy)
> >> +				uarg_to_msgzc(uarg)->zerocopy = 0;
> >> +
> >> +			have_uref = true;
> >> +		}
> >> +
> >> +		/* 'can_zcopy' means that this transmission will be
> >> +		 * in zerocopy way (e.g. using 'frags' array).
> >> +		 */  
> > I've not looked at the tcp code, but the above doesn't look right.
> > I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
> > That would give the outer code a callback when the last skb is freed but
> > still copy the data.  
> 
> Hi, 
> 
> I guess case when 'msg->msg_ubuf' is non-NULL is special case today for io_uring MSG_ZEROCOPY implementation.
> It was added here https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eb315a7d1396b1139fc7daea55f2d3191e8e7092
> As I see implementation of its tests in tools/testing/selftests/net/io_uring_zerocopy_tx.c , it doesn't require setting SOCK_ZEROCOPY option for
> socket, so for virtio vsock case I just copied same logic to maintain compatibility, because there is MSG_ZEROCOPY io_uring test for virtio/vsock.

That code path probably sets info->msg->msg_flags & MSG_ZEROCOPY so wants 'zerocopy'.
But wants it's own callback function called rather than one that msg_zerocopy_realloc()
adds.

But there is no reason why a caller might not just want a notification that
all the skb associated with the sendmsg have been freed without requesting zerocopy.
Maybe no one does it today, but it is trivial to support.

> 
> 
> >
> > I also don't see the point of calling msg_zerocopy_realloc() to get a
> > callback when the last skb is freed and then setting
> > 	uarg_to_msgzc(uarg)->zerocopy = 0;
> > so that the callback doesn't actually do anything.
> > It isn't as though you 'find out' later on that you can't actually do
> > zerocopy.  
> 
> 
> Sorry, what do You mean "last skb" ? In this code we first allocate uarg (allocate, because third arg is always NULL). Then in
> loop we allocate sk_buffs, fill it with data and send. I mean first/last skb will be freed after uarg is already allocated and we
> don't touch it. I think i didn't understand Your question here.

The 'uarg' is referenced by all of the skb that contain data for the sendmsg().
So when the last one of them is freed the callback function is called.
The purpose of that callback is to 'undo' the zerocopy (page pinning etc).
But when you set uarg_to_msgzc(uarg)->zerocopy = 0 the callback does nothing.
So there is no point setting up the callback at all.

-- David

> 
> 
> >  
> >>  		if (can_zcopy)
> >>  			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
> >>  					    (MAX_SKB_FRAGS * PAGE_SIZE));
> >> -
> >> -		if (info->msg->msg_flags & MSG_ZEROCOPY &&
> >> -		    info->op == VIRTIO_VSOCK_OP_RW) {
> >> -			uarg = info->msg->msg_ubuf;
> >> -
> >> -			if (!uarg) {
> >> -				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
> >> -							    pkt_len, NULL, false);
> >> -				if (!uarg) {
> >> -					virtio_transport_put_credit(vvs, pkt_len);
> >> -					return -ENOMEM;
> >> -				}
> >> -
> >> -				if (!can_zcopy)
> >> -					uarg_to_msgzc(uarg)->zerocopy = 0;
> >> -
> >> -				have_uref = true;
> >> -			}
> >> -		}
> >>  	}
> >>  
> >>  	rest_len = pkt_len;
> >> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >>  
> >>  		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
> >>  						 src_cid, src_port,
> >> -						 dst_cid, dst_port);
> >> +						 dst_cid, dst_port, uarg);
> >>  		if (!skb) {
> >>  			ret = -ENOMEM;
> >>  			break;
> >>  		}
> >>  
> >> -		skb_zcopy_set(skb, uarg, NULL);  
> > Aren't you passing uarg through two function calls instead of doing it here.
> > Doesn't even make it clearer what is going on.  
> 
> 
> Agree, to simplify patch, uarg could be set earlier (without passing it to functions) I guess.
> 
> Thanks
> 
> 
> >
> > -- David
> >  
> >> -
> >>  		virtio_transport_inc_tx_pkt(vvs, skb);
> >>  
> >>  		ret = t_ops->send_pkt(skb, info->net);
> >> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
> >>  					   le64_to_cpu(hdr->dst_cid),
> >>  					   le32_to_cpu(hdr->dst_port),
> >>  					   le64_to_cpu(hdr->src_cid),
> >> -					   le32_to_cpu(hdr->src_port));
> >> +					   le32_to_cpu(hdr->src_port), NULL);
> >>  	if (!reply)
> >>  		return -ENOMEM;
> >>    


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling
  2026-06-08  9:37     ` David Laight
@ 2026-06-08 18:20       ` Arseniy Krasnov
  0 siblings, 0 replies; 5+ messages in thread
From: Arseniy Krasnov @ 2026-06-08 18:20 UTC (permalink / raw)
  To: David Laight
  Cc: Stefan Hajnoczi, Stefano Garzarella, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Michael S. Tsirkin,
	Jason Wang, Bobby Eshleman, Xuan Zhuo, Eugenio Pérez,
	Simon Horman, kvm, virtualization, netdev, linux-kernel, oxffffaa,
	rulkc


On 08/06/2026 12:37, David Laight wrote:
> On Mon, 8 Jun 2026 11:10:24 +0300
> Arseniy Krasnov <avkrasnov@rulkc.org> wrote:
>
>> On 05/06/2026 18:08, David Laight wrote:
>>> On Fri,  5 Jun 2026 14:53:14 +0300
>>> Arseniy Krasnov <avkrasnov@rulkc.org> wrote:
>>>  
>>>> Logically it was based on TCP implementation, so to make further
>>>> support easier, rewrite it in the TCP way.
>>>>
>>>> Signed-off-by: Arseniy Krasnov <avkrasnov@rulkc.org>
>>>> ---
>>>>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
>>>>  1 file changed, 32 insertions(+), 32 deletions(-)
>>>>
>>>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>>> index 2fd9eaaf5ca6..00caeeaa5590 100644
>>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>>> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
>>>>  static int virtio_transport_fill_skb(struct sk_buff *skb,
>>>>  				     struct virtio_vsock_pkt_info *info,
>>>>  				     size_t len,
>>>> -				     bool zcopy)
>>>> +				     bool zcopy, struct ubuf_info *uarg)
>>>>  {
>>>>  	struct msghdr *msg = info->msg;
>>>>  
>>>> +	/* We have completion - attach it to 'skb'. */
>>>> +	skb_zcopy_set(skb, uarg, NULL);
>>>> +
>>>>  	if (zcopy)
>>>>  		return __zerocopy_sg_from_iter(msg, NULL, skb,
>>>>  					       &msg->msg_iter, len, NULL);
>>>> @@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>>>  						  u32 src_cid,
>>>>  						  u32 src_port,
>>>>  						  u32 dst_cid,
>>>> -						  u32 dst_port)
>>>> +						  u32 dst_port,
>>>> +						  struct ubuf_info *uarg)
>>>>  {
>>>>  	struct vsock_sock *vsk;
>>>>  	struct sk_buff *skb;
>>>> @@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>>>  	if (info->msg && payload_len > 0) {
>>>>  		int err;
>>>>  
>>>> -		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
>>>> +		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
>>>>  		if (err)
>>>>  			goto out;
>>>>  
>>>> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>>>  	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>>>  		return pkt_len;
>>>>  
>>>> -	if (info->msg) {
>>>> -		/* If zerocopy is not enabled by 'setsockopt()', we behave as
>>>> -		 * there is no MSG_ZEROCOPY flag set.
>>>> +	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
>>>> +		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
>>>> +		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
>>>> +		 * handling from 'tcp_sendmsg_locked()'.
>>>>  		 */
>>>> -		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
>>>> -			info->msg->msg_flags &= ~MSG_ZEROCOPY;
>>>> +		if (info->msg->msg_ubuf) {
>>>> +			uarg = info->msg->msg_ubuf;
>>>> +			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>>>> +		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
>>>> +			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
>>>> +						    NULL, false);
>>>> +			if (!uarg) {
>>>> +				virtio_transport_put_credit(vvs, pkt_len);
>>>> +				return -ENOMEM;
>>>> +			}
>>>>  
>>>> -		if (info->msg->msg_flags & MSG_ZEROCOPY)
>>>>  			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>>>>  
>>>> +			if (!can_zcopy)
>>>> +				uarg_to_msgzc(uarg)->zerocopy = 0;
>>>> +
>>>> +			have_uref = true;
>>>> +		}
>>>> +
>>>> +		/* 'can_zcopy' means that this transmission will be
>>>> +		 * in zerocopy way (e.g. using 'frags' array).
>>>> +		 */  
>>> I've not looked at the tcp code, but the above doesn't look right.
>>> I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
>>> That would give the outer code a callback when the last skb is freed but
>>> still copy the data.  
>> Hi, 
>>
>> I guess case when 'msg->msg_ubuf' is non-NULL is special case today for io_uring MSG_ZEROCOPY implementation.
>> It was added here https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eb315a7d1396b1139fc7daea55f2d3191e8e7092
>> As I see implementation of its tests in tools/testing/selftests/net/io_uring_zerocopy_tx.c , it doesn't require setting SOCK_ZEROCOPY option for
>> socket, so for virtio vsock case I just copied same logic to maintain compatibility, because there is MSG_ZEROCOPY io_uring test for virtio/vsock.
> That code path probably sets info->msg->msg_flags & MSG_ZEROCOPY so wants 'zerocopy'.
> But wants it's own callback function called rather than one that msg_zerocopy_realloc()
> adds.
>
> But there is no reason why a caller might not just want a notification that
> all the skb associated with the sendmsg have been freed without requesting zerocopy.
> Maybe no one does it today, but it is trivial to support.

Caller for this path is io_uring today. It uses own callback 'io_tx_ubuf_complete()' which implements notification using io_uring
achitecture instead of classic MSG_ERRQUEUE socket read way.

>
>>
>>> I also don't see the point of calling msg_zerocopy_realloc() to get a
>>> callback when the last skb is freed and then setting
>>> 	uarg_to_msgzc(uarg)->zerocopy = 0;
>>> so that the callback doesn't actually do anything.
>>> It isn't as though you 'find out' later on that you can't actually do
>>> zerocopy.  
>>
>> Sorry, what do You mean "last skb" ? In this code we first allocate uarg (allocate, because third arg is always NULL). Then in
>> loop we allocate sk_buffs, fill it with data and send. I mean first/last skb will be freed after uarg is already allocated and we
>> don't touch it. I think i didn't understand Your question here.
> The 'uarg' is referenced by all of the skb that contain data for the sendmsg().
> So when the last one of them is freed the callback function is called.
> The purpose of that callback is to 'undo' the zerocopy (page pinning etc).
> But when you set uarg_to_msgzc(uarg)->zerocopy = 0 the callback does nothing.
> So there is no point setting up the callback at all.

Pages are unpinned in sk_buff freeing logic: 'skb_release_data()' -> '__skb_frag_unref()'.  'zerocopy' flag of uarg shows 
caller that real zerocopy was used of not - it is checked in '__msg_zerocopy_callback()' and 'SO_EE_CODE_ZEROCOPY_COPIED' is set in
the 'ee_code' field of struct 'sock_extended_err' which is read by user. I mean idea of MSG_ZEROCOPY API is that if kernel doesn't
return error from 'sendmsg()' call with MSG_ZEROCOPY flag passed, it will send notification about data tx complete anyway - it doesn't
matter that real zercopy was done or not.

Thanks


>
> -- David
>
>>
>>>  
>>>>  		if (can_zcopy)
>>>>  			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
>>>>  					    (MAX_SKB_FRAGS * PAGE_SIZE));
>>>> -
>>>> -		if (info->msg->msg_flags & MSG_ZEROCOPY &&
>>>> -		    info->op == VIRTIO_VSOCK_OP_RW) {
>>>> -			uarg = info->msg->msg_ubuf;
>>>> -
>>>> -			if (!uarg) {
>>>> -				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
>>>> -							    pkt_len, NULL, false);
>>>> -				if (!uarg) {
>>>> -					virtio_transport_put_credit(vvs, pkt_len);
>>>> -					return -ENOMEM;
>>>> -				}
>>>> -
>>>> -				if (!can_zcopy)
>>>> -					uarg_to_msgzc(uarg)->zerocopy = 0;
>>>> -
>>>> -				have_uref = true;
>>>> -			}
>>>> -		}
>>>>  	}
>>>>  
>>>>  	rest_len = pkt_len;
>>>> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>>>  
>>>>  		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
>>>>  						 src_cid, src_port,
>>>> -						 dst_cid, dst_port);
>>>> +						 dst_cid, dst_port, uarg);
>>>>  		if (!skb) {
>>>>  			ret = -ENOMEM;
>>>>  			break;
>>>>  		}
>>>>  
>>>> -		skb_zcopy_set(skb, uarg, NULL);  
>>> Aren't you passing uarg through two function calls instead of doing it here.
>>> Doesn't even make it clearer what is going on.  
>>
>> Agree, to simplify patch, uarg could be set earlier (without passing it to functions) I guess.
>>
>> Thanks
>>
>>
>>> -- David
>>>  
>>>> -
>>>>  		virtio_transport_inc_tx_pkt(vvs, skb);
>>>>  
>>>>  		ret = t_ops->send_pkt(skb, info->net);
>>>> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
>>>>  					   le64_to_cpu(hdr->dst_cid),
>>>>  					   le32_to_cpu(hdr->dst_port),
>>>>  					   le64_to_cpu(hdr->src_cid),
>>>> -					   le32_to_cpu(hdr->src_port));
>>>> +					   le32_to_cpu(hdr->src_port), NULL);
>>>>  	if (!reply)
>>>>  		return -ENOMEM;
>>>>    

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-06-08 18:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-05 11:53 [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling Arseniy Krasnov
2026-06-05 15:08 ` David Laight
2026-06-08  8:10   ` Arseniy Krasnov
2026-06-08  9:37     ` David Laight
2026-06-08 18:20       ` Arseniy Krasnov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.