All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Laight <david.laight.linux@gmail.com>
To: Arseniy Krasnov <avkrasnov@rulkc.org>
Cc: "Stefan Hajnoczi" <stefanha@redhat.com>,
	"Stefano Garzarella" <sgarzare@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Jason Wang" <jasowang@redhat.com>,
	"Bobby Eshleman" <bobby.eshleman@bytedance.com>,
	"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
	"Eugenio Pérez" <eperezma@redhat.com>,
	"Simon Horman" <horms@kernel.org>,
	kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	oxffffaa@gmail.com, rulkc@linuxtesting.org
Subject: Re: [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling
Date: Fri, 5 Jun 2026 16:08:51 +0100	[thread overview]
Message-ID: <20260605160851.3ddbd2ed@pumpkin> (raw)
In-Reply-To: <20260605115314.552321-1-avkrasnov@rulkc.org>

On Fri,  5 Jun 2026 14:53:14 +0300
Arseniy Krasnov <avkrasnov@rulkc.org> wrote:

> Logically it was based on TCP implementation, so to make further
> support easier, rewrite it in the TCP way.
> 
> Signed-off-by: Arseniy Krasnov <avkrasnov@rulkc.org>
> ---
>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
>  1 file changed, 32 insertions(+), 32 deletions(-)
> 
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 2fd9eaaf5ca6..00caeeaa5590 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
>  static int virtio_transport_fill_skb(struct sk_buff *skb,
>  				     struct virtio_vsock_pkt_info *info,
>  				     size_t len,
> -				     bool zcopy)
> +				     bool zcopy, struct ubuf_info *uarg)
>  {
>  	struct msghdr *msg = info->msg;
>  
> +	/* We have completion - attach it to 'skb'. */
> +	skb_zcopy_set(skb, uarg, NULL);
> +
>  	if (zcopy)
>  		return __zerocopy_sg_from_iter(msg, NULL, skb,
>  					       &msg->msg_iter, len, NULL);
> @@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>  						  u32 src_cid,
>  						  u32 src_port,
>  						  u32 dst_cid,
> -						  u32 dst_port)
> +						  u32 dst_port,
> +						  struct ubuf_info *uarg)
>  {
>  	struct vsock_sock *vsk;
>  	struct sk_buff *skb;
> @@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>  	if (info->msg && payload_len > 0) {
>  		int err;
>  
> -		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
> +		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
>  		if (err)
>  			goto out;
>  
> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>  	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>  		return pkt_len;
>  
> -	if (info->msg) {
> -		/* If zerocopy is not enabled by 'setsockopt()', we behave as
> -		 * there is no MSG_ZEROCOPY flag set.
> +	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
> +		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
> +		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
> +		 * handling from 'tcp_sendmsg_locked()'.
>  		 */
> -		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
> -			info->msg->msg_flags &= ~MSG_ZEROCOPY;
> +		if (info->msg->msg_ubuf) {
> +			uarg = info->msg->msg_ubuf;
> +			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
> +		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
> +			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
> +						    NULL, false);
> +			if (!uarg) {
> +				virtio_transport_put_credit(vvs, pkt_len);
> +				return -ENOMEM;
> +			}
>  
> -		if (info->msg->msg_flags & MSG_ZEROCOPY)
>  			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>  
> +			if (!can_zcopy)
> +				uarg_to_msgzc(uarg)->zerocopy = 0;
> +
> +			have_uref = true;
> +		}
> +
> +		/* 'can_zcopy' means that this transmission will be
> +		 * in zerocopy way (e.g. using 'frags' array).
> +		 */

I've not looked at the tcp code, but the above doesn't look right.
I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
That would give the outer code a callback when the last skb is freed but
still copy the data.

I also don't see the point of calling msg_zerocopy_realloc() to get a
callback when the last skb is freed and then setting
	uarg_to_msgzc(uarg)->zerocopy = 0;
so that the callback doesn't actually do anything.
It isn't as though you 'find out' later on that you can't actually do
zerocopy.

>  		if (can_zcopy)
>  			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
>  					    (MAX_SKB_FRAGS * PAGE_SIZE));
> -
> -		if (info->msg->msg_flags & MSG_ZEROCOPY &&
> -		    info->op == VIRTIO_VSOCK_OP_RW) {
> -			uarg = info->msg->msg_ubuf;
> -
> -			if (!uarg) {
> -				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
> -							    pkt_len, NULL, false);
> -				if (!uarg) {
> -					virtio_transport_put_credit(vvs, pkt_len);
> -					return -ENOMEM;
> -				}
> -
> -				if (!can_zcopy)
> -					uarg_to_msgzc(uarg)->zerocopy = 0;
> -
> -				have_uref = true;
> -			}
> -		}
>  	}
>  
>  	rest_len = pkt_len;
> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>  
>  		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
>  						 src_cid, src_port,
> -						 dst_cid, dst_port);
> +						 dst_cid, dst_port, uarg);
>  		if (!skb) {
>  			ret = -ENOMEM;
>  			break;
>  		}
>  
> -		skb_zcopy_set(skb, uarg, NULL);

Aren't you passing uarg through two function calls instead of doing it here.
Doesn't even make it clearer what is going on.

-- David

> -
>  		virtio_transport_inc_tx_pkt(vvs, skb);
>  
>  		ret = t_ops->send_pkt(skb, info->net);
> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
>  					   le64_to_cpu(hdr->dst_cid),
>  					   le32_to_cpu(hdr->dst_port),
>  					   le64_to_cpu(hdr->src_cid),
> -					   le32_to_cpu(hdr->src_port));
> +					   le32_to_cpu(hdr->src_port), NULL);
>  	if (!reply)
>  		return -ENOMEM;
>  


  reply	other threads:[~2026-06-05 15:09 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-05 11:53 [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling Arseniy Krasnov
2026-06-05 15:08 ` David Laight [this message]
2026-06-08  8:10   ` Arseniy Krasnov
2026-06-08  9:37     ` David Laight
2026-06-08 18:20       ` Arseniy Krasnov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260605160851.3ddbd2ed@pumpkin \
    --to=david.laight.linux@gmail.com \
    --cc=avkrasnov@rulkc.org \
    --cc=bobby.eshleman@bytedance.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=eperezma@redhat.com \
    --cc=horms@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=kuba@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=oxffffaa@gmail.com \
    --cc=pabeni@redhat.com \
    --cc=rulkc@linuxtesting.org \
    --cc=sgarzare@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=xuanzhuo@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.