From: jiangyiwen <jiangyiwen@huawei.com>
To: Jason Wang <jasowang@redhat.com>, <stefanha@redhat.com>
Cc: <netdev@vger.kernel.org>, <kvm@vger.kernel.org>,
<virtualization@lists.linux-foundation.org>
Subject: Re: [PATCH 3/5] VSOCK: support receive mergeable rx buffer in guest
Date: Tue, 6 Nov 2018 14:41:54 +0800 [thread overview]
Message-ID: <5BE137B2.5040305@huawei.com> (raw)
In-Reply-To: <791d67e7-ad95-38e4-0d38-2b7c54d68040@redhat.com>
On 2018/11/6 12:00, Jason Wang wrote:
>
> On 2018/11/5 下午3:47, jiangyiwen wrote:
>> Guest receive mergeable rx buffer, it can merge
>> scatter rx buffer into a big buffer and then copy
>> to user space.
>>
>> Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
>> ---
>> include/linux/virtio_vsock.h | 9 ++++
>> net/vmw_vsock/virtio_transport.c | 75 +++++++++++++++++++++++++++++----
>> net/vmw_vsock/virtio_transport_common.c | 59 ++++++++++++++++++++++----
>> 3 files changed, 127 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>> index da9e1fe..6be3cd7 100644
>> --- a/include/linux/virtio_vsock.h
>> +++ b/include/linux/virtio_vsock.h
>> @@ -13,6 +13,8 @@
>> #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4)
>> #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL
>> #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64)
>> +/* virtio_vsock_pkt + max_pkt_len(default MAX_PKT_BUF_SIZE) */
>> +#define VIRTIO_VSOCK_MAX_MRG_BUF_NUM ((VIRTIO_VSOCK_MAX_PKT_BUF_SIZE / PAGE_SIZE) + 1)
>>
>> /* Virtio-vsock feature */
>> #define VIRTIO_VSOCK_F_MRG_RXBUF 0 /* Host can merge receive buffers. */
>> @@ -48,6 +50,11 @@ struct virtio_vsock_sock {
>> struct list_head rx_queue;
>> };
>>
>> +struct virtio_vsock_mrg_rxbuf {
>> + void *buf;
>> + u32 len;
>> +};
>> +
>> struct virtio_vsock_pkt {
>> struct virtio_vsock_hdr hdr;
>> struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>> @@ -59,6 +66,8 @@ struct virtio_vsock_pkt {
>> u32 len;
>> u32 off;
>> bool reply;
>> + bool mergeable;
>> + struct virtio_vsock_mrg_rxbuf mrg_rxbuf[VIRTIO_VSOCK_MAX_MRG_BUF_NUM];
>> };
>
>
> It's better to use iov here I think, and drop buf completely.
>
> And this is better to be done in an independent patch.
>
You're right, I can use kvec instead of customized structure,
in addition, I don't understand about drop buf completely and
an independent patch.
Thanks.
>
>>
>> struct virtio_vsock_pkt_info {
>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>> index 2040a9e..3557ad3 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -359,11 +359,62 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
>> return val < virtqueue_get_vring_size(vq);
>> }
>>
>> +static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq,
>> + struct virtio_vsock *vsock, unsigned int *total_len)
>> +{
>> + struct virtio_vsock_pkt *pkt;
>> + u16 num_buf;
>> + void *page;
>> + unsigned int len;
>> + int i = 0;
>> +
>> + page = virtqueue_get_buf(vq, &len);
>> + if (!page)
>> + return NULL;
>> +
>> + *total_len = len;
>> + vsock->rx_buf_nr--;
>> +
>> + pkt = page;
>> + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers);
>> + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_MRG_BUF_NUM)
>> + goto err;
>> +
>> + pkt->mergeable = true;
>> + if (!le32_to_cpu(pkt->hdr.len))
>> + return pkt;
>> +
>> + len -= sizeof(struct virtio_vsock_pkt);
>> + pkt->mrg_rxbuf[i].buf = page + sizeof(struct virtio_vsock_pkt);
>> + pkt->mrg_rxbuf[i].len = len;
>> + i++;
>> +
>> + while (--num_buf) {
>> + page = virtqueue_get_buf(vq, &len);
>> + if (!page)
>> + goto err;
>> +
>> + *total_len += len;
>> + vsock->rx_buf_nr--;
>> +
>> + pkt->mrg_rxbuf[i].buf = page;
>> + pkt->mrg_rxbuf[i].len = len;
>> + i++;
>> + }
>> +
>> + return pkt;
>> +err:
>> + virtio_transport_free_pkt(pkt);
>> + return NULL;
>> +}
>
>
> Similar logic could be found at virtio-net driver.
>
> Haven't thought this deeply, but it looks to me use virtio-net driver is also possible, e.g for data-path, just register vsock specific callbacks.
>
>
>> +
>> static void virtio_transport_rx_work(struct work_struct *work)
>> {
>> struct virtio_vsock *vsock =
>> container_of(work, struct virtio_vsock, rx_work);
>> struct virtqueue *vq;
>> + size_t vsock_hlen = vsock->mergeable ? sizeof(struct virtio_vsock_pkt) :
>> + sizeof(struct virtio_vsock_hdr);
>>
>> vq = vsock->vqs[VSOCK_VQ_RX];
>>
>> @@ -383,21 +434,29 @@ static void virtio_transport_rx_work(struct work_struct *work)
>> goto out;
>> }
>>
>> - pkt = virtqueue_get_buf(vq, &len);
>> - if (!pkt) {
>> - break;
>> - }
>> + if (likely(vsock->mergeable)) {
>> + pkt = receive_mergeable(vq, vsock, &len);
>> + if (!pkt)
>> + break;
>>
>> - vsock->rx_buf_nr--;
>> + pkt->len = le32_to_cpu(pkt->hdr.len);
>> + } else {
>> + pkt = virtqueue_get_buf(vq, &len);
>> + if (!pkt) {
>> + break;
>> + }
>> +
>> + vsock->rx_buf_nr--;
>> + }
>>
>> /* Drop short/long packets */
>> - if (unlikely(len < sizeof(pkt->hdr) ||
>> - len > sizeof(pkt->hdr) + pkt->len)) {
>> + if (unlikely(len < vsock_hlen ||
>> + len > vsock_hlen + pkt->len)) {
>> virtio_transport_free_pkt(pkt);
>> continue;
>> }
>>
>> - pkt->len = len - sizeof(pkt->hdr);
>> + pkt->len = len - vsock_hlen;
>> virtio_transport_deliver_tap_pkt(pkt);
>> virtio_transport_recv_pkt(pkt);
>> }
>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> index 3ae3a33..7bef1d5 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -272,14 +272,49 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
>> */
>> spin_unlock_bh(&vvs->rx_lock);
>>
>> - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
>> - if (err)
>> - goto out;
>> + if (pkt->mergeable) {
>> + struct virtio_vsock_mrg_rxbuf *buf = pkt->mrg_rxbuf;
>> + size_t mrg_copy_bytes, last_buf_total = 0, rxbuf_off;
>> + size_t tmp_bytes = bytes;
>> + int i;
>> +
>> + for (i = 0; i < le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); i++) {
>> + if (pkt->off > last_buf_total + buf[i].len) {
>> + last_buf_total += buf[i].len;
>> + continue;
>> + }
>> +
>> + rxbuf_off = pkt->off - last_buf_total;
>> + mrg_copy_bytes = min(buf[i].len - rxbuf_off, tmp_bytes);
>> + err = memcpy_to_msg(msg, buf[i].buf + rxbuf_off, mrg_copy_bytes);
>> + if (err)
>> + goto out;
>> +
>> + tmp_bytes -= mrg_copy_bytes;
>> + pkt->off += mrg_copy_bytes;
>> + last_buf_total += buf[i].len;
>> + if (!tmp_bytes)
>> + break;
>> + }
>
>
> After switch to use iov, you can user iov_iter helper to avoid the above open-coding I believe.
>
> And you can also drop the if (mergeable) condition.
>
> Thanks
>
Thanks, I will try to use iov instead.
>
>> +
>> + if (tmp_bytes) {
>> + printk(KERN_WARNING "WARNING! bytes = %llu, "
>> + "bytes = %llu\n",
>> + (unsigned long long)bytes,
>> + (unsigned long long)tmp_bytes);
>> + }
>> +
>> + total += (bytes - tmp_bytes);
>> + } else {
>> + err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
>> + if (err)
>> + goto out;
>> +
>> + total += bytes;
>> + pkt->off += bytes;
>> + }
>>
>> spin_lock_bh(&vvs->rx_lock);
>> -
>> - total += bytes;
>> - pkt->off += bytes;
>> if (pkt->off == pkt->len) {
>> virtio_transport_dec_rx_pkt(vvs, pkt);
>> list_del(&pkt->list);
>> @@ -1050,8 +1085,16 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
>>
>> void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
>> {
>> - kfree(pkt->buf);
>> - kfree(pkt);
>> + int i;
>> +
>> + if (pkt->mergeable) {
>> + for (i = 1; i < le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); i++)
>> + free_page((unsigned long)pkt->mrg_rxbuf[i].buf);
>> + free_page((unsigned long)(void *)pkt);
>> + } else {
>> + kfree(pkt->buf);
>> + kfree(pkt);
>> + }
>> }
>> EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
>>
>
> .
>
next prev parent reply other threads:[~2018-11-06 6:41 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-05 7:47 [PATCH 3/5] VSOCK: support receive mergeable rx buffer in guest jiangyiwen
2018-11-05 7:47 ` jiangyiwen
2018-11-06 4:00 ` Jason Wang
2018-11-06 6:41 ` jiangyiwen [this message]
2018-11-07 6:20 ` Jason Wang
2018-11-07 6:20 ` Jason Wang
2018-11-07 7:07 ` jiangyiwen
2018-11-07 7:07 ` jiangyiwen
2018-11-07 13:29 ` Jason Wang
2018-11-07 13:29 ` Jason Wang
2018-11-06 6:41 ` jiangyiwen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5BE137B2.5040305@huawei.com \
--to=jiangyiwen@huawei.com \
--cc=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=stefanha@redhat.com \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.