All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tiwei Bie <tiwei.bie@intel.com>
To: Flavio Leitner <fbl@sysclose.org>
Cc: dev@dpdk.org, Ilya Maximets <i.maximets@ovn.org>,
	Maxime Coquelin <maxime.coquelin@redhat.com>,
	Shahaf Shuler <shahafs@mellanox.com>,
	David Marchand <david.marchand@redhat.com>,
	Obrembski MichalX <michalx.obrembski@intel.com>,
	Stokes Ian <ian.stokes@intel.com>
Subject: Re: [dpdk-dev] [PATCH v3] vhost: add support for large buffers
Date: Mon, 14 Oct 2019 10:44:28 +0800	[thread overview]
Message-ID: <20191014024428.GA13124@___> (raw)
In-Reply-To: <20191011170947.30656-1-fbl@sysclose.org>

On Fri, Oct 11, 2019 at 02:09:47PM -0300, Flavio Leitner wrote:
> The rte_vhost_dequeue_burst supports two ways of dequeuing data.
> If the data fits into a buffer, then all data is copied and a
> single linear buffer is returned. Otherwise it allocates
> additional mbufs and chains them together to return a multiple
> segments mbuf.
> 
> While that covers most use cases, it forces applications that
> need to work with larger data sizes to support multiple segments
> mbufs. The non-linear characteristic brings complexity and
> performance implications to the application.
> 
> To resolve the issue, add support to attach external buffer
> to a pktmbuf and let the host provide during registration if
> attaching an external buffer to pktmbuf is supported and if
> only linear buffer are supported.
> 
> Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> ---
>  doc/guides/prog_guide/vhost_lib.rst |  35 +++++++++
>  lib/librte_vhost/rte_vhost.h        |   4 ++
>  lib/librte_vhost/socket.c           |  22 ++++++
>  lib/librte_vhost/vhost.c            |  22 ++++++
>  lib/librte_vhost/vhost.h            |   4 ++
>  lib/librte_vhost/virtio_net.c       | 108 ++++++++++++++++++++++++----
>  6 files changed, 181 insertions(+), 14 deletions(-)

Thanks for the new version!
Overall looks good to me, few minor comments inlined.


> +static int
> +virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint16_t size)
> +{
> +	struct rte_mbuf_ext_shared_info *shinfo = NULL;
> +	uint16_t buf_len = 0;
> +	rte_iova_t iova;
> +	void *buf;
> +
> +	/* Try to use pkt buffer to store shinfo to reduce the amount of memory
> +	 * required, otherwise store shinfo in the new buffer.
> +	 */
> +	if (rte_pktmbuf_tailroom(pkt) > sizeof(*shinfo))

How about "rte_pktmbuf_tailroom(pkt) >= sizeof(*shinfo)"?

> +		shinfo = rte_pktmbuf_mtod(pkt,
> +					  struct rte_mbuf_ext_shared_info *);
> +	else
> +		buf_len += sizeof(*shinfo);

The space taken by rte_pktmbuf_ext_shinfo_init_helper() may
be bigger than this depending on buf_end.

https://github.com/DPDK/dpdk/blob/cc8627bc6d7a7bd3ccc2653b746aac4fbaa0bc50/lib/librte_mbuf/rte_mbuf.h#L1585-L1589

> +
> +	if (unlikely(buf_len + size + RTE_PKTMBUF_HEADROOM > UINT16_MAX)) {
> +		RTE_LOG(ERR, VHOST_DATA,

We may have fallback available when extbuf alloc fails,
logging this as error may be too noisy.

> +			"buffer size %d exceeded maximum.\n", buf_len);
> +		return -ENOSPC;
> +	}
> +
> +	buf_len += size + RTE_PKTMBUF_HEADROOM;
> +	buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
> +	if (unlikely(buf == NULL))
> +		return -ENOMEM;
> +
> +	/* initialize shinfo */
> +	if (shinfo) {
> +		shinfo->free_cb = virtio_dev_extbuf_free;
> +		shinfo->fcb_opaque = buf;
> +		rte_mbuf_ext_refcnt_set(shinfo, 1);
> +	} else {
> +		shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> +					      virtio_dev_extbuf_free, buf);
> +		if (unlikely(shinfo == NULL)) {
> +			RTE_LOG(ERR, VHOST_DATA, "Failed to init shinfo\n");
> +			return -1;

buf should be freed.


> @@ -1343,26 +1429,23 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	for (i = 0; i < count; i++) {
>  		struct buf_vector buf_vec[BUF_VECTOR_MAX];
>  		uint16_t head_idx;
> -		uint32_t dummy_len;
> +		uint32_t buf_len;
>  		uint16_t nr_vec = 0;
>  		int err;
>  
>  		if (unlikely(fill_vec_buf_split(dev, vq,
>  						vq->last_avail_idx + i,
>  						&nr_vec, buf_vec,
> -						&head_idx, &dummy_len,
> +						&head_idx, &buf_len,
>  						VHOST_ACCESS_RO) < 0))
>  			break;
>  
>  		if (likely(dev->dequeue_zero_copy == 0))
>  			update_shadow_used_ring_split(vq, head_idx, 0);
>  
> -		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
> -		if (unlikely(pkts[i] == NULL)) {
> -			RTE_LOG(ERR, VHOST_DATA,
> -				"Failed to allocate memory for mbuf.\n");
> +		pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);

The len in descriptor set by guests is 32bit, we may not
want to trim it to 16bit unconditionally like this.

> +		if (unlikely(pkts[i] == NULL))
>  			break;
> -		}
>  
>  		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
>  				mbuf_pool);
> @@ -1451,14 +1534,14 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	for (i = 0; i < count; i++) {
>  		struct buf_vector buf_vec[BUF_VECTOR_MAX];
>  		uint16_t buf_id;
> -		uint32_t dummy_len;
> +		uint32_t buf_len;
>  		uint16_t desc_count, nr_vec = 0;
>  		int err;
>  
>  		if (unlikely(fill_vec_buf_packed(dev, vq,
>  						vq->last_avail_idx, &desc_count,
>  						buf_vec, &nr_vec,
> -						&buf_id, &dummy_len,
> +						&buf_id, &buf_len,
>  						VHOST_ACCESS_RO) < 0))
>  			break;
>  
> @@ -1466,12 +1549,9 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  			update_shadow_used_ring_packed(vq, buf_id, 0,
>  					desc_count);
>  
> -		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
> -		if (unlikely(pkts[i] == NULL)) {
> -			RTE_LOG(ERR, VHOST_DATA,
> -				"Failed to allocate memory for mbuf.\n");
> +		pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);

Ditto.

> +		if (unlikely(pkts[i] == NULL))
>  			break;
> -		}
>  
>  		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
>  				mbuf_pool);
> -- 
> 2.20.1
> 

  reply	other threads:[~2019-10-14  2:47 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-01 22:19 [dpdk-dev] [PATCH] vhost: add support to large linear mbufs Flavio Leitner
2019-10-01 23:10 ` Flavio Leitner
2019-10-02  4:45 ` Shahaf Shuler
2019-10-02  8:04   ` David Marchand
2019-10-02  9:00     ` Shahaf Shuler
2019-10-02 12:58       ` Flavio Leitner
2019-10-02 17:50         ` Shahaf Shuler
2019-10-02 18:15           ` Flavio Leitner
2019-10-03 16:57             ` Ilya Maximets
2019-10-03 21:25               ` Flavio Leitner
2019-10-02  7:51 ` Maxime Coquelin
2019-10-04 20:10 ` [dpdk-dev] [PATCH v2] vhost: add support for large buffers Flavio Leitner
2019-10-06  4:47   ` Shahaf Shuler
2019-10-10  5:12   ` Tiwei Bie
2019-10-10 12:12     ` Flavio Leitner
2019-10-11 17:09   ` [dpdk-dev] [PATCH v3] " Flavio Leitner
2019-10-14  2:44     ` Tiwei Bie [this message]
2019-10-15 16:17     ` [dpdk-dev] [PATCH v4] " Flavio Leitner
2019-10-15 17:41       ` Ilya Maximets
2019-10-15 18:44         ` Flavio Leitner
2019-10-15 18:59       ` [dpdk-dev] [PATCH v5] " Flavio Leitner
2019-10-16 10:02         ` Maxime Coquelin
2019-10-16 11:13         ` Maxime Coquelin
2019-10-16 13:32           ` Ilya Maximets
2019-10-16 13:46             ` Maxime Coquelin
2019-10-16 14:02               ` Flavio Leitner
2019-10-16 14:08                 ` Ilya Maximets
2019-10-16 14:14                   ` Flavio Leitner
2019-10-16 14:05               ` Ilya Maximets
2019-10-29  9:02         ` David Marchand
2019-10-29 12:21           ` Flavio Leitner
2019-10-29 16:19             ` David Marchand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191014024428.GA13124@___ \
    --to=tiwei.bie@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    --cc=fbl@sysclose.org \
    --cc=i.maximets@ovn.org \
    --cc=ian.stokes@intel.com \
    --cc=maxime.coquelin@redhat.com \
    --cc=michalx.obrembski@intel.com \
    --cc=shahafs@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.