From: "Michael S. Tsirkin" <mst@redhat.com>
To: Jason Wang <jasowang@redhat.com>
Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets
Date: Mon, 21 May 2018 17:33:52 +0300 [thread overview]
Message-ID: <20180521173253-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <1526893473-20128-13-git-send-email-jasowang@redhat.com>
On Mon, May 21, 2018 at 05:04:33PM +0800, Jason Wang wrote:
> This patch implements XDP batching for vhost_net with tun. This is
> done by batching XDP buffs in vhost and submitting them when:
>
> - vhost_net cannot build an XDP buff (mostly because of the size of the packet)
> - the number of batched buffers reaches the limit (VHOST_RX_BATCH).
> - tun accepts a batch of XDP buffs through msg_control and processes them
> in a batch
>
> With this, tun XDP can benefit from e.g. batch transmission during
> XDP_REDIRECT or XDP_TX.
>
> Tests show a 21% improvement in TX pps (from ~3.2Mpps to ~3.9Mpps)
> while transmitting through testpmd from guest to host by
> xdp_redirect_map between tap0 and ixgbe.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
s/underlayer/underlying/ ?
> ---
> drivers/net/tun.c | 36 +++++++++++++++++----------
> drivers/vhost/net.c | 71 ++++++++++++++++++++++++++++++++++++-----------------
> 2 files changed, 71 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index b586b3f..5d16d18 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1616,7 +1616,6 @@ static u32 tun_do_xdp(struct tun_struct *tun,
> switch (act) {
> case XDP_REDIRECT:
> *err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
> - xdp_do_flush_map();
> if (*err)
> break;
> goto out;
> @@ -1624,7 +1623,6 @@ static u32 tun_do_xdp(struct tun_struct *tun,
> *err = tun_xdp_tx(tun->dev, xdp);
> if (*err)
> break;
> - tun_xdp_flush(tun->dev);
> goto out;
> case XDP_PASS:
> goto out;
> @@ -2400,9 +2398,6 @@ static int tun_xdp_one(struct tun_struct *tun,
> int err = 0;
> bool skb_xdp = false;
>
> - preempt_disable();
> - rcu_read_lock();
> -
> xdp_prog = rcu_dereference(tun->xdp_prog);
> if (xdp_prog) {
> if (gso->gso_type) {
> @@ -2461,15 +2456,12 @@ static int tun_xdp_one(struct tun_struct *tun,
> tun_flow_update(tun, rxhash, tfile);
>
> out:
> - rcu_read_unlock();
> - preempt_enable();
> -
> return err;
> }
>
> static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> {
> - int ret;
> + int ret, i;
> struct tun_file *tfile = container_of(sock, struct tun_file, socket);
> struct tun_struct *tun = tun_get(tfile);
> struct tun_msg_ctl *ctl = m->msg_control;
> @@ -2477,10 +2469,28 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> if (!tun)
> return -EBADFD;
>
> - if (ctl && ctl->type == TUN_MSG_PTR) {
> - ret = tun_xdp_one(tun, tfile, ctl->ptr);
> - if (!ret)
> - ret = total_len;
> + if (ctl && ((ctl->type & 0xF) == TUN_MSG_PTR)) {
> + int n = ctl->type >> 16;
> +
> + preempt_disable();
> + rcu_read_lock();
> +
> + for (i = 0; i < n; i++) {
> + struct xdp_buff *x = (struct xdp_buff *)ctl->ptr;
> + struct xdp_buff *xdp = &x[i];
> +
> + xdp_set_data_meta_invalid(xdp);
> + xdp->rxq = &tfile->xdp_rxq;
> + tun_xdp_one(tun, tfile, xdp);
> + }
> +
> + xdp_do_flush_map();
> + tun_xdp_flush(tun->dev);
> +
> + rcu_read_unlock();
> + preempt_enable();
> +
> + ret = total_len;
> goto out;
> }
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 0d84de6..bec4109 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -118,6 +118,7 @@ struct vhost_net_virtqueue {
> struct ptr_ring *rx_ring;
> struct vhost_net_buf rxq;
> struct xdp_buff xdp[VHOST_RX_BATCH];
> + struct vring_used_elem heads[VHOST_RX_BATCH];
> };
>
> struct vhost_net {
> @@ -511,7 +512,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
> void *buf;
> int copied;
>
> - if (len < nvq->sock_hlen)
> + if (unlikely(len < nvq->sock_hlen))
> return -EFAULT;
>
> if (SKB_DATA_ALIGN(len + pad) +
> @@ -567,11 +568,37 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
> return 0;
> }
>
> +static void vhost_tx_batch(struct vhost_net *net,
> + struct vhost_net_virtqueue *nvq,
> + struct socket *sock,
> + struct msghdr *msghdr, int n)
> +{
> + struct tun_msg_ctl ctl = {
> + .type = n << 16 | TUN_MSG_PTR,
> + .ptr = nvq->xdp,
> + };
> + int err;
> +
> + if (n == 0)
> + return;
> +
> + msghdr->msg_control = &ctl;
> + err = sock->ops->sendmsg(sock, msghdr, 0);
> +
> + if (unlikely(err < 0)) {
> + /* FIXME vq_err() */
> + vq_err(&nvq->vq, "sendmsg err!\n");
> + return;
> + }
> + vhost_add_used_and_signal_n(&net->dev, &nvq->vq, nvq->vq.heads, n);
> +}
> +
> +/* Expects to be always run from workqueue - which acts as
> + * read-side critical section for our kind of RCU. */
> static void handle_tx_copy(struct vhost_net *net)
> {
> struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> struct vhost_virtqueue *vq = &nvq->vq;
> - struct xdp_buff xdp;
> unsigned out, in;
> int head;
> struct msghdr msg = {
> @@ -586,7 +613,6 @@ static void handle_tx_copy(struct vhost_net *net)
> size_t hdr_size;
> struct socket *sock;
> struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
> - struct tun_msg_ctl ctl;
> int sent_pkts = 0;
> s16 nheads = 0;
>
> @@ -631,22 +657,24 @@ static void handle_tx_copy(struct vhost_net *net)
> vq->heads[nheads].id = cpu_to_vhost32(vq, head);
> vq->heads[nheads].len = 0;
>
> - err = vhost_net_build_xdp(nvq, &msg.msg_iter, &xdp);
> - if (!err) {
> - ctl.type = TUN_MSG_PTR;
> - ctl.ptr = &xdp;
> - msg.msg_control = &ctl;
> - } else
> - msg.msg_control = NULL;
> -
> total_len += len;
> - if (total_len < VHOST_NET_WEIGHT &&
> - vhost_has_more_pkts(net, vq)) {
> - msg.msg_flags |= MSG_MORE;
> - } else {
> - msg.msg_flags &= ~MSG_MORE;
> + err = vhost_net_build_xdp(nvq, &msg.msg_iter,
> + &nvq->xdp[nheads]);
> + if (!err) {
> + if (++nheads == VHOST_RX_BATCH) {
> + vhost_tx_batch(net, nvq, sock, &msg, nheads);
> + nheads = 0;
> + }
> + goto done;
> + } else if (unlikely(err != -ENOSPC)) {
> + vq_err(vq, "Fail to build XDP buffer\n");
> + break;
> }
>
> + vhost_tx_batch(net, nvq, sock, &msg, nheads);
> + msg.msg_control = NULL;
> + nheads = 0;
> +
> /* TODO: Check specific error and bomb out unless ENOBUFS? */
> err = sock->ops->sendmsg(sock, &msg, len);
> if (unlikely(err < 0)) {
> @@ -657,11 +685,9 @@ static void handle_tx_copy(struct vhost_net *net)
> if (err != len)
> pr_debug("Truncated TX packet: "
> " len %d != %zd\n", err, len);
> - if (++nheads == VHOST_RX_BATCH) {
> - vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
> - nheads);
> - nheads = 0;
> - }
> +
> + vhost_add_used_and_signal(&net->dev, vq, head, 0);
> +done:
> if (vhost_exceeds_weight(++sent_pkts, total_len)) {
> vhost_poll_queue(&vq->poll);
> break;
> @@ -669,8 +695,7 @@ static void handle_tx_copy(struct vhost_net *net)
> }
> out:
> if (nheads)
> - vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
> - nheads);
> + vhost_tx_batch(net, nvq, sock, &msg, nheads);
> mutex_unlock(&vq->mutex);
> }
>
> --
> 2.7.4
next prev parent reply other threads:[~2018-05-21 14:33 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-21 9:04 [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 01/12] vhost_net: introduce helper to initialize tx iov iter Jason Wang
2018-05-21 16:24 ` Jesse Brandeburg
2018-05-22 12:26 ` Jason Wang
2018-05-22 12:26 ` Jason Wang
2018-05-21 16:24 ` Jesse Brandeburg
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 02/12] vhost_net: introduce vhost_exceeds_weight() Jason Wang
2018-05-21 16:29 ` Jesse Brandeburg
2018-05-21 16:29 ` Jesse Brandeburg
2018-05-22 12:27 ` Jason Wang
2018-05-22 12:27 ` Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 03/12] vhost_net: introduce vhost_has_more_pkts() Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 16:39 ` Jesse Brandeburg
2018-05-22 12:31 ` Jason Wang
2018-05-22 12:31 ` Jason Wang
2018-05-21 16:39 ` Jesse Brandeburg
2018-05-21 9:04 ` [RFC PATCH net-next 04/12] vhost_net: split out datacopy logic Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 16:46 ` Jesse Brandeburg
2018-05-21 16:46 ` Jesse Brandeburg
2018-05-22 12:39 ` Jason Wang
2018-05-22 12:39 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 05/12] vhost_net: batch update used ring for datacopy TX Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 06/12] tuntap: enable premmption early Jason Wang
2018-05-21 14:32 ` Michael S. Tsirkin
2018-05-21 14:32 ` Michael S. Tsirkin
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 07/12] tuntap: simplify error handling in tun_build_skb() Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 08/12] tuntap: tweak on the path of non-xdp case " Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 09/12] tuntap: split out XDP logic Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 10/12] vhost_net: build xdp buff Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 16:56 ` Jesse Brandeburg
2018-05-21 22:21 ` Michael S. Tsirkin
2018-05-21 22:21 ` Michael S. Tsirkin
2018-05-22 12:41 ` Jason Wang
2018-05-22 12:41 ` Jason Wang
2018-05-21 16:56 ` Jesse Brandeburg
2018-05-21 9:04 ` [RFC PATCH net-next 11/12] vhost_net: passing raw xdp buff to tun Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets Jason Wang
2018-05-21 9:04 ` Jason Wang
2018-05-21 14:33 ` Michael S. Tsirkin
2018-05-21 14:33 ` Michael S. Tsirkin [this message]
2018-05-25 17:53 ` [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Michael S. Tsirkin
2018-05-25 17:53 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180521173253-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.