From: "Michael S. Tsirkin" <mst@redhat.com>
To: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: netdev@vger.kernel.org, "Jason Wang" <jasowang@redhat.com>,
"Eugenio Pérez" <eperezma@redhat.com>,
"David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Alexei Starovoitov" <ast@kernel.org>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Jesper Dangaard Brouer" <hawk@kernel.org>,
"John Fastabend" <john.fastabend@gmail.com>,
virtualization@lists.linux.dev, bpf@vger.kernel.org
Subject: Re: [PATCH net-next v4 09/15] virtio_net: xsk: bind/unbind xsk
Date: Wed, 12 Jun 2024 19:43:01 -0400 [thread overview]
Message-ID: <20240612194235-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20240611114147.31320-10-xuanzhuo@linux.alibaba.com>
On Tue, Jun 11, 2024 at 07:41:41PM +0800, Xuan Zhuo wrote:
> This patch implements the logic to bind/unbind an xsk pool to the sq and rq.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
I'd just squash this with the previous patch. This one is hard to review in
isolation.
> ---
> drivers/net/virtio_net.c | 199 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 199 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4968ab7eb5a4..c82a0691632c 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -26,6 +26,7 @@
> #include <net/netdev_rx_queue.h>
> #include <net/netdev_queues.h>
> #include <uapi/linux/virtio_ring.h>
> +#include <net/xdp_sock_drv.h>
>
> static int napi_weight = NAPI_POLL_WEIGHT;
> module_param(napi_weight, int, 0444);
> @@ -57,6 +58,8 @@ DECLARE_EWMA(pkt_len, 0, 64)
>
> #define VIRTNET_DRIVER_VERSION "1.0.0"
>
> +static struct virtio_net_hdr_mrg_rxbuf xsk_hdr;
> +
> static const unsigned long guest_offloads[] = {
> VIRTIO_NET_F_GUEST_TSO4,
> VIRTIO_NET_F_GUEST_TSO6,
> @@ -320,6 +323,12 @@ struct send_queue {
> bool premapped;
>
> struct virtnet_sq_dma_info dmainfo;
> +
> + struct {
> + struct xsk_buff_pool *pool;
> +
> + dma_addr_t hdr_dma_address;
> + } xsk;
> };
>
> /* Internal representation of a receive virtqueue */
> @@ -371,6 +380,13 @@ struct receive_queue {
>
> /* Record the last dma info to free after new pages is allocated. */
> struct virtnet_rq_dma *last_dma;
> +
> + struct {
> + struct xsk_buff_pool *pool;
> +
> + /* xdp rxq used by xsk */
> + struct xdp_rxq_info xdp_rxq;
> + } xsk;
> };
>
> /* This structure can contain rss message with maximum settings for indirection table and keysize
> @@ -5168,6 +5184,187 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
> return virtnet_set_guest_offloads(vi, offloads);
> }
>
> +static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
> + struct xsk_buff_pool *pool)
> +{
> + int err, qindex;
> +
> + qindex = rq - vi->rq;
> +
> + if (pool) {
> + err = xdp_rxq_info_reg(&rq->xsk.xdp_rxq, vi->dev, qindex, rq->napi.napi_id);
> + if (err < 0)
> + return err;
> +
> + err = xdp_rxq_info_reg_mem_model(&rq->xsk.xdp_rxq,
> + MEM_TYPE_XSK_BUFF_POOL, NULL);
> + if (err < 0) {
> + xdp_rxq_info_unreg(&rq->xsk.xdp_rxq);
> + return err;
> + }
> +
> + xsk_pool_set_rxq_info(pool, &rq->xsk.xdp_rxq);
> + }
> +
> + virtnet_rx_pause(vi, rq);
> +
> + err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
> + if (err) {
> + netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
> +
> + pool = NULL;
> + }
> +
> + if (!pool)
> + xdp_rxq_info_unreg(&rq->xsk.xdp_rxq);
> +
> + rq->xsk.pool = pool;
> +
> + virtnet_rx_resume(vi, rq);
> +
> + return err;
> +}
> +
> +static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
> + struct send_queue *sq,
> + struct xsk_buff_pool *pool)
> +{
> + int err, qindex;
> +
> + qindex = sq - vi->sq;
> +
> + virtnet_tx_pause(vi, sq);
> +
> + err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf);
> + if (err)
> + netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
> + else
> + err = virtnet_sq_set_premapped(sq, !!pool);
> +
> + if (err)
> + pool = NULL;
> +
> + sq->xsk.pool = pool;
> +
> + virtnet_tx_resume(vi, sq);
> +
> + return err;
> +}
> +
> +static int virtnet_xsk_pool_enable(struct net_device *dev,
> + struct xsk_buff_pool *pool,
> + u16 qid)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + struct receive_queue *rq;
> + struct send_queue *sq;
> + struct device *dma_dev;
> + dma_addr_t hdr_dma;
> + int err;
> +
> + /* In big_packets mode, xdp cannot work, so there is no need to
> + * initialize xsk of rq.
> + *
> + * Support for small mode firstly.
> + */
> + if (vi->big_packets)
> + return -ENOENT;
> +
> + if (qid >= vi->curr_queue_pairs)
> + return -EINVAL;
> +
> + sq = &vi->sq[qid];
> + rq = &vi->rq[qid];
> +
> + /* xsk tx zerocopy depends on the tx napi.
> + *
> + * All xsk packets are actually consumed and sent out from the xsk tx
> + * queue under the tx napi mechanism.
> + */
> + if (!sq->napi.weight)
> + return -EPERM;
> +
> + /* For xsk, the tx and rx should use the same dma device. But
> + * vq->dma_dev allows every vq to have its own dma dev. So check
> + * that the dma dev of the rq and the sq is the same dev.
> + */
> + if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
> + return -EPERM;
> +
> + dma_dev = virtqueue_dma_dev(rq->vq);
> + if (!dma_dev)
> + return -EPERM;
> +
> + hdr_dma = dma_map_single(dma_dev, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE);
> + if (dma_mapping_error(dma_dev, hdr_dma))
> + return -ENOMEM;
> +
> + err = xsk_pool_dma_map(pool, dma_dev, 0);
> + if (err)
> + goto err_xsk_map;
> +
> + err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
> + if (err)
> + goto err_rq;
> +
> + err = virtnet_sq_bind_xsk_pool(vi, sq, pool);
> + if (err)
> + goto err_sq;
> +
> + /* For now, we do not support tx offload, so every tx virtnet hdr is zero.
> + * So all the tx packets can share a single hdr.
> + */
> + sq->xsk.hdr_dma_address = hdr_dma;
> +
> + return 0;
> +
> +err_sq:
> + virtnet_rq_bind_xsk_pool(vi, rq, NULL);
> +err_rq:
> + xsk_pool_dma_unmap(pool, 0);
> +err_xsk_map:
> + dma_unmap_single(dma_dev, hdr_dma, vi->hdr_len, DMA_TO_DEVICE);
> + return err;
> +}
> +
> +static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + struct xsk_buff_pool *pool;
> + struct device *dma_dev;
> + struct receive_queue *rq;
> + struct send_queue *sq;
> + int err1, err2;
> +
> + if (qid >= vi->curr_queue_pairs)
> + return -EINVAL;
> +
> + sq = &vi->sq[qid];
> + rq = &vi->rq[qid];
> +
> + pool = sq->xsk.pool;
> +
> + err1 = virtnet_sq_bind_xsk_pool(vi, sq, NULL);
> + err2 = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
> +
> + xsk_pool_dma_unmap(pool, 0);
> +
> + dma_dev = virtqueue_dma_dev(rq->vq);
> +
> + dma_unmap_single(dma_dev, sq->xsk.hdr_dma_address, vi->hdr_len, DMA_TO_DEVICE);
> +
> + return err1 | err2;
> +}
> +
> +static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
> +{
> + if (xdp->xsk.pool)
> + return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
> + xdp->xsk.queue_id);
> + else
> + return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
> +}
> +
> static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> struct netlink_ext_ack *extack)
> {
> @@ -5293,6 +5490,8 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> switch (xdp->command) {
> case XDP_SETUP_PROG:
> return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
> + case XDP_SETUP_XSK_POOL:
> + return virtnet_xsk_pool_setup(dev, xdp);
> default:
> return -EINVAL;
> }
> --
> 2.32.0.3.g01195cf9f
next prev parent reply other threads:[~2024-06-12 23:43 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-11 11:41 [PATCH net-next v4 00/15] virtio-net: support AF_XDP zero copy Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 01/15] virtio_ring: introduce dma map api for page Xuan Zhuo
2024-06-12 23:20 ` Jakub Kicinski
2024-06-13 2:38 ` Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 02/15] virtio_ring: introduce vring_need_unmap_buffer Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 03/15] virtio_ring: virtqueue_set_dma_premapped() support to disable Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 04/15] virtio_net: separate virtnet_rx_resize() Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 05/15] virtio_net: separate virtnet_tx_resize() Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 06/15] virtio_net: separate receive_buf Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 07/15] virtio_net: refactor the xmit type Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 08/15] virtio_net: sq support premapped mode Xuan Zhuo
2024-06-12 23:23 ` Jakub Kicinski
2024-06-13 2:39 ` Xuan Zhuo
2024-06-12 23:41 ` Michael S. Tsirkin
2024-06-13 2:47 ` Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 09/15] virtio_net: xsk: bind/unbind xsk Xuan Zhuo
2024-06-12 23:43 ` Michael S. Tsirkin [this message]
2024-06-13 2:43 ` Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 10/15] virtio_net: xsk: prevent disable tx napi Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 11/15] virtio_net: xsk: tx: support xmit xsk buffer Xuan Zhuo
2024-06-12 23:25 ` Jakub Kicinski
2024-06-12 23:44 ` Michael S. Tsirkin
2024-06-13 2:43 ` Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 12/15] virtio_net: xsk: tx: support wakeup Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 13/15] virtio_net: xsk: tx: handle the transmitted xsk buffer Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 14/15] virtio_net: xsk: rx: support fill with " Xuan Zhuo
2024-06-11 11:41 ` [PATCH net-next v4 15/15] virtio_net: xsk: rx: support recv small mode Xuan Zhuo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240612194235-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=eperezma@redhat.com \
--cc=hawk@kernel.org \
--cc=jasowang@redhat.com \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=virtualization@lists.linux.dev \
--cc=xuanzhuo@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.