BPF List
 help / color / mirror / Atom feed
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
To: Jason Wang <jasowang@redhat.com>
Cc: netdev@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>,
	"Eugenio Pérez" <eperezma@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	"John Fastabend" <john.fastabend@gmail.com>,
	virtualization@lists.linux.dev, bpf@vger.kernel.org
Subject: Re: [RFC net-next v1 10/12] virtio_net: xsk: tx: support xmit xsk buffer
Date: Tue, 24 Sep 2024 19:41:27 +0800	[thread overview]
Message-ID: <1727178087.972947-2-xuanzhuo@linux.alibaba.com> (raw)
In-Reply-To: <CACGkMEvbxs4AK+xCW0i-ZMo4B5WEKMLmFHBu_7ZRa+4Pv+-44w@mail.gmail.com>

On Tue, 24 Sep 2024 15:35:08 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Sep 24, 2024 at 9:32 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > The driver's tx napi is very important for XSK. It is responsible for
> > obtaining data from the XSK queue and sending it out.
> >
> > At the beginning, we need to trigger tx napi.
> >
> > virtnet_free_old_xmit distinguishes three type ptr(skb, xdp frame, xsk
> > buffer) by the last bits of the pointer.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/net/virtio_net.c | 176 ++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 166 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 3ad4c6e3ef18..1a870f1df910 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -83,6 +83,7 @@ struct virtnet_sq_free_stats {
> >         u64 bytes;
> >         u64 napi_packets;
> >         u64 napi_bytes;
> > +       u64 xsk;
> >  };
> >
> >  struct virtnet_sq_stats {
> > @@ -514,16 +515,20 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
> >                                                struct sk_buff *curr_skb,
> >                                                struct page *page, void *buf,
> >                                                int len, int truesize);
> > +static void virtnet_xsk_completed(struct send_queue *sq, int num);
> >
> >  enum virtnet_xmit_type {
> >         VIRTNET_XMIT_TYPE_SKB,
> >         VIRTNET_XMIT_TYPE_SKB_ORPHAN,
> >         VIRTNET_XMIT_TYPE_XDP,
> > +       VIRTNET_XMIT_TYPE_XSK,
> >  };
> >
> >  /* We use the last two bits of the pointer to distinguish the xmit type. */
> >  #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))
> >
> > +#define VIRTIO_XSK_FLAG_OFFSET 4
>
> Any reason this is not 2?

There's no particular reason for this; any value of 2 or greater will work.


>
> > +
> >  static enum virtnet_xmit_type virtnet_xmit_ptr_strip(void **ptr)
> >  {
> >         unsigned long p = (unsigned long)*ptr;
> > @@ -546,6 +551,11 @@ static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
> >                                     GFP_ATOMIC);
> >  }
> >
> > +static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
> > +{
> > +       return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
> > +}
> > +
> >  static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> >  {
> >         sg_assign_page(sg, NULL);
> > @@ -587,11 +597,27 @@ static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> >                         stats->bytes += xdp_get_frame_len(frame);
> >                         xdp_return_frame(frame);
> >                         break;
> > +
> > +               case VIRTNET_XMIT_TYPE_XSK:
> > +                       stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
> > +                       stats->xsk++;
> > +                       break;
> >                 }
> >         }
> >         netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
>
> Not related to this patch, but this seems unnecessary to AF_XDP.

YES.

netdev_tx_completed_queue checks napi_bytes first,
so I do not think we need to do anything about this.

>
> >  }
> >
> > +static void virtnet_free_old_xmit(struct send_queue *sq,
> > +                                 struct netdev_queue *txq,
> > +                                 bool in_napi,
> > +                                 struct virtnet_sq_free_stats *stats)
> > +{
> > +       __free_old_xmit(sq, txq, in_napi, stats);
> > +
> > +       if (stats->xsk)
> > +               virtnet_xsk_completed(sq, stats->xsk);
> > +}
> > +
> >  /* Converting between virtqueue no. and kernel tx/rx queue no.
> >   * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
> >   */
> > @@ -1019,7 +1045,7 @@ static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> >  {
> >         struct virtnet_sq_free_stats stats = {0};
> >
> > -       __free_old_xmit(sq, txq, in_napi, &stats);
> > +       virtnet_free_old_xmit(sq, txq, in_napi, &stats);
> >
> >         /* Avoid overhead when no packets have been processed
> >          * happens when called speculatively from start_xmit.
> > @@ -1380,6 +1406,111 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
> >         return err;
> >  }
> >
> > +static void *virtnet_xsk_to_ptr(u32 len)
> > +{
> > +       unsigned long p;
> > +
> > +       p = len << VIRTIO_XSK_FLAG_OFFSET;
> > +
> > +       return virtnet_xmit_ptr_mix((void *)p, VIRTNET_XMIT_TYPE_XSK);
> > +}
> > +
> > +static int virtnet_xsk_xmit_one(struct send_queue *sq,
> > +                               struct xsk_buff_pool *pool,
> > +                               struct xdp_desc *desc)
> > +{
> > +       struct virtnet_info *vi;
> > +       dma_addr_t addr;
> > +
> > +       vi = sq->vq->vdev->priv;
> > +
> > +       addr = xsk_buff_raw_get_dma(pool, desc->addr);
> > +       xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);
> > +
> > +       sg_init_table(sq->sg, 2);
> > +
> > +       sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
> > +       sg_fill_dma(sq->sg + 1, addr, desc->len);
> > +
> > +       return virtqueue_add_outbuf(sq->vq, sq->sg, 2,
> > +                                   virtnet_xsk_to_ptr(desc->len), GFP_ATOMIC);
> > +}
> > +
> > +static int virtnet_xsk_xmit_batch(struct send_queue *sq,
> > +                                 struct xsk_buff_pool *pool,
> > +                                 unsigned int budget,
> > +                                 u64 *kicks)
> > +{
> > +       struct xdp_desc *descs = pool->tx_descs;
> > +       bool kick = false;
> > +       u32 nb_pkts, i;
> > +       int err;
> > +
> > +       budget = min_t(u32, budget, sq->vq->num_free);
> > +
> > +       nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
> > +       if (!nb_pkts)
> > +               return 0;
> > +
> > +       for (i = 0; i < nb_pkts; i++) {
> > +               err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
> > +               if (unlikely(err)) {
> > +                       xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
> > +                       break;
> > +               }
> > +
> > +               kick = true;
> > +       }
> > +
> > +       if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
> > +               (*kicks)++;
> > +
> > +       return i;
> > +}
> > +
> > +static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
> > +                            int budget)
> > +{
> > +       struct virtnet_info *vi = sq->vq->vdev->priv;
> > +       struct virtnet_sq_free_stats stats = {};
> > +       struct net_device *dev = vi->dev;
> > +       u64 kicks = 0;
> > +       int sent;
> > +
> > +       /* Avoid to wakeup napi meanless, so call __free_old_xmit. */
>
> I don't understand the meaning of this comment.

The comment needs to be more detailed. Here I want to explain why we do not
just use free_old_xmit.


>
> > +       __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);
> > +
> > +       if (stats.xsk)
> > +               xsk_tx_completed(sq->xsk_pool, stats.xsk);
> > +
> > +       sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
> > +
> > +       if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
> > +               check_sq_full_and_disable(vi, vi->dev, sq);
> > +
> > +       u64_stats_update_begin(&sq->stats.syncp);
> > +       u64_stats_add(&sq->stats.packets, stats.packets);
> > +       u64_stats_add(&sq->stats.bytes,   stats.bytes);
> > +       u64_stats_add(&sq->stats.kicks,   kicks);
> > +       u64_stats_add(&sq->stats.xdp_tx,  sent);
> > +       u64_stats_update_end(&sq->stats.syncp);
> > +
> > +       if (xsk_uses_need_wakeup(pool))
> > +               xsk_set_tx_need_wakeup(pool);
> > +
> > +       return sent == budget;
> > +}
> > +
> > +static void xsk_wakeup(struct send_queue *sq)
> > +{
> > +       if (napi_if_scheduled_mark_missed(&sq->napi))
> > +               return;
> > +
> > +       local_bh_disable();
> > +       virtqueue_napi_schedule(&sq->napi, sq->vq);
> > +       local_bh_enable();
> > +}
> > +
> >  static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
> >  {
> >         struct virtnet_info *vi = netdev_priv(dev);
> > @@ -1393,14 +1524,19 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
> >
> >         sq = &vi->sq[qid];
> >
> > -       if (napi_if_scheduled_mark_missed(&sq->napi))
> > -               return 0;
> > +       xsk_wakeup(sq);
> > +       return 0;
> > +}
> >
> > -       local_bh_disable();
> > -       virtqueue_napi_schedule(&sq->napi, sq->vq);
> > -       local_bh_enable();
> > +static void virtnet_xsk_completed(struct send_queue *sq, int num)
> > +{
> > +       xsk_tx_completed(sq->xsk_pool, num);
> >
> > -       return 0;
> > +       /* If this is called by rx poll, start_xmit and xdp xmit we should
> > +        * wakeup the tx napi to consume the xsk tx queue, because the tx
> > +        * interrupt may not be triggered.
> > +        */
> > +       xsk_wakeup(sq);
> >  }
> >
> >  static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
> > @@ -1516,8 +1652,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> >         }
> >
> >         /* Free up any pending old buffers before queueing new ones. */
> > -       __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> > -                       false, &stats);
> > +       virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> > +                             false, &stats);
> >
> >         for (i = 0; i < n; i++) {
> >                 struct xdp_frame *xdpf = frames[i];
> > @@ -2961,6 +3097,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> >         struct virtnet_info *vi = sq->vq->vdev->priv;
> >         unsigned int index = vq2txq(sq->vq);
> >         struct netdev_queue *txq;
> > +       bool xsk_busy = false;
> >         int opaque;
> >         bool done;
> >
> > @@ -2973,7 +3110,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> >         txq = netdev_get_tx_queue(vi->dev, index);
> >         __netif_tx_lock(txq, raw_smp_processor_id());
> >         virtqueue_disable_cb(sq->vq);
> > -       free_old_xmit(sq, txq, !!budget);
> > +
> > +       if (sq->xsk_pool)
> > +               xsk_busy = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);
>
> I think we need a better name of "xsk_busy", it looks like it means we
> exceeds the quota. Or just return the number of buffers received and
> let the caller to judge.

Will fix.

Thanks.


>
> Other looks good.
>
> With this fixed.
>
> Acked-by: Jason Wang <jasowang@redhat.com>
>
> Thanks
>

  reply	other threads:[~2024-09-24 11:48 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-24  1:31 [RFC net-next v1 00/12] virtio-net: support AF_XDP zero copy (tx) Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 01/12] virtio_ring: introduce vring_need_unmap_buffer Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 02/12] virtio_ring: split: record extras for indirect buffers Xuan Zhuo
2024-09-24  7:34   ` Jason Wang
2024-09-24  8:32     ` Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 03/12] virtio_ring: packed: " Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 04/12] virtio_ring: perform premapped operations based on per-buffer Xuan Zhuo
2024-09-24  7:35   ` Jason Wang
2024-09-24  8:22     ` Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 05/12] virtio-net: rq submits premapped per-buffer Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 06/12] virtio_ring: remove API virtqueue_set_dma_premapped Xuan Zhuo
2024-09-24  1:31 ` [RFC net-next v1 07/12] virtio_net: refactor the xmit type Xuan Zhuo
2024-09-24  7:35   ` Jason Wang
2024-09-24 11:39     ` Xuan Zhuo
2024-09-24  1:32 ` [RFC net-next v1 08/12] virtio_net: xsk: bind/unbind xsk for tx Xuan Zhuo
2024-09-24  7:35   ` Jason Wang
2024-09-24  9:22     ` Xuan Zhuo
2024-09-24  1:32 ` [RFC net-next v1 09/12] virtio_net: xsk: prevent disable tx napi Xuan Zhuo
2024-09-24  1:32 ` [RFC net-next v1 10/12] virtio_net: xsk: tx: support xmit xsk buffer Xuan Zhuo
2024-09-24  7:35   ` Jason Wang
2024-09-24 11:41     ` Xuan Zhuo [this message]
2024-09-24  1:32 ` [RFC net-next v1 11/12] virtio_net: update tx timeout record Xuan Zhuo
2024-09-24  1:32 ` [RFC net-next v1 12/12] virtio_net: xdp_features add NETDEV_XDP_ACT_XSK_ZEROCOPY Xuan Zhuo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1727178087.972947-2-xuanzhuo@linux.alibaba.com \
    --to=xuanzhuo@linux.alibaba.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=eperezma@redhat.com \
    --cc=hawk@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=virtualization@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox