From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: Vladimir Oltean <vladimir.oltean@nxp.com>
Cc: netdev@vger.kernel.org, "David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Claudiu Manoil" <claudiu.manoil@nxp.com>,
	"Björn Töpel" <bjorn@kernel.org>,
	"Magnus Karlsson" <magnus.karlsson@intel.com>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	"John Fastabend" <john.fastabend@gmail.com>,
	bpf@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH net-next 10/11] net: enetc: add RX support for zero-copy XDP sockets
Date: Wed, 8 Feb 2023 17:36:33 +0100
Message-ID: <Y+PPkcu1aS+oIH0o@boxer>
In-Reply-To: <20230206100837.451300-11-vladimir.oltean@nxp.com>

On Mon, Feb 06, 2023 at 12:08:36PM +0200, Vladimir Oltean wrote:
> Add support for filling an RX ring with buffers coming from an XSK umem.
> Although enetc has up to 8 RX rings, we still use one of the 2 per-CPU
> RX rings for XSK.
> 
> To set up an XSK pool on one of the RX queues, we use the
> reconfiguration procedure which temporarily stops the rings.
> 
> Since the RX procedure in the NAPI poll function is completely different
> (both the API for creating an xdp_buff, as well as refilling the ring
> with memory from user space), create a separate enetc_clean_rx_ring_xsk()
> function which gets called when we have both an XSK pool and an XDP
> program on this RX queue.
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> ---
>  drivers/net/ethernet/freescale/enetc/enetc.c  | 377 +++++++++++++++++-
>  drivers/net/ethernet/freescale/enetc/enetc.h  |   3 +
>  .../net/ethernet/freescale/enetc/enetc_pf.c   |   1 +
>  3 files changed, 373 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
> index dee432cacf85..3990c006c011 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc.c
> +++ b/drivers/net/ethernet/freescale/enetc/enetc.c
> @@ -10,6 +10,7 @@
>  #include <net/ip6_checksum.h>
>  #include <net/pkt_sched.h>
>  #include <net/tso.h>
> +#include <net/xdp_sock_drv.h>
>  
>  u32 enetc_port_mac_rd(struct enetc_si *si, u32 reg)
>  {
> @@ -103,6 +104,9 @@ static void enetc_free_rx_swbd(struct enetc_bdr *rx_ring,
>  			       rx_swbd->dir);
>  		__free_page(rx_swbd->page);
>  		rx_swbd->page = NULL;
> +	} else if (rx_swbd->xsk_buff) {
> +		xsk_buff_free(rx_swbd->xsk_buff);
> +		rx_swbd->xsk_buff = NULL;
>  	}
>  }
>  
> @@ -979,6 +983,44 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
>  	return j;
>  }
>  
> +static int enetc_refill_rx_ring_xsk(struct enetc_bdr *rx_ring, int buff_cnt)
> +{
> +	struct xsk_buff_pool *pool = rx_ring->xdp.xsk_pool;
> +	struct enetc_rx_swbd *rx_swbd;
> +	struct xdp_buff *xsk_buff;
> +	union enetc_rx_bd *rxbd;
> +	int i, j;
> +
> +	i = rx_ring->next_to_use;
> +	rxbd = enetc_rxbd(rx_ring, i);
> +
> +	for (j = 0; j < buff_cnt; j++) {
> +		xsk_buff = xsk_buff_alloc(pool); // TODO use _batch?

Yes, please use xsk_buff_alloc_batch() here :P
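
Completely untested sketch of what I mean, just to show the shape of it;
the per-ring xsk_buffs scratch array is made up here and would have to be
allocated somewhere, so take it with a grain of salt:

static int enetc_refill_rx_ring_xsk(struct enetc_bdr *rx_ring, int buff_cnt)
{
	struct xsk_buff_pool *pool = rx_ring->xdp.xsk_pool;
	/* hypothetical scratch array, sized to the ring length */
	struct xdp_buff **xsk_buffs = rx_ring->xdp.xsk_buffs;
	struct enetc_rx_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;
	int i, j, alloc_cnt;

	/* one call into the pool instead of buff_cnt calls; may still
	 * return fewer buffers than requested, just like the open-coded
	 * loop does today
	 */
	alloc_cnt = xsk_buff_alloc_batch(pool, xsk_buffs, buff_cnt);

	i = rx_ring->next_to_use;
	rxbd = enetc_rxbd(rx_ring, i);

	for (j = 0; j < alloc_cnt; j++) {
		rx_swbd = &rx_ring->rx_swbd[i];
		rx_swbd->xsk_buff = xsk_buffs[j];
		rx_swbd->dma = xsk_buff_xdp_get_dma(xsk_buffs[j]);

		/* update RxBD, clear 'R' as well */
		rxbd->w.addr = cpu_to_le64(rx_swbd->dma);
		rxbd->r.lstatus = 0;

		enetc_rxbd_next(rx_ring, &rxbd, &i);
	}

	if (likely(alloc_cnt)) {
		rx_ring->next_to_use = i;

		/* update ENETC's consumer index */
		enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
	}

	return alloc_cnt;
}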

> +		if (!xsk_buff)
> +			break;
> +
> +		rx_swbd = &rx_ring->rx_swbd[i];
> +		rx_swbd->xsk_buff = xsk_buff;
> +		rx_swbd->dma = xsk_buff_xdp_get_dma(xsk_buff);
> +
> +		/* update RxBD */
> +		rxbd->w.addr = cpu_to_le64(rx_swbd->dma);
> +		/* clear 'R' as well */
> +		rxbd->r.lstatus = 0;
> +
> +		enetc_rxbd_next(rx_ring, &rxbd, &i);
> +	}
> +
> +	if (likely(j)) {
> +		rx_ring->next_to_use = i;
> +
> +		/* update ENETC's consumer index */
> +		enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
> +	}
> +
> +	return j;
> +}
> +
>  #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
>  static void enetc_get_rx_tstamp(struct net_device *ndev,
>  				union enetc_rx_bd *rxbd,
> @@ -1128,6 +1170,18 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
>  	enetc_flip_rx_buff(rx_ring, rx_swbd);
>  }
>  
> +static void enetc_put_rx_swbd(struct enetc_bdr *rx_ring, int i)
> +{
> +	struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i];
> +
> +	if (rx_swbd->xsk_buff) {
> +		xsk_buff_free(rx_swbd->xsk_buff);
> +		rx_swbd->xsk_buff = NULL;
> +	} else {
> +		enetc_put_rx_buff(rx_ring, rx_swbd);
> +	}
> +}
> +
>  static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
>  					      u32 bd_status,
>  					      union enetc_rx_bd **rxbd, int *i,
> @@ -1136,7 +1190,7 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
>  	if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))))
>  		return false;
>  
> -	enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
> +	enetc_put_rx_swbd(rx_ring, *i);
>  	(*buffs_missing)++;
>  	enetc_rxbd_next(rx_ring, rxbd, i);
>  
> @@ -1144,7 +1198,7 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
>  		dma_rmb();
>  		bd_status = le32_to_cpu((*rxbd)->r.lstatus);
>  
> -		enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
> +		enetc_put_rx_swbd(rx_ring, *i);
>  		(*buffs_missing)++;
>  		enetc_rxbd_next(rx_ring, rxbd, i);
>  	}
> @@ -1484,6 +1538,43 @@ static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
>  	}
>  }
>  
> +static struct xdp_buff *enetc_build_xsk_buff(struct xsk_buff_pool *pool,
> +					     struct enetc_bdr *rx_ring,
> +					     u32 bd_status,
> +					     union enetc_rx_bd **rxbd, int *i,
> +					     int *buffs_missing, int *rx_byte_cnt)
> +{
> +	struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[*i];
> +	u16 size = le16_to_cpu((*rxbd)->r.buf_len);
> +	struct xdp_buff *xsk_buff;
> +
> +	/* Multi-buffer frames are not supported in XSK mode */

Nice! I realized we need to forbid that on ice now.

> +	if (unlikely(!(bd_status & ENETC_RXBD_LSTATUS_F))) {
> +		while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
> +			enetc_put_rx_swbd(rx_ring, *i);
> +
> +			(*buffs_missing)++;
> +			enetc_rxbd_next(rx_ring, rxbd, i);
> +			dma_rmb();
> +			bd_status = le32_to_cpu((*rxbd)->r.lstatus);
> +		}
> +
> +		return NULL;
> +	}
> +
> +	xsk_buff = rx_swbd->xsk_buff;
> +	xsk_buff_set_size(xsk_buff, size);
> +	xsk_buff_dma_sync_for_cpu(xsk_buff, pool);
> +
> +	rx_swbd->xsk_buff = NULL;
> +
> +	(*buffs_missing)++;
> +	(*rx_byte_cnt) += size;
> +	enetc_rxbd_next(rx_ring, rxbd, i);
> +
> +	return xsk_buff;
> +}
> +
>  /* Convert RX buffer descriptors to TX buffer descriptors. These will be
>   * recycled back into the RX ring in enetc_clean_tx_ring.
>   */
> @@ -1659,11 +1750,136 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
>  	return rx_frm_cnt;
>  }
>  
> +static void enetc_xsk_buff_to_skb(struct xdp_buff *xsk_buff,
> +				  struct enetc_bdr *rx_ring,
> +				  union enetc_rx_bd *rxbd,
> +				  struct napi_struct *napi)
> +{
> +	size_t len = xdp_get_buff_len(xsk_buff);
> +	struct sk_buff *skb;
> +
> +	skb = napi_alloc_skb(napi, len);
> +	if (unlikely(!skb)) {
> +		rx_ring->stats.rx_alloc_errs++;
> +		goto out;
> +	}
> +
> +	skb_put_data(skb, xsk_buff->data, len);
> +
> +	enetc_get_offloads(rx_ring, rxbd, skb);
> +
> +	skb_record_rx_queue(skb, rx_ring->index);
> +	skb->protocol = eth_type_trans(skb, rx_ring->ndev);
> +
> +	rx_ring->stats.packets += skb->len;
> +	rx_ring->stats.bytes++;
> +
> +	napi_gro_receive(napi, skb);
> +out:
> +	xsk_buff_free(xsk_buff);
> +}
> +
> +static int enetc_clean_rx_ring_xsk(struct enetc_bdr *rx_ring,
> +				   struct napi_struct *napi, int work_limit,
> +				   struct bpf_prog *prog,
> +				   struct xsk_buff_pool *pool)
> +{
> +	struct net_device *ndev = rx_ring->ndev;
> +	union enetc_rx_bd *rxbd, *orig_rxbd;
> +	int rx_frm_cnt = 0, rx_byte_cnt = 0;
> +	int xdp_redirect_frm_cnt = 0;
> +	struct xdp_buff *xsk_buff;
> +	int buffs_missing, err, i;
> +	bool wakeup_xsk = false;
> +	u32 bd_status, xdp_act;
> +
> +	buffs_missing = enetc_bd_unused(rx_ring);
> +	/* next descriptor to process */
> +	i = rx_ring->next_to_clean;
> +
> +	while (likely(rx_frm_cnt < work_limit)) {
> +		if (buffs_missing >= ENETC_RXBD_BUNDLE) {
> +			buffs_missing -= enetc_refill_rx_ring_xsk(rx_ring,
> +								  buffs_missing);
> +			wakeup_xsk |= (buffs_missing != 0);
> +		}
> +
> +		rxbd = enetc_rxbd(rx_ring, i);
> +		bd_status = le32_to_cpu(rxbd->r.lstatus);
> +		if (!bd_status)
> +			break;
> +
> +		enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
> +		dma_rmb(); /* for reading other rxbd fields */
> +
> +		if (enetc_check_bd_errors_and_consume(rx_ring, bd_status,
> +						      &rxbd, &i,
> +						      &buffs_missing))
> +			continue;
> +
> +		orig_rxbd = rxbd;
> +
> +		xsk_buff = enetc_build_xsk_buff(pool, rx_ring, bd_status,
> +						&rxbd, &i, &buffs_missing,
> +						&rx_byte_cnt);
> +		if (!xsk_buff)
> +			continue;
> +
> +		xdp_act = bpf_prog_run_xdp(prog, xsk_buff);
> +		switch (xdp_act) {
> +		default:
> +			bpf_warn_invalid_xdp_action(ndev, prog, xdp_act);
> +			fallthrough;
> +		case XDP_ABORTED:
> +			trace_xdp_exception(ndev, prog, xdp_act);
> +			fallthrough;
> +		case XDP_DROP:
> +			xsk_buff_free(xsk_buff);
> +			break;
> +		case XDP_PASS:
> +			enetc_xsk_buff_to_skb(xsk_buff, rx_ring, orig_rxbd,
> +					      napi);
> +			break;
> +		case XDP_REDIRECT:
> +			err = xdp_do_redirect(ndev, xsk_buff, prog);
> +			if (unlikely(err)) {
> +				if (err == -ENOBUFS)
> +					wakeup_xsk = true;
> +				xsk_buff_free(xsk_buff);
> +				rx_ring->stats.xdp_redirect_failures++;
> +			} else {
> +				xdp_redirect_frm_cnt++;
> +				rx_ring->stats.xdp_redirect++;
> +			}

No XDP_TX support? I don't see it being added in the next patch either.
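
If you do end up adding it, I would imagine something roughly like the
below (completely untested; enetc_xdp_tx_one() is a made-up name for
whatever helper your ndo_xdp_xmit path uses to queue a single xdp_frame,
and I'm assuming the same stats fields as in enetc_clean_rx_ring_xdp()).
ZC buffers can't go to the TX ring as-is, so other ZC drivers bounce them
through xdp_convert_buff_to_frame(), which copies the data out of the UMEM:

		case XDP_TX: {
			struct xdp_frame *xdpf;

			xdpf = xdp_convert_buff_to_frame(xsk_buff);
			if (unlikely(!xdpf)) {
				/* conversion failed, buffer is still ours */
				xsk_buff_free(xsk_buff);
				rx_ring->stats.xdp_tx_drops++;
				break;
			}
			/* on success the xsk_buff was already returned to
			 * the pool by the conversion, only xdpf is left
			 */
			if (enetc_xdp_tx_one(ndev, xdpf)) /* made-up helper */
				rx_ring->stats.xdp_tx++;
			else
				rx_ring->stats.xdp_tx_drops++;
			break;
		}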

> +		}
> +
> +		rx_frm_cnt++;
> +	}
> +
> +	rx_ring->next_to_clean = i;
> +
> +	rx_ring->stats.packets += rx_frm_cnt;
> +	rx_ring->stats.bytes += rx_byte_cnt;
> +
> +	if (xdp_redirect_frm_cnt)
> +		xdp_do_flush_map();
> +
> +	if (xsk_uses_need_wakeup(pool)) {
> +		if (wakeup_xsk)
> +			xsk_set_rx_need_wakeup(pool);
> +		else
> +			xsk_clear_rx_need_wakeup(pool);
> +	}
> +
> +	return rx_frm_cnt;
> +}
> +
>  static int enetc_poll(struct napi_struct *napi, int budget)
>  {
>  	struct enetc_int_vector
>  		*v = container_of(napi, struct enetc_int_vector, napi);
>  	struct enetc_bdr *rx_ring = &v->rx_ring;
> +	struct xsk_buff_pool *pool;
>  	struct bpf_prog *prog;
>  	bool complete = true;
>  	int work_done;
> @@ -1676,10 +1892,15 @@ static int enetc_poll(struct napi_struct *napi, int budget)
>  			complete = false;
>  
>  	prog = rx_ring->xdp.prog;
> -	if (prog)

(...)
