All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: <davem@davemloft.net>, <kuba@kernel.org>, <pabeni@redhat.com>,
	<edumazet@google.com>, <netdev@vger.kernel.org>,
	Sriram Yagnaraman <sriram.yagnaraman@est.tech>,
	<magnus.karlsson@intel.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <hawk@kernel.org>,
	<john.fastabend@gmail.com>, <bpf@vger.kernel.org>,
	<kurt@linutronix.de>, <sriram.yagnaraman@ericsson.com>,
	<richardcochran@gmail.com>, <benjamin.steinke@woks-audio.com>,
	<bigeasy@linutronix.de>,
	"Chandan Kumar Rout" <chandanx.rout@intel.com>
Subject: Re: [PATCH net-next 2/4] igb: Introduce XSK data structures and helpers
Date: Sat, 10 Aug 2024 15:35:25 +0200	[thread overview]
Message-ID: <ZrdsnT5aIs85jyL/@boxer> (raw)
In-Reply-To: <20240808183556.386397-3-anthony.l.nguyen@intel.com>

On Thu, Aug 08, 2024 at 11:35:52AM -0700, Tony Nguyen wrote:
> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> 
> Add the following ring flags
> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> - IGB_RING_FLAG_AF_XDP_ZC (xsk pool is active)
> 
> Add a xdp_buff array for use with XSK receive batch API, and a pointer
> to xsk_pool in igb_adapter.
> 
> Add enable/disable functions for TX and RX rings
> Add enable/disable functions for XSK pool
> Add xsk wakeup function
> 
> None of the above functionality will be active until
> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
> 
> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
> Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> ---
>  drivers/net/ethernet/intel/igb/Makefile   |   2 +-
>  drivers/net/ethernet/intel/igb/igb.h      |  14 +-
>  drivers/net/ethernet/intel/igb/igb_main.c |   9 +
>  drivers/net/ethernet/intel/igb/igb_xsk.c  | 210 ++++++++++++++++++++++
>  4 files changed, 233 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/ethernet/intel/igb/igb_xsk.c
> 
> diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
> index 463c0d26b9d4..6c1b702fd992 100644
> --- a/drivers/net/ethernet/intel/igb/Makefile
> +++ b/drivers/net/ethernet/intel/igb/Makefile
> @@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
>  
>  igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
>  	 e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
> -	 e1000_i210.o igb_ptp.o igb_hwmon.o
> +	 e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
> diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
> index 0de71ec324ed..053130c01480 100644
> --- a/drivers/net/ethernet/intel/igb/igb.h
> +++ b/drivers/net/ethernet/intel/igb/igb.h
> @@ -20,6 +20,7 @@
>  #include <linux/mdio.h>
>  
>  #include <net/xdp.h>
> +#include <net/xdp_sock_drv.h>
>  
>  struct igb_adapter;
>  
> @@ -320,6 +321,7 @@ struct igb_ring {
>  	union {				/* array of buffer info structs */
>  		struct igb_tx_buffer *tx_buffer_info;
>  		struct igb_rx_buffer *rx_buffer_info;
> +		struct xdp_buff **rx_buffer_info_zc;
>  	};
>  	void *desc;			/* descriptor ring memory */
>  	unsigned long flags;		/* ring specific flags */
> @@ -357,6 +359,7 @@ struct igb_ring {
>  		};
>  	};
>  	struct xdp_rxq_info xdp_rxq;
> +	struct xsk_buff_pool *xsk_pool;
>  } ____cacheline_internodealigned_in_smp;
>  
>  struct igb_q_vector {
> @@ -384,7 +387,9 @@ enum e1000_ring_flags_t {
>  	IGB_RING_FLAG_RX_SCTP_CSUM,
>  	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
>  	IGB_RING_FLAG_TX_CTX_IDX,
> -	IGB_RING_FLAG_TX_DETECT_HANG
> +	IGB_RING_FLAG_TX_DETECT_HANG,
> +	IGB_RING_FLAG_TX_DISABLED,
> +	IGB_RING_FLAG_AF_XDP_ZC
>  };
>  
>  #define ring_uses_large_buffer(ring) \
> @@ -822,4 +827,11 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
>  int igb_del_mac_steering_filter(struct igb_adapter *adapter,
>  				const u8 *addr, u8 queue, u8 flags);
>  
> +struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
> +				   struct igb_ring *ring);
> +int igb_xsk_pool_setup(struct igb_adapter *adapter,
> +		       struct xsk_buff_pool *pool,
> +		       u16 qid);
> +int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
> +
>  #endif /* _IGB_H_ */
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
> index bdb7637559b8..b6f23bbeff71 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -2904,9 +2904,14 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
>  
>  static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>  {
> +	struct igb_adapter *adapter = netdev_priv(dev);
> +
>  	switch (xdp->command) {
>  	case XDP_SETUP_PROG:
>  		return igb_xdp_setup(dev, xdp);
> +	case XDP_SETUP_XSK_POOL:
> +		return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
> +					  xdp->xsk.queue_id);
>  	default:
>  		return -EINVAL;
>  	}
> @@ -3033,6 +3038,7 @@ static const struct net_device_ops igb_netdev_ops = {
>  	.ndo_setup_tc		= igb_setup_tc,
>  	.ndo_bpf		= igb_xdp,
>  	.ndo_xdp_xmit		= igb_xdp_xmit,
> +	.ndo_xsk_wakeup         = igb_xsk_wakeup,
>  };
>  
>  /**
> @@ -4355,6 +4361,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
>  	u64 tdba = ring->dma;
>  	int reg_idx = ring->reg_idx;
>  
> +	ring->xsk_pool = igb_xsk_pool(adapter, ring);

use WRITE_ONCE()

> +
>  	wr32(E1000_TDLEN(reg_idx),
>  	     ring->count * sizeof(union e1000_adv_tx_desc));
>  	wr32(E1000_TDBAL(reg_idx),
> @@ -4750,6 +4758,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
>  	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
>  	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
>  					   MEM_TYPE_PAGE_SHARED, NULL));
> +	ring->xsk_pool = igb_xsk_pool(adapter, ring);

ditto

I was recently addressing issues around xsk in ice, see:
[0]: https://lore.kernel.org/netdev/172239123450.15322.12860347838208396251.git-patchwork-notify@kernel.org/

>  
>  	/* disable the queue */
>  	wr32(E1000_RXDCTL(reg_idx), 0);
> diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
> new file mode 100644
> index 000000000000..925bf97f7caa
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright(c) 2018 Intel Corporation. */
> +
> +#include <linux/bpf_trace.h>
> +#include <net/xdp_sock_drv.h>
> +#include <net/xdp.h>
> +
> +#include "e1000_hw.h"
> +#include "igb.h"
> +
> +static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
> +{
> +	int size = pool_present ?
> +		sizeof(*ring->rx_buffer_info_zc) * ring->count :
> +		sizeof(*ring->rx_buffer_info) * ring->count;
> +	void *buff_info = vmalloc(size);

You need to take into account the rx_buffer_info_zc in the memset in
igb_configure_rx_ring(). Also why vmalloc?

> +
> +	if (!buff_info)
> +		return -ENOMEM;
> +
> +	if (pool_present) {
> +		vfree(ring->rx_buffer_info);
> +		ring->rx_buffer_info = NULL;
> +		ring->rx_buffer_info_zc = buff_info;
> +	} else {
> +		vfree(ring->rx_buffer_info_zc);
> +		ring->rx_buffer_info_zc = NULL;
> +		ring->rx_buffer_info = buff_info;
> +	}
> +
> +	return 0;
> +}
> +
> +static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
> +{
> +	struct igb_ring *tx_ring = adapter->tx_ring[qid];
> +	struct igb_ring *rx_ring = adapter->rx_ring[qid];
> +	struct e1000_hw *hw = &adapter->hw;
> +
> +	set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
> +
> +	wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
> +	wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
> +

synchronize_net() to let the napi finish its current job?

> +	/* Rx/Tx share the same napi context. */
> +	napi_disable(&rx_ring->q_vector->napi);
> +
> +	igb_clean_tx_ring(tx_ring);
> +	igb_clean_rx_ring(rx_ring);
> +
> +	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
> +	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
> +}
> +
> +static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
> +{
> +	struct igb_ring *tx_ring = adapter->tx_ring[qid];
> +	struct igb_ring *rx_ring = adapter->rx_ring[qid];
> +
> +	igb_configure_tx_ring(adapter, tx_ring);
> +	igb_configure_rx_ring(adapter, rx_ring);
> +

synchronize_net() after updating xsk_pool ptrs

> +	clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
> +
> +	/* call igb_desc_unused which always leaves
> +	 * at least 1 descriptor unused to make sure
> +	 * next_to_use != next_to_clean
> +	 */
> +	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
> +
> +	/* Rx/Tx share the same napi context. */
> +	napi_enable(&rx_ring->q_vector->napi);
> +}
> +
> +struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
> +				   struct igb_ring *ring)
> +{
> +	int qid = ring->queue_index;
> +
> +	if (!igb_xdp_is_enabled(adapter) ||
> +	    !test_bit(IGB_RING_FLAG_AF_XDP_ZC, &ring->flags))

See:
[1]: https://lore.kernel.org/netdev/20240603-net-2024-05-30-intel-net-fixes-v2-3-e3563aa89b0c@intel.com/

how to avoid the introduction of IGB_RING_FLAG_AF_XDP_ZC altogether.

> +		return NULL;
> +
> +	return xsk_get_pool_from_qid(adapter->netdev, qid);
> +}
> +
> +static int igb_xsk_pool_enable(struct igb_adapter *adapter,
> +			       struct xsk_buff_pool *pool,
> +			       u16 qid)
> +{
> +	struct net_device *netdev = adapter->netdev;
> +	struct igb_ring *tx_ring, *rx_ring;
> +	bool if_running;
> +	int err;
> +
> +	if (qid >= adapter->num_rx_queues)
> +		return -EINVAL;
> +
> +	if (qid >= netdev->real_num_rx_queues ||
> +	    qid >= netdev->real_num_tx_queues)
> +		return -EINVAL;
> +
> +	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
> +	if (err)
> +		return err;
> +
> +	tx_ring = adapter->tx_ring[qid];
> +	rx_ring = adapter->rx_ring[qid];
> +	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
> +	if (if_running)
> +		igb_txrx_ring_disable(adapter, qid);
> +
> +	set_bit(IGB_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
> +	set_bit(IGB_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
> +
> +	if (if_running) {
> +		err = igb_realloc_rx_buffer_info(rx_ring, true);
> +		if (!err) {
> +			igb_txrx_ring_enable(adapter, qid);
> +			/* Kick start the NAPI context so that receiving will start */
> +			err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
> +		}
> +
> +		if (err) {
> +			clear_bit(IGB_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
> +			clear_bit(IGB_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
> +			xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
> +{
> +	struct igb_ring *tx_ring, *rx_ring;
> +	struct xsk_buff_pool *pool;
> +	bool if_running;
> +	int err;
> +
> +	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
> +	if (!pool)
> +		return -EINVAL;
> +
> +	tx_ring = adapter->tx_ring[qid];
> +	rx_ring = adapter->rx_ring[qid];
> +	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
> +	if (if_running)
> +		igb_txrx_ring_disable(adapter, qid);
> +
> +	xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
> +	clear_bit(IGB_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
> +	clear_bit(IGB_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
> +
> +	if (if_running) {
> +		err = igb_realloc_rx_buffer_info(rx_ring, false);
> +		if (err)
> +			return err;
> +
> +		igb_txrx_ring_enable(adapter, qid);
> +	}
> +
> +	return 0;
> +}
> +
> +int igb_xsk_pool_setup(struct igb_adapter *adapter,
> +		       struct xsk_buff_pool *pool,
> +		       u16 qid)
> +{
> +	return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
> +		igb_xsk_pool_disable(adapter, qid);
> +}
> +
> +int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
> +{
> +	struct igb_adapter *adapter = netdev_priv(dev);
> +	struct e1000_hw *hw = &adapter->hw;
> +	struct igb_ring *ring;
> +	u32 eics = 0;
> +
> +	if (test_bit(__IGB_DOWN, &adapter->state))
> +		return -ENETDOWN;
> +
> +	if (!igb_xdp_is_enabled(adapter))
> +		return -EINVAL;
> +
> +	if (qid >= adapter->num_tx_queues)
> +		return -EINVAL;
> +
> +	ring = adapter->tx_ring[qid];
> +
> +	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
> +		return -ENETDOWN;
> +
> +	if (!ring->xsk_pool)

READ_ONCE()

Also, please test this patchset against a scenario where you do Tx ZC from
every queue available and toggle the interface down and up. We had a nasty
case that [0] fixed where we were producing Tx descriptors to wire when
interface was either already going down or not brought up yet.

> +		return -EINVAL;
> +
> +	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
> +		/* Cause software interrupt to ensure Rx ring is cleaned */
> +		if (adapter->flags & IGB_FLAG_HAS_MSIX) {
> +			eics |= ring->q_vector->eims_value;
> +			wr32(E1000_EICS, eics);
> +		} else {
> +			wr32(E1000_ICS, E1000_ICS_RXDMT0);
> +		}
> +	}
> +
> +	return 0;
> +}
> -- 
> 2.42.0
> 

  reply	other threads:[~2024-08-10 13:35 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-08 18:35 [PATCH net-next 0/4][pull request] igb: Add support for AF_XDP zero-copy Tony Nguyen
2024-08-08 18:35 ` [PATCH net-next 1/4] igb: prepare for AF_XDP zero-copy support Tony Nguyen
2024-08-08 20:38   ` Maciej Fijalkowski
2024-08-09 13:05     ` Kurt Kanzenbach
2024-08-09 13:14       ` Fijalkowski, Maciej
2024-08-09 13:19         ` Kurt Kanzenbach
2024-08-08 18:35 ` [PATCH net-next 2/4] igb: Introduce XSK data structures and helpers Tony Nguyen
2024-08-10 13:35   ` Maciej Fijalkowski [this message]
2024-08-08 18:35 ` [PATCH net-next 3/4] igb: add AF_XDP zero-copy Rx support Tony Nguyen
2024-08-10 13:55   ` Maciej Fijalkowski
2024-08-10 14:12     ` Maciej Fijalkowski
2024-08-14  8:29     ` Kurt Kanzenbach
2024-08-14  8:55       ` Maciej Fijalkowski
2024-08-08 18:35 ` [PATCH net-next 4/4] igb: add AF_XDP zero-copy Tx support Tony Nguyen
2024-08-10 14:10   ` Maciej Fijalkowski
2024-08-14  8:36     ` Kurt Kanzenbach
2024-08-14  8:55       ` Maciej Fijalkowski
2024-08-14  9:12         ` Kurt Kanzenbach
2024-08-14 10:26           ` Maciej Fijalkowski
2024-08-14 12:51             ` Kurt Kanzenbach

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZrdsnT5aIs85jyL/@boxer \
    --to=maciej.fijalkowski@intel.com \
    --cc=anthony.l.nguyen@intel.com \
    --cc=ast@kernel.org \
    --cc=benjamin.steinke@woks-audio.com \
    --cc=bigeasy@linutronix.de \
    --cc=bpf@vger.kernel.org \
    --cc=chandanx.rout@intel.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=kurt@linutronix.de \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=richardcochran@gmail.com \
    --cc=sriram.yagnaraman@ericsson.com \
    --cc=sriram.yagnaraman@est.tech \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.