From: Kurt Kanzenbach <kurt@linutronix.de>
To: Tony Nguyen <anthony.l.nguyen@intel.com>,
Przemek Kitszel <przemyslaw.kitszel@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Jesper Dangaard Brouer <hawk@kernel.org>,
John Fastabend <john.fastabend@gmail.com>,
Richard Cochran <richardcochran@gmail.com>,
Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>,
Benjamin Steinke <benjamin.steinke@woks-audio.com>,
Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
Maciej Fijalkowski <maciej.fijalkowski@intel.com>,
intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
bpf@vger.kernel.org,
Sriram Yagnaraman <sriram.yagnaraman@est.tech>,
Kurt Kanzenbach <kurt@linutronix.de>
Subject: [PATCH iwl-next v8 3/6] igb: Introduce XSK data structures and helpers
Date: Fri, 11 Oct 2024 11:01:01 +0200 [thread overview]
Message-ID: <20241011-b4-igb_zero_copy-v8-3-83862f726a9e@linutronix.de> (raw)
In-Reply-To: <20241011-b4-igb_zero_copy-v8-0-83862f726a9e@linutronix.de>
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Add the following ring flag:
- IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
Add a xdp_buff array for use with XSK receive batch API, and a pointer
to xsk_pool in igb_adapter.
Add enable/disable functions for TX and RX rings.
Add enable/disable functions for XSK pool.
Add xsk wakeup function.
None of the above functionality will be active until
NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
[Kurt: Add READ/WRITE_ONCE(), synchronize_net(),
remove IGB_RING_FLAG_AF_XDP_ZC]
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
drivers/net/ethernet/intel/igb/Makefile | 2 +-
drivers/net/ethernet/intel/igb/igb.h | 13 +-
drivers/net/ethernet/intel/igb/igb_main.c | 9 ++
drivers/net/ethernet/intel/igb/igb_xsk.c | 207 ++++++++++++++++++++++++++++++
4 files changed, 229 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 463c0d26b9d4..6c1b702fd992 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
- e1000_i210.o igb_ptp.o igb_hwmon.o
+ e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 6e2b61ecff68..c30d6f9708f8 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -21,6 +21,7 @@
#include <linux/lockdep.h>
#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
struct igb_adapter;
@@ -321,6 +322,7 @@ struct igb_ring {
union { /* array of buffer info structs */
struct igb_tx_buffer *tx_buffer_info;
struct igb_rx_buffer *rx_buffer_info;
+ struct xdp_buff **rx_buffer_info_zc;
};
void *desc; /* descriptor ring memory */
unsigned long flags; /* ring specific flags */
@@ -358,6 +360,7 @@ struct igb_ring {
};
};
struct xdp_rxq_info xdp_rxq;
+ struct xsk_buff_pool *xsk_pool;
} ____cacheline_internodealigned_in_smp;
struct igb_q_vector {
@@ -385,7 +388,8 @@ enum e1000_ring_flags_t {
IGB_RING_FLAG_RX_SCTP_CSUM,
IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
IGB_RING_FLAG_TX_CTX_IDX,
- IGB_RING_FLAG_TX_DETECT_HANG
+ IGB_RING_FLAG_TX_DETECT_HANG,
+ IGB_RING_FLAG_TX_DISABLED
};
#define ring_uses_large_buffer(ring) \
@@ -841,4 +845,11 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
int igb_del_mac_steering_filter(struct igb_adapter *adapter,
const u8 *addr, u8 queue, u8 flags);
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+ struct igb_ring *ring);
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid);
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
#endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fc30966282c5..341b83e39019 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2904,9 +2904,14 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
+ struct igb_adapter *adapter = netdev_priv(dev);
+
switch (xdp->command) {
case XDP_SETUP_PROG:
return igb_xdp_setup(dev, xdp);
+ case XDP_SETUP_XSK_POOL:
+ return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -3015,6 +3020,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_setup_tc = igb_setup_tc,
.ndo_bpf = igb_xdp,
.ndo_xdp_xmit = igb_xdp_xmit,
+ .ndo_xsk_wakeup = igb_xsk_wakeup,
};
/**
@@ -4337,6 +4343,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
u64 tdba = ring->dma;
int reg_idx = ring->reg_idx;
+ WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+
wr32(E1000_TDLEN(reg_idx),
ring->count * sizeof(union e1000_adv_tx_desc));
wr32(E1000_TDBAL(reg_idx),
@@ -4732,6 +4740,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL));
+ WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
/* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0);
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
new file mode 100644
index 000000000000..7b632be3e7e3
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "e1000_hw.h"
+#include "igb.h"
+
+static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
+{
+ int size = pool_present ?
+ sizeof(*ring->rx_buffer_info_zc) * ring->count :
+ sizeof(*ring->rx_buffer_info) * ring->count;
+ void *buff_info = vmalloc(size);
+
+ if (!buff_info)
+ return -ENOMEM;
+
+ if (pool_present) {
+ vfree(ring->rx_buffer_info);
+ ring->rx_buffer_info = NULL;
+ ring->rx_buffer_info_zc = buff_info;
+ } else {
+ vfree(ring->rx_buffer_info_zc);
+ ring->rx_buffer_info_zc = NULL;
+ ring->rx_buffer_info = buff_info;
+ }
+
+ return 0;
+}
+
+static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
+{
+ struct igb_ring *tx_ring = adapter->tx_ring[qid];
+ struct igb_ring *rx_ring = adapter->rx_ring[qid];
+ struct e1000_hw *hw = &adapter->hw;
+
+ set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+ wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
+ wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
+
+ synchronize_net();
+
+ /* Rx/Tx share the same napi context. */
+ napi_disable(&rx_ring->q_vector->napi);
+
+ igb_clean_tx_ring(tx_ring);
+ igb_clean_rx_ring(rx_ring);
+
+ memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+ memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
+{
+ struct igb_ring *tx_ring = adapter->tx_ring[qid];
+ struct igb_ring *rx_ring = adapter->rx_ring[qid];
+
+ igb_configure_tx_ring(adapter, tx_ring);
+ igb_configure_rx_ring(adapter, rx_ring);
+
+ synchronize_net();
+
+ clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+ /* call igb_desc_unused which always leaves
+ * at least 1 descriptor unused to make sure
+ * next_to_use != next_to_clean
+ */
+ igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+ /* Rx/Tx share the same napi context. */
+ napi_enable(&rx_ring->q_vector->napi);
+}
+
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+ struct igb_ring *ring)
+{
+ int qid = ring->queue_index;
+ struct xsk_buff_pool *pool;
+
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+
+ if (!igb_xdp_is_enabled(adapter))
+ return NULL;
+
+ return (pool && pool->dev) ? pool : NULL;
+}
+
+static int igb_xsk_pool_enable(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct igb_ring *rx_ring;
+ bool if_running;
+ int err;
+
+ if (qid >= adapter->num_rx_queues)
+ return -EINVAL;
+
+ if (qid >= netdev->real_num_rx_queues ||
+ qid >= netdev->real_num_tx_queues)
+ return -EINVAL;
+
+ err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
+ if (err)
+ return err;
+
+ rx_ring = adapter->rx_ring[qid];
+ if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+ if (if_running)
+ igb_txrx_ring_disable(adapter, qid);
+
+ if (if_running) {
+ err = igb_realloc_rx_buffer_info(rx_ring, true);
+ if (!err) {
+ igb_txrx_ring_enable(adapter, qid);
+ /* Kick start the NAPI context so that receiving will start */
+ err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+ }
+
+ if (err) {
+ xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
+{
+ struct xsk_buff_pool *pool;
+ struct igb_ring *rx_ring;
+ bool if_running;
+ int err;
+
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+ if (!pool)
+ return -EINVAL;
+
+ rx_ring = adapter->rx_ring[qid];
+ if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+ if (if_running)
+ igb_txrx_ring_disable(adapter, qid);
+
+ xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+
+ if (if_running) {
+ err = igb_realloc_rx_buffer_info(rx_ring, false);
+ if (err)
+ return err;
+
+ igb_txrx_ring_enable(adapter, qid);
+ }
+
+ return 0;
+}
+
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
+ igb_xsk_pool_disable(adapter, qid);
+}
+
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct igb_ring *ring;
+ u32 eics = 0;
+
+ if (test_bit(__IGB_DOWN, &adapter->state))
+ return -ENETDOWN;
+
+ if (!igb_xdp_is_enabled(adapter))
+ return -EINVAL;
+
+ if (qid >= adapter->num_tx_queues)
+ return -EINVAL;
+
+ ring = adapter->tx_ring[qid];
+
+ if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+ return -ENETDOWN;
+
+ if (!READ_ONCE(ring->xsk_pool))
+ return -EINVAL;
+
+ if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+ /* Cause software interrupt */
+ if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+ eics |= ring->q_vector->eims_value;
+ wr32(E1000_EICS, eics);
+ } else {
+ wr32(E1000_ICS, E1000_ICS_RXDMT0);
+ }
+ }
+
+ return 0;
+}
--
2.39.5
next prev parent reply other threads:[~2024-10-11 9:01 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-11 9:00 [PATCH iwl-next v8 0/6] igb: Add support for AF_XDP zero-copy Kurt Kanzenbach
2024-10-11 9:00 ` [PATCH iwl-next v8 1/6] igb: Remove static qualifiers Kurt Kanzenbach
2024-10-11 9:01 ` [PATCH iwl-next v8 2/6] igb: Introduce igb_xdp_is_enabled() Kurt Kanzenbach
2024-10-11 9:01 ` Kurt Kanzenbach [this message]
2024-10-11 9:01 ` [PATCH iwl-next v8 4/6] igb: Add XDP finalize and stats update functions Kurt Kanzenbach
2024-10-15 12:05 ` Maciej Fijalkowski
2024-10-11 9:01 ` [PATCH iwl-next v8 5/6] igb: Add AF_XDP zero-copy Rx support Kurt Kanzenbach
2024-10-15 12:15 ` Maciej Fijalkowski
2024-10-11 9:01 ` [PATCH iwl-next v8 6/6] igb: Add AF_XDP zero-copy Tx support Kurt Kanzenbach
2024-10-15 13:28 ` Maciej Fijalkowski
2024-10-15 17:16 ` Kurt Kanzenbach
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241011-b4-igb_zero_copy-v8-3-83862f726a9e@linutronix.de \
--to=kurt@linutronix.de \
--cc=anthony.l.nguyen@intel.com \
--cc=ast@kernel.org \
--cc=benjamin.steinke@woks-audio.com \
--cc=bigeasy@linutronix.de \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=intel-wired-lan@lists.osuosl.org \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=przemyslaw.kitszel@intel.com \
--cc=richardcochran@gmail.com \
--cc=sriram.yagnaraman@ericsson.com \
--cc=sriram.yagnaraman@est.tech \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).