* [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP
@ 2019-10-31 14:30 Tony Nguyen
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c Tony Nguyen
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Tony Nguyen @ 2019-10-31 14:30 UTC (permalink / raw)
To: intel-wired-lan
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Add support for XDP. Implement ndo_bpf and ndo_xdp_xmit. Upon load of
an XDP program, allocate additional Tx rings for dedicated XDP use.
The following actions are supported: XDP_TX, XDP_DROP, XDP_REDIRECT,
XDP_PASS, and XDP_ABORTED.
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
v4:
- return -EOPNOTSUPP instead of ENOTSUPP for too large MTU which makes
it impossible to attach XDP prog
- don't check for case when there's no XDP prog currently on interface
and ice_xdp() is called with NULL bpf_prog; this happens when user
does "ip link set eth0 xdp off" and no prog is present on VSI; no need
for that as it is handled by higher layer
- drop the extack message for unknown xdp->command
- use the smp_processor_id() for accessing the XDP Tx ring for XDP_TX
action
- don't leave the interface in downed state in case of any failure
during the XDP Tx resources handling
- undo rename of ice_build_ctob
---
drivers/net/ethernet/intel/ice/ice.h | 24 +-
drivers/net/ethernet/intel/ice/ice_base.c | 28 +-
drivers/net/ethernet/intel/ice/ice_ethtool.c | 50 ++-
drivers/net/ethernet/intel/ice/ice_lib.c | 68 +++-
drivers/net/ethernet/intel/ice/ice_lib.h | 6 +
drivers/net/ethernet/intel/ice/ice_main.c | 326 +++++++++++++++++
drivers/net/ethernet/intel/ice/ice_txrx.c | 346 ++++++++++++++++---
drivers/net/ethernet/intel/ice/ice_txrx.h | 34 +-
8 files changed, 825 insertions(+), 57 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 42eaae2f356d..0a90de35ca96 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -29,8 +29,10 @@
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
#include <linux/if_bridge.h>
#include <linux/ctype.h>
+#include <linux/bpf.h>
#include <linux/avf/virtchnl.h>
#include <linux/mfd/core.h>
#include <net/ipv6.h>
@@ -81,8 +83,7 @@ extern const char ice_drv_ver[];
#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
-#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
- (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
+#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
#define ICE_UP_TABLE_TRANSLATE(val, i) \
(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -286,6 +287,10 @@ struct ice_vsi {
u16 num_tx_desc;
u16 qset_handle[ICE_MAX_TRAFFIC_CLASS];
struct ice_tc_cfg tc_cfg;
+ struct bpf_prog *xdp_prog;
+ struct ice_ring **xdp_rings; /* XDP ring array */
+ u16 num_xdp_txq; /* Used XDP queues */
+ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -438,6 +443,16 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
return np->vsi->back;
}
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+ return !!vsi->xdp_prog;
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+ ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
@@ -464,6 +479,11 @@ int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
int ice_vsi_cfg(struct ice_vsi *vsi);
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 1dd9c18ecd8c..e335256d483d 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -198,6 +198,9 @@ static void ice_cfg_itr_gran(struct ice_hw *hw)
*/
static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
{
+ WARN_ONCE(ice_ring_is_xdp(ring) && tc,
+ "XDP ring can't belong to TC other than 0");
+
/* idea here for calculation is that we subtract the number of queue
* count from TC that ring belongs to from it's absolute queue index
* and as a result we get the queue's index within TC.
@@ -287,6 +290,22 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
/* clear the context structure first */
memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+ ring->rx_buf_len = vsi->rx_buf_len;
+
+ if (ring->vsi->type == ICE_VSI_PF) {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index);
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err)
+ return err;
+ }
+ /* Receive Queue Base Address.
+ * Indicates the starting address of the descriptor queue defined in
+ * 128 Byte units.
+ */
rlan_ctx.base = ring->dma >> 7;
rlan_ctx.qlen = ring->count;
@@ -294,7 +313,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
/* Receive Packet Data Buffer Size.
* The Packet Data Buffer Size is defined in 128 byte units.
*/
- rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+ rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
/* use 32 byte descriptors */
rlan_ctx.dsize = 1;
@@ -657,6 +676,13 @@ ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+ val);
+ }
+ ice_flush(hw);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7e23034df955..6cee99b5865b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2577,6 +2577,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
{
struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_ring *xdp_rings = NULL;
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
int i, timeout = 50, err = 0;
@@ -2624,6 +2625,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
vsi->tx_rings[i]->count = new_tx_cnt;
for (i = 0; i < vsi->alloc_rxq; i++)
vsi->rx_rings[i]->count = new_rx_cnt;
+ if (ice_is_xdp_ena_vsi(vsi))
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->count = new_tx_cnt;
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
goto done;
}
@@ -2650,15 +2656,43 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
tx_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&tx_rings[i]);
if (err) {
- while (i) {
- i--;
+ while (i--)
ice_clean_tx_ring(&tx_rings[i]);
- }
devm_kfree(&pf->pdev->dev, tx_rings);
goto done;
}
}
+ if (!ice_is_xdp_ena_vsi(vsi))
+ goto process_rx;
+
+ /* alloc updated XDP resources */
+ netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+ vsi->xdp_rings[0]->count, new_tx_cnt);
+
+ xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+ sizeof(*xdp_rings), GFP_KERNEL);
+ if (!xdp_rings) {
+ err = -ENOMEM;
+ goto free_tx;
+ }
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ /* clone ring and setup updated count */
+ xdp_rings[i] = *vsi->xdp_rings[i];
+ xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].desc = NULL;
+ xdp_rings[i].tx_buf = NULL;
+ err = ice_setup_tx_ring(&xdp_rings[i]);
+ if (err) {
+ while (i--)
+ ice_clean_tx_ring(&xdp_rings[i]);
+ devm_kfree(&pf->pdev->dev, xdp_rings);
+ goto free_tx;
+ }
+ ice_set_ring_xdp(&xdp_rings[i]);
+ }
+
process_rx:
if (new_rx_cnt == vsi->rx_rings[0]->count)
goto process_link;
@@ -2737,6 +2771,16 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
devm_kfree(&pf->pdev->dev, rx_rings);
}
+ if (xdp_rings) {
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ *vsi->xdp_rings[i] = xdp_rings[i];
+ }
+ devm_kfree(&pf->pdev->dev, xdp_rings);
+ }
+
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
ice_up(vsi);
}
goto done;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 74c3bd191671..d3baad3d6b18 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -46,7 +46,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
if (!vsi->rx_rings)
goto err_rings;
- vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+ /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+ vsi->txq_map = devm_kcalloc(&pf->pdev->dev, (2 * vsi->alloc_txq),
sizeof(*vsi->txq_map), GFP_KERNEL);
if (!vsi->txq_map)
@@ -1218,6 +1219,20 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
return err;
}
+/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+ if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
+ vsi->max_frame = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ else
+ vsi->max_frame = ICE_RXBUF_2048;
+
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+}
+
/**
* ice_vsi_cfg_rxqs - Configure the VSI for Rx
* @vsi: the VSI being configured
@@ -1232,13 +1247,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
if (vsi->type == ICE_VSI_VF)
goto setup_rings;
- if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
- vsi->max_frame = vsi->netdev->mtu +
- ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
- else
- vsi->max_frame = ICE_RXBUF_2048;
-
- vsi->rx_buf_len = ICE_RXBUF_2048;
+ ice_vsi_cfg_frame_size(vsi);
setup_rings:
/* set up individual rings */
for (i = 0; i < vsi->num_rxq; i++) {
@@ -1300,6 +1309,18 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
}
+/**
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
+ */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+{
+ return ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+}
+
/**
* ice_intrl_usec_to_reg - convert interrupt rate limit to register value
* @intrl: interrupt rate limit in usecs
@@ -1523,6 +1544,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
}
+/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+}
+
/**
* ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
* @vsi: VSI to enable or disable VLAN pruning on
@@ -1920,6 +1950,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
for (q = 0; q < q_vector->num_ring_tx; q++) {
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+ }
txq++;
}
@@ -2302,6 +2337,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
vsi->base_vector = 0;
}
+ if (ice_is_xdp_ena_vsi(vsi))
+ /* return value check can be skipped here, it always returns
+ * 0 if reset is in progress
+ */
+ ice_destroy_xdp_rings(vsi);
ice_vsi_put_qs(vsi);
ice_vsi_clear_rings(vsi);
ice_vsi_free_arrays(vsi);
@@ -2342,6 +2382,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
goto err_vectors;
ice_vsi_map_rings_to_vectors(vsi);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ vsi->num_xdp_txq = vsi->alloc_txq;
+ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+ if (ret)
+ goto err_vectors;
+ }
/* Do not exit if configuring RSS had an issue, at least
* receive traffic on first queue. Hence no need to capture
* return value
@@ -2368,9 +2414,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
}
/* configure VSI nodes based on number of queues and TC's */
- for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ for (i = 0; i < vsi->tc_cfg.numtc; i++) {
max_txqs[i] = vsi->alloc_txq;
+ if (ice_is_xdp_ena_vsi(vsi))
+ max_txqs[i] += vsi->num_xdp_txq;
+ }
+
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 9919d3f810e3..53758ec71d3a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -38,6 +38,10 @@ int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num);
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -81,6 +85,8 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
char *ice_nvm_version_str(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1916dce06375..fff7e5567910 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1703,6 +1703,309 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
return err;
}
+/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+ struct device *dev = &vsi->back->pdev->dev;
+ int i;
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ u16 xdp_q_idx = vsi->alloc_txq + i;
+ struct ice_ring *xdp_ring;
+
+ xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+ if (!xdp_ring)
+ goto free_xdp_rings;
+
+ xdp_ring->q_index = xdp_q_idx;
+ xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+ xdp_ring->ring_active = false;
+ xdp_ring->vsi = vsi;
+ xdp_ring->netdev = NULL;
+ xdp_ring->dev = dev;
+ xdp_ring->count = vsi->num_tx_desc;
+ vsi->xdp_rings[i] = xdp_ring;
+ if (ice_setup_tx_ring(xdp_ring))
+ goto free_xdp_rings;
+ ice_set_ring_xdp(xdp_ring);
+ }
+
+ return 0;
+
+free_xdp_rings:
+ for (; i >= 0; i--)
+ if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+ int i;
+
+ old_prog = xchg(&vsi->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ ice_for_each_rxq(vsi, i)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ int xdp_rings_rem = vsi->num_xdp_txq;
+ struct ice_pf *pf = vsi->back;
+ struct ice_qs_cfg xdp_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+ .pf_map = pf->avail_txqs,
+ .pf_map_size = pf->max_pf_txqs,
+ .q_count = vsi->num_xdp_txq,
+ .scatter_count = ICE_MAX_SCATTER_TXQS,
+ .vsi_map = vsi->txq_map,
+ .vsi_map_offset = vsi->alloc_txq,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ enum ice_status status;
+ int i, v_idx;
+
+ vsi->xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+ sizeof(*vsi->xdp_rings), GFP_KERNEL);
+ if (!vsi->xdp_rings)
+ return -ENOMEM;
+
+ vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+ if (__ice_vsi_get_qs(&xdp_qs_cfg))
+ goto err_map_xdp;
+
+ if (ice_xdp_alloc_setup_rings(vsi))
+ goto clear_xdp_rings;
+
+ /* follow the logic from ice_vsi_map_rings_to_vectors */
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ int xdp_rings_per_v, q_id, q_base;
+
+ xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+ vsi->num_q_vectors - v_idx);
+ q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+ xdp_ring->q_vector = q_vector;
+ xdp_ring->next = q_vector->tx.ring;
+ q_vector->tx.ring = xdp_ring;
+ }
+ xdp_rings_rem -= xdp_rings_per_v;
+ }
+
+ /* omit the scheduler update if in reset path; XDP queues will be
+ * taken into account at the end of ice_vsi_rebuild, where
+ * ice_cfg_vsi_lan is being called
+ */
+ if (ice_is_reset_in_progress(pf->state))
+ return 0;
+
+ /* tell the Tx scheduler that right now we have
+ * additional queues
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Failed VSI LAN queue config for XDP, error:%d\n",
+ status);
+ goto clear_xdp_rings;
+ }
+ ice_vsi_assign_bpf_prog(vsi, prog);
+
+ return 0;
+clear_xdp_rings:
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+err_map_xdp:
+ mutex_lock(&pf->avail_q_mutex);
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+ return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_pf *pf = vsi->back;
+ int i, v_idx;
+
+ /* q_vectors are freed in reset path so there's no point in detaching
+ * rings; in case of rebuild being triggered not from reset reset bits
+ * in pf->state won't be set, so additionally check first q_vector
+ * against NULL
+ */
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ goto free_qmap;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ struct ice_ring *ring;
+
+ ice_for_each_ring(ring, q_vector->tx)
+ if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+ break;
+
+ /* restore the value of last node prior to XDP setup */
+ q_vector->tx.ring = ring;
+ }
+
+free_qmap:
+ mutex_lock(&pf->avail_q_mutex);
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+ if (vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+ devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+ vsi->xdp_rings = NULL;
+
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ return 0;
+
+ ice_vsi_assign_bpf_prog(vsi, NULL);
+
+ /* notify Tx scheduler that we destroyed XDP queues and bring
+ * back the old number of child nodes
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq;
+
+ return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ bool if_running = netif_running(vsi->netdev);
+ int ret = 0, xdp_ring_err = 0;
+
+ if (frame_size > vsi->rx_buf_len) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+ return -EOPNOTSUPP;
+ }
+
+ /* need to stop netdev while setting up the program for Rx rings */
+ if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+ ret = ice_down(vsi);
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Preparing device for XDP attach failed");
+ return ret;
+ }
+ }
+
+ if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+ vsi->num_xdp_txq = vsi->alloc_txq;
+ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Setting up XDP Tx resources failed");
+ } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+ xdp_ring_err = ice_destroy_xdp_rings(vsi);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Freeing XDP Tx resources failed");
+ } else {
+ ice_vsi_assign_bpf_prog(vsi, prog);
+ }
+
+ if (if_running)
+ ret = ice_up(vsi);
+
+ return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (vsi->type != ICE_VSI_PF) {
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "XDP can be loaded only on PF VSI");
+ return -EINVAL;
+ }
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
/**
* ice_ena_misc_vector - enable the non-queue interrupts
* @pf: board private structure
@@ -2262,6 +2565,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
status = -ENODEV;
goto unroll_vsi_setup;
}
+ /* netdev has to be configured before setting frame size */
+ ice_vsi_cfg_frame_size(vsi);
/* registering the NAPI handler requires both the queues and
* netdev to be created, which are done in ice_pf_vsi_setup()
@@ -3613,6 +3918,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
ice_vsi_cfg_dcb_rings(vsi);
err = ice_vsi_cfg_lan_txqs(vsi);
+ if (!err && ice_is_xdp_ena_vsi(vsi))
+ err = ice_vsi_cfg_xdp_txqs(vsi);
if (!err)
err = ice_vsi_cfg_rxqs(vsi);
@@ -4028,6 +4335,13 @@ int ice_down(struct ice_vsi *vsi)
netdev_err(vsi->netdev,
"Failed stop Tx rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
+ if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+ tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+ if (tx_err)
+ netdev_err(vsi->netdev,
+ "Failed stop XDP rings, VSI %d error %d\n",
+ vsi->vsi_num, tx_err);
+ }
rx_err = ice_vsi_stop_rx_rings(vsi);
if (rx_err)
@@ -4466,6 +4780,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ int frame_size = ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+
+ if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+ netdev_err(netdev, "max MTU for XDP usage is %d\n",
+ frame_size);
+ return -EINVAL;
+ }
+ }
+
if (new_mtu < netdev->min_mtu) {
netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
netdev->min_mtu);
@@ -5008,4 +5332,6 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_fdb_add = ice_fdb_add,
.ndo_fdb_del = ice_fdb_del,
.ndo_tx_timeout = ice_tx_timeout,
+ .ndo_bpf = ice_xdp,
+ .ndo_xdp_xmit = ice_xdp_xmit,
};
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 33dd103035dc..f79a9376159b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -5,6 +5,9 @@
#include <linux/prefetch.h>
#include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
@@ -19,7 +22,10 @@ static void
ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
- dev_kfree_skb_any(tx_buf->skb);
+ if (ice_ring_is_xdp(ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ dev_kfree_skb_any(tx_buf->skb);
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
@@ -136,8 +142,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
total_bytes += tx_buf->bytecount;
total_pkts += tx_buf->gso_segs;
- /* free the skb */
- napi_consume_skb(tx_buf->skb, napi_budget);
+ if (ice_ring_is_xdp(tx_ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ /* free the skb */
+ napi_consume_skb(tx_buf->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -195,6 +204,9 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_pkts += total_pkts;
+ if (ice_ring_is_xdp(tx_ring))
+ return !!budget;
+
netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
total_bytes);
@@ -319,6 +331,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
void ice_free_rx_ring(struct ice_ring *rx_ring)
{
ice_clean_rx_ring(rx_ring);
+ if (rx_ring->vsi->type == ICE_VSI_PF)
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ rx_ring->xdp_prog = NULL;
devm_kfree(rx_ring->dev, rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
@@ -363,6 +379,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
+
+ if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+ if (rx_ring->vsi->type == ICE_VSI_PF &&
+ !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->q_index))
+ goto err;
return 0;
err:
@@ -402,6 +427,214 @@ static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
}
}
+/**
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
+{
+ return ice_is_xdp_ena_vsi(rx_ring->vsi) ? XDP_PACKET_HEADROOM : 0;
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+static int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+ u16 i = xdp_ring->next_to_use;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+
+ if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ return ICE_XDP_CONSUMED;
+ }
+
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(xdp_ring->dev, dma))
+ return ICE_XDP_CONSUMED;
+
+ tx_buf = &xdp_ring->tx_buf[i];
+ tx_buf->bytecount = size;
+ tx_buf->gso_segs = 1;
+ tx_buf->raw_buf = data;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, i);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+ size, 0);
+
+ /* Make certain all of the status bits have been updated
+ * before next_to_watch is written.
+ */
+ smp_wmb();
+
+ i++;
+ if (i == xdp_ring->count)
+ i = 0;
+
+ tx_buf->next_to_watch = tx_desc;
+ xdp_ring->next_to_use = i;
+
+ return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+static int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+ if (unlikely(!xdpf))
+ return ICE_XDP_CONSUMED;
+
+ return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ int err, result = ICE_XDP_PASS;
+ struct ice_ring *xdp_ring;
+ u32 act;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough -- not supported action */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping frame */
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
+ }
+
+ return result;
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ unsigned int queue_index = smp_processor_id();
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *xdp_ring;
+ int drops = 0, i;
+
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+ return -ENXIO;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ xdp_ring = vsi->xdp_rings[queue_index];
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+ if (err != ICE_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ ice_xdp_ring_update_tail(xdp_ring);
+
+ return n - drops;
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+static void
+ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+ if (xdp_res & ICE_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & ICE_XDP_TX) {
+ struct ice_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+ ice_xdp_ring_update_tail(xdp_ring);
+ }
+}
+
/**
* ice_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
@@ -444,7 +677,7 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ice_rx_offset(rx_ring);
page_ref_add(page, USHRT_MAX - 1);
bi->pagecnt_bias = USHRT_MAX;
@@ -682,7 +915,7 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
* ice_construct_skb - Allocate skb and populate it
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
- * @size: the length of the packet
+ * @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
@@ -690,16 +923,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
*/
static struct sk_buff *
ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
- unsigned int size)
+ struct xdp_buff *xdp)
{
- void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+ unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
+ prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
- prefetch((u8 *)va + L1_CACHE_BYTES);
+ prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif /* L1_CACHE_BYTES */
/* allocate a skb to store the frags */
@@ -712,10 +945,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
- headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+ sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
@@ -745,11 +979,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
*
- * This function will clean up the contents of the rx_buf. It will
- * either recycle the buffer or unmap it and free the associated resources.
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
*/
static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
if (!rx_buf)
return;
@@ -813,30 +1054,20 @@ ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
* @rx_desc: Rx descriptor for current buffer
* @skb: Current socket buffer containing buffer in progress
*
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static bool
ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb)
{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- /* fetch, update, and store next to clean */
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
- prefetch(ICE_RX_DESC(rx_ring, ntc));
-
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
return false;
/* place skb in next buffer to be received */
- rx_ring->rx_buf[ntc].skb = skb;
+ rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
rx_ring->rx_stats.non_eop_descs++;
return true;
@@ -1006,8 +1237,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int xdp_res, xdp_xmit = 0;
+ struct bpf_prog *xdp_prog = NULL;
+ struct xdp_buff xdp;
bool failure;
+ xdp.rxq = &rx_ring->xdp_rxq;
+
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
@@ -1042,10 +1278,46 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
+ if (!size) {
+ xdp.data = NULL;
+ xdp.data_end = NULL;
+ goto construct_skb;
+ }
+
+ xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+ xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + size;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ goto construct_skb;
+ }
+
+ xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+ rcu_read_unlock();
+ if (xdp_res) {
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ xdp_xmit |= xdp_res;
+ ice_rx_buf_adjust_pg_offset(rx_buf,
+ ICE_RXBUF_2048);
+ } else {
+ rx_buf->pagecnt_bias++;
+ }
+ total_rx_bytes += size;
+ total_rx_pkts++;
+
+ cleaned_count++;
+ ice_put_rx_buf(rx_ring, rx_buf);
+ continue;
+ }
+construct_skb:
if (skb)
ice_add_rx_frag(rx_buf, skb, size);
else
- skb = ice_construct_skb(rx_ring, rx_buf, size);
+ skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -1099,6 +1371,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
+ if (xdp_prog)
+ ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
/* update queue and vector specific stats */
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.pkts += total_rx_pkts;
@@ -1527,17 +1802,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
return min_t(int, work_done, budget - 1);
}
-/* helper function for building cmd/type/offset */
-static __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
- return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
- (td_cmd << ICE_TXD_QW1_CMD_S) |
- (td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
- (td_tag << ICE_TXD_QW1_L2TAG1_S));
-}
-
/**
* __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
* @tx_ring: the ring to be checked
@@ -1689,9 +1953,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
i = 0;
/* write last descriptor with RS and EOP bits */
- td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag);
+ td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+ tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
+ td_tag);
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index a914e603b2ed..e40b4cb54ce3 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -22,6 +22,16 @@
#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
#define ICE_MAX_TXQ_PER_TXQG 128
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << ICE_TXD_QW1_L2TAG1_S));
+}
+
/* We are assuming that the cache line is always 64 Bytes here for ice.
* In order to make sure that is a correct assumption there is a check in probe
* to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@@ -49,12 +59,24 @@
#define ICE_TX_FLAGS_VLAN_PR_S 29
#define ICE_TX_FLAGS_VLAN_S 16
+#define ICE_XDP_PASS 0
+#define ICE_XDP_CONSUMED BIT(0)
+#define ICE_XDP_TX BIT(1)
+#define ICE_XDP_REDIR BIT(2)
+
#define ICE_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
struct ice_tx_buf {
struct ice_tx_desc *next_to_watch;
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ void *raw_buf; /* used for XDP */
+ };
unsigned int bytecount;
unsigned short gso_segs;
u32 tx_flags;
@@ -198,9 +220,14 @@ struct ice_ring {
};
struct rcu_head rcu; /* to avoid race on free */
+ struct bpf_prog *xdp_prog;
+ /* CL3 - 3rd cacheline starts here */
+ struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
* in their own cache line if possible
*/
+#define ICE_TX_FLAGS_RING_XDP BIT(0)
+ u8 flags;
dma_addr_t dma; /* physical address of ring */
unsigned int size; /* length of descriptor ring in bytes */
u32 txq_teid; /* Added Tx queue TEID */
@@ -208,6 +235,11 @@ struct ice_ring {
u8 dcb_tc; /* Traffic class of ring */
} ____cacheline_internodealigned_in_smp;
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+ return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
struct ice_ring_container {
/* head of linked-list of rings */
struct ice_ring *ring;
--
2.20.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c
2019-10-31 14:30 [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Tony Nguyen
@ 2019-10-31 14:30 ` Tony Nguyen
2019-11-04 20:17 ` Bowers, AndrewX
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP Tony Nguyen
2019-11-04 20:16 ` [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Bowers, AndrewX
2 siblings, 1 reply; 6+ messages in thread
From: Tony Nguyen @ 2019-10-31 14:30 UTC (permalink / raw)
To: intel-wired-lan
From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
In preparation of AF XDP, move functions that will be used both by skb and
zero-copy paths to a new file called ice_txrx_lib.c. This allows us to
avoid using ifdefs to control the staticness of said functions.
Move other functions (ice_rx_csum, ice_rx_hash and ice_ptype_to_htype)
called only by the moved ones to the new file as well.
Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
v4:
- Change ice_build_ctob() call to build_ctob()
v2:
- Move ice_build_ctob() to ice_txrx_lib.h
---
drivers/net/ethernet/intel/ice/Makefile | 1 +
drivers/net/ethernet/intel/ice/ice_txrx.c | 303 +-----------------
drivers/net/ethernet/intel/ice/ice_txrx.h | 10 -
drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 273 ++++++++++++++++
drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 59 ++++
5 files changed, 334 insertions(+), 312 deletions(-)
create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx_lib.c
create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx_lib.h
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index ff737f880c12..94fe430fdfdd 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -15,6 +15,7 @@ ice-y := ice_main.o \
ice_sched.o \
ice_base.o \
ice_lib.o \
+ ice_txrx_lib.o \
ice_txrx.o \
ice_flex_pipe.o \
ice_idc.o \
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index f79a9376159b..279e5ec7d15f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/bpf_trace.h>
#include <net/xdp.h>
+#include "ice_txrx_lib.h"
#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
@@ -396,37 +397,6 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
return -ENOMEM;
}
-/**
- * ice_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
- */
-static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
-{
- u16 prev_ntu = rx_ring->next_to_use;
-
- rx_ring->next_to_use = val;
-
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = val;
-
- /* QRX_TAIL will be updated with any tail value, but hardware ignores
- * the lower 3 bits. This makes it so we only bump tail on meaningful
- * boundaries. Also, this allows us to bump tail on intervals of 8 up to
- * the budget depending on the current traffic load.
- */
- val &= ~0x7;
- if (prev_ntu != val) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
- writel(val, rx_ring->tail);
- }
-}
-
/**
* ice_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
@@ -438,89 +408,6 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
return ice_is_xdp_ena_vsi(rx_ring->vsi) ? XDP_PACKET_HEADROOM : 0;
}
-/**
- * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
- * @xdp_ring: XDP Tx ring
- *
- * This function updates the XDP Tx ring tail register.
- */
-static void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
-{
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch.
- */
- wmb();
- writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
-}
-
-/**
- * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
- * @data: packet data pointer
- * @size: packet data size
- * @xdp_ring: XDP ring for transmission
- */
-static int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
-{
- u16 i = xdp_ring->next_to_use;
- struct ice_tx_desc *tx_desc;
- struct ice_tx_buf *tx_buf;
- dma_addr_t dma;
-
- if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
- xdp_ring->tx_stats.tx_busy++;
- return ICE_XDP_CONSUMED;
- }
-
- dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
- if (dma_mapping_error(xdp_ring->dev, dma))
- return ICE_XDP_CONSUMED;
-
- tx_buf = &xdp_ring->tx_buf[i];
- tx_buf->bytecount = size;
- tx_buf->gso_segs = 1;
- tx_buf->raw_buf = data;
-
- /* record length, and DMA address */
- dma_unmap_len_set(tx_buf, len, size);
- dma_unmap_addr_set(tx_buf, dma, dma);
-
- tx_desc = ICE_TX_DESC(xdp_ring, i);
- tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
- size, 0);
-
- /* Make certain all of the status bits have been updated
- * before next_to_watch is written.
- */
- smp_wmb();
-
- i++;
- if (i == xdp_ring->count)
- i = 0;
-
- tx_buf->next_to_watch = tx_desc;
- xdp_ring->next_to_use = i;
-
- return ICE_XDP_TX;
-}
-
-/**
- * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
- * @xdp: XDP buffer
- * @xdp_ring: XDP Tx ring
- *
- * Returns negative on failure, 0 on success.
- */
-static int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
-{
- struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
-
- if (unlikely(!xdpf))
- return ICE_XDP_CONSUMED;
-
- return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
-}
-
/**
* ice_run_xdp - Executes an XDP program on initialized xdp_buff
* @rx_ring: Rx ring
@@ -612,29 +499,6 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
return n - drops;
}
-/**
- * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
- * @rx_ring: Rx ring
- * @xdp_res: Result of the receive batch
- *
- * This function bumps XDP Tx tail and/or flush redirect map, and
- * should be called when a batch of packets has been processed in the
- * napi loop.
- */
-static void
-ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
-{
- if (xdp_res & ICE_XDP_REDIR)
- xdp_do_flush_map();
-
- if (xdp_res & ICE_XDP_TX) {
- struct ice_ring *xdp_ring =
- rx_ring->vsi->xdp_rings[rx_ring->q_index];
-
- ice_xdp_ring_update_tail(xdp_ring);
- }
-}
-
/**
* ice_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
@@ -1031,23 +895,6 @@ static bool ice_cleanup_headers(struct sk_buff *skb)
return false;
}
-/**
- * ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
-{
- return !!(rx_desc->wb.status_error0 &
- cpu_to_le16(stat_err_bits));
-}
-
/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
@@ -1073,154 +920,6 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
return true;
}
-/**
- * ice_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
-{
- return PKT_HASH_TYPE_NONE;
-}
-
-/**
- * ice_rx_hash - set the hash value in the skb
- * @rx_ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: pointer to current skb
- * @rx_ptype: the ptype value from the descriptor
- */
-static void
-ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u8 rx_ptype)
-{
- struct ice_32b_rx_flex_desc_nic *nic_mdid;
- u32 hash;
-
- if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
- return;
-
- if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
- return;
-
- nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
- hash = le32_to_cpu(nic_mdid->rss_hash);
- skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
-}
-
-/**
- * ice_rx_csum - Indicate in skb if checksum is good
- * @ring: the ring we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- * @ptype: the packet type decoded by hardware
- *
- * skb->protocol must be set before this function is called
- */
-static void
-ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
- union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
-{
- struct ice_rx_ptype_decoded decoded;
- u32 rx_error, rx_status;
- bool ipv4, ipv6;
-
- rx_status = le16_to_cpu(rx_desc->wb.status_error0);
- rx_error = rx_status;
-
- decoded = ice_decode_rx_desc_ptype(ptype);
-
- /* Start with CHECKSUM_NONE and by default csum_level = 0 */
- skb->ip_summed = CHECKSUM_NONE;
- skb_checksum_none_assert(skb);
-
- /* check if Rx checksum is enabled */
- if (!(ring->netdev->features & NETIF_F_RXCSUM))
- return;
-
- /* check if HW has decoded the packet and checksum */
- if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
- return;
-
- if (!(decoded.known && decoded.outer_ip))
- return;
-
- ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
- ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
-
- if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
- BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
- goto checksum_fail;
- else if (ipv6 && (rx_status &
- (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
- goto checksum_fail;
-
- /* check for L4 errors and handle packets that were not able to be
- * checksummed due to arrival speed
- */
- if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
- goto checksum_fail;
-
- /* Only report checksum unnecessary for TCP, UDP, or SCTP */
- switch (decoded.inner_prot) {
- case ICE_RX_PTYPE_INNER_PROT_TCP:
- case ICE_RX_PTYPE_INNER_PROT_UDP:
- case ICE_RX_PTYPE_INNER_PROT_SCTP:
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- default:
- break;
- }
- return;
-
-checksum_fail:
- ring->vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- */
-static void
-ice_process_skb_fields(struct ice_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u8 ptype)
-{
- ice_rx_hash(rx_ring, rx_desc, skb, ptype);
-
- /* modifies the skb - consumes the enet header */
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
- ice_rx_csum(rx_ring, skb, rx_desc, ptype);
-}
-
-/**
- * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: Rx ring in play
- * @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
- *
- * This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without VLAN tag)
- */
-static void
-ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
-{
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
- napi_gro_receive(&rx_ring->q_vector->napi, skb);
-}
-
/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index e40b4cb54ce3..a07101b13226 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -22,16 +22,6 @@
#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
#define ICE_MAX_TXQ_PER_TXQG 128
-static inline __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
- return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
- (td_cmd << ICE_TXD_QW1_CMD_S) |
- (td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
- (td_tag << ICE_TXD_QW1_L2TAG1_S));
-}
-
/* We are assuming that the cache line is always 64 Bytes here for ice.
* In order to make sure that is a correct assumption there is a check in probe
* to print a warning if the read from GLPCI_CNF2 tells us that the cache line
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000000..35bbc4ff603c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+ u16 prev_ntu = rx_ring->next_to_use;
+
+ rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
+ /* QRX_TAIL will be updated with any tail value, but hardware ignores
+ * the lower 3 bits. This makes it so we only bump tail on meaningful
+ * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+ * the budget depending on the current traffic load.
+ */
+ val &= ~0x7;
+ if (prev_ntu != val) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(val, rx_ring->tail);
+ }
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+ return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 rx_ptype)
+{
+ struct ice_32b_rx_flex_desc_nic *nic_mdid;
+ u32 hash;
+
+ if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+ return;
+
+ nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+ hash = le32_to_cpu(nic_mdid->rss_hash);
+ skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+ union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+ struct ice_rx_ptype_decoded decoded;
+ u32 rx_error, rx_status;
+ bool ipv4, ipv6;
+
+ rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+ rx_error = rx_status;
+
+ decoded = ice_decode_rx_desc_ptype(ptype);
+
+ /* Start with CHECKSUM_NONE and by default csum_level = 0 */
+ skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
+ /* check if Rx checksum is enabled */
+ if (!(ring->netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+ return;
+
+ if (!(decoded.known && decoded.outer_ip))
+ return;
+
+ ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+ if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+ BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+ goto checksum_fail;
+ else if (ipv6 && (rx_status &
+ (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+ goto checksum_fail;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed due to arrival speed
+ */
+ if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+ goto checksum_fail;
+
+ /* Only report checksum unnecessary for TCP, UDP, or SCTP */
+ switch (decoded.inner_prot) {
+ case ICE_RX_PTYPE_INNER_PROT_TCP:
+ case ICE_RX_PTYPE_INNER_PROT_UDP:
+ case ICE_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ default:
+ break;
+ }
+ return;
+
+checksum_fail:
+ ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype)
+{
+ ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+ ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+ if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (vlan_tag & VLAN_VID_MASK))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+ u16 i = xdp_ring->next_to_use;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+
+ if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ return ICE_XDP_CONSUMED;
+ }
+
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(xdp_ring->dev, dma))
+ return ICE_XDP_CONSUMED;
+
+ tx_buf = &xdp_ring->tx_buf[i];
+ tx_buf->bytecount = size;
+ tx_buf->gso_segs = 1;
+ tx_buf->raw_buf = data;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, i);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+ size, 0);
+
+ /* Make certain all of the status bits have been updated
+ * before next_to_watch is written.
+ */
+ smp_wmb();
+
+ i++;
+ if (i == xdp_ring->count)
+ i = 0;
+
+ tx_buf->next_to_watch = tx_desc;
+ xdp_ring->next_to_use = i;
+
+ return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+ if (unlikely(!xdpf))
+ return ICE_XDP_CONSUMED;
+
+ return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+ if (xdp_res & ICE_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & ICE_XDP_TX) {
+ struct ice_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+ ice_xdp_ring_update_tail(xdp_ring);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000000..ba9164dad9ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+ return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
--
2.20.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP
2019-10-31 14:30 [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Tony Nguyen
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c Tony Nguyen
@ 2019-10-31 14:30 ` Tony Nguyen
2019-11-04 23:55 ` Bowers, AndrewX
2019-11-04 20:16 ` [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Bowers, AndrewX
2 siblings, 1 reply; 6+ messages in thread
From: Tony Nguyen @ 2019-10-31 14:30 UTC (permalink / raw)
To: intel-wired-lan
From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Add zero copy AF_XDP support. This patch adds zero copy support for
Tx and Rx; code for zero copy is added to ice_xsk.h and ice_xsk.c.
For Tx, implement ndo_xsk_wakeup. As with other drivers, reuse
existing XDP Tx queues for this task, since XDP_REDIRECT guarantees
mutual exclusion between different NAPI contexts based on CPU ID. In
turn, a netdev can XDP_REDIRECT to another netdev with a different
NAPI context, since the operation is bound to a specific core and each
core has its own hardware ring.
For Rx, allocate frames as MEM_TYPE_ZERO_COPY on queues that AF_XDP is
enabled.
Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Co-developed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
v4:
- Change ice_build_ctob() call to build_ctob()
v3:
- Remove unused prototype ice_xsk_umem_query()
---
drivers/net/ethernet/intel/ice/Makefile | 1 +
drivers/net/ethernet/intel/ice/ice.h | 26 +
drivers/net/ethernet/intel/ice/ice_base.c | 53 +-
drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 +
drivers/net/ethernet/intel/ice/ice_lib.c | 57 +-
drivers/net/ethernet/intel/ice/ice_lib.h | 4 +
drivers/net/ethernet/intel/ice/ice_main.c | 16 +
drivers/net/ethernet/intel/ice/ice_txrx.c | 46 +-
drivers/net/ethernet/intel/ice/ice_txrx.h | 20 +-
drivers/net/ethernet/intel/ice/ice_xsk.c | 1181 ++++++++++++++++++
drivers/net/ethernet/intel/ice/ice_xsk.h | 72 ++
11 files changed, 1456 insertions(+), 27 deletions(-)
create mode 100644 drivers/net/ethernet/intel/ice/ice_xsk.c
create mode 100644 drivers/net/ethernet/intel/ice/ice_xsk.h
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 94fe430fdfdd..774aad3ce237 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -22,3 +22,4 @@ ice-y := ice_main.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 0a90de35ca96..61e2030283aa 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -36,6 +36,7 @@
#include <linux/avf/virtchnl.h>
#include <linux/mfd/core.h>
#include <net/ipv6.h>
+#include <net/xdp_sock.h>
#include "ice_devids.h"
#include "ice_type.h"
#include "ice_txrx.h"
@@ -46,6 +47,7 @@
#include "ice_idc_int.h"
#include "ice_virtchnl_pf.h"
#include "ice_sriov.h"
+#include "ice_xsk.h"
extern const char ice_drv_ver[];
#define ICE_BAR0 0
@@ -291,6 +293,9 @@ struct ice_vsi {
struct ice_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ struct xdp_umem **xsk_umems;
+ u16 num_xsk_umems_used;
+ u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -453,6 +458,27 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
ring->flags |= ICE_TX_FLAGS_RING_XDP;
}
+/**
+ * ice_xsk_umem - get XDP UMEM bound to a ring
+ * @ring - ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * NULL otherwise.
+ */
+static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+{
+ struct xdp_umem **umems = ring->vsi->xsk_umems;
+ int qid = ring->q_index;
+
+ if (ice_ring_is_xdp(ring))
+ qid -= ring->vsi->num_xdp_txq;
+
+ if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+ return NULL;
+
+ return umems[qid];
+}
+
/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index e335256d483d..4db042e88476 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -276,14 +276,17 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
*/
int ice_setup_rx_ctx(struct ice_ring *ring)
{
+ int chain_len = ICE_MAX_CHAINED_RX_BUFS;
struct ice_vsi *vsi = ring->vsi;
- struct ice_hw *hw = &vsi->back->hw;
u32 rxdid = ICE_RXDID_FLEX_NIC;
struct ice_rlan_ctx rlan_ctx;
+ struct ice_hw *hw;
u32 regval;
u16 pf_q;
int err;
+ hw = &vsi->back->hw;
+
/* what is Rx queue number in global space of 2K Rx queues */
pf_q = vsi->rxq_map[ring->q_index];
@@ -297,10 +300,38 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index);
- err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL);
- if (err)
- return err;
+ ring->xsk_umem = ice_xsk_umem(ring);
+ if (ring->xsk_umem) {
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+ ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+ XDP_PACKET_HEADROOM;
+ /* For AF_XDP ZC, we disallow packets to span on
+ * multiple buffers, thus letting us skip that
+ * handling in the fast-path.
+ */
+ chain_len = 1;
+ ring->zca.free = ice_zca_free;
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_ZERO_COPY,
+ &ring->zca);
+ if (err)
+ return err;
+
+ dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ ring->q_index);
+ } else {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq,
+ ring->netdev,
+ ring->q_index);
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err)
+ return err;
+ }
}
/* Receive Queue Base Address.
* Indicates the starting address of the descriptor queue defined in
@@ -340,7 +371,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
* than 5 x DBUF
*/
rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
- ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
+ chain_len * ring->rx_buf_len);
/* Rx queue threshold in units of 64 */
rlan_ctx.lrxqthresh = 1;
@@ -378,7 +409,15 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
/* init queue specific tail register */
ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
writel(0, ring->tail);
- ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+
+ err = ring->xsk_umem ?
+ ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+ ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+ if (err)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+ ring->xsk_umem ? "UMEM enabled " : "",
+ ring->q_index, pf_q);
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 6cee99b5865b..42b032620f66 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2612,6 +2612,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
return 0;
}
+ /* If there is a AF_XDP UMEM attached to any of Rx rings,
+ * disallow changing the number of descriptors -- regardless
+ * if the netdev is running or not.
+ */
+ if (ice_xsk_any_rx_ring_ena(vsi))
+ return -EBUSY;
+
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index d3baad3d6b18..4d2da81e0b9d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1318,7 +1318,17 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
*/
int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
{
- return ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+ int ret;
+ int i;
+
+ ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
+
+ return ret;
}
/**
@@ -2557,6 +2567,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
return buf;
}
+/**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+ u64 pkts, u64 bytes)
+{
+ ring->stats.bytes += bytes;
+ ring->stats.pkts += pkts;
+ cont->total_bytes += bytes;
+ cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&tx_ring->syncp);
+ ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+ u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+ u64_stats_update_end(&rx_ring->syncp);
+}
+
/**
* ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
* @vsi: the VSI being configured MAC filter
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 53758ec71d3a..12cb89e28631 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -85,6 +85,10 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index fff7e5567910..5b270fe0e349 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1734,6 +1734,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
+ xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
}
return 0;
@@ -1976,6 +1977,17 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (if_running)
ret = ice_up(vsi);
+ if (!ret && prog && vsi->xsk_umems) {
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+ if (rx_ring->xsk_umem)
+ napi_schedule(&rx_ring->q_vector->napi);
+ }
+ }
+
return (ret || xdp_ring_err) ? -ENOMEM : 0;
}
@@ -2001,6 +2013,9 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
+ case XDP_SETUP_XSK_UMEM:
+ return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -5334,4 +5349,5 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_tx_timeout = ice_tx_timeout,
.ndo_bpf = ice_xdp,
.ndo_xdp_xmit = ice_xdp_xmit,
+ .ndo_xsk_wakeup = ice_xsk_wakeup,
};
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 279e5ec7d15f..86a23036f420 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -11,6 +11,7 @@
#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
+#include "ice_xsk.h"
#define ICE_RX_HDR_SIZE 256
@@ -58,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
u16 i;
+ if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+ ice_xsk_clean_xdp_ring(tx_ring);
+ goto tx_skip_free;
+ }
+
/* ring already cleared, nothing to do */
if (!tx_ring->tx_buf)
return;
@@ -66,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
for (i = 0; i < tx_ring->count; i++)
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+tx_skip_free:
memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
/* Zero out the descriptor ring */
@@ -198,12 +205,8 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
i += tx_ring->count;
tx_ring->next_to_clean = i;
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.pkts += total_pkts;
- u64_stats_update_end(&tx_ring->syncp);
- tx_ring->q_vector->tx.total_bytes += total_bytes;
- tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+ ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
if (ice_ring_is_xdp(tx_ring))
return !!budget;
@@ -286,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
+ if (rx_ring->xsk_umem) {
+ ice_xsk_clean_rx_ring(rx_ring);
+ goto rx_skip_free;
+ }
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -313,6 +321,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
rx_buf->page_offset = 0;
}
+rx_skip_free:
memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
/* Zero out the descriptor ring */
@@ -1073,13 +1082,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
if (xdp_prog)
ice_finalize_xdp_rx(rx_ring, xdp_xmit);
- /* update queue and vector specific stats */
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.pkts += total_rx_pkts;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_pkts;
@@ -1457,9 +1460,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
- ice_for_each_ring(ring, q_vector->tx)
- if (!ice_clean_tx_irq(ring, budget))
+ ice_for_each_ring(ring, q_vector->tx) {
+ bool wd = ring->xsk_umem ?
+ ice_clean_tx_irq_zc(ring, budget) :
+ ice_clean_tx_irq(ring, budget);
+
+ if (!wd)
clean_complete = false;
+ }
/* Handle case where we are called by netpoll with a budget of 0 */
if (unlikely(budget <= 0))
@@ -1479,7 +1487,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
ice_for_each_ring(ring, q_vector->rx) {
int cleaned;
- cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+ /* A dedicated path for zero-copy allows making a single
+ * comparison in the irq context instead of many inside the
+ * ice_clean_rx_irq function and makes the codebase cleaner.
+ */
+ cleaned = ring->xsk_umem ?
+ ice_clean_rx_irq_zc(ring, budget_per_ring) :
+ ice_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
if (cleaned >= budget_per_ring)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index a07101b13226..d5d243b8e69f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -4,6 +4,8 @@
#ifndef _ICE_TXRX_H_
#define _ICE_TXRX_H_
+#include "ice_type.h"
+
#define ICE_DFLT_IRQ_WORK 256
#define ICE_RXBUF_2048 2048
#define ICE_MAX_CHAINED_RX_BUFS 5
@@ -88,9 +90,17 @@ struct ice_tx_offload_params {
struct ice_rx_buf {
struct sk_buff *skb;
dma_addr_t dma;
- struct page *page;
- unsigned int page_offset;
- u16 pagecnt_bias;
+ union {
+ struct {
+ struct page *page;
+ unsigned int page_offset;
+ u16 pagecnt_bias;
+ };
+ struct {
+ void *addr;
+ u64 handle;
+ };
+ };
};
struct ice_q_stats {
@@ -211,6 +221,8 @@ struct ice_ring {
struct rcu_head rcu; /* to avoid race on free */
struct bpf_prog *xdp_prog;
+ struct xdp_umem *xsk_umem;
+ struct zero_copy_allocator zca;
/* CL3 - 3rd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
@@ -250,6 +262,8 @@ struct ice_ring_container {
#define ice_for_each_ring(pos, head) \
for (pos = (head).ring; pos; pos = pos->next)
+union ice_32b_rx_flex_desc;
+
bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
void ice_clean_tx_ring(struct ice_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000000..fcffad0069d6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+ memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+ sizeof(vsi->rx_rings[q_idx]->rx_stats));
+ memset(&vsi->tx_rings[q_idx]->stats, 0,
+ sizeof(vsi->tx_rings[q_idx]->stats));
+ if (ice_is_xdp_ena_vsi(vsi))
+ memset(&vsi->xdp_rings[q_idx]->stats, 0,
+ sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+ ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+ ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+ bool enable)
+{
+ if (!vsi->netdev || !q_vector)
+ return;
+
+ if (enable)
+ napi_enable(&q_vector->napi);
+ else
+ napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+ struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int base = vsi->base_vector;
+ u16 reg;
+ u32 val;
+
+ /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+ * here only QINT_RQCTL
+ */
+ reg = rx_ring->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+
+ if (q_vector) {
+ u16 v_idx = q_vector->v_idx;
+
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+ ice_flush(hw);
+ synchronize_irq(pf->msix_entries[v_idx + base].vector);
+ }
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ u16 reg_idx = q_vector->reg_idx;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_ring *ring;
+
+ ice_cfg_itr(hw, q_vector);
+
+ wr32(hw, GLINT_RATE(reg_idx),
+ ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+ ice_for_each_ring(ring, q_vector->tx)
+ ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+ q_vector->tx.itr_idx);
+
+ ice_for_each_ring(ring, q_vector->rx)
+ ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+ q_vector->rx.itr_idx);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+
+ ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_txq_meta txq_meta = { };
+ struct ice_ring *tx_ring, *rx_ring;
+ struct ice_q_vector *q_vector;
+ int timeout = 50;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+ netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+ ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+ if (err)
+ return err;
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(&txq_meta, 0, sizeof(txq_meta));
+ ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+ &txq_meta);
+ if (err)
+ return err;
+ }
+ err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx);
+ if (err)
+ return err;
+
+ ice_qvec_toggle_napi(vsi, q_vector, false);
+ ice_qp_clean_rings(vsi, q_idx);
+ ice_qp_reset_stats(vsi, q_idx);
+
+ return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ struct ice_ring *tx_ring, *rx_ring;
+ struct ice_q_vector *q_vector;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ qg_buf->num_txqs = 1;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+ if (err)
+ goto free_buf;
+
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(qg_buf, 0, sizeof(*qg_buf));
+ qg_buf->num_txqs = 1;
+ err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+ if (err)
+ goto free_buf;
+ ice_set_ring_xdp(xdp_ring);
+ xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ }
+
+ err = ice_setup_rx_ctx(rx_ring);
+ if (err)
+ goto free_buf;
+
+ ice_qvec_cfg_msix(vsi, q_vector);
+
+ err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx);
+ if (err)
+ goto free_buf;
+
+ clear_bit(__ICE_CFG_BUSY, vsi->state);
+ ice_qvec_toggle_napi(vsi, q_vector, true);
+ ice_qvec_ena_irq(vsi, q_vector);
+
+ netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+ kfree(qg_buf);
+ return err;
+}
+
+/**
+ * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
+ * @vsi: VSI to allocate the UMEM on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+{
+ if (vsi->xsk_umems)
+ return 0;
+
+ vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+ GFP_KERNEL);
+
+ if (!vsi->xsk_umems) {
+ vsi->num_xsk_umems = 0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_xsk_add_umem - add a UMEM region for XDP sockets
+ * @vsi: VSI to which the UMEM will be added
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ int err;
+
+ err = ice_xsk_alloc_umems(vsi);
+ if (err)
+ return err;
+
+ vsi->xsk_umems[qid] = umem;
+ vsi->num_xsk_umems_used++;
+
+ return 0;
+}
+
+/**
+ * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * @vsi: VSI from which the VSI will be removed
+ * @qid: Ring/qid associated with the UMEM
+ */
+static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+{
+ vsi->xsk_umems[qid] = NULL;
+ vsi->num_xsk_umems_used--;
+
+ if (vsi->num_xsk_umems_used == 0) {
+ kfree(vsi->xsk_umems);
+ vsi->xsk_umems = NULL;
+ vsi->num_xsk_umems = 0;
+ }
+}
+
+/**
+ * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
+ * @vsi: VSI to map the UMEM region
+ * @umem: UMEM to map
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ unsigned int i;
+
+ dev = &pf->pdev->dev;
+ for (i = 0; i < umem->npgs; i++) {
+ dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ ICE_RX_DMA_ATTR);
+ if (dma_mapping_error(dev, dma)) {
+ dev_dbg(dev,
+ "XSK UMEM DMA mapping error on page num %d", i);
+ goto out_unmap;
+ }
+
+ umem->pages[i].dma = dma;
+ }
+
+ return 0;
+
+out_unmap:
+ for (; i > 0; i--) {
+ dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+ umem->pages[i].dma = 0;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
+ * @vsi: VSI from which the UMEM will be unmapped
+ * @umem: UMEM to unmap
+ */
+static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ unsigned int i;
+
+ dev = &pf->pdev->dev;
+ for (i = 0; i < umem->npgs; i++) {
+ dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+
+ umem->pages[i].dma = 0;
+ }
+}
+
+/**
+ * ice_xsk_umem_disable - disable a UMEM region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+{
+ if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
+ !vsi->xsk_umems[qid])
+ return -EINVAL;
+
+ ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+ ice_xsk_remove_umem(vsi, qid);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_umem_enable - enable a UMEM region
+ * @vsi: Current VSI
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ struct xdp_umem_fq_reuse *reuseq;
+ int err;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+ if (qid >= vsi->num_xsk_umems)
+ return -EINVAL;
+
+ if (vsi->xsk_umems && vsi->xsk_umems[qid])
+ return -EBUSY;
+
+ reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+ if (!reuseq)
+ return -ENOMEM;
+
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+ err = ice_xsk_umem_dma_map(vsi, umem);
+ if (err)
+ return err;
+
+ err = ice_xsk_add_umem(vsi, umem, qid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ bool if_running, umem_present = !!umem;
+ int ret = 0, umem_failure = 0;
+
+ if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+ if (if_running) {
+ ret = ice_qp_dis(vsi, qid);
+ if (ret) {
+ netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret);
+ goto xsk_umem_if_up;
+ }
+ }
+
+ umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
+ ice_xsk_umem_disable(vsi, qid);
+
+xsk_umem_if_up:
+ if (if_running) {
+ ret = ice_qp_ena(vsi, qid);
+ if (!ret && umem_present)
+ napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+ else if (ret)
+ netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret);
+ }
+
+ if (umem_failure) {
+ netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d",
+ umem_present ? "en" : "dis", umem_failure);
+ return umem_failure;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
+ * @zca: zero-cpoy allocator
+ * @handle: Buffer handle
+ */
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct ice_rx_buf *rx_buf;
+ struct ice_ring *rx_ring;
+ struct xdp_umem *umem;
+ u64 hr, mask;
+ u16 nta;
+
+ rx_ring = container_of(zca, struct ice_ring, zca);
+ umem = rx_ring->xsk_umem;
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ mask = umem->chunk_mask;
+
+ nta = rx_ring->next_to_alloc;
+ rx_buf = &rx_ring->rx_buf[nta];
+
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ handle &= mask;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += hr;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += hr;
+
+ rx_buf->handle = (u64)handle + umem->headroom;
+}
+
+/**
+ * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the hot path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ void *addr = rx_buf->addr;
+ u64 handle, hr;
+
+ if (addr) {
+ rx_ring->rx_stats.page_reuse_count++;
+ return true;
+ }
+
+ if (!xsk_umem_peek_addr(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += hr;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += hr;
+
+ rx_buf->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr(umem);
+ return true;
+}
+
+/**
+ * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the slow path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ u64 handle, headroom;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ handle &= umem->chunk_mask;
+ headroom = umem->headroom + XDP_PACKET_HEADROOM;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += headroom;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += headroom;
+
+ rx_buf->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr_rq(umem);
+ return true;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ * @alloc: the function pointer to call for allocation
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+static bool
+ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
+ bool alloc(struct ice_ring *, struct ice_rx_buf *))
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ struct ice_rx_buf *rx_buf;
+ bool ret = false;
+
+ if (!count)
+ return false;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ rx_buf = &rx_ring->rx_buf[ntu];
+
+ do {
+ if (!alloc(rx_ring, rx_buf)) {
+ ret = true;
+ break;
+ }
+
+ dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
+ rx_ring->rx_buf_len,
+ DMA_BIDIRECTIONAL);
+
+ rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+ rx_desc->wb.status_error0 = 0;
+
+ rx_desc++;
+ rx_buf++;
+ ntu++;
+
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ rx_buf = rx_ring->rx_buf;
+ ntu = 0;
+ }
+ } while (--count);
+
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
+{
+ return ice_alloc_rx_bufs_zc(rx_ring, count,
+ ice_alloc_buf_fast_zc);
+}
+
+/**
+ * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
+{
+ return ice_alloc_rx_bufs_zc(rx_ring, count,
+ ice_alloc_buf_slow_zc);
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+ int ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_get_rx_buf_zc - Fetch the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: size of a buffer
+ *
+ * This function returns the current, received Rx buffer and does
+ * DMA synchronization.
+ *
+ * Returns a pointer to the received Rx buffer.
+ */
+static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
+{
+ struct ice_rx_buf *rx_buf;
+
+ rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+
+ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
+ size, DMA_BIDIRECTIONAL);
+
+ return rx_buf;
+}
+
+/**
+ * ice_reuse_rx_buf_zc - reuse an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_buf: The buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the recycle
+ * queue (next_to_alloc).
+ */
+static void
+ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+ unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
+ u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+ u16 nta = rx_ring->next_to_alloc;
+ struct ice_rx_buf *new_buf;
+
+ new_buf = &rx_ring->rx_buf[nta++];
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ new_buf->dma = old_buf->dma & mask;
+ new_buf->dma += hr;
+
+ new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
+ new_buf->addr += hr;
+
+ new_buf->handle = old_buf->handle & mask;
+ new_buf->handle += rx_ring->xsk_umem->headroom;
+
+ old_buf->addr = NULL;
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ * @xdp: XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ unsigned int datasize = xdp->data_end - xdp->data;
+ unsigned int datasize_hard = xdp->data_end -
+ xdp->data_hard_start;
+ struct sk_buff *skb;
+
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+
+ return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+ int err, result = ICE_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct ice_ring *xdp_ring;
+ u32 act;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ return ICE_XDP_PASS;
+ }
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ xdp->handle += xdp->data - xdp->data_hard_start;
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough -- not supported action */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping frame */
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
+ }
+
+ rcu_read_unlock();
+ return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int xdp_xmit = 0;
+ struct xdp_buff xdp;
+ bool failure = 0;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
+
+ while (likely(total_rx_packets < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ unsigned int size, xdp_res = 0;
+ struct ice_rx_buf *rx_buf;
+ struct sk_buff *skb;
+ u16 stat_err_bits;
+ u16 vlan_tag = 0;
+ u8 rx_ptype;
+
+ if (cleaned_count >= ICE_RX_BUF_WRITE) {
+ failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
+ cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+ if (!ice_test_staterr(rx_desc, stat_err_bits))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we have
+ * verified the descriptor has been written back.
+ */
+ dma_rmb();
+
+ size = le16_to_cpu(rx_desc->wb.pkt_len) &
+ ICE_RX_FLX_DESC_PKT_LEN_M;
+ if (!size)
+ break;
+
+ rx_buf = ice_get_rx_buf_zc(rx_ring, size);
+ if (!rx_buf->addr)
+ break;
+
+ xdp.data = rx_buf->addr;
+ xdp.data_meta = xdp.data;
+ xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+ xdp.data_end = xdp.data + size;
+ xdp.handle = rx_buf->handle;
+
+ xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+ if (xdp_res) {
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ xdp_xmit |= xdp_res;
+ rx_buf->addr = NULL;
+ } else {
+ ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+ }
+
+ total_rx_bytes += size;
+ total_rx_packets++;
+ cleaned_count++;
+
+ ice_bump_ntc(rx_ring);
+ continue;
+ }
+
+ /* XDP_PASS path */
+ skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ break;
+ }
+
+ cleaned_count++;
+ ice_bump_ntc(rx_ring);
+
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ total_rx_bytes += skb->len;
+ total_rx_packets++;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+ if (ice_test_staterr(rx_desc, stat_err_bits))
+ vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
+ ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+ ice_receive_skb(rx_ring, skb, vlan_tag);
+ }
+
+ ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+ ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+ return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+ struct ice_tx_desc *tx_desc = NULL;
+ bool work_done = true;
+ struct xdp_desc desc;
+ dma_addr_t dma;
+
+ while (likely(budget-- > 0)) {
+ struct ice_tx_buf *tx_buf;
+
+ if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ work_done = false;
+ break;
+ }
+
+ tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+ if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+ break;
+
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+ dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+ DMA_BIDIRECTIONAL);
+
+ tx_buf->bytecount = desc.len;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
+ 0, desc.len, 0);
+
+ xdp_ring->next_to_use++;
+ if (xdp_ring->next_to_use == xdp_ring->count)
+ xdp_ring->next_to_use = 0;
+ }
+
+ if (tx_desc) {
+ ice_xdp_ring_update_tail(xdp_ring);
+ xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ }
+
+ return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+ xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/tranmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+ int total_packets = 0, total_bytes = 0;
+ s16 ntc = xdp_ring->next_to_clean;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ bool xmit_done = true;
+ u32 xsk_frames = 0;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+ tx_buf = &xdp_ring->tx_buf[ntc];
+ ntc -= xdp_ring->count;
+
+ do {
+ if (!(tx_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ total_bytes += tx_buf->bytecount;
+ total_packets++;
+
+ if (tx_buf->raw_buf) {
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ tx_buf->raw_buf = NULL;
+ } else {
+ xsk_frames++;
+ }
+
+ tx_desc->cmd_type_offset_bsz = 0;
+ tx_buf++;
+ tx_desc++;
+ ntc++;
+
+ if (unlikely(!ntc)) {
+ ntc -= xdp_ring->count;
+ tx_buf = xdp_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(xdp_ring, 0);
+ }
+
+ prefetch(tx_desc);
+
+ } while (likely(--budget));
+
+ ntc += xdp_ring->count;
+ xdp_ring->next_to_clean = ntc;
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+
+ ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+ xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+ return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+ u32 __always_unused flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_q_vector *q_vector;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *ring;
+
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ return -ENXIO;
+
+ if (queue_id >= vsi->num_txq)
+ return -ENXIO;
+
+ if (!vsi->xdp_rings[queue_id]->xsk_umem)
+ return -ENXIO;
+
+ ring = vsi->xdp_rings[queue_id];
+
+ /* The idea here is that if NAPI is running, mark a miss, so
+ * it will run again. If not, trigger an interrupt and
+ * schedule the NAPI from interrupt context. If NAPI would be
+ * scheduled here, the interrupt affinity would not be
+ * honored.
+ */
+ q_vector = ring->q_vector;
+ if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+ ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->xsk_umems)
+ return false;
+
+ for (i = 0; i < vsi->num_xsk_umems; i++) {
+ if (vsi->xsk_umems[i])
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+ u16 i;
+
+ for (i = 0; i < rx_ring->count; i++) {
+ struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+ if (!rx_buf->addr)
+ continue;
+
+ xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
+ rx_buf->addr = NULL;
+ }
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+ u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+ u32 xsk_frames = 0;
+
+ while (ntc != ntu) {
+ struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+ if (tx_buf->raw_buf)
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ else
+ xsk_frames++;
+
+ tx_buf->raw_buf = NULL;
+
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000000..3479e1de98fe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
+ struct xdp_umem __always_unused *umem,
+ u16 __always_unused qid)
+{
+ return -ENOTSUPP;
+}
+
+static inline void
+ice_zca_free(struct zero_copy_allocator __always_unused *zca,
+ unsigned long __always_unused handle)
+{
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+ int __always_unused budget)
+{
+ return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+ int __always_unused budget)
+{
+ return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
+ u16 __always_unused count)
+{
+ return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+ return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+ u32 __always_unused queue_id, u32 __always_unused flags)
+{
+ return -ENOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */
--
2.20.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP
2019-10-31 14:30 [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Tony Nguyen
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c Tony Nguyen
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP Tony Nguyen
@ 2019-11-04 20:16 ` Bowers, AndrewX
2 siblings, 0 replies; 6+ messages in thread
From: Bowers, AndrewX @ 2019-11-04 20:16 UTC (permalink / raw)
To: intel-wired-lan
> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces at osuosl.org] On
> Behalf Of Tony Nguyen
> Sent: Thursday, October 31, 2019 7:30 AM
> To: intel-wired-lan at lists.osuosl.org
> Subject: [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP
>
> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
>
> Add support for XDP. Implement ndo_bpf and ndo_xdp_xmit. Upon load of
> an XDP program, allocate additional Tx rings for dedicated XDP use.
> The following actions are supported: XDP_TX, XDP_DROP, XDP_REDIRECT,
> XDP_PASS, and XDP_ABORTED.
>
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> ---
> v4:
> - return -EOPNOTSUPP instead of ENOTSUPP for too large MTU which makes
> it impossible to attach XDP prog
> - don't check for case when there's no XDP prog currently on interface
> and ice_xdp() is called with NULL bpf_prog; this happens when user
> does "ip link set eth0 xdp off" and no prog is present on VSI; no need
> for that as it is handled by higher layer
> - drop the extack message for unknown xdp->command
> - use the smp_processor_id() for accessing the XDP Tx ring for XDP_TX
> action
> - don't leave the interface in downed state in case of any failure
> during the XDP Tx resources handling
> - undo rename of ice_build_ctob
> ---
> drivers/net/ethernet/intel/ice/ice.h | 24 +-
> drivers/net/ethernet/intel/ice/ice_base.c | 28 +-
> drivers/net/ethernet/intel/ice/ice_ethtool.c | 50 ++-
> drivers/net/ethernet/intel/ice/ice_lib.c | 68 +++-
> drivers/net/ethernet/intel/ice/ice_lib.h | 6 +
> drivers/net/ethernet/intel/ice/ice_main.c | 326 +++++++++++++++++
> drivers/net/ethernet/intel/ice/ice_txrx.c | 346 ++++++++++++++++---
> drivers/net/ethernet/intel/ice/ice_txrx.h | 34 +-
> 8 files changed, 825 insertions(+), 57 deletions(-)
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c Tony Nguyen
@ 2019-11-04 20:17 ` Bowers, AndrewX
0 siblings, 0 replies; 6+ messages in thread
From: Bowers, AndrewX @ 2019-11-04 20:17 UTC (permalink / raw)
To: intel-wired-lan
> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces at osuosl.org] On
> Behalf Of Tony Nguyen
> Sent: Thursday, October 31, 2019 7:30 AM
> To: intel-wired-lan at lists.osuosl.org
> Subject: [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to
> ice_txrx_lib.c
>
> From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
>
> In preparation of AF XDP, move functions that will be used both by skb and
> zero-copy paths to a new file called ice_txrx_lib.c. This allows us to avoid
> using ifdefs to control the staticness of said functions.
>
> Move other functions (ice_rx_csum, ice_rx_hash and ice_ptype_to_htype)
> called only by the moved ones to the new file as well.
>
> Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> ---
> v4:
> - Change ice_build_ctob() call to build_ctob()
> v2:
> - Move ice_build_ctob() to ice_txrx_lib.h
> ---
> drivers/net/ethernet/intel/ice/Makefile | 1 +
> drivers/net/ethernet/intel/ice/ice_txrx.c | 303 +-----------------
> drivers/net/ethernet/intel/ice/ice_txrx.h | 10 -
> drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 273 ++++++++++++++++
> drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 59 ++++
> 5 files changed, 334 insertions(+), 312 deletions(-) create mode 100644
> drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx_lib.h
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP Tony Nguyen
@ 2019-11-04 23:55 ` Bowers, AndrewX
0 siblings, 0 replies; 6+ messages in thread
From: Bowers, AndrewX @ 2019-11-04 23:55 UTC (permalink / raw)
To: intel-wired-lan
> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces at osuosl.org] On
> Behalf Of Tony Nguyen
> Sent: Thursday, October 31, 2019 7:30 AM
> To: intel-wired-lan at lists.osuosl.org
> Subject: [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP
>
> From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
>
> Add zero copy AF_XDP support. This patch adds zero copy support for Tx and
> Rx; code for zero copy is added to ice_xsk.h and ice_xsk.c.
>
> For Tx, implement ndo_xsk_wakeup. As with other drivers, reuse existing
> XDP Tx queues for this task, since XDP_REDIRECT guarantees mutual
> exclusion between different NAPI contexts based on CPU ID. In turn, a
> netdev can XDP_REDIRECT to another netdev with a different NAPI context,
> since the operation is bound to a specific core and each core has its own
> hardware ring.
>
> For Rx, allocate frames as MEM_TYPE_ZERO_COPY on queues that AF_XDP is
> enabled.
>
> Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
> Co-developed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> ---
> v4:
> - Change ice_build_ctob() call to build_ctob()
> v3:
> - Remove unused prototype ice_xsk_umem_query()
> ---
> drivers/net/ethernet/intel/ice/Makefile | 1 +
> drivers/net/ethernet/intel/ice/ice.h | 26 +
> drivers/net/ethernet/intel/ice/ice_base.c | 53 +-
> drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 +
> drivers/net/ethernet/intel/ice/ice_lib.c | 57 +-
> drivers/net/ethernet/intel/ice/ice_lib.h | 4 +
> drivers/net/ethernet/intel/ice/ice_main.c | 16 +
> drivers/net/ethernet/intel/ice/ice_txrx.c | 46 +-
> drivers/net/ethernet/intel/ice/ice_txrx.h | 20 +-
> drivers/net/ethernet/intel/ice/ice_xsk.c | 1181 ++++++++++++++++++
> drivers/net/ethernet/intel/ice/ice_xsk.h | 72 ++
> 11 files changed, 1456 insertions(+), 27 deletions(-) create mode 100644
> drivers/net/ethernet/intel/ice/ice_xsk.c
> create mode 100644 drivers/net/ethernet/intel/ice/ice_xsk.h
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-11-04 23:55 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-10-31 14:30 [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Tony Nguyen
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 4/9] ice: Move common functions to ice_txrx_lib.c Tony Nguyen
2019-11-04 20:17 ` Bowers, AndrewX
2019-10-31 14:30 ` [Intel-wired-lan] [PATCH S30 v4 5/9] ice: Add support for AF_XDP Tony Nguyen
2019-11-04 23:55 ` Bowers, AndrewX
2019-11-04 20:16 ` [Intel-wired-lan] [PATCH S30 v4 3/9] ice: Add support for XDP Bowers, AndrewX
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.