From: Mohsin Bashir <mohsin.bashr@gmail.com>
To: netdev@vger.kernel.org
Cc: kuba@kernel.org, alexanderduyck@fb.com, andrew+netdev@lunn.ch,
davem@davemloft.net, edumazet@google.com, pabeni@redhat.com,
mohsin.bashr@gmail.com, horms@kernel.org,
vadim.fedorenko@linux.dev, jdamato@fastly.com, sdf@fomichev.me,
aleksander.lobakin@intel.com, ast@kernel.org,
daniel@iogearbox.net, hawk@kernel.org, john.fastabend@gmail.com
Subject: [PATCH net-next 5/9] eth: fbnic: Add XDP pass, drop, abort support
Date: Wed, 23 Jul 2025 07:59:22 -0700 [thread overview]
Message-ID: <20250723145926.4120434-6-mohsin.bashr@gmail.com> (raw)
In-Reply-To: <20250723145926.4120434-1-mohsin.bashr@gmail.com>
Add basic support for attaching an XDP program to the device and support
for PASS/DROP/ABORT actions.
In fbnic, buffers are always mapped as DMA_BIDIRECTIONAL.
Testing:
Hook a simple XDP program that passes all the packets destined for a
specific port
iperf3 -c 192.168.1.10 -P 5 -p 12345
Connecting to host 192.168.1.10, port 12345
[ 5] local 192.168.1.9 port 46702 connected to 192.168.1.10 port 12345
[ ID] Interval Transfer Bitrate Retr Cwnd
- - - - - - - - - - - - - - - - - - - - - - - - -
[SUM] 1.00-2.00 sec 3.86 GBytes 33.2 Gbits/sec 0
XDP_DROP:
Hook an XDP program that drops packets destined for a specific port
iperf3 -c 192.168.1.10 -P 5 -p 12345
^C- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec 0 sender
[SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec receiver
iperf3: interrupt - the client has terminated
XDP with HDS:
- Validate XDP attachment failure when HDS is low
~] ethtool -G eth0 hds-thresh 512
~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
~] Error: fbnic: MTU too high, or HDS threshold is too low for single
buffer XDP.
- Validate successful XDP attachment when HDS threshold is appropriate
~] ethtool -G eth0 hds-thresh 1536
~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
- Validate when the XDP program is attached, changing HDS thresh to a
lower value fails
~] ethtool -G eth0 hds-thresh 512
~] netlink error: fbnic: Use higher HDS threshold or multi-buf capable
program
- Validate HDS thresh does not matter when xdp frags support is
available
~] ethtool -G eth0 hds-thresh 512
~] sudo ip link set eth0 xdpdrv obj xdp_pass_mb_12345.o sec xdp.frags
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
.../net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++
.../net/ethernet/meta/fbnic/fbnic_netdev.c | 35 +++++++
.../net/ethernet/meta/fbnic/fbnic_netdev.h | 5 +
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 95 +++++++++++++++++--
drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 1 +
5 files changed, 140 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 84a0db9f1be0..d7b9eb267ead 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -329,6 +329,17 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
return -EINVAL;
}
+ /* If an XDP program is attached, we should check for potential frame
+ * splitting. If the new HDS threshold can cause splitting, we should
+ * only allow if the attached XDP program can handle frags.
+ */
+ if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
+ kernel_ring->hds_thresh)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Use higher HDS threshold or multi-buf capable program");
+ return -EINVAL;
+ }
+
if (!netif_running(netdev)) {
fbnic_set_rings(fbn, ring, kernel_ring);
return 0;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index d039e1c7a0d5..0621b89cbf3d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -504,6 +504,40 @@ static void fbnic_get_stats64(struct net_device *dev,
}
}
+bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
+ u32 hds_thresh)
+{
+ if (!prog)
+ return false;
+
+ if (prog->aux->xdp_has_frags)
+ return false;
+
+ return mtu + ETH_HLEN > hds_thresh;
+}
+
+static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct bpf_prog *prog = bpf->prog, *prev_prog;
+ struct fbnic_net *fbn = netdev_priv(netdev);
+
+ if (bpf->command != XDP_SETUP_PROG)
+ return -EINVAL;
+
+ if (fbnic_check_split_frames(prog, netdev->mtu,
+ fbn->hds_thresh)) {
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "MTU too high, or HDS threshold is too low for single buffer XDP");
+ return -EOPNOTSUPP;
+ }
+
+ prev_prog = xchg(&fbn->xdp_prog, prog);
+ if (prev_prog)
+ bpf_prog_put(prev_prog);
+
+ return 0;
+}
+
static const struct net_device_ops fbnic_netdev_ops = {
.ndo_open = fbnic_open,
.ndo_stop = fbnic_stop,
@@ -513,6 +547,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
.ndo_set_mac_address = fbnic_set_mac,
.ndo_set_rx_mode = fbnic_set_rx_mode,
.ndo_get_stats64 = fbnic_get_stats64,
+ .ndo_bpf = fbnic_bpf,
.ndo_hwtstamp_get = fbnic_hwtstamp_get,
.ndo_hwtstamp_set = fbnic_hwtstamp_set,
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index 04c5c7ed6c3a..bfa79ea910d8 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -18,6 +18,8 @@
#define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6
struct fbnic_net {
+ struct bpf_prog *xdp_prog;
+
struct fbnic_ring *tx[FBNIC_MAX_TXQS];
struct fbnic_ring *rx[FBNIC_MAX_RXQS];
@@ -104,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
int fbnic_phylink_get_fecparam(struct net_device *netdev,
struct ethtool_fecparam *fecparam);
int fbnic_phylink_init(struct net_device *netdev);
+
+bool fbnic_check_split_frames(struct bpf_prog *prog,
+ unsigned int mtu, u32 hds_threshold);
#endif /* _FBNIC_NETDEV_H_ */
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 71af7b9d5bcd..486c14e83ad5 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -2,17 +2,26 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
+#include <net/xdp.h>
#include "fbnic.h"
#include "fbnic_csr.h"
#include "fbnic_netdev.h"
#include "fbnic_txrx.h"
+enum {
+ FBNIC_XDP_PASS = 0,
+ FBNIC_XDP_CONSUME,
+ FBNIC_XDP_LEN_ERR,
+};
+
enum {
FBNIC_XMIT_CB_TS = 0x01,
};
@@ -877,7 +886,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
frame_sz = hdr_pg_end - hdr_pg_start;
- xdp_init_buff(&pkt->buff, frame_sz, NULL);
+ xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
FBNIC_BD_FRAG_SIZE;
@@ -966,6 +975,38 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
return skb;
}
+static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
+ struct fbnic_pkt_buff *pkt)
+{
+ struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
+ struct bpf_prog *xdp_prog;
+ int act;
+
+ xdp_prog = READ_ONCE(fbn->xdp_prog);
+ if (!xdp_prog)
+ goto xdp_pass;
+
+ if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
+ return ERR_PTR(-FBNIC_XDP_LEN_ERR);
+
+ act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
+ switch (act) {
+ case XDP_PASS:
+xdp_pass:
+ return fbnic_build_skb(nv, pkt);
+ default:
+ bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(nv->napi.dev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ break;
+ }
+
+ return ERR_PTR(-FBNIC_XDP_CONSUME);
+}
+
static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
{
return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
@@ -1064,7 +1105,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
if (unlikely(pkt->add_frag_failed))
skb = NULL;
else if (likely(!fbnic_rcd_metadata_err(rcd)))
- skb = fbnic_build_skb(nv, pkt);
+ skb = fbnic_run_xdp(nv, pkt);
/* Populate skb and invalidate XDP */
if (!IS_ERR_OR_NULL(skb)) {
@@ -1250,6 +1291,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
}
for (j = 0; j < nv->rxt_count; j++, i++) {
+ xdp_rxq_info_unreg(&nv->qt[i].xdp_rxq);
fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
@@ -1422,6 +1464,11 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
fbn->rx[rxq_idx] = &qt->cmpl;
+ err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, rxq_idx,
+ nv->napi.napi_id);
+ if (err)
+ goto free_ring_cur_qt;
+
/* Update Rx queue index */
rxt_count--;
rxq_idx += v_count;
@@ -1432,6 +1479,25 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
return 0;
+ while (rxt_count < nv->rxt_count) {
+ qt--;
+
+ xdp_rxq_info_unreg(&qt->xdp_rxq);
+free_ring_cur_qt:
+ fbnic_remove_rx_ring(fbn, &qt->sub0);
+ fbnic_remove_rx_ring(fbn, &qt->sub1);
+ fbnic_remove_rx_ring(fbn, &qt->cmpl);
+ rxt_count++;
+ }
+ while (txt_count < nv->txt_count) {
+ qt--;
+
+ fbnic_remove_tx_ring(fbn, &qt->sub0);
+ fbnic_remove_tx_ring(fbn, &qt->cmpl);
+
+ txt_count++;
+ }
+ fbnic_napi_free_irq(fbd, nv);
pp_destroy:
page_pool_destroy(nv->page_pool);
napi_del:
@@ -1708,8 +1774,10 @@ static void fbnic_free_nv_resources(struct fbnic_net *fbn,
for (i = 0; i < nv->txt_count; i++)
fbnic_free_qt_resources(fbn, &nv->qt[i]);
- for (j = 0; j < nv->rxt_count; j++, i++)
+ for (j = 0; j < nv->rxt_count; j++, i++) {
fbnic_free_qt_resources(fbn, &nv->qt[i]);
+ xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
+ }
}
static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
@@ -1721,19 +1789,32 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
for (i = 0; i < nv->txt_count; i++) {
err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
if (err)
- goto free_resources;
+ goto free_qt_resources;
}
/* Allocate Rx Resources */
for (j = 0; j < nv->rxt_count; j++, i++) {
+ /* Register XDP memory model for completion queue */
+ err = xdp_reg_mem_model(&nv->qt[i].xdp_rxq.mem,
+ MEM_TYPE_PAGE_POOL,
+ nv->page_pool);
+ if (err)
+ goto xdp_unreg_mem_model;
+
err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
if (err)
- goto free_resources;
+ goto xdp_unreg_cur_model;
}
return 0;
-free_resources:
+xdp_unreg_mem_model:
+ while (j-- && i--) {
+ fbnic_free_qt_resources(fbn, &nv->qt[i]);
+xdp_unreg_cur_model:
+ xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
+ }
+free_qt_resources:
while (i--)
fbnic_free_qt_resources(fbn, &nv->qt[i]);
return err;
@@ -2025,7 +2106,7 @@ void fbnic_flush(struct fbnic_net *fbn)
memset(qt->cmpl.desc, 0, qt->cmpl.size);
fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
- qt->cmpl.pkt->buff.data_hard_start = NULL;
+ memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
}
}
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index be34962c465e..0fefd1f00196 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -129,6 +129,7 @@ struct fbnic_ring {
struct fbnic_q_triad {
struct fbnic_ring sub0, sub1, cmpl;
+ struct xdp_rxq_info xdp_rxq;
};
struct fbnic_napi_vector {
--
2.47.1
next prev parent reply other threads:[~2025-07-23 15:00 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-23 14:59 [PATCH net-next 0/9] eth: fbnic Add XDP support for fbnic Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 1/9] eth: fbnic: Add support for HDS configuration Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 2/9] eth: fbnic: Update Headroom Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 3/9] eth: fbnic: Use shinfo to track frags state on Rx Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 4/9] eth: fbnic: Prefetch packet headers " Mohsin Bashir
2025-07-23 14:59 ` Mohsin Bashir [this message]
2025-07-23 17:35 ` [PATCH net-next 5/9] eth: fbnic: Add XDP pass, drop, abort support Maciej Fijalkowski
2025-07-24 15:47 ` Mohsin Bashir
2025-07-24 16:51 ` Jakub Kicinski
2025-07-24 21:14 ` Alexander H Duyck
2025-07-25 9:56 ` Maciej Fijalkowski
2025-07-25 15:10 ` Alexander Duyck
2025-08-07 21:24 ` Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 6/9] eth: fbnic: Add support for XDP queues Mohsin Bashir
2025-07-23 23:54 ` Jakub Kicinski
2025-07-23 14:59 ` [PATCH net-next 7/9] eth: fbnic: Add support for XDP_TX action Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 8/9] eth: fbnic: Collect packet statistics for XDP Mohsin Bashir
2025-07-24 10:18 ` Simon Horman
2025-07-24 15:48 ` Mohsin Bashir
2025-07-23 14:59 ` [PATCH net-next 9/9] eth: fbnic: Report XDP stats via ethtool Mohsin Bashir
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250723145926.4120434-6-mohsin.bashr@gmail.com \
--to=mohsin.bashr@gmail.com \
--cc=aleksander.lobakin@intel.com \
--cc=alexanderduyck@fb.com \
--cc=andrew+netdev@lunn.ch \
--cc=ast@kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=horms@kernel.org \
--cc=jdamato@fastly.com \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
--cc=vadim.fedorenko@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.