* [PATCH net-next v2 1/6] tsnep: Add adapter down state
2022-12-08 5:40 [PATCH net-next v2 0/6] tsnep: XDP support Gerhard Engleder
@ 2022-12-08 5:40 ` Gerhard Engleder
2022-12-08 5:40 ` [PATCH net-next v2 2/6] tsnep: Add XDP TX support Gerhard Engleder
` (4 subsequent siblings)
5 siblings, 0 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 5:40 UTC (permalink / raw)
To: netdev, bpf
Cc: davem, kuba, edumazet, pabeni, ast, daniel, hawk, john.fastabend,
Gerhard Engleder
Add adapter state with flag for down state. This flag will be used by
the XDP TX path to deny TX if adapter is down.
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/tsnep.h | 1 +
drivers/net/ethernet/engleder/tsnep_main.c | 11 +++++++++++
2 files changed, 12 insertions(+)
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index f93ba48bac3f..f72c0c4da1a9 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -148,6 +148,7 @@ struct tsnep_adapter {
phy_interface_t phy_mode;
struct phy_device *phydev;
int msg_enable;
+ unsigned long state;
struct platform_device *pdev;
struct device *dmadev;
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index bf0190e1d2ea..a28fde9fb060 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -43,6 +43,10 @@
#define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
+enum {
+ __TSNEP_DOWN,
+};
+
static void tsnep_enable_irq(struct tsnep_adapter *adapter, u32 mask)
{
iowrite32(mask, adapter->addr + ECM_INT_ENABLE);
@@ -1143,6 +1147,8 @@ static int tsnep_netdev_open(struct net_device *netdev)
tsnep_enable_irq(adapter, adapter->queue[i].irq_mask);
}
+ clear_bit(__TSNEP_DOWN, &adapter->state);
+
return 0;
phy_failed:
@@ -1165,6 +1171,8 @@ static int tsnep_netdev_close(struct net_device *netdev)
struct tsnep_adapter *adapter = netdev_priv(netdev);
int i;
+ set_bit(__TSNEP_DOWN, &adapter->state);
+
tsnep_disable_irq(adapter, ECM_INT_LINK);
tsnep_phy_close(adapter);
@@ -1518,6 +1526,7 @@ static int tsnep_probe(struct platform_device *pdev)
adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED;
+ set_bit(__TSNEP_DOWN, &adapter->state);
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = TSNEP_MAX_FRAME_SIZE;
@@ -1614,6 +1623,8 @@ static int tsnep_remove(struct platform_device *pdev)
{
struct tsnep_adapter *adapter = platform_get_drvdata(pdev);
+ set_bit(__TSNEP_DOWN, &adapter->state);
+
unregister_netdev(adapter->netdev);
tsnep_rxnfc_cleanup(adapter);
--
2.30.2
^ permalink raw reply related [flat|nested] 21+ messages in thread* [PATCH net-next v2 2/6] tsnep: Add XDP TX support
2022-12-08 5:40 [PATCH net-next v2 0/6] tsnep: XDP support Gerhard Engleder
2022-12-08 5:40 ` [PATCH net-next v2 1/6] tsnep: Add adapter down state Gerhard Engleder
@ 2022-12-08 5:40 ` Gerhard Engleder
2022-12-08 14:10 ` Maciej Fijalkowski
2022-12-09 2:23 ` Saeed Mahameed
2022-12-08 5:40 ` [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup Gerhard Engleder
` (3 subsequent siblings)
5 siblings, 2 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 5:40 UTC (permalink / raw)
To: netdev, bpf
Cc: davem, kuba, edumazet, pabeni, ast, daniel, hawk, john.fastabend,
Gerhard Engleder
Implement ndo_xdp_xmit() for XDP TX support. Support for fragmented XDP
frames is included.
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/tsnep.h | 12 +-
drivers/net/ethernet/engleder/tsnep_main.c | 204 ++++++++++++++++++++-
2 files changed, 207 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index f72c0c4da1a9..29b04127f529 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -57,6 +57,12 @@ struct tsnep_rxnfc_rule {
int location;
};
+enum tsnep_tx_type {
+ TSNEP_TX_TYPE_SKB,
+ TSNEP_TX_TYPE_XDP_TX,
+ TSNEP_TX_TYPE_XDP_NDO,
+};
+
struct tsnep_tx_entry {
struct tsnep_tx_desc *desc;
struct tsnep_tx_desc_wb *desc_wb;
@@ -65,7 +71,11 @@ struct tsnep_tx_entry {
u32 properties;
- struct sk_buff *skb;
+ enum tsnep_tx_type type;
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
size_t len;
DEFINE_DMA_UNMAP_ADDR(dma);
};
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index a28fde9fb060..b97cfd5fa1fa 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -310,10 +310,11 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
struct tsnep_tx_entry *entry = &tx->entry[index];
entry->properties = 0;
- if (entry->skb) {
+ if (entry->skb || entry->xdpf) {
entry->properties = length & TSNEP_DESC_LENGTH_MASK;
entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
- if (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
+ if (entry->type == TSNEP_TX_TYPE_SKB &&
+ skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG;
/* toggle user flag to prevent false acknowledge
@@ -400,6 +401,8 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
entry->desc->tx = __cpu_to_le64(dma);
+ entry->type = TSNEP_TX_TYPE_SKB;
+
map_len += len;
}
@@ -417,12 +420,13 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
entry = &tx->entry[(index + i) % TSNEP_RING_SIZE];
if (entry->len) {
- if (i == 0)
+ if (i == 0 && entry->type == TSNEP_TX_TYPE_SKB)
dma_unmap_single(dmadev,
dma_unmap_addr(entry, dma),
dma_unmap_len(entry, len),
DMA_TO_DEVICE);
- else
+ else if (entry->type == TSNEP_TX_TYPE_SKB ||
+ entry->type == TSNEP_TX_TYPE_XDP_NDO)
dma_unmap_page(dmadev,
dma_unmap_addr(entry, dma),
dma_unmap_len(entry, len),
@@ -505,6 +509,122 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_OK;
}
+static int tsnep_xdp_tx_map(struct xdp_frame *xdpf, struct tsnep_tx *tx,
+ struct skb_shared_info *shinfo, int count,
+ bool dma_map)
+{
+ struct device *dmadev = tx->adapter->dmadev;
+ skb_frag_t *frag;
+ unsigned int len;
+ struct tsnep_tx_entry *entry;
+ void *data;
+ struct page *page;
+ dma_addr_t dma;
+ int map_len = 0;
+ int i;
+
+ frag = NULL;
+ len = xdpf->len;
+ for (i = 0; i < count; i++) {
+ entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE];
+ if (dma_map) {
+ data = unlikely(frag) ? skb_frag_address(frag) :
+ xdpf->data;
+ dma = dma_map_single(dmadev, data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dmadev, dma))
+ return -ENOMEM;
+
+ entry->type = TSNEP_TX_TYPE_XDP_NDO;
+ } else {
+ page = unlikely(frag) ? skb_frag_page(frag) :
+ virt_to_page(xdpf->data);
+ dma = page_pool_get_dma_addr(page);
+ if (unlikely(frag))
+ dma += skb_frag_off(frag);
+ else
+ dma += sizeof(*xdpf) + xdpf->headroom;
+ dma_sync_single_for_device(dmadev, dma, len,
+ DMA_BIDIRECTIONAL);
+
+ entry->type = TSNEP_TX_TYPE_XDP_TX;
+ }
+
+ entry->len = len;
+ dma_unmap_addr_set(entry, dma, dma);
+
+ entry->desc->tx = __cpu_to_le64(dma);
+
+ map_len += len;
+
+ if ((i + 1) < count) {
+ frag = &shinfo->frags[i];
+ len = skb_frag_size(frag);
+ }
+ }
+
+ return map_len;
+}
+
+/* This function requires __netif_tx_lock is held by the caller. */
+static int tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf,
+ struct tsnep_tx *tx, bool dma_map)
+{
+ struct skb_shared_info *shinfo = xdp_get_shared_info_from_frame(xdpf);
+ unsigned long flags;
+ int count = 1;
+ struct tsnep_tx_entry *entry;
+ int length;
+ int i;
+ int retval;
+
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ count += shinfo->nr_frags;
+
+ spin_lock_irqsave(&tx->lock, flags);
+
+ if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1 + count)) {
+ /* prevent full TX ring due to XDP */
+ spin_unlock_irqrestore(&tx->lock, flags);
+
+ return -EBUSY;
+ }
+
+ entry = &tx->entry[tx->write];
+ entry->xdpf = xdpf;
+
+ retval = tsnep_xdp_tx_map(xdpf, tx, shinfo, count, dma_map);
+ if (retval < 0) {
+ tsnep_tx_unmap(tx, tx->write, count);
+ entry->xdpf = NULL;
+
+ tx->dropped++;
+
+ spin_unlock_irqrestore(&tx->lock, flags);
+
+ netdev_err(tx->adapter->netdev, "XDP TX DMA map failed\n");
+
+ return retval;
+ }
+ length = retval;
+
+ for (i = 0; i < count; i++)
+ tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length,
+ i == (count - 1));
+ tx->write = (tx->write + count) % TSNEP_RING_SIZE;
+
+ /* descriptor properties shall be valid before hardware is notified */
+ dma_wmb();
+
+ spin_unlock_irqrestore(&tx->lock, flags);
+
+ return 0;
+}
+
+static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
+{
+ iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
+}
+
static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
{
unsigned long flags;
@@ -512,6 +632,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
struct tsnep_tx_entry *entry;
int count;
int length;
+ struct xdp_frame_bulk bq;
+
+ xdp_frame_bulk_init(&bq);
+
+ rcu_read_lock(); /* need for xdp_return_frame_bulk */
spin_lock_irqsave(&tx->lock, flags);
@@ -531,12 +656,17 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
dma_rmb();
count = 1;
- if (skb_shinfo(entry->skb)->nr_frags > 0)
+ if (entry->type == TSNEP_TX_TYPE_SKB &&
+ skb_shinfo(entry->skb)->nr_frags > 0)
count += skb_shinfo(entry->skb)->nr_frags;
+ else if (entry->type != TSNEP_TX_TYPE_SKB &&
+ xdp_frame_has_frags(entry->xdpf))
+ count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
length = tsnep_tx_unmap(tx, tx->read, count);
- if ((skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) &&
+ if (entry->type == TSNEP_TX_TYPE_SKB &&
+ (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) &&
(__le32_to_cpu(entry->desc_wb->properties) &
TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) {
struct skb_shared_hwtstamps hwtstamps;
@@ -556,8 +686,20 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
skb_tstamp_tx(entry->skb, &hwtstamps);
}
- napi_consume_skb(entry->skb, budget);
- entry->skb = NULL;
+ switch (entry->type) {
+ case TSNEP_TX_TYPE_SKB:
+ napi_consume_skb(entry->skb, budget);
+ entry->skb = NULL;
+ break;
+ case TSNEP_TX_TYPE_XDP_TX:
+ xdp_return_frame_rx_napi(entry->xdpf);
+ entry->xdpf = NULL;
+ break;
+ case TSNEP_TX_TYPE_XDP_NDO:
+ xdp_return_frame_bulk(entry->xdpf, &bq);
+ entry->xdpf = NULL;
+ break;
+ }
tx->read = (tx->read + count) % TSNEP_RING_SIZE;
@@ -574,6 +716,10 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
spin_unlock_irqrestore(&tx->lock, flags);
+ xdp_flush_frame_bulk(&bq);
+
+ rcu_read_unlock();
+
return (budget != 0);
}
@@ -1335,6 +1481,47 @@ static ktime_t tsnep_netdev_get_tstamp(struct net_device *netdev,
return ns_to_ktime(timestamp);
}
+static int tsnep_netdev_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **xdp, u32 flags)
+{
+ struct tsnep_adapter *adapter = netdev_priv(dev);
+ int cpu = smp_processor_id();
+ int queue;
+ struct netdev_queue *nq;
+ int nxmit = 0;
+ int i;
+ int retval;
+
+ if (unlikely(test_bit(__TSNEP_DOWN, &adapter->state)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ queue = cpu % adapter->num_tx_queues;
+ nq = netdev_get_tx_queue(adapter->netdev, queue);
+
+ __netif_tx_lock(nq, cpu);
+
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+
+ for (i = 0; i < n; i++) {
+ retval = tsnep_xdp_xmit_frame_ring(xdp[i], &adapter->tx[queue], true);
+ if (retval)
+ break;
+
+ nxmit++;
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ tsnep_xdp_xmit_flush(&adapter->tx[queue]);
+
+ __netif_tx_unlock(nq);
+
+ return nxmit;
+}
+
static const struct net_device_ops tsnep_netdev_ops = {
.ndo_open = tsnep_netdev_open,
.ndo_stop = tsnep_netdev_close,
@@ -1346,6 +1533,7 @@ static const struct net_device_ops tsnep_netdev_ops = {
.ndo_set_features = tsnep_netdev_set_features,
.ndo_get_tstamp = tsnep_netdev_get_tstamp,
.ndo_setup_tc = tsnep_tc_setup,
+ .ndo_xdp_xmit = tsnep_netdev_xdp_xmit,
};
static int tsnep_mac_init(struct tsnep_adapter *adapter)
--
2.30.2
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 2/6] tsnep: Add XDP TX support
2022-12-08 5:40 ` [PATCH net-next v2 2/6] tsnep: Add XDP TX support Gerhard Engleder
@ 2022-12-08 14:10 ` Maciej Fijalkowski
2022-12-08 19:57 ` Gerhard Engleder
2022-12-09 2:23 ` Saeed Mahameed
1 sibling, 1 reply; 21+ messages in thread
From: Maciej Fijalkowski @ 2022-12-08 14:10 UTC (permalink / raw)
To: Gerhard Engleder
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On Thu, Dec 08, 2022 at 06:40:41AM +0100, Gerhard Engleder wrote:
> Implement ndo_xdp_xmit() for XDP TX support. Support for fragmented XDP
> frames is included.
>
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> ---
> drivers/net/ethernet/engleder/tsnep.h | 12 +-
> drivers/net/ethernet/engleder/tsnep_main.c | 204 ++++++++++++++++++++-
> 2 files changed, 207 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
> index f72c0c4da1a9..29b04127f529 100644
> --- a/drivers/net/ethernet/engleder/tsnep.h
> +++ b/drivers/net/ethernet/engleder/tsnep.h
> @@ -57,6 +57,12 @@ struct tsnep_rxnfc_rule {
> int location;
> };
>
> +enum tsnep_tx_type {
> + TSNEP_TX_TYPE_SKB,
> + TSNEP_TX_TYPE_XDP_TX,
> + TSNEP_TX_TYPE_XDP_NDO,
> +};
> +
> struct tsnep_tx_entry {
> struct tsnep_tx_desc *desc;
> struct tsnep_tx_desc_wb *desc_wb;
> @@ -65,7 +71,11 @@ struct tsnep_tx_entry {
>
> u32 properties;
>
> - struct sk_buff *skb;
> + enum tsnep_tx_type type;
> + union {
> + struct sk_buff *skb;
> + struct xdp_frame *xdpf;
> + };
> size_t len;
> DEFINE_DMA_UNMAP_ADDR(dma);
> };
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index a28fde9fb060..b97cfd5fa1fa 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -310,10 +310,11 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
> struct tsnep_tx_entry *entry = &tx->entry[index];
>
> entry->properties = 0;
> - if (entry->skb) {
> + if (entry->skb || entry->xdpf) {
i think this change is redundant, you could keep a single check as skb and
xdpf ptrs share the same memory, but i guess this makes it more obvious
> entry->properties = length & TSNEP_DESC_LENGTH_MASK;
> entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
> - if (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
> + if (entry->type == TSNEP_TX_TYPE_SKB &&
> + skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
> entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG;
>
> /* toggle user flag to prevent false acknowledge
> @@ -400,6 +401,8 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
>
> entry->desc->tx = __cpu_to_le64(dma);
>
> + entry->type = TSNEP_TX_TYPE_SKB;
> +
> map_len += len;
> }
>
> @@ -417,12 +420,13 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
> entry = &tx->entry[(index + i) % TSNEP_RING_SIZE];
>
> if (entry->len) {
> - if (i == 0)
> + if (i == 0 && entry->type == TSNEP_TX_TYPE_SKB)
> dma_unmap_single(dmadev,
> dma_unmap_addr(entry, dma),
> dma_unmap_len(entry, len),
> DMA_TO_DEVICE);
> - else
> + else if (entry->type == TSNEP_TX_TYPE_SKB ||
> + entry->type == TSNEP_TX_TYPE_XDP_NDO)
> dma_unmap_page(dmadev,
> dma_unmap_addr(entry, dma),
> dma_unmap_len(entry, len),
> @@ -505,6 +509,122 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
> return NETDEV_TX_OK;
> }
>
> +static int tsnep_xdp_tx_map(struct xdp_frame *xdpf, struct tsnep_tx *tx,
> + struct skb_shared_info *shinfo, int count,
> + bool dma_map)
> +{
> + struct device *dmadev = tx->adapter->dmadev;
> + skb_frag_t *frag;
> + unsigned int len;
> + struct tsnep_tx_entry *entry;
> + void *data;
> + struct page *page;
> + dma_addr_t dma;
> + int map_len = 0;
> + int i;
> +
> + frag = NULL;
> + len = xdpf->len;
> + for (i = 0; i < count; i++) {
> + entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE];
> + if (dma_map) {
> + data = unlikely(frag) ? skb_frag_address(frag) :
> + xdpf->data;
> + dma = dma_map_single(dmadev, data, len, DMA_TO_DEVICE);
> + if (dma_mapping_error(dmadev, dma))
> + return -ENOMEM;
> +
> + entry->type = TSNEP_TX_TYPE_XDP_NDO;
> + } else {
> + page = unlikely(frag) ? skb_frag_page(frag) :
> + virt_to_page(xdpf->data);
> + dma = page_pool_get_dma_addr(page);
> + if (unlikely(frag))
> + dma += skb_frag_off(frag);
> + else
> + dma += sizeof(*xdpf) + xdpf->headroom;
> + dma_sync_single_for_device(dmadev, dma, len,
> + DMA_BIDIRECTIONAL);
> +
> + entry->type = TSNEP_TX_TYPE_XDP_TX;
> + }
> +
> + entry->len = len;
> + dma_unmap_addr_set(entry, dma, dma);
> +
> + entry->desc->tx = __cpu_to_le64(dma);
> +
> + map_len += len;
> +
> + if ((i + 1) < count) {
> + frag = &shinfo->frags[i];
> + len = skb_frag_size(frag);
> + }
> + }
> +
> + return map_len;
> +}
> +
> +/* This function requires __netif_tx_lock is held by the caller. */
> +static int tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf,
> + struct tsnep_tx *tx, bool dma_map)
> +{
> + struct skb_shared_info *shinfo = xdp_get_shared_info_from_frame(xdpf);
> + unsigned long flags;
> + int count = 1;
> + struct tsnep_tx_entry *entry;
> + int length;
> + int i;
> + int retval;
> +
> + if (unlikely(xdp_frame_has_frags(xdpf)))
> + count += shinfo->nr_frags;
> +
> + spin_lock_irqsave(&tx->lock, flags);
> +
> + if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1 + count)) {
Wouldn't count + 1 be sufficient to check against the descs available?
If there are frags then you have already accounted for them under the count
variable, so I feel like MAX_SKB_FRAGS is redundant.
> + /* prevent full TX ring due to XDP */
> + spin_unlock_irqrestore(&tx->lock, flags);
> +
> + return -EBUSY;
> + }
> +
> + entry = &tx->entry[tx->write];
> + entry->xdpf = xdpf;
> +
> + retval = tsnep_xdp_tx_map(xdpf, tx, shinfo, count, dma_map);
> + if (retval < 0) {
> + tsnep_tx_unmap(tx, tx->write, count);
> + entry->xdpf = NULL;
> +
> + tx->dropped++;
> +
> + spin_unlock_irqrestore(&tx->lock, flags);
> +
> + netdev_err(tx->adapter->netdev, "XDP TX DMA map failed\n");
> +
> + return retval;
> + }
> + length = retval;
> +
> + for (i = 0; i < count; i++)
> + tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length,
> + i == (count - 1));
> + tx->write = (tx->write + count) % TSNEP_RING_SIZE;
> +
> + /* descriptor properties shall be valid before hardware is notified */
> + dma_wmb();
> +
> + spin_unlock_irqrestore(&tx->lock, flags);
> +
> + return 0;
> +}
> +
(...)
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 2/6] tsnep: Add XDP TX support
2022-12-08 14:10 ` Maciej Fijalkowski
@ 2022-12-08 19:57 ` Gerhard Engleder
0 siblings, 0 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 19:57 UTC (permalink / raw)
To: Maciej Fijalkowski
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 08.12.22 15:10, Maciej Fijalkowski wrote:
>> @@ -65,7 +71,11 @@ struct tsnep_tx_entry {
>>
>> u32 properties;
>>
>> - struct sk_buff *skb;
>> + enum tsnep_tx_type type;
>> + union {
>> + struct sk_buff *skb;
>> + struct xdp_frame *xdpf;
>> + };
>> size_t len;
>> DEFINE_DMA_UNMAP_ADDR(dma);
>> };
>> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
>> index a28fde9fb060..b97cfd5fa1fa 100644
>> --- a/drivers/net/ethernet/engleder/tsnep_main.c
>> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
>> @@ -310,10 +310,11 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
>> struct tsnep_tx_entry *entry = &tx->entry[index];
>>
>> entry->properties = 0;
>> - if (entry->skb) {
>> + if (entry->skb || entry->xdpf) {
>
> i think this change is redundant, you could keep a single check as skb and
> xdpf ptrs share the same memory, but i guess this makes it more obvious
Yes it is actually redundant. I thought it is not a good idea to rely on
the union in the code.
>> +/* This function requires __netif_tx_lock is held by the caller. */
>> +static int tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf,
>> + struct tsnep_tx *tx, bool dma_map)
>> +{
>> + struct skb_shared_info *shinfo = xdp_get_shared_info_from_frame(xdpf);
>> + unsigned long flags;
>> + int count = 1;
>> + struct tsnep_tx_entry *entry;
>> + int length;
>> + int i;
>> + int retval;
>> +
>> + if (unlikely(xdp_frame_has_frags(xdpf)))
>> + count += shinfo->nr_frags;
>> +
>> + spin_lock_irqsave(&tx->lock, flags);
>> +
>> + if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1 + count)) {
>
> Wouldn't count + 1 be sufficient to check against the descs available?
> if there are frags then you have already accounted them under count
> variable so i feel like MAX_SKB_FRAGS is redundant.
In the standard TX path tsnep_xmit_frame_ring() would stop the queue if
less than MAX_SKB_FRAGS + 1 descriptors are available. I wanted to keep
that stop queue logic in tsnep_xmit_frame_ring() by ensuring that XDP
never exceeds this limit (similar to STMMAC_TX_THRESH of stmmac).
So this line checks if enough descriptors are available and that the
queue would not have been stopped by tsnep_xmit_frame_ring().
I could improve the comment below.
>> + /* prevent full TX ring due to XDP */
>> + spin_unlock_irqrestore(&tx->lock, flags);
>> +
>> + return -EBUSY;
>> + }
Gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH net-next v2 2/6] tsnep: Add XDP TX support
2022-12-08 5:40 ` [PATCH net-next v2 2/6] tsnep: Add XDP TX support Gerhard Engleder
2022-12-08 14:10 ` Maciej Fijalkowski
@ 2022-12-09 2:23 ` Saeed Mahameed
2022-12-09 8:02 ` Gerhard Engleder
1 sibling, 1 reply; 21+ messages in thread
From: Saeed Mahameed @ 2022-12-09 2:23 UTC (permalink / raw)
To: Gerhard Engleder
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 08 Dec 06:40, Gerhard Engleder wrote:
>Implement ndo_xdp_xmit() for XDP TX support. Support for fragmented XDP
>frames is included.
>
>Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
>---
> drivers/net/ethernet/engleder/tsnep.h | 12 +-
> drivers/net/ethernet/engleder/tsnep_main.c | 204 ++++++++++++++++++++-
> 2 files changed, 207 insertions(+), 9 deletions(-)
>
>diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
>index f72c0c4da1a9..29b04127f529 100644
>--- a/drivers/net/ethernet/engleder/tsnep.h
>+++ b/drivers/net/ethernet/engleder/tsnep.h
>@@ -57,6 +57,12 @@ struct tsnep_rxnfc_rule {
> int location;
> };
>
>+enum tsnep_tx_type {
>+ TSNEP_TX_TYPE_SKB,
>+ TSNEP_TX_TYPE_XDP_TX,
>+ TSNEP_TX_TYPE_XDP_NDO,
>+};
>+
> struct tsnep_tx_entry {
> struct tsnep_tx_desc *desc;
> struct tsnep_tx_desc_wb *desc_wb;
>@@ -65,7 +71,11 @@ struct tsnep_tx_entry {
>
> u32 properties;
>
>- struct sk_buff *skb;
>+ enum tsnep_tx_type type;
>+ union {
>+ struct sk_buff *skb;
>+ struct xdp_frame *xdpf;
>+ };
> size_t len;
> DEFINE_DMA_UNMAP_ADDR(dma);
> };
>diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
>index a28fde9fb060..b97cfd5fa1fa 100644
>--- a/drivers/net/ethernet/engleder/tsnep_main.c
>+++ b/drivers/net/ethernet/engleder/tsnep_main.c
>@@ -310,10 +310,11 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
> struct tsnep_tx_entry *entry = &tx->entry[index];
>
> entry->properties = 0;
>- if (entry->skb) {
>+ if (entry->skb || entry->xdpf) {
> entry->properties = length & TSNEP_DESC_LENGTH_MASK;
> entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
>- if (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
>+ if (entry->type == TSNEP_TX_TYPE_SKB &&
>+ skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
> entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG;
>
> /* toggle user flag to prevent false acknowledge
>@@ -400,6 +401,8 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
>
> entry->desc->tx = __cpu_to_le64(dma);
>
>+ entry->type = TSNEP_TX_TYPE_SKB;
>+
> map_len += len;
> }
>
>@@ -417,12 +420,13 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
> entry = &tx->entry[(index + i) % TSNEP_RING_SIZE];
>
> if (entry->len) {
>- if (i == 0)
>+ if (i == 0 && entry->type == TSNEP_TX_TYPE_SKB)
> dma_unmap_single(dmadev,
> dma_unmap_addr(entry, dma),
> dma_unmap_len(entry, len),
> DMA_TO_DEVICE);
>- else
>+ else if (entry->type == TSNEP_TX_TYPE_SKB ||
>+ entry->type == TSNEP_TX_TYPE_XDP_NDO)
> dma_unmap_page(dmadev,
> dma_unmap_addr(entry, dma),
> dma_unmap_len(entry, len),
>@@ -505,6 +509,122 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
> return NETDEV_TX_OK;
> }
>
>+static int tsnep_xdp_tx_map(struct xdp_frame *xdpf, struct tsnep_tx *tx,
>+ struct skb_shared_info *shinfo, int count,
>+ bool dma_map)
>+{
>+ struct device *dmadev = tx->adapter->dmadev;
>+ skb_frag_t *frag;
>+ unsigned int len;
>+ struct tsnep_tx_entry *entry;
>+ void *data;
>+ struct page *page;
>+ dma_addr_t dma;
>+ int map_len = 0;
>+ int i;
>+
>+ frag = NULL;
>+ len = xdpf->len;
>+ for (i = 0; i < count; i++) {
>+ entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE];
>+ if (dma_map) {
wouldn't it have made more sense if you passed TSNEP_TX_TYPE instead of
bool dma_map ?
here and in tsnep_xdp_xmit_frame_ring as well..
>+ data = unlikely(frag) ? skb_frag_address(frag) :
>+ xdpf->data;
>+ dma = dma_map_single(dmadev, data, len, DMA_TO_DEVICE);
>+ if (dma_mapping_error(dmadev, dma))
>+ return -ENOMEM;
>+
>+ entry->type = TSNEP_TX_TYPE_XDP_NDO;
>+ } else {
>+ page = unlikely(frag) ? skb_frag_page(frag) :
>+ virt_to_page(xdpf->data);
>+ dma = page_pool_get_dma_addr(page);
>+ if (unlikely(frag))
>+ dma += skb_frag_off(frag);
>+ else
>+ dma += sizeof(*xdpf) + xdpf->headroom;
>+ dma_sync_single_for_device(dmadev, dma, len,
>+ DMA_BIDIRECTIONAL);
>+
>+ entry->type = TSNEP_TX_TYPE_XDP_TX;
>+ }
>+
>+ entry->len = len;
>+ dma_unmap_addr_set(entry, dma, dma);
>+
>+ entry->desc->tx = __cpu_to_le64(dma);
>+
>+ map_len += len;
>+
>+ if ((i + 1) < count) {
>+ frag = &shinfo->frags[i];
>+ len = skb_frag_size(frag);
>+ }
>+ }
>+
>+ return map_len;
>+}
>+
>+/* This function requires __netif_tx_lock is held by the caller. */
>+static int tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf,
>+ struct tsnep_tx *tx, bool dma_map)
>+{
>+ struct skb_shared_info *shinfo = xdp_get_shared_info_from_frame(xdpf);
>+ unsigned long flags;
>+ int count = 1;
>+ struct tsnep_tx_entry *entry;
>+ int length;
>+ int i;
>+ int retval;
Maciej already commented on this, and I agree with him: the whole series
needs some work on rev xmas tree variable declaration; the code will look much
neater.
>+
>+ if (unlikely(xdp_frame_has_frags(xdpf)))
>+ count += shinfo->nr_frags;
>+
>+ spin_lock_irqsave(&tx->lock, flags);
>+
>+ if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1 + count)) {
>+ /* prevent full TX ring due to XDP */
>+ spin_unlock_irqrestore(&tx->lock, flags);
>+
>+ return -EBUSY;
You don't really do anything with the retval, so just return a boolean.
>+ }
>+
>+ entry = &tx->entry[tx->write];
>+ entry->xdpf = xdpf;
>+
>+ retval = tsnep_xdp_tx_map(xdpf, tx, shinfo, count, dma_map);
>+ if (retval < 0) {
>+ tsnep_tx_unmap(tx, tx->write, count);
>+ entry->xdpf = NULL;
>+
>+ tx->dropped++;
>+
>+ spin_unlock_irqrestore(&tx->lock, flags);
>+
>+ netdev_err(tx->adapter->netdev, "XDP TX DMA map failed\n");
please avoid printing in data path, find other means to expose such info.
stats
tracepoints
debug_message rate limited, etc ..
>+
>+ return retval;
>+ }
>+ length = retval;
>+
>+ for (i = 0; i < count; i++)
>+ tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length,
>+ i == (count - 1));
>+ tx->write = (tx->write + count) % TSNEP_RING_SIZE;
>+
>+ /* descriptor properties shall be valid before hardware is notified */
>+ dma_wmb();
>+
>+ spin_unlock_irqrestore(&tx->lock, flags);
>+
>+ return 0;
>+}
>+
>+static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
>+{
>+ iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
>+}
>+
> static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
> {
> unsigned long flags;
>@@ -512,6 +632,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
> struct tsnep_tx_entry *entry;
> int count;
> int length;
>+ struct xdp_frame_bulk bq;
>+
>+ xdp_frame_bulk_init(&bq);
>+
>+ rcu_read_lock(); /* need for xdp_return_frame_bulk */
>
> spin_lock_irqsave(&tx->lock, flags);
>
>@@ -531,12 +656,17 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
> dma_rmb();
>
> count = 1;
>- if (skb_shinfo(entry->skb)->nr_frags > 0)
>+ if (entry->type == TSNEP_TX_TYPE_SKB &&
>+ skb_shinfo(entry->skb)->nr_frags > 0)
> count += skb_shinfo(entry->skb)->nr_frags;
>+ else if (entry->type != TSNEP_TX_TYPE_SKB &&
>+ xdp_frame_has_frags(entry->xdpf))
>+ count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
>
> length = tsnep_tx_unmap(tx, tx->read, count);
>
>- if ((skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) &&
>+ if (entry->type == TSNEP_TX_TYPE_SKB &&
>+ (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) &&
> (__le32_to_cpu(entry->desc_wb->properties) &
> TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) {
> struct skb_shared_hwtstamps hwtstamps;
>@@ -556,8 +686,20 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
> skb_tstamp_tx(entry->skb, &hwtstamps);
> }
>
>- napi_consume_skb(entry->skb, budget);
>- entry->skb = NULL;
>+ switch (entry->type) {
>+ case TSNEP_TX_TYPE_SKB:
>+ napi_consume_skb(entry->skb, budget);
>+ entry->skb = NULL;
>+ break;
>+ case TSNEP_TX_TYPE_XDP_TX:
>+ xdp_return_frame_rx_napi(entry->xdpf);
>+ entry->xdpf = NULL;
>+ break;
>+ case TSNEP_TX_TYPE_XDP_NDO:
>+ xdp_return_frame_bulk(entry->xdpf, &bq);
>+ entry->xdpf = NULL;
>+ break;
>+ }
>
> tx->read = (tx->read + count) % TSNEP_RING_SIZE;
>
>@@ -574,6 +716,10 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>
> spin_unlock_irqrestore(&tx->lock, flags);
>
>+ xdp_flush_frame_bulk(&bq);
>+
>+ rcu_read_unlock();
>+
> return (budget != 0);
> }
>
>@@ -1335,6 +1481,47 @@ static ktime_t tsnep_netdev_get_tstamp(struct net_device *netdev,
> return ns_to_ktime(timestamp);
> }
>
>+static int tsnep_netdev_xdp_xmit(struct net_device *dev, int n,
>+ struct xdp_frame **xdp, u32 flags)
>+{
>+ struct tsnep_adapter *adapter = netdev_priv(dev);
>+ int cpu = smp_processor_id();
>+ int queue;
>+ struct netdev_queue *nq;
>+ int nxmit = 0;
>+ int i;
>+ int retval;
>+
>+ if (unlikely(test_bit(__TSNEP_DOWN, &adapter->state)))
>+ return -ENETDOWN;
>+
>+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>+ return -EINVAL;
>+
>+ queue = cpu % adapter->num_tx_queues;
>+ nq = netdev_get_tx_queue(adapter->netdev, queue);
>+
>+ __netif_tx_lock(nq, cpu);
>+
>+ /* Avoid transmit queue timeout since we share it with the slow path */
>+ txq_trans_cond_update(nq);
>+
>+ for (i = 0; i < n; i++) {
>+ retval = tsnep_xdp_xmit_frame_ring(xdp[i], &adapter->tx[queue], true);
>+ if (retval)
>+ break;
>+
>+ nxmit++;
you could just use the loop iterator and drop nxmit.
>+ }
>+
>+ if (flags & XDP_XMIT_FLUSH)
>+ tsnep_xdp_xmit_flush(&adapter->tx[queue]);
>+
>+ __netif_tx_unlock(nq);
>+
>+ return nxmit;
>
[ ... ]
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 2/6] tsnep: Add XDP TX support
2022-12-09 2:23 ` Saeed Mahameed
@ 2022-12-09 8:02 ` Gerhard Engleder
0 siblings, 0 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-09 8:02 UTC (permalink / raw)
To: Saeed Mahameed
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 09.12.22 03:23, Saeed Mahameed wrote:
>> +static int tsnep_xdp_tx_map(struct xdp_frame *xdpf, struct tsnep_tx *tx,
>> + struct skb_shared_info *shinfo, int count,
>> + bool dma_map)
>> +{
>> + struct device *dmadev = tx->adapter->dmadev;
>> + skb_frag_t *frag;
>> + unsigned int len;
>> + struct tsnep_tx_entry *entry;
>> + void *data;
>> + struct page *page;
>> + dma_addr_t dma;
>> + int map_len = 0;
>> + int i;
>> +
>> + frag = NULL;
>> + len = xdpf->len;
>> + for (i = 0; i < count; i++) {
>> + entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE];
>> + if (dma_map) {
>
> wouldn't it have made more sense if you passed TSNEP_TX_TYPE instead of
> bool dma_map ?
> here and in tsnep_xdp_xmit_frame_ring as well..
I will give it a try.
>> +/* This function requires __netif_tx_lock is held by the caller. */
>> +static int tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf,
>> + struct tsnep_tx *tx, bool dma_map)
>> +{
>> + struct skb_shared_info *shinfo =
>> xdp_get_shared_info_from_frame(xdpf);
>> + unsigned long flags;
>> + int count = 1;
>> + struct tsnep_tx_entry *entry;
>> + int length;
>> + int i;
>> + int retval;
>
> Maciej already commented on this, and i agree with him, the whole series
> needs some work on rev xmas tree variable declaration, code will look much
> neater.
So far I ordered the variable declaration by variable usage with common
variables like i and retval at the end. I will take a look at that.
>> +
>> + if (unlikely(xdp_frame_has_frags(xdpf)))
>> + count += shinfo->nr_frags;
>> +
>> + spin_lock_irqsave(&tx->lock, flags);
>> +
>> + if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1 + count)) {
>> + /* prevent full TX ring due to XDP */
>> + spin_unlock_irqrestore(&tx->lock, flags);
>> +
>> + return -EBUSY;
>
> You don't really do anything with the retval, so just return a boolean.
Will be changed.
>> + }
>> +
>> + entry = &tx->entry[tx->write];
>> + entry->xdpf = xdpf;
>> +
>> + retval = tsnep_xdp_tx_map(xdpf, tx, shinfo, count, dma_map);
>> + if (retval < 0) {
>> + tsnep_tx_unmap(tx, tx->write, count);
>> + entry->xdpf = NULL;
>> +
>> + tx->dropped++;
>> +
>> + spin_unlock_irqrestore(&tx->lock, flags);
>> +
>> + netdev_err(tx->adapter->netdev, "XDP TX DMA map failed\n");
>
> please avoid printing in data path, find other means to expose such info.
> stats
> tracepoints
> debug_message rate limited, etc ..
I will improve that.
Gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup
2022-12-08 5:40 [PATCH net-next v2 0/6] tsnep: XDP support Gerhard Engleder
2022-12-08 5:40 ` [PATCH net-next v2 1/6] tsnep: Add adapter down state Gerhard Engleder
2022-12-08 5:40 ` [PATCH net-next v2 2/6] tsnep: Add XDP TX support Gerhard Engleder
@ 2022-12-08 5:40 ` Gerhard Engleder
2022-12-09 0:43 ` Saeed Mahameed
2022-12-08 5:40 ` [PATCH net-next v2 4/6] tsnep: Prepare RX buffer for XDP support Gerhard Engleder
` (2 subsequent siblings)
5 siblings, 1 reply; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 5:40 UTC (permalink / raw)
To: netdev, bpf
Cc: davem, kuba, edumazet, pabeni, ast, daniel, hawk, john.fastabend,
Gerhard Engleder
Implement setup of BPF programs for XDP RX path with command
XDP_SETUP_PROG of ndo_bpf(). This is prework for XDP RX path support.
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/Makefile | 2 +-
drivers/net/ethernet/engleder/tsnep.h | 13 +++++++++++
drivers/net/ethernet/engleder/tsnep_main.c | 17 ++++++++++++--
drivers/net/ethernet/engleder/tsnep_xdp.c | 27 ++++++++++++++++++++++
4 files changed, 56 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/ethernet/engleder/tsnep_xdp.c
diff --git a/drivers/net/ethernet/engleder/Makefile b/drivers/net/ethernet/engleder/Makefile
index b6e3b16623de..0901801cfcc9 100644
--- a/drivers/net/ethernet/engleder/Makefile
+++ b/drivers/net/ethernet/engleder/Makefile
@@ -6,5 +6,5 @@
obj-$(CONFIG_TSNEP) += tsnep.o
tsnep-objs := tsnep_main.o tsnep_ethtool.o tsnep_ptp.o tsnep_tc.o \
- tsnep_rxnfc.o $(tsnep-y)
+ tsnep_rxnfc.o tsnep_xdp.o $(tsnep-y)
tsnep-$(CONFIG_TSNEP_SELFTESTS) += tsnep_selftests.o
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index 29b04127f529..0e7fc36a64e1 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -183,6 +183,8 @@ struct tsnep_adapter {
int rxnfc_count;
int rxnfc_max;
+ struct bpf_prog *xdp_prog;
+
int num_tx_queues;
struct tsnep_tx tx[TSNEP_MAX_QUEUES];
int num_rx_queues;
@@ -192,6 +194,9 @@ struct tsnep_adapter {
struct tsnep_queue queue[TSNEP_MAX_QUEUES];
};
+int tsnep_netdev_open(struct net_device *netdev);
+int tsnep_netdev_close(struct net_device *netdev);
+
extern const struct ethtool_ops tsnep_ethtool_ops;
int tsnep_ptp_init(struct tsnep_adapter *adapter);
@@ -215,6 +220,14 @@ int tsnep_rxnfc_add_rule(struct tsnep_adapter *adapter,
int tsnep_rxnfc_del_rule(struct tsnep_adapter *adapter,
struct ethtool_rxnfc *cmd);
+int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack);
+
+static inline bool tsnep_xdp_is_enabled(struct tsnep_adapter *adapter)
+{
+ return !!adapter->xdp_prog;
+}
+
#if IS_ENABLED(CONFIG_TSNEP_SELFTESTS)
int tsnep_ethtool_get_test_count(void);
void tsnep_ethtool_get_test_strings(u8 *data);
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index b97cfd5fa1fa..0a1957259df8 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -1233,7 +1233,7 @@ static void tsnep_free_irq(struct tsnep_queue *queue, bool first)
memset(queue->name, 0, sizeof(queue->name));
}
-static int tsnep_netdev_open(struct net_device *netdev)
+int tsnep_netdev_open(struct net_device *netdev)
{
struct tsnep_adapter *adapter = netdev_priv(netdev);
int i;
@@ -1312,7 +1312,7 @@ static int tsnep_netdev_open(struct net_device *netdev)
return retval;
}
-static int tsnep_netdev_close(struct net_device *netdev)
+int tsnep_netdev_close(struct net_device *netdev)
{
struct tsnep_adapter *adapter = netdev_priv(netdev);
int i;
@@ -1481,6 +1481,18 @@ static ktime_t tsnep_netdev_get_tstamp(struct net_device *netdev,
return ns_to_ktime(timestamp);
}
+static int tsnep_netdev_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ struct tsnep_adapter *adapter = netdev_priv(dev);
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return tsnep_xdp_setup_prog(adapter, bpf->prog, bpf->extack);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int tsnep_netdev_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **xdp, u32 flags)
{
@@ -1533,6 +1545,7 @@ static const struct net_device_ops tsnep_netdev_ops = {
.ndo_set_features = tsnep_netdev_set_features,
.ndo_get_tstamp = tsnep_netdev_get_tstamp,
.ndo_setup_tc = tsnep_tc_setup,
+ .ndo_bpf = tsnep_netdev_bpf,
.ndo_xdp_xmit = tsnep_netdev_xdp_xmit,
};
diff --git a/drivers/net/ethernet/engleder/tsnep_xdp.c b/drivers/net/ethernet/engleder/tsnep_xdp.c
new file mode 100644
index 000000000000..02d84dfbdde4
--- /dev/null
+++ b/drivers/net/ethernet/engleder/tsnep_xdp.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Gerhard Engleder <gerhard@engleder-embedded.com> */
+
+#include <linux/if_vlan.h>
+#include <net/xdp_sock_drv.h>
+
+#include "tsnep.h"
+
+int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = adapter->netdev;
+ bool if_running = netif_running(dev);
+ struct bpf_prog *old_prog;
+
+ if (if_running)
+ tsnep_netdev_close(dev);
+
+ old_prog = xchg(&adapter->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (if_running)
+ tsnep_netdev_open(dev);
+
+ return 0;
+}
--
2.30.2
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup
2022-12-08 5:40 ` [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup Gerhard Engleder
@ 2022-12-09 0:43 ` Saeed Mahameed
2022-12-09 8:06 ` Gerhard Engleder
0 siblings, 1 reply; 21+ messages in thread
From: Saeed Mahameed @ 2022-12-09 0:43 UTC (permalink / raw)
To: Gerhard Engleder
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 08 Dec 06:40, Gerhard Engleder wrote:
>Implement setup of BPF programs for XDP RX path with command
>XDP_SETUP_PROG of ndo_bpf(). This is prework for XDP RX path support.
>
>Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
[ ... ]
>+int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct bpf_prog *prog,
>+ struct netlink_ext_ack *extack)
>+{
>+ struct net_device *dev = adapter->netdev;
>+ bool if_running = netif_running(dev);
>+ struct bpf_prog *old_prog;
>+
>+ if (if_running)
>+ tsnep_netdev_close(dev);
>+
>+ old_prog = xchg(&adapter->xdp_prog, prog);
>+ if (old_prog)
>+ bpf_prog_put(old_prog);
>+
>+ if (if_running)
>+ tsnep_netdev_open(dev);
this could fail silently, and then cause double free, when close ndo
will be called, the stack won't be aware of the closed state..
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup
2022-12-09 0:43 ` Saeed Mahameed
@ 2022-12-09 8:06 ` Gerhard Engleder
2022-12-21 20:19 ` Gerhard Engleder
0 siblings, 1 reply; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-09 8:06 UTC (permalink / raw)
To: Saeed Mahameed
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 09.12.22 01:43, Saeed Mahameed wrote:
>> +int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct
>> bpf_prog *prog,
>> + struct netlink_ext_ack *extack)
>> +{
>> + struct net_device *dev = adapter->netdev;
>> + bool if_running = netif_running(dev);
>> + struct bpf_prog *old_prog;
>> +
>> + if (if_running)
>> + tsnep_netdev_close(dev);
>> +
>> + old_prog = xchg(&adapter->xdp_prog, prog);
>> + if (old_prog)
>> + bpf_prog_put(old_prog);
>> +
>> + if (if_running)
>> + tsnep_netdev_open(dev);
>
> this could fail silently, and then cause double free, when close ndo
> will be called, the stack won't be aware of the closed state..
I will ensure that no double free will happen when ndo_close is called.
Gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup
2022-12-09 8:06 ` Gerhard Engleder
@ 2022-12-21 20:19 ` Gerhard Engleder
0 siblings, 0 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-21 20:19 UTC (permalink / raw)
To: Saeed Mahameed
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 09.12.22 09:06, Gerhard Engleder wrote:
> On 09.12.22 01:43, Saeed Mahameed wrote:
>>> +int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct
>>> bpf_prog *prog,
>>> + struct netlink_ext_ack *extack)
>>> +{
>>> + struct net_device *dev = adapter->netdev;
>>> + bool if_running = netif_running(dev);
>>> + struct bpf_prog *old_prog;
>>> +
>>> + if (if_running)
>>> + tsnep_netdev_close(dev);
>>> +
>>> + old_prog = xchg(&adapter->xdp_prog, prog);
>>> + if (old_prog)
>>> + bpf_prog_put(old_prog);
>>> +
>>> + if (if_running)
>>> + tsnep_netdev_open(dev);
>>
>> this could fail silently, and then cause double free, when close ndo
>> will be called, the stack won't be aware of the closed state..
>
> I will ensure that no double free will happen when ndo_close is called.
Other drivers like igc/igb/netsec/stmmac also fail silently and
I don't see any measures against double free in these drivers.
mvneta forwards the return value of open. How are these drivers
solving this issue? I cannot find this detail, but I also do not
believe that all these drivers are buggy.
gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH net-next v2 4/6] tsnep: Prepare RX buffer for XDP support
2022-12-08 5:40 [PATCH net-next v2 0/6] tsnep: XDP support Gerhard Engleder
` (2 preceding siblings ...)
2022-12-08 5:40 ` [PATCH net-next v2 3/6] tsnep: Support XDP BPF program setup Gerhard Engleder
@ 2022-12-08 5:40 ` Gerhard Engleder
2022-12-09 0:46 ` Saeed Mahameed
2022-12-08 5:40 ` [PATCH net-next v2 5/6] tsnep: Add RX queue info " Gerhard Engleder
2022-12-08 5:40 ` [PATCH net-next v2 6/6] tsnep: Add XDP RX support Gerhard Engleder
5 siblings, 1 reply; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 5:40 UTC (permalink / raw)
To: netdev, bpf
Cc: davem, kuba, edumazet, pabeni, ast, daniel, hawk, john.fastabend,
Gerhard Engleder
Reserve XDP_PACKET_HEADROOM in front of RX buffer if XDP is enabled.
Also set DMA direction properly in this case.
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/tsnep_main.c | 31 +++++++++++++++-------
1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 0a1957259df8..ebfc08c1c46d 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -26,9 +26,10 @@
#include <linux/etherdevice.h>
#include <linux/phy.h>
#include <linux/iopoll.h>
+#include <linux/bpf.h>
#define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
-#define TSNEP_HEADROOM ALIGN(TSNEP_SKB_PAD, 4)
+#define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
#define TSNEP_MAX_RX_BUF_SIZE (PAGE_SIZE - TSNEP_HEADROOM - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -781,6 +782,16 @@ static void tsnep_tx_close(struct tsnep_tx *tx)
tsnep_tx_ring_cleanup(tx);
}
+static unsigned int tsnep_rx_offset(struct tsnep_rx *rx)
+{
+ struct tsnep_adapter *adapter = rx->adapter;
+
+ if (tsnep_xdp_is_enabled(adapter))
+ return XDP_PACKET_HEADROOM;
+
+ return TSNEP_SKB_PAD;
+}
+
static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
{
struct device *dmadev = rx->adapter->dmadev;
@@ -842,9 +853,10 @@ static int tsnep_rx_ring_init(struct tsnep_rx *rx)
pp_params.pool_size = TSNEP_RING_SIZE;
pp_params.nid = dev_to_node(dmadev);
pp_params.dev = dmadev;
- pp_params.dma_dir = DMA_FROM_DEVICE;
+ pp_params.dma_dir = tsnep_xdp_is_enabled(rx->adapter) ?
+ DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
pp_params.max_len = TSNEP_MAX_RX_BUF_SIZE;
- pp_params.offset = TSNEP_SKB_PAD;
+ pp_params.offset = tsnep_rx_offset(rx);
rx->page_pool = page_pool_create(&pp_params);
if (IS_ERR(rx->page_pool)) {
retval = PTR_ERR(rx->page_pool);
@@ -879,7 +891,7 @@ static void tsnep_rx_set_page(struct tsnep_rx *rx, struct tsnep_rx_entry *entry,
entry->page = page;
entry->len = TSNEP_MAX_RX_BUF_SIZE;
entry->dma = page_pool_get_dma_addr(entry->page);
- entry->desc->rx = __cpu_to_le64(entry->dma + TSNEP_SKB_PAD);
+ entry->desc->rx = __cpu_to_le64(entry->dma + tsnep_rx_offset(rx));
}
static int tsnep_rx_alloc_buffer(struct tsnep_rx *rx, int index)
@@ -983,14 +995,14 @@ static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
return NULL;
/* update pointers within the skb to store the data */
- skb_reserve(skb, TSNEP_SKB_PAD + TSNEP_RX_INLINE_METADATA_SIZE);
+ skb_reserve(skb, tsnep_rx_offset(rx) + TSNEP_RX_INLINE_METADATA_SIZE);
__skb_put(skb, length - TSNEP_RX_INLINE_METADATA_SIZE - ETH_FCS_LEN);
if (rx->adapter->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL) {
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
struct tsnep_rx_inline *rx_inline =
(struct tsnep_rx_inline *)(page_address(page) +
- TSNEP_SKB_PAD);
+ tsnep_rx_offset(rx));
skb_shinfo(skb)->tx_flags |=
SKBTX_HW_TSTAMP_NETDEV;
@@ -1050,11 +1062,12 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
*/
dma_rmb();
- prefetch(page_address(entry->page) + TSNEP_SKB_PAD);
+ prefetch(page_address(entry->page) + tsnep_rx_offset(rx));
length = __le32_to_cpu(entry->desc_wb->properties) &
TSNEP_DESC_LENGTH_MASK;
- dma_sync_single_range_for_cpu(dmadev, entry->dma, TSNEP_SKB_PAD,
- length, dma_dir);
+ dma_sync_single_range_for_cpu(dmadev, entry->dma,
+ tsnep_rx_offset(rx), length,
+ dma_dir);
rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
desc_available++;
--
2.30.2
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 4/6] tsnep: Prepare RX buffer for XDP support
2022-12-08 5:40 ` [PATCH net-next v2 4/6] tsnep: Prepare RX buffer for XDP support Gerhard Engleder
@ 2022-12-09 0:46 ` Saeed Mahameed
0 siblings, 0 replies; 21+ messages in thread
From: Saeed Mahameed @ 2022-12-09 0:46 UTC (permalink / raw)
To: Gerhard Engleder
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 08 Dec 06:40, Gerhard Engleder wrote:
>Reserve XDP_PACKET_HEADROOM in front of RX buffer if XDP is enabled.
>Also set DMA direction properly in this case.
>
>Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
LGTM:
Reviewed-by: Saeed Mahameed <saeed@kernel.org>
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH net-next v2 5/6] tsnep: Add RX queue info for XDP support
2022-12-08 5:40 [PATCH net-next v2 0/6] tsnep: XDP support Gerhard Engleder
` (3 preceding siblings ...)
2022-12-08 5:40 ` [PATCH net-next v2 4/6] tsnep: Prepare RX buffer for XDP support Gerhard Engleder
@ 2022-12-08 5:40 ` Gerhard Engleder
2022-12-08 12:59 ` Maciej Fijalkowski
2022-12-08 5:40 ` [PATCH net-next v2 6/6] tsnep: Add XDP RX support Gerhard Engleder
5 siblings, 1 reply; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 5:40 UTC (permalink / raw)
To: netdev, bpf
Cc: davem, kuba, edumazet, pabeni, ast, daniel, hawk, john.fastabend,
Gerhard Engleder
Register xdp_rxq_info with page_pool memory model. This is needed for
XDP buffer handling.
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/tsnep.h | 5 ++--
drivers/net/ethernet/engleder/tsnep_main.c | 34 +++++++++++++++++-----
2 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index 0e7fc36a64e1..70bc133d4a9d 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -127,6 +127,7 @@ struct tsnep_rx {
u32 owner_counter;
int increment_owner_counter;
struct page_pool *page_pool;
+ struct xdp_rxq_info xdp_rxq;
u32 packets;
u32 bytes;
@@ -139,11 +140,11 @@ struct tsnep_queue {
struct tsnep_adapter *adapter;
char name[IFNAMSIZ + 9];
+ struct napi_struct napi;
+
struct tsnep_tx *tx;
struct tsnep_rx *rx;
- struct napi_struct napi;
-
int irq;
u32 irq_mask;
void __iomem *irq_delay_addr;
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index ebfc08c1c46d..2b662a98b62a 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -806,6 +806,9 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
entry->page = NULL;
}
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+ xdp_rxq_info_unreg(&rx->xdp_rxq);
+
if (rx->page_pool)
page_pool_destroy(rx->page_pool);
@@ -821,7 +824,7 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
}
}
-static int tsnep_rx_ring_init(struct tsnep_rx *rx)
+static int tsnep_rx_ring_init(struct tsnep_rx *rx, unsigned int napi_id)
{
struct device *dmadev = rx->adapter->dmadev;
struct tsnep_rx_entry *entry;
@@ -864,6 +867,15 @@ static int tsnep_rx_ring_init(struct tsnep_rx *rx)
goto failed;
}
+ retval = xdp_rxq_info_reg(&rx->xdp_rxq, rx->adapter->netdev,
+ rx->queue_index, napi_id);
+ if (retval)
+ goto failed;
+ retval = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ rx->page_pool);
+ if (retval)
+ goto failed;
+
for (i = 0; i < TSNEP_RING_SIZE; i++) {
entry = &rx->entry[i];
next_entry = &rx->entry[(i + 1) % TSNEP_RING_SIZE];
@@ -1112,7 +1124,8 @@ static bool tsnep_rx_pending(struct tsnep_rx *rx)
}
static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
- int queue_index, struct tsnep_rx *rx)
+ unsigned int napi_id, int queue_index,
+ struct tsnep_rx *rx)
{
dma_addr_t dma;
int retval;
@@ -1122,7 +1135,7 @@ static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
rx->addr = addr;
rx->queue_index = queue_index;
- retval = tsnep_rx_ring_init(rx);
+ retval = tsnep_rx_ring_init(rx, napi_id);
if (retval)
return retval;
@@ -1250,6 +1263,7 @@ int tsnep_netdev_open(struct net_device *netdev)
{
struct tsnep_adapter *adapter = netdev_priv(netdev);
int i;
+ unsigned int napi_id;
void __iomem *addr;
int tx_queue_index = 0;
int rx_queue_index = 0;
@@ -1257,6 +1271,11 @@ int tsnep_netdev_open(struct net_device *netdev)
for (i = 0; i < adapter->num_queues; i++) {
adapter->queue[i].adapter = adapter;
+
+ netif_napi_add(adapter->netdev, &adapter->queue[i].napi,
+ tsnep_poll);
+ napi_id = adapter->queue[i].napi.napi_id;
+
if (adapter->queue[i].tx) {
addr = adapter->addr + TSNEP_QUEUE(tx_queue_index);
retval = tsnep_tx_open(adapter, addr, tx_queue_index,
@@ -1267,7 +1286,7 @@ int tsnep_netdev_open(struct net_device *netdev)
}
if (adapter->queue[i].rx) {
addr = adapter->addr + TSNEP_QUEUE(rx_queue_index);
- retval = tsnep_rx_open(adapter, addr,
+ retval = tsnep_rx_open(adapter, addr, napi_id,
rx_queue_index,
adapter->queue[i].rx);
if (retval)
@@ -1299,8 +1318,6 @@ int tsnep_netdev_open(struct net_device *netdev)
goto phy_failed;
for (i = 0; i < adapter->num_queues; i++) {
- netif_napi_add(adapter->netdev, &adapter->queue[i].napi,
- tsnep_poll);
napi_enable(&adapter->queue[i].napi);
tsnep_enable_irq(adapter, adapter->queue[i].irq_mask);
@@ -1321,6 +1338,8 @@ int tsnep_netdev_open(struct net_device *netdev)
tsnep_rx_close(adapter->queue[i].rx);
if (adapter->queue[i].tx)
tsnep_tx_close(adapter->queue[i].tx);
+
+ netif_napi_del(&adapter->queue[i].napi);
}
return retval;
}
@@ -1339,7 +1358,6 @@ int tsnep_netdev_close(struct net_device *netdev)
tsnep_disable_irq(adapter, adapter->queue[i].irq_mask);
napi_disable(&adapter->queue[i].napi);
- netif_napi_del(&adapter->queue[i].napi);
tsnep_free_irq(&adapter->queue[i], i == 0);
@@ -1347,6 +1365,8 @@ int tsnep_netdev_close(struct net_device *netdev)
tsnep_rx_close(adapter->queue[i].rx);
if (adapter->queue[i].tx)
tsnep_tx_close(adapter->queue[i].tx);
+
+ netif_napi_del(&adapter->queue[i].napi);
}
return 0;
--
2.30.2
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 5/6] tsnep: Add RX queue info for XDP support
2022-12-08 5:40 ` [PATCH net-next v2 5/6] tsnep: Add RX queue info " Gerhard Engleder
@ 2022-12-08 12:59 ` Maciej Fijalkowski
2022-12-08 20:32 ` Gerhard Engleder
0 siblings, 1 reply; 21+ messages in thread
From: Maciej Fijalkowski @ 2022-12-08 12:59 UTC (permalink / raw)
To: Gerhard Engleder
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On Thu, Dec 08, 2022 at 06:40:44AM +0100, Gerhard Engleder wrote:
> Register xdp_rxq_info with page_pool memory model. This is needed for
> XDP buffer handling.
>
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> ---
> drivers/net/ethernet/engleder/tsnep.h | 5 ++--
> drivers/net/ethernet/engleder/tsnep_main.c | 34 +++++++++++++++++-----
> 2 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
> index 0e7fc36a64e1..70bc133d4a9d 100644
> --- a/drivers/net/ethernet/engleder/tsnep.h
> +++ b/drivers/net/ethernet/engleder/tsnep.h
> @@ -127,6 +127,7 @@ struct tsnep_rx {
> u32 owner_counter;
> int increment_owner_counter;
> struct page_pool *page_pool;
> + struct xdp_rxq_info xdp_rxq;
this occupies full cacheline, did you make sure that you don't break
tsnep_rx layout with having xdp_rxq_info in the middle of the way?
>
> u32 packets;
> u32 bytes;
> @@ -139,11 +140,11 @@ struct tsnep_queue {
> struct tsnep_adapter *adapter;
> char name[IFNAMSIZ + 9];
>
> + struct napi_struct napi;
> +
> struct tsnep_tx *tx;
> struct tsnep_rx *rx;
>
> - struct napi_struct napi;
why this move?
> -
> int irq;
> u32 irq_mask;
> void __iomem *irq_delay_addr;
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index ebfc08c1c46d..2b662a98b62a 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -806,6 +806,9 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
> entry->page = NULL;
> }
>
> + if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
> + xdp_rxq_info_unreg(&rx->xdp_rxq);
> +
> if (rx->page_pool)
> page_pool_destroy(rx->page_pool);
>
> @@ -821,7 +824,7 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
> }
> }
>
> -static int tsnep_rx_ring_init(struct tsnep_rx *rx)
> +static int tsnep_rx_ring_init(struct tsnep_rx *rx, unsigned int napi_id)
> {
> struct device *dmadev = rx->adapter->dmadev;
> struct tsnep_rx_entry *entry;
> @@ -864,6 +867,15 @@ static int tsnep_rx_ring_init(struct tsnep_rx *rx)
> goto failed;
> }
>
> + retval = xdp_rxq_info_reg(&rx->xdp_rxq, rx->adapter->netdev,
> + rx->queue_index, napi_id);
> + if (retval)
> + goto failed;
> + retval = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, MEM_TYPE_PAGE_POOL,
> + rx->page_pool);
> + if (retval)
> + goto failed;
> +
> for (i = 0; i < TSNEP_RING_SIZE; i++) {
> entry = &rx->entry[i];
> next_entry = &rx->entry[(i + 1) % TSNEP_RING_SIZE];
> @@ -1112,7 +1124,8 @@ static bool tsnep_rx_pending(struct tsnep_rx *rx)
> }
>
> static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
> - int queue_index, struct tsnep_rx *rx)
> + unsigned int napi_id, int queue_index,
> + struct tsnep_rx *rx)
> {
> dma_addr_t dma;
> int retval;
> @@ -1122,7 +1135,7 @@ static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
> rx->addr = addr;
> rx->queue_index = queue_index;
>
> - retval = tsnep_rx_ring_init(rx);
> + retval = tsnep_rx_ring_init(rx, napi_id);
> if (retval)
> return retval;
>
> @@ -1250,6 +1263,7 @@ int tsnep_netdev_open(struct net_device *netdev)
> {
> struct tsnep_adapter *adapter = netdev_priv(netdev);
> int i;
> + unsigned int napi_id;
> void __iomem *addr;
> int tx_queue_index = 0;
> int rx_queue_index = 0;
> @@ -1257,6 +1271,11 @@ int tsnep_netdev_open(struct net_device *netdev)
>
> for (i = 0; i < adapter->num_queues; i++) {
> adapter->queue[i].adapter = adapter;
> +
> + netif_napi_add(adapter->netdev, &adapter->queue[i].napi,
> + tsnep_poll);
> + napi_id = adapter->queue[i].napi.napi_id;
> +
> if (adapter->queue[i].tx) {
> addr = adapter->addr + TSNEP_QUEUE(tx_queue_index);
> retval = tsnep_tx_open(adapter, addr, tx_queue_index,
> @@ -1267,7 +1286,7 @@ int tsnep_netdev_open(struct net_device *netdev)
> }
> if (adapter->queue[i].rx) {
> addr = adapter->addr + TSNEP_QUEUE(rx_queue_index);
> - retval = tsnep_rx_open(adapter, addr,
> + retval = tsnep_rx_open(adapter, addr, napi_id,
> rx_queue_index,
> adapter->queue[i].rx);
> if (retval)
> @@ -1299,8 +1318,6 @@ int tsnep_netdev_open(struct net_device *netdev)
> goto phy_failed;
>
> for (i = 0; i < adapter->num_queues; i++) {
> - netif_napi_add(adapter->netdev, &adapter->queue[i].napi,
> - tsnep_poll);
> napi_enable(&adapter->queue[i].napi);
>
> tsnep_enable_irq(adapter, adapter->queue[i].irq_mask);
> @@ -1321,6 +1338,8 @@ int tsnep_netdev_open(struct net_device *netdev)
> tsnep_rx_close(adapter->queue[i].rx);
> if (adapter->queue[i].tx)
> tsnep_tx_close(adapter->queue[i].tx);
> +
> + netif_napi_del(&adapter->queue[i].napi);
> }
> return retval;
> }
> @@ -1339,7 +1358,6 @@ int tsnep_netdev_close(struct net_device *netdev)
> tsnep_disable_irq(adapter, adapter->queue[i].irq_mask);
>
> napi_disable(&adapter->queue[i].napi);
> - netif_napi_del(&adapter->queue[i].napi);
>
> tsnep_free_irq(&adapter->queue[i], i == 0);
>
> @@ -1347,6 +1365,8 @@ int tsnep_netdev_close(struct net_device *netdev)
> tsnep_rx_close(adapter->queue[i].rx);
> if (adapter->queue[i].tx)
> tsnep_tx_close(adapter->queue[i].tx);
> +
> + netif_napi_del(&adapter->queue[i].napi);
> }
>
> return 0;
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 5/6] tsnep: Add RX queue info for XDP support
2022-12-08 12:59 ` Maciej Fijalkowski
@ 2022-12-08 20:32 ` Gerhard Engleder
2022-12-09 0:53 ` Saeed Mahameed
0 siblings, 1 reply; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 20:32 UTC (permalink / raw)
To: Maciej Fijalkowski
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 08.12.22 13:59, Maciej Fijalkowski wrote:
> On Thu, Dec 08, 2022 at 06:40:44AM +0100, Gerhard Engleder wrote:
>> Register xdp_rxq_info with page_pool memory model. This is needed for
>> XDP buffer handling.
>>
>> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
>> ---
>> drivers/net/ethernet/engleder/tsnep.h | 5 ++--
>> drivers/net/ethernet/engleder/tsnep_main.c | 34 +++++++++++++++++-----
>> 2 files changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
>> index 0e7fc36a64e1..70bc133d4a9d 100644
>> --- a/drivers/net/ethernet/engleder/tsnep.h
>> +++ b/drivers/net/ethernet/engleder/tsnep.h
>> @@ -127,6 +127,7 @@ struct tsnep_rx {
>> u32 owner_counter;
>> int increment_owner_counter;
>> struct page_pool *page_pool;
>> + struct xdp_rxq_info xdp_rxq;
>
> this occupies full cacheline, did you make sure that you don't break
> tsnep_rx layout with having xdp_rxq_info in the middle of the way?
Actually I did no cacheline optimisation for this structure so far.
I saw that igb/igc put xdp_rxq_info to the end. Is this best practice
to prevent other variables in the same cacheline as xdp_rxq?
>>
>> u32 packets;
>> u32 bytes;
>> @@ -139,11 +140,11 @@ struct tsnep_queue {
>> struct tsnep_adapter *adapter;
>> char name[IFNAMSIZ + 9];
>>
>> + struct napi_struct napi;
>> +
>> struct tsnep_tx *tx;
>> struct tsnep_rx *rx;
>>
>> - struct napi_struct napi;
>
> why this move?
I reordered it because napi is now initialised before tx/rx. A cosmetic
move.
Gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 5/6] tsnep: Add RX queue info for XDP support
2022-12-08 20:32 ` Gerhard Engleder
@ 2022-12-09 0:53 ` Saeed Mahameed
2022-12-09 8:11 ` Gerhard Engleder
0 siblings, 1 reply; 21+ messages in thread
From: Saeed Mahameed @ 2022-12-09 0:53 UTC (permalink / raw)
To: Gerhard Engleder
Cc: Maciej Fijalkowski, netdev, bpf, davem, kuba, edumazet, pabeni,
ast, daniel, hawk, john.fastabend
On 08 Dec 21:32, Gerhard Engleder wrote:
>On 08.12.22 13:59, Maciej Fijalkowski wrote:
>>On Thu, Dec 08, 2022 at 06:40:44AM +0100, Gerhard Engleder wrote:
>>>Register xdp_rxq_info with page_pool memory model. This is needed for
>>>XDP buffer handling.
>>>
>>>Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
>>>---
>>> drivers/net/ethernet/engleder/tsnep.h | 5 ++--
>>> drivers/net/ethernet/engleder/tsnep_main.c | 34 +++++++++++++++++-----
>>> 2 files changed, 30 insertions(+), 9 deletions(-)
>>>
>>>diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
>>>index 0e7fc36a64e1..70bc133d4a9d 100644
>>>--- a/drivers/net/ethernet/engleder/tsnep.h
>>>+++ b/drivers/net/ethernet/engleder/tsnep.h
>>>@@ -127,6 +127,7 @@ struct tsnep_rx {
>>> u32 owner_counter;
>>> int increment_owner_counter;
>>> struct page_pool *page_pool;
>>>+ struct xdp_rxq_info xdp_rxq;
>>
>>this occupies full cacheline, did you make sure that you don't break
>>tsnep_rx layout with having xdp_rxq_info in the middle of the way?
>
>Actually I did no cacheline optimisation for this structure so far.
>I saw that igb/igc put xdp_rxq_info to the end. Is this best practice
>to prevent other variables in the same cacheline of xdp_rxq?
a rule of thumb, organize the structure in the same order the fields are
being accessed in the data path.. but that doesn't mean you can skip some
layout testing, via pahole for example..
It's up to you and the maintainer of this driver to decide how critical this
is.
Reviewed-by: Saeed Mahameed <saeed@kernel.org>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 5/6] tsnep: Add RX queue info for XDP support
2022-12-09 0:53 ` Saeed Mahameed
@ 2022-12-09 8:11 ` Gerhard Engleder
0 siblings, 0 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-09 8:11 UTC (permalink / raw)
To: Saeed Mahameed
Cc: Maciej Fijalkowski, netdev, bpf, davem, kuba, edumazet, pabeni,
ast, daniel, hawk, john.fastabend
On 09.12.22 01:53, Saeed Mahameed wrote:
> On 08 Dec 21:32, Gerhard Engleder wrote:
>> On 08.12.22 13:59, Maciej Fijalkowski wrote:
>>> On Thu, Dec 08, 2022 at 06:40:44AM +0100, Gerhard Engleder wrote:
>>>> Register xdp_rxq_info with page_pool memory model. This is needed for
>>>> XDP buffer handling.
>>>>
>>>> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
>>>> ---
>>>> drivers/net/ethernet/engleder/tsnep.h | 5 ++--
>>>> drivers/net/ethernet/engleder/tsnep_main.c | 34 +++++++++++++++++-----
>>>> 2 files changed, 30 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/engleder/tsnep.h
>>>> b/drivers/net/ethernet/engleder/tsnep.h
>>>> index 0e7fc36a64e1..70bc133d4a9d 100644
>>>> --- a/drivers/net/ethernet/engleder/tsnep.h
>>>> +++ b/drivers/net/ethernet/engleder/tsnep.h
>>>> @@ -127,6 +127,7 @@ struct tsnep_rx {
>>>> u32 owner_counter;
>>>> int increment_owner_counter;
>>>> struct page_pool *page_pool;
>>>> + struct xdp_rxq_info xdp_rxq;
>>>
>>> this occupies full cacheline, did you make sure that you don't break
>>> tsnep_rx layout with having xdp_rxq_info in the middle of the way?
>>
>> Actually I did no cacheline optimisation for this structure so far.
>> I saw that igb/igc put xdp_rxq_info to the end. Is this best practice
>> to prevent other variables in the same cacheline of xdp_rxq?
>
> a rule of thumb, organize the structure in the same order they are
> being accessed in the data path.. but this doesn't go without saying you
> need to do some layout testing via pahole for example..
> It's up to you and the maintainer of this driver to decide how critical
> this
> is.
>
> Reviewed-by: Saeed Mahameed <saeed@kernel.org>
Thanks for the clarification, I will think about it.
I wrote the driver and I'm responsible to keep it working. Is this equal
to being the maintainer?
Thanks for the review!
Gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH net-next v2 6/6] tsnep: Add XDP RX support
2022-12-08 5:40 [PATCH net-next v2 0/6] tsnep: XDP support Gerhard Engleder
` (4 preceding siblings ...)
2022-12-08 5:40 ` [PATCH net-next v2 5/6] tsnep: Add RX queue info " Gerhard Engleder
@ 2022-12-08 5:40 ` Gerhard Engleder
2022-12-08 13:40 ` Maciej Fijalkowski
5 siblings, 1 reply; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 5:40 UTC (permalink / raw)
To: netdev, bpf
Cc: davem, kuba, edumazet, pabeni, ast, daniel, hawk, john.fastabend,
Gerhard Engleder
If BPF program is set up, then run BPF program for every received frame
and execute the selected action.
Test results with A53 1.2GHz:
XDP_DROP (samples/bpf/xdp1)
proto 17: 883878 pkt/s
XDP_TX (samples/bpf/xdp2)
proto 17: 255693 pkt/s
XDP_REDIRECT (samples/bpf/xdpsock)
sock0@eth2:0 rxdrop xdp-drv
pps pkts 1.00
rx 855,582 5,404,523
tx 0 0
XDP_REDIRECT (samples/bpf/xdp_redirect)
eth2->eth1 613,267 rx/s 0 err,drop/s 613,272 xmit/s
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/tsnep_main.c | 126 +++++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 2b662a98b62a..d59cb714c8cd 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -27,6 +27,7 @@
#include <linux/phy.h>
#include <linux/iopoll.h>
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
@@ -44,6 +45,9 @@
#define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
+#define TSNEP_XDP_TX BIT(0)
+#define TSNEP_XDP_REDIRECT BIT(1)
+
enum {
__TSNEP_DOWN,
};
@@ -626,6 +630,33 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
}
+static int tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
+ struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+ int cpu = smp_processor_id();
+ int queue;
+ struct netdev_queue *nq;
+ int retval;
+
+ if (unlikely(!xdpf))
+ return -EFAULT;
+
+ queue = cpu % adapter->num_tx_queues;
+ nq = netdev_get_tx_queue(adapter->netdev, queue);
+
+ __netif_tx_lock(nq, cpu);
+
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+
+ retval = tsnep_xdp_xmit_frame_ring(xdpf, &adapter->tx[queue], false);
+
+ __netif_tx_unlock(nq);
+
+ return retval;
+}
+
static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
{
unsigned long flags;
@@ -792,6 +823,11 @@ static unsigned int tsnep_rx_offset(struct tsnep_rx *rx)
return TSNEP_SKB_PAD;
}
+static unsigned int tsnep_rx_offset_xdp(void)
+{
+ return XDP_PACKET_HEADROOM;
+}
+
static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
{
struct device *dmadev = rx->adapter->dmadev;
@@ -997,6 +1033,67 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
return i;
}
+static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
+ struct xdp_buff *xdp, int *status)
+{
+ unsigned int length;
+ unsigned int sync;
+ u32 act;
+
+ length = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
+ sync = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
+ sync = max(sync, length);
+
+ switch (act) {
+ case XDP_PASS:
+ return false;
+ case XDP_TX:
+ if (tsnep_xdp_xmit_back(rx->adapter, xdp) < 0)
+ goto out_failure;
+ *status |= TSNEP_XDP_TX;
+ return true;
+ case XDP_REDIRECT:
+ if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
+ goto out_failure;
+ *status |= TSNEP_XDP_REDIRECT;
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+out_failure:
+ trace_xdp_exception(rx->adapter->netdev, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
+ sync, true);
+ return true;
+ }
+}
+
+static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status)
+{
+ int cpu = smp_processor_id();
+ int queue;
+ struct netdev_queue *nq;
+
+ if (status & TSNEP_XDP_TX) {
+ queue = cpu % adapter->num_tx_queues;
+ nq = netdev_get_tx_queue(adapter->netdev, queue);
+
+ __netif_tx_lock(nq, cpu);
+ tsnep_xdp_xmit_flush(&adapter->tx[queue]);
+ __netif_tx_unlock(nq);
+ }
+
+ if (status & TSNEP_XDP_REDIRECT)
+ xdp_do_flush();
+}
+
static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
int length)
{
@@ -1035,12 +1132,16 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
int desc_available;
int done = 0;
enum dma_data_direction dma_dir;
+ struct bpf_prog *prog;
struct tsnep_rx_entry *entry;
+ struct xdp_buff xdp;
+ int xdp_status = 0;
struct sk_buff *skb;
int length;
desc_available = tsnep_rx_desc_available(rx);
dma_dir = page_pool_get_dma_dir(rx->page_pool);
+ prog = READ_ONCE(rx->adapter->xdp_prog);
while (likely(done < budget) && (rx->read != rx->write)) {
entry = &rx->entry[rx->read];
@@ -1084,6 +1185,28 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
desc_available++;
+ if (prog) {
+ bool consume;
+
+ xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp, page_address(entry->page),
+ tsnep_rx_offset_xdp() + TSNEP_RX_INLINE_METADATA_SIZE,
+ length - TSNEP_RX_INLINE_METADATA_SIZE,
+ false);
+
+ consume = tsnep_xdp_run_prog(rx, prog, &xdp,
+ &xdp_status);
+ if (consume) {
+ rx->packets++;
+ rx->bytes +=
+ length - TSNEP_RX_INLINE_METADATA_SIZE;
+
+ entry->page = NULL;
+
+ continue;
+ }
+ }
+
skb = tsnep_build_skb(rx, entry->page, length);
if (skb) {
page_pool_release_page(rx->page_pool, entry->page);
@@ -1102,6 +1225,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
entry->page = NULL;
}
+ if (xdp_status)
+ tsnep_finalize_xdp(rx->adapter, xdp_status);
+
if (desc_available)
tsnep_rx_refill(rx, desc_available, false);
--
2.30.2
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 6/6] tsnep: Add XDP RX support
2022-12-08 5:40 ` [PATCH net-next v2 6/6] tsnep: Add XDP RX support Gerhard Engleder
@ 2022-12-08 13:40 ` Maciej Fijalkowski
2022-12-08 22:12 ` Gerhard Engleder
0 siblings, 1 reply; 21+ messages in thread
From: Maciej Fijalkowski @ 2022-12-08 13:40 UTC (permalink / raw)
To: Gerhard Engleder
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On Thu, Dec 08, 2022 at 06:40:45AM +0100, Gerhard Engleder wrote:
> If BPF program is set up, then run BPF program for every received frame
> and execute the selected action.
>
> Test results with A53 1.2GHz:
>
> XDP_DROP (samples/bpf/xdp1)
> proto 17: 883878 pkt/s
>
> XDP_TX (samples/bpf/xdp2)
> proto 17: 255693 pkt/s
>
> XDP_REDIRECT (samples/bpf/xdpsock)
> sock0@eth2:0 rxdrop xdp-drv
> pps pkts 1.00
> rx 855,582 5,404,523
> tx 0 0
>
> XDP_REDIRECT (samples/bpf/xdp_redirect)
> eth2->eth1 613,267 rx/s 0 err,drop/s 613,272 xmit/s
>
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> ---
> drivers/net/ethernet/engleder/tsnep_main.c | 126 +++++++++++++++++++++
> 1 file changed, 126 insertions(+)
>
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index 2b662a98b62a..d59cb714c8cd 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -27,6 +27,7 @@
> #include <linux/phy.h>
> #include <linux/iopoll.h>
> #include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>
> #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
> #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
> @@ -44,6 +45,9 @@
> #define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
> ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
>
> +#define TSNEP_XDP_TX BIT(0)
> +#define TSNEP_XDP_REDIRECT BIT(1)
> +
> enum {
> __TSNEP_DOWN,
> };
> @@ -626,6 +630,33 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
> iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
> }
>
> +static int tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
> + struct xdp_buff *xdp)
> +{
> + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> + int cpu = smp_processor_id();
> + int queue;
> + struct netdev_queue *nq;
> + int retval;
> +
> + if (unlikely(!xdpf))
> + return -EFAULT;
> +
> + queue = cpu % adapter->num_tx_queues;
> + nq = netdev_get_tx_queue(adapter->netdev, queue);
> +
> + __netif_tx_lock(nq, cpu);
> +
> + /* Avoid transmit queue timeout since we share it with the slow path */
> + txq_trans_cond_update(nq);
> +
> + retval = tsnep_xdp_xmit_frame_ring(xdpf, &adapter->tx[queue], false);
> +
> + __netif_tx_unlock(nq);
> +
> + return retval;
> +}
> +
> static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
> {
> unsigned long flags;
> @@ -792,6 +823,11 @@ static unsigned int tsnep_rx_offset(struct tsnep_rx *rx)
> return TSNEP_SKB_PAD;
> }
>
> +static unsigned int tsnep_rx_offset_xdp(void)
> +{
> + return XDP_PACKET_HEADROOM;
> +}
I don't see much of a value in this func :P
> +
> static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
> {
> struct device *dmadev = rx->adapter->dmadev;
> @@ -997,6 +1033,67 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
> return i;
> }
>
> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
> + struct xdp_buff *xdp, int *status)
> +{
> + unsigned int length;
> + unsigned int sync;
> + u32 act;
> +
> + length = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
could this be xdp->data_end - xdp->data - TSNEP_RX_INLINE_METADATA_SIZE ?
Can you tell a bit more about that metadata macro that you have to handle
by yourself all the time? would be good to tell about the impact on
data_meta since you're not configuring it on xdp_prepare_buff().
> +
> + act = bpf_prog_run_xdp(prog, xdp);
> +
> + /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
> + sync = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
> + sync = max(sync, length);
> +
> + switch (act) {
> + case XDP_PASS:
> + return false;
> + case XDP_TX:
> + if (tsnep_xdp_xmit_back(rx->adapter, xdp) < 0)
> + goto out_failure;
> + *status |= TSNEP_XDP_TX;
> + return true;
> + case XDP_REDIRECT:
> + if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
> + goto out_failure;
> + *status |= TSNEP_XDP_REDIRECT;
> + return true;
> + default:
> + bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
> + fallthrough;
> + case XDP_ABORTED:
> +out_failure:
> + trace_xdp_exception(rx->adapter->netdev, prog, act);
> + fallthrough;
> + case XDP_DROP:
> + page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
> + sync, true);
> + return true;
> + }
> +}
> +
> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status)
> +{
> + int cpu = smp_processor_id();
> + int queue;
> + struct netdev_queue *nq;
do you care about RCT, or?
> +
> + if (status & TSNEP_XDP_TX) {
> + queue = cpu % adapter->num_tx_queues;
> + nq = netdev_get_tx_queue(adapter->netdev, queue);
> +
> + __netif_tx_lock(nq, cpu);
> + tsnep_xdp_xmit_flush(&adapter->tx[queue]);
> + __netif_tx_unlock(nq);
> + }
> +
> + if (status & TSNEP_XDP_REDIRECT)
> + xdp_do_flush();
> +}
> +
> static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
> int length)
did you consider making tsnep_build_skb() to work on xdp_buff directly?
probably will help you once you'll implement XDP mbuf support here.
> {
> @@ -1035,12 +1132,16 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
> int desc_available;
> int done = 0;
> enum dma_data_direction dma_dir;
> + struct bpf_prog *prog;
> struct tsnep_rx_entry *entry;
> + struct xdp_buff xdp;
> + int xdp_status = 0;
> struct sk_buff *skb;
> int length;
>
> desc_available = tsnep_rx_desc_available(rx);
> dma_dir = page_pool_get_dma_dir(rx->page_pool);
> + prog = READ_ONCE(rx->adapter->xdp_prog);
>
> while (likely(done < budget) && (rx->read != rx->write)) {
> entry = &rx->entry[rx->read];
> @@ -1084,6 +1185,28 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
> rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
> desc_available++;
>
> + if (prog) {
> + bool consume;
> +
> + xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
> + xdp_prepare_buff(&xdp, page_address(entry->page),
> + tsnep_rx_offset_xdp() + TSNEP_RX_INLINE_METADATA_SIZE,
> + length - TSNEP_RX_INLINE_METADATA_SIZE,
Would it make sense to subtract TSNEP_RX_INLINE_METADATA_SIZE from length
right after dma_sync_single_range_for_cpu?
> + false);
> +
> + consume = tsnep_xdp_run_prog(rx, prog, &xdp,
> + &xdp_status);
> + if (consume) {
> + rx->packets++;
> + rx->bytes +=
> + length - TSNEP_RX_INLINE_METADATA_SIZE;
> +
> + entry->page = NULL;
> +
> + continue;
> + }
> + }
> +
> skb = tsnep_build_skb(rx, entry->page, length);
> if (skb) {
> page_pool_release_page(rx->page_pool, entry->page);
> @@ -1102,6 +1225,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
> entry->page = NULL;
> }
>
> + if (xdp_status)
> + tsnep_finalize_xdp(rx->adapter, xdp_status);
> +
> if (desc_available)
> tsnep_rx_refill(rx, desc_available, false);
>
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH net-next v2 6/6] tsnep: Add XDP RX support
2022-12-08 13:40 ` Maciej Fijalkowski
@ 2022-12-08 22:12 ` Gerhard Engleder
0 siblings, 0 replies; 21+ messages in thread
From: Gerhard Engleder @ 2022-12-08 22:12 UTC (permalink / raw)
To: Maciej Fijalkowski
Cc: netdev, bpf, davem, kuba, edumazet, pabeni, ast, daniel, hawk,
john.fastabend
On 08.12.22 14:40, Maciej Fijalkowski wrote:
>> +static unsigned int tsnep_rx_offset_xdp(void)
>> +{
>> + return XDP_PACKET_HEADROOM;
>> +}
>
> I don't see much of a value in this func :P
It is a variant of tsnep_rx_offset() for the XDP path to prevent
unneeded calls of tsnep_xdp_is_enabled(). With this function I
keep the RX offset local. But yes, it provides actually no
functionality.
>> +
>> static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
>> {
>> struct device *dmadev = rx->adapter->dmadev;
>> @@ -997,6 +1033,67 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>> return i;
>> }
>>
>> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
>> + struct xdp_buff *xdp, int *status)
>> +{
>> + unsigned int length;
>> + unsigned int sync;
>> + u32 act;
>> +
>> + length = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
>
> could this be xdp->data_end - xdp->data - TSNEP_RX_INLINE_METADATA_SIZE ?
xdp->data points to the start of the Ethernet frame after
TSNEP_RX_INLINE_METADATA_SIZE, so it would be wrong to subtract the
metadata which is not there.
Actually xdp->data_end - xdp->data + TSNEP_RX_INLINE_METADATA_SIZE would
be equivalent.
TSNEP_RX_INLINE_METADATA_SIZE contains timestamps of received frames. It
is written by DMA at the beginning of the RX buffer. So it extends the
DMA length and needs to be considered for DMA sync.
> Can you tell a bit more about that metadata macro that you have to handle
> by yourself all the time? would be good to tell about the impact on
> data_meta since you're not configuring it on xdp_prepare_buff().
I will add comments.
>> +
>> + act = bpf_prog_run_xdp(prog, xdp);
>> +
>> + /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
>> + sync = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
>> + sync = max(sync, length);
>> +
>> + switch (act) {
>> + case XDP_PASS:
>> + return false;
>> + case XDP_TX:
>> + if (tsnep_xdp_xmit_back(rx->adapter, xdp) < 0)
>> + goto out_failure;
>> + *status |= TSNEP_XDP_TX;
>> + return true;
>> + case XDP_REDIRECT:
>> + if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
>> + goto out_failure;
>> + *status |= TSNEP_XDP_REDIRECT;
>> + return true;
>> + default:
>> + bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
>> + fallthrough;
>> + case XDP_ABORTED:
>> +out_failure:
>> + trace_xdp_exception(rx->adapter->netdev, prog, act);
>> + fallthrough;
>> + case XDP_DROP:
>> + page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
>> + sync, true);
>> + return true;
>> + }
>> +}
>> +
>> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status)
>> +{
>> + int cpu = smp_processor_id();
>> + int queue;
>> + struct netdev_queue *nq;
>
> do you care about RCT, or?
Do you mean Redundancy Control Trailer (RCT) of PRP? This is new to me.
Do I have to take care about it in the driver? There are no plans to use
redundancy protocols with this device so far.
>> +
>> + if (status & TSNEP_XDP_TX) {
>> + queue = cpu % adapter->num_tx_queues;
>> + nq = netdev_get_tx_queue(adapter->netdev, queue);
>> +
>> + __netif_tx_lock(nq, cpu);
>> + tsnep_xdp_xmit_flush(&adapter->tx[queue]);
>> + __netif_tx_unlock(nq);
>> + }
>> +
>> + if (status & TSNEP_XDP_REDIRECT)
>> + xdp_do_flush();
>> +}
>> +
>> static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>> int length)
>
> did you consider making tsnep_build_skb() to work on xdp_buff directly?
> probably will help you once you'll implement XDP mbuf support here.
I saw it in other drivers. I did not consider it, because in my opinion
there was no advantage for this driver. Currently xdp_buff is only
initialized on demand if BPF program is there. So for me there was no
reason to change tsnep_build_skb().
>> {
>> @@ -1035,12 +1132,16 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>> int desc_available;
>> int done = 0;
>> enum dma_data_direction dma_dir;
>> + struct bpf_prog *prog;
>> struct tsnep_rx_entry *entry;
>> + struct xdp_buff xdp;
>> + int xdp_status = 0;
>> struct sk_buff *skb;
>> int length;
>>
>> desc_available = tsnep_rx_desc_available(rx);
>> dma_dir = page_pool_get_dma_dir(rx->page_pool);
>> + prog = READ_ONCE(rx->adapter->xdp_prog);
>>
>> while (likely(done < budget) && (rx->read != rx->write)) {
>> entry = &rx->entry[rx->read];
>> @@ -1084,6 +1185,28 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>> rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
>> desc_available++;
>>
>> + if (prog) {
>> + bool consume;
>> +
>> + xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
>> + xdp_prepare_buff(&xdp, page_address(entry->page),
>> + tsnep_rx_offset_xdp() + TSNEP_RX_INLINE_METADATA_SIZE,
>> + length - TSNEP_RX_INLINE_METADATA_SIZE,
>
> Would it make sense to subtract TSNEP_RX_INLINE_METADATA_SIZE from length
> right after dma_sync_single_range_for_cpu?
Yes, this could make the code simpler and more readable. I will try it.
Thanks for the review!
Gerhard
^ permalink raw reply [flat|nested] 21+ messages in thread