* [PATCH intel-next 1/5] i40e: add pre-xdp page_count in rx_buffer
From: Tirthendu Sarkar @ 2022-12-13 10:50 UTC
To: tirtha, jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
pabeni, ast, daniel, hawk, john.fastabend, intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
The page count of an rx_buffer needs to be stored prior to the XDP call
to prevent the page from being recycled in case the buffer is freed
within the XDP redirect path. Instead of storing it on the stack, store
it in the rx_buffer struct. This helps with multi-buffer processing, as
the page counts of all rx_buffers (of the same packet) no longer need
to be kept on the stack.
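For illustration, the reuse test this enables can be modeled in a few
lines of user-space C (the struct and function names here are made up
for the sketch; only the arithmetic mirrors the driver):

#include <stdbool.h>
#include <stdint.h>

/* page_count models the refcount snapshot taken *before* the XDP
 * program runs, so the comparison stays meaningful even if
 * xdp_do_redirect() has already released a reference.
 */
struct rx_buffer_model {
	uint32_t page_count;	/* pre-XDP page refcount snapshot */
	uint16_t pagecnt_bias;	/* references held back by the driver */
};

static bool can_reuse_page(const struct rx_buffer_model *b)
{
	/* sole owner iff snapshot refcount minus our bias is at most 1 */
	return (b->page_count - b->pagecnt_bias) <= 1;
}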
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 23 +++++++--------------
drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 +
2 files changed, 9 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 924f972b91fa..a7fba294a8f4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1970,7 +1970,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
* i40e_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buffer: buffer containing the page
* @rx_stats: rx stats structure for the rx ring
- * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling i40e_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
@@ -1981,8 +1980,7 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
* or busy if it could not be reused.
*/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
- struct i40e_rx_queue_stats *rx_stats,
- int rx_buffer_pgcnt)
+ struct i40e_rx_queue_stats *rx_stats)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
@@ -1995,7 +1993,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) {
+ if (unlikely((rx_buffer->page_count - pagecnt_bias) > 1)) {
rx_stats->page_busy_count++;
return false;
}
@@ -2058,19 +2056,17 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
* i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
* @rx_ring: rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
- * @rx_buffer_pgcnt: buffer page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
- const unsigned int size,
- int *rx_buffer_pgcnt)
+ const unsigned int size)
{
struct i40e_rx_buffer *rx_buffer;
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
- *rx_buffer_pgcnt =
+ rx_buffer->page_count =
#if (PAGE_SIZE < 8192)
page_count(rx_buffer->page);
#else
@@ -2226,16 +2222,14 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
- * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call
*
* This function will clean up the contents of the rx_buffer. It will
* either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- int rx_buffer_pgcnt)
+ struct i40e_rx_buffer *rx_buffer)
{
- if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) {
+ if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) {
/* hand second half of page back to the ring */
i40e_reuse_rx_page(rx_ring, rx_buffer);
} else {
@@ -2457,7 +2451,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
- int rx_buffer_pgcnt;
unsigned int size;
u64 qword;
@@ -2500,7 +2493,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
break;
i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
- rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size);
/* retrieve a buffer from the ring */
if (!skb) {
@@ -2541,7 +2534,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
break;
}
- i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
i40e_inc_ntc(rx_ring);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 768290dc6f48..eec4a4a99b9c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -277,6 +277,7 @@ struct i40e_rx_buffer {
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
+ __u32 page_count;
};
struct i40e_queue_stats {
--
2.34.1
* [PATCH intel-next 2/5] i40e: avoid per buffer next_to_clean access from i40e_ring
From: Tirthendu Sarkar @ 2022-12-13 10:50 UTC
To: tirtha, jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
pabeni, ast, daniel, hawk, john.fastabend, intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
Read and write next_to_clean only at the beginning and end of
i40e_clean_rx_irq() instead of per buffer, using local variables for
the per buffer increments. Stop reading the buffer index from
'next_to_clean' inside i40e_get_rx_buffer() and use a caller-provided
index instead. This will enable walking through all buffers of a
multi-buffer packet.
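As a sketch of the resulting index discipline (illustrative names, not
driver code), the ring is walked with a local index that wraps via a
macro, and next_to_clean is touched once per poll:

#include <stdint.h>

#define IDX_NEXT(n, max) do { if (++(n) > (max)) (n) = 0; } while (0)

struct ring_model {
	uint16_t count;
	uint16_t next_to_clean;
};

static int clean_sketch(struct ring_model *r, int budget)
{
	uint16_t ntc = r->next_to_clean;	/* single read */
	uint16_t rmax = r->count - 1;
	int done = 0;

	while (done < budget) {
		/* ... process the buffer at index ntc ... */
		IDX_NEXT(ntc, rmax);	/* local increment with wrap */
		done++;
	}
	r->next_to_clean = ntc;	/* single write-back */
	return done;
}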
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 33 ++++++++-------------
1 file changed, 13 insertions(+), 20 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index a7fba294a8f4..c6296cf08294 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -12,6 +12,7 @@
#include "i40e_xsk.h"
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+#define I40E_IDX_NEXT(n, max) { if (++(n) > (max)) n = 0; }
/**
* i40e_fdir - Generate a Flow Director descriptor based on fdata
* @tx_ring: Tx ring to send buffer on
@@ -2056,16 +2057,18 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
* i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
* @rx_ring: rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
+ * @next: index of the buffer to be fetched
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
- const unsigned int size)
+ const unsigned int size,
+ u32 next)
{
struct i40e_rx_buffer *rx_buffer;
- rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ rx_buffer = i40e_rx_bi(rx_ring, next);
rx_buffer->page_count =
#if (PAGE_SIZE < 8192)
page_count(rx_buffer->page);
@@ -2402,19 +2405,6 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
}
}
-/**
- * i40e_inc_ntc: Advance the next_to_clean index
- * @rx_ring: Rx ring
- **/
-static void i40e_inc_ntc(struct i40e_ring *rx_ring)
-{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
- prefetch(I40E_RX_DESC(rx_ring, ntc));
-}
-
/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
@@ -2435,6 +2425,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
unsigned int offset = rx_ring->rx_offset;
struct sk_buff *skb = rx_ring->skb;
+ u16 ntc = rx_ring->next_to_clean;
+ u16 rmax = rx_ring->count - 1;
unsigned int xdp_xmit = 0;
struct bpf_prog *xdp_prog;
bool failure = false;
@@ -2461,7 +2453,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
cleaned_count = 0;
}
- rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
+ rx_desc = I40E_RX_DESC(rx_ring, ntc);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
@@ -2480,8 +2472,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
- rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
- i40e_inc_ntc(rx_ring);
+ rx_buffer = i40e_rx_bi(rx_ring, ntc);
+ I40E_IDX_NEXT(ntc, rmax);
i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
@@ -2493,7 +2485,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
break;
i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
- rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size, ntc);
/* retrieve a buffer from the ring */
if (!skb) {
@@ -2537,7 +2529,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
i40e_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
- i40e_inc_ntc(rx_ring);
+ I40E_IDX_NEXT(ntc, rmax);
if (i40e_is_non_eop(rx_ring, rx_desc))
continue;
@@ -2559,6 +2551,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
/* update budget accounting */
total_rx_packets++;
}
+ rx_ring->next_to_clean = ntc;
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
rx_ring->skb = skb;
--
2.34.1
* [PATCH intel-next 3/5] i40e: introduce next_to_process to i40e_ring
From: Tirthendu Sarkar @ 2022-12-13 10:50 UTC
To: tirtha, jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
pabeni, ast, daniel, hawk, john.fastabend, intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
Add a new field called next_to_process to the i40e_ring that is
advanced for every buffer, and change the semantics of next_to_clean
to point to the first buffer of a packet. The driver will use
next_to_process in the same way next_to_clean was used previously.
For the non multi-buffer case, next_to_process and next_to_clean will
always be the same since each packet consists of a single buffer.
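The two-index scheme can be simulated in user space (model code, not
the driver's; IDX_NEXT stands in for I40E_IDX_NEXT):

#include <stdint.h>
#include <stdio.h>

#define IDX_NEXT(n, max) do { if (++(n) > (max)) (n) = 0; } while (0)

int main(void)
{
	uint16_t ntp = 0, ntc = 0, rmax = 7;
	/* a 3-buffer packet followed by two single-buffer packets;
	 * 1 marks an EOP descriptor
	 */
	int eop[] = { 0, 0, 1, 1, 1 };

	for (int i = 0; i < 5; i++) {
		IDX_NEXT(ntp, rmax);
		if (eop[i])
			ntc = ntp;	/* whole packet consumed */
		printf("desc %d: ntp=%u ntc=%u\n", i, ntp, ntc);
	}
	return 0;
}

Until the first packet's EOP is seen, ntc stays at the packet's first
buffer while ntp walks ahead; for the single-buffer packets the two
indices move in lockstep.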
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 15 ++++++++++-----
drivers/net/ethernet/intel/i40e/i40e_txrx.h | 4 ++++
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index c6296cf08294..e01bcc91a196 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -13,6 +13,7 @@
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
#define I40E_IDX_NEXT(n, max) { if (++(n) > (max)) n = 0; }
+#define I40E_INC_NEXT(p, c, max) do { I40E_IDX_NEXT(p, max); c = p; } while (0)
/**
* i40e_fdir - Generate a Flow Director descriptor based on fdata
* @tx_ring: Tx ring to send buffer on
@@ -1526,6 +1527,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+ rx_ring->next_to_process = 0;
}
/**
@@ -1576,6 +1578,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
}
rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_process = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -2425,6 +2428,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
unsigned int offset = rx_ring->rx_offset;
struct sk_buff *skb = rx_ring->skb;
+ u16 ntp = rx_ring->next_to_process;
u16 ntc = rx_ring->next_to_clean;
u16 rmax = rx_ring->count - 1;
unsigned int xdp_xmit = 0;
@@ -2453,7 +2457,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
cleaned_count = 0;
}
- rx_desc = I40E_RX_DESC(rx_ring, ntc);
+ rx_desc = I40E_RX_DESC(rx_ring, ntp);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
@@ -2472,8 +2476,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
- rx_buffer = i40e_rx_bi(rx_ring, ntc);
- I40E_IDX_NEXT(ntc, rmax);
+ rx_buffer = i40e_rx_bi(rx_ring, ntp);
+ I40E_INC_NEXT(ntp, ntc, rmax);
i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
@@ -2485,7 +2489,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
break;
i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
- rx_buffer = i40e_get_rx_buffer(rx_ring, size, ntc);
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size, ntp);
/* retrieve a buffer from the ring */
if (!skb) {
@@ -2529,7 +2533,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
i40e_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
- I40E_IDX_NEXT(ntc, rmax);
+ I40E_INC_NEXT(ntp, ntc, rmax);
if (i40e_is_non_eop(rx_ring, rx_desc))
continue;
@@ -2551,6 +2555,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
/* update budget accounting */
total_rx_packets++;
}
+ rx_ring->next_to_process = ntp;
rx_ring->next_to_clean = ntc;
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index eec4a4a99b9c..c1b5013f5c9c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -337,6 +337,10 @@ struct i40e_ring {
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;
+ u16 next_to_process; /* Next descriptor to be processed; for MB packet
+ * next_to_clean will be updated only after all
+ * buffers have been processed.
+ */
/* high bit set means dynamic, use accessor routines to read/write.
* hardware only supports 2us resolution for the ITR registers.
* these values always store the USER setting, and must be converted
--
2.34.1
* [PATCH intel-next 4/5] i40e: pull out rx buffer allocation to end of i40e_clean_rx_irq()
From: Tirthendu Sarkar @ 2022-12-13 10:50 UTC
To: tirtha, jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
pabeni, ast, daniel, hawk, john.fastabend, intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
Previously i40e_alloc_rx_buffers() was called for every 32 cleaned
buffers. For multi-buffer packets this may not be optimal, as there may
be more cleaned buffers in each i40e_clean_rx_irq() call. The
allocation is therefore pulled out of the loop and moved to the end of
i40e_clean_rx_irq().
As a consequence, instead of counting the number of cleaned buffers,
I40E_DESC_UNUSED() can be used to derive the count passed to
i40e_alloc_rx_buffers().
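The unused-descriptor arithmetic can be modeled like this (the macro
mirrors the driver's existing I40E_DESC_UNUSED() definition; the
surrounding code is illustrative):

#include <stdint.h>
#include <stdio.h>

struct ring_model {
	uint16_t count, next_to_use, next_to_clean;
};

/* descriptors cleaned but not yet returned to hardware */
#define DESC_UNUSED(r) \
	((((r)->next_to_clean > (r)->next_to_use) ? 0 : (r)->count) + \
	 (r)->next_to_clean - (r)->next_to_use - 1)

int main(void)
{
	struct ring_model r = { .count = 512, .next_to_use = 100,
				.next_to_clean = 160 };

	/* one refill call at the end of the poll covers everything
	 * cleaned in this pass, however many buffers that was
	 */
	printf("refill %d buffers\n", DESC_UNUSED(&r));
	return 0;
}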
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e01bcc91a196..dc9dc0acdd37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2425,7 +2425,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
unsigned int *rx_cleaned)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
- u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
unsigned int offset = rx_ring->rx_offset;
struct sk_buff *skb = rx_ring->skb;
u16 ntp = rx_ring->next_to_process;
@@ -2450,13 +2449,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
unsigned int size;
u64 qword;
- /* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- failure = failure ||
- i40e_alloc_rx_buffers(rx_ring, cleaned_count);
- cleaned_count = 0;
- }
-
rx_desc = I40E_RX_DESC(rx_ring, ntp);
/* status_error_len will always be zero for unused descriptors
@@ -2479,7 +2471,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
rx_buffer = i40e_rx_bi(rx_ring, ntp);
I40E_INC_NEXT(ntp, ntc, rmax);
i40e_reuse_rx_page(rx_ring, rx_buffer);
- cleaned_count++;
continue;
}
@@ -2531,7 +2522,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
}
i40e_put_rx_buffer(rx_ring, rx_buffer);
- cleaned_count++;
I40E_INC_NEXT(ntp, ntc, rmax);
if (i40e_is_non_eop(rx_ring, rx_desc))
@@ -2558,6 +2548,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
rx_ring->next_to_process = ntp;
rx_ring->next_to_clean = ntc;
+ failure = i40e_alloc_rx_buffers(rx_ring, I40E_DESC_UNUSED(rx_ring));
+
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
rx_ring->skb = skb;
--
2.34.1
* Re: [PATCH intel-next 4/5] i40e: pull out rx buffer allocation to end of i40e_clean_rx_irq()
From: Alexander H Duyck @ 2022-12-13 16:09 UTC
To: Tirthendu Sarkar, tirtha, jesse.brandeburg, anthony.l.nguyen,
davem, edumazet, kuba, pabeni, ast, daniel, hawk, john.fastabend,
intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
On Tue, 2022-12-13 at 16:20 +0530, Tirthendu Sarkar wrote:
> Previously i40e_alloc_rx_buffers() was called for every 32 cleaned
> buffers. For multi-buffer packets this may not be optimal, as there may
> be more cleaned buffers in each i40e_clean_rx_irq() call. The
> allocation is therefore pulled out of the loop and moved to the end of
> i40e_clean_rx_irq().
>
> As a consequence, instead of counting the number of cleaned buffers,
> I40E_DESC_UNUSED() can be used to derive the count passed to
> i40e_alloc_rx_buffers().
>
> Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
I suspect this will lead to performance issues on systems configured
with smaller ring sizes. Specifically, with this change you are limiting
things to allocating only once every 64 (NAPI_POLL_WEIGHT/budget) packets.
> ---
> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 ++----------
> 1 file changed, 2 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index e01bcc91a196..dc9dc0acdd37 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -2425,7 +2425,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
> unsigned int *rx_cleaned)
> {
> unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
> - u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
> unsigned int offset = rx_ring->rx_offset;
> struct sk_buff *skb = rx_ring->skb;
> u16 ntp = rx_ring->next_to_process;
> @@ -2450,13 +2449,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
> unsigned int size;
> u64 qword;
>
> - /* return some buffers to hardware, one at a time is too slow */
> - if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
> - failure = failure ||
> - i40e_alloc_rx_buffers(rx_ring, cleaned_count);
> - cleaned_count = 0;
> - }
> -
> rx_desc = I40E_RX_DESC(rx_ring, ntp);
>
> /* status_error_len will always be zero for unused descriptors
> @@ -2479,7 +2471,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
> rx_buffer = i40e_rx_bi(rx_ring, ntp);
> I40E_INC_NEXT(ntp, ntc, rmax);
> i40e_reuse_rx_page(rx_ring, rx_buffer);
> - cleaned_count++;
> continue;
> }
>
> @@ -2531,7 +2522,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
> }
>
> i40e_put_rx_buffer(rx_ring, rx_buffer);
> - cleaned_count++;
>
> I40E_INC_NEXT(ntp, ntc, rmax);
> if (i40e_is_non_eop(rx_ring, rx_desc))
> @@ -2558,6 +2548,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
> rx_ring->next_to_process = ntp;
> rx_ring->next_to_clean = ntc;
>
> + failure = i40e_alloc_rx_buffers(rx_ring, I40E_DESC_UNUSED(rx_ring));
> +
> i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
> rx_ring->skb = skb;
I am not a fan of this "failure" approach either. I hadn't noticed it
before but it is problematic. It would make much more sense to take an
approach similar to what we did for Tx where we kick the ring
periodically if it looks like it is stuck, in this case empty.
The problem is that if you have memory allocation issues, the last thing
you probably need is a NIC deciding to become memory hungry itself and
getting stuck in an allocation loop.
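One possible shape of that periodic-kick idea, as a fragment with
hypothetical helpers (RX_REFILL_THRESHOLD, refill_rx_buffers(),
ring_is_empty() and schedule_delayed_refill() are made-up names, not
driver API):

/* inside the clean loop: refill on a coarse threshold so small
 * rings are not starved by a once-per-poll refill
 */
if (cleaned >= RX_REFILL_THRESHOLD) {
	refill_rx_buffers(ring, cleaned);
	cleaned = 0;
}

/* after the loop: if the ring looks empty (e.g. allocations kept
 * failing), schedule a later retry instead of looping on
 * allocation now
 */
if (ring_is_empty(ring))
	schedule_delayed_refill(ring);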
* [PATCH intel-next 5/5] i40e: add support for XDP multi-buffer Rx
From: Tirthendu Sarkar @ 2022-12-13 10:50 UTC
To: tirtha, jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
pabeni, ast, daniel, hawk, john.fastabend, intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
This patch adds multi-buffer support to the i40e driver.
i40e_clean_rx_irq() is modified to collate all the buffers of a packet
before calling the XDP program. The EOP check is done before getting
the rx_buffer, and 'next_to_process' is incremented for all non-EOP
frags while 'next_to_clean' stays at the first descriptor of the
packet. Once EOP is encountered, an xdp_buff is built for the packet
along with all the frags.
New functions are added for building a multi-buffer xdp_buff and for
post-processing the buffers once the XDP program has run. For XDP_PASS
this results in an skb with multiple fragments.
i40e_build_skb() builds the skb around an xdp_buff that already
contains the frags data, so the i40e_add_rx_frag() helper function is
now removed. Since fields before 'dataref' in skb_shared_info are
cleared during napi_build_skb(), xdp_update_skb_shared_info() is called
to set them.
For i40e_construct_skb(), all the frags data needs to be copied from
the xdp_buff's skb_shared_info to the newly constructed skb's
skb_shared_info. This also means 'skb' does not need to be preserved
across i40e_napi_poll() calls and is hence removed from the i40e_ring
structure.
With this patchset, performance improves by 1% to 3% for 64B packets,
depending on the action type used with the xdp_rxq_info program from
samples/bpf/.
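The frag-append step at the heart of the multi-buffer build can be
sketched with the generic XDP helpers the patch relies on (a simplified
composite, not the literal driver function):

#include <linux/mm.h>
#include <linux/skbuff.h>
#include <net/xdp.h>

/* Sketch only: append one rx page to an xdp_buff as a frag, assuming
 * the buff's linear part was already set up via xdp_prepare_buff().
 */
static void xdp_append_frag_sketch(struct xdp_buff *xdp,
				   struct page *page, u32 off, u32 size)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	skb_frag_t *frag;

	if (!xdp_buff_has_frags(xdp)) {
		xdp_buff_set_frags_flag(xdp);
		sinfo->nr_frags = 0;
		sinfo->xdp_frags_size = 0;
	}
	frag = &sinfo->frags[sinfo->nr_frags++];
	__skb_frag_set_page(frag, page);
	skb_frag_off_set(frag, off);
	skb_frag_size_set(frag, size);
	sinfo->xdp_frags_size += size;

	if (page_is_pfmemalloc(page))
		xdp_buff_set_frag_pfmemalloc(xdp);
}

A real implementation must also bound nr_frags by MAX_SKB_FRAGS, which
is what the -EOPNOTSUPP error path in i40e_build_xdp_mb() below
handles; on XDP_PASS, xdp_update_skb_shared_info() then propagates
nr_frags, xdp_frags_size and the pfmemalloc flag into the skb built
around the buff.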
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 18 +-
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 331 +++++++++++++++-----
drivers/net/ethernet/intel/i40e/i40e_txrx.h | 8 -
3 files changed, 260 insertions(+), 97 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 95485b56d6c3..ce2bcc22f6cc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2922,8 +2922,10 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
if (i40e_enabled_xdp_vsi(vsi)) {
int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ struct bpf_prog *prog = READ_ONCE(vsi->xdp_prog);
- if (frame_size > i40e_max_xdp_frame_size(vsi))
+ if (!prog->aux->xdp_has_frags &&
+ frame_size > i40e_max_xdp_frame_size(vsi))
return -EINVAL;
}
@@ -13276,16 +13278,20 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
- int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
bool need_reset;
int i;
- /* Don't allow frames that span over multiple buffers */
- if (frame_size > vsi->rx_buf_len) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
- return -EINVAL;
+ if (prog && !prog->aux->xdp_has_frags) {
+ int fsz = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ /* Don't allow frames that span over multiple buffers if
+ * underlying XDP program does not support frags
+ */
+ if (fsz > vsi->rx_buf_len) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU is too large for linear frames and XDP prog does not support frags");
+ return -EINVAL;
+ }
}
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index dc9dc0acdd37..c67e71ff9b7c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -14,6 +14,8 @@
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
#define I40E_IDX_NEXT(n, max) { if (++(n) > (max)) n = 0; }
#define I40E_INC_NEXT(p, c, max) do { I40E_IDX_NEXT(p, max); c = p; } while (0)
+#define I40E_IS_FDIR(b) (!(b)->page)
+
/**
* i40e_fdir - Generate a Flow Director descriptor based on fdata
* @tx_ring: Tx ring to send buffer on
@@ -1479,9 +1481,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
-
if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
@@ -2022,40 +2021,6 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
return true;
}
-/**
- * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
- * @rx_ring: rx descriptor ring to transact packets on
- * @rx_buffer: buffer containing page to add
- * @skb: sk_buff to place the data into
- * @size: packet length from rx_desc
- *
- * This function will add the data contained in rx_buffer->page to the skb.
- * It will just attach the page as a frag to the skb.
- *
- * The function will then update the page offset.
- **/
-static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- struct sk_buff *skb,
- unsigned int size)
-{
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
-#endif
-
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
- rx_buffer->page_offset, size, truesize);
-
- /* page is being used so we must update the page offset */
-#if (PAGE_SIZE < 8192)
- rx_buffer->page_offset ^= truesize;
-#else
- rx_buffer->page_offset += truesize;
-#endif
-}
-
/**
* i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
* @rx_ring: rx descriptor ring to transact packets on
@@ -2113,12 +2078,18 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
+ struct skb_shared_info *sinfo;
unsigned int headlen;
struct sk_buff *skb;
+ u32 nr_frags = 0;
/* prefetch first cache line of first page */
net_prefetch(xdp->data);
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@@ -2155,6 +2126,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
/* update all of the pointers */
size -= headlen;
if (size) {
+ if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
skb_add_rx_frag(skb, 0, rx_buffer->page,
rx_buffer->page_offset + headlen,
size, truesize);
@@ -2170,6 +2145,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
rx_buffer->pagecnt_bias++;
}
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ struct skb_shared_info *skinfo = skb_shinfo(skb);
+
+ memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
+ sizeof(skb_frag_t) * nr_frags);
+
+ xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
+ }
return skb;
}
@@ -2194,8 +2180,14 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif
+ struct skb_shared_info *sinfo;
struct sk_buff *skb;
+ u32 nr_frags;
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
* likely have a consumer accessing first few bytes of meta
@@ -2220,6 +2212,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
#else
rx_buffer->page_offset += truesize;
#endif
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
@@ -2408,6 +2405,163 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
}
}
+/**
+ * i40e_process_rx_buffers- Post XDP processing of buffers
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @ntc: first desc index of the packet
+ * @ntp: last desc index of the packet
+ * @xdp_res: Result of the XDP program
+ * @xdp: xdp_buff pointing to the data
+ * @esize: EOP data size
+ * @total_rx_bytes: Received bytes counter
+ **/
+static void i40e_process_rx_buffers(struct i40e_ring *rx_ring, u32 ntc,
+ u32 ntp, int xdp_res, struct xdp_buff *xdp,
+ u32 esize, u32 *total_rx_bytes)
+
+{
+ struct i40e_rx_buffer *rx_buffer = i40e_rx_bi(rx_ring, ntc);
+ bool buf_flip = (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR));
+ bool buf_put = (xdp_res != I40E_XDP_EXIT);
+ u32 size = rx_ring->rx_buf_len;
+ u16 rmax = rx_ring->count - 1;
+ u32 nr_frags = 0;
+
+ xdp_buff_clear_frags_flag(xdp);
+
+ if (xdp_res == I40E_XDP_PASS) {
+ buf_flip = true;
+ /* First buffer has already been flipped during skb build, now
+ * put it back on ring.
+ */
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
+ /* Flip the EOP buffer. It will be put back on ring after
+ * returning from i40e_process_rx_buffers().
+ */
+ i40e_rx_buffer_flip(rx_ring, i40e_rx_bi(rx_ring, ntp), esize);
+ nr_frags++;
+ goto next_buf;
+ }
+
+ while (1) {
+ if (nr_frags++ < MAX_SKB_FRAGS) {
+ if (buf_flip)
+ i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
+ else
+ rx_buffer->pagecnt_bias++;
+ if (buf_put)
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
+ } else {
+ xdp->data = NULL;
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
+ }
+next_buf:
+ I40E_IDX_NEXT(ntc, rmax);
+ if (ntc == ntp) {
+ if (buf_put && xdp->data)
+ *total_rx_bytes += nr_frags * size;
+ return;
+ }
+ rx_buffer = i40e_rx_bi(rx_ring, ntc);
+ if (I40E_IS_FDIR(rx_buffer))
+ goto next_buf;
+ }
+}
+
+/**
+ * i40e_build_xdp - Build XDP buffer from a Rx buffer
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @offset: rx_ring offset
+ * @rxb: Rx buffer to pull data from
+ * @size: size of data
+ * @xdp: xdp_buff pointing to the data
+ **/
+static void i40e_build_xdp(struct i40e_ring *rx_ring, u32 offset,
+ struct i40e_rx_buffer *rxb, u32 size,
+ struct xdp_buff *xdp)
+{
+ unsigned char *hard_start;
+
+ hard_start = page_address(rxb->page) + rxb->page_offset - offset;
+ xdp_prepare_buff(xdp, hard_start, offset, size, true);
+#if (PAGE_SIZE > 4096)
+ /* At larger PAGE_SIZE, frame_sz depend on len size */
+ xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size);
+#endif
+}
+
+/**
+ * i40e_set_skb_frag - set page, offset and size of a frag
+ * @frag: skb frag being set
+ * @rx_buffer: rx buffer to pull data from
+ * @size: size of the frag
+ * @xdp: xdp_buff pointing to the data
+ */
+static void i40e_set_skb_frag(skb_frag_t *frag,
+ struct i40e_rx_buffer *rx_buffer,
+ u32 size, struct xdp_buff *xdp)
+{
+ __skb_frag_set_page(frag, rx_buffer->page);
+ skb_frag_off_set(frag, rx_buffer->page_offset);
+ skb_frag_size_set(frag, size);
+
+ if (page_is_pfmemalloc(rx_buffer->page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+}
+
+/**
+ * i40e_build_xdp_mb - Build XDP buffer from multiple Rx buffers
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @offset: rx_ring offset
+ * @rx_buffer: EOP rx buffer to pull data from
+ * @eop_size: size of data for the last fragment. All preceding
+ * fragments will be of max rx_buffer size
+ * @ntc: first desc index of the packet
+ * @ntp: last desc index of the packet
+ * @xdp: xdp_buff pointing to the data
+ **/
+static int i40e_build_xdp_mb(struct i40e_ring *rx_ring, u32 offset,
+ struct i40e_rx_buffer *rx_buffer, u32 eop_size,
+ u32 ntc, u32 ntp, struct xdp_buff *xdp)
+{
+ u32 neop_size = rx_ring->rx_buf_len; /* size of non-EOP buffers */
+ u16 rmax = rx_ring->count - 1;
+ struct skb_shared_info *sinfo;
+ struct i40e_rx_buffer *rxb;
+ skb_frag_t *frag;
+ u32 num = 0;
+
+ rxb = i40e_get_rx_buffer(rx_ring, neop_size, ntc);
+ i40e_build_xdp(rx_ring, offset, rxb, neop_size, xdp);
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ frag = &sinfo->frags[0];
+ xdp_buff_set_frags_flag(xdp);
+
+ do {
+ I40E_IDX_NEXT(ntc, rmax);
+ if (ntc == ntp) {
+ i40e_set_skb_frag(frag, rx_buffer, eop_size, xdp);
+ sinfo->xdp_frags_size = num * neop_size + eop_size;
+ sinfo->nr_frags = num + 1;
+ return 0;
+ }
+ if (I40E_IS_FDIR(i40e_rx_bi(rx_ring, ntc)))
+ continue;
+ rxb = i40e_get_rx_buffer(rx_ring, neop_size, ntc);
+ i40e_set_skb_frag(frag, rxb, neop_size, xdp);
+ frag++;
+ num++;
+ } while (likely(num < MAX_SKB_FRAGS));
+
+ /* Unlikely error case: increase pagecnt_bias of the EOP buffer before
+ * putting it back on ring. For rest of the buffers this will be done
+ * in i40e_process_rx_buffers().
+ */
+ rx_buffer->pagecnt_bias++;
+
+ return -EOPNOTSUPP;
+}
+
/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
@@ -2426,7 +2580,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
unsigned int offset = rx_ring->rx_offset;
- struct sk_buff *skb = rx_ring->skb;
u16 ntp = rx_ring->next_to_process;
u16 ntc = rx_ring->next_to_clean;
u16 rmax = rx_ring->count - 1;
@@ -2446,6 +2599,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
+ struct sk_buff *skb;
unsigned int size;
u64 qword;
@@ -2469,7 +2623,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
rx_desc->raw.qword[0],
qword);
rx_buffer = i40e_rx_bi(rx_ring, ntp);
- I40E_INC_NEXT(ntp, ntc, rmax);
+ if (ntc == ntp)
+ I40E_INC_NEXT(ntp, ntc, rmax);
+ else
+ I40E_IDX_NEXT(ntp, rmax);
i40e_reuse_rx_page(rx_ring, rx_buffer);
continue;
}
@@ -2480,23 +2637,25 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
break;
i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
- rx_buffer = i40e_get_rx_buffer(rx_ring, size, ntp);
- /* retrieve a buffer from the ring */
- if (!skb) {
- unsigned char *hard_start;
-
- hard_start = page_address(rx_buffer->page) +
- rx_buffer->page_offset - offset;
- xdp_prepare_buff(&xdp, hard_start, offset, size, true);
- xdp_buff_clear_frags_flag(&xdp);
-#if (PAGE_SIZE > 4096)
- /* At larger PAGE_SIZE, frame_sz depend on len size */
- xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
-#endif
- xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog);
+ if (i40e_is_non_eop(rx_ring, rx_desc)) {
+ I40E_IDX_NEXT(ntp, rmax);
+ continue;
}
+ /* retrieve EOP buffer from the ring */
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size, ntp);
+
+ if (likely(ntc == ntp))
+ i40e_build_xdp(rx_ring, offset, rx_buffer, size, &xdp);
+ else
+ if (i40e_build_xdp_mb(rx_ring, offset, rx_buffer,
+ size, ntc, ntp, &xdp)) {
+ xdp_res = I40E_XDP_CONSUMED;
+ goto process_frags;
+ }
+ xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog);
+
if (xdp_res) {
if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
xdp_xmit |= xdp_res;
@@ -2504,46 +2663,53 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
} else {
rx_buffer->pagecnt_bias++;
}
- total_rx_bytes += size;
- total_rx_packets++;
- } else if (skb) {
- i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
- } else if (ring_uses_build_skb(rx_ring)) {
- skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
} else {
- skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
- }
+ struct i40e_rx_buffer *rxb = i40e_rx_bi(rx_ring, ntc);
- /* exit if we failed to retrieve a buffer */
- if (!xdp_res && !skb) {
- rx_ring->rx_stats.alloc_buff_failed++;
- rx_buffer->pagecnt_bias++;
- break;
- }
+ if (ring_uses_build_skb(rx_ring))
+ skb = i40e_build_skb(rx_ring, rxb, &xdp);
+ else
+ skb = i40e_construct_skb(rx_ring, rxb, &xdp);
- i40e_put_rx_buffer(rx_ring, rx_buffer);
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buff_failed++;
+ rx_buffer->pagecnt_bias++;
+ if (ntc == ntp)
+ break;
+ xdp_res = I40E_XDP_EXIT;
+ goto process_frags;
+ }
- I40E_INC_NEXT(ntp, ntc, rmax);
- if (i40e_is_non_eop(rx_ring, rx_desc))
- continue;
+ if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
+ xdp.data = NULL;
+ goto process_frags;
+ }
+ /* populate checksum, VLAN, and protocol */
+ i40e_process_skb_fields(rx_ring, rx_desc, skb);
- if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
- skb = NULL;
- continue;
+ i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
/* probably a little skewed due to removing CRC */
- total_rx_bytes += skb->len;
-
- /* populate checksum, VLAN, and protocol */
- i40e_process_skb_fields(rx_ring, rx_desc, skb);
-
- i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
- napi_gro_receive(&rx_ring->q_vector->napi, skb);
- skb = NULL;
+ total_rx_bytes += size;
/* update budget accounting */
total_rx_packets++;
+
+process_frags:
+ if (unlikely(xdp_buff_has_frags(&xdp))) {
+ i40e_process_rx_buffers(rx_ring, ntc, ntp, xdp_res,
+ &xdp, size, &total_rx_bytes);
+ if (xdp_res == I40E_XDP_EXIT) {
+ /* Roll back ntp to first desc on the packet */
+ ntp = ntc;
+ break;
+ }
+ }
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
+ I40E_INC_NEXT(ntp, ntc, rmax);
}
rx_ring->next_to_process = ntp;
rx_ring->next_to_clean = ntc;
@@ -2551,7 +2717,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
failure = i40e_alloc_rx_buffers(rx_ring, I40E_DESC_UNUSED(rx_ring));
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
- rx_ring->skb = skb;
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index c1b5013f5c9c..2809be8f23d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -385,14 +385,6 @@ struct i40e_ring {
struct rcu_head rcu; /* to avoid race on free */
u16 next_to_alloc;
- struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must
- * return before it sees the EOP for
- * the current packet, we save that skb
- * here and resume receiving this
- * packet the next time
- * i40e_clean_rx_ring_irq() is called
- * for this ring.
- */
struct i40e_channel *ch;
u16 rx_offset;
--
2.34.1
* Re: [PATCH intel-next 0/5] i40e: support XDP multi-buffer
From: Alexander H Duyck @ 2022-12-13 15:58 UTC
To: Tirthendu Sarkar, tirtha, jesse.brandeburg, anthony.l.nguyen,
davem, edumazet, kuba, pabeni, ast, daniel, hawk, john.fastabend,
intel-wired-lan
Cc: netdev, linux-kernel, bpf, magnus.karlsson, maciej.fijalkowski
On Tue, 2022-12-13 at 16:20 +0530, Tirthendu Sarkar wrote:
> This patchset adds multi-buffer support for XDP. The first four patches
> are prepatory patches while the fifth one contains actual multi-buffer
> changes.
>
> Tirthendu Sarkar (5):
> i40e: add pre-xdp page_count in rx_buffer
> i40e: avoid per buffer next_to_clean access from i40e_ring
> i40e: introduce next_to_process to i40e_ring
> i40e: pull out rx buffer allocation to end of i40e_clean_rx_irq()
> i40e: add support for XDP multi-buffer Rx
>
> drivers/net/ethernet/intel/i40e/i40e_main.c | 18 +-
> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 378 ++++++++++++++------
> drivers/net/ethernet/intel/i40e/i40e_txrx.h | 13 +-
> 3 files changed, 280 insertions(+), 129 deletions(-)
>
This approach seems kind of convoluted to me. Basically you are trying
to clean the ring without cleaning the ring in the cases where you
encounter a non EOP descriptor.
Why not just replace the skb pointer with an xdp_buff in the ring? Then
you just build an xdp_buff w/ frags and then convert it after
i40e_is_non_eop? You should then still be able to use all the same page
counting tricks and the pages would just be dropped into the shared
info of an xdp_buff instead of an skb and function the same assuming
you have all the logic in place to clean them up correctly.
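A rough sketch of that suggested shape (hypothetical type, purely
illustrative):

/* persist the in-progress multi-buffer packet in the ring itself,
 * the way the skb pointer used to be persisted across polls
 */
struct ring_sketch {
	struct xdp_buff xdp;	/* partially assembled packet */
	/* ... */
};

/* per descriptor:
 *   1. append the rx buffer to ring->xdp (linear part or frag);
 *   2. if not EOP, continue - the xdp_buff survives an exhausted
 *      budget just as the saved skb did;
 *   3. on EOP, run the XDP program on the complete buff and, for
 *      XDP_PASS, convert it to an skb using the same page-count
 *      recycling tricks.
 */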
* RE: [PATCH intel-next 0/5] i40e: support XDP multi-buffer
From: Sarkar, Tirthendu @ 2022-12-14 15:56 UTC
To: Alexander H Duyck, tirtha@gmail.com, Brandeburg, Jesse,
Nguyen, Anthony L, davem@davemloft.net, edumazet@google.com,
kuba@kernel.org, pabeni@redhat.com, ast@kernel.org,
daniel@iogearbox.net, hawk@kernel.org, john.fastabend@gmail.com,
intel-wired-lan@lists.osuosl.org
Cc: netdev@vger.kernel.org, bpf@vger.kernel.org, Karlsson, Magnus,
Fijalkowski, Maciej
> From: Alexander H Duyck <alexander.duyck@gmail.com>
> Sent: Tuesday, December 13, 2022 9:28 PM
>
> This approach seems kind of convoluted to me. Basically you are trying
> to clean the ring without cleaning the ring in the cases where you
> encounter a non EOP descriptor.
>
> Why not just replace the skb pointer with an xdp_buff in the ring? Then
> you just build an xdp_buff w/ frags and then convert it after
> i40e_is_non_eop? You should then still be able to use all the same page
> counting tricks and the pages would just be dropped into the shared
> info of an xdp_buff instead of an skb and function the same assuming
> you have all the logic in place to clean them up correctly.
We have another approach, similar to what you have suggested, which is
a bit cleaner but not free of the burden of getting the rx_buffer
struct back again for all of the packet frags post i40e_run_xdp() for
recycling. We will examine whether that turns out to be better.
* Re: [PATCH intel-next 0/5] i40e: support XDP multi-buffer
From: Alexander Duyck @ 2022-12-14 17:16 UTC
To: Sarkar, Tirthendu
Cc: tirtha@gmail.com, Brandeburg, Jesse, Nguyen, Anthony L,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, ast@kernel.org, daniel@iogearbox.net,
hawk@kernel.org, john.fastabend@gmail.com,
intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
bpf@vger.kernel.org, Karlsson, Magnus, Fijalkowski, Maciej
On Wed, Dec 14, 2022 at 7:56 AM Sarkar, Tirthendu
<tirthendu.sarkar@intel.com> wrote:
>
> > From: Alexander H Duyck <alexander.duyck@gmail.com>
> > Sent: Tuesday, December 13, 2022 9:28 PM
> >
> > This approach seems kind of convoluted to me. Basically you are trying
> > to clean the ring without cleaning the ring in the cases where you
> > encounter a non EOP descriptor.
> >
> > Why not just replace the skb pointer with an xdp_buff in the ring? Then
> > you just build an xdp_buff w/ frags and then convert it after
> > i40e_is_non_eop? You should then still be able to use all the same page
> > counting tricks and the pages would just be dropped into the shared
> > info of an xdp_buff instead of an skb and function the same assuming
> > you have all the logic in place to clean them up correctly.
>
> We have another approach, similar to what you have suggested, which is
> a bit cleaner but not free of the burden of getting the rx_buffer
> struct back again for all of the packet frags post i40e_run_xdp() for
> recycling. We will examine whether that turns out to be better.
Sounds good. Keep in mind that there are multiple use cases for the
NIC so you don't want to optimize for the less likely to be used ones
such as XDP_DROP/XDP_ABORT over standard use cases such as simply
passing packets up to the network stack.