* [PATCH v2 1/4] i40e: clean zero-copy XDP Tx ring on shutdown/reset
2018-09-07 8:18 [PATCH v2 0/4] i40e AF_XDP zero-copy buffer leak fixes Björn Töpel
@ 2018-09-07 8:18 ` Björn Töpel
2018-09-21 7:35 ` [Intel-wired-lan] " Björn Töpel
2018-09-07 8:18 ` [PATCH v2 2/4] net: xsk: add a simple buffer reuse queue Björn Töpel
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Björn Töpel @ 2018-09-07 8:18 UTC (permalink / raw)
To: ast, daniel, jeffrey.t.kirsher, intel-wired-lan, jakub.kicinski
Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, netdev
From: Björn Töpel <bjorn.topel@intel.com>
When the zero-copy enabled XDP Tx ring is torn down, due to
configuration changes, outstandning frames on the hardware descriptor
ring are queued on the completion ring.
The completion ring has a back-pressure mechanism that will guarantee
that there is sufficient space on the ring.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 17 +++++++----
.../ethernet/intel/i40e/i40e_txrx_common.h | 2 ++
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 30 +++++++++++++++++++
3 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 37bd4e50ccde..7f85d4ba8b54 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -636,13 +636,18 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
unsigned long bi_size;
u16 i;
- /* ring already cleared, nothing to do */
- if (!tx_ring->tx_bi)
- return;
+ if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+ i40e_xsk_clean_tx_ring(tx_ring);
+ } else {
+ /* ring already cleared, nothing to do */
+ if (!tx_ring->tx_bi)
+ return;
- /* Free all the Tx ring sk_buffs */
- for (i = 0; i < tx_ring->count; i++)
- i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+ /* Free all the Tx ring sk_buffs */
+ for (i = 0; i < tx_ring->count; i++)
+ i40e_unmap_and_free_tx_resource(tx_ring,
+ &tx_ring->tx_bi[i]);
+ }
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_bi, 0, bi_size);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index b5afd479a9c5..29c68b29d36f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -87,4 +87,6 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
}
}
+void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
+
#endif /* I40E_TXRX_COMMON_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 2ebfc78bbd09..99116277c4d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -830,3 +830,33 @@ int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
return 0;
}
+
+/**
+ * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
+ * @xdp_ring: XDP Tx ring
+ **/
+void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
+{
+ u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+ struct xdp_umem *umem = tx_ring->xsk_umem;
+ struct i40e_tx_buffer *tx_bi;
+ u32 xsk_frames = 0;
+
+ while (ntc != ntu) {
+ tx_bi = &tx_ring->tx_bi[ntc];
+
+ if (tx_bi->xdpf)
+ i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
+ else
+ xsk_frames++;
+
+ tx_bi->xdpf = NULL;
+
+ ntc++;
+ if (ntc > tx_ring->count)
+ ntc = 0;
+ }
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(umem, xsk_frames);
+}
--
2.17.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Intel-wired-lan] [PATCH v2 1/4] i40e: clean zero-copy XDP Tx ring on shutdown/reset
2018-09-07 8:18 ` [PATCH v2 1/4] i40e: clean zero-copy XDP Tx ring on shutdown/reset Björn Töpel
@ 2018-09-21 7:35 ` Björn Töpel
2018-09-21 18:17 ` Jeff Kirsher
0 siblings, 1 reply; 7+ messages in thread
From: Björn Töpel @ 2018-09-21 7:35 UTC (permalink / raw)
To: Jeff Kirsher, intel-wired-lan
Cc: Netdev, Björn Töpel, Magnus Karlsson, Karlsson, Magnus,
Jakub Kicinski, Daniel Borkmann, ast
Jeff,
Den fre 7 sep. 2018 kl 10:29 skrev Björn Töpel <bjorn.topel@gmail.com>:
>
> From: Björn Töpel <bjorn.topel@intel.com>
>
> When the zero-copy enabled XDP Tx ring is torn down, due to
> configuration changes, outstandning frames on the hardware descriptor
> ring are queued on the completion ring.
>
> The completion ring has a back-pressure mechanism that will guarantee
> that there is sufficient space on the ring.
>
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 17 +++++++----
> .../ethernet/intel/i40e/i40e_txrx_common.h | 2 ++
> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 30 +++++++++++++++++++
> 3 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index 37bd4e50ccde..7f85d4ba8b54 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -636,13 +636,18 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
> unsigned long bi_size;
> u16 i;
>
> - /* ring already cleared, nothing to do */
> - if (!tx_ring->tx_bi)
> - return;
> + if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
> + i40e_xsk_clean_tx_ring(tx_ring);
> + } else {
> + /* ring already cleared, nothing to do */
> + if (!tx_ring->tx_bi)
> + return;
>
> - /* Free all the Tx ring sk_buffs */
> - for (i = 0; i < tx_ring->count; i++)
> - i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
> + /* Free all the Tx ring sk_buffs */
> + for (i = 0; i < tx_ring->count; i++)
> + i40e_unmap_and_free_tx_resource(tx_ring,
> + &tx_ring->tx_bi[i]);
> + }
>
> bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
> memset(tx_ring->tx_bi, 0, bi_size);
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
> index b5afd479a9c5..29c68b29d36f 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
> @@ -87,4 +87,6 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
> }
> }
>
> +void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
> +
> #endif /* I40E_TXRX_COMMON_ */
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index 2ebfc78bbd09..99116277c4d2 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -830,3 +830,33 @@ int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
>
> return 0;
> }
> +
> +/**
> + * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
> + * @xdp_ring: XDP Tx ring
> + **/
> +void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
> +{
> + u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
> + struct xdp_umem *umem = tx_ring->xsk_umem;
> + struct i40e_tx_buffer *tx_bi;
> + u32 xsk_frames = 0;
> +
> + while (ntc != ntu) {
> + tx_bi = &tx_ring->tx_bi[ntc];
> +
> + if (tx_bi->xdpf)
> + i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
> + else
> + xsk_frames++;
> +
> + tx_bi->xdpf = NULL;
> +
> + ntc++;
> + if (ntc > tx_ring->count)
This is an off-by-one error, and should be:
if (ntc == tx_ring->count)
Can you fix it up, or should I respin the patch?
Thanks!
Björn
> + ntc = 0;
> + }
> +
> + if (xsk_frames)
> + xsk_umem_complete_tx(umem, xsk_frames);
> +}
> --
> 2.17.1
>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Intel-wired-lan] [PATCH v2 1/4] i40e: clean zero-copy XDP Tx ring on shutdown/reset
2018-09-21 7:35 ` [Intel-wired-lan] " Björn Töpel
@ 2018-09-21 18:17 ` Jeff Kirsher
0 siblings, 0 replies; 7+ messages in thread
From: Jeff Kirsher @ 2018-09-21 18:17 UTC (permalink / raw)
To: Björn Töpel, intel-wired-lan
Cc: Netdev, Björn Töpel, Magnus Karlsson, Karlsson, Magnus,
Jakub Kicinski, Daniel Borkmann, ast
[-- Attachment #1: Type: text/plain, Size: 1272 bytes --]
On Fri, 2018-09-21 at 09:35 +0200, Björn Töpel wrote:
> > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > @@ -830,3 +830,33 @@ int i40e_xsk_async_xmit(struct net_device
> > *dev, u32 queue_id)
> >
> > return 0;
> > }
> > +
> > +/**
> > + * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
> > + * @xdp_ring: XDP Tx ring
> > + **/
> > +void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
> > +{
> > + u16 ntc = tx_ring->next_to_clean, ntu = tx_ring-
> > >next_to_use;
> > + struct xdp_umem *umem = tx_ring->xsk_umem;
> > + struct i40e_tx_buffer *tx_bi;
> > + u32 xsk_frames = 0;
> > +
> > + while (ntc != ntu) {
> > + tx_bi = &tx_ring->tx_bi[ntc];
> > +
> > + if (tx_bi->xdpf)
> > + i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
> > + else
> > + xsk_frames++;
> > +
> > + tx_bi->xdpf = NULL;
> > +
> > + ntc++;
> > + if (ntc > tx_ring->count)
>
> This is an off-by-one error, and should be:
> if (ntc == tx_ring->count)
>
> Can you fix it up, or should I respin the patch?
I can fix it up.
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 2/4] net: xsk: add a simple buffer reuse queue
2018-09-07 8:18 [PATCH v2 0/4] i40e AF_XDP zero-copy buffer leak fixes Björn Töpel
2018-09-07 8:18 ` [PATCH v2 1/4] i40e: clean zero-copy XDP Tx ring on shutdown/reset Björn Töpel
@ 2018-09-07 8:18 ` Björn Töpel
2018-09-07 8:18 ` [PATCH v2 3/4] i40e: clean zero-copy XDP Rx ring on shutdown/reset Björn Töpel
2018-09-07 8:18 ` [PATCH v2 4/4] i40e: disallow changing the number of descriptors when AF_XDP is on Björn Töpel
3 siblings, 0 replies; 7+ messages in thread
From: Björn Töpel @ 2018-09-07 8:18 UTC (permalink / raw)
To: ast, daniel, jeffrey.t.kirsher, intel-wired-lan, jakub.kicinski
Cc: magnus.karlsson, magnus.karlsson, netdev
From: Jakub Kicinski <jakub.kicinski@netronome.com>
XSK UMEM is strongly single producer single consumer so reuse of
frames is challenging. Add a simple "stash" of FILL packets to
reuse for drivers to optionally make use of. This is useful
when driver has to free (ndo_stop) or resize a ring with an active
AF_XDP ZC socket.
v2: Fixed build issues for !CONFIG_XDP_SOCKETS.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
include/net/xdp_sock.h | 69 ++++++++++++++++++++++++++++++++++++++++++
net/xdp/xdp_umem.c | 2 ++
net/xdp/xsk_queue.c | 55 +++++++++++++++++++++++++++++++++
net/xdp/xsk_queue.h | 3 ++
4 files changed, 129 insertions(+)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 932ca0dad6f3..70a115bea4f4 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -21,6 +21,12 @@ struct xdp_umem_page {
dma_addr_t dma;
};
+struct xdp_umem_fq_reuse {
+ u32 nentries;
+ u32 length;
+ u64 handles[];
+};
+
struct xdp_umem {
struct xsk_queue *fq;
struct xsk_queue *cq;
@@ -37,6 +43,7 @@ struct xdp_umem {
struct page **pgs;
u32 npgs;
struct net_device *dev;
+ struct xdp_umem_fq_reuse *fq_reuse;
u16 queue_id;
bool zc;
spinlock_t xsk_list_lock;
@@ -75,6 +82,10 @@ void xsk_umem_discard_addr(struct xdp_umem *umem);
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+ struct xdp_umem_fq_reuse *newq);
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
@@ -85,6 +96,35 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}
+
+/* Reuse-queue aware version of FILL queue helpers */
+static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+{
+ struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+ if (!rq->length)
+ return xsk_umem_peek_addr(umem, addr);
+
+ *addr = rq->handles[rq->length - 1];
+ return addr;
+}
+
+static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+{
+ struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+ if (!rq->length)
+ xsk_umem_discard_addr(umem);
+ else
+ rq->length--;
+}
+
+static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
+{
+ struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+ rq->handles[rq->length++] = addr;
+}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
@@ -128,6 +168,21 @@ static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
{
}
+static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+ return NULL;
+}
+
+static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
+ struct xdp_umem *umem,
+ struct xdp_umem_fq_reuse *newq)
+{
+ return NULL;
+}
+static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+}
+
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return NULL;
@@ -137,6 +192,20 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return 0;
}
+
+static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+{
+ return NULL;
+}
+
+static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
+{
+}
+
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index b3b632c5aeae..555427b3e0fe 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -165,6 +165,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
+ xsk_reuseq_destroy(umem);
+
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 2dc1384d9f27..b66504592d9b 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -3,7 +3,9 @@
* Copyright(c) 2018 Intel Corporation.
*/
+#include <linux/log2.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
#include "xsk_queue.h"
@@ -62,3 +64,56 @@ void xskq_destroy(struct xsk_queue *q)
page_frag_free(q->ring);
kfree(q);
}
+
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+ struct xdp_umem_fq_reuse *newq;
+
+ /* Check for overflow */
+ if (nentries > (u32)roundup_pow_of_two(nentries))
+ return NULL;
+ nentries = roundup_pow_of_two(nentries);
+
+ newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
+ if (!newq)
+ return NULL;
+ memset(newq, 0, offsetof(typeof(*newq), handles));
+
+ newq->nentries = nentries;
+ return newq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
+
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+ struct xdp_umem_fq_reuse *newq)
+{
+ struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
+
+ if (!oldq) {
+ umem->fq_reuse = newq;
+ return NULL;
+ }
+
+ if (newq->nentries < oldq->length)
+ return newq;
+
+ memcpy(newq->handles, oldq->handles,
+ array_size(oldq->length, sizeof(u64)));
+ newq->length = oldq->length;
+
+ umem->fq_reuse = newq;
+ return oldq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
+
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+ kvfree(rq);
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_free);
+
+void xsk_reuseq_destroy(struct xdp_umem *umem)
+{
+ xsk_reuseq_free(umem->fq_reuse);
+ umem->fq_reuse = NULL;
+}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 82252cccb4e0..bcb5cbb40419 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -258,4 +258,7 @@ void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
+/* Executed by the core when the entire UMEM gets freed */
+void xsk_reuseq_destroy(struct xdp_umem *umem);
+
#endif /* _LINUX_XSK_QUEUE_H */
--
2.17.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 3/4] i40e: clean zero-copy XDP Rx ring on shutdown/reset
2018-09-07 8:18 [PATCH v2 0/4] i40e AF_XDP zero-copy buffer leak fixes Björn Töpel
2018-09-07 8:18 ` [PATCH v2 1/4] i40e: clean zero-copy XDP Tx ring on shutdown/reset Björn Töpel
2018-09-07 8:18 ` [PATCH v2 2/4] net: xsk: add a simple buffer reuse queue Björn Töpel
@ 2018-09-07 8:18 ` Björn Töpel
2018-09-07 8:18 ` [PATCH v2 4/4] i40e: disallow changing the number of descriptors when AF_XDP is on Björn Töpel
3 siblings, 0 replies; 7+ messages in thread
From: Björn Töpel @ 2018-09-07 8:18 UTC (permalink / raw)
To: ast, daniel, jeffrey.t.kirsher, intel-wired-lan, jakub.kicinski
Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, netdev
From: Björn Töpel <bjorn.topel@intel.com>
Outstanding Rx descriptors are temporarily stored on a stash/reuse
queue. When/if the HW rings comes up again, entries from the stash are
used to re-populate the ring.
The latter required some restructuring of the allocation scheme for
the AF_XDP zero-copy implementation. There is now a fast, and a slow
allocation. The "fast allocation" is used from the fast-path and
obtains free buffers from the fill ring and the internal recycle
mechanism. The "slow allocation" is only used in ring setup, and
obtains buffers from the fill ring and the stash (if any).
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 +-
.../ethernet/intel/i40e/i40e_txrx_common.h | 1 +
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 100 ++++++++++++++++--
3 files changed, 96 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 7f85d4ba8b54..740ea58ba938 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1355,8 +1355,10 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_ring->skb = NULL;
}
- if (rx_ring->xsk_umem)
+ if (rx_ring->xsk_umem) {
+ i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
+ }
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index 29c68b29d36f..8d46acff6f2e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -87,6 +87,7 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
}
}
+void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
#endif /* I40E_TXRX_COMMON_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 99116277c4d2..e4b62e871afc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -140,6 +140,7 @@ static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid)
{
+ struct xdp_umem_fq_reuse *reuseq;
bool if_running;
int err;
@@ -156,6 +157,12 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
return -EBUSY;
}
+ reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+ if (!reuseq)
+ return -ENOMEM;
+
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
err = i40e_xsk_umem_dma_map(vsi, umem);
if (err)
return err;
@@ -353,16 +360,46 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
}
/**
- * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
+ * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer
* @rx_ring: Rx ring
- * @count: The number of buffers to allocate
+ * @bi: Rx buffer to populate
*
- * This function allocates a number of Rx buffers and places them on
- * the Rx ring.
+ * This function allocates an Rx buffer. The buffer can come from fill
+ * queue, or via the reuse queue.
*
* Returns true for a successful allocation, false otherwise
**/
-bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
+static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *bi)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ u64 handle, hr;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ handle &= rx_ring->xsk_umem->chunk_mask;
+
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ bi->dma = xdp_umem_get_dma(umem, handle);
+ bi->dma += hr;
+
+ bi->addr = xdp_umem_get_data(umem, handle);
+ bi->addr += hr;
+
+ bi->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr_rq(umem);
+ return true;
+}
+
+static __always_inline bool __i40e_alloc_rx_buffers_zc(
+ struct i40e_ring *rx_ring, u16 count,
+ bool alloc(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *bi))
{
u16 ntu = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
@@ -372,7 +409,7 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
rx_desc = I40E_RX_DESC(rx_ring, ntu);
bi = &rx_ring->rx_bi[ntu];
do {
- if (!i40e_alloc_buffer_zc(rx_ring, bi)) {
+ if (!alloc(rx_ring, bi)) {
ok = false;
goto no_buffers;
}
@@ -404,6 +441,38 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
return ok;
}
+/**
+ * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the reuse queue
+ * or fill ring and places them on the Rx ring.
+ *
+ * Returns true for a successful allocation, false otherwise
+ **/
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
+{
+ return __i40e_alloc_rx_buffers_zc(rx_ring, count,
+ i40e_alloc_buffer_slow_zc);
+}
+
+/**
+ * i40e_alloc_rx_buffers_fast_zc - Allocates a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns true for a successful allocation, false otherwise
+ **/
+static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
+{
+ return __i40e_alloc_rx_buffers_zc(rx_ring, count,
+ i40e_alloc_buffer_zc);
+}
+
/**
* i40e_get_rx_buffer_zc - Return the current Rx buffer
* @rx_ring: Rx ring
@@ -571,8 +640,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
failure = failure ||
- !i40e_alloc_rx_buffers_zc(rx_ring,
- cleaned_count);
+ !i40e_alloc_rx_buffers_fast_zc(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -831,6 +900,21 @@ int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
return 0;
}
+void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
+{
+ u16 i;
+
+ for (i = 0; i < rx_ring->count; i++) {
+ struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+
+ if (!rx_bi->addr)
+ continue;
+
+ xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_bi->handle);
+ rx_bi->addr = NULL;
+ }
+}
+
/**
* i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
* @xdp_ring: XDP Tx ring
--
2.17.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 4/4] i40e: disallow changing the number of descriptors when AF_XDP is on
2018-09-07 8:18 [PATCH v2 0/4] i40e AF_XDP zero-copy buffer leak fixes Björn Töpel
` (2 preceding siblings ...)
2018-09-07 8:18 ` [PATCH v2 3/4] i40e: clean zero-copy XDP Rx ring on shutdown/reset Björn Töpel
@ 2018-09-07 8:18 ` Björn Töpel
3 siblings, 0 replies; 7+ messages in thread
From: Björn Töpel @ 2018-09-07 8:18 UTC (permalink / raw)
To: ast, daniel, jeffrey.t.kirsher, intel-wired-lan, jakub.kicinski
Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, netdev
From: Björn Töpel <bjorn.topel@intel.com>
When an AF_XDP UMEM is attached to any of the Rx rings, we disallow a
user to change the number of descriptors via e.g. "ethtool -G IFNAME".
Otherwise, the size of the stash/reuse queue can grow unbounded, which
would result in OOM or leaking userspace buffers.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
.../net/ethernet/intel/i40e/i40e_ethtool.c | 9 +++++++-
.../ethernet/intel/i40e/i40e_txrx_common.h | 1 +
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 22 +++++++++++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index d7d3974beca2..3cd2c88c72f8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -5,7 +5,7 @@
#include "i40e.h"
#include "i40e_diag.h"
-
+#include "i40e_txrx_common.h"
#include "i40e_ethtool_stats.h"
#define I40E_PF_STAT(_name, _stat) \
@@ -1493,6 +1493,13 @@ static int i40e_set_ringparam(struct net_device *netdev,
(new_rx_count == vsi->rx_rings[0]->count))
return 0;
+ /* If there is a AF_XDP UMEM attached to any of Rx rings,
+ * disallow changing the number of descriptors -- regardless
+ * if the netdev is running or not.
+ */
+ if (i40e_xsk_any_rx_ring_enabled(vsi))
+ return -EBUSY;
+
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
timeout--;
if (!timeout)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index 8d46acff6f2e..09809dffe399 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -89,5 +89,6 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
+bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
#endif /* I40E_TXRX_COMMON_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index e4b62e871afc..119f59ec7cc0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -944,3 +944,25 @@ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
if (xsk_frames)
xsk_umem_complete_tx(umem, xsk_frames);
}
+
+/**
+ * i40e_xsk_any_rx_ring_enabled - Checks whether any of the Rx rings
+ * has AF_XDP UMEM attached
+ * @vsi: vsi
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ **/
+bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->xsk_umems)
+ return false;
+
+ for (i = 0; i < vsi->num_queue_pairs; i++) {
+ if (vsi->xsk_umems[i])
+ return true;
+ }
+
+ return false;
+}
--
2.17.1
^ permalink raw reply related [flat|nested] 7+ messages in thread