BPF List
 help / color / mirror / Atom feed
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: bpf@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	andrii@kernel.org
Cc: netdev@vger.kernel.org, magnus.karlsson@intel.com,
	bjorn@kernel.org, maciej.fijalkowski@intel.com,
	echaudro@redhat.com, lorenzo@kernel.org, martin.lau@linux.dev,
	tirthendu.sarkar@intel.com, john.fastabend@gmail.com,
	horms@kernel.org
Subject: [PATCH v5 bpf 05/11] i40e: handle multi-buffer packets that are shrunk by xdp prog
Date: Mon, 22 Jan 2024 23:16:04 +0100	[thread overview]
Message-ID: <20240122221610.556746-6-maciej.fijalkowski@intel.com> (raw)
In-Reply-To: <20240122221610.556746-1-maciej.fijalkowski@intel.com>

From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>

XDP programs can shrink packets by calling the bpf_xdp_adjust_tail()
helper function. For multi-buffer packets this may lead to reduction of
frag count stored in skb_shared_info area of the xdp_buff struct. This
results in issues with the current handling of XDP_PASS and XDP_DROP
cases.

For XDP_PASS, currently skb is being built using frag count of
xdp_buffer before it was processed by XDP prog and thus will result in
an inconsistent skb when frag count gets reduced by XDP prog. To fix
this, get correct frag count while building the skb instead of using
pre-obtained frag count.

For XDP_DROP, current page recycling logic will not reuse the page but
instead will adjust the pagecnt_bias so that the page can be freed. This
again results in inconsistent behavior as the page refcnt has already
been changed by the helper while freeing the frag(s) as part of
shrinking the packet. To fix this, only adjust pagecnt_bias for buffers
that are stillpart of the packet post-xdp prog run.

Fixes: e213ced19bef ("i40e: add support for XDP multi-buffer Rx")
Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 40 ++++++++++++---------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b82df5bdfac0..377e05dc0c68 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2099,7 +2099,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
 static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 				  struct xdp_buff *xdp)
 {
-	u32 next = rx_ring->next_to_clean;
+	u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 next = rx_ring->next_to_clean, i = 0;
 	struct i40e_rx_buffer *rx_buffer;
 
 	xdp->flags = 0;
@@ -2112,10 +2113,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 		if (!rx_buffer->page)
 			continue;
 
-		if (xdp_res == I40E_XDP_CONSUMED)
-			rx_buffer->pagecnt_bias++;
-		else
+		if (xdp_res != I40E_XDP_CONSUMED)
 			i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+		else if (i++ <= nr_frags)
+			rx_buffer->pagecnt_bias++;
 
 		/* EOP buffer will be put in i40e_clean_rx_irq() */
 		if (next == rx_ring->next_to_process)
@@ -2129,20 +2130,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
  * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
  * @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
  *
  * This function allocates an skb.  It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
  * skb correctly.
  */
 static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
-					  struct xdp_buff *xdp,
-					  u32 nr_frags)
+					  struct xdp_buff *xdp)
 {
 	unsigned int size = xdp->data_end - xdp->data;
 	struct i40e_rx_buffer *rx_buffer;
+	struct skb_shared_info *sinfo;
 	unsigned int headlen;
 	struct sk_buff *skb;
+	u32 nr_frags = 0;
 
 	/* prefetch first cache line of first page */
 	net_prefetch(xdp->data);
@@ -2180,6 +2181,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	memcpy(__skb_put(skb, headlen), xdp->data,
 	       ALIGN(headlen, sizeof(long)));
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
 	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 	/* update all of the pointers */
 	size -= headlen;
@@ -2199,9 +2204,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	}
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
-		struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
 
-		sinfo = xdp_get_shared_info_from_buff(xdp);
 		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
 		       sizeof(skb_frag_t) * nr_frags);
 
@@ -2224,17 +2228,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
  * i40e_build_skb - Build skb around an existing buffer
  * @rx_ring: Rx descriptor ring to transact packets on
  * @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
  *
  * This function builds an skb around an existing Rx buffer, taking care
  * to set up the skb correctly and avoid any memcpy overhead.
  */
 static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
-				      struct xdp_buff *xdp,
-				      u32 nr_frags)
+				      struct xdp_buff *xdp)
 {
 	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo;
 	struct sk_buff *skb;
+	u32 nr_frags;
 
 	/* Prefetch first cache line of first page. If xdp->data_meta
 	 * is unused, this points exactly as xdp->data, otherwise we
@@ -2243,6 +2247,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 	 */
 	net_prefetch(xdp->data_meta);
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
 	/* build an skb around the page buffer */
 	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
 	if (unlikely(!skb))
@@ -2255,9 +2264,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 		skb_metadata_set(skb, metasize);
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
-		struct skb_shared_info *sinfo;
-
-		sinfo = xdp_get_shared_info_from_buff(xdp);
 		xdp_update_skb_shared_info(skb, nr_frags,
 					   sinfo->xdp_frags_size,
 					   nr_frags * xdp->frame_sz,
@@ -2602,9 +2608,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
 			total_rx_bytes += size;
 		} else {
 			if (ring_uses_build_skb(rx_ring))
-				skb = i40e_build_skb(rx_ring, xdp, nfrags);
+				skb = i40e_build_skb(rx_ring, xdp);
 			else
-				skb = i40e_construct_skb(rx_ring, xdp, nfrags);
+				skb = i40e_construct_skb(rx_ring, xdp);
 
 			/* drop if we failed to retrieve a buffer */
 			if (!skb) {
-- 
2.34.1


  parent reply	other threads:[~2024-01-22 22:16 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-22 22:15 [PATCH v5 bpf 00/11] net: bpf_xdp_adjust_tail() and Intel mbuf fixes Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 01/11] xsk: recycle buffer in case Rx queue was full Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 02/11] xsk: make xsk_buff_pool responsible for clearing xdp_buff::flags Maciej Fijalkowski
2024-01-24  8:20   ` Magnus Karlsson
2024-01-24 11:42     ` Maciej Fijalkowski
2024-01-24 11:49       ` Magnus Karlsson
2024-01-22 22:16 ` [PATCH v5 bpf 03/11] xsk: fix usage of multi-buffer BPF helpers for ZC XDP Maciej Fijalkowski
2024-01-24  1:53   ` Jakub Kicinski
2024-01-24 12:02     ` Maciej Fijalkowski
2024-01-24 16:53       ` Jakub Kicinski
2024-01-24 16:56         ` Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 04/11] ice: work on pre-XDP prog frag count Maciej Fijalkowski
2024-01-24  8:37   ` Magnus Karlsson
2024-01-24 14:05     ` Maciej Fijalkowski
2024-01-22 22:16 ` Maciej Fijalkowski [this message]
2024-01-22 22:16 ` [PATCH v5 bpf 06/11] ice: remove redundant xdp_rxq_info registration Maciej Fijalkowski
2024-01-24  8:39   ` Magnus Karlsson
2024-01-22 22:16 ` [PATCH v5 bpf 07/11] intel: xsk: initialize skb_frag_t::bv_offset in ZC drivers Maciej Fijalkowski
2024-01-24  8:44   ` Magnus Karlsson
2024-01-24 16:21     ` Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 08/11] ice: update xdp_rxq_info::frag_size for ZC enabled Rx queue Maciej Fijalkowski
2024-01-24  8:51   ` Magnus Karlsson
2024-01-24 13:58     ` Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 09/11] xdp: reflect tail increase for MEM_TYPE_XSK_BUFF_POOL Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 10/11] i40e: set xdp_rxq_info::frag_size Maciej Fijalkowski
2024-01-22 22:16 ` [PATCH v5 bpf 11/11] i40e: update xdp_rxq_info::frag_size for ZC enabled Rx queue Maciej Fijalkowski
2024-01-24  8:52   ` Magnus Karlsson
2024-01-24  8:58 ` [PATCH v5 bpf 00/11] net: bpf_xdp_adjust_tail() and Intel mbuf fixes Magnus Karlsson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240122221610.556746-6-maciej.fijalkowski@intel.com \
    --to=maciej.fijalkowski@intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=echaudro@redhat.com \
    --cc=horms@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=lorenzo@kernel.org \
    --cc=magnus.karlsson@intel.com \
    --cc=martin.lau@linux.dev \
    --cc=netdev@vger.kernel.org \
    --cc=tirthendu.sarkar@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox