[Intel-wired-lan] [PATCH iwl-next v3] e1000e: Avoid DMA re-mapping on RX copybreak

Intel-Wired-Lan Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Matt Vollrath <tactii@gmail.com>
To: intel-wired-lan@osuosl.org
Cc: Matt Vollrath <tactii@gmail.com>,
	Aleksander Loktionov <aleksandr.loktionov@intel.com>,
	Paul Menzel <pmenzel@molgen.mpg.de>
Subject: [Intel-wired-lan] [PATCH iwl-next v3] e1000e: Avoid DMA re-mapping on RX copybreak
Date: Tue, 28 Apr 2026 21:43:25 -0400	[thread overview]
Message-ID: <20260429014325.19136-1-tactii@gmail.com> (raw)

This patch factors out DMA re-mapping for skbs which were recycled in
the RX path due to copybreak or errors. There is only one path out of
the e1000_clean_rx_irq() loop where the skb is consumed and DMA needs
to be re-mapped, so don't unmap it before checking the conditions.

The buffer allocation loop is adjusted to not assume that DMA is
unmapped, handling mapping errors gracefully.

On systems with IOMMU enabled, the cost of re-mapping DMA is greater
than the cost of copying data out of the ring buffer. When I use this
patch and configure e1000e with copybreak=2048, my system with IOMMU
completes RX roughly twice as fast under load.

Informal performance comparisons were based on Asus Gryphon Z97 which
includes an I218-V and with a Xeon E3-1240 v3 in the socket. ktime_get()
measurement was injected into e1000e_poll() wrapping the
adapter->clean_rx() call. The total time spent in clean_rx() was divided
by work_done to print the average time spent per buffer. iperf3 -R was
used to saturate the RX path and awk was used for statistics. Control
revision was set to 7.1-rc1 because iwl-next hadn't been updated yet.

  rev     | iommu | copybreak | samples | mean (ns) |   stdev
  7.1-rc1 |   off |         0 |    4748 |    453.72 |  155.82
  7.1-rc1 |   off |      2048 |    4743 |    554.83 |  103.67
  7.1-rc1 |    on |         0 |    4751 |   1139.22 |  150.56
* 7.1-rc1 |    on |      2048 |    4737 |   1267.02 |  184.62
   +patch |   off |         0 |    4739 |    456.30 |  146.33
   +patch |   off |      2048 |    4739 |    538.56 |  132.97
   +patch |    on |         0 |    4769 |   1165.97 |  140.19
*  +patch |    on |      2048 |    4745 |    562.25 |  171.80

No surprises here, IOMMU DMA ops are known to be expensive. For most
users the kernel default is iommu=on and driver default is
copybreak=256, so unless the workload is small packets, some tuning of
either knob would be needed to see the full benefit of this change.

The kludge of unconditional unmapping has existed since this driver was
introduced in 2007[1], inherited from the e1000 driver which has since
factored it out[2]. IOMMU tech was new at the time.

[1] Commit bc7f75fa9788 ("[E1000E]: New pci-express e1000 driver (currently for ICH9 devices only)")
[2] Commit 2b294b18689c ("e1000: perform copybreak ahead of DMA unmap")

Assisted-by: Claude:claude-4-7-opus
Signed-off-by: Matt Vollrath <tactii@gmail.com>
---
v3:
* refactor unmapping bypass, use goto instead of redundant branch
* remove Aleksandr's sign-off due to logic change
* benchmark details
* cite historic commits
v2:
* proofread description with Aleksandr
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 32 ++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 7ce0cc8ab8f4..62bf85c768d6 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -663,6 +663,8 @@ static void e1000_alloc_rx_buffers(struct e1000_ring *rx_ring,
 		skb = buffer_info->skb;
 		if (skb) {
 			skb_trim(skb, 0);
+			if (likely(buffer_info->dma))
+				goto write_desc;
 			goto map_skb;
 		}
 
@@ -680,10 +682,12 @@ static void e1000_alloc_rx_buffers(struct e1000_ring *rx_ring,
 						  DMA_FROM_DEVICE);
 		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
 			dev_err(&pdev->dev, "Rx DMA map failed\n");
+			buffer_info->dma = 0;
 			adapter->rx_dma_failed++;
 			break;
 		}
 
+write_desc:
 		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
 		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
 
@@ -941,7 +945,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 		dma_rmb();	/* read descriptor and rx_buffer_info after status DD */
 
 		skb = buffer_info->skb;
-		buffer_info->skb = NULL;
 
 		prefetch(skb->data - NET_IP_ALIGN);
 
@@ -955,9 +958,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 
 		cleaned = true;
 		cleaned_count++;
-		dma_unmap_single(&pdev->dev, buffer_info->dma,
-				 adapter->rx_buffer_len, DMA_FROM_DEVICE);
-		buffer_info->dma = 0;
 
 		length = le16_to_cpu(rx_desc->wb.upper.length);
 
@@ -973,8 +973,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 		if (adapter->flags2 & FLAG2_IS_DISCARDING) {
 			/* All receives must fit into a single buffer */
 			e_dbg("Receive packet consumed multiple buffers\n");
-			/* recycle */
-			buffer_info->skb = skb;
 			if (staterr & E1000_RXD_STAT_EOP)
 				adapter->flags2 &= ~FLAG2_IS_DISCARDING;
 			goto next_desc;
@@ -982,8 +980,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 
 		if (unlikely((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
 			     !(netdev->features & NETIF_F_RXALL))) {
-			/* recycle */
-			buffer_info->skb = skb;
 			goto next_desc;
 		}
 
@@ -1010,19 +1006,33 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 			struct sk_buff *new_skb =
 				napi_alloc_skb(&adapter->napi, length);
 			if (new_skb) {
+				dma_sync_single_for_cpu(&pdev->dev,
+							buffer_info->dma,
+							adapter->rx_buffer_len,
+							DMA_FROM_DEVICE);
 				skb_copy_to_linear_data_offset(new_skb,
 							       -NET_IP_ALIGN,
 							       (skb->data -
 								NET_IP_ALIGN),
 							       (length +
 								NET_IP_ALIGN));
-				/* save the skb in buffer_info as good */
-				buffer_info->skb = skb;
+				dma_sync_single_for_device(&pdev->dev,
+							   buffer_info->dma,
+							   adapter->rx_buffer_len,
+							   DMA_FROM_DEVICE);
 				skb = new_skb;
+				goto copybreak_done;
 			}
 			/* else just continue with the old one */
 		}
-		/* end copybreak code */
+
+		buffer_info->skb = NULL;
+		dma_unmap_single(&pdev->dev, buffer_info->dma,
+				 adapter->rx_buffer_len,
+				 DMA_FROM_DEVICE);
+		buffer_info->dma = 0;
+
+copybreak_done:
 		skb_put(skb, length);
 
 		/* Receive Checksum Offload */
-- 
2.43.0

next             reply	other threads:[~2026-04-29  1:43 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-29  1:43 Matt Vollrath [this message]
2026-04-29  9:18 ` [Intel-wired-lan] [PATCH iwl-next v3] e1000e: Avoid DMA re-mapping on RX copybreak Loktionov, Aleksandr
2026-05-17  6:59   ` Cohen, MichalX
2026-04-29 10:32 ` Paul Menzel

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:7ce0cc8ab8f dfblob:62bf85c768d )
 OR (
bs:"[Intel-wired-lan] [PATCH iwl-next v3] e1000e: Avoid DMA re-mapping on RX copybreak" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260429014325.19136-1-tactii@gmail.com \
    --to=tactii@gmail.com \
    --cc=aleksandr.loktionov@intel.com \
    --cc=intel-wired-lan@osuosl.org \
    --cc=pmenzel@molgen.mpg.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox