From: Feifei Wang <feifei.wang2@arm.com>
To: Beilei Xing <beilei.xing@intel.com>,
	Bruce Richardson <bruce.richardson@intel.com>,
	Konstantin Ananyev <konstantin.ananyev@intel.com>,
	Ruifeng Wang <ruifeng.wang@arm.com>
Cc: dev@dpdk.org, nd@arm.com, Feifei Wang <feifei.wang2@arm.com>,
	Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Subject: [PATCH v1 2/5] net/i40e: enable direct rearm mode
Date: Wed, 20 Apr 2022 16:16:47 +0800
Message-ID: <20220420081650.2043183-3-feifei.wang2@arm.com>
In-Reply-To: <20220420081650.2043183-1-feifei.wang2@arm.com>

For the i40e driver, enable direct re-arm mode. This patch supports the
case where the Rx and Tx queues are mapped to the same single lcore.
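
As a rough sketch of what the configuration amounts to, using the
driver-internal fields added by this patch (the public ethdev API that
sets them is introduced later in this series; the dev pointer and the
queue/port variables below are placeholders, not part of this patch):

	/* illustrative sketch only: map an Rx queue to recycle mbufs
	 * freed by a Tx queue polled on the same lcore
	 */
	struct i40e_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];

	rxq->direct_rxrearm_port = tx_port_id;	/* Tx port to recycle from */
	rxq->direct_rxrearm_queue = tx_queue_id;	/* Tx queue on that port */
	rxq->direct_rxrearm_enable = 1;	/* 0 selects the mempool path */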

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 drivers/net/i40e/i40e_rxtx.h            |   4 +
 drivers/net/i40e/i40e_rxtx_common_avx.h | 269 ++++++++++++++++++++++++
 drivers/net/i40e/i40e_rxtx_vec_avx2.c   |  14 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 249 +++++++++++++++++++++-
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 141 ++++++++++++-
 drivers/net/i40e/i40e_rxtx_vec_sse.c    | 170 ++++++++++++++-
 6 files changed, 839 insertions(+), 8 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..1fdf4305f4 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -102,6 +102,8 @@ struct i40e_rx_queue {
 
 	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
 	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
+	uint16_t direct_rxrearm_port; /**< device TX port ID for direct re-arm mode */
+	uint16_t direct_rxrearm_queue; /**< TX queue index for direct re-arm mode */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 
 	uint16_t port_id; /**< device port ID */
@@ -121,6 +123,8 @@ struct i40e_rx_queue {
 	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
 	uint8_t dcb_tc;         /**< Traffic class of rx queue */
 	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
+	/** 0 if direct re-arm mode disabled, 1 when enabled */
+	bool direct_rxrearm_enable;
 	const struct rte_memzone *mz;
 };
 
diff --git a/drivers/net/i40e/i40e_rxtx_common_avx.h b/drivers/net/i40e/i40e_rxtx_common_avx.h
index cfc1e63173..a742723e07 100644
--- a/drivers/net/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/i40e/i40e_rxtx_common_avx.h
@@ -209,6 +209,275 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
+
+static __rte_always_inline void
+i40e_rxq_direct_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+	struct rte_eth_dev *dev;
+	struct i40e_tx_queue *txq;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_tx_entry *txep;
+	struct i40e_rx_entry *rxep;
+	struct rte_mbuf *m[RTE_I40E_RXQ_REARM_THRESH];
+	uint16_t tx_port_id, tx_queue_id;
+	uint16_t rx_id;
+	uint16_t i, n;
+	uint16_t nb_rearm = 0;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	tx_port_id = rxq->direct_rxrearm_port;
+	tx_queue_id = rxq->direct_rxrearm_queue;
+	dev = &rte_eth_devices[tx_port_id];
+	txq = dev->data->tx_queues[tx_queue_id];
+
+	/* check that the Rx queue can take in the whole
+	 * batch of free mbufs from the Tx queue
+	 */
+	if (rxq->rxrearm_nb > txq->tx_rs_thresh) {
+		/* check DD bits on threshold descriptor */
+		if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+				rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
+			goto mempool_bulk;
+		}
+
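+		/* the direct path requires the Tx free batch size to
+		 * match the Rx re-arm batch size
+		 */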
+		if (txq->tx_rs_thresh != RTE_I40E_RXQ_REARM_THRESH)
+			goto mempool_bulk;
+
+		n = txq->tx_rs_thresh;
+
+		/* first buffer to free from S/W ring is at index
+		 * tx_next_dd - (tx_rs_thresh-1)
+		 */
+		txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+
+		if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+			/* directly put mbufs from Tx to Rx,
+			 * and initialize the mbufs in vector
+			 */
+			for (i = 0; i < n; i++)
+				rxep[i].mbuf = txep[i].mbuf;
+		} else {
+			for (i = 0; i < n; i++) {
+				m[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+				/* ensure each Tx freed buffer is valid */
+				if (m[i] != NULL)
+					nb_rearm++;
+			}
+
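+			/* not all Tx buffers could be recycled: release the
+			 * Tx ring slots and re-arm from the mempool instead
+			 */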
+			if (nb_rearm != n) {
+				txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+				txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+				if (txq->tx_next_dd >= txq->nb_tx_desc)
+					txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+				goto mempool_bulk;
+			} else {
+				for (i = 0; i < n; i++)
+					rxep[i].mbuf = m[i];
+			}
+		}
+
+		/* update counters for Tx */
+		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+		txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+		if (txq->tx_next_dd >= txq->nb_tx_desc)
+			txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+	} else {
+mempool_bulk:
+		/* if Tx did not free buffers into the Rx sw-ring,
+		 * get new buffers from the mempool
+		 */
+		n = RTE_I40E_RXQ_REARM_THRESH;
+
+		/* Pull 'n' more MBUFs into the software ring */
+		if (rte_mempool_get_bulk(rxq->mp,
+					(void *)rxep,
+					RTE_I40E_RXQ_REARM_THRESH) < 0) {
+			if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				rxq->nb_rx_desc) {
+				__m128i dma_addr0;
+				dma_addr0 = _mm_setzero_si128();
+				for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+					rxep[i].mbuf = &rxq->fake_mbuf;
+					_mm_store_si128((__m128i *)&rxdp[i].read,
+							dma_addr0);
+				}
+			}
+			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+				RTE_I40E_RXQ_REARM_THRESH;
+			return;
+		}
+	}
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < n; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef __AVX512VL__
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < n; i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						   vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						   vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else {
+#endif /* __AVX512VL__*/
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < n; i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 = _mm256_inserti128_si256
+				(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+			vaddr2_3 = _mm256_inserti128_si256
+				(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += n;
+	rx_id = rxq->rxrearm_start - 1;
+
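+	/* handle wrap-around: rx_id must keep pointing at the last
+	 * initialized descriptor, as it is written to the NIC tail register
+	 */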
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = rxq->rxrearm_start - rxq->nb_rx_desc;
+		if (!rxq->rxrearm_start)
+			rx_id = rxq->nb_rx_desc - 1;
+		else
+			rx_id = rxq->rxrearm_start - 1;
+	}
+
+	rxq->rxrearm_nb -= n;
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
 #endif /* __AVX2__*/
 
 #endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index c73b2a321b..fcb7ba0273 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -25,6 +25,12 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	return i40e_rxq_rearm_common(rxq, false);
 }
 
+static __rte_always_inline void
+i40e_rxq_direct_rearm(struct i40e_rx_queue *rxq)
+{
+	return i40e_rxq_direct_rearm_common(rxq, false);
+}
+
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 /* Handles 32B descriptor FDIR ID processing:
  * rxdp: receive descriptor ring, required to load 2nd 16B half of each desc
@@ -128,8 +134,12 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
-		i40e_rxq_rearm(rxq);
+	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) {
+		if (rxq->direct_rxrearm_enable)
+			i40e_rxq_direct_rearm(rxq);
+		else
+			i40e_rxq_rearm(rxq);
+	}
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 2e8a3f0df6..d967095edc 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -21,6 +21,12 @@
 
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
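+/* source of the mbufs used to re-arm the Rx ring */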
+enum i40e_direct_rearm_type_value {
+	I40E_DIRECT_REARM_TYPE_NORMAL		= 0x0,
+	I40E_DIRECT_REARM_TYPE_FAST_FREE	= 0x1,
+	I40E_DIRECT_REARM_TYPE_PRE_FREE		= 0x2,
+};
+
 static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
@@ -150,6 +156,241 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
+static __rte_always_inline void
+i40e_rxq_direct_rearm(struct i40e_rx_queue *rxq)
+{
+	struct rte_eth_dev *dev;
+	struct i40e_tx_queue *txq;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_vec_tx_entry *txep;
+	struct i40e_rx_entry *rxep;
+	struct rte_mbuf *m[RTE_I40E_RXQ_REARM_THRESH];
+	uint16_t tx_port_id, tx_queue_id;
+	uint16_t rx_id;
+	uint16_t i, n;
+	uint16_t j = 0;
+	uint16_t nb_rearm = 0;
+	enum i40e_direct_rearm_type_value type;
+	struct rte_mempool_cache *cache = NULL;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	tx_port_id = rxq->direct_rxrearm_port;
+	tx_queue_id = rxq->direct_rxrearm_queue;
+	dev = &rte_eth_devices[tx_port_id];
+	txq = dev->data->tx_queues[tx_queue_id];
+
+	/* check that the Rx queue can take in the whole
+	 * batch of free mbufs from the Tx queue
+	 */
+	if (rxq->rxrearm_nb > txq->tx_rs_thresh) {
+		/* check DD bits on threshold descriptor */
+		if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
+			goto mempool_bulk;
+		}
+
+		if (txq->tx_rs_thresh != RTE_I40E_RXQ_REARM_THRESH)
+			goto mempool_bulk;
+
+		n = txq->tx_rs_thresh;
+
+		/* first buffer to free from S/W ring is at index
+		 * tx_next_dd - (tx_rs_thresh-1)
+		 */
+		txep = (void *)txq->sw_ring;
+		txep += txq->tx_next_dd - (n - 1);
+
+		if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+			/* directly put mbufs from Tx to Rx */
+			uint32_t copied = 0;
+			/* n is multiple of 32 */
+			while (copied < n) {
+				const __m512i a = _mm512_load_si512(&txep[copied]);
+				const __m512i b = _mm512_load_si512(&txep[copied + 8]);
+				const __m512i c = _mm512_load_si512(&txep[copied + 16]);
+				const __m512i d = _mm512_load_si512(&txep[copied + 24]);
+
+				_mm512_storeu_si512(&rxep[copied], a);
+				_mm512_storeu_si512(&rxep[copied + 8], b);
+				_mm512_storeu_si512(&rxep[copied + 16], c);
+				_mm512_storeu_si512(&rxep[copied + 24], d);
+				copied += 32;
+			}
+			type = I40E_DIRECT_REARM_TYPE_FAST_FREE;
+		} else {
+			for (i = 0; i < n; i++) {
+				m[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+				/* ensure each Tx freed buffer is valid */
+				if (m[i] != NULL)
+					nb_rearm++;
+			}
+
+			if (nb_rearm != n) {
+				txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+				txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+				if (txq->tx_next_dd >= txq->nb_tx_desc)
+					txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+				goto mempool_bulk;
+			} else {
+				type = I40E_DIRECT_REARM_TYPE_PRE_FREE;
+			}
+		}
+
+		/* update counters for Tx */
+		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+		txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+		if (txq->tx_next_dd >= txq->nb_tx_desc)
+			txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+	} else {
+mempool_bulk:
+		cache = rte_mempool_default_cache(rxq->mp, rte_lcore_id());
+
+		if (unlikely(!cache))
+			return i40e_rxq_rearm_common(rxq, true);
+
+		n = RTE_I40E_RXQ_REARM_THRESH;
+
+		/* We need to pull 'n' more MBUFs into the software ring from mempool
+		 * We inline the mempool function here, so we can vectorize the copy
+		 * from the cache into the shadow ring.
+		 */
+
+		if (cache->len < RTE_I40E_RXQ_REARM_THRESH) {
+			/* not enough in the cache: backfill it first with
+			 * what we require, i.e. the request plus enough
+			 * to refill the cache
+			 */
+			uint32_t req = RTE_I40E_RXQ_REARM_THRESH + (cache->size -
+					cache->len);
+
+			int ret = rte_mempool_ops_dequeue_bulk(rxq->mp,
+					&cache->objs[cache->len], req);
+			if (ret == 0) {
+				cache->len += req;
+			} else {
+				if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+						rxq->nb_rx_desc) {
+					__m128i dma_addr0;
+
+					dma_addr0 = _mm_setzero_si128();
+					for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+						rxep[i].mbuf = &rxq->fake_mbuf;
+						_mm_store_si128
+							((__m128i *)&rxdp[i].read,
+								dma_addr0);
+					}
+				}
+				rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+						RTE_I40E_RXQ_REARM_THRESH;
+				return;
+			}
+		}
+
+		type = I40E_DIRECT_REARM_TYPE_NORMAL;
+	}
+
+	const __m512i iova_offsets =  _mm512_set1_epi64
+		(offsetof(struct rte_mbuf, buf_iova));
+	const __m512i headroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+	/* to shuffle the addresses to correct slots. Values 4-7 will contain
+	 * zeros, so use 7 for a zero-value.
+	 */
+	const __m512i permute_idx = _mm512_set_epi64(7, 7, 3, 1, 7, 7, 2, 0);
+#else
+	const __m512i permute_idx = _mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0);
+#endif
+
+	__m512i mbuf_ptrs;
+
+	/* Initialize the mbufs in vector, process 8 mbufs in one loop, taking
+	 * from mempool cache and populating both shadow and HW rings
+	 */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH / 8; i++) {
+		switch (type) {
+		case I40E_DIRECT_REARM_TYPE_FAST_FREE:
+			mbuf_ptrs = _mm512_loadu_si512(rxep);
+			break;
+		case I40E_DIRECT_REARM_TYPE_PRE_FREE:
+			mbuf_ptrs = _mm512_loadu_si512(&m[j]);
+			_mm512_store_si512(rxep, mbuf_ptrs);
+			j += 8;
+			break;
+		case I40E_DIRECT_REARM_TYPE_NORMAL:
+			mbuf_ptrs = _mm512_loadu_si512
+				(&cache->objs[cache->len - 8]);
+			_mm512_store_si512(rxep, mbuf_ptrs);
+			cache->len -= 8;
+			break;
+		}
+
+		/* gather iova of mbuf0-7 into one zmm reg */
+		const __m512i iova_base_addrs = _mm512_i64gather_epi64
+			(_mm512_add_epi64(mbuf_ptrs, iova_offsets),
+				0, /* base */
+				1 /* scale */);
+		const __m512i iova_addrs = _mm512_add_epi64(iova_base_addrs,
+				headroom);
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+		const __m512i iovas0 = _mm512_castsi256_si512
+			(_mm512_extracti64x4_epi64(iova_addrs, 0));
+		const __m512i iovas1 = _mm512_castsi256_si512
+			(_mm512_extracti64x4_epi64(iova_addrs, 1));
+
+		/* permute leaves desc 2-3 addresses in header address slots 0-1
+		 * but these are ignored by driver since header split not
+		 * enabled. Similarly for desc 4 & 5.
+		 */
+		const __m512i desc_rd_0_1 = _mm512_permutexvar_epi64
+			(permute_idx, iovas0);
+		const __m512i desc_rd_2_3 = _mm512_bsrli_epi128(desc_rd_0_1, 8);
+
+		const __m512i desc_rd_4_5 = _mm512_permutexvar_epi64
+			(permute_idx, iovas1);
+		const __m512i desc_rd_6_7 = _mm512_bsrli_epi128(desc_rd_4_5, 8);
+
+		_mm512_store_si512((void *)rxdp, desc_rd_0_1);
+		_mm512_store_si512((void *)(rxdp + 2), desc_rd_2_3);
+		_mm512_store_si512((void *)(rxdp + 4), desc_rd_4_5);
+		_mm512_store_si512((void *)(rxdp + 6), desc_rd_6_7);
+#else
+		/* permute leaves desc 4-7 addresses in header address slots 0-3
+		 * but these are ignored by driver since header split not
+		 * enabled.
+		 */
+		const __m512i desc_rd_0_3 = _mm512_permutexvar_epi64
+			(permute_idx, iova_addrs);
+		const __m512i desc_rd_4_7 = _mm512_bsrli_epi128(desc_rd_0_3, 8);
+
+		_mm512_store_si512((void *)rxdp, desc_rd_0_3);
+		_mm512_store_si512((void *)(rxdp + 4), desc_rd_4_7);
+#endif
+		rxdp += 8, rxep += 8;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += n;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = rxq->rxrearm_start - rxq->nb_rx_desc;
+		if (!rxq->rxrearm_start)
+			rx_id = rxq->nb_rx_desc - 1;
+		else
+			rx_id = rxq->rxrearm_start - 1;
+	}
+
+	rxq->rxrearm_nb -= n;
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 /* Handles 32B descriptor FDIR ID processing:
  * rxdp: receive descriptor ring, required to load 2nd 16B half of each desc
@@ -252,8 +493,12 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
-		i40e_rxq_rearm(rxq);
+	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) {
+		if (rxq->direct_rxrearm_enable)
+			i40e_rxq_direct_rearm(rxq);
+		else
+			i40e_rxq_rearm(rxq);
+	}
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index fa9e6582c5..dc78e3c90b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -77,6 +77,139 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
 }
 
+static inline void
+i40e_rxq_direct_rearm(struct i40e_rx_queue *rxq)
+{
+	struct rte_eth_dev *dev;
+	struct i40e_tx_queue *txq;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_tx_entry *txep;
+	struct i40e_rx_entry *rxep;
+	uint16_t tx_port_id, tx_queue_id;
+	uint16_t rx_id;
+	struct rte_mbuf *mb0, *mb1, *m;
+	uint64x2_t dma_addr0, dma_addr1;
+	uint64x2_t zero = vdupq_n_u64(0);
+	uint64_t paddr;
+	uint16_t i, n;
+	uint16_t nb_rearm = 0;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	tx_port_id = rxq->direct_rxrearm_port;
+	tx_queue_id = rxq->direct_rxrearm_queue;
+	dev = &rte_eth_devices[tx_port_id];
+	txq = dev->data->tx_queues[tx_queue_id];
+
+	/* check that the Rx queue can take in the whole
+	 * batch of free mbufs from the Tx queue
+	 */
+	if (rxq->rxrearm_nb > txq->tx_rs_thresh) {
+		/* check DD bits on threshold descriptor */
+		if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+				rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
+			goto mempool_bulk;
+		}
+
+		n = txq->tx_rs_thresh;
+
+		/* first buffer to free from S/W ring is at index
+		 * tx_next_dd - (tx_rs_thresh-1)
+		 */
+		txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+
+		if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+			/* directly put mbufs from Tx to Rx,
+			 * and initialize the mbufs in vector
+			 */
+			for (i = 0; i < n; i++, rxep++, txep++) {
+				rxep[0].mbuf = txep[0].mbuf;
+
+				/* Initialize rxdp descs */
+				mb0 = txep[0].mbuf;
+
+				paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+				dma_addr0 = vdupq_n_u64(paddr);
+				/* flush desc with pa dma_addr */
+				vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+			}
+		} else {
+			for (i = 0; i < n; i++) {
+				m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+				if (m != NULL) {
+					rxep[i].mbuf = m;
+
+					/* Initialize rxdp descs */
+					paddr = m->buf_iova + RTE_PKTMBUF_HEADROOM;
+					dma_addr0 = vdupq_n_u64(paddr);
+					/* flush desc with pa dma_addr */
+					vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+					nb_rearm++;
+				}
+			}
+			n = nb_rearm;
+		}
+
+		/* update counters for Tx */
+		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+		txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+		if (txq->tx_next_dd >= txq->nb_tx_desc)
+			txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+	} else {
+mempool_bulk:
+		/* if Tx did not free buffers into the Rx sw-ring,
+		 * get new buffers from the mempool
+		 */
+		n = RTE_I40E_RXQ_REARM_THRESH;
+		if (unlikely(rte_mempool_get_bulk(rxq->mp, (void *)rxep, n) < 0)) {
+			if (rxq->rxrearm_nb + n >= rxq->nb_rx_desc) {
+				for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+					rxep[i].mbuf = &rxq->fake_mbuf;
+					vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+				}
+			}
+			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += n;
+			return;
+		}
+
+		/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+		for (i = 0; i < n; i += 2, rxep += 2) {
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+
+			paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+			dma_addr0 = vdupq_n_u64(paddr);
+			/* flush desc with pa dma_addr */
+			vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+
+			paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+			dma_addr1 = vdupq_n_u64(paddr);
+			/* flush desc with pa dma_addr */
+			vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
+		}
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += n;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = rxq->rxrearm_start - rxq->nb_rx_desc;
+		if (!rxq->rxrearm_start)
+			rx_id = rxq->nb_rx_desc - 1;
+		else
+			rx_id = rxq->rxrearm_start - 1;
+	}
+
+	rxq->rxrearm_nb -= n;
+
+	rte_io_wmb();
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+}
+
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 /* NEON version of FDIR mark extraction for 4 32B descriptors at a time */
 static inline uint32x4_t
@@ -381,8 +514,12 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
-		i40e_rxq_rearm(rxq);
+	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) {
+		if (rxq->direct_rxrearm_enable)
+			i40e_rxq_direct_rearm(rxq);
+		else
+			i40e_rxq_rearm(rxq);
+	}
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 3782e8052f..b2f1ab2c8d 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -89,6 +89,168 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
+static inline void
+i40e_rxq_direct_rearm(struct i40e_rx_queue *rxq)
+{
+	struct rte_eth_dev *dev;
+	struct i40e_tx_queue *txq;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_tx_entry *txep;
+	struct i40e_rx_entry *rxep;
+	uint16_t tx_port_id, tx_queue_id;
+	uint16_t rx_id;
+	struct rte_mbuf *mb0, *mb1, *m;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	__m128i dma_addr0, dma_addr1;
+	__m128i vaddr0, vaddr1;
+	uint16_t i, n;
+	uint16_t nb_rearm = 0;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	tx_port_id = rxq->direct_rxrearm_port;
+	tx_queue_id = rxq->direct_rxrearm_queue;
+	dev = &rte_eth_devices[tx_port_id];
+	txq = dev->data->tx_queues[tx_queue_id];
+
+	/* check that the Rx queue can take in the whole
+	 * batch of free mbufs from the Tx queue
+	 */
+	if (rxq->rxrearm_nb > txq->tx_rs_thresh) {
+		/* check DD bits on threshold descriptor */
+		if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+				rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
+			goto mempool_bulk;
+		}
+
+		n = txq->tx_rs_thresh;
+
+		/* first buffer to free from S/W ring is at index
+		 * tx_next_dd - (tx_rs_thresh-1)
+		 */
+		txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+
+		if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+			/* directly put mbufs from Tx to Rx,
+			 * and initialize the mbufs in vector
+			 */
+			for (i = 0; i < n; i++, rxep++, txep++) {
+				rxep[0].mbuf = txep[0].mbuf;
+
+				/* Initialize rxdp descs */
+				mb0 = txep[0].mbuf;
+
+				/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+				RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+						offsetof(struct rte_mbuf, buf_addr) + 8);
+				vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+
+				/* convert pa to dma_addr hdr/data */
+				dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+
+				/* add headroom to pa values */
+				dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+
+				/* flush desc with pa dma_addr */
+				_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+			}
+		} else {
+			for (i = 0; i < n; i++) {
+				m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+				if (m != NULL) {
+					rxep[i].mbuf = m;
+
+					/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+					RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+							offsetof(struct rte_mbuf, buf_addr) + 8);
+					vaddr0 = _mm_loadu_si128((__m128i *)&m->buf_addr);
+
+					/* convert pa to dma_addr hdr/data */
+					dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+
+					/* add headroom to pa values */
+					dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+
+					/* flush desc with pa dma_addr */
+					_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+					nb_rearm++;
+				}
+			}
+			n = nb_rearm;
+		}
+
+		/* update counters for Tx */
+		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+		txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+		if (txq->tx_next_dd >= txq->nb_tx_desc)
+			txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+	} else {
+mempool_bulk:
+		/* if Tx did not free buffers into the Rx sw-ring,
+		 * get new buffers from the mempool
+		 */
+		n = RTE_I40E_RXQ_REARM_THRESH;
+		/* Pull 'n' more MBUFs into the software ring */
+		if (rte_mempool_get_bulk(rxq->mp, (void *)rxep, n) < 0) {
+			if (rxq->rxrearm_nb + n >= rxq->nb_rx_desc) {
+				dma_addr0 = _mm_setzero_si128();
+				for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+					rxep[i].mbuf = &rxq->fake_mbuf;
+					_mm_store_si128((__m128i *)&rxdp[i].read,
+							dma_addr0);
+				}
+			}
+			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+				RTE_I40E_RXQ_REARM_THRESH;
+			return;
+		}
+
+		/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+			dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+			/* add headroom to pa values */
+			dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+			dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+			_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+		}
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += n;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = rxq->rxrearm_start - rxq->nb_rx_desc;
+		if (!rxq->rxrearm_start)
+			rx_id = rxq->nb_rx_desc - 1;
+		else
+			rx_id = rxq->rxrearm_start - 1;
+	}
+
+	rxq->rxrearm_nb -= n;
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 /* SSE version of FDIR mark extraction for 4 32B descriptors at a time */
 static inline __m128i
@@ -394,8 +556,12 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
-		i40e_rxq_rearm(rxq);
+	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) {
+		if (rxq->direct_rxrearm_enable)
+			i40e_rxq_direct_rearm(rxq);
+		else
+			i40e_rxq_rearm(rxq);
+	}
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
-- 
2.25.1


Thread overview: 145+ messages
2022-04-20  8:16 [PATCH v1 0/5] Direct re-arming of buffers on receive side Feifei Wang
2022-04-20  8:16 ` [PATCH v1 1/5] net/i40e: remove redundant Dtype initialization Feifei Wang
2022-04-20  8:16 ` Feifei Wang [this message]
2022-05-11 22:28   ` [PATCH v1 2/5] net/i40e: enable direct rearm mode Konstantin Ananyev
2022-04-20  8:16 ` [PATCH v1 3/5] ethdev: add API for " Feifei Wang
2022-04-20  9:59   ` Morten Brørup
2022-04-29  2:42     ` Re: " Feifei Wang
2022-04-20 10:41   ` Andrew Rybchenko
2022-04-29  6:28     ` Re: " Feifei Wang
2022-05-10 22:49       ` Honnappa Nagarahalli
2022-06-03 10:19         ` Andrew Rybchenko
2022-04-20 10:50   ` Jerin Jacob
2022-05-02  3:09     ` Re: " Feifei Wang
2022-04-21 14:57   ` Stephen Hemminger
2022-04-29  6:35     ` Re: " Feifei Wang
2022-04-20  8:16 ` [PATCH v1 4/5] net/i40e: add direct rearm mode internal API Feifei Wang
2022-05-11 22:31   ` Konstantin Ananyev
2022-04-20  8:16 ` [PATCH v1 5/5] examples/l3fwd: enable direct rearm mode Feifei Wang
2022-04-20 10:10   ` Morten Brørup
2022-04-21  2:35     ` Honnappa Nagarahalli
2022-04-21  6:40       ` Morten Brørup
2022-05-10 22:01         ` Honnappa Nagarahalli
2022-05-11  7:17           ` Morten Brørup
2022-05-11 22:33   ` Konstantin Ananyev
2022-05-27 11:28     ` Konstantin Ananyev
2022-05-31 17:14       ` Honnappa Nagarahalli
2022-06-03 10:32         ` Andrew Rybchenko
2022-06-06 11:27         ` Konstantin Ananyev
2022-06-29 21:25           ` Honnappa Nagarahalli
2022-05-11 23:00 ` [PATCH v1 0/5] Direct re-arming of buffers on receive side Konstantin Ananyev
     [not found] ` <20220516061012.618787-1-feifei.wang2@arm.com>
2022-05-24  1:25   ` Konstantin Ananyev
2022-05-24 12:40     ` Morten Brørup
2022-05-24 20:14     ` Honnappa Nagarahalli
2022-05-28 12:22       ` Konstantin Ananyev
2022-06-01  1:00         ` Honnappa Nagarahalli
2022-06-03 23:32           ` Konstantin Ananyev
2022-06-04  8:07             ` Morten Brørup
2022-06-29 21:58               ` Honnappa Nagarahalli
2022-06-30 15:21                 ` Morten Brørup
2022-07-01 19:30                   ` Honnappa Nagarahalli
2022-07-01 20:28                     ` Morten Brørup
2022-06-13  5:55     ` Re: " Feifei Wang
2023-01-04  7:30 ` [PATCH v3 0/3] " Feifei Wang
2023-01-04  7:30   ` [PATCH v3 1/3] ethdev: enable direct rearm with separate API Feifei Wang
2023-01-04  8:21     ` Morten Brørup
2023-01-04  8:51       ` Re: " Feifei Wang
2023-01-04 10:11         ` Morten Brørup
2023-02-24  8:55           ` Re: " Feifei Wang
2023-03-06 12:49       ` Ferruh Yigit
2023-03-06 13:26         ` Morten Brørup
2023-03-06 14:53           ` Re: " Feifei Wang
2023-03-06 15:02           ` Ferruh Yigit
2023-03-07  6:12             ` Honnappa Nagarahalli
2023-03-07 10:52               ` Konstantin Ananyev
2023-03-07 20:41               ` Ferruh Yigit
2023-03-22 14:43                 ` Honnappa Nagarahalli
2023-02-02 14:33     ` Konstantin Ananyev
2023-02-24  9:45       ` Re: " Feifei Wang
2023-02-27 19:31         ` Konstantin Ananyev
2023-02-28  2:16           ` Re: " Feifei Wang
2023-02-28  8:09           ` Morten Brørup
2023-03-01  7:34             ` Re: " Feifei Wang
2023-01-04  7:30   ` [PATCH v3 2/3] net/i40e: " Feifei Wang
2023-02-02 14:37     ` Konstantin Ananyev
2023-02-24  9:50       ` Re: " Feifei Wang
2023-02-27 19:35         ` Konstantin Ananyev
2023-02-28  2:15           ` Re: " Feifei Wang
2023-03-07 11:01             ` Konstantin Ananyev
2023-03-14  6:07               ` Re: " Feifei Wang
2023-03-19 16:11                 ` Konstantin Ananyev
2023-03-23 10:49                   ` Feifei Wang
2023-01-04  7:30   ` [PATCH v3 3/3] net/ixgbe: " Feifei Wang
2023-01-31  6:13   ` Re: [PATCH v3 0/3] Direct re-arming of buffers on receive side Feifei Wang
2023-02-01  1:10     ` Konstantin Ananyev
2023-02-01  2:24       ` Re: " Feifei Wang
2023-03-22 12:56   ` Morten Brørup
2023-03-22 13:41     ` Honnappa Nagarahalli
2023-03-22 14:04       ` Morten Brørup
2023-08-02  7:38 ` [PATCH v8 0/4] Recycle mbufs from Tx queue into Rx queue Feifei Wang
2023-08-02  7:38   ` [PATCH v8 1/4] ethdev: add API for mbufs recycle mode Feifei Wang
2023-08-02  7:38   ` [PATCH v8 2/4] net/i40e: implement " Feifei Wang
2023-08-02  7:38   ` [PATCH v8 3/4] net/ixgbe: " Feifei Wang
2023-08-02  7:38   ` [PATCH v8 4/4] app/testpmd: add recycle mbufs engine Feifei Wang
2023-08-02  8:08 ` [PATCH v9 0/4] Recycle mbufs from Tx queue into Rx queue Feifei Wang
2023-08-02  8:08   ` [PATCH v9 1/4] ethdev: add API for mbufs recycle mode Feifei Wang
2023-08-02  8:08   ` [PATCH v9 2/4] net/i40e: implement " Feifei Wang
2023-08-02  8:08   ` [PATCH v9 3/4] net/ixgbe: " Feifei Wang
2023-08-02  8:08   ` [PATCH v9 4/4] app/testpmd: add recycle mbufs engine Feifei Wang
2023-08-04  9:24 ` [PATCH v10 0/4] Recycle mbufs from Tx queue into Rx queue Feifei Wang
2023-08-04  9:24   ` [PATCH v10 1/4] ethdev: add API for mbufs recycle mode Feifei Wang
2023-08-04  9:24   ` [PATCH v10 2/4] net/i40e: implement " Feifei Wang
2023-08-04  9:24   ` [PATCH v10 3/4] net/ixgbe: " Feifei Wang
2023-08-04  9:24   ` [PATCH v10 4/4] app/testpmd: add recycle mbufs engine Feifei Wang
2023-08-22  7:27 ` [PATCH v11 0/4] Recycle mbufs from Tx queue into Rx queue Feifei Wang
2023-08-22  7:27   ` [PATCH v11 1/4] ethdev: add API for mbufs recycle mode Feifei Wang
2023-08-22 14:02     ` Stephen Hemminger
2023-08-24  3:16       ` Feifei Wang
2023-08-22 23:33     ` Konstantin Ananyev
2023-08-24  3:38     ` Feifei Wang
2023-08-22  7:27   ` [PATCH v11 2/4] net/i40e: implement " Feifei Wang
2023-08-22 23:43     ` Konstantin Ananyev
2023-08-24  6:10     ` Feifei Wang
2023-08-31 17:24       ` Konstantin Ananyev
2023-08-31 23:49         ` Konstantin Ananyev
2023-09-01 12:22         ` Feifei Wang
2023-09-01 14:22           ` Konstantin Ananyev
2023-09-04  6:59             ` Feifei Wang
2023-09-04  7:49               ` Konstantin Ananyev
2023-09-04  9:24                 ` Feifei Wang
2023-09-04 10:21                   ` Konstantin Ananyev
2023-09-05  3:11                     ` Feifei Wang
2023-09-22 14:58                       ` Feifei Wang
2023-09-22 15:46                         ` Feifei Wang
2023-09-22 16:40                           ` Konstantin Ananyev
2023-09-23  5:52                             ` Feifei Wang
2023-09-23 20:40                               ` Konstantin Ananyev
2023-09-25  3:26                               ` Feifei Wang
2023-08-22  7:27   ` [PATCH v11 3/4] net/ixgbe: " Feifei Wang
2023-08-22  7:27   ` [PATCH v11 4/4] app/testpmd: add recycle mbufs engine Feifei Wang
2023-08-22  7:33   ` [PATCH v11 0/4] Recycle mbufs from Tx queue into Rx queue Feifei Wang
2023-08-22 13:59   ` Stephen Hemminger
2023-08-24  3:11     ` Feifei Wang
2023-08-24  7:36 ` [PATCH v12 " Feifei Wang
2023-08-24  7:36   ` [PATCH v12 1/4] ethdev: add API for mbufs recycle mode Feifei Wang
2023-08-31  9:16     ` Feifei Wang
2023-09-20 13:10     ` Ferruh Yigit
2023-08-24  7:36   ` [PATCH v12 2/4] net/i40e: implement " Feifei Wang
2023-08-24  7:36   ` [PATCH v12 3/4] net/ixgbe: " Feifei Wang
2023-08-24  7:36   ` [PATCH v12 4/4] app/testpmd: add recycle mbufs engine Feifei Wang
2023-09-20 13:11     ` Ferruh Yigit
2023-09-20 13:12   ` [PATCH v12 0/4] Recycle mbufs from Tx queue into Rx queue Ferruh Yigit
2023-09-22 15:30     ` Ferruh Yigit
2023-09-25  3:19 ` [PATCH v13 " Feifei Wang
2023-09-25  3:19   ` [PATCH v13 1/4] ethdev: add API for mbufs recycle mode Feifei Wang
2023-09-25  4:40     ` Ajit Khaparde
2023-09-25  3:19   ` [PATCH v13 2/4] net/i40e: implement " Feifei Wang
2023-09-26  8:26     ` Ferruh Yigit
2023-09-26  8:56       ` Konstantin Ananyev
2023-09-26 13:34     ` Konstantin Ananyev
2023-09-25  3:19   ` [PATCH v13 3/4] net/ixgbe: " Feifei Wang
2023-09-26 13:30     ` Konstantin Ananyev
2023-09-25  3:19   ` [PATCH v13 4/4] app/testpmd: add recycle mbufs engine Feifei Wang
2023-09-26 13:30     ` Konstantin Ananyev
2023-09-26 16:38       ` Ajit Khaparde
2023-09-27 17:24   ` [PATCH v13 0/4] Recycle mbufs from Tx queue into Rx queue Ferruh Yigit
