From mboxrd@z Thu Jan 1 00:00:00 1970 From: Pavan Nikhilesh Bhagavatula Subject: [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode Date: Sun, 31 Mar 2019 13:14:20 +0000 Message-ID: <20190331131341.12924-1-pbhagavatula@marvell.com> References: <20190228194128.14236-1-pbhagavatula@marvell.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Cc: "dev@dpdk.org" , Pavan Nikhilesh Bhagavatula To: Jerin Jacob Kollanukkaran , "thomas@monjalon.net" , "arybchenko@solarflare.com" , "ferruh.yigit@intel.com" , "bernard.iremonger@intel.com" Return-path: Received: from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com [67.231.148.174]) by dpdk.org (Postfix) with ESMTP id 1B3382B82 for ; Sun, 31 Mar 2019 15:14:29 +0200 (CEST) In-Reply-To: <20190228194128.14236-1-pbhagavatula@marvell.com> Content-Language: en-US List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Pavan Nikhilesh Optimize testpmd txonly mode by: 1. Moving the per-packet Ethernet header copy above the loop. 2. Using bulk ops for allocating segments instead of having an inner loop for every segment. Also, move the packet prepare logic into a separate function so that it can be reused later. Signed-off-by: Pavan Nikhilesh --- v5 Changes: - Remove unnecessary change to struct rte_port *txp (movement). (Bernard) v4 Changes: - Fix packet len calculation. v3 Changes: - Split the patches for easier review. (Thomas) - Remove unnecessary assignments to 0. (Bernard) v2 Changes: - Use bulk ops for fetching segments. (Andrew Rybchenko) - Fall back to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko) - Fix mbufs not being freed when there are no more mbufs available for segments. 
(Andrew Rybchenko) app/test-pmd/txonly.c | 139 +++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 63 deletions(-) diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index 1f08b6ed3..9c0147089 100644 --- a/app/test-pmd/txonly.c +++ b/app/test-pmd/txonly.c @@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr, ip_hdr->hdr_checksum =3D (uint16_t) ip_cksum; } +static inline bool +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + struct ether_hdr *eth_hdr, const uint16_t vlan_tci, + const uint16_t vlan_tci_outer, const uint64_t ol_flags) +{ + struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT]; + struct rte_mbuf *pkt_seg; + uint32_t nb_segs, pkt_len; + uint8_t i; + + if (unlikely(tx_pkt_split =3D=3D TX_PKT_SPLIT_RND)) + nb_segs =3D random() % tx_pkt_nb_segs + 1; + else + nb_segs =3D tx_pkt_nb_segs; + + if (nb_segs > 1) { + if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs)) + return false; + } + + rte_pktmbuf_reset_headroom(pkt); + pkt->data_len =3D tx_pkt_seg_lengths[0]; + pkt->ol_flags =3D ol_flags; + pkt->vlan_tci =3D vlan_tci; + pkt->vlan_tci_outer =3D vlan_tci_outer; + pkt->l2_len =3D sizeof(struct ether_hdr); + pkt->l3_len =3D sizeof(struct ipv4_hdr); + + pkt_len =3D pkt->data_len; + pkt_seg =3D pkt; + for (i =3D 1; i < nb_segs; i++) { + pkt_seg->next =3D pkt_segs[i - 1]; + pkt_seg =3D pkt_seg->next; + pkt_seg->data_len =3D tx_pkt_seg_lengths[i]; + pkt_len +=3D pkt_seg->data_len; + } + pkt_seg->next =3D NULL; /* Last segment of packet. */ + /* + * Copy headers in first packet segment(s). + */ + copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0); + copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt, + sizeof(struct ether_hdr)); + copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt, + sizeof(struct ether_hdr) + + sizeof(struct ipv4_hdr)); + + /* + * Complete first mbuf of packet and append it to the + * burst of packets to be transmitted. 
+ */ + pkt->nb_segs =3D nb_segs; + pkt->pkt_len =3D pkt_len; + + return true; +} + /* * Transmit a burst of multi-segments packets. */ @@ -156,7 +213,6 @@ pkt_burst_transmit(struct fwd_stream *fs) struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; struct rte_port *txp; struct rte_mbuf *pkt; - struct rte_mbuf *pkt_seg; struct rte_mempool *mbp; struct ether_hdr eth_hdr; uint16_t nb_tx; @@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs) uint16_t vlan_tci, vlan_tci_outer; uint32_t retry; uint64_t ol_flags =3D 0; - uint8_t i; uint64_t tx_offloads; #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES uint64_t start_tsc; uint64_t end_tsc; uint64_t core_cycles; #endif - uint32_t nb_segs, pkt_len; #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES start_tsc =3D rte_rdtsc(); @@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs) ol_flags |=3D PKT_TX_QINQ_PKT; if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT) ol_flags |=3D PKT_TX_MACSEC; + + /* + * Initialize Ethernet header. + */ + ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr); + ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr); + eth_hdr.ether_type =3D rte_cpu_to_be_16(ETHER_TYPE_IPv4); + for (nb_pkt =3D 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) { pkt =3D rte_mbuf_raw_alloc(mbp); - if (pkt =3D=3D NULL) { - nomore_mbuf: - if (nb_pkt =3D=3D 0) - return; + if (pkt =3D=3D NULL) + break; + if (unlikely(!pkt_burst_prepare(pkt, mbp, + &eth_hdr, vlan_tci, + vlan_tci_outer, + ol_flags))) { + rte_mempool_put(mbp, pkt); break; } - - /* - * Using raw alloc is good to improve performance, - * but some consumers may use the headroom and so - * decrement data_off. We need to make sure it is - * reset to default value. 
- */ - rte_pktmbuf_reset_headroom(pkt); - pkt->data_len =3D tx_pkt_seg_lengths[0]; - pkt_seg =3D pkt; - if (tx_pkt_split =3D=3D TX_PKT_SPLIT_RND) - nb_segs =3D random() % tx_pkt_nb_segs + 1; - else - nb_segs =3D tx_pkt_nb_segs; - pkt_len =3D pkt->data_len; - for (i =3D 1; i < nb_segs; i++) { - pkt_seg->next =3D rte_mbuf_raw_alloc(mbp); - if (pkt_seg->next =3D=3D NULL) { - pkt->nb_segs =3D i; - rte_pktmbuf_free(pkt); - goto nomore_mbuf; - } - pkt_seg =3D pkt_seg->next; - pkt_seg->data_len =3D tx_pkt_seg_lengths[i]; - pkt_len +=3D pkt_seg->data_len; - } - pkt_seg->next =3D NULL; /* Last segment of packet. */ - - /* - * Initialize Ethernet header. - */ - ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr); - ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr); - eth_hdr.ether_type =3D rte_cpu_to_be_16(ETHER_TYPE_IPv4); - - /* - * Copy headers in first packet segment(s). - */ - copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0); - copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt, - sizeof(struct ether_hdr)); - copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt, - sizeof(struct ether_hdr) + - sizeof(struct ipv4_hdr)); - - /* - * Complete first mbuf of packet and append it to the - * burst of packets to be transmitted. - */ - pkt->nb_segs =3D nb_segs; - pkt->pkt_len =3D pkt_len; - pkt->ol_flags =3D ol_flags; - pkt->vlan_tci =3D vlan_tci; - pkt->vlan_tci_outer =3D vlan_tci_outer; - pkt->l2_len =3D sizeof(struct ether_hdr); - pkt->l3_len =3D sizeof(struct ipv4_hdr); pkts_burst[nb_pkt] =3D pkt; } + + if (nb_pkt =3D=3D 0) + return; + nb_tx =3D rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt)= ; /* * Retry if necessary -- 2.21.0