All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: Gerhard Engleder <gerhard@engleder-embedded.com>
Cc: <netdev@vger.kernel.org>, <bpf@vger.kernel.org>,
	<davem@davemloft.net>, <kuba@kernel.org>, <edumazet@google.com>,
	<pabeni@redhat.com>, <bjorn@kernel.org>,
	<magnus.karlsson@intel.com>, <jonathan.lemon@gmail.com>
Subject: Re: [PATCH net-next v3 6/6] tsnep: Add XDP socket zero-copy TX support
Date: Thu, 20 Apr 2023 22:17:41 +0200	[thread overview]
Message-ID: <ZEGd5QHTInP8WRlZ@boxer> (raw)
In-Reply-To: <20230418190459.19326-7-gerhard@engleder-embedded.com>

On Tue, Apr 18, 2023 at 09:04:59PM +0200, Gerhard Engleder wrote:
> Send and complete XSK pool frames within TX NAPI context. NAPI context
> is triggered by ndo_xsk_wakeup.
> 
> Test results with A53 1.2GHz:
> 
> xdpsock txonly copy mode:
>                    pps            pkts           1.00
> tx                 284,409        11,398,144
> Two CPUs with 100% and 10% utilization.
> 
> xdpsock txonly zero-copy mode:
>                    pps            pkts           1.00
> tx                 511,929        5,890,368
> Two CPUs with 100% and 1% utilization.

Hmm, I think the l2fwd ZC numbers should be included here rather than in the
previous patch?

> 
> Packet rate increases and CPU utilization is reduced.
> 
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> ---
>  drivers/net/ethernet/engleder/tsnep.h      |   2 +
>  drivers/net/ethernet/engleder/tsnep_main.c | 127 +++++++++++++++++++--
>  2 files changed, 119 insertions(+), 10 deletions(-)
> 

(...)

> +static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
> +{
> +	struct tsnep_tx_entry *entry;
> +	dma_addr_t dma;
> +
> +	entry = &tx->entry[tx->write];
> +	entry->zc = true;
> +
> +	dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
> +	xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);
> +
> +	entry->type = TSNEP_TX_TYPE_XSK;
> +	entry->len = xdpd->len;
> +
> +	entry->desc->tx = __cpu_to_le64(dma);
> +
> +	return xdpd->len;
> +}
> +
> +static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
> +					 struct tsnep_tx *tx)
> +{
> +	int length;
> +
> +	length = tsnep_xdp_tx_map_zc(xdpd, tx);
> +
> +	tsnep_tx_activate(tx, tx->write, length, true);
> +	tx->write = (tx->write + 1) & TSNEP_RING_MASK;
> +}
> +
> +static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
> +{
> +	int desc_available = tsnep_tx_desc_available(tx);
> +	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
> +	int batch, i;
> +
> +	/* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
> +	 * will be available for normal TX path and queue is stopped there if
> +	 * necessary
> +	 */
> +	if (desc_available <= (MAX_SKB_FRAGS + 1))
> +		return;
> +	desc_available -= MAX_SKB_FRAGS + 1;
> +
> +	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
> +	for (i = 0; i < batch; i++)
> +		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);
> +
> +	if (batch) {
> +		/* descriptor properties shall be valid before hardware is
> +		 * notified
> +		 */
> +		dma_wmb();
> +
> +		tsnep_xdp_xmit_flush(tx);
> +	}
> +}
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	struct tsnep_tx_entry *entry;
>  	struct netdev_queue *nq;
> +	int xsk_frames = 0;
>  	int budget = 128;
>  	int length;
>  	int count;
> @@ -676,7 +771,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  		if ((entry->type & TSNEP_TX_TYPE_SKB) &&
>  		    skb_shinfo(entry->skb)->nr_frags > 0)
>  			count += skb_shinfo(entry->skb)->nr_frags;
> -		else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
> +		else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
>  			 xdp_frame_has_frags(entry->xdpf))
>  			count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
>  
> @@ -705,9 +800,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  
>  		if (entry->type & TSNEP_TX_TYPE_SKB)
>  			napi_consume_skb(entry->skb, napi_budget);
> -		else
> +		else if (entry->type & TSNEP_TX_TYPE_XDP)
>  			xdp_return_frame_rx_napi(entry->xdpf);
> -		/* xdpf is union with skb */
> +		else
> +			xsk_frames++;
> +		/* xdpf and zc are union with skb */
>  		entry->skb = NULL;
>  
>  		tx->read = (tx->read + count) & TSNEP_RING_MASK;
> @@ -718,6 +815,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  		budget--;
>  	} while (likely(budget));
>  
> +	if (tx->xsk_pool) {
> +		if (xsk_frames)
> +			xsk_tx_completed(tx->xsk_pool, xsk_frames);
> +		if (xsk_uses_need_wakeup(tx->xsk_pool))
> +			xsk_set_tx_need_wakeup(tx->xsk_pool);
> +		tsnep_xdp_xmit_zc(tx);

It would be good to signal to NAPI whether we are done with the work or need
to be rescheduled (i.e. when you didn't manage to consume all of the descs
from the XSK Tx ring).

> +	}
> +
>  	if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
>  	    netif_tx_queue_stopped(nq)) {
>  		netif_tx_wake_queue(nq);
> @@ -765,12 +870,6 @@ static int tsnep_tx_open(struct tsnep_tx *tx)
>  
>  static void tsnep_tx_close(struct tsnep_tx *tx)
>  {
> -	u32 val;
> -
> -	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
> -			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
> -			   1000000);
> -
>  	tsnep_tx_ring_cleanup(tx);
>  }
>  
> @@ -1786,12 +1885,18 @@ static void tsnep_queue_enable(struct tsnep_queue *queue)
>  	napi_enable(&queue->napi);
>  	tsnep_enable_irq(queue->adapter, queue->irq_mask);
>  
> +	if (queue->tx)
> +		tsnep_tx_enable(queue->tx);
> +
>  	if (queue->rx)
>  		tsnep_rx_enable(queue->rx);
>  }
>  
>  static void tsnep_queue_disable(struct tsnep_queue *queue)
>  {
> +	if (queue->tx)
> +		tsnep_tx_disable(queue->tx, &queue->napi);
> +
>  	napi_disable(&queue->napi);
>  	tsnep_disable_irq(queue->adapter, queue->irq_mask);
>  
> @@ -1908,6 +2013,7 @@ int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
>  	if (running)
>  		tsnep_queue_disable(queue);
>  
> +	queue->tx->xsk_pool = pool;
>  	queue->rx->xsk_pool = pool;
>  
>  	if (running) {
> @@ -1928,6 +2034,7 @@ void tsnep_disable_xsk(struct tsnep_queue *queue)
>  	tsnep_rx_free_zc(queue->rx);
>  
>  	queue->rx->xsk_pool = NULL;
> +	queue->tx->xsk_pool = NULL;
>  
>  	if (running) {
>  		tsnep_rx_reopen(queue->rx);
> -- 
> 2.30.2
> 

  reply	other threads:[~2023-04-20 20:18 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-18 19:04 [PATCH net-next v3 0/6] tsnep: XDP socket zero-copy support Gerhard Engleder
2023-04-18 19:04 ` [PATCH net-next v3 1/6] tsnep: Replace modulo operation with mask Gerhard Engleder
2023-04-20 14:23   ` Maciej Fijalkowski
2023-04-20 15:10     ` Stephen Hemminger
2023-04-20 18:51       ` Gerhard Engleder
2023-04-20 18:40     ` Gerhard Engleder
2023-04-18 19:04 ` [PATCH net-next v3 2/6] tsnep: Rework TX/RX queue initialization Gerhard Engleder
2023-04-18 19:04 ` [PATCH net-next v3 3/6] tsnep: Add functions for queue enable/disable Gerhard Engleder
2023-04-18 19:04 ` [PATCH net-next v3 4/6] tsnep: Move skb receive action to separate function Gerhard Engleder
2023-04-18 19:04 ` [PATCH net-next v3 5/6] tsnep: Add XDP socket zero-copy RX support Gerhard Engleder
2023-04-20 19:46   ` Maciej Fijalkowski
2023-04-21 18:54     ` Gerhard Engleder
2023-04-18 19:04 ` [PATCH net-next v3 6/6] tsnep: Add XDP socket zero-copy TX support Gerhard Engleder
2023-04-20 20:17   ` Maciej Fijalkowski [this message]
2023-04-21 19:02     ` Gerhard Engleder
2023-04-24 12:03       ` Maciej Fijalkowski
2023-04-24 18:40         ` Gerhard Engleder
2023-04-20 20:19 ` [PATCH net-next v3 0/6] tsnep: XDP socket zero-copy support Maciej Fijalkowski
2023-04-21 19:05   ` Gerhard Engleder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZEGd5QHTInP8WRlZ@boxer \
    --to=maciej.fijalkowski@intel.com \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gerhard@engleder-embedded.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kuba@kernel.org \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.