All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: Horatiu Vultur <horatiu.vultur@microchip.com>
Cc: <linux-kernel@vger.kernel.org>, <netdev@vger.kernel.org>,
	<UNGLinuxDriver@microchip.com>, <davem@davemloft.net>,
	<pabeni@redhat.com>, <michael@walle.cc>
Subject: Re: [PATCH net-next v3 3/4] net: lan966x: Add FDMA functionality
Date: Tue, 5 Apr 2022 21:12:30 -0700	[thread overview]
Message-ID: <20220405211230.4a1a868d@kernel.org> (raw)
In-Reply-To: <20220404130655.4004204-4-horatiu.vultur@microchip.com>

On Mon, 4 Apr 2022 15:06:54 +0200 Horatiu Vultur wrote:
> Ethernet frames can be extracted or injected to or from the device's
> DDR memory. There is one channel for injection and one channel for
> extraction. Each of these channels contain a linked list of DCBs which
> contains DB. The DCB contains only 1 DB for both the injection and
> extraction. Each DB contains a frame. Every time when a frame is received
> or transmitted an interrupt is generated.
> 
> It is not possible to use both the FDMA and the manual
> injection/extraction of the frames. Therefore the FDMA has priority over
> the manual because of better performance values.
> 
> FDMA:
> iperf -c 192.168.1.1
> [  5]   0.00-10.02  sec   420 MBytes   352 Mbits/sec    0 sender
> [  5]   0.00-10.03  sec   420 MBytes   351 Mbits/sec      receiver
> 
> iperf -c 192.168.1.1 -R
> [  5]   0.00-10.01  sec   528 MBytes   442 Mbits/sec    0 sender
> [  5]   0.00-10.00  sec   524 MBytes   440 Mbits/sec      receiver
> 
> Manual:
> iperf -c 192.168.1.1
> [  5]   0.00-10.02  sec  93.8 MBytes  78.5 Mbits/sec    0 sender
> [  5]   0.00-10.03  sec  93.8 MBytes  78.4 Mbits/sec      receiver
> 
> ipers -c 192.168.1.1 -R
> [  5]   0.00-10.03  sec   121 MBytes   101 Mbits/sec    0 sender
> [  5]   0.00-10.01  sec   118 MBytes  99.0 Mbits/sec      receiver
> 
> Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>

> +static struct sk_buff *lan966x_fdma_rx_alloc_skb(struct lan966x_rx *rx,
> +						 struct lan966x_db *db)
> +{
> +	struct lan966x *lan966x = rx->lan966x;
> +	struct sk_buff *skb;
> +	dma_addr_t dma_addr;
> +	struct page *page;
> +	void *buff_addr;
> +
> +	page = dev_alloc_pages(rx->page_order);
> +	if (unlikely(!page))
> +		return NULL;
> +
> +	dma_addr = dma_map_page(lan966x->dev, page, 0,
> +				PAGE_SIZE << rx->page_order,
> +				DMA_FROM_DEVICE);
> +	if (unlikely(dma_mapping_error(lan966x->dev, dma_addr)))
> +		goto free_page;
> +
> +	buff_addr = page_address(page);
> +	skb = build_skb(buff_addr, PAGE_SIZE << rx->page_order);

build_skb() after the packet comes in rather than upfront, that way 
the skb will be in the CPU cache already when sent up the stack.

> +	if (unlikely(!skb))
> +		goto unmap_page;
> +
> +	db->dataptr = dma_addr;
> +
> +	return skb;
> +
> +unmap_page:
> +	dma_unmap_page(lan966x->dev, dma_addr,
> +		       PAGE_SIZE << rx->page_order,
> +		       DMA_FROM_DEVICE);
> +
> +free_page:
> +	__free_pages(page, rx->page_order);
> +	return NULL;
> +}

> +static int lan966x_fdma_tx_alloc(struct lan966x_tx *tx)
> +{
> +	struct lan966x *lan966x = tx->lan966x;
> +	struct lan966x_tx_dcb *dcb;
> +	struct lan966x_db *db;
> +	int size;
> +	int i, j;
> +
> +	tx->dcbs_buf = kcalloc(FDMA_DCB_MAX, sizeof(struct lan966x_tx_dcb_buf),
> +			       GFP_ATOMIC);
> +	if (!tx->dcbs_buf)
> +		return -ENOMEM;
> +
> +	/* calculate how many pages are needed to allocate the dcbs */
> +	size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX;
> +	size = ALIGN(size, PAGE_SIZE);
> +	tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma, GFP_ATOMIC);

This functions seems to only be called from probe, so GFP_KERNEL 
is better.

> +	if (!tx->dcbs)
> +		goto out;
> +
> +	/* Now for each dcb allocate the db */
> +	for (i = 0; i < FDMA_DCB_MAX; ++i) {
> +		dcb = &tx->dcbs[i];
> +
> +		for (j = 0; j < FDMA_TX_DCB_MAX_DBS; ++j) {
> +			db = &dcb->db[j];
> +			db->dataptr = 0;
> +			db->status = 0;
> +		}
> +
> +		lan966x_fdma_tx_add_dcb(tx, dcb);
> +	}
> +
> +	return 0;
> +
> +out:
> +	kfree(tx->dcbs_buf);
> +	return -ENOMEM;
> +}

> +static void lan966x_fdma_wakeup_netdev(struct lan966x *lan966x)
> +{
> +	struct lan966x_port *port;
> +	int i;
> +
> +	for (i = 0; i < lan966x->num_phys_ports; ++i) {
> +		port = lan966x->ports[i];
> +		if (!port)
> +			continue;
> +
> +		if (netif_queue_stopped(port->dev))
> +			netif_wake_queue(port->dev);
> +	}
> +}
> +
> +static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
> +{
> +	struct lan966x_tx *tx = &lan966x->tx;
> +	struct lan966x_tx_dcb_buf *dcb_buf;
> +	struct lan966x_db *db;
> +	unsigned long flags;
> +	bool clear = false;
> +	int i;
> +
> +	spin_lock_irqsave(&lan966x->tx_lock, flags);
> +	for (i = 0; i < FDMA_DCB_MAX; ++i) {
> +		dcb_buf = &tx->dcbs_buf[i];
> +
> +		if (!dcb_buf->used)
> +			continue;
> +
> +		db = &tx->dcbs[i].db[0];
> +		if (!(db->status & FDMA_DCB_STATUS_DONE))
> +			continue;
> +
> +		dcb_buf->dev->stats.tx_packets++;
> +		dcb_buf->dev->stats.tx_bytes += dcb_buf->skb->len;
> +
> +		dcb_buf->used = false;
> +		dma_unmap_single(lan966x->dev,
> +				 dcb_buf->dma_addr,
> +				 dcb_buf->skb->len,
> +				 DMA_TO_DEVICE);
> +		if (!dcb_buf->ptp)
> +			dev_kfree_skb_any(dcb_buf->skb);
> +
> +		clear = true;
> +	}
> +	spin_unlock_irqrestore(&lan966x->tx_lock, flags);
> +
> +	if (clear)
> +		lan966x_fdma_wakeup_netdev(lan966x);

You may want to keep this call inside the lock so that the tx path
doesn't kick in between unlock and wake and fill up the queues.

> +}
> +
> +static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx)
> +{
> +	struct lan966x *lan966x = rx->lan966x;
> +	u64 src_port, timestamp;
> +	struct lan966x_db *db;
> +	struct sk_buff *skb;
> +
> +	/* Check if there is any data */
> +	db = &rx->dcbs[rx->dcb_index].db[rx->db_index];
> +	if (unlikely(!(db->status & FDMA_DCB_STATUS_DONE)))
> +		return NULL;
> +
> +	/* Get the received frame and unmap it */
> +	skb = rx->skb[rx->dcb_index][rx->db_index];
> +	dma_unmap_single(lan966x->dev, (dma_addr_t)db->dataptr,
> +			 FDMA_DCB_STATUS_BLOCKL(db->status),
> +			 DMA_FROM_DEVICE);
> +
> +	skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));
> +
> +	lan966x_ifh_get_src_port(skb->data, &src_port);
> +	lan966x_ifh_get_timestamp(skb->data, &timestamp);
> +
> +	WARN_ON(src_port >= lan966x->num_phys_ports);
> +
> +	skb->dev = lan966x->ports[src_port]->dev;
> +	skb_pull(skb, IFH_LEN * sizeof(u32));
> +
> +	if (likely(!(skb->dev->features & NETIF_F_RXFCS)))
> +		skb_trim(skb, skb->len - ETH_FCS_LEN);
> +
> +	lan966x_ptp_rxtstamp(lan966x, skb, timestamp);
> +	skb->protocol = eth_type_trans(skb, skb->dev);
> +
> +	if (lan966x->bridge_mask & BIT(src_port)) {
> +		skb->offload_fwd_mark = 1;
> +
> +		skb_reset_network_header(skb);
> +		if (!lan966x_hw_offload(lan966x, src_port, skb))
> +			skb->offload_fwd_mark = 0;
> +	}
> +
> +	skb->dev->stats.rx_bytes += skb->len;
> +	skb->dev->stats.rx_packets++;
> +
> +	return skb;
> +}
> +
> +static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
> +{
> +	struct lan966x *lan966x = container_of(napi, struct lan966x, napi);
> +	struct lan966x_rx *rx = &lan966x->rx;
> +	int dcb_reload = rx->dcb_index;
> +	struct lan966x_rx_dcb *old_dcb;
> +	struct lan966x_db *db;
> +	struct sk_buff *skb;
> +	int counter = 0;
> +	u64 nextptr;
> +
> +	lan966x_fdma_tx_clear_buf(lan966x, weight);
> +
> +	/* Get all received skb */
> +	while (counter < weight) {
> +		skb = lan966x_fdma_rx_get_frame(rx);
> +		if (!skb)
> +			break;
> +		napi_gro_receive(&lan966x->napi, skb);
> +
> +		rx->skb[rx->dcb_index][rx->db_index] = NULL;
> +
> +		rx->dcb_index++;
> +		rx->dcb_index &= FDMA_DCB_MAX - 1;
> +		counter++;
> +	}
> +
> +	/* Allocate new skbs and map them */
> +	while (dcb_reload != rx->dcb_index) {
> +		db = &rx->dcbs[dcb_reload].db[rx->db_index];
> +		skb = lan966x_fdma_rx_alloc_skb(rx, db);
> +		if (unlikely(!skb))
> +			break;
> +		rx->skb[dcb_reload][rx->db_index] = skb;
> +
> +		old_dcb = &rx->dcbs[dcb_reload];
> +		dcb_reload++;
> +		dcb_reload &= FDMA_DCB_MAX - 1;
> +
> +		nextptr = rx->dma + ((unsigned long)old_dcb -
> +				     (unsigned long)rx->dcbs);
> +		lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr);
> +		lan966x_fdma_rx_reload(rx);
> +	}
> +
> +	if (counter < weight && napi_complete_done(napi, counter))
> +		lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
> +
> +	return counter;
> +}
> +
> +irqreturn_t lan966x_fdma_irq_handler(int irq, void *args)
> +{
> +	struct lan966x *lan966x = args;
> +	u32 db, err, err_type;
> +
> +	db = lan_rd(lan966x, FDMA_INTR_DB);
> +	err = lan_rd(lan966x, FDMA_INTR_ERR);
> +
> +	if (db) {
> +		lan_wr(0, lan966x, FDMA_INTR_DB_ENA);
> +		lan_wr(db, lan966x, FDMA_INTR_DB);
> +
> +		napi_schedule(&lan966x->napi);
> +	}
> +
> +	if (err) {
> +		err_type = lan_rd(lan966x, FDMA_ERRORS);
> +
> +		WARN(1, "Unexpected error: %d, error_type: %d\n", err, err_type);
> +
> +		lan_wr(err, lan966x, FDMA_INTR_ERR);
> +		lan_wr(err_type, lan966x, FDMA_ERRORS);
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx)
> +{
> +	struct lan966x_tx_dcb_buf *dcb_buf;
> +	int i;
> +
> +	for (i = 0; i < FDMA_DCB_MAX; ++i) {
> +		dcb_buf = &tx->dcbs_buf[i];
> +		if (!dcb_buf->used && i != tx->last_in_use)
> +			return i;
> +	}
> +
> +	return -1;
> +}
> +
> +int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> +{
> +	struct lan966x_port *port = netdev_priv(dev);
> +	struct lan966x *lan966x = port->lan966x;
> +	struct lan966x_tx_dcb_buf *next_dcb_buf;
> +	struct lan966x_tx_dcb *next_dcb, *dcb;
> +	struct lan966x_tx *tx = &lan966x->tx;
> +	struct lan966x_db *next_db;
> +	int needed_headroom;
> +	int needed_tailroom;
> +	dma_addr_t dma_addr;
> +	int next_to_use;
> +	int err;
> +
> +	/* Get next index */
> +	next_to_use = lan966x_fdma_get_next_dcb(tx);
> +	if (next_to_use < 0) {
> +		netif_stop_queue(dev);
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	if (skb_put_padto(skb, ETH_ZLEN)) {
> +		dev->stats.tx_dropped++;
> +		return NETDEV_TX_OK;
> +	}
> +
> +	/* skb processing */
> +	needed_headroom = max_t(int, IFH_LEN * sizeof(u32) - skb_headroom(skb), 0);
> +	needed_tailroom = max_t(int, ETH_FCS_LEN - skb_tailroom(skb), 0);
> +	if (needed_headroom || needed_tailroom || skb_header_cloned(skb)) {
> +		err = pskb_expand_head(skb, needed_headroom, needed_tailroom,
> +				       GFP_ATOMIC);
> +		if (unlikely(err)) {
> +			dev->stats.tx_dropped++;
> +			err = NETDEV_TX_OK;
> +			goto release;
> +		}
> +	}
> +
> +	skb_tx_timestamp(skb);

This could move down after the dma mapping, so it's closer to when 
the devices gets ownership.

> +	skb_push(skb, IFH_LEN * sizeof(u32));
> +	memcpy(skb->data, ifh, IFH_LEN * sizeof(u32));
> +	skb_put(skb, 4);
> +
> +	dma_addr = dma_map_single(lan966x->dev, skb->data, skb->len,
> +				  DMA_TO_DEVICE);
> +	if (dma_mapping_error(lan966x->dev, dma_addr)) {
> +		dev->stats.tx_dropped++;
> +		err = NETDEV_TX_OK;
> +		goto release;
> +	}
> +
> +	/* Setup next dcb */
> +	next_dcb = &tx->dcbs[next_to_use];
> +	next_dcb->nextptr = FDMA_DCB_INVALID_DATA;
> +
> +	next_db = &next_dcb->db[0];
> +	next_db->dataptr = dma_addr;
> +	next_db->status = FDMA_DCB_STATUS_SOF |
> +			  FDMA_DCB_STATUS_EOF |
> +			  FDMA_DCB_STATUS_INTR |
> +			  FDMA_DCB_STATUS_BLOCKO(0) |
> +			  FDMA_DCB_STATUS_BLOCKL(skb->len);
> +
> +	/* Fill up the buffer */
> +	next_dcb_buf = &tx->dcbs_buf[next_to_use];
> +	next_dcb_buf->skb = skb;
> +	next_dcb_buf->dma_addr = dma_addr;
> +	next_dcb_buf->used = true;
> +	next_dcb_buf->ptp = false;
> +	next_dcb_buf->dev = dev;
> +
> +	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
> +	    LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
> +		next_dcb_buf->ptp = true;
> +
> +	if (likely(lan966x->tx.activated)) {
> +		/* Connect current dcb to the next db */
> +		dcb = &tx->dcbs[tx->last_in_use];
> +		dcb->nextptr = tx->dma + (next_to_use *
> +					  sizeof(struct lan966x_tx_dcb));
> +
> +		lan966x_fdma_tx_reload(tx);
> +	} else {
> +		/* Because it is first time, then just activate */
> +		lan966x->tx.activated = true;
> +		lan966x_fdma_tx_activate(tx);
> +	}
> +
> +	/* Move to next dcb because this last in use */
> +	tx->last_in_use = next_to_use;
> +
> +	return NETDEV_TX_OK;
> +
> +release:
> +	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
> +	    LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
> +		lan966x_ptp_txtstamp_release(port, skb);
> +
> +	dev_kfree_skb_any(skb);
> +	return err;
> +}
> +
> +void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev)
> +{
> +	if (lan966x->fdma_ndev)
> +		return;
> +
> +	lan966x->fdma_ndev = dev;
> +	netif_napi_add(dev, &lan966x->napi, lan966x_fdma_napi_poll,
> +		       FDMA_WEIGHT);

Just use NAPI_POLL_WEIGHT. We should just remove the last argument 
to netif_napi_add() completely but that's another story..

> +	napi_enable(&lan966x->napi);
> +}

> +
> +	if (lan966x->fdma_irq) {
> +		disable_irq(lan966x->fdma_irq);

You don't add any enable_irq() calls, maybe devm_free_irq() is a better
choice?

> +		lan966x->fdma_irq = -ENXIO;

Semantics of lan966x->fdma_irq are pretty unclear.
Why is the condition not "> 0" for this block?

> +	}
>  }
>  
>  static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
> @@ -790,12 +801,12 @@ static void lan966x_init(struct lan966x *lan966x)
>  	/* Do byte-swap and expect status after last data word
>  	 * Extraction: Mode: manual extraction) | Byte_swap
>  	 */
> -	lan_wr(QS_XTR_GRP_CFG_MODE_SET(1) |
> +	lan_wr(QS_XTR_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) |
>  	       QS_XTR_GRP_CFG_BYTE_SWAP_SET(1),
>  	       lan966x, QS_XTR_GRP_CFG(0));
>  
>  	/* Injection: Mode: manual injection | Byte_swap */
> -	lan_wr(QS_INJ_GRP_CFG_MODE_SET(1) |
> +	lan_wr(QS_INJ_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) |
>  	       QS_INJ_GRP_CFG_BYTE_SWAP_SET(1),
>  	       lan966x, QS_INJ_GRP_CFG(0));
>  
> @@ -1017,6 +1028,17 @@ static int lan966x_probe(struct platform_device *pdev)
>  		lan966x->ptp = 1;
>  	}
>  
> +	lan966x->fdma_irq = platform_get_irq_byname(pdev, "fdma");
> +	if (lan966x->fdma_irq > 0) {
> +		err = devm_request_irq(&pdev->dev, lan966x->fdma_irq,
> +				       lan966x_fdma_irq_handler, 0,
> +				       "fdma irq", lan966x);
> +		if (err)
> +			return dev_err_probe(&pdev->dev, err, "Unable to use fdma irq");
> +
> +		lan966x->fdma = true;
> +	}
> +
>  	/* init switch */
>  	lan966x_init(lan966x);
>  	lan966x_stats_init(lan966x);
> @@ -1055,8 +1077,15 @@ static int lan966x_probe(struct platform_device *pdev)
>  	if (err)
>  		goto cleanup_fdb;
>  
> +	err = lan966x_fdma_init(lan966x);

At at quick read it's unclear why this call is not conditional 
on lan988x->fdma ?

> +	if (err)
> +		goto cleanup_ptp;

  reply	other threads:[~2022-04-06 13:12 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-04 13:06 [PATCH net-next v3 0/4] net: lan966x: Add support for FDMA Horatiu Vultur
2022-04-04 13:06 ` [PATCH net-next v3 1/4] net: lan966x: Add registers that are used " Horatiu Vultur
2022-04-04 13:06 ` [PATCH net-next v3 2/4] net: lan966x: Expose functions that are needed by FDMA Horatiu Vultur
2022-04-04 13:06 ` [PATCH net-next v3 3/4] net: lan966x: Add FDMA functionality Horatiu Vultur
2022-04-06  4:12   ` Jakub Kicinski [this message]
2022-04-06 11:21     ` Horatiu Vultur
2022-04-06 17:37       ` Jakub Kicinski
2022-04-07  7:17         ` Horatiu Vultur
2022-04-04 13:06 ` [PATCH net-next v3 4/4] net: lan966x: Update FDMA to change MTU Horatiu Vultur
2022-04-04 17:20 ` [PATCH net-next v3 0/4] net: lan966x: Add support for FDMA Michael Walle

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220405211230.4a1a868d@kernel.org \
    --to=kuba@kernel.org \
    --cc=UNGLinuxDriver@microchip.com \
    --cc=davem@davemloft.net \
    --cc=horatiu.vultur@microchip.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael@walle.cc \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.