Re: [net-next v13 06/11] net: mtip: Add mtip_switch_{rx|tx} functions to the L2 switch driver

devicetree.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Lukasz Majewski <lukma@denx.de>
To: Paolo Abeni <pabeni@redhat.com>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>,
	davem@davemloft.net, Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Rob Herring <robh@kernel.org>,
	Krzysztof Kozlowski <krzk+dt@kernel.org>,
	Conor Dooley <conor+dt@kernel.org>,
	Shawn Guo <shawnguo@kernel.org>,
	Sascha Hauer <s.hauer@pengutronix.de>,
	Pengutronix Kernel Team <kernel@pengutronix.de>,
	Fabio Estevam <festevam@gmail.com>,
	Richard Cochran <richardcochran@gmail.com>,
	netdev@vger.kernel.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org, imx@lists.linux.dev,
	linux-arm-kernel@lists.infradead.org,
	Stefan Wahren <wahrenst@gmx.net>, Simon Horman <horms@kernel.org>
Subject: Re: [net-next v13 06/11] net: mtip: Add mtip_switch_{rx|tx} functions to the L2 switch driver
Date: Tue, 24 Jun 2025 23:54:54 +0200	[thread overview]
Message-ID: <20250624235454.1bf0b96a@wsk> (raw)
In-Reply-To: <0de412ee-c9ce-463b-92ef-58a33fd132d1@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 10947 bytes --]

Hi Paolo,

> On 6/22/25 11:37 AM, Lukasz Majewski wrote:
> > This patch provides mtip_switch_tx and mtip_switch_rx functions
> > code for MTIP L2 switch.
> > 
> > Signed-off-by: Lukasz Majewski <lukma@denx.de>
> > ---
> > Changes for v13:
> > - New patch - created by excluding some code from large (i.e. v12
> > and earlier) MTIP driver
> > ---
> >  .../net/ethernet/freescale/mtipsw/mtipl2sw.c  | 252
> > ++++++++++++++++++ 1 file changed, 252 insertions(+)
> > 
> > diff --git a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
> > b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c index
> > 813cd39d6d56..a4e38e0d773e 100644 ---
> > a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c +++
> > b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c @@ -228,6
> > +228,39 @@ struct mtip_port_info *mtip_portinfofifo_read(struct
> > switch_enet_private *fep) return info; }
> >  
> > +static void mtip_atable_get_entry_port_number(struct
> > switch_enet_private *fep,
> > +					      unsigned char
> > *mac_addr, u8 *port) +{
> > +	int block_index, block_index_end, entry;
> > +	u32 mac_addr_lo, mac_addr_hi;
> > +	u32 read_lo, read_hi;
> > +
> > +	mac_addr_lo = (u32)((mac_addr[3] << 24) | (mac_addr[2] <<
> > 16) |
> > +			    (mac_addr[1] << 8) | mac_addr[0]);
> > +	mac_addr_hi = (u32)((mac_addr[5] << 8) | (mac_addr[4]));
> > +
> > +	block_index = GET_BLOCK_PTR(crc8_calc(mac_addr));
> > +	block_index_end = block_index + ATABLE_ENTRY_PER_SLOT;
> > +
> > +	/* now search all the entries in the selected block */
> > +	for (entry = block_index; entry < block_index_end;
> > entry++) {
> > +		mtip_read_atable(fep, entry, &read_lo, &read_hi);
> > +		*port = MTIP_PORT_FORWARDING_INIT;
> > +
> > +		if (read_lo == mac_addr_lo &&
> > +		    ((read_hi & 0x0000FFFF) ==
> > +		     (mac_addr_hi & 0x0000FFFF))) {
> > +			/* found the correct address */
> > +			if ((read_hi & (1 << 16)) && (!(read_hi &
> > (1 << 17))))
> > +				*port = FIELD_GET(AT_PORT_MASK,
> > read_hi);
> > +			break;
> > +		}
> > +	}
> > +
> > +	dev_dbg(&fep->pdev->dev, "%s: MAC: %pM PORT: 0x%x\n",
> > __func__,
> > +		mac_addr, *port);
> > +}
> > +
> >  /* Clear complete MAC Look Up Table */
> >  void mtip_clear_atable(struct switch_enet_private *fep)
> >  {
> > @@ -820,10 +853,229 @@ static irqreturn_t mtip_interrupt(int irq,
> > void *ptr_fep) 
> >  static void mtip_switch_tx(struct net_device *dev)
> >  {
> > +	struct mtip_ndev_priv *priv = netdev_priv(dev);
> > +	struct switch_enet_private *fep = priv->fep;
> > +	unsigned short status;
> > +	struct sk_buff *skb;
> > +	unsigned long flags;
> > +	struct cbd_t *bdp;
> > +
> > +	spin_lock_irqsave(&fep->hw_lock, flags);  
> 
> This is called from napi (bh) context, and every other caller
> is/should be BH, too. You should use
> 
> 	spin_lock_bh()
> 

Ok.

> Also please test your patches with CONFIG_LOCKDEP and
> CONFIG_DEBUG_SPINLOCK enabled, thet will help finding this king of
> issues.

Ok. I will check with lockdep.

> 
> /P
> 
> > +	bdp = fep->dirty_tx;
> > +
> > +	while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) {
> > +		if (bdp == fep->cur_tx && fep->tx_full == 0)
> > +			break;
> > +
> > +		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
> > +				 MTIP_SWITCH_TX_FRSIZE,
> > DMA_TO_DEVICE);
> > +		bdp->cbd_bufaddr = 0;
> > +		skb = fep->tx_skbuff[fep->skb_dirty];
> > +		/* Check for errors */
> > +		if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
> > +				   BD_ENET_TX_RL | BD_ENET_TX_UN |
> > +				   BD_ENET_TX_CSL)) {
> > +			dev->stats.tx_errors++;
> > +			if (status & BD_ENET_TX_HB)  /* No
> > heartbeat */
> > +				dev->stats.tx_heartbeat_errors++;
> > +			if (status & BD_ENET_TX_LC)  /* Late
> > collision */
> > +				dev->stats.tx_window_errors++;
> > +			if (status & BD_ENET_TX_RL)  /* Retrans
> > limit */
> > +				dev->stats.tx_aborted_errors++;
> > +			if (status & BD_ENET_TX_UN)  /* Underrun */
> > +				dev->stats.tx_fifo_errors++;
> > +			if (status & BD_ENET_TX_CSL) /* Carrier
> > lost */
> > +				dev->stats.tx_carrier_errors++;
> > +		} else {
> > +			dev->stats.tx_packets++;
> > +		}
> > +
> > +		if (status & BD_ENET_TX_READY)
> > +			dev_err(&fep->pdev->dev,
> > +				"Enet xmit interrupt and
> > TX_READY.\n"); +
> > +		/* Deferred means some collisions occurred during
> > transmit,
> > +		 * but we eventually sent the packet OK.
> > +		 */
> > +		if (status & BD_ENET_TX_DEF)
> > +			dev->stats.collisions++;
> > +
> > +		/* Free the sk buffer associated with this last
> > transmit */
> > +		dev_consume_skb_irq(skb);
> > +		fep->tx_skbuff[fep->skb_dirty] = NULL;
> > +		fep->skb_dirty = (fep->skb_dirty + 1) &
> > TX_RING_MOD_MASK; +
> > +		/* Update pointer to next buffer descriptor to be
> > transmitted */
> > +		if (status & BD_ENET_TX_WRAP)
> > +			bdp = fep->tx_bd_base;
> > +		else
> > +			bdp++;
> > +
> > +		/* Since we have freed up a buffer, the ring is no
> > longer
> > +		 * full.
> > +		 */
> > +		if (fep->tx_full) {
> > +			fep->tx_full = 0;
> > +			if (netif_queue_stopped(dev))
> > +				netif_wake_queue(dev);
> > +		}
> > +	}
> > +	fep->dirty_tx = bdp;
> > +	spin_unlock_irqrestore(&fep->hw_lock, flags);
> >  }
> >  
> > +/* During a receive, the cur_rx points to the current incoming
> > buffer.
> > + * When we update through the ring, if the next incoming buffer has
> > + * not been given to the system, we just set the empty indicator,
> > + * effectively tossing the packet.
> > + */
> >  static int mtip_switch_rx(struct net_device *dev, int budget, int
> > *port) {
> > +	struct mtip_ndev_priv *priv = netdev_priv(dev);
> > +	u8 *data, rx_port = MTIP_PORT_FORWARDING_INIT;
> > +	struct switch_enet_private *fep = priv->fep;
> > +	unsigned short status, pkt_len;
> > +	struct net_device *pndev;
> > +	struct ethhdr *eth_hdr;
> > +	int pkt_received = 0;
> > +	struct sk_buff *skb;
> > +	struct cbd_t *bdp;
> > +	struct page *page;
> > +
> > +	spin_lock_bh(&fep->hw_lock);
> > +
> > +	/* First, grab all of the stats for the incoming packet.
> > +	 * These get messed up if we get called due to a busy
> > condition.
> > +	 */
> > +	bdp = fep->cur_rx;
> > +
> > +	while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
> > +		if (pkt_received >= budget)
> > +			break;
> > +
> > +		pkt_received++;
> > +		/* Since we have allocated space to hold a
> > complete frame,
> > +		 * the last indicator should be set.
> > +		 */
> > +		if ((status & BD_ENET_RX_LAST) == 0)
> > +			dev_warn_ratelimited(&dev->dev,
> > +					     "SWITCH ENET: rcv is
> > not +last\n");  
> 
> Probably you want to break the look when this condition is hit.

Ok.

> 
> > +
> > +		if (!fep->usage_count)
> > +			goto rx_processing_done;
> > +
> > +		/* Check for errors. */
> > +		if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH |
> > BD_ENET_RX_NO |
> > +			      BD_ENET_RX_CR | BD_ENET_RX_OV)) {
> > +			dev->stats.rx_errors++;
> > +			if (status & (BD_ENET_RX_LG |
> > BD_ENET_RX_SH)) {
> > +				/* Frame too long or too short. */
> > +				dev->stats.rx_length_errors++;
> > +			}
> > +			if (status & BD_ENET_RX_NO)	/*
> > Frame alignment */
> > +				dev->stats.rx_frame_errors++;
> > +			if (status & BD_ENET_RX_CR)	/* CRC
> > Error */
> > +				dev->stats.rx_crc_errors++;
> > +			if (status & BD_ENET_RX_OV)	/* FIFO
> > overrun */
> > +				dev->stats.rx_fifo_errors++;
> > +		}
> > +
> > +		/* Report late collisions as a frame error.
> > +		 * On this error, the BD is closed, but we don't
> > know what we
> > +		 * have in the buffer.  So, just drop this frame
> > on the floor.
> > +		 */
> > +		if (status & BD_ENET_RX_CL) {
> > +			dev->stats.rx_errors++;
> > +			dev->stats.rx_frame_errors++;
> > +			goto rx_processing_done;
> > +		}
> > +
> > +		/* Get correct RX page */
> > +		page = fep->page[bdp - fep->rx_bd_base];
> > +		/* Process the incoming frame */
> > +		pkt_len = bdp->cbd_datlen;
> > +		data = (__u8 *)__va(bdp->cbd_bufaddr);
> > +
> > +		dma_sync_single_for_cpu(&fep->pdev->dev,
> > bdp->cbd_bufaddr,
> > +					pkt_len, DMA_FROM_DEVICE);
> > +		prefetch(page_address(page));  
> 
> Use net_prefetch() instead
> 

Ok.

> > +
> > +		if (fep->quirks & FEC_QUIRK_SWAP_FRAME)
> > +			swap_buffer(data, pkt_len);
> > +
> > +		if (data) {
> > +			eth_hdr = (struct ethhdr *)data;
> > +			mtip_atable_get_entry_port_number(fep,
> > +
> > eth_hdr->h_source,
> > +
> > &rx_port);
> > +			if (rx_port == MTIP_PORT_FORWARDING_INIT)
> > +
> > mtip_atable_dynamicms_learn_migration(fep,
> > +
> >    fep->curr_time,
> > +
> >    eth_hdr->h_source,
> > +
> >    &rx_port);
> > +		}
> > +
> > +		if ((rx_port == 1 || rx_port == 2) &&
> > fep->ndev[rx_port - 1])
> > +			pndev = fep->ndev[rx_port - 1];
> > +		else
> > +			pndev = dev;
> > +
> > +		*port = rx_port;  
> 
> Do i read correctly that several network device use the same napi
> instance? That will break napi assumptions and will incorrectly allow
> napi merging packets coming from different devices.

Isn't that for what the L2 switch is for? :-)


To present the problem is this IP block:

1. I will put aside the "separate" mode, which is only used until the
bridge is created [*]. In this mode we do have separate uDMAs for each
ports (i.e. uDMA0 and uDMA1).

2. When offloading in HW L2 frames switching we do have single uDMA0 for
the bridge (other such devices have separate DMAs for each port - like
ti's cpsw - the offloading is only by setting one bit - i.e. this bit
enables switching without touching DMA configuration/setup).

That is why the NAPI is as single instance defined inside
struct switch_enet_private. It reflects the single uDMA0 used when
switching offloading is activated.

> 
> > +
> > +		/* This does 16 byte alignment, exactly what we
> > need.
> > +		 * The packet length includes FCS, but we don't
> > want to
> > +		 * include that when passing upstream as it messes
> > up
> > +		 * bridging applications.
> > +		 */
> > +		skb = netdev_alloc_skb(pndev, pkt_len +
> > NET_IP_ALIGN);
> > +		if (unlikely(!skb)) {
> > +			dev_dbg(&fep->pdev->dev,
> > +				"%s: Memory squeeze, dropping
> > packet.\n",
> > +				pndev->name);
> > +			page_pool_recycle_direct(fep->page_pool,
> > page);
> > +			pndev->stats.rx_dropped++;
> > +			goto err_mem;
> > +		} else {  
> 
> No need for the else statement above.
> 

Ok.

> /P
> 

Note:

[*]:

ip link add name br0 type bridge;
ip link set lan0 master br0; 
ip link set lan1 master br0; 


Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Erika Unter
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

next prev parent reply	other threads:[~2025-06-24 21:55 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-22  9:37 [net-next v13 00/11] net: mtip: Add support for MTIP imx287 L2 switch driver Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 01/11] dt-bindings: net: Add MTIP L2 switch description Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 02/11] ARM: dts: nxp: mxs: Adjust the imx28.dtsi " Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 03/11] ARM: dts: nxp: mxs: Adjust XEA board's DTS to support L2 switch Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 04/11] net: mtip: The L2 switch driver for imx287 Lukasz Majewski
2025-06-24 13:24   ` Paolo Abeni
2025-06-24 21:04     ` Lukasz Majewski
2025-06-25  7:13       ` Paolo Abeni
2025-06-26  6:28         ` Lukasz Majewski
2025-06-24 13:37   ` Paolo Abeni
2025-06-24 21:22     ` Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 05/11] net: mtip: Add net_device_ops functions to the L2 switch driver Lukasz Majewski
2025-06-24 13:42   ` Paolo Abeni
2025-06-24 21:33     ` Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 06/11] net: mtip: Add mtip_switch_{rx|tx} " Lukasz Majewski
2025-06-24 13:58   ` Paolo Abeni
2025-06-24 21:54     ` Lukasz Majewski [this message]
2025-06-30 11:34     ` Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 07/11] net: mtip: Extend the L2 switch driver with management operations Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 08/11] net: mtip: Extend the L2 switch driver for imx287 with bridge operations Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 09/11] ARM: mxs_defconfig: Enable CONFIG_NFS_FSCACHE Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 10/11] ARM: mxs_defconfig: Update mxs_defconfig to 6.16-rc1 Lukasz Majewski
2025-06-22  9:37 ` [net-next v13 11/11] ARM: mxs_defconfig: Enable CONFIG_FEC_MTIP_L2SW to support MTIP L2 switch Lukasz Majewski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250624235454.1bf0b96a@wsk \
    --to=lukma@denx.de \
    --cc=andrew+netdev@lunn.ch \
    --cc=conor+dt@kernel.org \
    --cc=davem@davemloft.net \
    --cc=devicetree@vger.kernel.org \
    --cc=edumazet@google.com \
    --cc=festevam@gmail.com \
    --cc=horms@kernel.org \
    --cc=imx@lists.linux.dev \
    --cc=kernel@pengutronix.de \
    --cc=krzk+dt@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=richardcochran@gmail.com \
    --cc=robh@kernel.org \
    --cc=s.hauer@pengutronix.de \
    --cc=shawnguo@kernel.org \
    --cc=wahrenst@gmx.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).