linux-sunxi.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH AUTOSEL 6.5 01/45] spi: sun6i: add quirk for dual and quad SPI modes support
@ 2023-09-08 18:12 Sasha Levin
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary Sasha Levin
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Sasha Levin @ 2023-09-08 18:12 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Maksim Kiselev, Mark Brown, Sasha Levin, wens, jernej.skrabec,
	samuel, linux-spi, linux-arm-kernel, linux-sunxi

From: Maksim Kiselev <bigunclemax@gmail.com>

[ Upstream commit 0605d9fb411f3337482976842a3901d6c125d298 ]

Newer Allwinner SPI controllers can support dual and quad SPI modes.
To enable one of these modes, we should set the corresponding bit in
the SUN6I_BURST_CTL_CNT_REG register: DRM (bit 28) for dual mode and
Quad_EN (bit 29) for quad transmission.

Signed-off-by: Maksim Kiselev <bigunclemax@gmail.com>
Link: https://lore.kernel.org/r/20230624131632.2972546-2-bigunclemax@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/spi/spi-sun6i.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 30d541612253e..cec2747235abf 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -83,6 +83,9 @@
 #define SUN6I_XMIT_CNT_REG		0x34
 
 #define SUN6I_BURST_CTL_CNT_REG		0x38
+#define SUN6I_BURST_CTL_CNT_STC_MASK		GENMASK(23, 0)
+#define SUN6I_BURST_CTL_CNT_DRM			BIT(28)
+#define SUN6I_BURST_CTL_CNT_QUAD_EN		BIT(29)
 
 #define SUN6I_TXDATA_REG		0x200
 #define SUN6I_RXDATA_REG		0x300
@@ -90,6 +93,7 @@
 struct sun6i_spi_cfg {
 	unsigned long		fifo_depth;
 	bool			has_clk_ctl;
+	u32			mode_bits;
 };
 
 struct sun6i_spi {
@@ -266,7 +270,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	unsigned int div, div_cdr1, div_cdr2, timeout;
 	unsigned int start, end, tx_time;
 	unsigned int trig_level;
-	unsigned int tx_len = 0, rx_len = 0;
+	unsigned int tx_len = 0, rx_len = 0, nbits = 0;
 	bool use_dma;
 	int ret = 0;
 	u32 reg;
@@ -418,13 +422,29 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	sun6i_spi_write(sspi, SUN6I_GBL_CTL_REG, reg);
 
 	/* Setup the transfer now... */
-	if (sspi->tx_buf)
+	if (sspi->tx_buf) {
 		tx_len = tfr->len;
+		nbits = tfr->tx_nbits;
+	} else if (tfr->rx_buf) {
+		nbits = tfr->rx_nbits;
+	}
+
+	switch (nbits) {
+	case SPI_NBITS_DUAL:
+		reg = SUN6I_BURST_CTL_CNT_DRM;
+		break;
+	case SPI_NBITS_QUAD:
+		reg = SUN6I_BURST_CTL_CNT_QUAD_EN;
+		break;
+	case SPI_NBITS_SINGLE:
+	default:
+		reg = FIELD_PREP(SUN6I_BURST_CTL_CNT_STC_MASK, tx_len);
+	}
 
 	/* Setup the counters */
+	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, reg);
 	sun6i_spi_write(sspi, SUN6I_BURST_CNT_REG, tfr->len);
 	sun6i_spi_write(sspi, SUN6I_XMIT_CNT_REG, tx_len);
-	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, tx_len);
 
 	if (!use_dma) {
 		/* Fill the TX FIFO */
@@ -623,7 +643,8 @@ static int sun6i_spi_probe(struct platform_device *pdev)
 	master->set_cs = sun6i_spi_set_cs;
 	master->transfer_one = sun6i_spi_transfer_one;
 	master->num_chipselect = 4;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST |
+			    sspi->cfg->mode_bits;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary
  2023-09-08 18:12 [PATCH AUTOSEL 6.5 01/45] spi: sun6i: add quirk for dual and quad SPI modes support Sasha Levin
@ 2023-09-08 18:12 ` Sasha Levin
  2023-09-13 14:43   ` Jisheng Zhang
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 13/45] can: sun4i_can: Add acceptance register quirk Sasha Levin
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 14/45] can: sun4i_can: Add support for the Allwinner D1 Sasha Levin
  2 siblings, 1 reply; 5+ messages in thread
From: Sasha Levin @ 2023-09-08 18:12 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Jisheng Zhang, Jakub Kicinski, Sasha Levin, alexandre.torgue,
	joabreu, davem, edumazet, pabeni, mcoquelin.stm32, wens,
	jernej.skrabec, samuel, netdev, linux-stm32, linux-arm-kernel,
	linux-sunxi

From: Jisheng Zhang <jszhang@kernel.org>

[ Upstream commit 133466c3bbe171f826294161db203f7670bb30c8 ]

Currently, there are two major issues with stmmac driver statistics.
First of all, statistics in stmmac_extra_stats, stmmac_rxq_stats
and stmmac_txq_stats are 32-bit variables on 32-bit platforms. This
can cause some stats to overflow after several minutes of
high traffic, for example rx_pkt_n, tx_pkt_n and so on.

Secondly, if HW supports multiqueues, there are frequent cacheline
ping pongs on some driver statistic vars, for example, normal_irq_n,
tx_pkt_n and so on. What's more, the frequent cacheline ping pongs on
normal_irq_n happen in the ISR, which makes the situation worse.

To improve the driver, we convert those statistics to 64 bit, implement
ndo_get_stats64 and update .get_ethtool_stats implementation
accordingly. We also use per-queue statistics where necessary to remove
the cacheline ping pongs as much as possible to make multiqueue
operations faster. Statistics that cannot overflow and are not
frequently updated are kept as is.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20230717160630.1892-3-jszhang@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/common.h  |  39 ++--
 .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c |  12 +-
 .../ethernet/stmicro/stmmac/dwmac100_dma.c    |   7 +-
 .../ethernet/stmicro/stmmac/dwmac4_descs.c    |  16 +-
 .../net/ethernet/stmicro/stmmac/dwmac4_lib.c  |  15 +-
 .../net/ethernet/stmicro/stmmac/dwmac_lib.c   |  12 +-
 .../ethernet/stmicro/stmmac/dwxgmac2_descs.c  |   6 +-
 .../ethernet/stmicro/stmmac/dwxgmac2_dma.c    |  14 +-
 .../net/ethernet/stmicro/stmmac/enh_desc.c    |  20 +-
 drivers/net/ethernet/stmicro/stmmac/hwif.h    |  12 +-
 .../net/ethernet/stmicro/stmmac/norm_desc.c   |  15 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |   2 +
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  | 123 ++++++++---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 200 ++++++++++++++----
 14 files changed, 335 insertions(+), 158 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 16e67c18b6f71..57f2137bbe9d9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -59,13 +59,25 @@
 /* #define FRAME_FILTER_DEBUG */
 
 struct stmmac_txq_stats {
-	unsigned long tx_pkt_n;
-	unsigned long tx_normal_irq_n;
+	u64 tx_bytes;
+	u64 tx_packets;
+	u64 tx_pkt_n;
+	u64 tx_normal_irq_n;
+	u64 napi_poll;
+	u64 tx_clean;
+	u64 tx_set_ic_bit;
+	u64 tx_tso_frames;
+	u64 tx_tso_nfrags;
+	struct u64_stats_sync syncp;
 };
 
 struct stmmac_rxq_stats {
-	unsigned long rx_pkt_n;
-	unsigned long rx_normal_irq_n;
+	u64 rx_bytes;
+	u64 rx_packets;
+	u64 rx_pkt_n;
+	u64 rx_normal_irq_n;
+	u64 napi_poll;
+	struct u64_stats_sync syncp;
 };
 
 /* Extra statistic and debug information exposed by ethtool */
@@ -81,6 +93,7 @@ struct stmmac_extra_stats {
 	unsigned long tx_frame_flushed;
 	unsigned long tx_payload_error;
 	unsigned long tx_ip_header_error;
+	unsigned long tx_collision;
 	/* Receive errors */
 	unsigned long rx_desc;
 	unsigned long sa_filter_fail;
@@ -113,14 +126,6 @@ struct stmmac_extra_stats {
 	/* Tx/Rx IRQ Events */
 	unsigned long rx_early_irq;
 	unsigned long threshold;
-	unsigned long tx_pkt_n;
-	unsigned long rx_pkt_n;
-	unsigned long normal_irq_n;
-	unsigned long rx_normal_irq_n;
-	unsigned long napi_poll;
-	unsigned long tx_normal_irq_n;
-	unsigned long tx_clean;
-	unsigned long tx_set_ic_bit;
 	unsigned long irq_receive_pmt_irq_n;
 	/* MMC info */
 	unsigned long mmc_tx_irq_n;
@@ -190,18 +195,16 @@ struct stmmac_extra_stats {
 	unsigned long mtl_rx_fifo_ctrl_active;
 	unsigned long mac_rx_frame_ctrl_fifo;
 	unsigned long mac_gmii_rx_proto_engine;
-	/* TSO */
-	unsigned long tx_tso_frames;
-	unsigned long tx_tso_nfrags;
 	/* EST */
 	unsigned long mtl_est_cgce;
 	unsigned long mtl_est_hlbs;
 	unsigned long mtl_est_hlbf;
 	unsigned long mtl_est_btre;
 	unsigned long mtl_est_btrlm;
-	/* per queue statistics */
-	struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
-	struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
+	unsigned long rx_dropped;
+	unsigned long rx_errors;
+	unsigned long tx_dropped;
+	unsigned long tx_errors;
 };
 
 /* Safety Feature statistics exposed by ethtool */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 1e714380d1250..b20f8ba34efd9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -440,8 +440,10 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 				     struct stmmac_extra_stats *x, u32 chan,
 				     u32 dir)
 {
-	u32 v;
+	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
 	int ret = 0;
+	u32 v;
 
 	v = readl(ioaddr + EMAC_INT_STA);
 
@@ -452,7 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 
 	if (v & EMAC_TX_INT) {
 		ret |= handle_tx;
-		x->tx_normal_irq_n++;
+		u64_stats_update_begin(&tx_q->txq_stats.syncp);
+		tx_q->txq_stats.tx_normal_irq_n++;
+		u64_stats_update_end(&tx_q->txq_stats.syncp);
 	}
 
 	if (v & EMAC_TX_DMA_STOP_INT)
@@ -474,7 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 
 	if (v & EMAC_RX_INT) {
 		ret |= handle_rx;
-		x->rx_normal_irq_n++;
+		u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+		rx_q->rxq_stats.rx_normal_irq_n++;
+		u64_stats_update_end(&rx_q->rxq_stats.syncp);
 	}
 
 	if (v & EMAC_RX_BUF_UA_INT)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index 1c32b1788f02e..dea270f60cc3e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -82,29 +82,24 @@ static void dwmac100_dump_dma_regs(struct stmmac_priv *priv,
 }
 
 /* DMA controller has two counters to track the number of the missed frames. */
-static void dwmac100_dma_diagnostic_fr(struct net_device_stats *stats,
-				       struct stmmac_extra_stats *x,
+static void dwmac100_dma_diagnostic_fr(struct stmmac_extra_stats *x,
 				       void __iomem *ioaddr)
 {
 	u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR);
 
 	if (unlikely(csr8)) {
 		if (csr8 & DMA_MISSED_FRAME_OVE) {
-			stats->rx_over_errors += 0x800;
 			x->rx_overflow_cntr += 0x800;
 		} else {
 			unsigned int ove_cntr;
 			ove_cntr = ((csr8 & DMA_MISSED_FRAME_OVE_CNTR) >> 17);
-			stats->rx_over_errors += ove_cntr;
 			x->rx_overflow_cntr += ove_cntr;
 		}
 
 		if (csr8 & DMA_MISSED_FRAME_OVE_M) {
-			stats->rx_missed_errors += 0xffff;
 			x->rx_missed_cntr += 0xffff;
 		} else {
 			unsigned int miss_f = (csr8 & DMA_MISSED_FRAME_M_CNTR);
-			stats->rx_missed_errors += miss_f;
 			x->rx_missed_cntr += miss_f;
 		}
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 6a011d8633e8e..89a14084c6117 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -13,8 +13,7 @@
 #include "dwmac4.h"
 #include "dwmac4_descs.h"
 
-static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
-				       struct stmmac_extra_stats *x,
+static int dwmac4_wrback_get_tx_status(struct stmmac_extra_stats *x,
 				       struct dma_desc *p,
 				       void __iomem *ioaddr)
 {
@@ -40,15 +39,13 @@ static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
 			x->tx_frame_flushed++;
 		if (unlikely(tdes3 & TDES3_LOSS_CARRIER)) {
 			x->tx_losscarrier++;
-			stats->tx_carrier_errors++;
 		}
 		if (unlikely(tdes3 & TDES3_NO_CARRIER)) {
 			x->tx_carrier++;
-			stats->tx_carrier_errors++;
 		}
 		if (unlikely((tdes3 & TDES3_LATE_COLLISION) ||
 			     (tdes3 & TDES3_EXCESSIVE_COLLISION)))
-			stats->collisions +=
+			x->tx_collision +=
 			    (tdes3 & TDES3_COLLISION_COUNT_MASK)
 			    >> TDES3_COLLISION_COUNT_SHIFT;
 
@@ -73,8 +70,7 @@ static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
 	return ret;
 }
 
-static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
-				       struct stmmac_extra_stats *x,
+static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x,
 				       struct dma_desc *p)
 {
 	unsigned int rdes1 = le32_to_cpu(p->des1);
@@ -93,7 +89,7 @@ static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
 
 	if (unlikely(rdes3 & RDES3_ERROR_SUMMARY)) {
 		if (unlikely(rdes3 & RDES3_GIANT_PACKET))
-			stats->rx_length_errors++;
+			x->rx_length++;
 		if (unlikely(rdes3 & RDES3_OVERFLOW_ERROR))
 			x->rx_gmac_overflow++;
 
@@ -103,10 +99,8 @@ static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
 		if (unlikely(rdes3 & RDES3_RECEIVE_ERROR))
 			x->rx_mii++;
 
-		if (unlikely(rdes3 & RDES3_CRC_ERROR)) {
+		if (unlikely(rdes3 & RDES3_CRC_ERROR))
 			x->rx_crc_errors++;
-			stats->rx_crc_errors++;
-		}
 
 		if (unlikely(rdes3 & RDES3_DRIBBLE_ERROR))
 			x->dribbling_bit++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 03ceb6a940732..980e5f8a37ec5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,6 +171,8 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 	const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
 	u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
 	u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
+	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
 	int ret = 0;
 
 	if (dir == DMA_DIR_RX)
@@ -198,18 +200,19 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 		}
 	}
 	/* TX/RX NORMAL interrupts */
-	if (likely(intr_status & DMA_CHAN_STATUS_NIS))
-		x->normal_irq_n++;
 	if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
-		x->rx_normal_irq_n++;
-		x->rxq_stats[chan].rx_normal_irq_n++;
+		u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+		rx_q->rxq_stats.rx_normal_irq_n++;
+		u64_stats_update_end(&rx_q->rxq_stats.syncp);
 		ret |= handle_rx;
 	}
 	if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
-		x->tx_normal_irq_n++;
-		x->txq_stats[chan].tx_normal_irq_n++;
+		u64_stats_update_begin(&tx_q->txq_stats.syncp);
+		tx_q->txq_stats.tx_normal_irq_n++;
+		u64_stats_update_end(&tx_q->txq_stats.syncp);
 		ret |= handle_tx;
 	}
+
 	if (unlikely(intr_status & DMA_CHAN_STATUS_TBU))
 		ret |= handle_tx;
 	if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 0b6f999a83052..aaa09b16b016f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -10,6 +10,7 @@
 #include <linux/iopoll.h>
 #include "common.h"
 #include "dwmac_dma.h"
+#include "stmmac.h"
 
 #define GMAC_HI_REG_AE		0x80000000
 
@@ -161,6 +162,8 @@ static void show_rx_process_state(unsigned int status)
 int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 			struct stmmac_extra_stats *x, u32 chan, u32 dir)
 {
+	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
 	int ret = 0;
 	/* read the status register (CSR5) */
 	u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -208,17 +211,20 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 	}
 	/* TX/RX NORMAL interrupts */
 	if (likely(intr_status & DMA_STATUS_NIS)) {
-		x->normal_irq_n++;
 		if (likely(intr_status & DMA_STATUS_RI)) {
 			u32 value = readl(ioaddr + DMA_INTR_ENA);
 			/* to schedule NAPI on real RIE event. */
 			if (likely(value & DMA_INTR_ENA_RIE)) {
-				x->rx_normal_irq_n++;
+				u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+				rx_q->rxq_stats.rx_normal_irq_n++;
+				u64_stats_update_end(&rx_q->rxq_stats.syncp);
 				ret |= handle_rx;
 			}
 		}
 		if (likely(intr_status & DMA_STATUS_TI)) {
-			x->tx_normal_irq_n++;
+			u64_stats_update_begin(&tx_q->txq_stats.syncp);
+			tx_q->txq_stats.tx_normal_irq_n++;
+			u64_stats_update_end(&tx_q->txq_stats.syncp);
 			ret |= handle_tx;
 		}
 		if (unlikely(intr_status & DMA_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 13c347ee8be9c..fc82862a612c7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -8,8 +8,7 @@
 #include "common.h"
 #include "dwxgmac2.h"
 
-static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
-				  struct stmmac_extra_stats *x,
+static int dwxgmac2_get_tx_status(struct stmmac_extra_stats *x,
 				  struct dma_desc *p, void __iomem *ioaddr)
 {
 	unsigned int tdes3 = le32_to_cpu(p->des3);
@@ -23,8 +22,7 @@ static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
 	return ret;
 }
 
-static int dwxgmac2_get_rx_status(struct net_device_stats *stats,
-				  struct stmmac_extra_stats *x,
+static int dwxgmac2_get_rx_status(struct stmmac_extra_stats *x,
 				  struct dma_desc *p)
 {
 	unsigned int rdes3 = le32_to_cpu(p->des3);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 070bd912580b7..3b5f8c595219b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,6 +337,8 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
 				  struct stmmac_extra_stats *x, u32 chan,
 				  u32 dir)
 {
+	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
 	u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
 	u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
 	int ret = 0;
@@ -364,16 +366,16 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
 
 	/* TX/RX NORMAL interrupts */
 	if (likely(intr_status & XGMAC_NIS)) {
-		x->normal_irq_n++;
-
 		if (likely(intr_status & XGMAC_RI)) {
-			x->rx_normal_irq_n++;
-			x->rxq_stats[chan].rx_normal_irq_n++;
+			u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+			rx_q->rxq_stats.rx_normal_irq_n++;
+			u64_stats_update_end(&rx_q->rxq_stats.syncp);
 			ret |= handle_rx;
 		}
 		if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
-			x->tx_normal_irq_n++;
-			x->txq_stats[chan].tx_normal_irq_n++;
+			u64_stats_update_begin(&tx_q->txq_stats.syncp);
+			tx_q->txq_stats.tx_normal_irq_n++;
+			u64_stats_update_end(&tx_q->txq_stats.syncp);
 			ret |= handle_tx;
 		}
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index a91d8f13a931d..937b7a0466fca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -12,8 +12,7 @@
 #include "common.h"
 #include "descs_com.h"
 
-static int enh_desc_get_tx_status(struct net_device_stats *stats,
-				  struct stmmac_extra_stats *x,
+static int enh_desc_get_tx_status(struct stmmac_extra_stats *x,
 				  struct dma_desc *p, void __iomem *ioaddr)
 {
 	unsigned int tdes0 = le32_to_cpu(p->des0);
@@ -38,15 +37,13 @@ static int enh_desc_get_tx_status(struct net_device_stats *stats,
 
 		if (unlikely(tdes0 & ETDES0_LOSS_CARRIER)) {
 			x->tx_losscarrier++;
-			stats->tx_carrier_errors++;
 		}
 		if (unlikely(tdes0 & ETDES0_NO_CARRIER)) {
 			x->tx_carrier++;
-			stats->tx_carrier_errors++;
 		}
 		if (unlikely((tdes0 & ETDES0_LATE_COLLISION) ||
 			     (tdes0 & ETDES0_EXCESSIVE_COLLISIONS)))
-			stats->collisions +=
+			x->tx_collision +=
 				(tdes0 & ETDES0_COLLISION_COUNT_MASK) >> 3;
 
 		if (unlikely(tdes0 & ETDES0_EXCESSIVE_DEFERRAL))
@@ -117,8 +114,7 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
 	return ret;
 }
 
-static void enh_desc_get_ext_status(struct net_device_stats *stats,
-				    struct stmmac_extra_stats *x,
+static void enh_desc_get_ext_status(struct stmmac_extra_stats *x,
 				    struct dma_extended_desc *p)
 {
 	unsigned int rdes0 = le32_to_cpu(p->basic.des0);
@@ -182,8 +178,7 @@ static void enh_desc_get_ext_status(struct net_device_stats *stats,
 	}
 }
 
-static int enh_desc_get_rx_status(struct net_device_stats *stats,
-				  struct stmmac_extra_stats *x,
+static int enh_desc_get_rx_status(struct stmmac_extra_stats *x,
 				  struct dma_desc *p)
 {
 	unsigned int rdes0 = le32_to_cpu(p->des0);
@@ -193,14 +188,14 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
 		return dma_own;
 
 	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
-		stats->rx_length_errors++;
+		x->rx_length++;
 		return discard_frame;
 	}
 
 	if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
 		if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) {
 			x->rx_desc++;
-			stats->rx_length_errors++;
+			x->rx_length++;
 		}
 		if (unlikely(rdes0 & RDES0_OVERFLOW_ERROR))
 			x->rx_gmac_overflow++;
@@ -209,7 +204,7 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
 			pr_err("\tIPC Csum Error/Giant frame\n");
 
 		if (unlikely(rdes0 & RDES0_COLLISION))
-			stats->collisions++;
+			x->rx_collision++;
 		if (unlikely(rdes0 & RDES0_RECEIVE_WATCHDOG))
 			x->rx_watchdog++;
 
@@ -218,7 +213,6 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
 
 		if (unlikely(rdes0 & RDES0_CRC_ERROR)) {
 			x->rx_crc_errors++;
-			stats->rx_crc_errors++;
 		}
 		ret = discard_frame;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 6ee7cf07cfd76..652af8f6e75ff 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -57,8 +57,7 @@ struct stmmac_desc_ops {
 	/* Last tx segment reports the transmit status */
 	int (*get_tx_ls)(struct dma_desc *p);
 	/* Return the transmit status looking at the TDES1 */
-	int (*tx_status)(struct net_device_stats *stats,
-			 struct stmmac_extra_stats *x,
+	int (*tx_status)(struct stmmac_extra_stats *x,
 			 struct dma_desc *p, void __iomem *ioaddr);
 	/* Get the buffer size from the descriptor */
 	int (*get_tx_len)(struct dma_desc *p);
@@ -67,11 +66,9 @@ struct stmmac_desc_ops {
 	/* Get the receive frame size */
 	int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
 	/* Return the reception status looking at the RDES1 */
-	int (*rx_status)(struct net_device_stats *stats,
-			 struct stmmac_extra_stats *x,
+	int (*rx_status)(struct stmmac_extra_stats *x,
 			 struct dma_desc *p);
-	void (*rx_extended_status)(struct net_device_stats *stats,
-				   struct stmmac_extra_stats *x,
+	void (*rx_extended_status)(struct stmmac_extra_stats *x,
 				   struct dma_extended_desc *p);
 	/* Set tx timestamp enable bit */
 	void (*enable_tx_timestamp) (struct dma_desc *p);
@@ -191,8 +188,7 @@ struct stmmac_dma_ops {
 	void (*dma_tx_mode)(struct stmmac_priv *priv, void __iomem *ioaddr,
 			    int mode, u32 channel, int fifosz, u8 qmode);
 	/* To track extra statistic (if supported) */
-	void (*dma_diagnostic_fr)(struct net_device_stats *stats,
-				  struct stmmac_extra_stats *x,
+	void (*dma_diagnostic_fr)(struct stmmac_extra_stats *x,
 				  void __iomem *ioaddr);
 	void (*enable_dma_transmission) (void __iomem *ioaddr);
 	void (*enable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 350e6670a5766..68a7cfcb1d8f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -12,8 +12,7 @@
 #include "common.h"
 #include "descs_com.h"
 
-static int ndesc_get_tx_status(struct net_device_stats *stats,
-			       struct stmmac_extra_stats *x,
+static int ndesc_get_tx_status(struct stmmac_extra_stats *x,
 			       struct dma_desc *p, void __iomem *ioaddr)
 {
 	unsigned int tdes0 = le32_to_cpu(p->des0);
@@ -31,15 +30,12 @@ static int ndesc_get_tx_status(struct net_device_stats *stats,
 	if (unlikely(tdes0 & TDES0_ERROR_SUMMARY)) {
 		if (unlikely(tdes0 & TDES0_UNDERFLOW_ERROR)) {
 			x->tx_underflow++;
-			stats->tx_fifo_errors++;
 		}
 		if (unlikely(tdes0 & TDES0_NO_CARRIER)) {
 			x->tx_carrier++;
-			stats->tx_carrier_errors++;
 		}
 		if (unlikely(tdes0 & TDES0_LOSS_CARRIER)) {
 			x->tx_losscarrier++;
-			stats->tx_carrier_errors++;
 		}
 		if (unlikely((tdes0 & TDES0_EXCESSIVE_DEFERRAL) ||
 			     (tdes0 & TDES0_EXCESSIVE_COLLISIONS) ||
@@ -47,7 +43,7 @@ static int ndesc_get_tx_status(struct net_device_stats *stats,
 			unsigned int collisions;
 
 			collisions = (tdes0 & TDES0_COLLISION_COUNT_MASK) >> 3;
-			stats->collisions += collisions;
+			x->tx_collision += collisions;
 		}
 		ret = tx_err;
 	}
@@ -70,8 +66,7 @@ static int ndesc_get_tx_len(struct dma_desc *p)
  * and, if required, updates the multicast statistics.
  * In case of success, it returns good_frame because the GMAC device
  * is supposed to be able to compute the csum in HW. */
-static int ndesc_get_rx_status(struct net_device_stats *stats,
-			       struct stmmac_extra_stats *x,
+static int ndesc_get_rx_status(struct stmmac_extra_stats *x,
 			       struct dma_desc *p)
 {
 	int ret = good_frame;
@@ -81,7 +76,7 @@ static int ndesc_get_rx_status(struct net_device_stats *stats,
 		return dma_own;
 
 	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
-		stats->rx_length_errors++;
+		x->rx_length++;
 		return discard_frame;
 	}
 
@@ -96,11 +91,9 @@ static int ndesc_get_rx_status(struct net_device_stats *stats,
 			x->ipc_csum_error++;
 		if (unlikely(rdes0 & RDES0_COLLISION)) {
 			x->rx_collision++;
-			stats->collisions++;
 		}
 		if (unlikely(rdes0 & RDES0_CRC_ERROR)) {
 			x->rx_crc_errors++;
-			stats->rx_crc_errors++;
 		}
 		ret = discard_frame;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 07ea5ab0a60ba..4ce5eaaae5135 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -77,6 +77,7 @@ struct stmmac_tx_queue {
 	dma_addr_t dma_tx_phy;
 	dma_addr_t tx_tail_addr;
 	u32 mss;
+	struct stmmac_txq_stats txq_stats;
 };
 
 struct stmmac_rx_buffer {
@@ -121,6 +122,7 @@ struct stmmac_rx_queue {
 		unsigned int len;
 		unsigned int error;
 	} state;
+	struct stmmac_rxq_stats rxq_stats;
 };
 
 struct stmmac_channel {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2ae73ab842d45..b7ac7abecdd35 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -89,14 +89,6 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	/* Tx/Rx IRQ Events */
 	STMMAC_STAT(rx_early_irq),
 	STMMAC_STAT(threshold),
-	STMMAC_STAT(tx_pkt_n),
-	STMMAC_STAT(rx_pkt_n),
-	STMMAC_STAT(normal_irq_n),
-	STMMAC_STAT(rx_normal_irq_n),
-	STMMAC_STAT(napi_poll),
-	STMMAC_STAT(tx_normal_irq_n),
-	STMMAC_STAT(tx_clean),
-	STMMAC_STAT(tx_set_ic_bit),
 	STMMAC_STAT(irq_receive_pmt_irq_n),
 	/* MMC info */
 	STMMAC_STAT(mmc_tx_irq_n),
@@ -163,9 +155,6 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(mtl_rx_fifo_ctrl_active),
 	STMMAC_STAT(mac_rx_frame_ctrl_fifo),
 	STMMAC_STAT(mac_gmii_rx_proto_engine),
-	/* TSO */
-	STMMAC_STAT(tx_tso_frames),
-	STMMAC_STAT(tx_tso_nfrags),
 	/* EST */
 	STMMAC_STAT(mtl_est_cgce),
 	STMMAC_STAT(mtl_est_hlbs),
@@ -175,6 +164,23 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
+/* statistics collected in queue which will be summed up for all TX or RX
+ * queues, or summed up for both TX and RX queues(napi_poll, normal_irq_n).
+ */
+static const char stmmac_qstats_string[][ETH_GSTRING_LEN] = {
+	"rx_pkt_n",
+	"rx_normal_irq_n",
+	"tx_pkt_n",
+	"tx_normal_irq_n",
+	"tx_clean",
+	"tx_set_ic_bit",
+	"tx_tso_frames",
+	"tx_tso_nfrags",
+	"normal_irq_n",
+	"napi_poll",
+};
+#define STMMAC_QSTATS ARRAY_SIZE(stmmac_qstats_string)
+
 /* HW MAC Management counters (if supported) */
 #define STMMAC_MMC_STAT(m)	\
 	{ #m, sizeof_field(struct stmmac_counters, m),	\
@@ -535,23 +541,44 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 {
 	u32 tx_cnt = priv->plat->tx_queues_to_use;
 	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	unsigned int start;
 	int q, stat;
+	u64 *pos;
 	char *p;
 
+	pos = data;
 	for (q = 0; q < tx_cnt; q++) {
-		p = (char *)priv + offsetof(struct stmmac_priv,
-					    xstats.txq_stats[q].tx_pkt_n);
+		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[q];
+		struct stmmac_txq_stats snapshot;
+
+		data = pos;
+		do {
+			start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
+			snapshot = tx_q->txq_stats;
+		} while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
+
+		p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
 		for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
-			*data++ = (*(unsigned long *)p);
-			p += sizeof(unsigned long);
+			*data++ += (*(u64 *)p);
+			p += sizeof(u64);
 		}
 	}
+
+	pos = data;
 	for (q = 0; q < rx_cnt; q++) {
-		p = (char *)priv + offsetof(struct stmmac_priv,
-					    xstats.rxq_stats[q].rx_pkt_n);
+		struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[q];
+		struct stmmac_rxq_stats snapshot;
+
+		data = pos;
+		do {
+			start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
+			snapshot = rx_q->rxq_stats;
+		} while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
+
+		p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
 		for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
-			*data++ = (*(unsigned long *)p);
-			p += sizeof(unsigned long);
+			*data++ += (*(u64 *)p);
+			p += sizeof(u64);
 		}
 	}
 }
@@ -562,8 +589,10 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_queues_count = priv->plat->rx_queues_to_use;
 	u32 tx_queues_count = priv->plat->tx_queues_to_use;
+	u64 napi_poll = 0, normal_irq_n = 0;
+	int i, j = 0, pos, ret;
 	unsigned long count;
-	int i, j = 0, ret;
+	unsigned int start;
 
 	if (priv->dma_cap.asp) {
 		for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
@@ -574,8 +603,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 	}
 
 	/* Update the DMA HW counters for dwmac10/100 */
-	ret = stmmac_dma_diagnostic_fr(priv, &dev->stats, (void *) &priv->xstats,
-			priv->ioaddr);
+	ret = stmmac_dma_diagnostic_fr(priv, &priv->xstats, priv->ioaddr);
 	if (ret) {
 		/* If supported, for new GMAC chips expose the MMC counters */
 		if (priv->dma_cap.rmon) {
@@ -606,6 +634,48 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 		data[j++] = (stmmac_gstrings_stats[i].sizeof_stat ==
 			     sizeof(u64)) ? (*(u64 *)p) : (*(u32 *)p);
 	}
+
+	pos = j;
+	for (i = 0; i < rx_queues_count; i++) {
+		struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[i];
+		struct stmmac_rxq_stats snapshot;
+
+		j = pos;
+		do {
+			start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
+			snapshot = rx_q->rxq_stats;
+		} while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
+
+		data[j++] += snapshot.rx_pkt_n;
+		data[j++] += snapshot.rx_normal_irq_n;
+		normal_irq_n += snapshot.rx_normal_irq_n;
+		napi_poll += snapshot.napi_poll;
+	}
+
+	pos = j;
+	for (i = 0; i < tx_queues_count; i++) {
+		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[i];
+		struct stmmac_txq_stats snapshot;
+
+		j = pos;
+		do {
+			start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
+			snapshot = tx_q->txq_stats;
+		} while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
+
+		data[j++] += snapshot.tx_pkt_n;
+		data[j++] += snapshot.tx_normal_irq_n;
+		normal_irq_n += snapshot.tx_normal_irq_n;
+		data[j++] += snapshot.tx_clean;
+		data[j++] += snapshot.tx_set_ic_bit;
+		data[j++] += snapshot.tx_tso_frames;
+		data[j++] += snapshot.tx_tso_nfrags;
+		napi_poll += snapshot.napi_poll;
+	}
+	normal_irq_n += priv->xstats.rx_early_irq;
+	data[j++] = normal_irq_n;
+	data[j++] = napi_poll;
+
 	stmmac_get_per_qstats(priv, &data[j]);
 }
 
@@ -618,7 +688,7 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		len = STMMAC_STATS_LEN +
+		len = STMMAC_STATS_LEN + STMMAC_QSTATS +
 		      STMMAC_TXQ_STATS * tx_cnt +
 		      STMMAC_RXQ_STATS * rx_cnt;
 
@@ -691,8 +761,11 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 				p += ETH_GSTRING_LEN;
 			}
 		for (i = 0; i < STMMAC_STATS_LEN; i++) {
-			memcpy(p, stmmac_gstrings_stats[i].stat_string,
-				ETH_GSTRING_LEN);
+			memcpy(p, stmmac_gstrings_stats[i].stat_string, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < STMMAC_QSTATS; i++) {
+			memcpy(p, stmmac_qstats_string[i], ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
 		stmmac_get_qstats_string(priv, p);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4727f7be4f86e..78d4143f5dea3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2432,6 +2432,8 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
 	struct dma_desc *tx_desc = NULL;
 	struct xdp_desc xdp_desc;
 	bool work_done = true;
+	u32 tx_set_ic_bit = 0;
+	unsigned long flags;
 
 	/* Avoids TX time-out as we are sharing with slow path */
 	txq_trans_cond_update(nq);
@@ -2492,7 +2494,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
 		if (set_ic) {
 			tx_q->tx_count_frames = 0;
 			stmmac_set_tx_ic(priv, tx_desc);
-			priv->xstats.tx_set_ic_bit++;
+			tx_set_ic_bit++;
 		}
 
 		stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len,
@@ -2504,6 +2506,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
 		entry = tx_q->cur_tx;
 	}
+	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+	tx_q->txq_stats.tx_set_ic_bit += tx_set_ic_bit;
+	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
 
 	if (tx_desc) {
 		stmmac_flush_tx_descriptors(priv, queue);
@@ -2545,11 +2550,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
 	unsigned int bytes_compl = 0, pkts_compl = 0;
 	unsigned int entry, xmits = 0, count = 0;
+	u32 tx_packets = 0, tx_errors = 0;
+	unsigned long flags;
 
 	__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
 
-	priv->xstats.tx_clean++;
-
 	tx_q->xsk_frames_done = 0;
 
 	entry = tx_q->dirty_tx;
@@ -2580,8 +2585,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 		else
 			p = tx_q->dma_tx + entry;
 
-		status = stmmac_tx_status(priv, &priv->dev->stats,
-				&priv->xstats, p, priv->ioaddr);
+		status = stmmac_tx_status(priv,	&priv->xstats, p, priv->ioaddr);
 		/* Check if the descriptor is owned by the DMA */
 		if (unlikely(status & tx_dma_own))
 			break;
@@ -2597,13 +2601,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 		if (likely(!(status & tx_not_ls))) {
 			/* ... verify the status error condition */
 			if (unlikely(status & tx_err)) {
-				priv->dev->stats.tx_errors++;
+				tx_errors++;
 				if (unlikely(status & tx_err_bump_tc))
 					stmmac_bump_dma_threshold(priv, queue);
 			} else {
-				priv->dev->stats.tx_packets++;
-				priv->xstats.tx_pkt_n++;
-				priv->xstats.txq_stats[queue].tx_pkt_n++;
+				tx_packets++;
 			}
 			if (skb)
 				stmmac_get_tx_hwtstamp(priv, p, skb);
@@ -2707,6 +2709,14 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 			      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
 			      HRTIMER_MODE_REL);
 
+	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+	tx_q->txq_stats.tx_packets += tx_packets;
+	tx_q->txq_stats.tx_pkt_n += tx_packets;
+	tx_q->txq_stats.tx_clean++;
+	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+
+	priv->xstats.tx_errors += tx_errors;
+
 	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
 
 	/* Combine decisions from TX clean and XSK TX */
@@ -2734,7 +2744,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 			    tx_q->dma_tx_phy, chan);
 	stmmac_start_tx_dma(priv, chan);
 
-	priv->dev->stats.tx_errors++;
+	priv->xstats.tx_errors++;
 	netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, chan));
 }
 
@@ -4110,6 +4120,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct stmmac_tx_queue *tx_q;
 	bool has_vlan, set_ic;
 	u8 proto_hdr_len, hdr;
+	unsigned long flags;
 	u32 pay_len, mss;
 	dma_addr_t des;
 	int i;
@@ -4258,7 +4269,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		tx_q->tx_count_frames = 0;
 		stmmac_set_tx_ic(priv, desc);
-		priv->xstats.tx_set_ic_bit++;
 	}
 
 	/* We've used all descriptors we need for this skb, however,
@@ -4274,9 +4284,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 
-	dev->stats.tx_bytes += skb->len;
-	priv->xstats.tx_tso_frames++;
-	priv->xstats.tx_tso_nfrags += nfrags;
+	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+	tx_q->txq_stats.tx_bytes += skb->len;
+	tx_q->txq_stats.tx_tso_frames++;
+	tx_q->txq_stats.tx_tso_nfrags += nfrags;
+	if (set_ic)
+		tx_q->txq_stats.tx_set_ic_bit++;
+	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
 
 	if (priv->sarc_type)
 		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4326,7 +4340,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 dma_map_err:
 	dev_err(priv->device, "Tx dma map failed\n");
 	dev_kfree_skb(skb);
-	priv->dev->stats.tx_dropped++;
+	priv->xstats.tx_dropped++;
 	return NETDEV_TX_OK;
 }
 
@@ -4352,6 +4366,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct stmmac_tx_queue *tx_q;
 	bool has_vlan, set_ic;
 	int entry, first_tx;
+	unsigned long flags;
 	dma_addr_t des;
 
 	tx_q = &priv->dma_conf.tx_queue[queue];
@@ -4480,7 +4495,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		tx_q->tx_count_frames = 0;
 		stmmac_set_tx_ic(priv, desc);
-		priv->xstats.tx_set_ic_bit++;
 	}
 
 	/* We've used all descriptors we need for this skb, however,
@@ -4507,7 +4521,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 
-	dev->stats.tx_bytes += skb->len;
+	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+	tx_q->txq_stats.tx_bytes += skb->len;
+	if (set_ic)
+		tx_q->txq_stats.tx_set_ic_bit++;
+	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
 
 	if (priv->sarc_type)
 		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4569,7 +4587,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 dma_map_err:
 	netdev_err(priv->dev, "Tx DMA map failed\n");
 	dev_kfree_skb(skb);
-	priv->dev->stats.tx_dropped++;
+	priv->xstats.tx_dropped++;
 	return NETDEV_TX_OK;
 }
 
@@ -4770,9 +4788,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
 		set_ic = false;
 
 	if (set_ic) {
+		unsigned long flags;
 		tx_q->tx_count_frames = 0;
 		stmmac_set_tx_ic(priv, tx_desc);
-		priv->xstats.tx_set_ic_bit++;
+		flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+		tx_q->txq_stats.tx_set_ic_bit++;
+		u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
 	}
 
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -4917,16 +4938,18 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
 				   struct dma_desc *p, struct dma_desc *np,
 				   struct xdp_buff *xdp)
 {
+	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
 	struct stmmac_channel *ch = &priv->channel[queue];
 	unsigned int len = xdp->data_end - xdp->data;
 	enum pkt_hash_types hash_type;
 	int coe = priv->hw->rx_csum;
+	unsigned long flags;
 	struct sk_buff *skb;
 	u32 hash;
 
 	skb = stmmac_construct_skb_zc(ch, xdp);
 	if (!skb) {
-		priv->dev->stats.rx_dropped++;
+		priv->xstats.rx_dropped++;
 		return;
 	}
 
@@ -4945,8 +4968,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
 	skb_record_rx_queue(skb, queue);
 	napi_gro_receive(&ch->rxtx_napi, skb);
 
-	priv->dev->stats.rx_packets++;
-	priv->dev->stats.rx_bytes += len;
+	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+	rx_q->rxq_stats.rx_pkt_n++;
+	rx_q->rxq_stats.rx_bytes += len;
+	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
 }
 
 static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
@@ -5023,9 +5048,11 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 	unsigned int count = 0, error = 0, len = 0;
 	int dirty = stmmac_rx_dirty(priv, queue);
 	unsigned int next_entry = rx_q->cur_rx;
+	u32 rx_errors = 0, rx_dropped = 0;
 	unsigned int desc_size;
 	struct bpf_prog *prog;
 	bool failure = false;
+	unsigned long flags;
 	int xdp_status = 0;
 	int status = 0;
 
@@ -5081,8 +5108,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 			p = rx_q->dma_rx + entry;
 
 		/* read the status of the incoming frame */
-		status = stmmac_rx_status(priv, &priv->dev->stats,
-					  &priv->xstats, p);
+		status = stmmac_rx_status(priv, &priv->xstats, p);
 		/* check if managed by the DMA otherwise go ahead */
 		if (unlikely(status & dma_own))
 			break;
@@ -5104,8 +5130,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 			break;
 
 		if (priv->extend_desc)
-			stmmac_rx_extended_status(priv, &priv->dev->stats,
-						  &priv->xstats,
+			stmmac_rx_extended_status(priv, &priv->xstats,
 						  rx_q->dma_erx + entry);
 		if (unlikely(status == discard_frame)) {
 			xsk_buff_free(buf->xdp);
@@ -5113,7 +5138,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 			dirty++;
 			error = 1;
 			if (!priv->hwts_rx_en)
-				priv->dev->stats.rx_errors++;
+				rx_errors++;
 		}
 
 		if (unlikely(error && (status & rx_not_ls)))
@@ -5161,7 +5186,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 			break;
 		case STMMAC_XDP_CONSUMED:
 			xsk_buff_free(buf->xdp);
-			priv->dev->stats.rx_dropped++;
+			rx_dropped++;
 			break;
 		case STMMAC_XDP_TX:
 		case STMMAC_XDP_REDIRECT:
@@ -5182,8 +5207,12 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 
 	stmmac_finalize_xdp_rx(priv, xdp_status);
 
-	priv->xstats.rx_pkt_n += count;
-	priv->xstats.rxq_stats[queue].rx_pkt_n += count;
+	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+	rx_q->rxq_stats.rx_pkt_n += count;
+	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+
+	priv->xstats.rx_dropped += rx_dropped;
+	priv->xstats.rx_errors += rx_errors;
 
 	if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
 		if (failure || stmmac_rx_dirty(priv, queue) > 0)
@@ -5207,6 +5236,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
  */
 static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 {
+	u32 rx_errors = 0, rx_dropped = 0, rx_bytes = 0, rx_packets = 0;
 	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
 	struct stmmac_channel *ch = &priv->channel[queue];
 	unsigned int count = 0, error = 0, len = 0;
@@ -5216,6 +5246,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 	unsigned int desc_size;
 	struct sk_buff *skb = NULL;
 	struct stmmac_xdp_buff ctx;
+	unsigned long flags;
 	int xdp_status = 0;
 	int buf_sz;
 
@@ -5271,8 +5302,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			p = rx_q->dma_rx + entry;
 
 		/* read the status of the incoming frame */
-		status = stmmac_rx_status(priv, &priv->dev->stats,
-				&priv->xstats, p);
+		status = stmmac_rx_status(priv, &priv->xstats, p);
 		/* check if managed by the DMA otherwise go ahead */
 		if (unlikely(status & dma_own))
 			break;
@@ -5289,14 +5319,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 		prefetch(np);
 
 		if (priv->extend_desc)
-			stmmac_rx_extended_status(priv, &priv->dev->stats,
-					&priv->xstats, rx_q->dma_erx + entry);
+			stmmac_rx_extended_status(priv, &priv->xstats, rx_q->dma_erx + entry);
 		if (unlikely(status == discard_frame)) {
 			page_pool_recycle_direct(rx_q->page_pool, buf->page);
 			buf->page = NULL;
 			error = 1;
 			if (!priv->hwts_rx_en)
-				priv->dev->stats.rx_errors++;
+				rx_errors++;
 		}
 
 		if (unlikely(error && (status & rx_not_ls)))
@@ -5364,7 +5393,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 							   virt_to_head_page(ctx.xdp.data),
 							   sync_len, true);
 					buf->page = NULL;
-					priv->dev->stats.rx_dropped++;
+					rx_dropped++;
 
 					/* Clear skb as it was set as
 					 * status by XDP program.
@@ -5393,7 +5422,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
 			if (!skb) {
-				priv->dev->stats.rx_dropped++;
+				rx_dropped++;
 				count++;
 				goto drain_data;
 			}
@@ -5453,8 +5482,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 		napi_gro_receive(&ch->rx_napi, skb);
 		skb = NULL;
 
-		priv->dev->stats.rx_packets++;
-		priv->dev->stats.rx_bytes += len;
+		rx_packets++;
+		rx_bytes += len;
 		count++;
 	}
 
@@ -5469,8 +5498,14 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 	stmmac_rx_refill(priv, queue);
 
-	priv->xstats.rx_pkt_n += count;
-	priv->xstats.rxq_stats[queue].rx_pkt_n += count;
+	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+	rx_q->rxq_stats.rx_packets += rx_packets;
+	rx_q->rxq_stats.rx_bytes += rx_bytes;
+	rx_q->rxq_stats.rx_pkt_n += count;
+	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+
+	priv->xstats.rx_dropped += rx_dropped;
+	priv->xstats.rx_errors += rx_errors;
 
 	return count;
 }
@@ -5480,10 +5515,15 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, rx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
+	struct stmmac_rx_queue *rx_q;
 	u32 chan = ch->index;
+	unsigned long flags;
 	int work_done;
 
-	priv->xstats.napi_poll++;
+	rx_q = &priv->dma_conf.rx_queue[chan];
+	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+	rx_q->rxq_stats.napi_poll++;
+	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
 
 	work_done = stmmac_rx(priv, budget, chan);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5502,10 +5542,15 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, tx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
+	struct stmmac_tx_queue *tx_q;
 	u32 chan = ch->index;
+	unsigned long flags;
 	int work_done;
 
-	priv->xstats.napi_poll++;
+	tx_q = &priv->dma_conf.tx_queue[chan];
+	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+	tx_q->txq_stats.napi_poll++;
+	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
 
 	work_done = stmmac_tx_clean(priv, budget, chan);
 	work_done = min(work_done, budget);
@@ -5527,9 +5572,20 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
 		container_of(napi, struct stmmac_channel, rxtx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
 	int rx_done, tx_done, rxtx_done;
+	struct stmmac_rx_queue *rx_q;
+	struct stmmac_tx_queue *tx_q;
 	u32 chan = ch->index;
+	unsigned long flags;
+
+	rx_q = &priv->dma_conf.rx_queue[chan];
+	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+	rx_q->rxq_stats.napi_poll++;
+	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
 
-	priv->xstats.napi_poll++;
+	tx_q = &priv->dma_conf.tx_queue[chan];
+	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+	tx_q->txq_stats.napi_poll++;
+	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
 
 	tx_done = stmmac_tx_clean(priv, budget, chan);
 	tx_done = min(tx_done, budget);
@@ -6788,6 +6844,56 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
 	return 0;
 }
 
+static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	unsigned int start;
+	int q;
+
+	for (q = 0; q < tx_cnt; q++) {
+		struct stmmac_txq_stats *txq_stats = &priv->dma_conf.tx_queue[q].txq_stats;
+		u64 tx_packets;
+		u64 tx_bytes;
+
+		do {
+			start = u64_stats_fetch_begin(&txq_stats->syncp);
+			tx_packets = txq_stats->tx_packets;
+			tx_bytes   = txq_stats->tx_bytes;
+		} while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes += tx_bytes;
+	}
+
+	for (q = 0; q < rx_cnt; q++) {
+		struct stmmac_rxq_stats *rxq_stats = &priv->dma_conf.rx_queue[q].rxq_stats;
+		u64 rx_packets;
+		u64 rx_bytes;
+
+		do {
+			start = u64_stats_fetch_begin(&rxq_stats->syncp);
+			rx_packets = rxq_stats->rx_packets;
+			rx_bytes   = rxq_stats->rx_bytes;
+		} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes += rx_bytes;
+	}
+
+	stats->rx_dropped = priv->xstats.rx_dropped;
+	stats->rx_errors = priv->xstats.rx_errors;
+	stats->tx_dropped = priv->xstats.tx_dropped;
+	stats->tx_errors = priv->xstats.tx_errors;
+	stats->tx_carrier_errors = priv->xstats.tx_losscarrier + priv->xstats.tx_carrier;
+	stats->collisions = priv->xstats.tx_collision + priv->xstats.rx_collision;
+	stats->rx_length_errors = priv->xstats.rx_length;
+	stats->rx_crc_errors = priv->xstats.rx_crc_errors;
+	stats->rx_over_errors = priv->xstats.rx_overflow_cntr;
+	stats->rx_missed_errors = priv->xstats.rx_missed_cntr;
+}
+
 static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_open = stmmac_open,
 	.ndo_start_xmit = stmmac_xmit,
@@ -6798,6 +6904,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_set_rx_mode = stmmac_set_rx_mode,
 	.ndo_tx_timeout = stmmac_tx_timeout,
 	.ndo_eth_ioctl = stmmac_ioctl,
+	.ndo_get_stats64 = stmmac_get_stats64,
 	.ndo_setup_tc = stmmac_setup_tc,
 	.ndo_select_queue = stmmac_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -7160,6 +7267,11 @@ int stmmac_dvr_probe(struct device *device,
 	priv->device = device;
 	priv->dev = ndev;
 
+	for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
+		u64_stats_init(&priv->dma_conf.rx_queue[i].rxq_stats.syncp);
+	for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
+		u64_stats_init(&priv->dma_conf.tx_queue[i].txq_stats.syncp);
+
 	stmmac_set_ethtool_ops(ndev);
 	priv->pause = pause;
 	priv->plat = plat_dat;
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 6.5 13/45] can: sun4i_can: Add acceptance register quirk
  2023-09-08 18:12 [PATCH AUTOSEL 6.5 01/45] spi: sun6i: add quirk for dual and quad SPI modes support Sasha Levin
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary Sasha Levin
@ 2023-09-08 18:12 ` Sasha Levin
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 14/45] can: sun4i_can: Add support for the Allwinner D1 Sasha Levin
  2 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2023-09-08 18:12 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: John Watts, Marc Kleine-Budde, Sasha Levin, wg, davem, edumazet,
	kuba, pabeni, wens, jernej.skrabec, samuel, linux-can, netdev,
	linux-arm-kernel, linux-sunxi

From: John Watts <contact@jookia.org>

[ Upstream commit 8cda0c6dfd42ee6f2586e7dffb553aaf1fcb62ca ]

The Allwinner D1's CAN controllers have the ACPC and ACPM registers
moved down. Compensate for this by adding an offset quirk for the
acceptance registers.

Signed-off-by: John Watts <contact@jookia.org>
Link: https://lore.kernel.org/all/20230721221552.1973203-5-contact@jookia.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/can/sun4i_can.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index 0827830bbf28c..1f90fe6dbb8bb 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -205,9 +205,11 @@
  * struct sun4ican_quirks - Differences between SoC variants.
  *
  * @has_reset: SoC needs reset deasserted.
+ * @acp_offset: Offset of ACPC and ACPM registers
  */
 struct sun4ican_quirks {
 	bool has_reset;
+	int acp_offset;
 };
 
 struct sun4ican_priv {
@@ -216,6 +218,7 @@ struct sun4ican_priv {
 	struct clk *clk;
 	struct reset_control *reset;
 	spinlock_t cmdreg_lock;	/* lock for concurrent cmd register writes */
+	int acp_offset;
 };
 
 static const struct can_bittiming_const sun4ican_bittiming_const = {
@@ -338,8 +341,8 @@ static int sun4i_can_start(struct net_device *dev)
 	}
 
 	/* set filters - we accept all */
-	writel(0x00000000, priv->base + SUN4I_REG_ACPC_ADDR);
-	writel(0xFFFFFFFF, priv->base + SUN4I_REG_ACPM_ADDR);
+	writel(0x00000000, priv->base + SUN4I_REG_ACPC_ADDR + priv->acp_offset);
+	writel(0xFFFFFFFF, priv->base + SUN4I_REG_ACPM_ADDR + priv->acp_offset);
 
 	/* clear error counters and error code capture */
 	writel(0, priv->base + SUN4I_REG_ERRC_ADDR);
@@ -768,10 +771,12 @@ static const struct ethtool_ops sun4ican_ethtool_ops = {
 
 static const struct sun4ican_quirks sun4ican_quirks_a10 = {
 	.has_reset = false,
+	.acp_offset = 0,
 };
 
 static const struct sun4ican_quirks sun4ican_quirks_r40 = {
 	.has_reset = true,
+	.acp_offset = 0,
 };
 
 static const struct of_device_id sun4ican_of_match[] = {
@@ -870,6 +875,7 @@ static int sun4ican_probe(struct platform_device *pdev)
 	priv->base = addr;
 	priv->clk = clk;
 	priv->reset = reset;
+	priv->acp_offset = quirks->acp_offset;
 	spin_lock_init(&priv->cmdreg_lock);
 
 	platform_set_drvdata(pdev, dev);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 6.5 14/45] can: sun4i_can: Add support for the Allwinner D1
  2023-09-08 18:12 [PATCH AUTOSEL 6.5 01/45] spi: sun6i: add quirk for dual and quad SPI modes support Sasha Levin
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary Sasha Levin
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 13/45] can: sun4i_can: Add acceptance register quirk Sasha Levin
@ 2023-09-08 18:12 ` Sasha Levin
  2 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2023-09-08 18:12 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: John Watts, Marc Kleine-Budde, Sasha Levin, wg, davem, edumazet,
	kuba, pabeni, wens, jernej.skrabec, samuel, linux-can, netdev,
	linux-arm-kernel, linux-sunxi

From: John Watts <contact@jookia.org>

[ Upstream commit 8abb95250ae6af2d51993da8fcae18da2ce24cc4 ]

The controllers present in the D1 are extremely similar to the R40
and require the same reset quirks, but an extra quirk is needed to support
receiving packets.

Signed-off-by: John Watts <contact@jookia.org>
Link: https://lore.kernel.org/all/20230721221552.1973203-6-contact@jookia.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/can/Kconfig     |  4 ++--
 drivers/net/can/sun4i_can.c | 12 +++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index a5c5036dfb943..e626de33e735d 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -185,10 +185,10 @@ config CAN_SLCAN
 
 config CAN_SUN4I
 	tristate "Allwinner A10 CAN controller"
-	depends on MACH_SUN4I || MACH_SUN7I || COMPILE_TEST
+	depends on MACH_SUN4I || MACH_SUN7I || RISCV || COMPILE_TEST
 	help
 	  Say Y here if you want to use CAN controller found on Allwinner
-	  A10/A20 SoCs.
+	  A10/A20/D1 SoCs.
 
 	  To compile this driver as a module, choose M here: the module will
 	  be called sun4i_can.
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index 1f90fe6dbb8bb..c508a328e38d4 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -91,6 +91,8 @@
 #define SUN4I_REG_BUF12_ADDR	0x0070	/* CAN Tx/Rx Buffer 12 */
 #define SUN4I_REG_ACPC_ADDR	0x0040	/* CAN Acceptance Code 0 */
 #define SUN4I_REG_ACPM_ADDR	0x0044	/* CAN Acceptance Mask 0 */
+#define SUN4I_REG_ACPC_ADDR_D1	0x0028	/* CAN Acceptance Code 0 on the D1 */
+#define SUN4I_REG_ACPM_ADDR_D1	0x002C	/* CAN Acceptance Mask 0 on the D1 */
 #define SUN4I_REG_RBUF_RBACK_START_ADDR	0x0180	/* CAN transmit buffer start */
 #define SUN4I_REG_RBUF_RBACK_END_ADDR	0x01b0	/* CAN transmit buffer end */
 
@@ -779,6 +781,11 @@ static const struct sun4ican_quirks sun4ican_quirks_r40 = {
 	.acp_offset = 0,
 };
 
+static const struct sun4ican_quirks sun4ican_quirks_d1 = {
+	.has_reset = true,
+	.acp_offset = (SUN4I_REG_ACPC_ADDR_D1 - SUN4I_REG_ACPC_ADDR),
+};
+
 static const struct of_device_id sun4ican_of_match[] = {
 	{
 		.compatible = "allwinner,sun4i-a10-can",
@@ -789,6 +796,9 @@ static const struct of_device_id sun4ican_of_match[] = {
 	}, {
 		.compatible = "allwinner,sun8i-r40-can",
 		.data = &sun4ican_quirks_r40
+	}, {
+		.compatible = "allwinner,sun20i-d1-can",
+		.data = &sun4ican_quirks_d1
 	}, {
 		/* sentinel */
 	},
@@ -913,4 +923,4 @@ module_platform_driver(sun4i_can_driver);
 MODULE_AUTHOR("Peter Chen <xingkongcp@gmail.com>");
 MODULE_AUTHOR("Gerhard Bertelsmann <info@gerhard-bertelsmann.de>");
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("CAN driver for Allwinner SoCs (A10/A20)");
+MODULE_DESCRIPTION("CAN driver for Allwinner SoCs (A10/A20/D1)");
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary
  2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary Sasha Levin
@ 2023-09-13 14:43   ` Jisheng Zhang
  0 siblings, 0 replies; 5+ messages in thread
From: Jisheng Zhang @ 2023-09-13 14:43 UTC (permalink / raw)
  To: Sasha Levin
  Cc: linux-kernel, stable, Jakub Kicinski, alexandre.torgue, joabreu,
	davem, edumazet, pabeni, mcoquelin.stm32, wens, jernej.skrabec,
	samuel, netdev, linux-stm32, linux-arm-kernel, linux-sunxi

On Fri, Sep 08, 2023 at 02:12:46PM -0400, Sasha Levin wrote:
> From: Jisheng Zhang <jszhang@kernel.org>
> 
> [ Upstream commit 133466c3bbe171f826294161db203f7670bb30c8 ]
> 
> Currently, there are two major issues with stmmac driver statistics
> First of all, statistics in stmmac_extra_stats, stmmac_rxq_stats
> and stmmac_txq_stats are 32 bit variables on 32 bit platforms. This
> can cause some stats to overflow after several minutes of
> high traffic, for example rx_pkt_n, tx_pkt_n and so on.
> 
> Secondly, if HW supports multiqueues, there are frequent cacheline
> ping pongs on some driver statistic vars, for example, normal_irq_n,
> tx_pkt_n and so on. What's more, frequent cacheline ping pongs on
> normal_irq_n happens in ISR, this makes the situation worse.
> 
> To improve the driver, we convert those statistics to 64 bit, implement
> ndo_get_stats64 and update .get_ethtool_stats implementation
> accordingly. We also use per-queue statistics where necessary to remove
> the cacheline ping pongs as much as possible to make multiqueue
> operations faster. Those statistics which are not possible to overflow
> and not frequently updated are kept as is.

Hi Sasha,

This patch caused a bug report recently [1], and I'm trying to fix it. So
could this backport to the stable tree be put on hold until the fix is merged?

PS: If we want to apply this patch to the stable tree, besides the above
fix, we also need to apply the preparation commit below:
2eb85b750512 ("net: stmmac: don't clear network statistics in .ndo_open()")

Thanks in advance

Link: https://lore.kernel.org/netdev/20230911171102.cwieugrpthm7ywbm@pengutronix.de/ [1]

> 
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> Link: https://lore.kernel.org/r/20230717160630.1892-3-jszhang@kernel.org
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> Signed-off-by: Sasha Levin <sashal@kernel.org>
> ---
>  drivers/net/ethernet/stmicro/stmmac/common.h  |  39 ++--
>  .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c |  12 +-
>  .../ethernet/stmicro/stmmac/dwmac100_dma.c    |   7 +-
>  .../ethernet/stmicro/stmmac/dwmac4_descs.c    |  16 +-
>  .../net/ethernet/stmicro/stmmac/dwmac4_lib.c  |  15 +-
>  .../net/ethernet/stmicro/stmmac/dwmac_lib.c   |  12 +-
>  .../ethernet/stmicro/stmmac/dwxgmac2_descs.c  |   6 +-
>  .../ethernet/stmicro/stmmac/dwxgmac2_dma.c    |  14 +-
>  .../net/ethernet/stmicro/stmmac/enh_desc.c    |  20 +-
>  drivers/net/ethernet/stmicro/stmmac/hwif.h    |  12 +-
>  .../net/ethernet/stmicro/stmmac/norm_desc.c   |  15 +-
>  drivers/net/ethernet/stmicro/stmmac/stmmac.h  |   2 +
>  .../ethernet/stmicro/stmmac/stmmac_ethtool.c  | 123 ++++++++---
>  .../net/ethernet/stmicro/stmmac/stmmac_main.c | 200 ++++++++++++++----
>  14 files changed, 335 insertions(+), 158 deletions(-)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
> index 16e67c18b6f71..57f2137bbe9d9 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
> @@ -59,13 +59,25 @@
>  /* #define FRAME_FILTER_DEBUG */
>  
>  struct stmmac_txq_stats {
> -	unsigned long tx_pkt_n;
> -	unsigned long tx_normal_irq_n;
> +	u64 tx_bytes;
> +	u64 tx_packets;
> +	u64 tx_pkt_n;
> +	u64 tx_normal_irq_n;
> +	u64 napi_poll;
> +	u64 tx_clean;
> +	u64 tx_set_ic_bit;
> +	u64 tx_tso_frames;
> +	u64 tx_tso_nfrags;
> +	struct u64_stats_sync syncp;
>  };
>  
>  struct stmmac_rxq_stats {
> -	unsigned long rx_pkt_n;
> -	unsigned long rx_normal_irq_n;
> +	u64 rx_bytes;
> +	u64 rx_packets;
> +	u64 rx_pkt_n;
> +	u64 rx_normal_irq_n;
> +	u64 napi_poll;
> +	struct u64_stats_sync syncp;
>  };
>  
>  /* Extra statistic and debug information exposed by ethtool */
> @@ -81,6 +93,7 @@ struct stmmac_extra_stats {
>  	unsigned long tx_frame_flushed;
>  	unsigned long tx_payload_error;
>  	unsigned long tx_ip_header_error;
> +	unsigned long tx_collision;
>  	/* Receive errors */
>  	unsigned long rx_desc;
>  	unsigned long sa_filter_fail;
> @@ -113,14 +126,6 @@ struct stmmac_extra_stats {
>  	/* Tx/Rx IRQ Events */
>  	unsigned long rx_early_irq;
>  	unsigned long threshold;
> -	unsigned long tx_pkt_n;
> -	unsigned long rx_pkt_n;
> -	unsigned long normal_irq_n;
> -	unsigned long rx_normal_irq_n;
> -	unsigned long napi_poll;
> -	unsigned long tx_normal_irq_n;
> -	unsigned long tx_clean;
> -	unsigned long tx_set_ic_bit;
>  	unsigned long irq_receive_pmt_irq_n;
>  	/* MMC info */
>  	unsigned long mmc_tx_irq_n;
> @@ -190,18 +195,16 @@ struct stmmac_extra_stats {
>  	unsigned long mtl_rx_fifo_ctrl_active;
>  	unsigned long mac_rx_frame_ctrl_fifo;
>  	unsigned long mac_gmii_rx_proto_engine;
> -	/* TSO */
> -	unsigned long tx_tso_frames;
> -	unsigned long tx_tso_nfrags;
>  	/* EST */
>  	unsigned long mtl_est_cgce;
>  	unsigned long mtl_est_hlbs;
>  	unsigned long mtl_est_hlbf;
>  	unsigned long mtl_est_btre;
>  	unsigned long mtl_est_btrlm;
> -	/* per queue statistics */
> -	struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
> -	struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
> +	unsigned long rx_dropped;
> +	unsigned long rx_errors;
> +	unsigned long tx_dropped;
> +	unsigned long tx_errors;
>  };
>  
>  /* Safety Feature statistics exposed by ethtool */
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
> index 1e714380d1250..b20f8ba34efd9 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
> @@ -440,8 +440,10 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
>  				     struct stmmac_extra_stats *x, u32 chan,
>  				     u32 dir)
>  {
> -	u32 v;
> +	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
> +	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
>  	int ret = 0;
> +	u32 v;
>  
>  	v = readl(ioaddr + EMAC_INT_STA);
>  
> @@ -452,7 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
>  
>  	if (v & EMAC_TX_INT) {
>  		ret |= handle_tx;
> -		x->tx_normal_irq_n++;
> +		u64_stats_update_begin(&tx_q->txq_stats.syncp);
> +		tx_q->txq_stats.tx_normal_irq_n++;
> +		u64_stats_update_end(&tx_q->txq_stats.syncp);
>  	}
>  
>  	if (v & EMAC_TX_DMA_STOP_INT)
> @@ -474,7 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
>  
>  	if (v & EMAC_RX_INT) {
>  		ret |= handle_rx;
> -		x->rx_normal_irq_n++;
> +		u64_stats_update_begin(&rx_q->rxq_stats.syncp);
> +		rx_q->rxq_stats.rx_normal_irq_n++;
> +		u64_stats_update_end(&rx_q->rxq_stats.syncp);
>  	}
>  
>  	if (v & EMAC_RX_BUF_UA_INT)
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
> index 1c32b1788f02e..dea270f60cc3e 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
> @@ -82,29 +82,24 @@ static void dwmac100_dump_dma_regs(struct stmmac_priv *priv,
>  }
>  
>  /* DMA controller has two counters to track the number of the missed frames. */
> -static void dwmac100_dma_diagnostic_fr(struct net_device_stats *stats,
> -				       struct stmmac_extra_stats *x,
> +static void dwmac100_dma_diagnostic_fr(struct stmmac_extra_stats *x,
>  				       void __iomem *ioaddr)
>  {
>  	u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR);
>  
>  	if (unlikely(csr8)) {
>  		if (csr8 & DMA_MISSED_FRAME_OVE) {
> -			stats->rx_over_errors += 0x800;
>  			x->rx_overflow_cntr += 0x800;
>  		} else {
>  			unsigned int ove_cntr;
>  			ove_cntr = ((csr8 & DMA_MISSED_FRAME_OVE_CNTR) >> 17);
> -			stats->rx_over_errors += ove_cntr;
>  			x->rx_overflow_cntr += ove_cntr;
>  		}
>  
>  		if (csr8 & DMA_MISSED_FRAME_OVE_M) {
> -			stats->rx_missed_errors += 0xffff;
>  			x->rx_missed_cntr += 0xffff;
>  		} else {
>  			unsigned int miss_f = (csr8 & DMA_MISSED_FRAME_M_CNTR);
> -			stats->rx_missed_errors += miss_f;
>  			x->rx_missed_cntr += miss_f;
>  		}
>  	}
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> index 6a011d8633e8e..89a14084c6117 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> @@ -13,8 +13,7 @@
>  #include "dwmac4.h"
>  #include "dwmac4_descs.h"
>  
> -static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
> -				       struct stmmac_extra_stats *x,
> +static int dwmac4_wrback_get_tx_status(struct stmmac_extra_stats *x,
>  				       struct dma_desc *p,
>  				       void __iomem *ioaddr)
>  {
> @@ -40,15 +39,13 @@ static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
>  			x->tx_frame_flushed++;
>  		if (unlikely(tdes3 & TDES3_LOSS_CARRIER)) {
>  			x->tx_losscarrier++;
> -			stats->tx_carrier_errors++;
>  		}
>  		if (unlikely(tdes3 & TDES3_NO_CARRIER)) {
>  			x->tx_carrier++;
> -			stats->tx_carrier_errors++;
>  		}
>  		if (unlikely((tdes3 & TDES3_LATE_COLLISION) ||
>  			     (tdes3 & TDES3_EXCESSIVE_COLLISION)))
> -			stats->collisions +=
> +			x->tx_collision +=
>  			    (tdes3 & TDES3_COLLISION_COUNT_MASK)
>  			    >> TDES3_COLLISION_COUNT_SHIFT;
>  
> @@ -73,8 +70,7 @@ static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
>  	return ret;
>  }
>  
> -static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
> -				       struct stmmac_extra_stats *x,
> +static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x,
>  				       struct dma_desc *p)
>  {
>  	unsigned int rdes1 = le32_to_cpu(p->des1);
> @@ -93,7 +89,7 @@ static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
>  
>  	if (unlikely(rdes3 & RDES3_ERROR_SUMMARY)) {
>  		if (unlikely(rdes3 & RDES3_GIANT_PACKET))
> -			stats->rx_length_errors++;
> +			x->rx_length++;
>  		if (unlikely(rdes3 & RDES3_OVERFLOW_ERROR))
>  			x->rx_gmac_overflow++;
>  
> @@ -103,10 +99,8 @@ static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
>  		if (unlikely(rdes3 & RDES3_RECEIVE_ERROR))
>  			x->rx_mii++;
>  
> -		if (unlikely(rdes3 & RDES3_CRC_ERROR)) {
> +		if (unlikely(rdes3 & RDES3_CRC_ERROR))
>  			x->rx_crc_errors++;
> -			stats->rx_crc_errors++;
> -		}
>  
>  		if (unlikely(rdes3 & RDES3_DRIBBLE_ERROR))
>  			x->dribbling_bit++;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
> index 03ceb6a940732..980e5f8a37ec5 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
> @@ -171,6 +171,8 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
>  	const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
>  	u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
>  	u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
> +	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
> +	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
>  	int ret = 0;
>  
>  	if (dir == DMA_DIR_RX)
> @@ -198,18 +200,19 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
>  		}
>  	}
>  	/* TX/RX NORMAL interrupts */
> -	if (likely(intr_status & DMA_CHAN_STATUS_NIS))
> -		x->normal_irq_n++;
>  	if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
> -		x->rx_normal_irq_n++;
> -		x->rxq_stats[chan].rx_normal_irq_n++;
> +		u64_stats_update_begin(&rx_q->rxq_stats.syncp);
> +		rx_q->rxq_stats.rx_normal_irq_n++;
> +		u64_stats_update_end(&rx_q->rxq_stats.syncp);
>  		ret |= handle_rx;
>  	}
>  	if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
> -		x->tx_normal_irq_n++;
> -		x->txq_stats[chan].tx_normal_irq_n++;
> +		u64_stats_update_begin(&tx_q->txq_stats.syncp);
> +		tx_q->txq_stats.tx_normal_irq_n++;
> +		u64_stats_update_end(&tx_q->txq_stats.syncp);
>  		ret |= handle_tx;
>  	}
> +
>  	if (unlikely(intr_status & DMA_CHAN_STATUS_TBU))
>  		ret |= handle_tx;
>  	if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
> index 0b6f999a83052..aaa09b16b016f 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
> @@ -10,6 +10,7 @@
>  #include <linux/iopoll.h>
>  #include "common.h"
>  #include "dwmac_dma.h"
> +#include "stmmac.h"
>  
>  #define GMAC_HI_REG_AE		0x80000000
>  
> @@ -161,6 +162,8 @@ static void show_rx_process_state(unsigned int status)
>  int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
>  			struct stmmac_extra_stats *x, u32 chan, u32 dir)
>  {
> +	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
> +	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
>  	int ret = 0;
>  	/* read the status register (CSR5) */
>  	u32 intr_status = readl(ioaddr + DMA_STATUS);
> @@ -208,17 +211,20 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
>  	}
>  	/* TX/RX NORMAL interrupts */
>  	if (likely(intr_status & DMA_STATUS_NIS)) {
> -		x->normal_irq_n++;
>  		if (likely(intr_status & DMA_STATUS_RI)) {
>  			u32 value = readl(ioaddr + DMA_INTR_ENA);
>  			/* to schedule NAPI on real RIE event. */
>  			if (likely(value & DMA_INTR_ENA_RIE)) {
> -				x->rx_normal_irq_n++;
> +				u64_stats_update_begin(&rx_q->rxq_stats.syncp);
> +				rx_q->rxq_stats.rx_normal_irq_n++;
> +				u64_stats_update_end(&rx_q->rxq_stats.syncp);
>  				ret |= handle_rx;
>  			}
>  		}
>  		if (likely(intr_status & DMA_STATUS_TI)) {
> -			x->tx_normal_irq_n++;
> +			u64_stats_update_begin(&tx_q->txq_stats.syncp);
> +			tx_q->txq_stats.tx_normal_irq_n++;
> +			u64_stats_update_end(&tx_q->txq_stats.syncp);
>  			ret |= handle_tx;
>  		}
>  		if (unlikely(intr_status & DMA_STATUS_ERI))
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> index 13c347ee8be9c..fc82862a612c7 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> @@ -8,8 +8,7 @@
>  #include "common.h"
>  #include "dwxgmac2.h"
>  
> -static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
> -				  struct stmmac_extra_stats *x,
> +static int dwxgmac2_get_tx_status(struct stmmac_extra_stats *x,
>  				  struct dma_desc *p, void __iomem *ioaddr)
>  {
>  	unsigned int tdes3 = le32_to_cpu(p->des3);
> @@ -23,8 +22,7 @@ static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
>  	return ret;
>  }
>  
> -static int dwxgmac2_get_rx_status(struct net_device_stats *stats,
> -				  struct stmmac_extra_stats *x,
> +static int dwxgmac2_get_rx_status(struct stmmac_extra_stats *x,
>  				  struct dma_desc *p)
>  {
>  	unsigned int rdes3 = le32_to_cpu(p->des3);
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
> index 070bd912580b7..3b5f8c595219b 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
> @@ -337,6 +337,8 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
>  				  struct stmmac_extra_stats *x, u32 chan,
>  				  u32 dir)
>  {
> +	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
> +	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
>  	u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
>  	u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
>  	int ret = 0;
> @@ -364,16 +366,16 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
>  
>  	/* TX/RX NORMAL interrupts */
>  	if (likely(intr_status & XGMAC_NIS)) {
> -		x->normal_irq_n++;
> -
>  		if (likely(intr_status & XGMAC_RI)) {
> -			x->rx_normal_irq_n++;
> -			x->rxq_stats[chan].rx_normal_irq_n++;
> +			u64_stats_update_begin(&rx_q->rxq_stats.syncp);
> +			rx_q->rxq_stats.rx_normal_irq_n++;
> +			u64_stats_update_end(&rx_q->rxq_stats.syncp);
>  			ret |= handle_rx;
>  		}
>  		if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
> -			x->tx_normal_irq_n++;
> -			x->txq_stats[chan].tx_normal_irq_n++;
> +			u64_stats_update_begin(&tx_q->txq_stats.syncp);
> +			tx_q->txq_stats.tx_normal_irq_n++;
> +			u64_stats_update_end(&tx_q->txq_stats.syncp);
>  			ret |= handle_tx;
>  		}
>  	}
> diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
> index a91d8f13a931d..937b7a0466fca 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
> @@ -12,8 +12,7 @@
>  #include "common.h"
>  #include "descs_com.h"
>  
> -static int enh_desc_get_tx_status(struct net_device_stats *stats,
> -				  struct stmmac_extra_stats *x,
> +static int enh_desc_get_tx_status(struct stmmac_extra_stats *x,
>  				  struct dma_desc *p, void __iomem *ioaddr)
>  {
>  	unsigned int tdes0 = le32_to_cpu(p->des0);
> @@ -38,15 +37,13 @@ static int enh_desc_get_tx_status(struct net_device_stats *stats,
>  
>  		if (unlikely(tdes0 & ETDES0_LOSS_CARRIER)) {
>  			x->tx_losscarrier++;
> -			stats->tx_carrier_errors++;
>  		}
>  		if (unlikely(tdes0 & ETDES0_NO_CARRIER)) {
>  			x->tx_carrier++;
> -			stats->tx_carrier_errors++;
>  		}
>  		if (unlikely((tdes0 & ETDES0_LATE_COLLISION) ||
>  			     (tdes0 & ETDES0_EXCESSIVE_COLLISIONS)))
> -			stats->collisions +=
> +			x->tx_collision +=
>  				(tdes0 & ETDES0_COLLISION_COUNT_MASK) >> 3;
>  
>  		if (unlikely(tdes0 & ETDES0_EXCESSIVE_DEFERRAL))
> @@ -117,8 +114,7 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
>  	return ret;
>  }
>  
> -static void enh_desc_get_ext_status(struct net_device_stats *stats,
> -				    struct stmmac_extra_stats *x,
> +static void enh_desc_get_ext_status(struct stmmac_extra_stats *x,
>  				    struct dma_extended_desc *p)
>  {
>  	unsigned int rdes0 = le32_to_cpu(p->basic.des0);
> @@ -182,8 +178,7 @@ static void enh_desc_get_ext_status(struct net_device_stats *stats,
>  	}
>  }
>  
> -static int enh_desc_get_rx_status(struct net_device_stats *stats,
> -				  struct stmmac_extra_stats *x,
> +static int enh_desc_get_rx_status(struct stmmac_extra_stats *x,
>  				  struct dma_desc *p)
>  {
>  	unsigned int rdes0 = le32_to_cpu(p->des0);
> @@ -193,14 +188,14 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
>  		return dma_own;
>  
>  	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
> -		stats->rx_length_errors++;
> +		x->rx_length++;
>  		return discard_frame;
>  	}
>  
>  	if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
>  		if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) {
>  			x->rx_desc++;
> -			stats->rx_length_errors++;
> +			x->rx_length++;
>  		}
>  		if (unlikely(rdes0 & RDES0_OVERFLOW_ERROR))
>  			x->rx_gmac_overflow++;
> @@ -209,7 +204,7 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
>  			pr_err("\tIPC Csum Error/Giant frame\n");
>  
>  		if (unlikely(rdes0 & RDES0_COLLISION))
> -			stats->collisions++;
> +			x->rx_collision++;
>  		if (unlikely(rdes0 & RDES0_RECEIVE_WATCHDOG))
>  			x->rx_watchdog++;
>  
> @@ -218,7 +213,6 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
>  
>  		if (unlikely(rdes0 & RDES0_CRC_ERROR)) {
>  			x->rx_crc_errors++;
> -			stats->rx_crc_errors++;
>  		}
>  		ret = discard_frame;
>  	}
> diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
> index 6ee7cf07cfd76..652af8f6e75ff 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
> @@ -57,8 +57,7 @@ struct stmmac_desc_ops {
>  	/* Last tx segment reports the transmit status */
>  	int (*get_tx_ls)(struct dma_desc *p);
>  	/* Return the transmit status looking at the TDES1 */
> -	int (*tx_status)(struct net_device_stats *stats,
> -			 struct stmmac_extra_stats *x,
> +	int (*tx_status)(struct stmmac_extra_stats *x,
>  			 struct dma_desc *p, void __iomem *ioaddr);
>  	/* Get the buffer size from the descriptor */
>  	int (*get_tx_len)(struct dma_desc *p);
> @@ -67,11 +66,9 @@ struct stmmac_desc_ops {
>  	/* Get the receive frame size */
>  	int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
>  	/* Return the reception status looking at the RDES1 */
> -	int (*rx_status)(struct net_device_stats *stats,
> -			 struct stmmac_extra_stats *x,
> +	int (*rx_status)(struct stmmac_extra_stats *x,
>  			 struct dma_desc *p);
> -	void (*rx_extended_status)(struct net_device_stats *stats,
> -				   struct stmmac_extra_stats *x,
> +	void (*rx_extended_status)(struct stmmac_extra_stats *x,
>  				   struct dma_extended_desc *p);
>  	/* Set tx timestamp enable bit */
>  	void (*enable_tx_timestamp) (struct dma_desc *p);
> @@ -191,8 +188,7 @@ struct stmmac_dma_ops {
>  	void (*dma_tx_mode)(struct stmmac_priv *priv, void __iomem *ioaddr,
>  			    int mode, u32 channel, int fifosz, u8 qmode);
>  	/* To track extra statistic (if supported) */
> -	void (*dma_diagnostic_fr)(struct net_device_stats *stats,
> -				  struct stmmac_extra_stats *x,
> +	void (*dma_diagnostic_fr)(struct stmmac_extra_stats *x,
>  				  void __iomem *ioaddr);
>  	void (*enable_dma_transmission) (void __iomem *ioaddr);
>  	void (*enable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr,
> diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
> index 350e6670a5766..68a7cfcb1d8f3 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
> @@ -12,8 +12,7 @@
>  #include "common.h"
>  #include "descs_com.h"
>  
> -static int ndesc_get_tx_status(struct net_device_stats *stats,
> -			       struct stmmac_extra_stats *x,
> +static int ndesc_get_tx_status(struct stmmac_extra_stats *x,
>  			       struct dma_desc *p, void __iomem *ioaddr)
>  {
>  	unsigned int tdes0 = le32_to_cpu(p->des0);
> @@ -31,15 +30,12 @@ static int ndesc_get_tx_status(struct net_device_stats *stats,
>  	if (unlikely(tdes0 & TDES0_ERROR_SUMMARY)) {
>  		if (unlikely(tdes0 & TDES0_UNDERFLOW_ERROR)) {
>  			x->tx_underflow++;
> -			stats->tx_fifo_errors++;
>  		}
>  		if (unlikely(tdes0 & TDES0_NO_CARRIER)) {
>  			x->tx_carrier++;
> -			stats->tx_carrier_errors++;
>  		}
>  		if (unlikely(tdes0 & TDES0_LOSS_CARRIER)) {
>  			x->tx_losscarrier++;
> -			stats->tx_carrier_errors++;
>  		}
>  		if (unlikely((tdes0 & TDES0_EXCESSIVE_DEFERRAL) ||
>  			     (tdes0 & TDES0_EXCESSIVE_COLLISIONS) ||
> @@ -47,7 +43,7 @@ static int ndesc_get_tx_status(struct net_device_stats *stats,
>  			unsigned int collisions;
>  
>  			collisions = (tdes0 & TDES0_COLLISION_COUNT_MASK) >> 3;
> -			stats->collisions += collisions;
> +			x->tx_collision += collisions;
>  		}
>  		ret = tx_err;
>  	}
> @@ -70,8 +66,7 @@ static int ndesc_get_tx_len(struct dma_desc *p)
>   * and, if required, updates the multicast statistics.
>   * In case of success, it returns good_frame because the GMAC device
>   * is supposed to be able to compute the csum in HW. */
> -static int ndesc_get_rx_status(struct net_device_stats *stats,
> -			       struct stmmac_extra_stats *x,
> +static int ndesc_get_rx_status(struct stmmac_extra_stats *x,
>  			       struct dma_desc *p)
>  {
>  	int ret = good_frame;
> @@ -81,7 +76,7 @@ static int ndesc_get_rx_status(struct net_device_stats *stats,
>  		return dma_own;
>  
>  	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
> -		stats->rx_length_errors++;
> +		x->rx_length++;
>  		return discard_frame;
>  	}
>  
> @@ -96,11 +91,9 @@ static int ndesc_get_rx_status(struct net_device_stats *stats,
>  			x->ipc_csum_error++;
>  		if (unlikely(rdes0 & RDES0_COLLISION)) {
>  			x->rx_collision++;
> -			stats->collisions++;
>  		}
>  		if (unlikely(rdes0 & RDES0_CRC_ERROR)) {
>  			x->rx_crc_errors++;
> -			stats->rx_crc_errors++;
>  		}
>  		ret = discard_frame;
>  	}
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index 07ea5ab0a60ba..4ce5eaaae5135 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -77,6 +77,7 @@ struct stmmac_tx_queue {
>  	dma_addr_t dma_tx_phy;
>  	dma_addr_t tx_tail_addr;
>  	u32 mss;
> +	struct stmmac_txq_stats txq_stats;
>  };
>  
>  struct stmmac_rx_buffer {
> @@ -121,6 +122,7 @@ struct stmmac_rx_queue {
>  		unsigned int len;
>  		unsigned int error;
>  	} state;
> +	struct stmmac_rxq_stats rxq_stats;
>  };
>  
>  struct stmmac_channel {
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
> index 2ae73ab842d45..b7ac7abecdd35 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
> @@ -89,14 +89,6 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
>  	/* Tx/Rx IRQ Events */
>  	STMMAC_STAT(rx_early_irq),
>  	STMMAC_STAT(threshold),
> -	STMMAC_STAT(tx_pkt_n),
> -	STMMAC_STAT(rx_pkt_n),
> -	STMMAC_STAT(normal_irq_n),
> -	STMMAC_STAT(rx_normal_irq_n),
> -	STMMAC_STAT(napi_poll),
> -	STMMAC_STAT(tx_normal_irq_n),
> -	STMMAC_STAT(tx_clean),
> -	STMMAC_STAT(tx_set_ic_bit),
>  	STMMAC_STAT(irq_receive_pmt_irq_n),
>  	/* MMC info */
>  	STMMAC_STAT(mmc_tx_irq_n),
> @@ -163,9 +155,6 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
>  	STMMAC_STAT(mtl_rx_fifo_ctrl_active),
>  	STMMAC_STAT(mac_rx_frame_ctrl_fifo),
>  	STMMAC_STAT(mac_gmii_rx_proto_engine),
> -	/* TSO */
> -	STMMAC_STAT(tx_tso_frames),
> -	STMMAC_STAT(tx_tso_nfrags),
>  	/* EST */
>  	STMMAC_STAT(mtl_est_cgce),
>  	STMMAC_STAT(mtl_est_hlbs),
> @@ -175,6 +164,23 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
>  };
>  #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
>  
> +/* statistics collected in queue which will be summed up for all TX or RX
> + * queues, or summed up for both TX and RX queues(napi_poll, normal_irq_n).
> + */
> +static const char stmmac_qstats_string[][ETH_GSTRING_LEN] = {
> +	"rx_pkt_n",
> +	"rx_normal_irq_n",
> +	"tx_pkt_n",
> +	"tx_normal_irq_n",
> +	"tx_clean",
> +	"tx_set_ic_bit",
> +	"tx_tso_frames",
> +	"tx_tso_nfrags",
> +	"normal_irq_n",
> +	"napi_poll",
> +};
> +#define STMMAC_QSTATS ARRAY_SIZE(stmmac_qstats_string)
> +
>  /* HW MAC Management counters (if supported) */
>  #define STMMAC_MMC_STAT(m)	\
>  	{ #m, sizeof_field(struct stmmac_counters, m),	\
> @@ -535,23 +541,44 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
>  {
>  	u32 tx_cnt = priv->plat->tx_queues_to_use;
>  	u32 rx_cnt = priv->plat->rx_queues_to_use;
> +	unsigned int start;
>  	int q, stat;
> +	u64 *pos;
>  	char *p;
>  
> +	pos = data;
>  	for (q = 0; q < tx_cnt; q++) {
> -		p = (char *)priv + offsetof(struct stmmac_priv,
> -					    xstats.txq_stats[q].tx_pkt_n);
> +		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[q];
> +		struct stmmac_txq_stats snapshot;
> +
> +		data = pos;
> +		do {
> +			start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
> +			snapshot = tx_q->txq_stats;
> +		} while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
> +
> +		p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
>  		for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
> -			*data++ = (*(unsigned long *)p);
> -			p += sizeof(unsigned long);
> +			*data++ += (*(u64 *)p);
> +			p += sizeof(u64);
>  		}
>  	}
> +
> +	pos = data;
>  	for (q = 0; q < rx_cnt; q++) {
> -		p = (char *)priv + offsetof(struct stmmac_priv,
> -					    xstats.rxq_stats[q].rx_pkt_n);
> +		struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[q];
> +		struct stmmac_rxq_stats snapshot;
> +
> +		data = pos;
> +		do {
> +			start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
> +			snapshot = rx_q->rxq_stats;
> +		} while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
> +
> +		p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
>  		for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
> -			*data++ = (*(unsigned long *)p);
> -			p += sizeof(unsigned long);
> +			*data++ += (*(u64 *)p);
> +			p += sizeof(u64);
>  		}
>  	}
>  }
> @@ -562,8 +589,10 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
>  	struct stmmac_priv *priv = netdev_priv(dev);
>  	u32 rx_queues_count = priv->plat->rx_queues_to_use;
>  	u32 tx_queues_count = priv->plat->tx_queues_to_use;
> +	u64 napi_poll = 0, normal_irq_n = 0;
> +	int i, j = 0, pos, ret;
>  	unsigned long count;
> -	int i, j = 0, ret;
> +	unsigned int start;
>  
>  	if (priv->dma_cap.asp) {
>  		for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
> @@ -574,8 +603,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
>  	}
>  
>  	/* Update the DMA HW counters for dwmac10/100 */
> -	ret = stmmac_dma_diagnostic_fr(priv, &dev->stats, (void *) &priv->xstats,
> -			priv->ioaddr);
> +	ret = stmmac_dma_diagnostic_fr(priv, &priv->xstats, priv->ioaddr);
>  	if (ret) {
>  		/* If supported, for new GMAC chips expose the MMC counters */
>  		if (priv->dma_cap.rmon) {
> @@ -606,6 +634,48 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
>  		data[j++] = (stmmac_gstrings_stats[i].sizeof_stat ==
>  			     sizeof(u64)) ? (*(u64 *)p) : (*(u32 *)p);
>  	}
> +
> +	pos = j;
> +	for (i = 0; i < rx_queues_count; i++) {
> +		struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[i];
> +		struct stmmac_rxq_stats snapshot;
> +
> +		j = pos;
> +		do {
> +			start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
> +			snapshot = rx_q->rxq_stats;
> +		} while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
> +
> +		data[j++] += snapshot.rx_pkt_n;
> +		data[j++] += snapshot.rx_normal_irq_n;
> +		normal_irq_n += snapshot.rx_normal_irq_n;
> +		napi_poll += snapshot.napi_poll;
> +	}
> +
> +	pos = j;
> +	for (i = 0; i < tx_queues_count; i++) {
> +		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[i];
> +		struct stmmac_txq_stats snapshot;
> +
> +		j = pos;
> +		do {
> +			start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
> +			snapshot = tx_q->txq_stats;
> +		} while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
> +
> +		data[j++] += snapshot.tx_pkt_n;
> +		data[j++] += snapshot.tx_normal_irq_n;
> +		normal_irq_n += snapshot.tx_normal_irq_n;
> +		data[j++] += snapshot.tx_clean;
> +		data[j++] += snapshot.tx_set_ic_bit;
> +		data[j++] += snapshot.tx_tso_frames;
> +		data[j++] += snapshot.tx_tso_nfrags;
> +		napi_poll += snapshot.napi_poll;
> +	}
> +	normal_irq_n += priv->xstats.rx_early_irq;
> +	data[j++] = normal_irq_n;
> +	data[j++] = napi_poll;
> +
>  	stmmac_get_per_qstats(priv, &data[j]);
>  }
>  
> @@ -618,7 +688,7 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
>  
>  	switch (sset) {
>  	case ETH_SS_STATS:
> -		len = STMMAC_STATS_LEN +
> +		len = STMMAC_STATS_LEN + STMMAC_QSTATS +
>  		      STMMAC_TXQ_STATS * tx_cnt +
>  		      STMMAC_RXQ_STATS * rx_cnt;
>  
> @@ -691,8 +761,11 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
>  				p += ETH_GSTRING_LEN;
>  			}
>  		for (i = 0; i < STMMAC_STATS_LEN; i++) {
> -			memcpy(p, stmmac_gstrings_stats[i].stat_string,
> -				ETH_GSTRING_LEN);
> +			memcpy(p, stmmac_gstrings_stats[i].stat_string, ETH_GSTRING_LEN);
> +			p += ETH_GSTRING_LEN;
> +		}
> +		for (i = 0; i < STMMAC_QSTATS; i++) {
> +			memcpy(p, stmmac_qstats_string[i], ETH_GSTRING_LEN);
>  			p += ETH_GSTRING_LEN;
>  		}
>  		stmmac_get_qstats_string(priv, p);
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 4727f7be4f86e..78d4143f5dea3 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -2432,6 +2432,8 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
>  	struct dma_desc *tx_desc = NULL;
>  	struct xdp_desc xdp_desc;
>  	bool work_done = true;
> +	u32 tx_set_ic_bit = 0;
> +	unsigned long flags;
>  
>  	/* Avoids TX time-out as we are sharing with slow path */
>  	txq_trans_cond_update(nq);
> @@ -2492,7 +2494,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
>  		if (set_ic) {
>  			tx_q->tx_count_frames = 0;
>  			stmmac_set_tx_ic(priv, tx_desc);
> -			priv->xstats.tx_set_ic_bit++;
> +			tx_set_ic_bit++;
>  		}
>  
>  		stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len,
> @@ -2504,6 +2506,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
>  		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
>  		entry = tx_q->cur_tx;
>  	}
> +	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +	tx_q->txq_stats.tx_set_ic_bit += tx_set_ic_bit;
> +	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
>  
>  	if (tx_desc) {
>  		stmmac_flush_tx_descriptors(priv, queue);
> @@ -2545,11 +2550,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
>  	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
>  	unsigned int bytes_compl = 0, pkts_compl = 0;
>  	unsigned int entry, xmits = 0, count = 0;
> +	u32 tx_packets = 0, tx_errors = 0;
> +	unsigned long flags;
>  
>  	__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
>  
> -	priv->xstats.tx_clean++;
> -
>  	tx_q->xsk_frames_done = 0;
>  
>  	entry = tx_q->dirty_tx;
> @@ -2580,8 +2585,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
>  		else
>  			p = tx_q->dma_tx + entry;
>  
> -		status = stmmac_tx_status(priv, &priv->dev->stats,
> -				&priv->xstats, p, priv->ioaddr);
> +		status = stmmac_tx_status(priv,	&priv->xstats, p, priv->ioaddr);
>  		/* Check if the descriptor is owned by the DMA */
>  		if (unlikely(status & tx_dma_own))
>  			break;
> @@ -2597,13 +2601,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
>  		if (likely(!(status & tx_not_ls))) {
>  			/* ... verify the status error condition */
>  			if (unlikely(status & tx_err)) {
> -				priv->dev->stats.tx_errors++;
> +				tx_errors++;
>  				if (unlikely(status & tx_err_bump_tc))
>  					stmmac_bump_dma_threshold(priv, queue);
>  			} else {
> -				priv->dev->stats.tx_packets++;
> -				priv->xstats.tx_pkt_n++;
> -				priv->xstats.txq_stats[queue].tx_pkt_n++;
> +				tx_packets++;
>  			}
>  			if (skb)
>  				stmmac_get_tx_hwtstamp(priv, p, skb);
> @@ -2707,6 +2709,14 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
>  			      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
>  			      HRTIMER_MODE_REL);
>  
> +	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +	tx_q->txq_stats.tx_packets += tx_packets;
> +	tx_q->txq_stats.tx_pkt_n += tx_packets;
> +	tx_q->txq_stats.tx_clean++;
> +	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
> +
> +	priv->xstats.tx_errors += tx_errors;
> +
>  	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
>  
>  	/* Combine decisions from TX clean and XSK TX */
> @@ -2734,7 +2744,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
>  			    tx_q->dma_tx_phy, chan);
>  	stmmac_start_tx_dma(priv, chan);
>  
> -	priv->dev->stats.tx_errors++;
> +	priv->xstats.tx_errors++;
>  	netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, chan));
>  }
>  
> @@ -4110,6 +4120,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct stmmac_tx_queue *tx_q;
>  	bool has_vlan, set_ic;
>  	u8 proto_hdr_len, hdr;
> +	unsigned long flags;
>  	u32 pay_len, mss;
>  	dma_addr_t des;
>  	int i;
> @@ -4258,7 +4269,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  
>  		tx_q->tx_count_frames = 0;
>  		stmmac_set_tx_ic(priv, desc);
> -		priv->xstats.tx_set_ic_bit++;
>  	}
>  
>  	/* We've used all descriptors we need for this skb, however,
> @@ -4274,9 +4284,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  		netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
>  	}
>  
> -	dev->stats.tx_bytes += skb->len;
> -	priv->xstats.tx_tso_frames++;
> -	priv->xstats.tx_tso_nfrags += nfrags;
> +	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +	tx_q->txq_stats.tx_bytes += skb->len;
> +	tx_q->txq_stats.tx_tso_frames++;
> +	tx_q->txq_stats.tx_tso_nfrags += nfrags;
> +	if (set_ic)
> +		tx_q->txq_stats.tx_set_ic_bit++;
> +	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
>  
>  	if (priv->sarc_type)
>  		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
> @@ -4326,7 +4340,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  dma_map_err:
>  	dev_err(priv->device, "Tx dma map failed\n");
>  	dev_kfree_skb(skb);
> -	priv->dev->stats.tx_dropped++;
> +	priv->xstats.tx_dropped++;
>  	return NETDEV_TX_OK;
>  }
>  
> @@ -4352,6 +4366,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct stmmac_tx_queue *tx_q;
>  	bool has_vlan, set_ic;
>  	int entry, first_tx;
> +	unsigned long flags;
>  	dma_addr_t des;
>  
>  	tx_q = &priv->dma_conf.tx_queue[queue];
> @@ -4480,7 +4495,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  
>  		tx_q->tx_count_frames = 0;
>  		stmmac_set_tx_ic(priv, desc);
> -		priv->xstats.tx_set_ic_bit++;
>  	}
>  
>  	/* We've used all descriptors we need for this skb, however,
> @@ -4507,7 +4521,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  		netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
>  	}
>  
> -	dev->stats.tx_bytes += skb->len;
> +	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +	tx_q->txq_stats.tx_bytes += skb->len;
> +	if (set_ic)
> +		tx_q->txq_stats.tx_set_ic_bit++;
> +	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
>  
>  	if (priv->sarc_type)
>  		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
> @@ -4569,7 +4587,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  dma_map_err:
>  	netdev_err(priv->dev, "Tx DMA map failed\n");
>  	dev_kfree_skb(skb);
> -	priv->dev->stats.tx_dropped++;
> +	priv->xstats.tx_dropped++;
>  	return NETDEV_TX_OK;
>  }
>  
> @@ -4770,9 +4788,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
>  		set_ic = false;
>  
>  	if (set_ic) {
> +		unsigned long flags;
>  		tx_q->tx_count_frames = 0;
>  		stmmac_set_tx_ic(priv, tx_desc);
> -		priv->xstats.tx_set_ic_bit++;
> +		flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +		tx_q->txq_stats.tx_set_ic_bit++;
> +		u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
>  	}
>  
>  	stmmac_enable_dma_transmission(priv, priv->ioaddr);
> @@ -4917,16 +4938,18 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
>  				   struct dma_desc *p, struct dma_desc *np,
>  				   struct xdp_buff *xdp)
>  {
> +	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
>  	struct stmmac_channel *ch = &priv->channel[queue];
>  	unsigned int len = xdp->data_end - xdp->data;
>  	enum pkt_hash_types hash_type;
>  	int coe = priv->hw->rx_csum;
> +	unsigned long flags;
>  	struct sk_buff *skb;
>  	u32 hash;
>  
>  	skb = stmmac_construct_skb_zc(ch, xdp);
>  	if (!skb) {
> -		priv->dev->stats.rx_dropped++;
> +		priv->xstats.rx_dropped++;
>  		return;
>  	}
>  
> @@ -4945,8 +4968,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
>  	skb_record_rx_queue(skb, queue);
>  	napi_gro_receive(&ch->rxtx_napi, skb);
>  
> -	priv->dev->stats.rx_packets++;
> -	priv->dev->stats.rx_bytes += len;
> +	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
> +	rx_q->rxq_stats.rx_pkt_n++;
> +	rx_q->rxq_stats.rx_bytes += len;
> +	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
>  }
>  
>  static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
> @@ -5023,9 +5048,11 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>  	unsigned int count = 0, error = 0, len = 0;
>  	int dirty = stmmac_rx_dirty(priv, queue);
>  	unsigned int next_entry = rx_q->cur_rx;
> +	u32 rx_errors = 0, rx_dropped = 0;
>  	unsigned int desc_size;
>  	struct bpf_prog *prog;
>  	bool failure = false;
> +	unsigned long flags;
>  	int xdp_status = 0;
>  	int status = 0;
>  
> @@ -5081,8 +5108,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>  			p = rx_q->dma_rx + entry;
>  
>  		/* read the status of the incoming frame */
> -		status = stmmac_rx_status(priv, &priv->dev->stats,
> -					  &priv->xstats, p);
> +		status = stmmac_rx_status(priv, &priv->xstats, p);
>  		/* check if managed by the DMA otherwise go ahead */
>  		if (unlikely(status & dma_own))
>  			break;
> @@ -5104,8 +5130,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>  			break;
>  
>  		if (priv->extend_desc)
> -			stmmac_rx_extended_status(priv, &priv->dev->stats,
> -						  &priv->xstats,
> +			stmmac_rx_extended_status(priv, &priv->xstats,
>  						  rx_q->dma_erx + entry);
>  		if (unlikely(status == discard_frame)) {
>  			xsk_buff_free(buf->xdp);
> @@ -5113,7 +5138,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>  			dirty++;
>  			error = 1;
>  			if (!priv->hwts_rx_en)
> -				priv->dev->stats.rx_errors++;
> +				rx_errors++;
>  		}
>  
>  		if (unlikely(error && (status & rx_not_ls)))
> @@ -5161,7 +5186,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>  			break;
>  		case STMMAC_XDP_CONSUMED:
>  			xsk_buff_free(buf->xdp);
> -			priv->dev->stats.rx_dropped++;
> +			rx_dropped++;
>  			break;
>  		case STMMAC_XDP_TX:
>  		case STMMAC_XDP_REDIRECT:
> @@ -5182,8 +5207,12 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>  
>  	stmmac_finalize_xdp_rx(priv, xdp_status);
>  
> -	priv->xstats.rx_pkt_n += count;
> -	priv->xstats.rxq_stats[queue].rx_pkt_n += count;
> +	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
> +	rx_q->rxq_stats.rx_pkt_n += count;
> +	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
> +
> +	priv->xstats.rx_dropped += rx_dropped;
> +	priv->xstats.rx_errors += rx_errors;
>  
>  	if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
>  		if (failure || stmmac_rx_dirty(priv, queue) > 0)
> @@ -5207,6 +5236,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
>   */
>  static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  {
> +	u32 rx_errors = 0, rx_dropped = 0, rx_bytes = 0, rx_packets = 0;
>  	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
>  	struct stmmac_channel *ch = &priv->channel[queue];
>  	unsigned int count = 0, error = 0, len = 0;
> @@ -5216,6 +5246,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  	unsigned int desc_size;
>  	struct sk_buff *skb = NULL;
>  	struct stmmac_xdp_buff ctx;
> +	unsigned long flags;
>  	int xdp_status = 0;
>  	int buf_sz;
>  
> @@ -5271,8 +5302,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  			p = rx_q->dma_rx + entry;
>  
>  		/* read the status of the incoming frame */
> -		status = stmmac_rx_status(priv, &priv->dev->stats,
> -				&priv->xstats, p);
> +		status = stmmac_rx_status(priv, &priv->xstats, p);
>  		/* check if managed by the DMA otherwise go ahead */
>  		if (unlikely(status & dma_own))
>  			break;
> @@ -5289,14 +5319,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  		prefetch(np);
>  
>  		if (priv->extend_desc)
> -			stmmac_rx_extended_status(priv, &priv->dev->stats,
> -					&priv->xstats, rx_q->dma_erx + entry);
> +			stmmac_rx_extended_status(priv, &priv->xstats, rx_q->dma_erx + entry);
>  		if (unlikely(status == discard_frame)) {
>  			page_pool_recycle_direct(rx_q->page_pool, buf->page);
>  			buf->page = NULL;
>  			error = 1;
>  			if (!priv->hwts_rx_en)
> -				priv->dev->stats.rx_errors++;
> +				rx_errors++;
>  		}
>  
>  		if (unlikely(error && (status & rx_not_ls)))
> @@ -5364,7 +5393,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  							   virt_to_head_page(ctx.xdp.data),
>  							   sync_len, true);
>  					buf->page = NULL;
> -					priv->dev->stats.rx_dropped++;
> +					rx_dropped++;
>  
>  					/* Clear skb as it was set as
>  					 * status by XDP program.
> @@ -5393,7 +5422,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  
>  			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
>  			if (!skb) {
> -				priv->dev->stats.rx_dropped++;
> +				rx_dropped++;
>  				count++;
>  				goto drain_data;
>  			}
> @@ -5453,8 +5482,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  		napi_gro_receive(&ch->rx_napi, skb);
>  		skb = NULL;
>  
> -		priv->dev->stats.rx_packets++;
> -		priv->dev->stats.rx_bytes += len;
> +		rx_packets++;
> +		rx_bytes += len;
>  		count++;
>  	}
>  
> @@ -5469,8 +5498,14 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>  
>  	stmmac_rx_refill(priv, queue);
>  
> -	priv->xstats.rx_pkt_n += count;
> -	priv->xstats.rxq_stats[queue].rx_pkt_n += count;
> +	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
> +	rx_q->rxq_stats.rx_packets += rx_packets;
> +	rx_q->rxq_stats.rx_bytes += rx_bytes;
> +	rx_q->rxq_stats.rx_pkt_n += count;
> +	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
> +
> +	priv->xstats.rx_dropped += rx_dropped;
> +	priv->xstats.rx_errors += rx_errors;
>  
>  	return count;
>  }
> @@ -5480,10 +5515,15 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
>  	struct stmmac_channel *ch =
>  		container_of(napi, struct stmmac_channel, rx_napi);
>  	struct stmmac_priv *priv = ch->priv_data;
> +	struct stmmac_rx_queue *rx_q;
>  	u32 chan = ch->index;
> +	unsigned long flags;
>  	int work_done;
>  
> -	priv->xstats.napi_poll++;
> +	rx_q = &priv->dma_conf.rx_queue[chan];
> +	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
> +	rx_q->rxq_stats.napi_poll++;
> +	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
>  
>  	work_done = stmmac_rx(priv, budget, chan);
>  	if (work_done < budget && napi_complete_done(napi, work_done)) {
> @@ -5502,10 +5542,15 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
>  	struct stmmac_channel *ch =
>  		container_of(napi, struct stmmac_channel, tx_napi);
>  	struct stmmac_priv *priv = ch->priv_data;
> +	struct stmmac_tx_queue *tx_q;
>  	u32 chan = ch->index;
> +	unsigned long flags;
>  	int work_done;
>  
> -	priv->xstats.napi_poll++;
> +	tx_q = &priv->dma_conf.tx_queue[chan];
> +	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +	tx_q->txq_stats.napi_poll++;
> +	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
>  
>  	work_done = stmmac_tx_clean(priv, budget, chan);
>  	work_done = min(work_done, budget);
> @@ -5527,9 +5572,20 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
>  		container_of(napi, struct stmmac_channel, rxtx_napi);
>  	struct stmmac_priv *priv = ch->priv_data;
>  	int rx_done, tx_done, rxtx_done;
> +	struct stmmac_rx_queue *rx_q;
> +	struct stmmac_tx_queue *tx_q;
>  	u32 chan = ch->index;
> +	unsigned long flags;
> +
> +	rx_q = &priv->dma_conf.rx_queue[chan];
> +	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
> +	rx_q->rxq_stats.napi_poll++;
> +	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
>  
> -	priv->xstats.napi_poll++;
> +	tx_q = &priv->dma_conf.tx_queue[chan];
> +	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
> +	tx_q->txq_stats.napi_poll++;
> +	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
>  
>  	tx_done = stmmac_tx_clean(priv, budget, chan);
>  	tx_done = min(tx_done, budget);
> @@ -6788,6 +6844,56 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
>  	return 0;
>  }
>  
> +static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
> +{
> +	struct stmmac_priv *priv = netdev_priv(dev);
> +	u32 tx_cnt = priv->plat->tx_queues_to_use;
> +	u32 rx_cnt = priv->plat->rx_queues_to_use;
> +	unsigned int start;
> +	int q;
> +
> +	for (q = 0; q < tx_cnt; q++) {
> +		struct stmmac_txq_stats *txq_stats = &priv->dma_conf.tx_queue[q].txq_stats;
> +		u64 tx_packets;
> +		u64 tx_bytes;
> +
> +		do {
> +			start = u64_stats_fetch_begin(&txq_stats->syncp);
> +			tx_packets = txq_stats->tx_packets;
> +			tx_bytes   = txq_stats->tx_bytes;
> +		} while (u64_stats_fetch_retry(&txq_stats->syncp, start));
> +
> +		stats->tx_packets += tx_packets;
> +		stats->tx_bytes += tx_bytes;
> +	}
> +
> +	for (q = 0; q < rx_cnt; q++) {
> +		struct stmmac_rxq_stats *rxq_stats = &priv->dma_conf.rx_queue[q].rxq_stats;
> +		u64 rx_packets;
> +		u64 rx_bytes;
> +
> +		do {
> +			start = u64_stats_fetch_begin(&rxq_stats->syncp);
> +			rx_packets = rxq_stats->rx_packets;
> +			rx_bytes   = rxq_stats->rx_bytes;
> +		} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
> +
> +		stats->rx_packets += rx_packets;
> +		stats->rx_bytes += rx_bytes;
> +	}
> +
> +	stats->rx_dropped = priv->xstats.rx_dropped;
> +	stats->rx_errors = priv->xstats.rx_errors;
> +	stats->tx_dropped = priv->xstats.tx_dropped;
> +	stats->tx_errors = priv->xstats.tx_errors;
> +	stats->tx_carrier_errors = priv->xstats.tx_losscarrier + priv->xstats.tx_carrier;
> +	stats->collisions = priv->xstats.tx_collision + priv->xstats.rx_collision;
> +	stats->rx_length_errors = priv->xstats.rx_length;
> +	stats->rx_crc_errors = priv->xstats.rx_crc_errors;
> +	stats->rx_over_errors = priv->xstats.rx_overflow_cntr;
> +	stats->rx_missed_errors = priv->xstats.rx_missed_cntr;
> +}
> +
>  static const struct net_device_ops stmmac_netdev_ops = {
>  	.ndo_open = stmmac_open,
>  	.ndo_start_xmit = stmmac_xmit,
> @@ -6798,6 +6904,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
>  	.ndo_set_rx_mode = stmmac_set_rx_mode,
>  	.ndo_tx_timeout = stmmac_tx_timeout,
>  	.ndo_eth_ioctl = stmmac_ioctl,
> +	.ndo_get_stats64 = stmmac_get_stats64,
>  	.ndo_setup_tc = stmmac_setup_tc,
>  	.ndo_select_queue = stmmac_select_queue,
>  #ifdef CONFIG_NET_POLL_CONTROLLER
> @@ -7160,6 +7267,11 @@ int stmmac_dvr_probe(struct device *device,
>  	priv->device = device;
>  	priv->dev = ndev;
>  
> +	for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
> +		u64_stats_init(&priv->dma_conf.rx_queue[i].rxq_stats.syncp);
> +	for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
> +		u64_stats_init(&priv->dma_conf.tx_queue[i].txq_stats.syncp);
> +
>  	stmmac_set_ethtool_ops(ndev);
>  	priv->pause = pause;
>  	priv->plat = plat_dat;
> -- 
> 2.40.1
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-09-13 14:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-08 18:12 [PATCH AUTOSEL 6.5 01/45] spi: sun6i: add quirk for dual and quad SPI modes support Sasha Levin
2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 05/45] net: stmmac: use per-queue 64 bit statistics where necessary Sasha Levin
2023-09-13 14:43   ` Jisheng Zhang
2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 13/45] can: sun4i_can: Add acceptance register quirk Sasha Levin
2023-09-08 18:12 ` [PATCH AUTOSEL 6.5 14/45] can: sun4i_can: Add support for the Allwinner D1 Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).