> The airoha_dev_get_hw_stats() function had two correctness issues in the > way it collects hardware MIB counters. > > Bug 1: Read-clear race causes silent packet loss in statistics > > airoha_update_hw_stats() read all MIB registers and then cleared them > via REG_FE_GDM_MIB_CLEAR. There is a time window between the last > register read and the hardware clear. Any packet that the hardware > counts during this window is lost: the register is incremented, then > cleared, without the increment ever being read by software. Under > sustained traffic this causes a permanent and growing undercount in all > reported statistics. > > This is particularly misleading for tx_ok_pkts and tx_ok_bytes, which > routers and traffic monitors use to detect packet forwarding loss > between two points in a hardware-accelerated path (e.g., between two > netdevs in the QDMA/PPE fast-path). An inaccurate count makes it > impossible to reliably attribute drops in the forwarding pipeline > without capturing traffic at both ends independently. > > Bug 2: 32-bit counter overflow causes stat corruption > > Several MIB registers are only 32 bits wide: tx_drops, tx_broadcast, > tx_multicast, rx_drops, rx_broadcast, rx_multicast, rx_errors, > rx_crc_error, rx_over_errors, rx_fragment, rx_jabber, and the runt and > long buckets of the tx_len[]/rx_len[]. > > The original code relied on MIB_CLEAR to keep register values small > enough that a simple '+= val' per cycle did not lose data across a > wrap. Once clearing is removed (to fix Bug 1), raw '+= val' silently > corrupts the accumulated software counter on overflow. > > Fix both issues together: > > - 64-bit H+L register pairs (tx_ok_pkts, tx_ok_bytes, tx_len[1..5], > rx_ok_pkts, rx_ok_bytes, rx_len[1..5]): read directly from hardware > without clearing. Hardware accumulates the full running total; a > single direct assignment per poll is correct and lossless. > > - 32-bit registers (tx_drops, tx_broadcast, tx_multicast, rx_drops, > rx_broadcast, rx_multicast, rx_errors, rx_crc_error, rx_over_errors, > rx_fragment, rx_jabber, and the runt/long buckets in tx_len[0]/[6] > and rx_len[0]/[6]): track the previous hardware value in a new > hw_prev_stats sub-struct inside airoha_hw_stats and accumulate > (u32)(curr - prev) into the 64-bit software counter. Unsigned > subtraction handles wrap-around transparently: > prev=0xFFFFFF00, curr=0x00000010 -> delta=(u32)(0x10-0xFFFFFF00)=0x110 > > Remove the REG_FE_GDM_MIB_CLEAR write from airoha_update_hw_stats() > entirely. Because the driver no longer clears hardware counters, the > read-clear race window is eliminated. > > The hw_prev_stats fields are zero-initialised by the existing > devm_kzalloc() call in airoha_alloc_gdm_device(). > > Fixes: 8f4695fb67b2 ("net: airoha: better handle MIBs for GDM ports with multiple devs attached") > Signed-off-by: Aniket Negi > --- > drivers/net/ethernet/airoha/airoha_eth.c | 132 +++++++++++------------ > drivers/net/ethernet/airoha/airoha_eth.h | 22 ++++ > 2 files changed, 86 insertions(+), 68 deletions(-) > > diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c > index 1caf6766f2c0..7ae4e294478e 100644 > --- a/drivers/net/ethernet/airoha/airoha_eth.c > +++ b/drivers/net/ethernet/airoha/airoha_eth.c > @@ -1696,133 +1696,133 @@ static void airoha_dev_get_hw_stats(struct airoha_gdm_dev *dev) > > u64_stats_update_begin(&dev->stats.syncp); > > - /* TX */ > + /* TX - 64-bit H+L registers: hw accumulates the total, read directly. */ > val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_H(port->id)); > - dev->stats.tx_ok_pkts += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_L(port->id)); > - dev->stats.tx_ok_pkts += val; > + dev->stats.tx_ok_pkts = (u64)val << 32; I guess it is more readable to store REG_FE_GDM_TX_OK_PKT_CNT_L() read in val here. Something like: val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_L(port->id)); dev->stats.tx_ok_pkts += val; This apply even to occurrence below > + dev->stats.tx_ok_pkts += airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_BYTE_CNT_H(port->id)); > - dev->stats.tx_ok_bytes += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_BYTE_CNT_L(port->id)); > - dev->stats.tx_ok_bytes += val; > + dev->stats.tx_ok_bytes = (u64)val << 32; > + dev->stats.tx_ok_bytes += airoha_fe_rr(eth, REG_FE_GDM_TX_OK_BYTE_CNT_L(port->id)); > > + /* TX - 32-bit registers: accumulate delta to handle wrap-around. */ > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_DROP_CNT(port->id)); > - dev->stats.tx_drops += val; > + dev->stats.tx_drops += (u32)(val - dev->stats.hw_prev_stats.tx_drops); > + dev->stats.hw_prev_stats.tx_drops = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_BC_CNT(port->id)); > - dev->stats.tx_broadcast += val; > + dev->stats.tx_broadcast += (u32)(val - dev->stats.hw_prev_stats.tx_broadcast); > + dev->stats.hw_prev_stats.tx_broadcast = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_MC_CNT(port->id)); > - dev->stats.tx_multicast += val; > + dev->stats.tx_multicast += (u32)(val - dev->stats.hw_prev_stats.tx_multicast); > + dev->stats.hw_prev_stats.tx_multicast = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_RUNT_CNT(port->id)); > - dev->stats.tx_len[i] += val; > + dev->stats.tx_len[i] += (u32)(val - dev->stats.hw_prev_stats.tx_len[i]); > + dev->stats.hw_prev_stats.tx_len[i] = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_E64_CNT_H(port->id)); > - dev->stats.tx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_E64_CNT_L(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] += (u64)val << 32; Since now we do not reset MIB counters, this is wrong, you can't use "+=" > + dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_E64_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L64_CNT_H(port->id)); > - dev->stats.tx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L64_CNT_L(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] = (u64)val << 32; > + dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L64_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L127_CNT_H(port->id)); > - dev->stats.tx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L127_CNT_L(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] = (u64)val << 32; > + dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L127_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L255_CNT_H(port->id)); > - dev->stats.tx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L255_CNT_L(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] = (u64)val << 32; > + dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L255_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L511_CNT_H(port->id)); > - dev->stats.tx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L511_CNT_L(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] = (u64)val << 32; > + dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L511_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L1023_CNT_H(port->id)); > - dev->stats.tx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L1023_CNT_L(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] = (u64)val << 32; > + dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L1023_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_LONG_CNT(port->id)); > - dev->stats.tx_len[i++] += val; > + dev->stats.tx_len[i] += (u32)(val - dev->stats.hw_prev_stats.tx_len[i]); > + dev->stats.hw_prev_stats.tx_len[i++] = val; > > /* RX */ > val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_PKT_CNT_H(port->id)); > - dev->stats.rx_ok_pkts += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_PKT_CNT_L(port->id)); > - dev->stats.rx_ok_pkts += val; > + dev->stats.rx_ok_pkts = (u64)val << 32; > + dev->stats.rx_ok_pkts += airoha_fe_rr(eth, REG_FE_GDM_RX_OK_PKT_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_BYTE_CNT_H(port->id)); > - dev->stats.rx_ok_bytes += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_BYTE_CNT_L(port->id)); > - dev->stats.rx_ok_bytes += val; > + dev->stats.rx_ok_bytes = (u64)val << 32; > + dev->stats.rx_ok_bytes += airoha_fe_rr(eth, REG_FE_GDM_RX_OK_BYTE_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_DROP_CNT(port->id)); > - dev->stats.rx_drops += val; > + dev->stats.rx_drops += (u32)(val - dev->stats.hw_prev_stats.rx_drops); > + dev->stats.hw_prev_stats.rx_drops = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_BC_CNT(port->id)); > - dev->stats.rx_broadcast += val; > + dev->stats.rx_broadcast += (u32)(val - dev->stats.hw_prev_stats.rx_broadcast); > + dev->stats.hw_prev_stats.rx_broadcast = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_MC_CNT(port->id)); > - dev->stats.rx_multicast += val; > + dev->stats.rx_multicast += (u32)(val - dev->stats.hw_prev_stats.rx_multicast); > + dev->stats.hw_prev_stats.rx_multicast = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ERROR_DROP_CNT(port->id)); > - dev->stats.rx_errors += val; > + dev->stats.rx_errors += (u32)(val - dev->stats.hw_prev_stats.rx_errors); > + dev->stats.hw_prev_stats.rx_errors = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_CRC_ERR_CNT(port->id)); > - dev->stats.rx_crc_error += val; > + dev->stats.rx_crc_error += (u32)(val - dev->stats.hw_prev_stats.rx_crc_error); > + dev->stats.hw_prev_stats.rx_crc_error = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_OVERFLOW_DROP_CNT(port->id)); > - dev->stats.rx_over_errors += val; > + dev->stats.rx_over_errors += (u32)(val - dev->stats.hw_prev_stats.rx_over_errors); > + dev->stats.hw_prev_stats.rx_over_errors = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_FRAG_CNT(port->id)); > - dev->stats.rx_fragment += val; > + dev->stats.rx_fragment += (u32)(val - dev->stats.hw_prev_stats.rx_fragment); > + dev->stats.hw_prev_stats.rx_fragment = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_JABBER_CNT(port->id)); > - dev->stats.rx_jabber += val; > + dev->stats.rx_jabber += (u32)(val - dev->stats.hw_prev_stats.rx_jabber); > + dev->stats.hw_prev_stats.rx_jabber = val; > > i = 0; > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_RUNT_CNT(port->id)); > - dev->stats.rx_len[i] += val; > + dev->stats.rx_len[i] += (u32)(val - dev->stats.hw_prev_stats.rx_len[i]); > + dev->stats.hw_prev_stats.rx_len[i] = val; > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_E64_CNT_H(port->id)); > - dev->stats.rx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_E64_CNT_L(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] += (u64)val << 32; same here. > + dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_E64_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L64_CNT_H(port->id)); > - dev->stats.rx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L64_CNT_L(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] = (u64)val << 32; > + dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L64_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L127_CNT_H(port->id)); > - dev->stats.rx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L127_CNT_L(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] = (u64)val << 32; > + dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L127_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L255_CNT_H(port->id)); > - dev->stats.rx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L255_CNT_L(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] = (u64)val << 32; > + dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L255_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L511_CNT_H(port->id)); > - dev->stats.rx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L511_CNT_L(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] = (u64)val << 32; > + dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L511_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L1023_CNT_H(port->id)); > - dev->stats.rx_len[i] += ((u64)val << 32); > - val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L1023_CNT_L(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] = (u64)val << 32; > + dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L1023_CNT_L(port->id)); > > val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_LONG_CNT(port->id)); > - dev->stats.rx_len[i++] += val; > + dev->stats.rx_len[i] += (u32)(val - dev->stats.hw_prev_stats.rx_len[i]); > + dev->stats.hw_prev_stats.rx_len[i++] = val; > > u64_stats_update_end(&dev->stats.syncp); > } > @@ -1839,10 +1839,6 @@ static void airoha_update_hw_stats(struct airoha_gdm_dev *dev) > airoha_dev_get_hw_stats(port->devs[i]); > } > > - /* Reset MIB counters */ > - airoha_fe_set(dev->eth, REG_FE_GDM_MIB_CLEAR(port->id), > - FE_GDM_MIB_RX_CLEAR_MASK | FE_GDM_MIB_TX_CLEAR_MASK); > - > spin_unlock(&port->stats_lock); > } > > diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h > index 2765244d937c..af12ad6eac17 100644 > --- a/drivers/net/ethernet/airoha/airoha_eth.h > +++ b/drivers/net/ethernet/airoha/airoha_eth.h > @@ -244,6 +244,28 @@ struct airoha_hw_stats { > u64 rx_fragment; > u64 rx_jabber; > u64 rx_len[7]; > + > + struct { > + /* Previous HW register values for 32-bit counter delta tracking. > + * Storing the last seen value and accumulating (u32)(curr - prev) > + * in 64-bit software counter & handles wrap-around transparently > + * via unsigned arithmetic. These fields are never reported to > + * userspace. > + */ can you please align the comment here? > + u32 tx_drops; > + u32 tx_broadcast; > + u32 tx_multicast; > + u32 tx_len[7]; > + u32 rx_drops; > + u32 rx_broadcast; > + u32 rx_multicast; > + u32 rx_errors; > + u32 rx_crc_error; > + u32 rx_over_errors; > + u32 rx_fragment; > + u32 rx_jabber; > + u32 rx_len[7]; > + } hw_prev_stats; Maybe something like "prev_val32" ? Regards, Lorenzo > }; > > enum { > -- > 2.43.0 >