public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Paolo Abeni <pabeni@redhat.com>
To: Harshitha Ramamurthy <hramamurthy@google.com>, netdev@vger.kernel.org
Cc: joshwash@google.com, andrew+netdev@lunn.ch, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, willemb@google.com,
	maolson@google.com, nktgrg@google.com, jfraker@google.com,
	ziweixiao@google.com, jacob.e.keller@intel.com,
	pkaligineedi@google.com, shailend@google.com,
	jordanrhee@google.com, stable@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Debarghya Kundu <debarghyak@google.com>,
	Pin-yen Lin <treapking@google.com>
Subject: Re: [PATCH net 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted
Date: Thu, 23 Apr 2026 13:46:59 +0200	[thread overview]
Message-ID: <0e1c941e-dcaa-40fb-9df2-ac1db429f60a@redhat.com> (raw)
In-Reply-To: <20260420171837.455487-3-hramamurthy@google.com>

On 4/20/26 7:18 PM, Harshitha Ramamurthy wrote:
> From: Debarghya Kundu <debarghyak@google.com>
> 
> gve_get_base_stats() sets all the stats to 0, so the stats go backwards
> when interface goes down or configuration is adjusted.
> 
> Fix this by persisting baseline stats across interface down.
> 
> This was discovered by drivers/net/stats.py selftest.
> 
> Cc: stable@vger.kernel.org
> Fixes: 2e5e0932dff5 ("gve: add support for basic queue stats")
> Signed-off-by: Debarghya Kundu <debarghyak@google.com>
> Signed-off-by: Pin-yen Lin <treapking@google.com>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
> ---
>  drivers/net/ethernet/google/gve/gve.h      |  6 ++
>  drivers/net/ethernet/google/gve/gve_main.c | 64 +++++++++++++++++++---
>  2 files changed, 63 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index cbdf3a842cfe..ff7797043908 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -794,6 +794,10 @@ struct gve_ptp {
>  	struct gve_priv *priv;
>  };
>  
> +struct gve_ring_err_stats {
> +	u64 rx_alloc_fails;
> +};
> +
>  struct gve_priv {
>  	struct net_device *dev;
>  	struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
> @@ -882,6 +886,8 @@ struct gve_priv {
>  	unsigned long service_task_flags;
>  	unsigned long state_flags;
>  
> +	struct gve_ring_err_stats base_ring_err_stats;
> +	struct rtnl_link_stats64 base_net_stats;
>  	struct gve_stats_report *stats_report;
>  	u64 stats_report_len;
>  	dma_addr_t stats_report_bus; /* dma address for the stats report */
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index 675382e9756c..8617782791e0 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -105,9 +105,22 @@ static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		return gve_tx_dqo(skb, dev);
>  }
>  
> -static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> +static void gve_add_base_stats(struct gve_priv *priv,
> +			       struct rtnl_link_stats64 *s)
> +{
> +	struct rtnl_link_stats64 *base_stats = &priv->base_net_stats;
> +
> +	s->rx_packets += base_stats->rx_packets;
> +	s->rx_bytes += base_stats->rx_bytes;
> +	s->rx_dropped += base_stats->rx_dropped;
> +	s->tx_packets += base_stats->tx_packets;
> +	s->tx_bytes += base_stats->tx_bytes;
> +	s->tx_dropped += base_stats->tx_dropped;
> +}
> +
> +static void gve_get_ring_stats(struct gve_priv *priv,
> +			       struct rtnl_link_stats64 *s)
>  {
> -	struct gve_priv *priv = netdev_priv(dev);
>  	unsigned int start;
>  	u64 packets, bytes;
>  	int num_tx_queues;
> @@ -142,6 +155,14 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
>  	}
>  }
>  
> +static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> +{
> +	struct gve_priv *priv = netdev_priv(dev);
> +
> +	gve_get_ring_stats(priv, s);
> +	gve_add_base_stats(priv, s);
> +}
> +
>  static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
>  {
>  	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
> @@ -1493,6 +1514,23 @@ static int gve_queues_stop(struct gve_priv *priv)
>  	return gve_reset_recovery(priv, false);
>  }
>  
> +static void gve_get_ring_err_stats(struct gve_priv *priv,
> +				   struct gve_ring_err_stats *err_stats)
> +{
> +	int ring;
> +
> +	for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
> +		unsigned int start;
> +		struct gve_rx_ring *rx = &priv->rx[ring];
> +
> +		do {
> +			start = u64_stats_fetch_begin(&rx->statss);
> +			err_stats->rx_alloc_fails +=
> +				rx->rx_skb_alloc_fail + rx->rx_buf_alloc_fail;
> +		} while (u64_stats_fetch_retry(&rx->statss, start));

Sashiko says:

Could this loop improperly inflate the baseline metric by double counting?
If a concurrent update causes the sequence counter to change,
u64_stats_fetch_retry() forces the loop to restart. Because the addition
is performed in-place on err_stats->rx_alloc_fails, the same ring's
error values will be added again.
Would it be safer to use local variables inside the retry loop and update
the global accumulator only after the loop completes successfully, similar
to the pattern established in gve_get_ring_stats()?


> +	}
> +}
> +
>  static int gve_close(struct net_device *dev)
>  {
>  	struct gve_priv *priv = netdev_priv(dev);
> @@ -1502,6 +1540,10 @@ static int gve_close(struct net_device *dev)
>  	if (err)
>  		return err;
>  
> +	/* Save ring queue and err stats before closing the interface */
> +	gve_get_ring_stats(priv, &priv->base_net_stats);
> +	gve_get_ring_err_stats(priv, &priv->base_ring_err_stats);

Sashiko says:

Does this create a temporary spike in reported statistics?
During gve_close(), the active ring stats are added to base_net_stats.
However, priv->rx and priv->tx are not set to NULL until the memory
teardown completes in gve_queues_mem_remove().
If ndo_get_stats64 is called concurrently during this window, it will
add both the active ring stats and the newly updated base stats together.
This causes the reported statistics to temporarily double until the
teardown finishes.

Note that you are expected to proactively comment/reply on the ML to the
concerns raised by sashiko reviews.

Thanks,

Paolo


  reply	other threads:[~2026-04-23 11:47 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-20 17:18 [PATCH net 0/4] gve: Fixes for issues discovered via net selftests Harshitha Ramamurthy
2026-04-20 17:18 ` [PATCH net 1/4] gve: Add NULL pointer checks for per-queue statistics Harshitha Ramamurthy
2026-04-20 17:18 ` [PATCH net 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted Harshitha Ramamurthy
2026-04-23 11:46   ` Paolo Abeni [this message]
2026-04-20 17:18 ` [PATCH net 3/4] gve: Use default min ring size when device option values are 0 Harshitha Ramamurthy
2026-04-20 17:18 ` [PATCH net 4/4] gve: Make ethtool config changes synchronous Harshitha Ramamurthy
2026-04-23 13:25   ` Paolo Abeni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0e1c941e-dcaa-40fb-9df2-ac1db429f60a@redhat.com \
    --to=pabeni@redhat.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=debarghyak@google.com \
    --cc=edumazet@google.com \
    --cc=hramamurthy@google.com \
    --cc=jacob.e.keller@intel.com \
    --cc=jfraker@google.com \
    --cc=jordanrhee@google.com \
    --cc=joshwash@google.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maolson@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=nktgrg@google.com \
    --cc=pkaligineedi@google.com \
    --cc=shailend@google.com \
    --cc=stable@vger.kernel.org \
    --cc=treapking@google.com \
    --cc=willemb@google.com \
    --cc=ziweixiao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox