* [PATCH net v2 1/4] gve: Add NULL pointer checks for per-queue statistics
2026-04-25 0:24 [PATCH net v2 0/4] gve: Fixes for issues discovered via net selftests Harshitha Ramamurthy
@ 2026-04-25 0:24 ` Harshitha Ramamurthy
2026-04-27 20:22 ` Pin-yen Lin
2026-04-25 0:24 ` [PATCH net v2 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted Harshitha Ramamurthy
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: Harshitha Ramamurthy @ 2026-04-25 0:24 UTC (permalink / raw)
To: netdev
Cc: joshwash, hramamurthy, andrew+netdev, davem, edumazet, kuba,
pabeni, willemb, maolson, nktgrg, jfraker, ziweixiao,
jacob.e.keller, pkaligineedi, shailend, jordanrhee, stable,
linux-kernel, Debarghya Kundu, Pin-yen Lin
From: Debarghya Kundu <debarghyak@google.com>
gve_get_[tx/rx]_queue_stats references the [tx/rx] null rings when the
link is down. Add NULL pointer checks to guard this
This was discovered by drivers/net/stats.py selftest.
Cc: stable@vger.kernel.org
Fixes: 2e5e0932dff5 ("gve: add support for basic queue stats")
Signed-off-by: Debarghya Kundu <debarghyak@google.com>
Signed-off-by: Pin-yen Lin <treapking@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
drivers/net/ethernet/google/gve/gve_main.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 424d973c97f2..ef00d9ca1643 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -2746,9 +2746,13 @@ static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
struct netdev_queue_stats_rx *rx_stats)
{
struct gve_priv *priv = netdev_priv(dev);
- struct gve_rx_ring *rx = &priv->rx[idx];
+ struct gve_rx_ring *rx;
unsigned int start;
+ if (!priv->rx)
+ return;
+ rx = &priv->rx[idx];
+
do {
start = u64_stats_fetch_begin(&rx->statss);
rx_stats->packets = rx->rpackets;
@@ -2762,9 +2766,13 @@ static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
struct netdev_queue_stats_tx *tx_stats)
{
struct gve_priv *priv = netdev_priv(dev);
- struct gve_tx_ring *tx = &priv->tx[idx];
+ struct gve_tx_ring *tx;
unsigned int start;
+ if (!priv->tx)
+ return;
+ tx = &priv->tx[idx];
+
do {
start = u64_stats_fetch_begin(&tx->statss);
tx_stats->packets = tx->pkt_done;
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH net v2 1/4] gve: Add NULL pointer checks for per-queue statistics
2026-04-25 0:24 ` [PATCH net v2 1/4] gve: Add NULL pointer checks for per-queue statistics Harshitha Ramamurthy
@ 2026-04-27 20:22 ` Pin-yen Lin
0 siblings, 0 replies; 9+ messages in thread
From: Pin-yen Lin @ 2026-04-27 20:22 UTC (permalink / raw)
To: Harshitha Ramamurthy
Cc: netdev, joshwash, andrew+netdev, davem, edumazet, kuba, pabeni,
willemb, maolson, nktgrg, jfraker, ziweixiao, jacob.e.keller,
pkaligineedi, shailend, jordanrhee, stable, linux-kernel,
Debarghya Kundu
On Fri, Apr 24, 2026 at 5:24 PM Harshitha Ramamurthy
<hramamurthy@google.com> wrote:
>
> From: Debarghya Kundu <debarghyak@google.com>
>
> gve_get_[tx/rx]_queue_stats references the [tx/rx] null rings when the
> link is down. Add NULL pointer checks to guard this
>
> This was discovered by drivers/net/stats.py selftest.
>
> Cc: stable@vger.kernel.org
> Fixes: 2e5e0932dff5 ("gve: add support for basic queue stats")
> Signed-off-by: Debarghya Kundu <debarghyak@google.com>
> Signed-off-by: Pin-yen Lin <treapking@google.com>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
> ---
> drivers/net/ethernet/google/gve/gve_main.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index 424d973c97f2..ef00d9ca1643 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -2746,9 +2746,13 @@ static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
> struct netdev_queue_stats_rx *rx_stats)
> {
> struct gve_priv *priv = netdev_priv(dev);
> - struct gve_rx_ring *rx = &priv->rx[idx];
> + struct gve_rx_ring *rx;
> unsigned int start;
>
> + if (!priv->rx)
> + return;
Sashiko says:
Does this NULL check safely prevent a use-after-free regression in lockless
paths?
Queue statistics can be accessed locklessly from the core network stack
(e.g., via ndo_get_stats64 under rcu_read_lock). If gve_close() is called
concurrently, gve_queues_mem_free() frees priv->rx and priv->tx using
kvfree() without an RCU grace period (e.g., synchronize_net()).
A concurrent reader could pass this NULL check, stall briefly, and then
dereference the freed array memory.
Sashiko also made a similar comment about the tx stats.
While this race between stats and gve_close() is a pre-existing issue,
we will address it in a separate patch in v3.
> + rx = &priv->rx[idx];
> +
> do {
> start = u64_stats_fetch_begin(&rx->statss);
> rx_stats->packets = rx->rpackets;
> @@ -2762,9 +2766,13 @@ static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
> struct netdev_queue_stats_tx *tx_stats)
> {
> struct gve_priv *priv = netdev_priv(dev);
> - struct gve_tx_ring *tx = &priv->tx[idx];
> + struct gve_tx_ring *tx;
> unsigned int start;
>
> + if (!priv->tx)
> + return;
> + tx = &priv->tx[idx];
> +
> do {
> start = u64_stats_fetch_begin(&tx->statss);
> tx_stats->packets = tx->pkt_done;
> --
> 2.54.0.545.g6539524ca2-goog
>
Regards,
Pin-yen
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH net v2 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted
2026-04-25 0:24 [PATCH net v2 0/4] gve: Fixes for issues discovered via net selftests Harshitha Ramamurthy
2026-04-25 0:24 ` [PATCH net v2 1/4] gve: Add NULL pointer checks for per-queue statistics Harshitha Ramamurthy
@ 2026-04-25 0:24 ` Harshitha Ramamurthy
2026-04-27 20:30 ` Pin-yen Lin
2026-04-25 0:24 ` [PATCH net v2 3/4] gve: Use default min ring size when device option values are 0 Harshitha Ramamurthy
2026-04-25 0:24 ` [PATCH net v2 4/4] gve: Make ethtool config changes synchronous Harshitha Ramamurthy
3 siblings, 1 reply; 9+ messages in thread
From: Harshitha Ramamurthy @ 2026-04-25 0:24 UTC (permalink / raw)
To: netdev
Cc: joshwash, hramamurthy, andrew+netdev, davem, edumazet, kuba,
pabeni, willemb, maolson, nktgrg, jfraker, ziweixiao,
jacob.e.keller, pkaligineedi, shailend, jordanrhee, stable,
linux-kernel, Debarghya Kundu, Pin-yen Lin
From: Debarghya Kundu <debarghyak@google.com>
gve_get_base_stats() sets all the stats to 0, so the stats go backwards
when interface goes down or configuration is adjusted.
Fix this by persisting baseline stats across interface down.
Cc: stable@vger.kernel.org
Fixes: 2e5e0932dff5 ("gve: add support for basic queue stats")
Signed-off-by: Debarghya Kundu <debarghyak@google.com>
Co-developed-by: Pin-yen Lin <treapking@google.com>
Signed-off-by: Pin-yen Lin <treapking@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
Changes in v2:
- Add a NULL pointer check in gve_get_ring_err_stats() (Sashiko)
- Use local variable to prevent inflates from u64_stats_fetch_retry()
(Sashiko)
- Add u64_stats_fetch/begin to protect base stats (Sashiko)
drivers/net/ethernet/google/gve/gve.h | 7 ++
drivers/net/ethernet/google/gve/gve_main.c | 88 ++++++++++++++++++++--
2 files changed, 88 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 1d66d3834f7e..702b1641d984 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -794,6 +794,10 @@ struct gve_ptp {
struct gve_priv *priv;
};
+struct gve_ring_err_stats {
+ u64 rx_alloc_fails;
+};
+
struct gve_priv {
struct net_device *dev;
struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
@@ -883,6 +887,9 @@ struct gve_priv {
unsigned long service_task_flags;
unsigned long state_flags;
+ struct gve_ring_err_stats base_ring_err_stats;
+ struct rtnl_link_stats64 base_net_stats;
+ struct u64_stats_sync base_statss; /* sync stats for 32bit archs */
struct gve_stats_report *stats_report;
u64 stats_report_len;
dma_addr_t stats_report_bus; /* dma address for the stats report */
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index ef00d9ca1643..1fec8e1e4821 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -106,9 +106,34 @@ static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
return gve_tx_dqo(skb, dev);
}
-static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+static void gve_add_base_stats(struct gve_priv *priv,
+ struct rtnl_link_stats64 *s)
+{
+ struct rtnl_link_stats64 *base_stats = &priv->base_net_stats;
+ unsigned int start;
+ u64 rx_packets, rx_bytes, rx_dropped, tx_packets, tx_bytes, tx_dropped;
+
+ do {
+ start = u64_stats_fetch_begin(&priv->base_statss);
+ rx_packets = base_stats->rx_packets;
+ rx_bytes = base_stats->rx_bytes;
+ rx_dropped = base_stats->rx_dropped;
+ tx_packets = base_stats->tx_packets;
+ tx_bytes = base_stats->tx_bytes;
+ tx_dropped = base_stats->tx_dropped;
+ } while (u64_stats_fetch_retry(&priv->base_statss, start));
+
+ s->rx_packets += rx_packets;
+ s->rx_bytes += rx_bytes;
+ s->rx_dropped += rx_dropped;
+ s->tx_packets += tx_packets;
+ s->tx_bytes += tx_bytes;
+ s->tx_dropped += tx_dropped;
+}
+
+static void gve_get_ring_stats(struct gve_priv *priv,
+ struct rtnl_link_stats64 *s)
{
- struct gve_priv *priv = netdev_priv(dev);
unsigned int start;
u64 packets, bytes;
int num_tx_queues;
@@ -143,6 +168,14 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
}
}
+static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ gve_get_ring_stats(priv, s);
+ gve_add_base_stats(priv, s);
+}
+
static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
@@ -1533,6 +1566,29 @@ static int gve_queues_stop(struct gve_priv *priv)
return gve_reset_recovery(priv, false);
}
+static void gve_get_ring_err_stats(struct gve_priv *priv,
+ struct gve_ring_err_stats *err_stats)
+{
+ int ring;
+
+ if (!priv->rx)
+ return;
+
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ struct gve_rx_ring *rx = &priv->rx[ring];
+ unsigned int start;
+ u64 rx_alloc_fails;
+
+ do {
+ start = u64_stats_fetch_begin(&rx->statss);
+ rx_alloc_fails = rx->rx_skb_alloc_fail +
+ rx->rx_buf_alloc_fail;
+ } while (u64_stats_fetch_retry(&rx->statss, start));
+
+ err_stats->rx_alloc_fails += rx_alloc_fails;
+ }
+}
+
static int gve_close(struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
@@ -1542,6 +1598,12 @@ static int gve_close(struct net_device *dev)
if (err)
return err;
+ /* Save ring queue and err stats before closing the interface */
+ u64_stats_update_begin(&priv->base_statss);
+ gve_get_ring_stats(priv, &priv->base_net_stats);
+ gve_get_ring_err_stats(priv, &priv->base_ring_err_stats);
+ u64_stats_update_end(&priv->base_statss);
+
gve_queues_mem_remove(priv);
return 0;
}
@@ -2784,12 +2846,24 @@ static void gve_get_base_stats(struct net_device *dev,
struct netdev_queue_stats_rx *rx,
struct netdev_queue_stats_tx *tx)
{
- rx->packets = 0;
- rx->bytes = 0;
- rx->alloc_fail = 0;
+ const struct gve_ring_err_stats *base_err_stats;
+ const struct rtnl_link_stats64 *base_stats;
+ struct gve_priv *priv;
+ unsigned int start;
- tx->packets = 0;
- tx->bytes = 0;
+ priv = netdev_priv(dev);
+ base_stats = &priv->base_net_stats;
+ base_err_stats = &priv->base_ring_err_stats;
+
+ do {
+ start = u64_stats_fetch_begin(&priv->base_statss);
+ rx->packets = base_stats->rx_packets;
+ rx->bytes = base_stats->rx_bytes;
+ rx->alloc_fail = base_err_stats->rx_alloc_fails;
+
+ tx->packets = base_stats->tx_packets;
+ tx->bytes = base_stats->tx_bytes;
+ } while (u64_stats_fetch_retry(&priv->base_statss, start));
}
static const struct netdev_stat_ops gve_stat_ops = {
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH net v2 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted
2026-04-25 0:24 ` [PATCH net v2 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted Harshitha Ramamurthy
@ 2026-04-27 20:30 ` Pin-yen Lin
0 siblings, 0 replies; 9+ messages in thread
From: Pin-yen Lin @ 2026-04-27 20:30 UTC (permalink / raw)
To: Harshitha Ramamurthy
Cc: netdev, joshwash, andrew+netdev, davem, edumazet, kuba, pabeni,
willemb, maolson, nktgrg, jfraker, ziweixiao, jacob.e.keller,
pkaligineedi, shailend, jordanrhee, stable, linux-kernel,
Debarghya Kundu
On Fri, Apr 24, 2026 at 5:24 PM Harshitha Ramamurthy
<hramamurthy@google.com> wrote:
>
> From: Debarghya Kundu <debarghyak@google.com>
>
> gve_get_base_stats() sets all the stats to 0, so the stats go backwards
> when interface goes down or configuration is adjusted.
>
> Fix this by persisting baseline stats across interface down.
>
> Cc: stable@vger.kernel.org
> Fixes: 2e5e0932dff5 ("gve: add support for basic queue stats")
> Signed-off-by: Debarghya Kundu <debarghyak@google.com>
> Co-developed-by: Pin-yen Lin <treapking@google.com>
> Signed-off-by: Pin-yen Lin <treapking@google.com>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
> ---
> Changes in v2:
> - Add a NULL pointer check in gve_get_ring_err_stats() (Sashiko)
> - Use local variable to prevent inflates from u64_stats_fetch_retry()
> (Sashiko)
> - Add u64_stats_fetch/begin to protect base stats (Sashiko)
>
> drivers/net/ethernet/google/gve/gve.h | 7 ++
> drivers/net/ethernet/google/gve/gve_main.c | 88 ++++++++++++++++++++--
> 2 files changed, 88 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 1d66d3834f7e..702b1641d984 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -794,6 +794,10 @@ struct gve_ptp {
> struct gve_priv *priv;
> };
>
> +struct gve_ring_err_stats {
> + u64 rx_alloc_fails;
> +};
> +
> struct gve_priv {
> struct net_device *dev;
> struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
> @@ -883,6 +887,9 @@ struct gve_priv {
> unsigned long service_task_flags;
> unsigned long state_flags;
>
> + struct gve_ring_err_stats base_ring_err_stats;
> + struct rtnl_link_stats64 base_net_stats;
> + struct u64_stats_sync base_statss; /* sync stats for 32bit archs */
Sashiko says:
Is u64_stats_init(&priv->base_statss) ever explicitly called during driver
probe?
netdev_alloc_priv() zero-allocates the memory, but on 32-bit architectures
or when lockdep is enabled, the underlying seqcount_t requires explicit
initialization to set up its lockdep class key.
Could failing to initialize it trigger a lockdep splat when the interface
goes down and u64_stats_update_begin() is invoked?
Yes, this is missing in this series. We will include this in v3.
> struct gve_stats_report *stats_report;
> u64 stats_report_len;
> dma_addr_t stats_report_bus; /* dma address for the stats report */
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index ef00d9ca1643..1fec8e1e4821 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -106,9 +106,34 @@ static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
> return gve_tx_dqo(skb, dev);
> }
>
> -static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> +static void gve_add_base_stats(struct gve_priv *priv,
> + struct rtnl_link_stats64 *s)
> +{
> + struct rtnl_link_stats64 *base_stats = &priv->base_net_stats;
> + unsigned int start;
> + u64 rx_packets, rx_bytes, rx_dropped, tx_packets, tx_bytes, tx_dropped;
> +
> + do {
> + start = u64_stats_fetch_begin(&priv->base_statss);
> + rx_packets = base_stats->rx_packets;
> + rx_bytes = base_stats->rx_bytes;
> + rx_dropped = base_stats->rx_dropped;
> + tx_packets = base_stats->tx_packets;
> + tx_bytes = base_stats->tx_bytes;
> + tx_dropped = base_stats->tx_dropped;
> + } while (u64_stats_fetch_retry(&priv->base_statss, start));
> +
> + s->rx_packets += rx_packets;
> + s->rx_bytes += rx_bytes;
> + s->rx_dropped += rx_dropped;
> + s->tx_packets += tx_packets;
> + s->tx_bytes += tx_bytes;
> + s->tx_dropped += tx_dropped;
> +}
> +
> +static void gve_get_ring_stats(struct gve_priv *priv,
> + struct rtnl_link_stats64 *s)
> {
> - struct gve_priv *priv = netdev_priv(dev);
> unsigned int start;
> u64 packets, bytes;
> int num_tx_queues;
> @@ -143,6 +168,14 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> }
> }
>
> +static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
> +{
> + struct gve_priv *priv = netdev_priv(dev);
> +
> + gve_get_ring_stats(priv, s);
> + gve_add_base_stats(priv, s);
> +}
> +
> static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
> {
> struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
> @@ -1533,6 +1566,29 @@ static int gve_queues_stop(struct gve_priv *priv)
> return gve_reset_recovery(priv, false);
> }
>
> +static void gve_get_ring_err_stats(struct gve_priv *priv,
> + struct gve_ring_err_stats *err_stats)
> +{
> + int ring;
> +
> + if (!priv->rx)
> + return;
> +
> + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
> + struct gve_rx_ring *rx = &priv->rx[ring];
> + unsigned int start;
> + u64 rx_alloc_fails;
> +
> + do {
> + start = u64_stats_fetch_begin(&rx->statss);
> + rx_alloc_fails = rx->rx_skb_alloc_fail +
> + rx->rx_buf_alloc_fail;
> + } while (u64_stats_fetch_retry(&rx->statss, start));
> +
> + err_stats->rx_alloc_fails += rx_alloc_fails;
> + }
> +}
> +
> static int gve_close(struct net_device *dev)
> {
> struct gve_priv *priv = netdev_priv(dev);
> @@ -1542,6 +1598,12 @@ static int gve_close(struct net_device *dev)
> if (err)
> return err;
>
> + /* Save ring queue and err stats before closing the interface */
> + u64_stats_update_begin(&priv->base_statss);
Sashiko says:
Is there a risk of a deadlock here on 32-bit platforms?
ndo_get_stats64 (which calls gve_get_stats()) can execute concurrently in
softirq context. u64_stats_update_begin() on 32-bit platforms acquires a
seqcount lock but does not disable bottom halves.
If a softirq interrupts this update on the same CPU, could the lockless
reader spin infinitely waiting for the sequence counter to become even,
causing a hard deadlock?
Should this update be wrapped in local_bh_disable() and local_bh_enable()?
We will add this in v3.
> + gve_get_ring_stats(priv, &priv->base_net_stats);
> + gve_get_ring_err_stats(priv, &priv->base_ring_err_stats);
> + u64_stats_update_end(&priv->base_statss);
> +
> gve_queues_mem_remove(priv);
Sashiko says:
Will this sequence cause a transient double-counting of statistics?
The active ring stats are accumulated into base_net_stats, but the ring
pointers (priv->rx, priv->tx) remain active and are not cleared until
gve_queues_mem_remove() executes later.
If gve_get_stats() executes concurrently during this window, does it sum both
the active ring stats and the newly updated base_net_stats (which now
incorporates those same ring stats)?
One additional pre-existing issue related to this code path: does
gve_queues_mem_remove() leave a window for a use-after-free?
ndo_get_stats64 executes locklessly under RCU and checks if (priv->rx)
before reading ring stats. gve_queues_mem_remove() frees priv->rx using
kvfree(), but sets the pointer to NULL only after the memory is freed.
If ndo_get_stats64 checks priv->rx, gets preempted, and resumes after
gve_close() frees the array, could it dereference freed memory?
Both point to a pre-existing race between stats and gve_close(). We
will address this in v3.
> return 0;
> }
> @@ -2784,12 +2846,24 @@ static void gve_get_base_stats(struct net_device *dev,
> struct netdev_queue_stats_rx *rx,
> struct netdev_queue_stats_tx *tx)
> {
> - rx->packets = 0;
> - rx->bytes = 0;
> - rx->alloc_fail = 0;
> + const struct gve_ring_err_stats *base_err_stats;
> + const struct rtnl_link_stats64 *base_stats;
> + struct gve_priv *priv;
> + unsigned int start;
>
> - tx->packets = 0;
> - tx->bytes = 0;
> + priv = netdev_priv(dev);
> + base_stats = &priv->base_net_stats;
> + base_err_stats = &priv->base_ring_err_stats;
> +
> + do {
> + start = u64_stats_fetch_begin(&priv->base_statss);
> + rx->packets = base_stats->rx_packets;
> + rx->bytes = base_stats->rx_bytes;
> + rx->alloc_fail = base_err_stats->rx_alloc_fails;
> +
> + tx->packets = base_stats->tx_packets;
> + tx->bytes = base_stats->tx_bytes;
> + } while (u64_stats_fetch_retry(&priv->base_statss, start));
> }
>
> static const struct netdev_stat_ops gve_stat_ops = {
> --
> 2.54.0.545.g6539524ca2-goog
>
Regards,
Pin-yen
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH net v2 3/4] gve: Use default min ring size when device option values are 0
2026-04-25 0:24 [PATCH net v2 0/4] gve: Fixes for issues discovered via net selftests Harshitha Ramamurthy
2026-04-25 0:24 ` [PATCH net v2 1/4] gve: Add NULL pointer checks for per-queue statistics Harshitha Ramamurthy
2026-04-25 0:24 ` [PATCH net v2 2/4] gve: Fix backward stats when interface goes down or configuration is adjusted Harshitha Ramamurthy
@ 2026-04-25 0:24 ` Harshitha Ramamurthy
2026-04-27 20:33 ` Pin-yen Lin
2026-04-25 0:24 ` [PATCH net v2 4/4] gve: Make ethtool config changes synchronous Harshitha Ramamurthy
3 siblings, 1 reply; 9+ messages in thread
From: Harshitha Ramamurthy @ 2026-04-25 0:24 UTC (permalink / raw)
To: netdev
Cc: joshwash, hramamurthy, andrew+netdev, davem, edumazet, kuba,
pabeni, willemb, maolson, nktgrg, jfraker, ziweixiao,
jacob.e.keller, pkaligineedi, shailend, jordanrhee, stable,
linux-kernel, Pin-yen Lin
From: Pin-yen Lin <treapking@google.com>
On gvnic devices that support reporting minimum ring sizes, the device
option always includes the min_(rx|tx)_ring_size fields, and the values
will be 0 if they are not configured to be exposed. This makes the
driver allow unexpected small ring size configurations from the
userspace.
Use the default ring size in the driver if the min ring sizes from the
device option are 0.
This was discovered by drivers/net/ring_reconfig.py selftest.
Cc: stable@vger.kernel.org
Fixes: ed4fb326947d ("gve: add support to read ring size ranges from the device")
Reviewed-by: Joshua Washington <joshwash@google.com>
Reviewed-by: Jordan Rhee <jordanrhee@google.com>
Signed-off-by: Pin-yen Lin <treapking@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
drivers/net/ethernet/google/gve/gve_adminq.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 08587bf40ed4..2cd0dd6ced94 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -189,7 +189,9 @@ void gve_parse_device_option(struct gve_priv *priv,
*dev_op_modify_ring = (void *)(option + 1);
/* device has not provided min ring size */
- if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE)
+ if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE ||
+ be16_to_cpu((*dev_op_modify_ring)->min_rx_ring_size) == 0 ||
+ be16_to_cpu((*dev_op_modify_ring)->min_tx_ring_size) == 0)
priv->default_min_ring_size = true;
break;
case GVE_DEV_OPT_ID_FLOW_STEERING:
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH net v2 3/4] gve: Use default min ring size when device option values are 0
2026-04-25 0:24 ` [PATCH net v2 3/4] gve: Use default min ring size when device option values are 0 Harshitha Ramamurthy
@ 2026-04-27 20:33 ` Pin-yen Lin
0 siblings, 0 replies; 9+ messages in thread
From: Pin-yen Lin @ 2026-04-27 20:33 UTC (permalink / raw)
To: Harshitha Ramamurthy
Cc: netdev, joshwash, andrew+netdev, davem, edumazet, kuba, pabeni,
willemb, maolson, nktgrg, jfraker, ziweixiao, jacob.e.keller,
pkaligineedi, shailend, jordanrhee, stable, linux-kernel
On Fri, Apr 24, 2026 at 5:25 PM Harshitha Ramamurthy
<hramamurthy@google.com> wrote:
>
> From: Pin-yen Lin <treapking@google.com>
>
> On gvnic devices that support reporting minimum ring sizes, the device
> option always includes the min_(rx|tx)_ring_size fields, and the values
> will be 0 if they are not configured to be exposed. This makes the
> driver allow unexpected small ring size configurations from the
> userspace.
>
> Use the default ring size in the driver if the min ring sizes from the
> device option are 0.
>
> This was discovered by drivers/net/ring_reconfig.py selftest.
>
> Cc: stable@vger.kernel.org
> Fixes: ed4fb326947d ("gve: add support to read ring size ranges from the device")
> Reviewed-by: Joshua Washington <joshwash@google.com>
> Reviewed-by: Jordan Rhee <jordanrhee@google.com>
> Signed-off-by: Pin-yen Lin <treapking@google.com>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
> ---
> drivers/net/ethernet/google/gve/gve_adminq.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
> index 08587bf40ed4..2cd0dd6ced94 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.c
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.c
> @@ -189,7 +189,9 @@ void gve_parse_device_option(struct gve_priv *priv,
> *dev_op_modify_ring = (void *)(option + 1);
>
> /* device has not provided min ring size */
> - if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE)
> + if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE ||
> + be16_to_cpu((*dev_op_modify_ring)->min_rx_ring_size) == 0 ||
> + be16_to_cpu((*dev_op_modify_ring)->min_tx_ring_size) == 0)
Sashiko says:
Could this cause an out-of-bounds memory read when processing malformed
device option lengths?
Looking at the earlier validation in gve_parse_device_option() for
GVE_DEV_OPT_ID_MODIFY_RING, the length is only verified to be at least
GVE_DEVICE_OPTION_NO_MIN_RING_SIZE (8 bytes), while the full
struct gve_device_option_modify_ring is 12 bytes:
if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
If the device provides an option_length of 9, 10, or 11, the initial
option_length == 8 check will evaluate to false. The code will then
evaluate the next conditions and read min_rx_ring_size and
min_tx_ring_size.
Since these fields are located at offsets 8-11, this reads past the end
of the provided option payload.
Because the descriptor buffer is allocated via a page-aligned dma_pool_alloc
of exactly GVE_ADMINQ_BUFFER_SIZE, if the malformed option ends exactly at
the page boundary, could this read cross into adjacent, potentially unmapped
memory and result in a page fault?
Would it be safer to verify that option_length is large enough to contain
the minimum ring size fields before dereferencing them?
GVNIC will never return an option length of 9, 10, or 11, so this
shouldn't be an issue.
> priv->default_min_ring_size = true;
> break;
> case GVE_DEV_OPT_ID_FLOW_STEERING:
> --
> 2.54.0.545.g6539524ca2-goog
>
Regards,
Pin-yen
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH net v2 4/4] gve: Make ethtool config changes synchronous
2026-04-25 0:24 [PATCH net v2 0/4] gve: Fixes for issues discovered via net selftests Harshitha Ramamurthy
` (2 preceding siblings ...)
2026-04-25 0:24 ` [PATCH net v2 3/4] gve: Use default min ring size when device option values are 0 Harshitha Ramamurthy
@ 2026-04-25 0:24 ` Harshitha Ramamurthy
2026-04-27 20:34 ` Pin-yen Lin
3 siblings, 1 reply; 9+ messages in thread
From: Harshitha Ramamurthy @ 2026-04-25 0:24 UTC (permalink / raw)
To: netdev
Cc: joshwash, hramamurthy, andrew+netdev, davem, edumazet, kuba,
pabeni, willemb, maolson, nktgrg, jfraker, ziweixiao,
jacob.e.keller, pkaligineedi, shailend, jordanrhee, stable,
linux-kernel, Pin-yen Lin
From: Pin-yen Lin <treapking@google.com>
When modifying device features via ethtool, the driver queues the
carrier status update to its workqueue (gve_wq). This leads to a
short link-down state after running the ethtool command.
Use `gve_turnup_and_check_status()` instead of `gve_turnup()` in
`gve_queues_start()` to update the carrier status before returning to
the userspace.
This was discovered by drivers/net/ping.py selftest. The test calls
ping command right after an ethtool configuration, but the interface
could be down without this fix.
Cc: stable@vger.kernel.org
Fixes: 5f08cd3d6423 ("gve: Alloc before freeing when adjusting queues")
Signed-off-by: Pin-yen Lin <treapking@google.com>
Reviewed-by: Joshua Washington <joshwash@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
drivers/net/ethernet/google/gve/gve_main.c | 56 +++++++++++-----------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 1fec8e1e4821..2c461be12a75 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1424,6 +1424,33 @@ static void gve_queues_mem_remove(struct gve_priv *priv)
priv->rx = NULL;
}
+static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
+{
+ if (!gve_get_napi_enabled(priv))
+ return;
+
+ if (link_status == netif_carrier_ok(priv->dev))
+ return;
+
+ if (link_status) {
+ netdev_info(priv->dev, "Device link is up.\n");
+ netif_carrier_on(priv->dev);
+ } else {
+ netdev_info(priv->dev, "Device link is down.\n");
+ netif_carrier_off(priv->dev);
+ }
+}
+
+static void gve_turnup_and_check_status(struct gve_priv *priv)
+{
+ u32 status;
+
+ gve_turnup(priv);
+ status = ioread32be(&priv->reg_bar0->device_status);
+ gve_handle_link_status(priv,
+ GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+}
+
/* The passed-in queue memory is stored into priv and the queues are made live.
* No memory is allocated. Passed-in memory is freed on errors.
*/
@@ -1486,8 +1513,7 @@ static int gve_queues_start(struct gve_priv *priv,
round_jiffies(jiffies +
msecs_to_jiffies(priv->stats_report_timer_period)));
- gve_turnup(priv);
- queue_work(priv->gve_wq, &priv->service_task);
+ gve_turnup_and_check_status(priv);
priv->interface_up_cnt++;
return 0;
@@ -1608,23 +1634,6 @@ static int gve_close(struct net_device *dev)
return 0;
}
-static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
-{
- if (!gve_get_napi_enabled(priv))
- return;
-
- if (link_status == netif_carrier_ok(priv->dev))
- return;
-
- if (link_status) {
- netdev_info(priv->dev, "Device link is up.\n");
- netif_carrier_on(priv->dev);
- } else {
- netdev_info(priv->dev, "Device link is down.\n");
- netif_carrier_off(priv->dev);
- }
-}
-
static int gve_configure_rings_xdp(struct gve_priv *priv,
u16 num_xdp_rings)
{
@@ -2099,15 +2108,6 @@ static void gve_turnup(struct gve_priv *priv)
gve_set_napi_enabled(priv);
}
-static void gve_turnup_and_check_status(struct gve_priv *priv)
-{
- u32 status;
-
- gve_turnup(priv);
- status = ioread32be(&priv->reg_bar0->device_status);
- gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
-}
-
static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
unsigned int txqueue)
{
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH net v2 4/4] gve: Make ethtool config changes synchronous
2026-04-25 0:24 ` [PATCH net v2 4/4] gve: Make ethtool config changes synchronous Harshitha Ramamurthy
@ 2026-04-27 20:34 ` Pin-yen Lin
0 siblings, 0 replies; 9+ messages in thread
From: Pin-yen Lin @ 2026-04-27 20:34 UTC (permalink / raw)
To: Harshitha Ramamurthy
Cc: netdev, joshwash, andrew+netdev, davem, edumazet, kuba, pabeni,
willemb, maolson, nktgrg, jfraker, ziweixiao, jacob.e.keller,
pkaligineedi, shailend, jordanrhee, stable, linux-kernel
On Fri, Apr 24, 2026 at 5:25 PM Harshitha Ramamurthy
<hramamurthy@google.com> wrote:
>
> From: Pin-yen Lin <treapking@google.com>
>
> When modifying device features via ethtool, the driver queues the
> carrier status update to its workqueue (gve_wq). This leads to a
> short link-down state after running the ethtool command.
>
> Use `gve_turnup_and_check_status()` instead of `gve_turnup()` in
> `gve_queues_start()` to update the carrier status before returning to
> the userspace.
>
> This was discovered by drivers/net/ping.py selftest. The test calls
> ping command right after an ethtool configuration, but the interface
> could be down without this fix.
>
> Cc: stable@vger.kernel.org
> Fixes: 5f08cd3d6423 ("gve: Alloc before freeing when adjusting queues")
> Signed-off-by: Pin-yen Lin <treapking@google.com>
> Reviewed-by: Joshua Washington <joshwash@google.com>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
> ---
> drivers/net/ethernet/google/gve/gve_main.c | 56 +++++++++++-----------
> 1 file changed, 28 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index 1fec8e1e4821..2c461be12a75 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -1424,6 +1424,33 @@ static void gve_queues_mem_remove(struct gve_priv *priv)
> priv->rx = NULL;
> }
Sashiko says:
This isn't a bug introduced by this patch, but is there a pre-existing
use-after-free risk here involving stats_report_task?
Looking at gve_stats_report_task(), it iterates over priv->tx and priv->rx
to collect statistics without holding the rtnl_lock. During teardown,
gve_queues_stop() deletes the timer via timer_delete_sync(), but it does
not appear to explicitly cancel the work item with
cancel_work_sync(&priv->stats_report_task).
If the timer queues the work item just before timer_delete_sync() executes,
could the task remain pending on the workqueue?
When gve_close() then calls gve_queues_mem_remove(), the memory for
priv->tx and priv->rx is freed and the pointers are set to NULL. If the
pending stats_report_task runs concurrently and evaluates if (priv->tx)
right before the memory is freed, could it dereference stale pointers and
cause a use-after-free?
Additionally, during module removal, gve_teardown_priv_resources() frees
priv->stats_report and sets it to NULL. Since destroy_workqueue() is
called immediately after, could this flush and execute the pending task,
leading to a NULL pointer dereference?
Sashiko pointed out the same issue at patch 1/4. We will address this in v3.
>
> +static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
> +{
> + if (!gve_get_napi_enabled(priv))
> + return;
> +
> + if (link_status == netif_carrier_ok(priv->dev))
> + return;
> +
> + if (link_status) {
> + netdev_info(priv->dev, "Device link is up.\n");
> + netif_carrier_on(priv->dev);
> + } else {
> + netdev_info(priv->dev, "Device link is down.\n");
> + netif_carrier_off(priv->dev);
> + }
> +}
> +
> +static void gve_turnup_and_check_status(struct gve_priv *priv)
> +{
> + u32 status;
> +
> + gve_turnup(priv);
> + status = ioread32be(&priv->reg_bar0->device_status);
> + gve_handle_link_status(priv,
> + GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
> +}
> +
> /* The passed-in queue memory is stored into priv and the queues are made live.
> * No memory is allocated. Passed-in memory is freed on errors.
> */
> @@ -1486,8 +1513,7 @@ static int gve_queues_start(struct gve_priv *priv,
> round_jiffies(jiffies +
> msecs_to_jiffies(priv->stats_report_timer_period)));
>
> - gve_turnup(priv);
> - queue_work(priv->gve_wq, &priv->service_task);
> + gve_turnup_and_check_status(priv);
> priv->interface_up_cnt++;
> return 0;
>
> @@ -1608,23 +1634,6 @@ static int gve_close(struct net_device *dev)
> return 0;
> }
>
> -static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
> -{
> - if (!gve_get_napi_enabled(priv))
> - return;
> -
> - if (link_status == netif_carrier_ok(priv->dev))
> - return;
> -
> - if (link_status) {
> - netdev_info(priv->dev, "Device link is up.\n");
> - netif_carrier_on(priv->dev);
> - } else {
> - netdev_info(priv->dev, "Device link is down.\n");
> - netif_carrier_off(priv->dev);
> - }
> -}
> -
> static int gve_configure_rings_xdp(struct gve_priv *priv,
> u16 num_xdp_rings)
> {
> @@ -2099,15 +2108,6 @@ static void gve_turnup(struct gve_priv *priv)
> gve_set_napi_enabled(priv);
> }
>
> -static void gve_turnup_and_check_status(struct gve_priv *priv)
> -{
> - u32 status;
> -
> - gve_turnup(priv);
> - status = ioread32be(&priv->reg_bar0->device_status);
> - gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
> -}
> -
> static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
> unsigned int txqueue)
> {
> --
> 2.54.0.545.g6539524ca2-goog
>
Regards,
Pin-yen
^ permalink raw reply [flat|nested] 9+ messages in thread