public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Ahmed Zaki <anzaki@gmail.com>
To: netfilter-devel@vger.kernel.org, andrew@lunn.ch,
	olteanv@gmail.com, pablo@netfilter.org, fw@strlen.de,
	kuba@kernel.org, pabeni@redhat.com, edumazet@google.com
Cc: coreteam@netfilter.org, netdev@vger.kernel.org
Subject: [PATCH nf-next v2 1/2] netfilter: flowtable: update netdev stats with HW_OFFLOAD flows
Date: Tue, 24 Mar 2026 14:40:15 -0600	[thread overview]
Message-ID: <20260324204016.2089193-2-anzaki@gmail.com> (raw)
In-Reply-To: <20260324204016.2089193-1-anzaki@gmail.com>

Some drivers (notably DSA) delegate nft flowtable HW_OFFLOAD flows to a
parent driver. While the parent driver is able to report the offloaded
traffic stats directly from the HW, the delegating driver does not
report them. This breaks SNMP-based monitoring tools that rely on
netdev stats to report network traffic.

Add a new per-CPU struct pcpu_sw_netstats "fstats" to net_device that
is only allocated for devices whose driver sets the new flag
"flow_offload_via_parent". The stats are allocated lazily by the nft
flow offloading code the first time counters of an offloaded flow are
accounted to the device. They are updated periodically in
flow_offload_work_stats() and once more in flow_offload_work_del(),
before the flow is deleted. For this, flow_offload_work_del() had to
be moved below flow_offload_tuple_stats().

Signed-off-by: Ahmed Zaki <anzaki@gmail.com>
---
 include/linux/netdevice.h             | 45 ++++++++++++
 net/core/dev.c                        |  8 +++
 net/netfilter/nf_flow_table_offload.c | 98 +++++++++++++++++++++++++--
 3 files changed, 145 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 67e25f6d15a4..647758f78213 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1840,6 +1840,11 @@ enum netdev_reg_state {
  *	@stats:		Statistics struct, which was left as a legacy, use
  *			rtnl_link_stats64 instead
  *
+ *	@fstats:	HW offloaded flow statistics: RX/TX packets,
+ *			RX/TX bytes. Lazily allocated by the flow offload
+ *			path on the first offloaded flow for devices that
+ *			set @flow_offload_via_parent. Freed by free_netdev().
+ *
  *	@core_stats:	core networking counters,
  *			do not use this in drivers
  *	@carrier_up_count:	Number of times the carrier has been up
@@ -2048,6 +2053,12 @@ enum netdev_reg_state {
  *	@change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN
  *	@netns_immutable: interface can't change network namespaces
  *	@fcoe_mtu:	device supports maximum FCoE MTU, 2158 bytes
+ *	@flow_offload_via_parent: device delegates nft flowtable hardware
+ *				  offload to a parent/conduit device (e.g. DSA
+ *				  user ports delegate to their conduit MAC).
+ *				  The parent's HW counts the offloaded traffic
+ *				  but this device's sw netstats path does not.
+ *				  @fstats is allocated to fill that gap.
  *
  *	@net_notifier_list:	List of per-net netdev notifier block
  *				that follow this device when it is moved
@@ -2233,6 +2244,7 @@ struct net_device {
 
 	struct net_device_stats	stats; /* not used by modern drivers */
 
+	struct pcpu_sw_netstats __percpu *fstats;
 	struct net_device_core_stats __percpu *core_stats;
 
 	/* Stats to monitor link on/off, flapping */
@@ -2463,6 +2475,7 @@ struct net_device {
 	unsigned long		change_proto_down:1;
 	unsigned long		netns_immutable:1;
 	unsigned long		fcoe_mtu:1;
+	unsigned long		flow_offload_via_parent:1;
 
 	struct list_head	net_notifier_list;
 
@@ -2992,6 +3005,38 @@ struct pcpu_lstats {
 
 void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes);
 
+/* Add HW-offloaded RX counters to @dev's fstats; u64 as in flow_stats. */
+static inline void dev_fstats_rx_add(struct net_device *dev, u64 packets,
+				     u64 len)
+{
+	struct pcpu_sw_netstats *fstats;
+
+	if (!dev->fstats)
+		return;
+	/* NOTE(review): this_cpu_ptr() assumes no preemption here; confirm */
+	fstats = this_cpu_ptr(dev->fstats);
+	u64_stats_update_begin(&fstats->syncp);
+	u64_stats_add(&fstats->rx_bytes, len);
+	u64_stats_add(&fstats->rx_packets, packets);
+	u64_stats_update_end(&fstats->syncp);
+}
+
+/* Add HW-offloaded TX counters to @dev's fstats; u64 as in flow_stats. */
+static inline void dev_fstats_tx_add(struct net_device *dev, u64 packets,
+				     u64 len)
+{
+	struct pcpu_sw_netstats *fstats;
+
+	if (!dev->fstats)
+		return;
+	/* NOTE(review): this_cpu_ptr() assumes no preemption here; confirm */
+	fstats = this_cpu_ptr(dev->fstats);
+	u64_stats_update_begin(&fstats->syncp);
+	u64_stats_add(&fstats->tx_bytes, len);
+	u64_stats_add(&fstats->tx_packets, packets);
+	u64_stats_update_end(&fstats->syncp);
+}
+
 static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len)
 {
 	struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
diff --git a/net/core/dev.c b/net/core/dev.c
index f48dc299e4b2..07fb315ad42c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11865,6 +11865,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	const struct net_device_core_stats __percpu *p;
+	const struct pcpu_sw_netstats __percpu *fstats;
 
 	/*
 	 * IPv{4,6} and udp tunnels share common stat helpers and use
@@ -11893,6 +11894,11 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 		netdev_stats_to_stats64(storage, &dev->stats);
 	}
 
+	/* This READ_ONCE() pairs with cmpxchg in flow_offload_fstats_ensure() */
+	fstats = READ_ONCE(dev->fstats);
+	if (fstats)
+		dev_fetch_sw_netstats(storage, fstats);
+
 	/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
 	p = READ_ONCE(dev->core_stats);
 	if (p) {
@@ -12212,6 +12218,8 @@ void free_netdev(struct net_device *dev)
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
 #endif
+	free_percpu(dev->fstats);
+	dev->fstats = NULL;
 	free_percpu(dev->core_stats);
 	dev->core_stats = NULL;
 	free_percpu(dev->xdp_bulkq);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b2e4fb6fa011..fc1e67a79904 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -925,13 +925,80 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
 	nf_flow_offload_destroy(flow_rule);
 }
 
-static void flow_offload_work_del(struct flow_offload_work *offload)
+static bool flow_offload_fstats_ensure(struct net_device *dev)
 {
-	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
-	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
-	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
-		flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
-	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
+	struct pcpu_sw_netstats __percpu *p;
+
+	if (!dev->flow_offload_via_parent)
+		return false;
+
+	/* Fast path: already allocated. Pairs with cmpxchg() below. */
+	if (likely(READ_ONCE(dev->fstats)))
+		return true;
+	/* Slow path: allocate and try to publish our own copy. */
+	p = __netdev_alloc_pcpu_stats(struct pcpu_sw_netstats, GFP_ATOMIC);
+	if (!p)
+		return false;
+
+	if (cmpxchg(&dev->fstats, NULL, p))
+		free_percpu(p);	/* lost the race, discard and use winner's */
+
+	return true;
+}
+
+static u32 flow_offload_egress_ifidx(const struct flow_offload_tuple *tuple)
+{
+	/* Pick the ifindex field matching the xmit type; 0 for any other. */
+	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+		return tuple->ifidx;
+
+	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+		return tuple->out.ifidx;
+
+	return 0;
+}
+
+static void flow_offload_netdev_update(struct flow_offload_work *offload,
+				       struct flow_stats *stats)
+{
+	const struct flow_offload_tuple *tuple;
+	struct net_device *indev, *outdev;
+	struct net *net;
+
+	rcu_read_lock();
+	net = read_pnet(&offload->flowtable->net);
+	if (stats[FLOW_OFFLOAD_DIR_ORIGINAL].pkts) {
+		tuple = &offload->flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple;
+		indev = dev_get_by_index_rcu(net, tuple->iifidx);
+		if (indev && flow_offload_fstats_ensure(indev))
+			dev_fstats_rx_add(indev,
+					  stats[FLOW_OFFLOAD_DIR_ORIGINAL].pkts,
+					  stats[FLOW_OFFLOAD_DIR_ORIGINAL].bytes);
+		/* The same counters are TX traffic on the egress device. */
+		outdev = dev_get_by_index_rcu(net,
+					      flow_offload_egress_ifidx(tuple));
+		if (outdev && flow_offload_fstats_ensure(outdev))
+			dev_fstats_tx_add(outdev,
+					  stats[FLOW_OFFLOAD_DIR_ORIGINAL].pkts,
+					  stats[FLOW_OFFLOAD_DIR_ORIGINAL].bytes);
+	}
+	/* Reply direction: same accounting with the reply tuple. */
+	if (stats[FLOW_OFFLOAD_DIR_REPLY].pkts) {
+		tuple = &offload->flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple;
+		indev = dev_get_by_index_rcu(net, tuple->iifidx);
+		if (indev && flow_offload_fstats_ensure(indev))
+			dev_fstats_rx_add(indev,
+					  stats[FLOW_OFFLOAD_DIR_REPLY].pkts,
+					  stats[FLOW_OFFLOAD_DIR_REPLY].bytes);
+
+		outdev = dev_get_by_index_rcu(net,
+					      flow_offload_egress_ifidx(tuple));
+		if (outdev && flow_offload_fstats_ensure(outdev))
+			dev_fstats_tx_add(outdev,
+					  stats[FLOW_OFFLOAD_DIR_REPLY].pkts,
+					  stats[FLOW_OFFLOAD_DIR_REPLY].bytes);
+	}
+	rcu_read_unlock();
 }
 
 static void flow_offload_tuple_stats(struct flow_offload_work *offload,
@@ -968,6 +1035,25 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
 				       FLOW_OFFLOAD_DIR_REPLY,
 				       stats[1].pkts, stats[1].bytes);
 	}
+
+	flow_offload_netdev_update(offload, stats);
+}
+
+static void flow_offload_work_del(struct flow_offload_work *offload)
+{
+	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
+
+	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
+	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+		flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
+					 &stats[1]);
+	flow_offload_netdev_update(offload, stats);
+	/* Final stats are folded in above; now remove the offloaded flow. */
+	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
+	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+		flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
+}
 
 static void flow_offload_work_handler(struct work_struct *work)
-- 
2.43.0


  reply	other threads:[~2026-03-24 20:41 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-24 20:40 [PATCH nf-next v2 0/2] Update (DSA) netdev stats with offloaded flows Ahmed Zaki
2026-03-24 20:40 ` Ahmed Zaki [this message]
2026-03-24 21:28   ` [PATCH nf-next v2 1/2] netfilter: flowtable: update netdev stats with HW_OFFLOAD flows Pablo Neira Ayuso
2026-03-24 23:27     ` Ahmed Zaki
2026-03-24 20:40 ` [PATCH nf-next v2 2/2] net: dsa: update net_device stats with HW offloaded flows stats Ahmed Zaki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260324204016.2089193-2-anzaki@gmail.com \
    --to=anzaki@gmail.com \
    --cc=andrew@lunn.ch \
    --cc=coreteam@netfilter.org \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=olteanv@gmail.com \
    --cc=pabeni@redhat.com \
    --cc=pablo@netfilter.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox