From: Ahmed Zaki <anzaki@gmail.com>
To: netfilter-devel@vger.kernel.org, andrew@lunn.ch,
olteanv@gmail.com, pablo@netfilter.org, fw@strlen.de,
kuba@kernel.org, pabeni@redhat.com, edumazet@google.com
Cc: coreteam@netfilter.org, netdev@vger.kernel.org
Subject: [PATCH nf-next v2 1/2] netfilter: flowtable: update netdev stats with HW_OFFLOAD flows
Date: Tue, 24 Mar 2026 14:40:15 -0600 [thread overview]
Message-ID: <20260324204016.2089193-2-anzaki@gmail.com> (raw)
In-Reply-To: <20260324204016.2089193-1-anzaki@gmail.com>
Some drivers (notably DSA) delegate the nft flowtable HW_OFFLOAD flows
to a parent driver. While the parent driver is able to report the
offloaded traffic stats directly from the HW, the delegating driver
does not report the stats. This breaks SNMP-based monitoring tools that
rely on netdev stats to report the network traffic.
Add a new struct pcpu_sw_netstats "fstats" to net_device that gets
allocated only if the new flag "flow_offload_via_parent" is set by the
driver. The new stats are lazily allocated by the nft flow offloading
code when the first flow is offloaded. The stats are updated periodically
in flow_offload_work_stats() and also once in flow_offload_work_del()
before the flow is deleted. For this, flow_offload_work_del() had to
be moved below flow_offload_tuple_stats().
Signed-off-by: Ahmed Zaki <anzaki@gmail.com>
---
include/linux/netdevice.h | 45 ++++++++++++
net/core/dev.c | 8 +++
net/netfilter/nf_flow_table_offload.c | 98 +++++++++++++++++++++++++--
3 files changed, 145 insertions(+), 6 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 67e25f6d15a4..647758f78213 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1840,6 +1840,11 @@ enum netdev_reg_state {
* @stats: Statistics struct, which was left as a legacy, use
* rtnl_link_stats64 instead
*
+ * @fstats: HW offloaded flow statistics: RX/TX packets,
+ * RX/TX bytes. Lazily allocated by the flow offload
+ * path on the first offloaded flow for devices that
+ * set @flow_offload_via_parent. Freed by free_netdev().
+ *
* @core_stats: core networking counters,
* do not use this in drivers
* @carrier_up_count: Number of times the carrier has been up
@@ -2048,6 +2053,12 @@ enum netdev_reg_state {
* @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN
* @netns_immutable: interface can't change network namespaces
* @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes
+ * @flow_offload_via_parent: device delegates nft flowtable hardware
+ * offload to a parent/conduit device (e.g. DSA
+ * user ports delegate to their conduit MAC).
+ * The parent's HW counts the offloaded traffic
+ * but this device's sw netstats path does not.
+ * @fstats is allocated to fill that gap.
*
* @net_notifier_list: List of per-net netdev notifier block
* that follow this device when it is moved
@@ -2233,6 +2244,7 @@ struct net_device {
struct net_device_stats stats; /* not used by modern drivers */
+ struct pcpu_sw_netstats __percpu *fstats;
struct net_device_core_stats __percpu *core_stats;
/* Stats to monitor link on/off, flapping */
@@ -2463,6 +2475,7 @@ struct net_device {
unsigned long change_proto_down:1;
unsigned long netns_immutable:1;
unsigned long fcoe_mtu:1;
+ unsigned long flow_offload_via_parent:1;
struct list_head net_notifier_list;
@@ -2992,6 +3005,38 @@ struct pcpu_lstats {
void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes);
+/**
+ * dev_fstats_rx_add - add offloaded RX traffic to a device's fstats
+ * @dev: device whose offloaded-flow counters are updated
+ * @packets: number of received packets to add
+ * @len: number of received bytes to add
+ *
+ * Does nothing when @dev->fstats has not been allocated.
+ *
+ * @packets and @len are u64 so that the 64-bit counters handed in by the
+ * flow offload code are not narrowed to 32 bits at the call boundary.
+ * The per-CPU slot is obtained with get_cpu_ptr(), so the update is also
+ * valid when the caller runs in preemptible context.
+ */
+static inline void dev_fstats_rx_add(struct net_device *dev,
+				     u64 packets, u64 len)
+{
+	struct pcpu_sw_netstats __percpu *pcpu;
+	struct pcpu_sw_netstats *fstats;
+
+	/* Single read: the pointer is published lock-free via cmpxchg(). */
+	pcpu = READ_ONCE(dev->fstats);
+	if (!pcpu)
+		return;
+
+	/* Pin the CPU for the whole begin/end section. */
+	fstats = get_cpu_ptr(pcpu);
+	u64_stats_update_begin(&fstats->syncp);
+	u64_stats_add(&fstats->rx_bytes, len);
+	u64_stats_add(&fstats->rx_packets, packets);
+	u64_stats_update_end(&fstats->syncp);
+	put_cpu_ptr(pcpu);
+}
+
+/**
+ * dev_fstats_tx_add - add offloaded TX traffic to a device's fstats
+ * @dev: device whose offloaded-flow counters are updated
+ * @packets: number of transmitted packets to add
+ * @len: number of transmitted bytes to add
+ *
+ * Does nothing when @dev->fstats has not been allocated.
+ *
+ * @packets and @len are u64 so that the 64-bit counters handed in by the
+ * flow offload code are not narrowed to 32 bits at the call boundary.
+ * The per-CPU slot is obtained with get_cpu_ptr(), so the update is also
+ * valid when the caller runs in preemptible context.
+ */
+static inline void dev_fstats_tx_add(struct net_device *dev,
+				     u64 packets, u64 len)
+{
+	struct pcpu_sw_netstats __percpu *pcpu;
+	struct pcpu_sw_netstats *fstats;
+
+	/* Single read: the pointer is published lock-free via cmpxchg(). */
+	pcpu = READ_ONCE(dev->fstats);
+	if (!pcpu)
+		return;
+
+	/* Pin the CPU for the whole begin/end section. */
+	fstats = get_cpu_ptr(pcpu);
+	u64_stats_update_begin(&fstats->syncp);
+	u64_stats_add(&fstats->tx_bytes, len);
+	u64_stats_add(&fstats->tx_packets, packets);
+	u64_stats_update_end(&fstats->syncp);
+	put_cpu_ptr(pcpu);
+}
+
static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len)
{
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
diff --git a/net/core/dev.c b/net/core/dev.c
index f48dc299e4b2..07fb315ad42c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11865,6 +11865,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct net_device_core_stats __percpu *p;
+ const struct pcpu_sw_netstats __percpu *fstats;
/*
* IPv{4,6} and udp tunnels share common stat helpers and use
@@ -11893,6 +11894,11 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
netdev_stats_to_stats64(storage, &dev->stats);
}
+ /* This READ_ONCE() pairs with cmpxchg in flow_offload_fstats_ensure() */
+ fstats = READ_ONCE(dev->fstats);
+ if (fstats)
+ dev_fetch_sw_netstats(storage, fstats);
+
/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
p = READ_ONCE(dev->core_stats);
if (p) {
@@ -12212,6 +12218,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
#endif
+ free_percpu(dev->fstats);
+ dev->fstats = NULL;
free_percpu(dev->core_stats);
dev->core_stats = NULL;
free_percpu(dev->xdp_bulkq);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b2e4fb6fa011..fc1e67a79904 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -925,13 +925,80 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
nf_flow_offload_destroy(flow_rule);
}
-static void flow_offload_work_del(struct flow_offload_work *offload)
+/*
+ * Make sure @dev has per-CPU fstats storage.
+ *
+ * Returns false when the device does not set flow_offload_via_parent or
+ * when the allocation fails; returns true when dev->fstats is (now) set,
+ * whether this call or a concurrent one installed it.
+ */
+static bool flow_offload_fstats_ensure(struct net_device *dev)
{
- clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
- flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
- if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
- flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
- set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
+	struct pcpu_sw_netstats __percpu *fresh;
+
+	if (!dev->flow_offload_via_parent)
+		return false;
+
+	/* Already installed; this read pairs with the cmpxchg() below. */
+	if (likely(READ_ONCE(dev->fstats)))
+		return true;
+
+	fresh = __netdev_alloc_pcpu_stats(struct pcpu_sw_netstats, GFP_ATOMIC);
+	if (!fresh)
+		return false;
+
+	/*
+	 * Install only if the slot is still NULL. A non-NULL old value
+	 * means somebody else installed first: keep theirs, drop ours.
+	 */
+	if (cmpxchg(&dev->fstats, NULL, fresh))
+		free_percpu(fresh);
+
+	return true;
+}
+
+/*
+ * Pick the egress ifindex stored in @tuple according to its xmit_type.
+ * Returns 0 for any xmit_type other than NEIGH or DIRECT.
+ */
+static u32 flow_offload_egress_ifidx(const struct flow_offload_tuple *tuple)
+{
+	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+		return tuple->ifidx;
+
+	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+		return tuple->out.ifidx;
+
+	return 0;
+}
+
+/*
+ * Add one direction's counters to the devices named by @tuple: RX on the
+ * device looked up from tuple->iifidx, TX on the device looked up from
+ * flow_offload_egress_ifidx(). A device is skipped when the lookup fails
+ * or flow_offload_fstats_ensure() returns false.
+ *
+ * The caller holds rcu_read_lock() around the dev_get_by_index_rcu() calls.
+ */
+static void flow_offload_netdev_update_dir(struct net *net,
+					   const struct flow_offload_tuple *tuple,
+					   const struct flow_stats *st)
+{
+	struct net_device *dev;
+
+	/* No packets reported for this direction: nothing to add. */
+	if (!st->pkts)
+		return;
+
+	dev = dev_get_by_index_rcu(net, tuple->iifidx);
+	if (dev && flow_offload_fstats_ensure(dev))
+		dev_fstats_rx_add(dev, st->pkts, st->bytes);
+
+	dev = dev_get_by_index_rcu(net, flow_offload_egress_ifidx(tuple));
+	if (dev && flow_offload_fstats_ensure(dev))
+		dev_fstats_tx_add(dev, st->pkts, st->bytes);
+}
+
+/*
+ * Add the per-direction counters in @stats (indexed by
+ * FLOW_OFFLOAD_DIR_ORIGINAL / FLOW_OFFLOAD_DIR_REPLY) to the fstats of the
+ * ingress and egress devices of @offload's flow, original direction first.
+ */
+static void flow_offload_netdev_update(struct flow_offload_work *offload,
+				       struct flow_stats *stats)
+{
+	static const int dirs[] = {
+		FLOW_OFFLOAD_DIR_ORIGINAL,
+		FLOW_OFFLOAD_DIR_REPLY,
+	};
+	struct net *net;
+	unsigned int i;
+
+	rcu_read_lock();
+	net = read_pnet(&offload->flowtable->net);
+	for (i = 0; i < ARRAY_SIZE(dirs); i++)
+		flow_offload_netdev_update_dir(net,
+				&offload->flow->tuplehash[dirs[i]].tuple,
+				&stats[dirs[i]]);
+	rcu_read_unlock();
}
static void flow_offload_tuple_stats(struct flow_offload_work *offload,
@@ -968,6 +1035,25 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
FLOW_OFFLOAD_DIR_REPLY,
stats[1].pkts, stats[1].bytes);
}
+
+ flow_offload_netdev_update(offload, stats);
+}
+
+/*
+ * Remove a flow from hardware.
+ *
+ * The counters are read one more time and handed to
+ * flow_offload_netdev_update() before the tuples are deleted; the reply
+ * direction is only queried and deleted when NF_FLOW_HW_BIDIRECTIONAL is
+ * set. The flow is flagged NF_FLOW_HW_DEAD at the end.
+ */
+static void flow_offload_work_del(struct flow_offload_work *offload)
+{
+	struct flow_stats last[FLOW_OFFLOAD_DIR_MAX] = {};
+
+	/* Final counter read; must happen before the deletes below. */
+	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &last[0]);
+	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+		flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
+					 &last[1]);
+	flow_offload_netdev_update(offload, last);
+
+	/* Tear-down, in the same order as before this function was moved. */
+	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
+	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+		flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}
static void flow_offload_work_handler(struct work_struct *work)
--
2.43.0
next prev parent reply other threads:[~2026-03-24 20:41 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-24 20:40 [PATCH nf-next v2 0/2] Update (DSA) netdev stats with offloaded flows Ahmed Zaki
2026-03-24 20:40 ` Ahmed Zaki [this message]
2026-03-24 21:28 ` [PATCH nf-next v2 1/2] netfilter: flowtable: update netdev stats with HW_OFFLOAD flows Pablo Neira Ayuso
2026-03-24 23:27 ` Ahmed Zaki
2026-03-24 20:40 ` [PATCH nf-next v2 2/2] net: dsa: update net_device stats with HW offloaded flows stats Ahmed Zaki
2026-05-04 11:18 ` [PATCH nf-next v2 0/2] Update (DSA) netdev stats with offloaded flows Ahmed Zaki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260324204016.2089193-2-anzaki@gmail.com \
--to=anzaki@gmail.com \
--cc=andrew@lunn.ch \
--cc=coreteam@netfilter.org \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=olteanv@gmail.com \
--cc=pabeni@redhat.com \
--cc=pablo@netfilter.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.