From: Jihong Min <hurryman2212@gmail.com>
To: netdev@vger.kernel.org
Cc: Jay Vosburgh <jv@jvosburgh.net>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>,
Steffen Klassert <steffen.klassert@secunet.com>,
Herbert Xu <herbert@gondor.apana.org.au>,
linux-kernel@vger.kernel.org, Jihong Min <hurryman2212@gmail.com>
Subject: [PATCH RFC net-next 1/4] xfrm: add a lower-device offload handle resolver
Date: Wed, 20 May 2026 17:10:01 +0900 [thread overview]
Message-ID: <20260520081004.2232091-2-hurryman2212@gmail.com> (raw)
In-Reply-To: <20260520081004.2232091-1-hurryman2212@gmail.com>
An upper device can own an XFRM offload state while the selected
datapath device is one of its lower devices. A single xso.offload_handle
is not enough for that case because each lower device may return a
different hardware handle for the same state.

Add an optional xfrmdev_ops resolver, xdo_dev_state_lower_handle(), and
a lower-driver opt-in flag, XFRMDEV_OPS_F_LOWER_HANDLE, so that lower
drivers using the new xfrm_dev_state_lower_handle() helper can resolve
the handle for the lower device they are transmitting or receiving on.
Keep the direct-device path as the fast path, and clear the upper
device's private state (xso.upper_priv) when the device offload state
is freed.
Assisted-by: Codex:gpt-5.5
Signed-off-by: Jihong Min <hurryman2212@gmail.com>
---
include/linux/netdevice.h | 27 ++++++++++++++++++++++
include/net/xfrm.h | 48 +++++++++++++++++++++++++++++++++++++--
net/xfrm/xfrm_state.c | 1 +
3 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0e1e581efc5a..b4e844e90db8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1033,6 +1033,16 @@ struct netdev_bpf {
#define XDP_WAKEUP_TX (1 << 1)
#ifdef CONFIG_XFRM_OFFLOAD
+/*
+ * xfrmdev_ops.flags values.
+ *
+ * XFRMDEV_OPS_F_LOWER_HANDLE marks a lower driver whose datapath gets XFRM
+ * hardware handles with xfrm_dev_state_lower_handle(). This is required when
+ * the XFRM state is owned by an upper device because xso.offload_handle may
+ * not contain the handle for the current lower device.
+ */
+#define XFRMDEV_OPS_F_LOWER_HANDLE BIT(0)
+
struct xfrmdev_ops {
int (*xdo_dev_state_add)(struct net_device *dev,
struct xfrm_state *x,
@@ -1048,6 +1058,23 @@ struct xfrmdev_ops {
int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
void (*xdo_dev_policy_free) (struct xfrm_policy *x);
+ /*
+ * Resolve the offload handle for lower_dev when this upper device
+ * owns the XFRM state. This belongs in xfrmdev_ops because the
+ * resolver is an XFRM offload operation of the device that owns the
+ * state. Keeping the dispatch here avoids a bonding-specific dependency
+ * in the XFRM helper.
+ *
+ * Upper devices like bonding may implement this callback when they
+ * keep the lower-device handle mapping. Lower devices must leave it
+ * NULL because they do not own that map. Lower drivers advertise
+ * that their datapath calls the resolver with
+ * XFRMDEV_OPS_F_LOWER_HANDLE instead.
+ */
+ unsigned long (*xdo_dev_state_lower_handle)(struct net_device *dev,
+ struct xfrm_state *x,
+ struct net_device *lower_dev);
+ u32 flags;
};
#endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10d3edde6b2f..b61e2c023eb4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -162,6 +162,10 @@ struct xfrm_dev_offload {
*/
struct net_device *real_dev;
unsigned long offload_handle;
+ /* Private state owned by dev in this structure when that device is an
+ * upper device. Lower drivers must not use this directly.
+ */
+ void __rcu *upper_priv;
u8 dir : 2;
u8 type : 2;
u8 flags : 2;
@@ -1700,6 +1704,37 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
int xfrm_state_check_expire(struct xfrm_state *x);
void xfrm_state_update_stats(struct net *net);
#ifdef CONFIG_XFRM_OFFLOAD
+/*
+ * Return the hardware offload handle lower_dev should use for x, or 0 if none
+ * can be resolved. States installed directly on lower_dev use
+ * xso.offload_handle. States owned by an upper device are resolved through the
+ * owner's xdo_dev_state_lower_handle(); bonding uses it for replicated states,
+ * installing the state on each slave and keeping the per-slave handles itself.
+ */
+static inline unsigned long
+xfrm_dev_state_lower_handle(struct xfrm_state *x, struct net_device *lower_dev)
+{
+ struct xfrm_dev_offload *xdo = &x->xso;
+ struct net_device *real_dev = READ_ONCE(xdo->real_dev);
+ struct net_device *dev = READ_ONCE(xdo->dev);
+ unsigned long offload_handle = READ_ONCE(xdo->offload_handle);
+
+ if (!dev || !lower_dev)
+ return 0;
+ /* Fast path: the state is installed directly on lower_dev. */
+ if (dev == lower_dev)
+ return offload_handle;
+ /* Otherwise ask the owning device's resolver, if it provides one. */
+ if (dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_lower_handle)
+ return dev->xfrmdev_ops->xdo_dev_state_lower_handle(dev, x,
+ lower_dev);
+ /* No resolver: use xso.offload_handle only if real_dev is lower_dev. */
+ if (real_dev == lower_dev)
+ return offload_handle;
+
+ return 0;
+}
+
static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
struct xfrm_dev_offload *xdo = &x->xso;
@@ -1711,6 +1746,12 @@ static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
}
#else
+static inline unsigned long
+xfrm_dev_state_lower_handle(struct xfrm_state *x, struct net_device *lower_dev)
+{
+ return 0; /* CONFIG_XFRM_OFFLOAD is disabled: no hardware handle exists */
+}
+
static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
#endif
void xfrm_state_insert(struct xfrm_state *x);
@@ -2089,15 +2130,18 @@ static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
+ bool has_offload_state;
struct xfrm_dst *xdst;
if (!x || !x->type_offload)
return false;
xdst = (struct xfrm_dst *) dst;
- if (!x->xso.offload_handle && !xdst->child->xfrm)
+ has_offload_state = x->xso.offload_handle ||
+ rcu_access_pointer(x->xso.upper_priv);
+ if (!has_offload_state && !xdst->child->xfrm)
return true;
- if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
+ if (has_offload_state && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
!xdst->child->xfrm)
return true;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 686014d39429..584f913751bf 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -791,6 +791,7 @@ void xfrm_dev_state_free(struct xfrm_state *x)
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(dev, x);
WRITE_ONCE(xso->dev, NULL);
+ RCU_INIT_POINTER(xso->upper_priv, NULL);
xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
netdev_put(dev, &xso->dev_tracker);
}
--
2.53.0
next prev parent reply other threads:[~2026-05-20 8:10 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-20 8:10 [PATCH RFC net-next 0/4] bonding: support LAG IPsec offload with replicated SAs Jihong Min
2026-05-20 8:10 ` Jihong Min [this message]
2026-05-20 8:10 ` [PATCH RFC net-next 2/4] bonding: replicate XFRM offload state across LAG slaves Jihong Min
2026-05-20 8:10 ` [PATCH RFC net-next 3/4] bonding: expose user-controlled IPsec features for LAG Jihong Min
2026-05-20 8:10 ` [PATCH RFC net-next 4/4] bonding: handle replicated IPsec SAs across LAG changes Jihong Min
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260520081004.2232091-2-hurryman2212@gmail.com \
--to=hurryman2212@gmail.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=horms@kernel.org \
--cc=jv@jvosburgh.net \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=steffen.klassert@secunet.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.