Netdev List
 help / color / mirror / Atom feed
From: Jihong Min <hurryman2212@gmail.com>
To: netdev@vger.kernel.org
Cc: Jay Vosburgh <jv@jvosburgh.net>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Simon Horman <horms@kernel.org>,
	Steffen Klassert <steffen.klassert@secunet.com>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	linux-kernel@vger.kernel.org, Jihong Min <hurryman2212@gmail.com>
Subject: [PATCH RFC net-next 1/4] xfrm: add a lower-device offload handle resolver
Date: Wed, 20 May 2026 17:10:01 +0900	[thread overview]
Message-ID: <20260520081004.2232091-2-hurryman2212@gmail.com> (raw)
In-Reply-To: <20260520081004.2232091-1-hurryman2212@gmail.com>

An upper device can own an XFRM offload state while the selected
datapath device is one of its lower devices. A single xso.offload_handle
is not enough for that case because each lower device may return a
different hardware handle for the same state.

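Add an optional xfrmdev_ops resolver and a lower-driver opt-in flag so
helper-aware lower drivers can resolve the handle for the lower device
they are transmitting or receiving on. Keep the direct-device path as
the fast path, and clear the upper device's private state pointer
(xso.upper_priv) when the device offload state is freed. Also let
xfrm_dst_offload_ok() treat a state as offloaded when either
xso.offload_handle or xso.upper_priv is set.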

Assisted-by: Codex:gpt-5.5
Signed-off-by: Jihong Min <hurryman2212@gmail.com>
---
 include/linux/netdevice.h | 27 ++++++++++++++++++++++
 include/net/xfrm.h        | 48 +++++++++++++++++++++++++++++++++++++--
 net/xfrm/xfrm_state.c     |  1 +
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0e1e581efc5a..b4e844e90db8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1033,6 +1033,16 @@ struct netdev_bpf {
 #define XDP_WAKEUP_TX (1 << 1)
 
 #ifdef CONFIG_XFRM_OFFLOAD
+/*
+ * xfrmdev_ops.flags values.
+ *
+ * XFRMDEV_OPS_F_LOWER_HANDLE marks a lower driver whose datapath gets XFRM
+ * hardware handles with xfrm_dev_state_lower_handle(). This is required when
+ * the XFRM state is owned by an upper device because xso.offload_handle may
+ * not contain the handle for the current lower device.
+ */
+#define XFRMDEV_OPS_F_LOWER_HANDLE	BIT(0)
+
 struct xfrmdev_ops {
 	int	(*xdo_dev_state_add)(struct net_device *dev,
 				     struct xfrm_state *x,
@@ -1048,6 +1058,23 @@ struct xfrmdev_ops {
 	int	(*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
 	void	(*xdo_dev_policy_delete) (struct xfrm_policy *x);
 	void	(*xdo_dev_policy_free) (struct xfrm_policy *x);
+	/*
+	 * Resolve the offload handle for lower_dev when this upper device
+	 * owns the XFRM state. This belongs in xfrmdev_ops because the
+	 * resolver is an XFRM offload operation of the device that owns the
+	 * state. Keeping the dispatch here avoids a bonding-specific dependency
+	 * in the XFRM helper.
+	 *
+	 * Upper devices like bonding may implement this callback when they
+	 * keep the lower-device handle mapping. Lower devices must leave it
+	 * NULL because they do not own that map. Lower drivers instead set
+	 * XFRMDEV_OPS_F_LOWER_HANDLE in flags to advertise that their
+	 * datapath uses xfrm_dev_state_lower_handle().
+	 */
+	unsigned long (*xdo_dev_state_lower_handle)(struct net_device *dev,
+						    struct xfrm_state *x,
+						    struct net_device *lower_dev);
+	u32	flags;
 };
 #endif
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10d3edde6b2f..b61e2c023eb4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -162,6 +162,10 @@ struct xfrm_dev_offload {
 	 */
 	struct net_device	*real_dev;
 	unsigned long		offload_handle;
+	/* Private state owned by this structure's dev when dev is an upper
+	 * device. Lower drivers must not use this directly.
+	 */
+	void __rcu		*upper_priv;
 	u8			dir : 2;
 	u8			type : 2;
 	u8			flags : 2;
@@ -1700,6 +1704,37 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
 int xfrm_state_check_expire(struct xfrm_state *x);
 void xfrm_state_update_stats(struct net *net);
 #ifdef CONFIG_XFRM_OFFLOAD
+/*
+ * Return the hardware offload handle lower_dev should use for x. States
+ * installed directly on lower_dev use xso.offload_handle. States owned by an
+ * upper device are resolved through the owner's xdo_dev_state_lower_handle().
+ * Bonding uses that callback for replicated XFRM states because it installs the
+ * state on each slave and keeps the per-slave hardware handles internally.
+ */
+static inline unsigned long
+xfrm_dev_state_lower_handle(struct xfrm_state *x, struct net_device *lower_dev)
+{
+	struct xfrm_dev_offload *xdo = &x->xso;
+	struct net_device *real_dev = READ_ONCE(xdo->real_dev);
+	struct net_device *dev = READ_ONCE(xdo->dev);
+	unsigned long offload_handle = READ_ONCE(xdo->offload_handle);
+
+	if (!dev || !lower_dev)
+		return 0;
+
+	if (dev == lower_dev)
+		return offload_handle;
+
+	if (dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_lower_handle)
+		return dev->xfrmdev_ops->xdo_dev_state_lower_handle(dev, x,
+								    lower_dev);
+
+	if (real_dev == lower_dev)
+		return offload_handle;
+
+	return 0;
+}
+
 static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
 {
 	struct xfrm_dev_offload *xdo = &x->xso;
@@ -1711,6 +1746,12 @@ static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
 
 }
 #else
+static inline unsigned long
+xfrm_dev_state_lower_handle(struct xfrm_state *x, struct net_device *lower_dev)
+{
+	return 0;
+}
+
 static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
 #endif
 void xfrm_state_insert(struct xfrm_state *x);
@@ -2089,15 +2130,18 @@ static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
 static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 {
 	struct xfrm_state *x = dst->xfrm;
+	bool has_offload_state;
 	struct xfrm_dst *xdst;
 
 	if (!x || !x->type_offload)
 		return false;
 
 	xdst = (struct xfrm_dst *) dst;
-	if (!x->xso.offload_handle && !xdst->child->xfrm)
+	has_offload_state = x->xso.offload_handle ||
+			    rcu_access_pointer(x->xso.upper_priv);
+	if (!has_offload_state && !xdst->child->xfrm)
 		return true;
-	if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
+	if (has_offload_state && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
 	    !xdst->child->xfrm)
 		return true;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 686014d39429..584f913751bf 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -791,6 +791,7 @@ void xfrm_dev_state_free(struct xfrm_state *x)
 		if (dev->xfrmdev_ops->xdo_dev_state_free)
 			dev->xfrmdev_ops->xdo_dev_state_free(dev, x);
 		WRITE_ONCE(xso->dev, NULL);
+		RCU_INIT_POINTER(xso->upper_priv, NULL);
 		xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
 		netdev_put(dev, &xso->dev_tracker);
 	}
-- 
2.53.0

  reply	other threads:[~2026-05-20  8:10 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-20  8:10 [PATCH RFC net-next 0/4] bonding: support LAG IPsec offload with replicated SAs Jihong Min
2026-05-20  8:10 ` Jihong Min [this message]
2026-05-20  8:10 ` [PATCH RFC net-next 2/4] bonding: replicate XFRM offload state across LAG slaves Jihong Min
2026-05-20  8:10 ` [PATCH RFC net-next 3/4] bonding: expose user-controlled IPsec features for LAG Jihong Min
2026-05-20  8:10 ` [PATCH RFC net-next 4/4] bonding: handle replicated IPsec SAs across LAG changes Jihong Min

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260520081004.2232091-2-hurryman2212@gmail.com \
    --to=hurryman2212@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=horms@kernel.org \
    --cc=jv@jvosburgh.net \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox