* [PATCH nf v2 1/1] netfilter: nf_flow_table: separate tunnel route state from direct xmit
@ 2026-06-17 17:06 Ren Wei
2026-06-18 10:30 ` Lorenzo Bianconi
0 siblings, 1 reply; 2+ messages in thread
From: Ren Wei @ 2026-06-17 17:06 UTC (permalink / raw)
To: netfilter-devel
Cc: pablo, fw, phil, lorenzo, yuantan098, yifanwucs, tomapufckgml,
bird, chzhengyang2023, n05ec
From: Zhengyang Chen<chzhengyang2023@lzu.edu.cn>
When a flow tuple carries tunnel metadata and uses
FLOW_OFFLOAD_XMIT_DIRECT, the transmit path may still need route state
for tunnel push. However, the current tuple layout stores direct xmit
L2 state and route state in overlapping runtime storage.
As a result, a tuple may keep tun_num set while the tunnel push path
later reads tuple->dst_cache, even though a direct xmit tuple only has
out.ifidx/out.h_source/out.h_dest stored in that area. This leads to
invalid dst usage and can trigger a crash in the tunnel transmit path.
Fix this by moving dst_cache and dst_cookie out of the runtime union so
that they can be shared by neighbour, xfrm, and direct tunnel flows.
For FLOW_OFFLOAD_XMIT_DIRECT tuples carrying tunnel metadata, preserve
route state in these shared fields and release it through the common
dst release path.
Keep dst validation on the forwarding tuple before the packet is
modified, and validate the tunnel consumer tuple from the same early
control point. This preserves protection for current NEIGH/XFRM users
of tuplehash->tuple.dst_cache while avoiding the late-check fallback
after decap, NAT, and TTL updates.
Hardware offload rule construction still assumes that direct xmit flows
do not carry tunnel route state, so reject that combination there for
now to avoid undefined offload behaviour.
Fixes: d30301ba4b07 ("netfilter: flowtable: Add IPIP tx sw acceleration")
Cc: stable@vger.kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Zhengyang Chen <chzhengyang2023@lzu.edu.cn>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
changes in v2:
- Move dst_cache and dst_cookie out of the runtime union instead of
introducing dedicated tunnel dst fields
- Reuse the shared dst_cache/dst_cookie storage for DIRECT tunnel
flows
- Simplify dst release through the common dst_cache path
- Update Fixes: to d30301ba4b07 ("netfilter: flowtable: Add IPIP tx sw
acceleration")
- v1 Link: https://lore.kernel.org/all/3947a39286d335b6136bbee26f8bf44b23471c69.1780580352.git.chzhengyang2023@lzu.edu.cn/
include/net/netfilter/nf_flow_table.h | 4 ++--
net/netfilter/nf_flow_table_core.c | 12 ++++++++----
net/netfilter/nf_flow_table_ip.c | 19 +++++++++++++++++++
net/netfilter/nf_flow_table_offload.c | 3 +++
4 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 7b23b245a5a8..369f6a717811 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -155,11 +155,11 @@ struct flow_offload_tuple {
tun_num:2,
in_vlan_ingress:2;
u16 mtu;
+ struct dst_entry *dst_cache;
+ u32 dst_cookie;
union {
struct {
- struct dst_entry *dst_cache;
u32 ifidx;
- u32 dst_cookie;
};
struct {
u32 ifidx;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 785d8c244a77..252b081319a7 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -127,12 +127,18 @@ static int flow_offload_fill_route(struct flow_offload *flow,
switch (route->tuple[dir].xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
+ if (flow_tuple->tun_num) {
+ flow_tuple->dst_cache = dst;
+ flow_tuple->dst_cookie =
+ flow_offload_dst_cookie(flow_tuple);
+ }
memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
ETH_ALEN);
memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
- dst_release(dst);
+ if (!flow_tuple->tun_num)
+ dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -152,9 +158,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
static void nft_flow_dst_release(struct flow_offload *flow,
enum flow_offload_tuple_dir dir)
{
- if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
- flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
- dst_release(flow->tuplehash[dir].tuple.dst_cache);
+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
}
void flow_offload_route_init(struct flow_offload *flow,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9c05a50d6013..b125868ab1fb 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -299,6 +299,11 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
+ if (tuple->tun_num &&
+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT &&
+ !dst_check(tuple->dst_cache, tuple->dst_cookie))
+ return false;
+
if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
return true;
@@ -482,6 +487,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
struct flow_offload_tuple_rhash *tuplehash,
struct sk_buff *skb)
{
+ struct flow_offload_tuple *other_tuple;
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
unsigned int thoff, mtu;
@@ -507,6 +513,12 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
return 0;
}
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->tun_num && !nf_flow_dst_check(other_tuple)) {
+ flow_offload_teardown(flow);
+ return 0;
+ }
+
if (skb_ensure_writable(skb, thoff + ctx->hdrsize))
return -1;
@@ -1091,6 +1103,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
struct flow_offload_tuple_rhash *tuplehash,
struct sk_buff *skb, int encap_limit)
{
+ struct flow_offload_tuple *other_tuple;
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
unsigned int thoff, mtu;
@@ -1119,6 +1132,12 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
return 0;
}
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->tun_num && !nf_flow_dst_check(other_tuple)) {
+ flow_offload_teardown(flow);
+ return 0;
+ }
+
if (skb_ensure_writable(skb, thoff + ctx->hdrsize))
return -1;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 002ec15d988b..e3ace6435074 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -820,6 +820,9 @@ nf_flow_offload_rule_alloc(struct net *net,
tuple = &flow->tuplehash[dir].tuple;
other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->tun_num &&
+ other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+ goto err_flow_match;
if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
other_dst = other_tuple->dst_cache;
--
2.43.0
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH nf v2 1/1] netfilter: nf_flow_table: separate tunnel route state from direct xmit
2026-06-17 17:06 [PATCH nf v2 1/1] netfilter: nf_flow_table: separate tunnel route state from direct xmit Ren Wei
@ 2026-06-18 10:30 ` Lorenzo Bianconi
0 siblings, 0 replies; 2+ messages in thread
From: Lorenzo Bianconi @ 2026-06-18 10:30 UTC (permalink / raw)
To: Ren Wei
Cc: netfilter-devel, pablo, fw, phil, yuantan098, yifanwucs,
tomapufckgml, bird, chzhengyang2023
[-- Attachment #1: Type: text/plain, Size: 8005 bytes --]
On Jun 18, Ren Wei wrote:
> From: Zhengyang Chen<chzhengyang2023@lzu.edu.cn>
>
> When a flow tuple carries tunnel metadata and uses
> FLOW_OFFLOAD_XMIT_DIRECT, the transmit path may still need route state
> for tunnel push. However, the current tuple layout stores direct xmit
> L2 state and route state in overlapping runtime storage.
>
> As a result, a tuple may keep tun_num set while the tunnel push path
> later reads tuple->dst_cache, even though a direct xmit tuple only has
> out.ifidx/out.h_source/out.h_dest stored in that area. This leads to
> invalid dst usage and can trigger a crash in the tunnel transmit path.
>
> Fix this by moving dst_cache and dst_cookie out of the runtime union so
> that they can be shared by neighbour, xfrm, and direct tunnel flows.
> For FLOW_OFFLOAD_XMIT_DIRECT tuples carrying tunnel metadata, preserve
> route state in these shared fields and release it through the common
> dst release path.
>
> Keep dst validation on the forwarding tuple before the packet is
> modified, and validate the tunnel consumer tuple from the same early
> control point. This preserves protection for current NEIGH/XFRM users
> of tuplehash->tuple.dst_cache while avoiding the late-check fallback
> after decap, NAT, and TTL updates.
>
> Hardware offload rule construction still assumes that direct xmit flows
> do not carry tunnel route state, so reject that combination there for
> now to avoid undefined offload behaviour.
>
> Fixes: d30301ba4b07 ("netfilter: flowtable: Add IPIP tx sw acceleration")
> Cc: stable@vger.kernel.org
> Reported-by: Yuan Tan <yuantan098@gmail.com>
> Reported-by: Yifan Wu <yifanwucs@gmail.com>
> Reported-by: Juefei Pu <tomapufckgml@gmail.com>
> Reported-by: Xin Liu <bird@lzu.edu.cn>
> Signed-off-by: Zhengyang Chen <chzhengyang2023@lzu.edu.cn>
> Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
> ---
> changes in v2:
> - Move dst_cache and dst_cookie out of the runtime union instead of
> introducing dedicated tunnel dst fields
> - Reuse the shared dst_cache/dst_cookie storage for DIRECT tunnel
> flows
> - Simplify dst release through the common dst_cache path
> - Update Fixes: to d30301ba4b07 ("netfilter: flowtable: Add IPIP tx sw
> acceleration")
> - v1 Link: https://lore.kernel.org/all/3947a39286d335b6136bbee26f8bf44b23471c69.1780580352.git.chzhengyang2023@lzu.edu.cn/
>
> include/net/netfilter/nf_flow_table.h | 4 ++--
> net/netfilter/nf_flow_table_core.c | 12 ++++++++----
> net/netfilter/nf_flow_table_ip.c | 19 +++++++++++++++++++
> net/netfilter/nf_flow_table_offload.c | 3 +++
> 4 files changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index 7b23b245a5a8..369f6a717811 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -155,11 +155,11 @@ struct flow_offload_tuple {
> tun_num:2,
> in_vlan_ingress:2;
> u16 mtu;
> + struct dst_entry *dst_cache;
> + u32 dst_cookie;
> union {
> struct {
> - struct dst_entry *dst_cache;
> u32 ifidx;
> - u32 dst_cookie;
> };
> struct {
> u32 ifidx;
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 785d8c244a77..252b081319a7 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -127,12 +127,18 @@ static int flow_offload_fill_route(struct flow_offload *flow,
>
> switch (route->tuple[dir].xmit_type) {
> case FLOW_OFFLOAD_XMIT_DIRECT:
> + if (flow_tuple->tun_num) {
> + flow_tuple->dst_cache = dst;
> + flow_tuple->dst_cookie =
> + flow_offload_dst_cookie(flow_tuple);
Hi Ren Wei,
If I read the code correctly flow_tuple->tun and flow_tuple->tun_num are set
according to route->tuple[].in.tun and route->tuple[].in.num_tuns. Moreover,
route->tuple[].in.tun/route->tuple[].in.num_tuns are set just in
nft_dev_forward_path() that is executed only for FLOW_OFFLOAD_XMIT_NEIGH
entries. Am I missing something here?
Moreover, can you please add a selftest for this case in
tools/testing/selftests/net/netfilter/nft_flowtable.sh? Thanks.
Regards,
Lorenzo
> + }
> memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
> ETH_ALEN);
> memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
> ETH_ALEN);
> flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
> - dst_release(dst);
> + if (!flow_tuple->tun_num)
> + dst_release(dst);
> break;
> case FLOW_OFFLOAD_XMIT_XFRM:
> case FLOW_OFFLOAD_XMIT_NEIGH:
> @@ -152,9 +158,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
> static void nft_flow_dst_release(struct flow_offload *flow,
> enum flow_offload_tuple_dir dir)
> {
> - if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
> - flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
> - dst_release(flow->tuplehash[dir].tuple.dst_cache);
> + dst_release(flow->tuplehash[dir].tuple.dst_cache);
> }
>
> void flow_offload_route_init(struct flow_offload *flow,
> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> index 9c05a50d6013..b125868ab1fb 100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
> @@ -299,6 +299,11 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
>
> static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
> {
> + if (tuple->tun_num &&
> + tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT &&
> + !dst_check(tuple->dst_cache, tuple->dst_cookie))
> + return false;
> +
> if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
> tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
> return true;
> @@ -482,6 +487,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
> struct flow_offload_tuple_rhash *tuplehash,
> struct sk_buff *skb)
> {
> + struct flow_offload_tuple *other_tuple;
> enum flow_offload_tuple_dir dir;
> struct flow_offload *flow;
> unsigned int thoff, mtu;
> @@ -507,6 +513,12 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
> return 0;
> }
>
> + other_tuple = &flow->tuplehash[!dir].tuple;
> + if (other_tuple->tun_num && !nf_flow_dst_check(other_tuple)) {
> + flow_offload_teardown(flow);
> + return 0;
> + }
> +
> if (skb_ensure_writable(skb, thoff + ctx->hdrsize))
> return -1;
>
> @@ -1091,6 +1103,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
> struct flow_offload_tuple_rhash *tuplehash,
> struct sk_buff *skb, int encap_limit)
> {
> + struct flow_offload_tuple *other_tuple;
> enum flow_offload_tuple_dir dir;
> struct flow_offload *flow;
> unsigned int thoff, mtu;
> @@ -1119,6 +1132,12 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
> return 0;
> }
>
> + other_tuple = &flow->tuplehash[!dir].tuple;
> + if (other_tuple->tun_num && !nf_flow_dst_check(other_tuple)) {
> + flow_offload_teardown(flow);
> + return 0;
> + }
> +
> if (skb_ensure_writable(skb, thoff + ctx->hdrsize))
> return -1;
>
> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
> index 002ec15d988b..e3ace6435074 100644
> --- a/net/netfilter/nf_flow_table_offload.c
> +++ b/net/netfilter/nf_flow_table_offload.c
> @@ -820,6 +820,9 @@ nf_flow_offload_rule_alloc(struct net *net,
>
> tuple = &flow->tuplehash[dir].tuple;
> other_tuple = &flow->tuplehash[!dir].tuple;
> + if (other_tuple->tun_num &&
> + other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
> + goto err_flow_match;
> if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
> other_dst = other_tuple->dst_cache;
>
> --
> 2.43.0
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-06-18 10:31 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-17 17:06 [PATCH nf v2 1/1] netfilter: nf_flow_table: separate tunnel route state from direct xmit Ren Wei
2026-06-18 10:30 ` Lorenzo Bianconi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.