netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Lorenzo Bianconi <lorenzo@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>,
	David Ahern <dsahern@kernel.org>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Simon Horman <horms@kernel.org>,
	Jozsef Kadlecsik <kadlec@netfilter.org>,
	Shuah Khan <shuah@kernel.org>,
	Andrew Lunn <andrew+netdev@lunn.ch>, Phil Sutter <phil@nwl.cc>,
	Florian Westphal <fw@strlen.de>,
	netdev@vger.kernel.org, netfilter-devel@vger.kernel.org,
	coreteam@netfilter.org, linux-kselftest@vger.kernel.org
Subject: Re: [PATCH nf-next v8 2/3] net: netfilter: Add IPIP flowtable tx sw acceleration
Date: Tue, 4 Nov 2025 23:52:14 +0100	[thread overview]
Message-ID: <aQqDnjv8KLtQJaOW@calendula> (raw)
In-Reply-To: <20251023-nf-flowtable-ipip-v8-2-5d5d8595c730@kernel.org>

On Thu, Oct 23, 2025 at 10:50:16AM +0200, Lorenzo Bianconi wrote:
[...]
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 0355461960ce3c0db49e00a6f77f48b031a635dc..eb8058fd7139a2b5457008146f979590f9f03c1d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -897,6 +897,9 @@ struct net_device_path {
>  			};
>  
>  			u8	l3_proto;
> +			u8	tos;
> +			u8	ttl;
> +			__be16	df;
>  		} tun;
>  		struct {
>  			enum {
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index 6d00a8aa52584ad96d200683297c1b02bf1f6d4f..fe792f5a8f0528de021c27382b235688532614e4 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -119,6 +119,9 @@ struct flow_offload_tunnel {
>  	};
>  
>  	u8	l3_proto;
> +	u8	tos;
> +	u8	ttl;
> +	__be16	df;

These fields are now included in the hash that is used for the lookup;
is it intentional to include them here? For rx, we cannot know the ttl
of the received packet?

Maybe this needs to be moved after the placeholder:

        struct { }                      __hash;

>  };
>  
>  struct flow_offload_tuple {
[...]
> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> index 76081d5d2f71c10e0c65e906b3fb2769e3ab1466..a66ffa0c7fbe780a9f9a545e42d44dfe408e7cb2 100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
[...]
> @@ -533,6 +589,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>  	struct flow_offload *flow;
>  	struct neighbour *neigh;
>  	struct rtable *rt;
> +	__be32 dest;
>  	int ret;
>  
>  	tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
> @@ -555,8 +612,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>  
>  	dir = tuplehash->tuple.dir;
>  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> +	reply_tuple = &flow->tuplehash[!dir].tuple;

Nit: I'd suggest 'other_tuple' instead of 'reply_tuple', given this is
not strictly the reply tuple, just the tuple from the other direction.

> -	if (nf_flow_encap_push(skb, &flow->tuplehash[!dir].tuple) < 0)
> +	if (nf_flow_encap_push(state->net, skb, reply_tuple))
>  		return NF_DROP;
>  
>  	switch (tuplehash->tuple.xmit_type) {
> @@ -567,7 +625,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>  			flow_offload_teardown(flow);
>  			return NF_DROP;
>  		}
> -		neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr));
> +		dest = reply_tuple->tun_num ? reply_tuple->tun.src_v4.s_addr
> +					    : reply_tuple->src_v4.s_addr;
> +		neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, dest));
>  		if (IS_ERR(neigh)) {
>  			flow_offload_teardown(flow);
>  			return NF_DROP;
> diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
> index bd5e9bf1ca393ab793976ba98a027b60f84882ba..cd0be2efe97596d0947621a5ea604373d5b61da8 100644
> --- a/net/netfilter/nf_flow_table_path.c
> +++ b/net/netfilter/nf_flow_table_path.c
> @@ -190,7 +190,43 @@ static bool nft_flowtable_find_dev(const struct net_device *dev,
>  	return found;
>  }
>  
> -static void nft_dev_forward_path(struct nf_flow_route *route,
> +static int nft_flow_tunnel_update_route(const struct nft_pktinfo *pkt,
> +					struct nf_flow_route *route,
> +					enum ip_conntrack_dir dir)
> +{
> +	struct dst_entry *tun_dst = NULL;
> +	struct flowi fl = {};
> +
> +	switch (nft_pf(pkt)) {
> +	case NFPROTO_IPV4:
> +		fl.u.ip4.daddr = route->tuple[!dir].in.tun.src_v4.s_addr;
> +		fl.u.ip4.saddr = route->tuple[!dir].in.tun.dst_v4.s_addr;
> +		fl.u.ip4.flowi4_iif = nft_in(pkt)->ifindex;
> +		fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
> +		fl.u.ip4.flowi4_mark = pkt->skb->mark;
> +		fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
> +		break;
> +	case NFPROTO_IPV6:
> +		fl.u.ip6.daddr = route->tuple[!dir].in.tun.src_v6;
> +		fl.u.ip6.saddr = route->tuple[!dir].in.tun.dst_v6;
> +		fl.u.ip6.flowi6_iif = nft_in(pkt)->ifindex;
> +		fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
> +		fl.u.ip6.flowi6_mark = pkt->skb->mark;
> +		fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
> +		break;
> +	}
> +
> +	nf_route(nft_net(pkt), &tun_dst, &fl, false, nft_pf(pkt));
> +	if (!tun_dst)
> +		return -ENOENT;
> +
> +	nft_default_forward_path(route, tun_dst, dir);

This overrides the dst that was previously set here; is the previous
dst being leaked?

> +
> +	return 0;
> +}
> +
> +static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
> +				 struct nf_flow_route *route,
>  				 const struct nf_conn *ct,
>  				 enum ip_conntrack_dir dir,
>  				 struct nft_flowtable *ft)
> @@ -218,6 +254,12 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
>  		route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
>  		route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
>  		route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
> +		route->tuple[!dir].in.tun.tos = info.tun.tos;
> +		route->tuple[!dir].in.tun.ttl = info.tun.ttl;
> +		route->tuple[!dir].in.tun.df = info.tun.df;
> +
> +		if (nft_flow_tunnel_update_route(pkt, route, dir))
> +			return;

If tunnel route is found...

>  	}
>
>  	route->tuple[!dir].in.num_encaps = info.num_encaps;

... num_encaps is never set?

Would you also extend the selftest to combine IPIP with vlan? Thanks.

> @@ -274,9 +316,9 @@ int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
>  	nft_default_forward_path(route, other_dst, !dir);
>  
>  	if (route->tuple[dir].xmit_type	== FLOW_OFFLOAD_XMIT_NEIGH)
> -		nft_dev_forward_path(route, ct, dir, ft);
> +		nft_dev_forward_path(pkt, route, ct, dir, ft);
>  	if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
> -		nft_dev_forward_path(route, ct, !dir, ft);
> +		nft_dev_forward_path(pkt, route, ct, !dir, ft);
>  
>  	return 0;
>  }
> 
> -- 
> 2.51.0
> 

  reply	other threads:[~2025-11-04 22:52 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-23  8:50 [PATCH nf-next v8 0/3] Add IPIP flowtable SW acceleration Lorenzo Bianconi
2025-10-23  8:50 ` [PATCH nf-next v8 1/3] net: netfilter: Add IPIP flowtable rx sw acceleration Lorenzo Bianconi
2025-10-23  8:50 ` [PATCH nf-next v8 2/3] net: netfilter: Add IPIP flowtable tx " Lorenzo Bianconi
2025-11-04 22:52   ` Pablo Neira Ayuso [this message]
2025-11-05 16:36     ` Lorenzo Bianconi
2025-10-23  8:50 ` [PATCH nf-next v8 3/3] selftests: netfilter: nft_flowtable.sh: Add IPIP flowtable selftest Lorenzo Bianconi
2025-11-04 11:08 ` [PATCH nf-next v8 0/3] Add IPIP flowtable SW acceleration Lorenzo Bianconi
2025-11-04 11:19   ` Florian Westphal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aQqDnjv8KLtQJaOW@calendula \
    --to=pablo@netfilter.org \
    --cc=andrew+netdev@lunn.ch \
    --cc=coreteam@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=horms@kernel.org \
    --cc=kadlec@netfilter.org \
    --cc=kuba@kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=lorenzo@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=phil@nwl.cc \
    --cc=shuah@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).