netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Toshiaki Makita <toshiaki.makita1@gmail.com>
To: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Saeed Mahameed <saeedm@nvidia.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Jiri Pirko <jiri@resnulli.us>,
	Jozsef Kadlecsik <kadlec@netfilter.org>,
	Florian Westphal <fw@strlen.de>,
	netdev@vger.kernel.org, netfilter-devel@vger.kernel.org,
	coreteam@netfilter.org, Paul Blakey <paulb@nvidia.com>
Subject: Re: [PATCH net-next 1/3] netfilter: flowtable: Support GRE
Date: Tue, 8 Feb 2022 23:30:03 +0900	[thread overview]
Message-ID: <9d4fd782-896d-4a44-b596-517c84d97d5a@gmail.com> (raw)
In-Reply-To: <YgFdS0ak3LIR2waA@salvia>

On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
>> Support GREv0 without NAT.
>>
>> Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
>> ---
>>   net/netfilter/nf_flow_table_core.c    | 10 +++++--
>>   net/netfilter/nf_flow_table_ip.c      | 54 ++++++++++++++++++++++++++++-------
>>   net/netfilter/nf_flow_table_offload.c | 19 +++++++-----
>>   net/netfilter/nft_flow_offload.c      | 13 +++++++++
>>   4 files changed, 77 insertions(+), 19 deletions(-)
>>
>> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
>> index b90eca7..e66a375 100644
>> --- a/net/netfilter/nf_flow_table_core.c
>> +++ b/net/netfilter/nf_flow_table_core.c
>> @@ -39,8 +39,14 @@
>>   
>>   	ft->l3proto = ctt->src.l3num;
>>   	ft->l4proto = ctt->dst.protonum;
>> -	ft->src_port = ctt->src.u.tcp.port;
>> -	ft->dst_port = ctt->dst.u.tcp.port;
>> +
>> +	switch (ctt->dst.protonum) {
>> +	case IPPROTO_TCP:
>> +	case IPPROTO_UDP:
>> +		ft->src_port = ctt->src.u.tcp.port;
>> +		ft->dst_port = ctt->dst.u.tcp.port;
>> +		break;
>> +	}
>>   }
>>   
>>   struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
>> index 889cf88..48e2f58 100644
>> --- a/net/netfilter/nf_flow_table_ip.c
>> +++ b/net/netfilter/nf_flow_table_ip.c
>> @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   	struct flow_ports *ports;
>>   	unsigned int thoff;
>>   	struct iphdr *iph;
>> +	u8 ipproto;
>>
>>   	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
>>   		return -1;
>> @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   
>>   	thoff += offset;
>>   
>> -	switch (iph->protocol) {
>> +	ipproto = iph->protocol;
>> +	switch (ipproto) {
>>   	case IPPROTO_TCP:
>>   		*hdrsize = sizeof(struct tcphdr);
>>   		break;
>>   	case IPPROTO_UDP:
>>   		*hdrsize = sizeof(struct udphdr);
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE:
>> +		*hdrsize = sizeof(struct gre_base_hdr);
>> +		break;
>> +#endif
>>   	default:
>>   		return -1;
>>   	}
>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>   		return -1;
>>   
>> +	if (ipproto == IPPROTO_GRE) {
> 
> No ifdef here? Maybe remove these ifdef everywhere?

I wanted to avoid adding many ifdefs and I expect this to be compiled out when 
CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather may have 
been unintuitive though.

Removing all of these ifdefs will cause inconsistent behavior between 
CONFIG_NF_CT_PROTO_GRE=n/y.
When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus it will 
track GREv1 without key infomation, and the flow will be offloaded.
When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not be 
offloaded.
I wanted to just refuse offloading of GRE to avoid this inconsistency.
Anyway this kind of inconsistency seems to happen in software conntrack, so if you'd 
like to remove ifdefs, I will do.

>> +		struct gre_base_hdr *greh;
>> +
>> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
>> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
>> +			return -1;
>> +	}
>> +
>>   	iph = (struct iphdr *)(skb_network_header(skb) + offset);
>> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>>   
>>   	tuple->src_v4.s_addr	= iph->saddr;
>>   	tuple->dst_v4.s_addr	= iph->daddr;
>> -	tuple->src_port		= ports->source;
>> -	tuple->dst_port		= ports->dest;
>> +	if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) {
>> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>> +		tuple->src_port		= ports->source;
>> +		tuple->dst_port		= ports->dest;
>> +	}
> 
> maybe:
> 
>          switch (ipproto) {
>          case IPPROTO_TCP:
>          case IPPROTO_UDP:
>                  ...
>                  break;
>          case IPPROTO_GRE:
>                  break;
>          }
> 
> ?

Sure, will fix.

>>   	tuple->l3proto		= AF_INET;
>> -	tuple->l4proto		= iph->protocol;
>> +	tuple->l4proto		= ipproto;
>>   	tuple->iifidx		= dev->ifindex;
>>   	nf_flow_tuple_encap(skb, tuple);
>>   
>> @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   	struct flow_ports *ports;
>>   	struct ipv6hdr *ip6h;
>>   	unsigned int thoff;
>> +	u8 nexthdr;
>>   
>>   	thoff = sizeof(*ip6h) + offset;
>>   	if (!pskb_may_pull(skb, thoff))
>> @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   
>>   	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>>   
>> -	switch (ip6h->nexthdr) {
>> +	nexthdr = ip6h->nexthdr;
>> +	switch (nexthdr) {
>>   	case IPPROTO_TCP:
>>   		*hdrsize = sizeof(struct tcphdr);
>>   		break;
>>   	case IPPROTO_UDP:
>>   		*hdrsize = sizeof(struct udphdr);
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE:
>> +		*hdrsize = sizeof(struct gre_base_hdr);
>> +		break;
>> +#endif
>>   	default:
>>   		return -1;
>>   	}
>> @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>   		return -1;
>>   
>> +	if (nexthdr == IPPROTO_GRE) {
>> +		struct gre_base_hdr *greh;
>> +
>> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
>> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
>> +			return -1;
>> +	}
>> +
>>   	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>>   
>>   	tuple->src_v6		= ip6h->saddr;
>>   	tuple->dst_v6		= ip6h->daddr;
>> -	tuple->src_port		= ports->source;
>> -	tuple->dst_port		= ports->dest;
>> +	if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
>> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>> +		tuple->src_port		= ports->source;
>> +		tuple->dst_port		= ports->dest;
>> +	}
>>   	tuple->l3proto		= AF_INET6;
>> -	tuple->l4proto		= ip6h->nexthdr;
>> +	tuple->l4proto		= nexthdr;
>>   	tuple->iifidx		= dev->ifindex;
>>   	nf_flow_tuple_encap(skb, tuple);
>>   
>> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
>> index b561e0a..9b81080 100644
>> --- a/net/netfilter/nf_flow_table_offload.c
>> +++ b/net/netfilter/nf_flow_table_offload.c
>> @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>>   		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
>>   		break;
>>   	case IPPROTO_UDP:
>> +	case IPPROTO_GRE:
>>   		break;
>>   	default:
>>   		return -EOPNOTSUPP;
>> @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>>   	key->basic.ip_proto = tuple->l4proto;
>>   	mask->basic.ip_proto = 0xff;
>>   
>> -	key->tp.src = tuple->src_port;
>> -	mask->tp.src = 0xffff;
>> -	key->tp.dst = tuple->dst_port;
>> -	mask->tp.dst = 0xffff;
>> -
>>   	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
>>   				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
>> -				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
>> -				      BIT(FLOW_DISSECTOR_KEY_PORTS);
>> +				      BIT(FLOW_DISSECTOR_KEY_BASIC);
>> +
>> +	if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) {
>> +		key->tp.src = tuple->src_port;
>> +		mask->tp.src = 0xffff;
>> +		key->tp.dst = tuple->dst_port;
>> +		mask->tp.dst = 0xffff;
>> +
>> +		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
>> +	}
>> +
>>   	return 0;
>>   }
>>   
>> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
>> index 0af34ad..731b5d8 100644
>> --- a/net/netfilter/nft_flow_offload.c
>> +++ b/net/netfilter/nft_flow_offload.c
>> @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>>   		break;
>>   	case IPPROTO_UDP:
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE: {
>> +		struct nf_conntrack_tuple *tuple;
>> +
>> +		if (ct->status & IPS_NAT_MASK)
>> +			goto out;
> 
> Why this NAT check?

NAT requires more work. I'd like to start with a minimal GRE support.
Maybe we can add NAT support later.

Toshiaki Makita

>> +		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
>> +		/* No support for GRE v1 */
>> +		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
>> +			goto out;
>> +		break;
>> +	}
>> +#endif
>>   	default:
>>   		goto out;
>>   	}
>> -- 
>> 1.8.3.1
>>

  reply	other threads:[~2022-02-08 14:30 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-03 11:59 [PATCH net-next 0/3] Conntrack GRE offload Toshiaki Makita
2022-02-03 11:59 ` [PATCH net-next 1/3] netfilter: flowtable: Support GRE Toshiaki Makita
2022-02-07 17:56   ` Pablo Neira Ayuso
2022-02-08 14:30     ` Toshiaki Makita [this message]
2022-02-09 10:01       ` Pablo Neira Ayuso
2022-02-12  1:54         ` Toshiaki Makita
2022-02-19 12:51           ` Toshiaki Makita
2022-02-23 10:44             ` Pablo Neira Ayuso
2022-02-03 11:59 ` [PATCH net-next 2/3] act_ct: Support GRE offload Toshiaki Makita
2022-02-10 12:48   ` Paul Blakey
2022-02-03 11:59 ` [PATCH net-next 3/3] net/mlx5: Support GRE conntrack offload Toshiaki Makita
2022-02-10 12:44   ` Paul Blakey

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9d4fd782-896d-4a44-b596-517c84d97d5a@gmail.com \
    --to=toshiaki.makita1@gmail.com \
    --cc=coreteam@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=fw@strlen.de \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=kadlec@netfilter.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=paulb@nvidia.com \
    --cc=saeedm@nvidia.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).