netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
To: Paul Blakey <paulb@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>,
	Oz Shlomo <ozsh@mellanox.com>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	Vlad Buslov <vladbu@mellanox.com>,
	David Miller <davem@davemloft.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	Jiri Pirko <jiri@mellanox.com>, Roi Dayan <roid@mellanox.com>
Subject: Re: [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows
Date: Tue, 3 Mar 2020 13:51:25 -0300	[thread overview]
Message-ID: <20200303165125.GE2546@localhost.localdomain> (raw)
In-Reply-To: <1583251072-10396-4-git-send-email-paulb@mellanox.com>

On Tue, Mar 03, 2020 at 05:57:52PM +0200, Paul Blakey wrote:
> Offload nf conntrack processing by looking up the 5-tuple in the
> zone's flow table.
> 
> The nf conntrack module will process the packets until a connection is
> in established state. Once in established state, the ct state pointer
> (nf_conn) will be restored on the skb from a successful ft lookup.
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>

Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
> Changelog:
>   v5->v6:
>    Refactor tcp fin/rst check, get tcp header inside filler, and test in caller
>    Pull tcp with ports instead of two pulls
>   v4->v5:
>    Re-read ip/ip6 header after pulling as skb ptrs may change
>    Use pskb_network_may_pull instaed of pskb_may_pull
>   v1->v2:
>    Add !skip_add curly braces
>    Removed extra setting thoff again
>    Check tcp proto outside of tcf_ct_flow_table_check_tcp
> 
>  net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 150 insertions(+), 2 deletions(-)
> 
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 2ab38431..23eba61 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
>  	tcf_ct_flow_table_add(ct_ft, ct, tcp);
>  }
>  
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
> +				  struct flow_offload_tuple *tuple,
> +				  struct tcphdr **tcph)
> +{
> +	struct flow_ports *ports;
> +	unsigned int thoff;
> +	struct iphdr *iph;
> +
> +	if (!pskb_network_may_pull(skb, sizeof(*iph)))
> +		return false;
> +
> +	iph = ip_hdr(skb);
> +	thoff = iph->ihl * 4;
> +
> +	if (ip_is_fragment(iph) ||
> +	    unlikely(thoff != sizeof(struct iphdr)))
> +		return false;
> +
> +	if (iph->protocol != IPPROTO_TCP &&
> +	    iph->protocol != IPPROTO_UDP)
> +		return false;
> +
> +	if (iph->ttl <= 1)
> +		return false;
> +
> +	if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
> +					thoff + sizeof(struct tcphdr) :
> +					thoff + sizeof(*ports)))
> +		return false;
> +
> +	iph = ip_hdr(skb);
> +	if (iph->protocol == IPPROTO_TCP)
> +		*tcph = (void *)(skb_network_header(skb) + thoff);
> +
> +	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> +	tuple->src_v4.s_addr = iph->saddr;
> +	tuple->dst_v4.s_addr = iph->daddr;
> +	tuple->src_port = ports->source;
> +	tuple->dst_port = ports->dest;
> +	tuple->l3proto = AF_INET;
> +	tuple->l4proto = iph->protocol;
> +
> +	return true;
> +}
> +
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
> +				  struct flow_offload_tuple *tuple,
> +				  struct tcphdr **tcph)
> +{
> +	struct flow_ports *ports;
> +	struct ipv6hdr *ip6h;
> +	unsigned int thoff;
> +
> +	if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
> +		return false;
> +
> +	ip6h = ipv6_hdr(skb);
> +
> +	if (ip6h->nexthdr != IPPROTO_TCP &&
> +	    ip6h->nexthdr != IPPROTO_UDP)
> +		return false;
> +
> +	if (ip6h->hop_limit <= 1)
> +		return false;
> +
> +	thoff = sizeof(*ip6h);
> +	if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
> +					thoff + sizeof(struct tcphdr) :
> +					thoff + sizeof(*ports)))
> +		return false;
> +
> +	ip6h = ipv6_hdr(skb);
> +	if (ip6h->nexthdr == IPPROTO_TCP)
> +		*tcph = (void *)(skb_network_header(skb) + thoff);
> +
> +	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> +	tuple->src_v6 = ip6h->saddr;
> +	tuple->dst_v6 = ip6h->daddr;
> +	tuple->src_port = ports->source;
> +	tuple->dst_port = ports->dest;
> +	tuple->l3proto = AF_INET6;
> +	tuple->l4proto = ip6h->nexthdr;
> +
> +	return true;
> +}
> +
> +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
> +				     struct sk_buff *skb,
> +				     u8 family)
> +{
> +	struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
> +	struct flow_offload_tuple_rhash *tuplehash;
> +	struct flow_offload_tuple tuple = {};
> +	enum ip_conntrack_info ctinfo;
> +	struct tcphdr *tcph = NULL;
> +	struct flow_offload *flow;
> +	struct nf_conn *ct;
> +	u8 dir;
> +
> +	/* Previously seen or loopback */
> +	ct = nf_ct_get(skb, &ctinfo);
> +	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
> +		return false;
> +
> +	switch (family) {
> +	case NFPROTO_IPV4:
> +		if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
> +			return false;
> +		break;
> +	case NFPROTO_IPV6:
> +		if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	tuplehash = flow_offload_lookup(nf_ft, &tuple);
> +	if (!tuplehash)
> +		return false;
> +
> +	dir = tuplehash->tuple.dir;
> +	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> +	ct = flow->ct;
> +
> +	if (tcph && (unlikely(tcph->fin || tcph->rst))) {
> +		flow_offload_teardown(flow);
> +		return false;
> +	}
> +
> +	ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
> +						    IP_CT_ESTABLISHED_REPLY;
> +
> +	nf_conntrack_get(&ct->ct_general);
> +	nf_ct_set(skb, ct, ctinfo);
> +
> +	return true;
> +}
> +
>  static int tcf_ct_flow_tables_init(void)
>  {
>  	return rhashtable_init(&zones_ht, &zones_params);
> @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  	struct nf_hook_state state;
>  	int nh_ofs, err, retval;
>  	struct tcf_ct_params *p;
> +	bool skip_add = false;
>  	struct nf_conn *ct;
>  	u8 family;
>  
> @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  	 */
>  	cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
>  	if (!cached) {
> +		if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
> +			skip_add = true;
> +			goto do_nat;
> +		}
> +
>  		/* Associate skb with specified zone. */
>  		if (tmpl) {
>  			ct = nf_ct_get(skb, &ctinfo);
> @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  			goto out_push;
>  	}
>  
> +do_nat:
>  	ct = nf_ct_get(skb, &ctinfo);
>  	if (!ct)
>  		goto out_push;
> @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  		 * even if the connection is already confirmed.
>  		 */
>  		nf_conntrack_confirm(skb);
> +	} else if (!skip_add) {
> +		tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
>  	}
>  
> -	tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> -
>  out_push:
>  	skb_push_rcsum(skb, nh_ofs);
>  
> -- 
> 1.8.3.1
> 

  reply	other threads:[~2020-03-03 16:51 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-03 15:57 [PATCH net-next v6 0/3] act_ct: Software offload of conntrack_in Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
2020-03-03 16:51   ` Marcelo Ricardo Leitner
2020-03-07 20:12   ` Eric Dumazet
2020-03-07 20:53     ` Eric Dumazet
2020-03-08  8:11       ` Paul Blakey
2020-03-08  8:15         ` Paul Blakey
2020-03-08 20:42           ` Eric Dumazet
2020-03-09  8:02             ` Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
2020-03-03 16:51   ` Marcelo Ricardo Leitner
2020-03-03 15:57 ` [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2020-03-03 16:51   ` Marcelo Ricardo Leitner [this message]
2020-03-03 18:32   ` Nikolay Aleksandrov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200303165125.GE2546@localhost.localdomain \
    --to=marcelo.leitner@gmail.com \
    --cc=davem@davemloft.net \
    --cc=jakub.kicinski@netronome.com \
    --cc=jiri@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=ozsh@mellanox.com \
    --cc=paulb@mellanox.com \
    --cc=roid@mellanox.com \
    --cc=saeedm@mellanox.com \
    --cc=vladbu@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).