All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
To: Paul Blakey <paulb@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>,
	Oz Shlomo <ozsh@mellanox.com>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	Vlad Buslov <vladbu@mellanox.com>,
	David Miller <davem@davemloft.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	Jiri Pirko <jiri@mellanox.com>, Roi Dayan <roid@mellanox.com>
Subject: Re: [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows
Date: Tue, 3 Mar 2020 13:51:25 -0300	[thread overview]
Message-ID: <20200303165125.GE2546@localhost.localdomain> (raw)
In-Reply-To: <1583251072-10396-4-git-send-email-paulb@mellanox.com>

On Tue, Mar 03, 2020 at 05:57:52PM +0200, Paul Blakey wrote:
> Offload nf conntrack processing by looking up the 5-tuple in the
> zone's flow table.
> 
> The nf conntrack module will process the packets until a connection is
> in established state. Once in established state, the ct state pointer
> (nf_conn) will be restored on the skb from a successful ft lookup.
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>

Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
> Changelog:
>   v5->v6:
>    Refactor tcp fin/rst check, get tcp header inside filler, and test in caller
>    Pull tcp with ports instead of two pulls
>   v4->v5:
>    Re-read ip/ip6 header after pulling as skb ptrs may change
>    Use pskb_network_may_pull instaed of pskb_may_pull
>   v1->v2:
>    Add !skip_add curly braces
>    Removed extra setting thoff again
>    Check tcp proto outside of tcf_ct_flow_table_check_tcp
> 
>  net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 150 insertions(+), 2 deletions(-)
> 
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 2ab38431..23eba61 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
>  	tcf_ct_flow_table_add(ct_ft, ct, tcp);
>  }
>  
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
> +				  struct flow_offload_tuple *tuple,
> +				  struct tcphdr **tcph)
> +{
> +	struct flow_ports *ports;
> +	unsigned int thoff;
> +	struct iphdr *iph;
> +
> +	if (!pskb_network_may_pull(skb, sizeof(*iph)))
> +		return false;
> +
> +	iph = ip_hdr(skb);
> +	thoff = iph->ihl * 4;
> +
> +	if (ip_is_fragment(iph) ||
> +	    unlikely(thoff != sizeof(struct iphdr)))
> +		return false;
> +
> +	if (iph->protocol != IPPROTO_TCP &&
> +	    iph->protocol != IPPROTO_UDP)
> +		return false;
> +
> +	if (iph->ttl <= 1)
> +		return false;
> +
> +	if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
> +					thoff + sizeof(struct tcphdr) :
> +					thoff + sizeof(*ports)))
> +		return false;
> +
> +	iph = ip_hdr(skb);
> +	if (iph->protocol == IPPROTO_TCP)
> +		*tcph = (void *)(skb_network_header(skb) + thoff);
> +
> +	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> +	tuple->src_v4.s_addr = iph->saddr;
> +	tuple->dst_v4.s_addr = iph->daddr;
> +	tuple->src_port = ports->source;
> +	tuple->dst_port = ports->dest;
> +	tuple->l3proto = AF_INET;
> +	tuple->l4proto = iph->protocol;
> +
> +	return true;
> +}
> +
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
> +				  struct flow_offload_tuple *tuple,
> +				  struct tcphdr **tcph)
> +{
> +	struct flow_ports *ports;
> +	struct ipv6hdr *ip6h;
> +	unsigned int thoff;
> +
> +	if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
> +		return false;
> +
> +	ip6h = ipv6_hdr(skb);
> +
> +	if (ip6h->nexthdr != IPPROTO_TCP &&
> +	    ip6h->nexthdr != IPPROTO_UDP)
> +		return false;
> +
> +	if (ip6h->hop_limit <= 1)
> +		return false;
> +
> +	thoff = sizeof(*ip6h);
> +	if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
> +					thoff + sizeof(struct tcphdr) :
> +					thoff + sizeof(*ports)))
> +		return false;
> +
> +	ip6h = ipv6_hdr(skb);
> +	if (ip6h->nexthdr == IPPROTO_TCP)
> +		*tcph = (void *)(skb_network_header(skb) + thoff);
> +
> +	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> +	tuple->src_v6 = ip6h->saddr;
> +	tuple->dst_v6 = ip6h->daddr;
> +	tuple->src_port = ports->source;
> +	tuple->dst_port = ports->dest;
> +	tuple->l3proto = AF_INET6;
> +	tuple->l4proto = ip6h->nexthdr;
> +
> +	return true;
> +}
> +
> +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
> +				     struct sk_buff *skb,
> +				     u8 family)
> +{
> +	struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
> +	struct flow_offload_tuple_rhash *tuplehash;
> +	struct flow_offload_tuple tuple = {};
> +	enum ip_conntrack_info ctinfo;
> +	struct tcphdr *tcph = NULL;
> +	struct flow_offload *flow;
> +	struct nf_conn *ct;
> +	u8 dir;
> +
> +	/* Previously seen or loopback */
> +	ct = nf_ct_get(skb, &ctinfo);
> +	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
> +		return false;
> +
> +	switch (family) {
> +	case NFPROTO_IPV4:
> +		if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
> +			return false;
> +		break;
> +	case NFPROTO_IPV6:
> +		if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	tuplehash = flow_offload_lookup(nf_ft, &tuple);
> +	if (!tuplehash)
> +		return false;
> +
> +	dir = tuplehash->tuple.dir;
> +	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> +	ct = flow->ct;
> +
> +	if (tcph && (unlikely(tcph->fin || tcph->rst))) {
> +		flow_offload_teardown(flow);
> +		return false;
> +	}
> +
> +	ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
> +						    IP_CT_ESTABLISHED_REPLY;
> +
> +	nf_conntrack_get(&ct->ct_general);
> +	nf_ct_set(skb, ct, ctinfo);
> +
> +	return true;
> +}
> +
>  static int tcf_ct_flow_tables_init(void)
>  {
>  	return rhashtable_init(&zones_ht, &zones_params);
> @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  	struct nf_hook_state state;
>  	int nh_ofs, err, retval;
>  	struct tcf_ct_params *p;
> +	bool skip_add = false;
>  	struct nf_conn *ct;
>  	u8 family;
>  
> @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  	 */
>  	cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
>  	if (!cached) {
> +		if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
> +			skip_add = true;
> +			goto do_nat;
> +		}
> +
>  		/* Associate skb with specified zone. */
>  		if (tmpl) {
>  			ct = nf_ct_get(skb, &ctinfo);
> @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  			goto out_push;
>  	}
>  
> +do_nat:
>  	ct = nf_ct_get(skb, &ctinfo);
>  	if (!ct)
>  		goto out_push;
> @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  		 * even if the connection is already confirmed.
>  		 */
>  		nf_conntrack_confirm(skb);
> +	} else if (!skip_add) {
> +		tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
>  	}
>  
> -	tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> -
>  out_push:
>  	skb_push_rcsum(skb, nh_ofs);
>  
> -- 
> 1.8.3.1
> 

  reply	other threads:[~2020-03-03 16:51 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-03 15:57 [PATCH net-next v6 0/3] act_ct: Software offload of conntrack_in Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
2020-03-03 16:51   ` Marcelo Ricardo Leitner
2020-03-07 20:12   ` Eric Dumazet
2020-03-07 20:53     ` Eric Dumazet
2020-03-08  8:11       ` Paul Blakey
2020-03-08  8:15         ` Paul Blakey
2020-03-08 20:42           ` Eric Dumazet
2020-03-09  8:02             ` Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
2020-03-03 16:51   ` Marcelo Ricardo Leitner
2020-03-03 15:57 ` [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2020-03-03 16:51   ` Marcelo Ricardo Leitner [this message]
2020-03-03 18:32   ` Nikolay Aleksandrov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200303165125.GE2546@localhost.localdomain \
    --to=marcelo.leitner@gmail.com \
    --cc=davem@davemloft.net \
    --cc=jakub.kicinski@netronome.com \
    --cc=jiri@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=ozsh@mellanox.com \
    --cc=paulb@mellanox.com \
    --cc=roid@mellanox.com \
    --cc=saeedm@mellanox.com \
    --cc=vladbu@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.