netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nambiar, Amritha" <amritha.nambiar@intel.com>
To: Jiri Pirko <jiri@resnulli.us>
Cc: netdev@vger.kernel.org, davem@davemloft.net,
	jakub.kicinski@netronome.com, sridhar.samudrala@intel.com,
	jhs@mojatatu.com, xiyou.wangcong@gmail.com
Subject: Re: [net-next PATCH] net: sched: cls_flower: Classify packets using port ranges
Date: Thu, 18 Oct 2018 11:24:44 -0700	[thread overview]
Message-ID: <28d1bd21-fd85-07e5-c6e6-18e7a0b19da4@intel.com> (raw)
In-Reply-To: <20181018121727.GA4558@nanopsycho.orion>

On 10/18/2018 5:17 AM, Jiri Pirko wrote:
> Fri, Oct 12, 2018 at 03:53:30PM CEST, amritha.nambiar@intel.com wrote:
>> Added support in tc flower for filtering based on port ranges.
>> This is a rework of the RFC patch at:
>> https://patchwork.ozlabs.org/patch/969595/
>>
>> Example:
>> 1. Match on a port range:
>> -------------------------
>> $ tc filter add dev enp4s0 protocol ip parent ffff:\
>>  prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\
>>  action drop
>>
>> $ tc -s filter show dev enp4s0 parent ffff:
>> filter protocol ip pref 1 flower chain 0
>> filter protocol ip pref 1 flower chain 0 handle 0x1
>>  eth_type ipv4
>>  ip_proto tcp
>>  dst_port_min 20
>>  dst_port_max 30
>>  skip_hw
>>  not_in_hw
>>        action order 1: gact action drop
>>         random type none pass val 0
>>         index 1 ref 1 bind 1 installed 181 sec used 5 sec
>>        Action statistics:
>>        Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0)
>>        backlog 0b 0p requeues 0
>>
>> 2. Match on IP address and port range:
>> --------------------------------------
>> $ tc filter add dev enp4s0 protocol ip parent ffff:\
>>  prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\
>>  skip_hw action drop
>>
>> $ tc -s filter show dev enp4s0 parent ffff:
>> filter protocol ip pref 1 flower chain 0 handle 0x2
>>  eth_type ipv4
>>  ip_proto tcp
>>  dst_ip 192.168.1.1
>>  dst_port_min 100
>>  dst_port_max 200
>>  skip_hw
>>  not_in_hw
>>        action order 1: gact action drop
>>         random type none pass val 0
>>         index 2 ref 1 bind 1 installed 28 sec used 6 sec
>>        Action statistics:
>>        Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0)
>>        backlog 0b 0p requeues 0
>>
>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>> ---
>> include/uapi/linux/pkt_cls.h |    5 ++
>> net/sched/cls_flower.c       |  134 ++++++++++++++++++++++++++++++++++++++++--
>> 2 files changed, 132 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
>> index 401d0c1..b569308 100644
>> --- a/include/uapi/linux/pkt_cls.h
>> +++ b/include/uapi/linux/pkt_cls.h
>> @@ -405,6 +405,11 @@ enum {
>> 	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
>> 	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
>>
>> +	TCA_FLOWER_KEY_PORT_SRC_MIN,	/* be16 */
>> +	TCA_FLOWER_KEY_PORT_SRC_MAX,	/* be16 */
>> +	TCA_FLOWER_KEY_PORT_DST_MIN,	/* be16 */
>> +	TCA_FLOWER_KEY_PORT_DST_MAX,	/* be16 */
>> +
>> 	TCA_FLOWER_FLAGS,
>> 	TCA_FLOWER_KEY_VLAN_ID,		/* be16 */
>> 	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8   */
>> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
>> index 9aada2d..5f135f0 100644
>> --- a/net/sched/cls_flower.c
>> +++ b/net/sched/cls_flower.c
>> @@ -55,6 +55,9 @@ struct fl_flow_key {
>> 	struct flow_dissector_key_ip ip;
>> 	struct flow_dissector_key_ip enc_ip;
>> 	struct flow_dissector_key_enc_opts enc_opts;
>> +
>> +	struct flow_dissector_key_ports tp_min;
>> +	struct flow_dissector_key_ports tp_max;
>> } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
>>
>> struct fl_flow_mask_range {
>> @@ -103,6 +106,11 @@ struct cls_fl_filter {
>> 	struct net_device *hw_dev;
>> };
>>
>> +enum fl_endpoint {
>> +	FLOWER_ENDPOINT_DST,
>> +	FLOWER_ENDPOINT_SRC
>> +};
>> +
>> static const struct rhashtable_params mask_ht_params = {
>> 	.key_offset = offsetof(struct fl_flow_mask, key),
>> 	.key_len = sizeof(struct fl_flow_key),
>> @@ -179,11 +187,86 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
>> 	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
>> }
>>
>> +static int fl_range_compare_params(struct cls_fl_filter *filter,
>> +				   struct fl_flow_key *key,
>> +				   struct fl_flow_key *mkey,
>> +				   enum fl_endpoint endpoint)
>> +{
>> +	__be16 min_mask, max_mask, min_val, max_val;
>> +
>> +	if (endpoint == FLOWER_ENDPOINT_DST) {
>> +		min_mask = htons(filter->mask->key.tp_min.dst);
>> +		max_mask = htons(filter->mask->key.tp_max.dst);
>> +		min_val = htons(filter->key.tp_min.dst);
>> +		max_val = htons(filter->key.tp_max.dst);
>> +
>> +		if (min_mask && max_mask) {
>> +			if (htons(key->tp.dst) < min_val ||
>> +			    htons(key->tp.dst) > max_val)
>> +				return -1;
>> +
>> +			/* skb does not have min and max values */
>> +			mkey->tp_min.dst = filter->mkey.tp_min.dst;
>> +			mkey->tp_max.dst = filter->mkey.tp_max.dst;
>> +		}
>> +	} else {
>> +		min_mask = htons(filter->mask->key.tp_min.src);
>> +		max_mask = htons(filter->mask->key.tp_max.src);
>> +		min_val = htons(filter->key.tp_min.src);
>> +		max_val = htons(filter->key.tp_max.src);
>> +
>> +		if (min_mask && max_mask) {
>> +			if (htons(key->tp.src) < min_val ||
>> +			    htons(key->tp.src) > max_val)
>> +				return -1;
>> +
>> +			/* skb does not have min and max values */
>> +			mkey->tp_min.src = filter->mkey.tp_min.src;
>> +			mkey->tp_max.src = filter->mkey.tp_max.src;
>> +		}
> 
> You basically have 2 functions in 1 here. Just have 2 functions:
> fl_port_range_dst_cmp()
> and
> fl_port_range_src_cmp()
> 
> And avoid the "endpoint" enum.
> Also, as you return -1 or 0, just make it bool.
> 

Makes sense. Will do.

> 
>> +	}
>> +	return 0;
>> +}
>> +
>> +static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
>> +					     struct fl_flow_key *mkey,
>> +					     struct fl_flow_key *key)
>> +{
>> +	struct cls_fl_filter *filter, *f;
>> +	int ret;
>> +
>> +	list_for_each_entry_rcu(filter, &mask->filters, list) {
>> +		ret = fl_range_compare_params(filter, key, mkey,
>> +					      FLOWER_ENDPOINT_DST);
>> +		if (ret < 0)
>> +			continue;
>> +
>> +		ret = fl_range_compare_params(filter, key, mkey,
>> +					      FLOWER_ENDPOINT_SRC);
>> +		if (ret < 0)
>> +			continue;
>> +
>> +		f = rhashtable_lookup_fast(&mask->ht,
>> +					   fl_key_get_start(mkey, mask),
>> +					   mask->filter_ht_params);
>> +		if (f)
>> +			return f;
>> +	}
>> +	return NULL;
>> +}
>> +
>> static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
>> -				       struct fl_flow_key *mkey)
>> +				       struct fl_flow_key *mkey,
>> +				       struct fl_flow_key *key, bool is_skb)
>> {
>> -	return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
>> -				      mask->filter_ht_params);
>> +	if ((!(mask->key.tp_min.dst && mask->key.tp_max.dst) &&
>> +	     !(mask->key.tp_min.src && mask->key.tp_max.src)) || !is_skb) {
> 
> It would probably be good to have a dedicated bit to check and decide
> whether to do a normal or a range lookup. This is the fast path.
> 

Will fix in v2.

> 
>> +		return  rhashtable_lookup_fast(&mask->ht,
> 
> Remove double space   ^^
> 

Will fix in v2.

> 
>> +					       fl_key_get_start(mkey, mask),
>> +					       mask->filter_ht_params);
>> +	}
>> +	/* Classify based on range */
>> +	return fl_lookup_range(mask, mkey, key);
>> }
>>
>> static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>> @@ -207,8 +290,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>> 		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
>>
>> 		fl_set_masked_key(&skb_mkey, &skb_key, mask);
>> +		f = fl_lookup(mask, &skb_mkey, &skb_key, true);
>>
>> -		f = fl_lookup(mask, &skb_mkey);
>> 		if (f && !tc_skip_sw(f->flags)) {
>> 			*res = f->res;
>> 			return tcf_exts_exec(skb, &f->exts, res);
>> @@ -909,6 +992,23 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
>> 			       sizeof(key->arp.tha));
>> 	}
>>
>> +	if (key->basic.ip_proto == IPPROTO_TCP ||
>> +	    key->basic.ip_proto == IPPROTO_UDP ||
>> +	    key->basic.ip_proto == IPPROTO_SCTP) {
>> +		fl_set_key_val(tb, &key->tp_min.dst,
>> +			       TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
>> +			       TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
>> +		fl_set_key_val(tb, &key->tp_max.dst,
>> +			       TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
>> +			       TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
>> +		fl_set_key_val(tb, &key->tp_min.src,
>> +			       TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
>> +			       TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
>> +		fl_set_key_val(tb, &key->tp_max.src,
>> +			       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
>> +			       TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
>> +	}
>> +
>> 	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
>> 	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
>> 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
>> @@ -1026,8 +1126,7 @@ static void fl_init_dissector(struct flow_dissector *dissector,
>> 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
>> 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>> 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
>> -	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>> -			     FLOW_DISSECTOR_KEY_PORTS, tp);
>> +	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
>> 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>> 			     FLOW_DISSECTOR_KEY_IP, ip);
>> 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>> @@ -1227,7 +1326,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>> 		goto errout_idr;
>>
>> 	if (!tc_skip_sw(fnew->flags)) {
>> -		if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
>> +		if (!fold && fl_lookup(fnew->mask, &fnew->mkey, NULL, false)) {
> 
> 
> I don't understand why you need the "is_skb" arg here. Could you
> please explain?
> 
> Thanks!
> 

The 'is_skb' arg is kept because fl_lookup is called in two cases: once
for skb classification, and once to check whether a filter already
exists every time a new filter is added. For skb classification, we
need to go through the range comparator to decide whether the skb's
port value falls within the range filter's min and max limits. For
filter validation, the range filter that we are trying to add already
has min and max values, and we are validating it against other range
filters that also have min and max values. So an rhashtable lookup
suffices, and there is no need to go through the range comparator in
this case. In the above code, we are checking whether a range filter
already exists, so 'is_skb' is false.

> 
>> 			err = -EEXIST;
>> 			goto errout_mask;
>> 		}
>> @@ -1800,6 +1899,27 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
>> 				  sizeof(key->arp.tha))))
>> 		goto nla_put_failure;
>>
>> +	if ((key->basic.ip_proto == IPPROTO_TCP ||
>> +	     key->basic.ip_proto == IPPROTO_UDP ||
>> +	     key->basic.ip_proto == IPPROTO_SCTP) &&
>> +	     (fl_dump_key_val(skb, &key->tp_min.dst,
>> +			      TCA_FLOWER_KEY_PORT_DST_MIN,
>> +			      &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
>> +			      sizeof(key->tp_min.dst)) ||
>> +	      fl_dump_key_val(skb, &key->tp_max.dst,
>> +			      TCA_FLOWER_KEY_PORT_DST_MAX,
>> +			      &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
>> +			      sizeof(key->tp_max.dst)) ||
>> +	      fl_dump_key_val(skb, &key->tp_min.src,
>> +			      TCA_FLOWER_KEY_PORT_SRC_MIN,
>> +			      &mask->tp_min.src, TCA_FLOWER_UNSPEC,
>> +			      sizeof(key->tp_min.src)) ||
>> +	      fl_dump_key_val(skb, &key->tp_max.src,
>> +			      TCA_FLOWER_KEY_PORT_SRC_MAX,
>> +			      &mask->tp_max.src, TCA_FLOWER_UNSPEC,
>> +			      sizeof(key->tp_max.src))))
>> +		goto nla_put_failure;
>> +
>> 	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
>> 	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
>> 			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
>>

  reply	other threads:[~2018-10-19  2:26 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-12 13:53 [net-next PATCH] net: sched: cls_flower: Classify packets using port ranges Amritha Nambiar
2018-10-18  4:42 ` David Miller
2018-10-18  5:41   ` Cong Wang
2018-10-18  7:22     ` Nambiar, Amritha
2018-10-18 12:17 ` Jiri Pirko
2018-10-18 18:24   ` Nambiar, Amritha [this message]
2018-10-19  8:52     ` Jiri Pirko
2018-11-08  1:52       ` Nambiar, Amritha

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=28d1bd21-fd85-07e5-c6e6-18e7a0b19da4@intel.com \
    --to=amritha.nambiar@intel.com \
    --cc=davem@davemloft.net \
    --cc=jakub.kicinski@netronome.com \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=sridhar.samudrala@intel.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).