From: Paolo Abeni <pabeni@redhat.com>
To: Roopa Prabhu <roopa@cumulusnetworks.com>,
davem@davemloft.net, netdev@vger.kernel.org
Cc: dsa@cumulusnetworks.com, nikolay@cumulusnetworks.com,
idosch@mellanox.com
Subject: Re: [PATCH net-next v2 4/5] ipv4: route: dissect flow in input path if fib rules need it
Date: Wed, 28 Feb 2018 10:59:55 +0100 [thread overview]
Message-ID: <1519811995.2595.15.camel@redhat.com> (raw)
In-Reply-To: <1519789965-3465-5-git-send-email-roopa@cumulusnetworks.com>
On Tue, 2018-02-27 at 19:52 -0800, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> Dissect flow in fwd path if fib rules require it. Controlled by
> a flag to avoid penatly for the common case. Flag is set when fib
> rules with sport, dport and proto match that require flow dissect
> are installed. Also passes the dissected hash keys to the multipath
> hash function when applicable to avoid dissecting the flow again.
> icmp packets will continue to use inner header for hash
> calculations (Thanks to Nikolay Aleksandrov for some review here).
>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
> include/net/ip_fib.h | 27 ++++++++++++++++++++++++++-
> include/net/netns/ipv4.h | 1 +
> net/ipv4/fib_rules.c | 8 ++++++++
> net/ipv4/fib_semantics.c | 2 +-
> net/ipv4/route.c | 43 +++++++++++++++++++++++++++++--------------
> 5 files changed, 65 insertions(+), 16 deletions(-)
>
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index f805243..9c4f52c 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
> return 0;
> }
>
> +static inline bool fib4_rules_early_flow_dissect(struct net *net,
> + struct sk_buff *skb,
> + struct flowi4 *fl4,
> + struct flow_keys *flkeys)
> +{
> + return false;
> +}
> #else /* CONFIG_IP_MULTIPLE_TABLES */
> int __net_init fib4_rules_init(struct net *net);
> void __net_exit fib4_rules_exit(struct net *net);
> @@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
> int fib4_rules_dump(struct net *net, struct notifier_block *nb);
> unsigned int fib4_rules_seq_read(struct net *net);
>
> +static inline bool fib4_rules_early_flow_dissect(struct net *net,
> + struct sk_buff *skb,
> + struct flowi4 *fl4,
> + struct flow_keys *flkeys)
> +{
> + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
> +
> + if (!net->ipv4.fib_rules_require_fldissect)
> + return false;
> +
> + skb_flow_dissect_flow_keys(skb, flkeys, flag);
> + fl4->fl4_sport = flkeys->ports.src;
> + fl4->fl4_dport = flkeys->ports.dst;
> + fl4->flowi4_proto = flkeys->basic.ip_proto;
> +
> + return true;
> +}
> +
> #endif /* CONFIG_IP_MULTIPLE_TABLES */
>
> /* Exported by fib_frontend.c */
> @@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
>
> #ifdef CONFIG_IP_ROUTE_MULTIPATH
> int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
> - const struct sk_buff *skb);
> + const struct sk_buff *skb, struct flow_keys *flkeys);
> #endif
> void fib_select_multipath(struct fib_result *res, int hash);
> void fib_select_path(struct net *net, struct fib_result *res,
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 44668c2..3a970e4 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -52,6 +52,7 @@ struct netns_ipv4 {
> #ifdef CONFIG_IP_MULTIPLE_TABLES
> struct fib_rules_ops *rules_ops;
> bool fib_has_custom_rules;
> + unsigned int fib_rules_require_fldissect;
> struct fib_table __rcu *fib_main;
> struct fib_table __rcu *fib_default;
> #endif
> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
> index 16083b8..737d11b 100644
> --- a/net/ipv4/fib_rules.c
> +++ b/net/ipv4/fib_rules.c
> @@ -255,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
> }
> #endif
>
> + if (fib_rule_requires_fldissect(rule))
> + net->ipv4.fib_rules_require_fldissect++;
> +
> rule4->src_len = frh->src_len;
> rule4->srcmask = inet_make_mask(rule4->src_len);
> rule4->dst_len = frh->dst_len;
> @@ -283,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
> net->ipv4.fib_num_tclassid_users--;
> #endif
> net->ipv4.fib_has_custom_rules = true;
> +
> + if (net->ipv4.fib_rules_require_fldissect &&
> + fib_rule_requires_fldissect(rule))
> + net->ipv4.fib_rules_require_fldissect--;
> errout:
> return err;
> }
> @@ -400,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
> goto fail;
> net->ipv4.rules_ops = ops;
> net->ipv4.fib_has_custom_rules = false;
> + net->ipv4.fib_rules_require_fldissect = 0;
> return 0;
>
> fail:
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index cd46d76..b0c398a 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
>
> #ifdef CONFIG_IP_ROUTE_MULTIPATH
> if (res->fi->fib_nhs > 1) {
> - int h = fib_multipath_hash(res->fi, fl4, skb);
> + int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
>
> fib_select_multipath(res, h);
> }
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 26eefa2..3bb686d 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
>
> /* if skb is set it will be used and fl4 can be NULL */
> int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
> - const struct sk_buff *skb)
> + const struct sk_buff *skb, struct flow_keys *flkeys)
> {
> struct net *net = fi->fib_net;
> struct flow_keys hash_keys;
> @@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
> if (skb->l4_hash)
> return skb_get_hash_raw(skb) >> 1;
> memset(&hash_keys, 0, sizeof(hash_keys));
> - skb_flow_dissect_flow_keys(skb, &keys, flag);
>
> - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
> - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
> - hash_keys.ports.src = keys.ports.src;
> - hash_keys.ports.dst = keys.ports.dst;
> - hash_keys.basic.ip_proto = keys.basic.ip_proto;
> + if (flkeys) {
> + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> + hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
> + hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
> + hash_keys.ports.src = flkeys->ports.src;
> + hash_keys.ports.dst = flkeys->ports.dst;
> + hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
> + } else {
> + skb_flow_dissect_flow_keys(skb, &keys, flag);
> + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
> + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
> + hash_keys.ports.src = keys.ports.src;
> + hash_keys.ports.dst = keys.ports.dst;
> + hash_keys.basic.ip_proto = keys.basic.ip_proto;
> + }
> } else {
> memset(&hash_keys, 0, sizeof(hash_keys));
> hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> @@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
> static int ip_mkroute_input(struct sk_buff *skb,
> struct fib_result *res,
> struct in_device *in_dev,
> - __be32 daddr, __be32 saddr, u32 tos)
> + __be32 daddr, __be32 saddr, u32 tos,
> + struct flow_keys *hkeys)
> {
> #ifdef CONFIG_IP_ROUTE_MULTIPATH
> if (res->fi && res->fi->fib_nhs > 1) {
> - int h = fib_multipath_hash(res->fi, NULL, skb);
> + int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
>
> fib_select_multipath(res, h);
> }
> @@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> struct fib_result *res)
> {
> struct in_device *in_dev = __in_dev_get_rcu(dev);
> + struct flow_keys *flkeys = NULL, _flkeys;
> + struct net *net = dev_net(dev);
> struct ip_tunnel_info *tun_info;
> - struct flowi4 fl4;
> + int err = -EINVAL;
> unsigned int flags = 0;
> u32 itag = 0;
> struct rtable *rth;
> - int err = -EINVAL;
> - struct net *net = dev_net(dev);
> + struct flowi4 fl4;
> bool do_cache;
>
> /* IP on this device is disabled. */
> @@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> fl4.daddr = daddr;
> fl4.saddr = saddr;
> fl4.flowi4_uid = sock_net_uid(net, NULL);
> +
> + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
> + flkeys = &_flkeys;
> +
> err = fib_lookup(net, &fl4, res, 0);
> if (err != 0) {
> if (!IN_DEV_FORWARD(in_dev))
> @@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> if (res->type != RTN_UNICAST)
> goto martian_destination;
>
> - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
> + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
> out: return err;
>
> brd_input:
LGTM
Acked-by: Paolo Abeni <pabeni@redhat.com>
next prev parent reply other threads:[~2018-02-28 9:59 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-02-28 3:52 [PATCH net-next v2 0/5] fib_rules: support sport, dport and proto match Roopa Prabhu
2018-02-28 3:52 ` [PATCH net-next v2 1/5] net: fib_rules: support for match on ip_proto, sport and dport Roopa Prabhu
2018-02-28 13:08 ` Nikolay Aleksandrov
2018-03-01 22:48 ` Eric Dumazet
2018-03-01 23:08 ` Roopa Prabhu
2018-02-28 3:52 ` [PATCH net-next v2 2/5] ipv4: fib_rules: support match on sport, dport and ip proto Roopa Prabhu
2018-02-28 13:09 ` Nikolay Aleksandrov
2018-02-28 3:52 ` [PATCH net-next v2 3/5] ipv6: fib6_rules: support for " Roopa Prabhu
2018-02-28 13:10 ` Nikolay Aleksandrov
2018-02-28 3:52 ` [PATCH net-next v2 4/5] ipv4: route: dissect flow in input path if fib rules need it Roopa Prabhu
2018-02-28 9:59 ` Paolo Abeni [this message]
2018-02-28 13:13 ` Nikolay Aleksandrov
2018-02-28 3:52 ` [PATCH net-next v2 5/5] ipv6: " Roopa Prabhu
2018-02-28 10:05 ` Paolo Abeni
2018-02-28 13:20 ` Nikolay Aleksandrov
2018-02-28 16:59 ` [PATCH net-next v2 0/5] fib_rules: support sport, dport and proto match David Miller
2018-02-28 18:11 ` David Ahern
2018-02-28 19:30 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1519811995.2595.15.camel@redhat.com \
--to=pabeni@redhat.com \
--cc=davem@davemloft.net \
--cc=dsa@cumulusnetworks.com \
--cc=idosch@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=nikolay@cumulusnetworks.com \
--cc=roopa@cumulusnetworks.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.