* [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit
@ 2014-10-07 0:05 Martin KaFai Lau
2014-10-07 0:05 ` [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check Martin KaFai Lau
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-07 0:05 UTC (permalink / raw)
To: netdev; +Cc: Hannes Frederic Sowa
Notes: Same as the last two versions but fixed a title and missing
Signed-Off issue.
I am trying to understand why there is a need to restart fib6_lookup() after
getting rt with RTF_CACHE.
I have adapted davem's udpflood test
(https://urldefense.proofpoint.com/v1/url?u=https://git.kernel.org/pub/scm/linux/kernel/git/davem/net_test_tools.git&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=%2Faj1ZOQObwbmtLwlDw3XzQ%3D%3D%0A&m=j4KoKiV%2FLl4Dx6wOKiDLZPDODlbMJ5UBybTiTzIRHTM%3D%0A&s=68cac2d1d239e23b104065419b4ad89ea80bc1401571034ccce3b4a52a98a8d3) to
support IPv6 and here is the result:
#root > time ./udpflood -l 20000000 -c 250 2401:db00:face:face::2
Before:
real 0m33.224s
user 0m2.941s
sys 0m30.232s
After:
real 0m31.517s
user 0m2.938s
sys 0m28.536s
/****************************** udpflood.c ******************************/
/* It is an adaptation of the Eric Dumazet's and David Miller's
* udpflood tool, by adding IPv6 support.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdint.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#define _GNU_SOURCE
#include <getopt.h>
typedef uint32_t u32;
static int debug = 0;
/* Allow -fstrict-aliasing */
typedef union sa_u {
struct sockaddr_storage a46;
struct sockaddr_in a4;
struct sockaddr_in6 a6;
} sa_u;
static int usage(void)
{
printf("usage: udpflood [ -l count ] [ -m message_size ] [ -c num_ip_addrs ] IP_ADDRESS\n");
return -1;
}
static u32 get_last32h(const sa_u *sa)
{
if (sa->a46.ss_family == PF_INET)
return ntohl(sa->a4.sin_addr.s_addr);
else
return ntohl(sa->a6.sin6_addr.s6_addr32[3]);
}
static void set_last32h(sa_u *sa, u32 last32h)
{
if (sa->a46.ss_family == PF_INET)
sa->a4.sin_addr.s_addr = htonl(last32h);
else
sa->a6.sin6_addr.s6_addr32[3] = htonl(last32h);
}
static void print_saddr(const sa_u *sa, const char *msg)
{
char buf[64];
if (!debug)
return;
switch (sa->a46.ss_family) {
case PF_INET:
inet_ntop(PF_INET, &(sa->a4.sin_addr.s_addr), buf,
sizeof(buf));
break;
case PF_INET6:
inet_ntop(PF_INET6, &(sa->a6.sin6_addr), buf, sizeof(buf));
break;
}
printf("%s: %s\n", msg, buf);
}
static int send_packets(const sa_u *sa, size_t num_addrs, int count, int msg_sz)
{
char *msg = malloc(msg_sz);
sa_u saddr;
u32 start_addr32h, end_addr32h, cur_addr32h;
int fd, i, err;
if (!msg)
return -ENOMEM;
memset(msg, 0, msg_sz);
memcpy(&saddr, sa, sizeof(saddr));
cur_addr32h = start_addr32h = get_last32h(&saddr);
end_addr32h = start_addr32h + num_addrs;
fd = socket(saddr.a46.ss_family, SOCK_DGRAM, 0);
if (fd < 0) {
perror("socket");
err = fd;
goto out_nofd;
}
/* connect to avoid the kernel spending time in figuring
* out the source address (i.e pin the src address)
*/
err = connect(fd, (struct sockaddr *) &saddr, sizeof(saddr));
if (err < 0) {
perror("connect");
goto out;
}
print_saddr(&saddr, "start_addr");
for (i = 0; i < count; i++) {
print_saddr(&saddr, "sendto");
err = sendto(fd, msg, msg_sz, 0, (struct sockaddr *)&saddr,
sizeof(saddr));
if (err < 0) {
perror("sendto");
goto out;
}
if (++cur_addr32h >= end_addr32h)
cur_addr32h = start_addr32h;
set_last32h(&saddr, cur_addr32h);
}
err = 0;
out:
close(fd);
out_nofd:
free(msg);
return err;
}
int main(int argc, char **argv, char **envp)
{
int port, msg_sz, count, num_addrs, ret;
sa_u start_addr;
port = 6000;
msg_sz = 32;
count = 10000000;
num_addrs = 1;
while ((ret = getopt(argc, argv, "dl:s:p:c:")) >= 0) {
switch (ret) {
case 'l':
sscanf(optarg, "%d", &count);
break;
case 's':
sscanf(optarg, "%d", &msg_sz);
break;
case 'p':
sscanf(optarg, "%d", &port);
break;
case 'c':
sscanf(optarg, "%d", &num_addrs);
break;
case 'd':
debug = 1;
break;
case '?':
return usage();
}
}
if (num_addrs < 1)
return usage();
if (!argv[optind])
return usage();
start_addr.a4.sin_port = htons(port);
if (inet_pton(PF_INET, argv[optind], &start_addr.a4.sin_addr))
start_addr.a46.ss_family = PF_INET;
else if (inet_pton(PF_INET6, argv[optind], &start_addr.a6.sin6_addr.s6_addr))
start_addr.a46.ss_family = PF_INET6;
else
return usage();
return send_packets(&start_addr, num_addrs, count, msg_sz);
}
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check
2014-10-07 0:05 [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Martin KaFai Lau
@ 2014-10-07 0:05 ` Martin KaFai Lau
2014-10-08 19:08 ` David Miller
2014-10-07 0:05 ` [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
2014-10-07 0:10 ` [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Hannes Frederic Sowa
2 siblings, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-07 0:05 UTC (permalink / raw)
To: netdev; +Cc: Hannes Frederic Sowa
The above BACKTRACK have already caught the rt == net->ipv6.ip6_null_entry case
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
net/ipv6/route.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bafde82..d53dc4f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -936,8 +936,7 @@ restart:
if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
BACKTRACK(net, &fl6->saddr);
- if (rt == net->ipv6.ip6_null_entry ||
- rt->rt6i_flags & RTF_CACHE)
+ if (rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->dst);
--
1.8.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check
2014-10-07 0:05 ` [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check Martin KaFai Lau
@ 2014-10-08 19:08 ` David Miller
0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2014-10-08 19:08 UTC (permalink / raw)
To: kafai; +Cc: netdev, hannes
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 6 Oct 2014 17:05:14 -0700
> The above BACKTRACK have already caught the rt == net->ipv6.ip6_null_entry case
>
> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
...
> @@ -936,8 +936,7 @@ restart:
> if (rt->rt6i_nsiblings)
> rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
> BACKTRACK(net, &fl6->saddr);
> - if (rt == net->ipv6.ip6_null_entry ||
> - rt->rt6i_flags & RTF_CACHE)
> + if (rt->rt6i_flags & RTF_CACHE)
> goto out;
>
> dst_hold(&rt->dst);
I think this is sort of going in the wrong direction.
The BACKTRACK() macro hides a lot of side effects inside of it's
implementation, and worst of all it hides a change of control flow
with it's "goto out;" and "goto restart;"
I'd rather see us clean this up in some way that someone auditing this
code won't be tricked into missing the control flow side effects, than
adding more dependencies upon BACKTRACK()'s implementation.
Thanks.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case
2014-10-07 0:05 [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Martin KaFai Lau
2014-10-07 0:05 ` [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check Martin KaFai Lau
@ 2014-10-07 0:05 ` Martin KaFai Lau
2014-10-08 19:32 ` David Miller
2014-10-07 0:10 ` [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Hannes Frederic Sowa
2 siblings, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-07 0:05 UTC (permalink / raw)
To: netdev; +Cc: Hannes Frederic Sowa
When there is a RTF_CACHE hit, no need to redo fib6_lookup()
with reachable=0.
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
net/ipv6/route.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d53dc4f..e40b5dc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -937,7 +937,7 @@ restart:
rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
BACKTRACK(net, &fl6->saddr);
if (rt->rt6i_flags & RTF_CACHE)
- goto out;
+ goto out1;
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
@@ -974,6 +974,7 @@ out:
reachable = 0;
goto restart_2;
}
+out1:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
out2:
--
1.8.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case
2014-10-07 0:05 ` [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
@ 2014-10-08 19:32 ` David Miller
2014-10-09 4:19 ` Martin Lau
0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2014-10-08 19:32 UTC (permalink / raw)
To: kafai; +Cc: netdev, hannes
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 6 Oct 2014 17:05:15 -0700
> When there is a RTF_CACHE hit, no need to redo fib6_lookup()
> with reachable=0.
>
> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Ok this looks fine, and I can see how there is a dependency with patch
#1.
But I also want to point out how this change show my point about
BACKTRACK() even more. Read this function after this patch is
applied and someone auditing might say "oh, 'out' label is now
unused, we can remove it"
Again, hidden control flow is really bad, and we've had very serious
bugs in the past because of it (which we've fixed by ditching the
side effect causing macros in favor of properly designed inline
functions).
Trying to be constructive, why don't we go in a direction like
the following?
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a318dd89..99612c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -772,6 +772,26 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
#endif
+static struct fib6_node *rt6_backtrack(struct net *net, struct rt6_info *rt, struct fib6_node *fn, const struct in6_addr *saddr)
+{
+ if (rt == net->ipv6.ip6_null_entry) {
+ struct fib6_node *pn;
+
+ while (1) {
+ if (fn->fn_flags & RTN_TL_ROOT)
+ break;
+ pn = fn->parent;
+ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
+ fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+ else
+ fn = pn;
+ if (fn->fn_flags & RTN_RTINFO)
+ break;
+ }
+ }
+ return fn;
+}
+
#define BACKTRACK(__net, saddr) \
do { \
if (rt == __net->ipv6.ip6_null_entry) { \
@@ -934,10 +954,15 @@ restart:
rt = rt6_select(fn, oif, strict | reachable);
if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
- BACKTRACK(net, &fl6->saddr);
- if (rt == net->ipv6.ip6_null_entry ||
- rt->rt6i_flags & RTF_CACHE)
- goto out;
+ fn = rt6_backtrack(net, rt, fn, &fl6->saddr);
+ if (rt == net->ipv6.ip6_null_entry) {
+ if (fn->fn_flags & RTN_TL_ROOT)
+ goto out;
+ if (fn->fn_flags & RTN_RTINFO)
+ goto restart;
+ }
+ if (rt->rt6i_flags & RTF_CACHE)
+ goto out1;
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
@@ -974,6 +999,7 @@ out:
reachable = 0;
goto restart_2;
}
+out1:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
out2:
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case
2014-10-08 19:32 ` David Miller
@ 2014-10-09 4:19 ` Martin Lau
0 siblings, 0 replies; 7+ messages in thread
From: Martin Lau @ 2014-10-09 4:19 UTC (permalink / raw)
To: David Miller; +Cc: netdev, hannes
Hi,
> Ok this looks fine, and I can see how there is a dependency with patch
> #1.
>
> But I also want to point out how this change show my point about
> BACKTRACK() even more. Read this function after this patch is
> applied and someone auditing might say "oh, 'out' label is now
> unused, we can remove it"
>
> Again, hidden control flow is really bad, and we've had very serious
> bugs in the past because of it (which we've fixed by ditching the
> side effect causing macros in favor of properly designed inline
> functions).
Agree.
> Trying to be constructive, why don't we go in a direction like
> the following?
Looks good. There is another place calling BACKTRACK. Similar change
also needs to happen there or some more re-factoring is needed.
I also have another idea to further reduce the number of fib6_lookup() in
the CONFIG_IPV6_MULTIPLE_TABLES case. I will give it another attempt together.
Thanks,
Martin
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index a318dd89..99612c5 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -772,6 +772,26 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
> }
> #endif
>
> +static struct fib6_node *rt6_backtrack(struct net *net, struct rt6_info *rt, struct fib6_node *fn, const struct in6_addr *saddr)
> +{
> + if (rt == net->ipv6.ip6_null_entry) {
> + struct fib6_node *pn;
> +
> + while (1) {
> + if (fn->fn_flags & RTN_TL_ROOT)
> + break;
> + pn = fn->parent;
> + if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
> + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
> + else
> + fn = pn;
> + if (fn->fn_flags & RTN_RTINFO)
> + break;
> + }
> + }
> + return fn;
> +}
> +
> #define BACKTRACK(__net, saddr) \
> do { \
> if (rt == __net->ipv6.ip6_null_entry) { \
> @@ -934,10 +954,15 @@ restart:
> rt = rt6_select(fn, oif, strict | reachable);
> if (rt->rt6i_nsiblings)
> rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
> - BACKTRACK(net, &fl6->saddr);
> - if (rt == net->ipv6.ip6_null_entry ||
> - rt->rt6i_flags & RTF_CACHE)
> - goto out;
> + fn = rt6_backtrack(net, rt, fn, &fl6->saddr);
> + if (rt == net->ipv6.ip6_null_entry) {
> + if (fn->fn_flags & RTN_TL_ROOT)
> + goto out;
> + if (fn->fn_flags & RTN_RTINFO)
> + goto restart;
> + }
> + if (rt->rt6i_flags & RTF_CACHE)
> + goto out1;
>
> dst_hold(&rt->dst);
> read_unlock_bh(&table->tb6_lock);
> @@ -974,6 +999,7 @@ out:
> reachable = 0;
> goto restart_2;
> }
> +out1:
> dst_hold(&rt->dst);
> read_unlock_bh(&table->tb6_lock);
> out2:
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit
2014-10-07 0:05 [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Martin KaFai Lau
2014-10-07 0:05 ` [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check Martin KaFai Lau
2014-10-07 0:05 ` [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
@ 2014-10-07 0:10 ` Hannes Frederic Sowa
2 siblings, 0 replies; 7+ messages in thread
From: Hannes Frederic Sowa @ 2014-10-07 0:10 UTC (permalink / raw)
To: Martin KaFai Lau, netdev
On Tue, Oct 7, 2014, at 02:05, Martin KaFai Lau wrote:
> Notes: Same as the last two versions but fixed a title and missing
> Signed-Off issue.
>
> I am trying to understand why there is a need to restart fib6_lookup()
> after
> getting rt with RTF_CACHE.
>
> I have adapted davem's udpflood test
> (https://urldefense.proofpoint.com/v1/url?u=https://git.kernel.org/pub/scm/linux/kernel/git/davem/net_test_tools.git&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=%2Faj1ZOQObwbmtLwlDw3XzQ%3D%3D%0A&m=j4KoKiV%2FLl4Dx6wOKiDLZPDODlbMJ5UBybTiTzIRHTM%3D%0A&s=68cac2d1d239e23b104065419b4ad89ea80bc1401571034ccce3b4a52a98a8d3)
> to
> support IPv6 and here is the result:
>
> #root > time ./udpflood -l 20000000 -c 250 2401:db00:face:face::2
>
> Before:
> real 0m33.224s
> user 0m2.941s
> sys 0m30.232s
>
> After:
> real 0m31.517s
> user 0m2.938s
> sys 0m28.536s
Thanks, Martin! I'll review them ASAP.
Bye,
Hannes
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2014-10-09 4:20 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-10-07 0:05 [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Martin KaFai Lau
2014-10-07 0:05 ` [PATCH RFC v3 net 1/2] ipv6: Remove the net->ipv6.ip6_null_entry check Martin KaFai Lau
2014-10-08 19:08 ` David Miller
2014-10-07 0:05 ` [PATCH RFC v3 net 2/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
2014-10-08 19:32 ` David Miller
2014-10-09 4:19 ` Martin Lau
2014-10-07 0:10 ` [PATCH RFC v3 net 0/2] ipv6: Avoid restarting fib6_lookup() for RTF_CACHE hit Hannes Frederic Sowa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).