* [PATCH v2 bpf-next 2/2] selftests/bpf: Wait for the netstamp_needed_key static key to be turned on
2024-01-20 6:05 [PATCH v2 bpf-next 1/2] selftests/bpf: Fix the flaky tc_redirect_dtime test Martin KaFai Lau
@ 2024-01-20 6:05 ` Martin KaFai Lau
2024-01-23 5:02 ` [PATCH v2 bpf-next 1/2] selftests/bpf: Fix the flaky tc_redirect_dtime test Yonghong Song
2024-01-24 18:20 ` patchwork-bot+netdevbpf
2 siblings, 0 replies; 4+ messages in thread
From: Martin KaFai Lau @ 2024-01-20 6:05 UTC (permalink / raw)
To: bpf; +Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, kernel-team
From: Martin KaFai Lau <martin.lau@kernel.org>
After the previous patch that speeded up the test (by avoiding neigh
discovery in IPv6), the BPF CI occasionally hits this error:
rcv tstamp unexpected pkt rcv tstamp: actual 0 == expected 0
The test complains about the cmsg returned from the recvmsg() does not
have the rcv timestamp. Setting skb->tstamp or not is
controlled by a kernel static key "netstamp_needed_key". The static
key is enabled whenever this is at least one sk with the SOCK_TIMESTAMP
set.
The test_redirect_dtime does use setsockopt() to turn on
the SOCK_TIMESTAMP for the reading sk. In the kernel
net_enable_timestamp() has a delay to enable the "netstamp_needed_key"
when CONFIG_JUMP_LABEL is set. This potential delay is the likely reason
for packet missing rcv timestamp occasionally.
This patch is to create udp sockets with SOCK_TIMESTAMP set.
It sends and receives some packets until the received packet
has a rcv timestamp. It currently retries at most 5 times with 1s
in between. This should be enough to wait for the "netstamp_needed_key".
It then holds on to the socket and only closes it at the end of the test.
This guarantees that the test has the "netstamp_needed_key" key turned
on from the beginning.
To simplify the udp sockets setup, they are sending/receiving packets
in the same netns (ns_dst is used) and communicate over the "lo" dev.
Hence, the patch enables the "lo" dev in the ns_dst.
Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh")
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
v2: It is a new patch in v2 to address occasionally missing
skb->tstamp in the recvmesg().
.../selftests/bpf/prog_tests/tc_redirect.c | 79 ++++++++++++++++++-
1 file changed, 75 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 610887157fd8..dbe06aeaa2b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -291,6 +291,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
SYS(fail, "ip link set dev dst up");
+ SYS(fail, "ip link set dev lo up");
SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
@@ -468,7 +469,7 @@ static int set_forwarding(bool enable)
return 0;
}
-static void rcv_tstamp(int fd, const char *expected, size_t s)
+static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
struct __kernel_timespec pkt_ts = {};
char ctl[CMSG_SPACE(sizeof(pkt_ts))];
@@ -489,7 +490,7 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
ret = recvmsg(fd, &msg, 0);
if (!ASSERT_EQ(ret, s, "recvmsg"))
- return;
+ return -1;
ASSERT_STRNEQ(data, expected, s, "expected rcv data");
cmsg = CMSG_FIRSTHDR(&msg);
@@ -498,6 +499,12 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
+ if (tstamp) {
+ /* caller will check the tstamp itself */
+ *tstamp = pkt_ns;
+ return 0;
+ }
+
ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
ret = clock_gettime(CLOCK_REALTIME, &now_ts);
@@ -507,6 +514,60 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
"check rcv tstamp");
+ return 0;
+}
+
+static void rcv_tstamp(int fd, const char *expected, size_t s)
+{
+ __rcv_tstamp(fd, expected, s, NULL);
+}
+
+static int wait_netstamp_needed_key(void)
+{
+ int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+ __u64 tstamp = 0;
+
+ nstoken = open_netns(NS_DST);
+ if (!nstoken)
+ return -1;
+
+ srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto done;
+
+ err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
+ goto done;
+
+again:
+ n = write(cli_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
+ if (!ASSERT_OK(err, "__rcv_tstamp"))
+ goto done;
+ if (!tstamp && nretries++ < 5) {
+ sleep(1);
+ printf("netstamp_needed_key retry#%d\n", nretries);
+ goto again;
+ }
+
+done:
+ if (!tstamp && srv_fd != -1) {
+ close(srv_fd);
+ srv_fd = -1;
+ }
+ if (cli_fd != -1)
+ close(cli_fd);
+ close_netns(nstoken);
+ return srv_fd;
}
static void snd_tstamp(int fd, char *b, size_t s)
@@ -843,11 +904,20 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
{
struct test_tc_dtime *skel;
struct nstoken *nstoken;
- int err;
+ int hold_tstamp_fd, err;
+
+ /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
+ * is no delay in the kernel net_enable_timestamp().
+ * This ensures the following tests must have
+ * non zero rcv tstamp in the recvmsg().
+ */
+ hold_tstamp_fd = wait_netstamp_needed_key();
+ if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
+ return;
skel = test_tc_dtime__open();
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
@@ -892,6 +962,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
done:
test_tc_dtime__destroy(skel);
+ close(hold_tstamp_fd);
}
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH v2 bpf-next 1/2] selftests/bpf: Fix the flaky tc_redirect_dtime test
2024-01-20 6:05 [PATCH v2 bpf-next 1/2] selftests/bpf: Fix the flaky tc_redirect_dtime test Martin KaFai Lau
2024-01-20 6:05 ` [PATCH v2 bpf-next 2/2] selftests/bpf: Wait for the netstamp_needed_key static key to be turned on Martin KaFai Lau
@ 2024-01-23 5:02 ` Yonghong Song
2024-01-24 18:20 ` patchwork-bot+netdevbpf
2 siblings, 0 replies; 4+ messages in thread
From: Yonghong Song @ 2024-01-23 5:02 UTC (permalink / raw)
To: Martin KaFai Lau, bpf
Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, kernel-team
On 1/19/24 10:05 PM, Martin KaFai Lau wrote:
> From: Martin KaFai Lau <martin.lau@kernel.org>
>
> BPF CI has been reporting the tc_redirect_dtime test failing
> from time to time:
>
> test_inet_dtime:PASS:setns src 0 nsec
> (network_helpers.c:253: errno: No route to host) Failed to connect to server
> close_netns:PASS:setns 0 nsec
> test_inet_dtime:FAIL:connect_to_fd unexpected connect_to_fd: actual -1 < expected 0
> test_tcp_clear_dtime:PASS:tcp ip6 clear dtime ingress_fwdns_p100 0 nsec
>
> The connect_to_fd failure (EHOSTUNREACH) is from the
> test_tcp_clear_dtime() test and it is the very first IPv6 traffic
> after setting up all the links, addresses, and routes.
>
> The symptom is this first connect() is always slow. In my setup, it
> could take ~3s.
>
> After some tracing and tcpdump, the slowness is mostly spent in
> the neighbor solicitation in the "ns_fwd" namespace while
> the "ns_src" and "ns_dst" are fine.
>
> I forced the kernel to drop the neighbor solicitation messages.
> I can then reproduce EHOSTUNREACH. What actually happen could be:
> - the neighbor advertisement came back a little slow.
> - the "ns_fwd" namespace concluded a neighbor discovery failure
> and triggered the ndisc_error_report() => ip6_link_failure() =>
> icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0)
> - the client's connect() reports EHOSTUNREACH after receiving
> the ICMPV6_DEST_UNREACH message.
>
> The neigh table of both "ns_src" and "ns_dst" namespace has already
> been manually populated but not the "ns_fwd" namespace. This patch
I tried one experiment which comments out manual population of neigh mac address
like below:
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 518f143c5b0f..feb477366393 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -241,14 +241,14 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
-
+#if 0
if (result->dev_mode == MODE_VETH) {
SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
src_fwd_addr);
SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
src_fwd_addr);
}
-
+#endif
close_netns(nstoken);
/** setup in 'fwd' namespace */
@@ -284,12 +284,12 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
-
+#if 0
if (result->dev_mode == MODE_VETH) {
SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
}
-
+#endif
close_netns(nstoken);
return 0;
And I can 100% reliably trigger the following error:
$ ./test_progs -t tc_redirect
...
All error logs:
test_tc_redirect:PASS:pthread_create 0 nsec
netns_setup_namespaces:PASS:ip netns add ns_src 0 nsec
netns_setup_namespaces:PASS:ip netns add ns_fwd 0 nsec
netns_setup_namespaces:PASS:ip netns add ns_dst 0 nsec
test_tc_redirect_run_tests:PASS:setup namespaces 0 nsec
...
test_tcp:PASS:setns dst 0 nsec
test_tcp:PASS:listen 0 nsec
close_netns:PASS:setns 0 nsec
open_netns:PASS:malloc token 0 nsec
open_netns:PASS:open /proc/self/ns/net 0 nsec
open_netns:PASS:open netns fd 0 nsec
open_netns:PASS:setns 0 nsec
test_tcp:PASS:setns src 0 nsec
(network_helpers.c:253: errno: Operation now in progress) Failed to connect to server
test_tcp:FAIL:connect_to_fd unexpected connect_to_fd: actual -1 < expected 0
close_netns:PASS:setns 0 nsec
test_ping:FAIL:ip netns exec ns_src ping -i 0.2 -c 3 -w 10 -q 172.16.2.100 > /dev/null unexpected error: 256 (errno 115)
...
open_netns:PASS:setns 0 nsec
test_tcp:PASS:setns src 0 nsec
(network_helpers.c:253: errno: Operation now in progress) Failed to connect to server
test_tcp:FAIL:connect_to_fd unexpected connect_to_fd: actual -1 < expected 0
close_netns:PASS:setns 0 nsec
test_ping:FAIL:ip netns exec ns_src ping6 -i 0.2 -c 3 -w 10 -q 0::2:dead:beef:cafe > /dev/null unexpected error: 256 (errno 115)
close_netns:PASS:setns 0 nsec
The error message:
(network_helpers.c:253: errno: Operation now in progress) Failed to connect to server
not exactly the same but very similar to the failure observed in CI:
(network_helpers.c:253: errno: No route to host) Failed to connect to server
The above error 'Operation now in progress' may indicate some slowness in the kernel
which caused the test failure, with or without my above hack.
> fixes it by manually populating the neigh table also in the "ns_fwd"
> namespace.
>
> Although the namespace configuration part had been existed before
> the tc_redirect_dtime test, still Fixes-tagging the patch when
> the tc_redirect_dtime test was added since it is the only test
> hitting it so far.
>
> Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh")
> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
> ---
> tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 11 +++++++++++
> 1 file changed, 11 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
> index 518f143c5b0f..610887157fd8 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
> @@ -188,6 +188,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
> {
> struct nstoken *nstoken = NULL;
> char src_fwd_addr[IFADDR_STR_LEN+1] = {};
> + char src_addr[IFADDR_STR_LEN + 1] = {};
> int err;
>
> if (result->dev_mode == MODE_VETH) {
> @@ -208,6 +209,9 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
> if (get_ifaddr("src_fwd", src_fwd_addr))
> goto fail;
>
> + if (get_ifaddr("src", src_addr))
> + goto fail;
> +
> result->ifindex_src = if_nametoindex("src");
> if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
> goto fail;
> @@ -270,6 +274,13 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
> SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
> SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
>
> + if (result->dev_mode == MODE_VETH) {
> + SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
> + SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
> + SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
> + SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
> + }
> +
> close_netns(nstoken);
>
> /** setup in 'dst' namespace */
^ permalink raw reply related [flat|nested] 4+ messages in thread