From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>,
Kuniyuki Iwashima <kuni1840@gmail.com>, <netdev@vger.kernel.org>
Subject: [PATCH v1 net] selftest: Don't reuse port for SO_INCOMING_CPU test.
Date: Fri, 19 Jan 2024 19:16:42 -0800 [thread overview]
Message-ID: <20240120031642.67014-1-kuniyu@amazon.com> (raw)
Jakub reported that ASSERT_EQ(cpu, i) in so_incoming_cpu.c seems to
fire somewhat randomly.
# # RUN so_incoming_cpu.before_reuseport.test3 ...
# # so_incoming_cpu.c:191:test3:Expected cpu (32) == i (0)
# # test3: Test terminated by assertion
# # FAIL so_incoming_cpu.before_reuseport.test3
# not ok 3 so_incoming_cpu.before_reuseport.test3
When the test failed, not-yet-accepted CLOSE_WAIT sockets received
SYN with a "challenging" SEQ number, which was sent from an unexpected
CPU that did not create the receiver.
The test basically does:
1. for each cpu:
1-1. create a server
1-2. set SO_INCOMING_CPU
2. for each cpu:
2-1. set cpu affinity
2-2. create some clients
2-3. let clients connect() to the server on the same cpu
2-4. close() clients
3. for each server:
3-1. accept() all child sockets
3-2. check if all children have the same SO_INCOMING_CPU with the server
The root cause was the close() in 2-4. and net.ipv4.tcp_tw_reuse.
In a loop of 2., close() changed the client state to FIN_WAIT_2, and
the peer transitioned to CLOSE_WAIT.
In another loop of 2., connect() happened to select the same port of
the FIN_WAIT_2 socket, and it was reused as the default value of
net.ipv4.tcp_tw_reuse is 2.
As a result, the new client sent SYN to the CLOSE_WAIT socket from
a different CPU, and the receiver's sk_incoming_cpu was overwritten
with unexpected CPU ID.
Also, the SYN had a different SEQ number, so the CLOSE_WAIT socket
responded with Challenge ACK. The new client properly returned RST
and effectively killed the CLOSE_WAIT socket.
This way, all clients were created successfully, but the error was
detected later by 3-2., ASSERT_EQ(cpu, i).
To avoid the failure, let's make sure that (i) the number of clients
is less than the number of available ports and (ii) such reuse never
happens.
Fixes: 6df96146b202 ("selftest: Add test for SO_INCOMING_CPU.")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
tools/testing/selftests/net/so_incoming_cpu.c | 68 ++++++++++++++-----
1 file changed, 50 insertions(+), 18 deletions(-)
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index a14818164102..e9fa14e10732 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -3,19 +3,16 @@
#define _GNU_SOURCE
#include <sched.h>
+#include <fcntl.h>
+
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/sysinfo.h>
#include "../kselftest_harness.h"
-#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */
-#define NR_SERVER self->nproc
-#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER)
-
FIXTURE(so_incoming_cpu)
{
- int nproc;
int *servers;
union {
struct sockaddr addr;
@@ -56,12 +53,47 @@ FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
.when_to_set = AFTER_ALL_LISTEN,
};
+static void write_sysctl(struct __test_metadata *_metadata,
+ char *filename, char *string)
+{
+ int fd, len, ret;
+
+ fd = open(filename, O_WRONLY);
+ ASSERT_NE(fd, -1);
+
+ len = strlen(string);
+ ret = write(fd, string, len);
+ ASSERT_EQ(ret, len);
+}
+
+static void setup_netns(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+ ASSERT_EQ(system("ip link set lo up"), 0);
+
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001");
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0");
+}
+
+#define NR_PORT (60001 - 10000 - 1)
+#define NR_CLIENT_PER_SERVER_DEFAULT 32
+static int nr_client_per_server, nr_server, nr_client;
+
FIXTURE_SETUP(so_incoming_cpu)
{
- self->nproc = get_nprocs();
- ASSERT_LE(2, self->nproc);
+ setup_netns(_metadata);
+
+ nr_server = get_nprocs();
+ ASSERT_LE(2, nr_server);
+
+ if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
+ nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
+ else
+ nr_client_per_server = NR_PORT / nr_server;
+
+ nr_client = nr_client_per_server * nr_server;
- self->servers = malloc(sizeof(int) * NR_SERVER);
+ self->servers = malloc(sizeof(int) * nr_server);
ASSERT_NE(self->servers, NULL);
self->in_addr.sin_family = AF_INET;
@@ -74,7 +106,7 @@ FIXTURE_TEARDOWN(so_incoming_cpu)
{
int i;
- for (i = 0; i < NR_SERVER; i++)
+ for (i = 0; i < nr_server; i++)
close(self->servers[i]);
free(self->servers);
@@ -110,10 +142,10 @@ int create_server(struct __test_metadata *_metadata,
if (variant->when_to_set == BEFORE_LISTEN)
set_so_incoming_cpu(_metadata, fd, cpu);
- /* We don't use CLIENT_PER_SERVER here not to block
+ /* We don't use nr_client_per_server here not to block
* this test at connect() if SO_INCOMING_CPU is broken.
*/
- ret = listen(fd, NR_CLIENT);
+ ret = listen(fd, nr_client);
ASSERT_EQ(ret, 0);
if (variant->when_to_set == AFTER_LISTEN)
@@ -128,7 +160,7 @@ void create_servers(struct __test_metadata *_metadata,
{
int i, ret;
- for (i = 0; i < NR_SERVER; i++) {
+ for (i = 0; i < nr_server; i++) {
self->servers[i] = create_server(_metadata, self, variant, i);
if (i == 0) {
@@ -138,7 +170,7 @@ void create_servers(struct __test_metadata *_metadata,
}
if (variant->when_to_set == AFTER_ALL_LISTEN) {
- for (i = 0; i < NR_SERVER; i++)
+ for (i = 0; i < nr_server; i++)
set_so_incoming_cpu(_metadata, self->servers[i], i);
}
}
@@ -149,7 +181,7 @@ void create_clients(struct __test_metadata *_metadata,
cpu_set_t cpu_set;
int i, j, fd, ret;
- for (i = 0; i < NR_SERVER; i++) {
+ for (i = 0; i < nr_server; i++) {
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);
@@ -162,7 +194,7 @@ void create_clients(struct __test_metadata *_metadata,
ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
ASSERT_EQ(ret, 0);
- for (j = 0; j < CLIENT_PER_SERVER; j++) {
+ for (j = 0; j < nr_client_per_server; j++) {
fd = socket(AF_INET, SOCK_STREAM, 0);
ASSERT_NE(fd, -1);
@@ -180,8 +212,8 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
int i, j, fd, cpu, ret, total = 0;
socklen_t len = sizeof(int);
- for (i = 0; i < NR_SERVER; i++) {
- for (j = 0; j < CLIENT_PER_SERVER; j++) {
+ for (i = 0; i < nr_server; i++) {
+ for (j = 0; j < nr_client_per_server; j++) {
/* If we see -EAGAIN here, SO_INCOMING_CPU is broken */
fd = accept(self->servers[i], &self->addr, &self->addrlen);
ASSERT_NE(fd, -1);
@@ -195,7 +227,7 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
}
}
- ASSERT_EQ(total, NR_CLIENT);
+ ASSERT_EQ(total, nr_client);
TH_LOG("SO_INCOMING_CPU is very likely to be "
"working correctly with %d sockets.", total);
}
--
2.30.2
next reply other threads:[~2024-01-20 3:17 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-20 3:16 Kuniyuki Iwashima [this message]
2024-01-22 21:13 ` [PATCH v1 net] selftest: Don't reuse port for SO_INCOMING_CPU test Jakub Kicinski
2024-01-23 10:00 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240120031642.67014-1-kuniyu@amazon.com \
--to=kuniyu@amazon.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=kuni1840@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.