* [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener
2020-05-04 17:34 [PATCH bpf-next 0/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
@ 2020-05-04 17:34 ` Stanislav Fomichev
2020-05-05 6:23 ` Andrii Nakryiko
2020-05-04 17:34 ` [PATCH bpf-next 2/4] selftests/bpf: adopt accept_timeout from sockmap_listen Stanislav Fomichev
` (2 subsequent siblings)
3 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2020-05-04 17:34 UTC (permalink / raw)
To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev, Andrey Ignatov
Move the following routines that let us start a background listener
thread and connect to a server by fd to the test_prog:
* start_server_thread - start background INADDR_ANY thread
* stop_server_thread - stop the thread
* connect_to_fd - connect to the server identified by fd
These will be used in the next commit.
Also, extend these helpers to support AF_INET6 and accept the family
as an argument.
Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
.../selftests/bpf/prog_tests/tcp_rtt.c | 115 +--------------
tools/testing/selftests/bpf/test_progs.c | 138 ++++++++++++++++++
tools/testing/selftests/bpf/test_progs.h | 3 +
3 files changed, 144 insertions(+), 112 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index e56b52ab41da..7d2b3b269b5f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -87,34 +87,6 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
return err;
}
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server");
- goto out;
- }
-
- return fd;
-
-out:
- close(fd);
- return -1;
-}
static int run_test(int cgroup_fd, int server_fd)
{
@@ -145,7 +117,7 @@ static int run_test(int cgroup_fd, int server_fd)
goto close_bpf_object;
}
- client_fd = connect_to_server(server_fd);
+ client_fd = connect_to_fd(AF_INET, server_fd);
if (client_fd < 0) {
err = -1;
goto close_bpf_object;
@@ -180,103 +152,22 @@ static int run_test(int cgroup_fd, int server_fd)
return err;
}
-static int start_server(void)
-{
- struct sockaddr_in addr = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
- if (fd < 0) {
- log_err("Failed to create server socket");
- return -1;
- }
-
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
- log_err("Failed to bind socket");
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
-static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
-static volatile bool server_done = false;
-
-static void *server_thread(void *arg)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd = *(int *)arg;
- int client_fd;
- int err;
-
- err = listen(fd, 1);
-
- pthread_mutex_lock(&server_started_mtx);
- pthread_cond_signal(&server_started);
- pthread_mutex_unlock(&server_started_mtx);
-
- if (CHECK_FAIL(err < 0)) {
- perror("Failed to listed on socket");
- return ERR_PTR(err);
- }
-
- while (true) {
- client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (client_fd == -1 && errno == EAGAIN) {
- usleep(50);
- continue;
- }
- break;
- }
- if (CHECK_FAIL(client_fd < 0)) {
- perror("Failed to accept client");
- return ERR_PTR(err);
- }
-
- while (!server_done)
- usleep(50);
-
- close(client_fd);
-
- return NULL;
-}
-
void test_tcp_rtt(void)
{
int server_fd, cgroup_fd;
- pthread_t tid;
- void *server_res;
cgroup_fd = test__join_cgroup("/tcp_rtt");
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server();
+ server_fd = start_server_thread(AF_INET);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
- goto close_server_fd;
-
- pthread_mutex_lock(&server_started_mtx);
- pthread_cond_wait(&server_started, &server_started_mtx);
- pthread_mutex_unlock(&server_started_mtx);
-
CHECK_FAIL(run_test(cgroup_fd, server_fd));
- server_done = true;
- CHECK_FAIL(pthread_join(tid, &server_res));
- CHECK_FAIL(IS_ERR(server_res));
+ stop_server_thread(server_fd);
-close_server_fd:
- close(server_fd);
close_cgroup_fd:
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 93970ec1c9e9..ebf1b3272848 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -371,6 +371,144 @@ void *spin_lock_thread(void *arg)
pthread_exit(arg);
}
+static int start_server(int family)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len;
+ int fd;
+
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (void *)&addr;
+
+ sin->sin_family = AF_INET;
+ len = sizeof(*sin);
+ } else {
+ struct sockaddr_in6 *sin6 = (void *)&addr;
+
+ sin6->sin6_family = AF_INET6;
+ len = sizeof(*sin6);
+ }
+
+ fd = socket(family, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (fd < 0) {
+ log_err("Failed to create server socket");
+ return -1;
+ }
+
+ if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ log_err("Failed to bind socket");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+static volatile bool server_done;
+pthread_t server_tid;
+
+static void *server_thread(void *arg)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd = *(int *)arg;
+ int client_fd;
+ int err;
+
+ err = listen(fd, 1);
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_signal(&server_started);
+ pthread_mutex_unlock(&server_started_mtx);
+
+ if (CHECK_FAIL(err < 0)) {
+ perror("Failed to listed on socket");
+ return ERR_PTR(err);
+ }
+
+ while (true) {
+ client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+ if (client_fd == -1 && errno == EAGAIN) {
+ usleep(50);
+ continue;
+ }
+ break;
+ }
+ if (CHECK_FAIL(client_fd < 0)) {
+ perror("Failed to accept client");
+ return ERR_PTR(err);
+ }
+
+ while (!server_done)
+ usleep(50);
+
+ close(client_fd);
+
+ return NULL;
+}
+
+int start_server_thread(int family)
+{
+ int fd = start_server(family);
+
+ if (fd < 0)
+ return -1;
+
+ if (CHECK_FAIL(pthread_create(&server_tid, NULL, server_thread,
+ (void *)&fd)))
+ goto err;
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_wait(&server_started, &server_started_mtx);
+ pthread_mutex_unlock(&server_started_mtx);
+
+ return fd;
+err:
+ close(fd);
+ return -1;
+}
+
+void stop_server_thread(int fd)
+{
+ void *server_res;
+
+ server_done = true;
+ CHECK_FAIL(pthread_join(server_tid, &server_res));
+ CHECK_FAIL(IS_ERR(server_res));
+ close(fd);
+}
+
+int connect_to_fd(int family, int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(family, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+ log_err("Fail to connect to server with family %d", family);
+ goto out;
+ }
+
+ return fd;
+
+out:
+ close(fd);
+ return -1;
+}
+
/* extern declarations for test funcs */
#define DEFINE_TEST(name) extern void test_##name(void);
#include <prog_tests/tests.h>
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 10188cc8e9e0..363a3f2273a4 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -157,6 +157,9 @@ int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
int extract_build_id(char *build_id, size_t size);
void *spin_lock_thread(void *arg);
+int start_server_thread(int family);
+void stop_server_thread(int fd);
+int connect_to_fd(int family, int server_fd);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
--
2.26.2.526.g744177e7f7-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener
2020-05-04 17:34 ` [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener Stanislav Fomichev
@ 2020-05-05 6:23 ` Andrii Nakryiko
2020-05-05 16:08 ` sdf
0 siblings, 1 reply; 16+ messages in thread
From: Andrii Nakryiko @ 2020-05-05 6:23 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Networking, bpf, David S. Miller, Alexei Starovoitov,
Daniel Borkmann, Andrey Ignatov
On Mon, May 4, 2020 at 10:37 AM Stanislav Fomichev <sdf@google.com> wrote:
>
> Move the following routines that let us start a background listener
> thread and connect to a server by fd to the test_prog:
> * start_server_thread - start background INADDR_ANY thread
> * stop_server_thread - stop the thread
> * connect_to_fd - connect to the server identified by fd
>
> These will be used in the next commit.
>
> Also, extend these helpers to support AF_INET6 and accept the family
> as an argument.
>
> Cc: Andrey Ignatov <rdna@fb.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
> .../selftests/bpf/prog_tests/tcp_rtt.c | 115 +--------------
> tools/testing/selftests/bpf/test_progs.c | 138 ++++++++++++++++++
> tools/testing/selftests/bpf/test_progs.h | 3 +
> 3 files changed, 144 insertions(+), 112 deletions(-)
Can this functionality be moved into a separate file, similar to
cgroup_helpers.{c.h}? This doesn't seem like helper routines needed
for most tests, so it doesn't make sense to me to keep piling
everything into test_progs.{c,h}.
[...]
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener
2020-05-05 6:23 ` Andrii Nakryiko
@ 2020-05-05 16:08 ` sdf
2020-05-05 18:50 ` Andrii Nakryiko
0 siblings, 1 reply; 16+ messages in thread
From: sdf @ 2020-05-05 16:08 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: Networking, bpf, David S. Miller, Alexei Starovoitov,
Daniel Borkmann, Andrey Ignatov
On 05/04, Andrii Nakryiko wrote:
> On Mon, May 4, 2020 at 10:37 AM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > Move the following routines that let us start a background listener
> > thread and connect to a server by fd to the test_prog:
> > * start_server_thread - start background INADDR_ANY thread
> > * stop_server_thread - stop the thread
> > * connect_to_fd - connect to the server identified by fd
> >
> > These will be used in the next commit.
> >
> > Also, extend these helpers to support AF_INET6 and accept the family
> > as an argument.
> >
> > Cc: Andrey Ignatov <rdna@fb.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> > .../selftests/bpf/prog_tests/tcp_rtt.c | 115 +--------------
> > tools/testing/selftests/bpf/test_progs.c | 138 ++++++++++++++++++
> > tools/testing/selftests/bpf/test_progs.h | 3 +
> > 3 files changed, 144 insertions(+), 112 deletions(-)
> Can this functionality be moved into a separate file, similar to
> cgroup_helpers.{c.h}? This doesn't seem like helper routines needed
> for most tests, so it doesn't make sense to me to keep piling
> everything into test_progs.{c,h}.
test_progs_helpers.{c,h}? And maybe move existing helpers like
bpf_find_map, spin_lock_thread, etc in there?
Or do you think that these networking helpers should be completely
independent
from test_progs?
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener
2020-05-05 16:08 ` sdf
@ 2020-05-05 18:50 ` Andrii Nakryiko
0 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2020-05-05 18:50 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Networking, bpf, David S. Miller, Alexei Starovoitov,
Daniel Borkmann, Andrey Ignatov
On Tue, May 5, 2020 at 9:08 AM <sdf@google.com> wrote:
>
> On 05/04, Andrii Nakryiko wrote:
> > On Mon, May 4, 2020 at 10:37 AM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > Move the following routines that let us start a background listener
> > > thread and connect to a server by fd to the test_prog:
> > > * start_server_thread - start background INADDR_ANY thread
> > > * stop_server_thread - stop the thread
> > > * connect_to_fd - connect to the server identified by fd
> > >
> > > These will be used in the next commit.
> > >
> > > Also, extend these helpers to support AF_INET6 and accept the family
> > > as an argument.
> > >
> > > Cc: Andrey Ignatov <rdna@fb.com>
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > > .../selftests/bpf/prog_tests/tcp_rtt.c | 115 +--------------
> > > tools/testing/selftests/bpf/test_progs.c | 138 ++++++++++++++++++
> > > tools/testing/selftests/bpf/test_progs.h | 3 +
> > > 3 files changed, 144 insertions(+), 112 deletions(-)
>
> > Can this functionality be moved into a separate file, similar to
> > cgroup_helpers.{c.h}? This doesn't seem like helper routines needed
> > for most tests, so it doesn't make sense to me to keep piling
> > everything into test_progs.{c,h}.
> test_progs_helpers.{c,h}? And maybe move existing helpers like
> bpf_find_map, spin_lock_thread, etc in there?
bpf_find_map is generic enough, I'd leave it in test_progs.c, at least
for now. spin_lock_thread is used in just two files, I'd just put it
(copy/pasted) in those two and remove it from test_progs. But that can
be done separately.
As for these new functions, they are network-specific, so I'd just add
a new network_helpers.c or something along those lines.
>
> Or do you think that these networking helpers should be completely
> independent
> from test_progs?
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH bpf-next 2/4] selftests/bpf: adopt accept_timeout from sockmap_listen
2020-05-04 17:34 [PATCH bpf-next 0/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
2020-05-04 17:34 ` [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener Stanislav Fomichev
@ 2020-05-04 17:34 ` Stanislav Fomichev
2020-05-04 17:34 ` [PATCH bpf-next 3/4] net: refactor arguments of inet{,6}_bind Stanislav Fomichev
2020-05-04 17:34 ` [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
3 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2020-05-04 17:34 UTC (permalink / raw)
To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev, Andrey Ignatov
Move accept_timeout and recv_timeout to the common place so they
can be reused by the other tests. Switch to accept_timeout() in
test_progs instead of doing while loop around accept().
This prevents the tests that use start_server_thread/stop_server_thread
from being stuck when the error occurs.
Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
.../selftests/bpf/prog_tests/sockmap_listen.c | 34 ---------------
tools/testing/selftests/bpf/test_progs.c | 43 +++++++++++++++----
tools/testing/selftests/bpf/test_progs.h | 4 ++
3 files changed, 39 insertions(+), 42 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..a91c31e64274 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -195,40 +195,6 @@
__ret; \
})
-static int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
{
struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index ebf1b3272848..306056bc63ae 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -428,14 +428,7 @@ static void *server_thread(void *arg)
return ERR_PTR(err);
}
- while (true) {
- client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (client_fd == -1 && errno == EAGAIN) {
- usleep(50);
- continue;
- }
- break;
- }
+ client_fd = accept_timeout(fd, (struct sockaddr *)&addr, &len, 3);
if (CHECK_FAIL(client_fd < 0)) {
perror("Failed to accept client");
return ERR_PTR(err);
@@ -509,6 +502,40 @@ int connect_to_fd(int family, int server_fd)
return -1;
}
+static int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
/* extern declarations for test funcs */
#define DEFINE_TEST(name) extern void test_##name(void);
#include <prog_tests/tests.h>
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 363a3f2273a4..4a48654a4f08 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -160,6 +160,10 @@ void *spin_lock_thread(void *arg);
int start_server_thread(int family);
void stop_server_thread(int fd);
int connect_to_fd(int family, int server_fd);
+int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec);
+int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
--
2.26.2.526.g744177e7f7-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH bpf-next 3/4] net: refactor arguments of inet{,6}_bind
2020-05-04 17:34 [PATCH bpf-next 0/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
2020-05-04 17:34 ` [PATCH bpf-next 1/4] selftests/bpf: generalize helpers to control backround listener Stanislav Fomichev
2020-05-04 17:34 ` [PATCH bpf-next 2/4] selftests/bpf: adopt accept_timeout from sockmap_listen Stanislav Fomichev
@ 2020-05-04 17:34 ` Stanislav Fomichev
2020-05-05 18:16 ` Martin KaFai Lau
2020-05-04 17:34 ` [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
3 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2020-05-04 17:34 UTC (permalink / raw)
To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev, Andrey Ignatov
The intent is to add an additional bind parameter in the next commit.
Instead of adding another argument, let's convert all existing
flag arguments into an extendable bit field.
No functional changes.
Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/net/inet_common.h | 6 +++++-
include/net/ipv6_stubs.h | 2 +-
net/core/filter.c | 6 ++++--
net/ipv4/af_inet.c | 10 +++++-----
net/ipv6/af_inet6.c | 10 +++++-----
5 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index ae2ba897675c..a0fb68f5bf59 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -35,8 +35,12 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+// Don't allocate port at this moment, defer to connect.
+#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
+// Grab and release socket lock.
+#define BIND_WITH_LOCK (1 << 1)
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
+ u32 flags);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index a5f7c12c326a..6e622dd3122e 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -63,7 +63,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
struct ipv6_bpf_stub {
int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
+ u32 flags);
struct sock *(*udp6_lib_lookup)(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
diff --git a/net/core/filter.c b/net/core/filter.c
index dfaf5df13722..fa9ddab5dd1f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4538,7 +4538,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
return err;
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
return err;
- return __inet_bind(sk, addr, addr_len, true, false);
+ return __inet_bind(sk, addr, addr_len,
+ BIND_FORCE_ADDRESS_NO_PORT);
#if IS_ENABLED(CONFIG_IPV6)
} else if (addr->sa_family == AF_INET6) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -4548,7 +4549,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
/* ipv6_bpf_stub cannot be NULL, since it's called from
* bpf_cgroup_inet6_connect hook and ipv6 is already loaded
*/
- return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+ return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len,
+ BIND_FORCE_ADDRESS_NO_PORT);
#endif /* CONFIG_IPV6 */
}
#endif /* CONFIG_INET */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6177c4ba0037..68e74b1b0f26 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -450,12 +450,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (err)
return err;
- return __inet_bind(sk, uaddr, addr_len, false, true);
+ return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock)
+ u32 flags)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -506,7 +506,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
- if (with_lock)
+ if (flags & BIND_WITH_LOCK)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
@@ -520,7 +520,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
/* Make sure we are allowed to bind here. */
if (snum || !(inet->bind_address_no_port ||
- force_bind_address_no_port)) {
+ (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
if (sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
@@ -543,7 +543,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
sk_dst_reset(sk);
err = 0;
out_release_sock:
- if (with_lock)
+ if (flags & BIND_WITH_LOCK)
release_sock(sk);
out:
return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 345baa0a754f..552c2592b81c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -273,7 +273,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
}
static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock)
+ u32 flags)
{
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -297,7 +297,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
- if (with_lock)
+ if (flags & BIND_WITH_LOCK)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
@@ -400,7 +400,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
/* Make sure we are allowed to bind here. */
if (snum || !(inet->bind_address_no_port ||
- force_bind_address_no_port)) {
+ (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
if (sk->sk_prot->get_port(sk, snum)) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
@@ -423,7 +423,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
- if (with_lock)
+ if (flags & BIND_WITH_LOCK)
release_sock(sk);
return err;
out_unlock:
@@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (err)
return err;
- return __inet6_bind(sk, uaddr, addr_len, false, true);
+ return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
}
EXPORT_SYMBOL(inet6_bind);
--
2.26.2.526.g744177e7f7-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 3/4] net: refactor arguments of inet{,6}_bind
2020-05-04 17:34 ` [PATCH bpf-next 3/4] net: refactor arguments of inet{,6}_bind Stanislav Fomichev
@ 2020-05-05 18:16 ` Martin KaFai Lau
2020-05-05 18:19 ` Stanislav Fomichev
0 siblings, 1 reply; 16+ messages in thread
From: Martin KaFai Lau @ 2020-05-05 18:16 UTC (permalink / raw)
To: Stanislav Fomichev; +Cc: netdev, bpf, davem, ast, daniel, Andrey Ignatov
On Mon, May 04, 2020 at 10:34:29AM -0700, Stanislav Fomichev wrote:
> The intent is to add an additional bind parameter in the next commit.
> Instead of adding another argument, let's convert all existing
> flag arguments into an extendable bit field.
>
> No functional changes.
>
> Cc: Andrey Ignatov <rdna@fb.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
> include/net/inet_common.h | 6 +++++-
> include/net/ipv6_stubs.h | 2 +-
> net/core/filter.c | 6 ++++--
> net/ipv4/af_inet.c | 10 +++++-----
> net/ipv6/af_inet6.c | 10 +++++-----
> 5 files changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/include/net/inet_common.h b/include/net/inet_common.h
> index ae2ba897675c..a0fb68f5bf59 100644
> --- a/include/net/inet_common.h
> +++ b/include/net/inet_common.h
> @@ -35,8 +35,12 @@ int inet_shutdown(struct socket *sock, int how);
> int inet_listen(struct socket *sock, int backlog);
> void inet_sock_destruct(struct sock *sk);
> int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
> +// Don't allocate port at this moment, defer to connect.
nit. stay with /* ... */
> +#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
> +// Grab and release socket lock.
> +#define BIND_WITH_LOCK (1 << 1)
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 3/4] net: refactor arguments of inet{,6}_bind
2020-05-05 18:16 ` Martin KaFai Lau
@ 2020-05-05 18:19 ` Stanislav Fomichev
0 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2020-05-05 18:19 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Netdev, bpf, David Miller, Alexei Starovoitov, Daniel Borkmann,
Andrey Ignatov
On Tue, May 5, 2020 at 11:16 AM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Mon, May 04, 2020 at 10:34:29AM -0700, Stanislav Fomichev wrote:
> > The intent is to add an additional bind parameter in the next commit.
> > Instead of adding another argument, let's convert all existing
> > flag arguments into an extendable bit field.
> >
> > No functional changes.
> >
> > Cc: Andrey Ignatov <rdna@fb.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> > include/net/inet_common.h | 6 +++++-
> > include/net/ipv6_stubs.h | 2 +-
> > net/core/filter.c | 6 ++++--
> > net/ipv4/af_inet.c | 10 +++++-----
> > net/ipv6/af_inet6.c | 10 +++++-----
> > 5 files changed, 20 insertions(+), 14 deletions(-)
> >
> > diff --git a/include/net/inet_common.h b/include/net/inet_common.h
> > index ae2ba897675c..a0fb68f5bf59 100644
> > --- a/include/net/inet_common.h
> > +++ b/include/net/inet_common.h
> > @@ -35,8 +35,12 @@ int inet_shutdown(struct socket *sock, int how);
> > int inet_listen(struct socket *sock, int backlog);
> > void inet_sock_destruct(struct sock *sk);
> > int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
> > +// Don't allocate port at this moment, defer to connect.
> nit. stay with /* ... */
Oh, good catch, thank you!
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-04 17:34 [PATCH bpf-next 0/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
` (2 preceding siblings ...)
2020-05-04 17:34 ` [PATCH bpf-next 3/4] net: refactor arguments of inet{,6}_bind Stanislav Fomichev
@ 2020-05-04 17:34 ` Stanislav Fomichev
2020-05-04 23:22 ` Andrey Ignatov
3 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2020-05-04 17:34 UTC (permalink / raw)
To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev, Andrey Ignatov
We want to have a tighter control on what ports we bind to in
the BPF_CGROUP_INET{4,6}_CONNECT hooks even if it means
connect() becomes slightly more expensive. The expensive part
comes from the fact that we now need to call inet_csk_get_port()
that verifies that the port is not used and allocates an entry
in the hash table for it.
Since we can't rely on "snum || !bind_address_no_port" to prevent
us from calling POST_BIND hook anymore, let's add another bind flag
to indicate that the call site is BPF program.
Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/net/inet_common.h | 2 +
net/core/filter.c | 9 +-
net/ipv4/af_inet.c | 10 +-
net/ipv6/af_inet6.c | 12 +-
.../bpf/prog_tests/connect_force_port.c | 104 ++++++++++++++++++
.../selftests/bpf/progs/connect_force_port4.c | 28 +++++
.../selftests/bpf/progs/connect_force_port6.c | 28 +++++
7 files changed, 177 insertions(+), 16 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/connect_force_port.c
create mode 100644 tools/testing/selftests/bpf/progs/connect_force_port4.c
create mode 100644 tools/testing/selftests/bpf/progs/connect_force_port6.c
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index a0fb68f5bf59..4288fd052266 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -39,6 +39,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
// Grab and release socket lock.
#define BIND_WITH_LOCK (1 << 1)
+// Called from BPF program.
+#define BIND_FROM_BPF (1 << 2)
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
u32 flags);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/net/core/filter.c b/net/core/filter.c
index fa9ddab5dd1f..fc5161b9ff6a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4527,29 +4527,24 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
struct sock *sk = ctx->sk;
int err;
- /* Binding to port can be expensive so it's prohibited in the helper.
- * Only binding to IP is supported.
- */
err = -EINVAL;
if (addr_len < offsetofend(struct sockaddr, sa_family))
return err;
if (addr->sa_family == AF_INET) {
if (addr_len < sizeof(struct sockaddr_in))
return err;
- if (((struct sockaddr_in *)addr)->sin_port != htons(0))
- return err;
return __inet_bind(sk, addr, addr_len,
+ BIND_FROM_BPF |
BIND_FORCE_ADDRESS_NO_PORT);
#if IS_ENABLED(CONFIG_IPV6)
} else if (addr->sa_family == AF_INET6) {
if (addr_len < SIN6_LEN_RFC2133)
return err;
- if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
- return err;
/* ipv6_bpf_stub cannot be NULL, since it's called from
* bpf_cgroup_inet6_connect hook and ipv6 is already loaded
*/
return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len,
+ BIND_FROM_BPF |
BIND_FORCE_ADDRESS_NO_PORT);
#endif /* CONFIG_IPV6 */
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 68e74b1b0f26..fcf0d12a407a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -526,10 +526,12 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = -EADDRINUSE;
goto out_release_sock;
}
- err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
- if (err) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- goto out_release_sock;
+ if (!(flags & BIND_FROM_BPF)) {
+ err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+ if (err) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ goto out_release_sock;
+ }
}
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 552c2592b81c..771a462a8322 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -407,11 +407,13 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = -EADDRINUSE;
goto out;
}
- err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
- if (err) {
- sk->sk_ipv6only = saved_ipv6only;
- inet_reset_saddr(sk);
- goto out;
+ if (!(flags & BIND_FROM_BPF)) {
+ err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+ if (err) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ goto out;
+ }
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
new file mode 100644
index 000000000000..ef2e5d02f4ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+static int verify_port(int family, int fd, int expected)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ __u16 port;
+
+
+ if (getsockname(fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ if (family == AF_INET)
+ port = ((struct sockaddr_in *)&addr)->sin_port;
+ else
+ port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+ if (ntohs(port) != expected) {
+ log_err("Unexpected port %d, expected %d", ntohs(port),
+ expected);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int run_test(int cgroup_fd, int server_fd, int family)
+{
+ struct bpf_prog_load_attr attr = {
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ };
+ struct bpf_object *obj;
+ int expected_port;
+ int prog_fd;
+ int err;
+ int fd;
+
+ if (family == AF_INET) {
+ attr.file = "./connect_force_port4.o";
+ attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+ expected_port = 22222;
+ } else {
+ attr.file = "./connect_force_port6.o";
+ attr.expected_attach_type = BPF_CGROUP_INET6_CONNECT;
+ expected_port = 22223;
+ }
+
+ err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ if (err) {
+ log_err("Failed to load BPF object");
+ return -1;
+ }
+
+ err = bpf_prog_attach(prog_fd, cgroup_fd, attr.expected_attach_type,
+ 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ fd = connect_to_fd(family, server_fd);
+ if (fd < 0) {
+ err = -1;
+ goto close_bpf_object;
+ }
+
+ err = verify_port(family, fd, expected_port);
+
+ close(fd);
+
+close_bpf_object:
+ bpf_object__close(obj);
+ return err;
+}
+
+void test_connect_force_port(void)
+{
+ int server_fd, cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/connect_force_port");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
+
+ server_fd = start_server_thread(AF_INET);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET));
+ stop_server_thread(server_fd);
+
+ server_fd = start_server_thread(AF_INET6);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6));
+ stop_server_thread(server_fd);
+
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
new file mode 100644
index 000000000000..1b8eb34b2db0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int _connect4(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {};
+
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(22222);
+ sa.sin_addr.s_addr = bpf_htonl(0x7f000001); /* 127.0.0.1 */
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
new file mode 100644
index 000000000000..8cd1a9e81f64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int _connect6(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa = {};
+
+ sa.sin6_family = AF_INET;
+ sa.sin6_port = bpf_htons(22223);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(1); /* ::1 */
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ return 1;
+}
--
2.26.2.526.g744177e7f7-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-04 17:34 ` [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper Stanislav Fomichev
@ 2020-05-04 23:22 ` Andrey Ignatov
2020-05-05 16:02 ` sdf
0 siblings, 1 reply; 16+ messages in thread
From: Andrey Ignatov @ 2020-05-04 23:22 UTC (permalink / raw)
To: Stanislav Fomichev; +Cc: netdev, bpf, davem, ast, daniel
Stanislav Fomichev <sdf@google.com> [Mon, 2020-05-04 10:34 -0700]:
> We want to have a tighter control on what ports we bind to in
> the BPF_CGROUP_INET{4,6}_CONNECT hooks even if it means
> connect() becomes slightly more expensive. The expensive part
> comes from the fact that we now need to call inet_csk_get_port()
> that verifies that the port is not used and allocates an entry
> in the hash table for it.
FWIW: Initially that IP_BIND_ADDRESS_NO_PORT limitation came from the
fact that on my specific use-case (mysql client making 200-500 connects
per sec to mysql server) disabling the option was making application
pretty much unusable (inet_csk_get_port was taking more time than mysql
client connect timeout == 3sec).
But I guess for some use-cases that call sys_connect not too often it
makes sense.
> Since we can't rely on "snum || !bind_address_no_port" to prevent
> us from calling POST_BIND hook anymore, let's add another bind flag
> to indicate that the call site is BPF program.
>
> Cc: Andrey Ignatov <rdna@fb.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
> include/net/inet_common.h | 2 +
> net/core/filter.c | 9 +-
> net/ipv4/af_inet.c | 10 +-
> net/ipv6/af_inet6.c | 12 +-
> .../bpf/prog_tests/connect_force_port.c | 104 ++++++++++++++++++
> .../selftests/bpf/progs/connect_force_port4.c | 28 +++++
> .../selftests/bpf/progs/connect_force_port6.c | 28 +++++
> 7 files changed, 177 insertions(+), 16 deletions(-)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/connect_force_port.c
> create mode 100644 tools/testing/selftests/bpf/progs/connect_force_port4.c
> create mode 100644 tools/testing/selftests/bpf/progs/connect_force_port6.c
Documentation in include/uapi/linux/bpf.h should be updated as well
since now it states this:
* **AF_INET6**). Looking for a free port to bind to can be
* expensive, therefore binding to port is not permitted by the
* helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
* must be set to zero.
IMO it's also worth to keep a note on performance implications of
setting port to non zero.
> diff --git a/net/core/filter.c b/net/core/filter.c
> index fa9ddab5dd1f..fc5161b9ff6a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4527,29 +4527,24 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
> struct sock *sk = ctx->sk;
> int err;
>
> - /* Binding to port can be expensive so it's prohibited in the helper.
> - * Only binding to IP is supported.
> - */
> err = -EINVAL;
> if (addr_len < offsetofend(struct sockaddr, sa_family))
> return err;
> if (addr->sa_family == AF_INET) {
> if (addr_len < sizeof(struct sockaddr_in))
> return err;
> - if (((struct sockaddr_in *)addr)->sin_port != htons(0))
> - return err;
> return __inet_bind(sk, addr, addr_len,
> + BIND_FROM_BPF |
> BIND_FORCE_ADDRESS_NO_PORT);
Should BIND_FORCE_ADDRESS_NO_PORT be passed only if port is zero?
Passing non zero port and BIND_FORCE_ADDRESS_NO_PORT at the same time
looks confusing (even though it works).
--
Andrey Ignatov
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-04 23:22 ` Andrey Ignatov
@ 2020-05-05 16:02 ` sdf
2020-05-05 17:09 ` sdf
2020-05-05 18:20 ` Andrey Ignatov
0 siblings, 2 replies; 16+ messages in thread
From: sdf @ 2020-05-05 16:02 UTC (permalink / raw)
To: Andrey Ignatov; +Cc: netdev, bpf, davem, ast, daniel
On 05/04, Andrey Ignatov wrote:
> Stanislav Fomichev <sdf@google.com> [Mon, 2020-05-04 10:34 -0700]:
> > We want to have a tighter control on what ports we bind to in
> > the BPF_CGROUP_INET{4,6}_CONNECT hooks even if it means
> > connect() becomes slightly more expensive. The expensive part
> > comes from the fact that we now need to call inet_csk_get_port()
> > that verifies that the port is not used and allocates an entry
> > in the hash table for it.
> FWIW: Initially that IP_BIND_ADDRESS_NO_PORT limitation came from the
> fact that on my specific use-case (mysql client making 200-500 connects
> per sec to mysql server) disabling the option was making application
> pretty much unusable (inet_csk_get_port was taking more time than mysql
> client connect timeout == 3sec).
> But I guess for some use-cases that call sys_connect not too often it
> makes sense.
Yeah, I don't think we plan to reach those QPS numbers.
But, for the record, did you try to bind to a random port in that
case? And did you bail out on error or did a couple of retries?
> > Since we can't rely on "snum || !bind_address_no_port" to prevent
> > us from calling POST_BIND hook anymore, let's add another bind flag
> > to indicate that the call site is BPF program.
> >
> > Cc: Andrey Ignatov <rdna@fb.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> > include/net/inet_common.h | 2 +
> > net/core/filter.c | 9 +-
> > net/ipv4/af_inet.c | 10 +-
> > net/ipv6/af_inet6.c | 12 +-
> > .../bpf/prog_tests/connect_force_port.c | 104 ++++++++++++++++++
> > .../selftests/bpf/progs/connect_force_port4.c | 28 +++++
> > .../selftests/bpf/progs/connect_force_port6.c | 28 +++++
> > 7 files changed, 177 insertions(+), 16 deletions(-)
> > create mode 100644
> tools/testing/selftests/bpf/prog_tests/connect_force_port.c
> > create mode 100644
> tools/testing/selftests/bpf/progs/connect_force_port4.c
> > create mode 100644
> tools/testing/selftests/bpf/progs/connect_force_port6.c
> Documentation in include/uapi/linux/bpf.h should be updated as well
> since now it states this:
> * **AF_INET6**). Looking for a free port to bind to can be
> * expensive, therefore binding to port is not permitted by
> the
> * helper: *addr*\ **->sin_port** (or **sin6_port**,
> respectively)
> * must be set to zero.
> IMO it's also worth to keep a note on performance implications of
> setting port to non zero.
Ah, thank you, will do!
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index fa9ddab5dd1f..fc5161b9ff6a 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -4527,29 +4527,24 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern
> *, ctx, struct sockaddr *, addr,
> > struct sock *sk = ctx->sk;
> > int err;
> >
> > - /* Binding to port can be expensive so it's prohibited in the helper.
> > - * Only binding to IP is supported.
> > - */
> > err = -EINVAL;
> > if (addr_len < offsetofend(struct sockaddr, sa_family))
> > return err;
> > if (addr->sa_family == AF_INET) {
> > if (addr_len < sizeof(struct sockaddr_in))
> > return err;
> > - if (((struct sockaddr_in *)addr)->sin_port != htons(0))
> > - return err;
> > return __inet_bind(sk, addr, addr_len,
> > + BIND_FROM_BPF |
> > BIND_FORCE_ADDRESS_NO_PORT);
> Should BIND_FORCE_ADDRESS_NO_PORT be passed only if port is zero?
> Passing non zero port and BIND_FORCE_ADDRESS_NO_PORT at the same time
> looks confusing (even though it works).
Makes sense, will remove it here, thx.
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-05 16:02 ` sdf
@ 2020-05-05 17:09 ` sdf
2020-05-05 17:33 ` Andrey Ignatov
2020-05-05 18:20 ` Andrey Ignatov
1 sibling, 1 reply; 16+ messages in thread
From: sdf @ 2020-05-05 17:09 UTC (permalink / raw)
To: Andrey Ignatov; +Cc: netdev, bpf, davem, ast, daniel
On 05/05, Stanislav Fomichev wrote:
> On 05/04, Andrey Ignatov wrote:
> > Stanislav Fomichev <sdf@google.com> [Mon, 2020-05-04 10:34 -0700]:
> > > [...]
> > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > index fa9ddab5dd1f..fc5161b9ff6a 100644
> > > --- a/net/core/filter.c
> > > +++ b/net/core/filter.c
> > > @@ -4527,29 +4527,24 @@ BPF_CALL_3(bpf_bind, struct
> bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
> > > struct sock *sk = ctx->sk;
> > > int err;
> > >
> > > - /* Binding to port can be expensive so it's prohibited in the
> helper.
> > > - * Only binding to IP is supported.
> > > - */
> > > err = -EINVAL;
> > > if (addr_len < offsetofend(struct sockaddr, sa_family))
> > > return err;
> > > if (addr->sa_family == AF_INET) {
> > > if (addr_len < sizeof(struct sockaddr_in))
> > > return err;
> > > - if (((struct sockaddr_in *)addr)->sin_port != htons(0))
> > > - return err;
> > > return __inet_bind(sk, addr, addr_len,
> > > + BIND_FROM_BPF |
> > > BIND_FORCE_ADDRESS_NO_PORT);
> >
> > Should BIND_FORCE_ADDRESS_NO_PORT be passed only if port is zero?
> > Passing non zero port and BIND_FORCE_ADDRESS_NO_PORT at the same time
> > looks confusing (even though it works).
> Makes sense, will remove it here, thx.
Looking at it some more, I think we need to always have that
BIND_FORCE_ADDRESS_NO_PORT. Otherwise, it might regress your
usecase with zero port:
if (snum || !(inet->bind_address_no_port ||
(flags & BIND_FORCE_ADDRESS_NO_PORT)))
If snum == 0 we want to have either the flag on or
IP_BIND_ADDRESS_NO_PORT being set on the socket to prevent the port
allocation a bind time.
If snum != 0, BIND_FORCE_ADDRESS_NO_PORT doesn't matter and the port
is passed as an argument. We don't need to search for a free one, just
to confirm it's not used.
Am I missing something?
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-05 17:09 ` sdf
@ 2020-05-05 17:33 ` Andrey Ignatov
2020-05-05 17:43 ` sdf
0 siblings, 1 reply; 16+ messages in thread
From: Andrey Ignatov @ 2020-05-05 17:33 UTC (permalink / raw)
To: sdf; +Cc: netdev, bpf, davem, ast, daniel
sdf@google.com <sdf@google.com> [Tue, 2020-05-05 10:09 -0700]:
> On 05/05, Stanislav Fomichev wrote:
> > On 05/04, Andrey Ignatov wrote:
> > > Stanislav Fomichev <sdf@google.com> [Mon, 2020-05-04 10:34 -0700]:
> > > > [...]
> > > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > > index fa9ddab5dd1f..fc5161b9ff6a 100644
> > > > --- a/net/core/filter.c
> > > > +++ b/net/core/filter.c
> > > > @@ -4527,29 +4527,24 @@ BPF_CALL_3(bpf_bind, struct
> > bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
> > > > struct sock *sk = ctx->sk;
> > > > int err;
> > > >
> > > > - /* Binding to port can be expensive so it's prohibited in the
> > helper.
> > > > - * Only binding to IP is supported.
> > > > - */
> > > > err = -EINVAL;
> > > > if (addr_len < offsetofend(struct sockaddr, sa_family))
> > > > return err;
> > > > if (addr->sa_family == AF_INET) {
> > > > if (addr_len < sizeof(struct sockaddr_in))
> > > > return err;
> > > > - if (((struct sockaddr_in *)addr)->sin_port != htons(0))
> > > > - return err;
> > > > return __inet_bind(sk, addr, addr_len,
> > > > + BIND_FROM_BPF |
> > > > BIND_FORCE_ADDRESS_NO_PORT);
> > >
> > > Should BIND_FORCE_ADDRESS_NO_PORT be passed only if port is zero?
> > > Passing non zero port and BIND_FORCE_ADDRESS_NO_PORT at the same time
> > > looks confusing (even though it works).
> > Makes sense, will remove it here, thx.
> Looking at it some more, I think we need to always have that
> BIND_FORCE_ADDRESS_NO_PORT. Otherwise, it might regress your
> usecase with zero port:
>
> if (snum || !(inet->bind_address_no_port ||
> (flags & BIND_FORCE_ADDRESS_NO_PORT)))
>
> If snum == 0 we want to have either the flag on or
> IP_BIND_ADDRESS_NO_PORT being set on the socket to prevent the port
> allocation a bind time.
Yes, if snum == 0 then flag is needed, that's why my previous comment
has "only if port is zero" part.
> If snum != 0, BIND_FORCE_ADDRESS_NO_PORT doesn't matter and the port
> is passed as an argument. We don't need to search for a free one, just
> to confirm it's not used.
Yes, if snum != 0 then flag doesn't matter. So both cases are covered by
your current code and that's what I meant by "(even though it works)".
My point is in the "snum != 0" case it would look better not to pass the
flag since:
1) as we see the flag doesn't matter on one hand;
2) but passing both port number and flag that says "bind only to address,
but not to port" can look confusing and raises a question "which
options wins? the one that sets the port or the one that asks to
ignore the port" and that question can be answered only by looking at
__inet_bind implementation.
so basically what I mean is:
flags = BIND_FROM_BPF;
if (((struct sockaddr_in *)addr)->sin_port == htons(0))
flags &= BIND_FORCE_ADDRESS_NO_PORT;
That won't change anything for "snum == 0" case, but it would make the
"snum != 0" case more readable IMO.
Does it clarify?
--
Andrey Ignatov
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-05 17:33 ` Andrey Ignatov
@ 2020-05-05 17:43 ` sdf
0 siblings, 0 replies; 16+ messages in thread
From: sdf @ 2020-05-05 17:43 UTC (permalink / raw)
To: Andrey Ignatov; +Cc: netdev, bpf, davem, ast, daniel
On 05/05, Andrey Ignatov wrote:
> sdf@google.com <sdf@google.com> [Tue, 2020-05-05 10:09 -0700]:
> > On 05/05, Stanislav Fomichev wrote:
> > > On 05/04, Andrey Ignatov wrote:
> > > > Stanislav Fomichev <sdf@google.com> [Mon, 2020-05-04 10:34 -0700]:
> > > > > [...]
> > > > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > > > index fa9ddab5dd1f..fc5161b9ff6a 100644
> > > > > --- a/net/core/filter.c
> > > > > +++ b/net/core/filter.c
> > > > > @@ -4527,29 +4527,24 @@ BPF_CALL_3(bpf_bind, struct
> > > bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
> > > > > struct sock *sk = ctx->sk;
> > > > > int err;
> > > > >
> > > > > - /* Binding to port can be expensive so it's prohibited in the
> > > helper.
> > > > > - * Only binding to IP is supported.
> > > > > - */
> > > > > err = -EINVAL;
> > > > > if (addr_len < offsetofend(struct sockaddr, sa_family))
> > > > > return err;
> > > > > if (addr->sa_family == AF_INET) {
> > > > > if (addr_len < sizeof(struct sockaddr_in))
> > > > > return err;
> > > > > - if (((struct sockaddr_in *)addr)->sin_port != htons(0))
> > > > > - return err;
> > > > > return __inet_bind(sk, addr, addr_len,
> > > > > + BIND_FROM_BPF |
> > > > > BIND_FORCE_ADDRESS_NO_PORT);
> > > >
> > > > Should BIND_FORCE_ADDRESS_NO_PORT be passed only if port is zero?
> > > > Passing non zero port and BIND_FORCE_ADDRESS_NO_PORT at the same
> time
> > > > looks confusing (even though it works).
> > > Makes sense, will remove it here, thx.
> > Looking at it some more, I think we need to always have that
> > BIND_FORCE_ADDRESS_NO_PORT. Otherwise, it might regress your
> > usecase with zero port:
> >
> > if (snum || !(inet->bind_address_no_port ||
> > (flags & BIND_FORCE_ADDRESS_NO_PORT)))
> >
> > If snum == 0 we want to have either the flag on or
> > IP_BIND_ADDRESS_NO_PORT being set on the socket to prevent the port
> > allocation a bind time.
> Yes, if snum == 0 then flag is needed, that's why my previous comment
> has "only if port is zero" part.
> > If snum != 0, BIND_FORCE_ADDRESS_NO_PORT doesn't matter and the port
> > is passed as an argument. We don't need to search for a free one, just
> > to confirm it's not used.
> Yes, if snum != 0 then flag doesn't matter. So both cases are covered by
> your current code and that's what I meant by "(even though it works)".
> My point is in the "snum != 0" case it would look better not to pass the
> flag since:
> 1) as we see the flag doesn't matter on one hand;
> 2) but passing both port number and flag that says "bind only to address,
> but not to port" can look confusing and raises a question "which
> options wins? the one that sets the port or the one that asks to
> ignore the port" and that question can be answered only by looking at
> __inet_bind implementation.
> so basically what I mean is:
> flags = BIND_FROM_BPF;
> if (((struct sockaddr_in *)addr)->sin_port == htons(0))
> flags &= BIND_FORCE_ADDRESS_NO_PORT;
> That won't change anything for "snum == 0" case, but it would make the
> "snum != 0" case more readable IMO.
> Does it clarify?
Yes, it does, thanks! I somehow missed your 'only if port is zero' part.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH bpf-next 4/4] bpf: allow any port in bpf_bind helper
2020-05-05 16:02 ` sdf
2020-05-05 17:09 ` sdf
@ 2020-05-05 18:20 ` Andrey Ignatov
1 sibling, 0 replies; 16+ messages in thread
From: Andrey Ignatov @ 2020-05-05 18:20 UTC (permalink / raw)
To: sdf; +Cc: netdev, bpf, davem, ast, daniel
sdf@google.com <sdf@google.com> [Tue, 2020-05-05 09:02 -0700]:
> On 05/04, Andrey Ignatov wrote:
> > Stanislav Fomichev <sdf@google.com> [Mon, 2020-05-04 10:34 -0700]:
> > > We want to have a tighter control on what ports we bind to in
> > > the BPF_CGROUP_INET{4,6}_CONNECT hooks even if it means
> > > connect() becomes slightly more expensive. The expensive part
> > > comes from the fact that we now need to call inet_csk_get_port()
> > > that verifies that the port is not used and allocates an entry
> > > in the hash table for it.
>
> > FWIW: Initially that IP_BIND_ADDRESS_NO_PORT limitation came from the
> > fact that on my specific use-case (mysql client making 200-500 connects
> > per sec to mysql server) disabling the option was making application
> > pretty much unusable (inet_csk_get_port was taking more time than mysql
> > client connect timeout == 3sec).
>
> > But I guess for some use-cases that call sys_connect not too often it
> > makes sense.
> Yeah, I don't think we plan to reach those QPS numbers.
> But, for the record, did you try to bind to a random port in that
> case? And did you bail out on error or did a couple of retries?
Random port.
As for retries: no retries on low-level (no reconnecting to that same
server if sys_connect failed), but I don't quite remember how
higher-level behaved (it was choosing a server to connect to according
to some sharding scheme and I don't remember whether it was trying to
connect to next replica or not if current replica failed), sorry.
--
Andrey Ignatov
^ permalink raw reply [flat|nested] 16+ messages in thread