* [PATCH v2 net-next 1/2] af_unix: Put a named socket in the global hash table.
2022-07-02 1:44 [PATCH v2 net-next 0/2] af_unix: Fix regression by the per-netns hash table series Kuniyuki Iwashima
@ 2022-07-02 1:44 ` Kuniyuki Iwashima
2022-07-02 1:44 ` [PATCH v2 net-next 2/2] selftests: net: af_unix: Test connect() with different netns Kuniyuki Iwashima
1 sibling, 0 replies; 3+ messages in thread
From: Kuniyuki Iwashima @ 2022-07-02 1:44 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Sachin Sant, Leonard Crestez, Nathan Chancellor,
Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
accidentally broke user API for named sockets. A named socket was able to
connect to a peer whose file was visible even if they were in different
network namespaces.
The commit puts all sockets into a per-netns hash table. As a result,
connect() to a socket in a different netns fails to find the peer and
returns -ECONNREFUSED even when the task can view the peer socket file.
We can reproduce this issue by:
Console A:
# python3
>>> from socket import *
>>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>>> s.bind('test')
>>> s.listen(32)
Console B:
# ip netns add test
# ip netns exec test sh
# python3
>>> from socket import *
>>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>>> s.connect('test')
Note when dumping sockets by sock_diag, procfs, and bpf_iter, they are
filtered only by netns. In other words, even if they are visible and
connectable, all sockets in different netns are skipped while iterating
sockets. Thus, we need a fix only for finding a peer socket.
This patch adds a global hash table for named sockets, links them with
sk_bind_node, and uses it in unix_find_socket_byinode(). By doing so,
we can keep sockets in per-netns hash tables and dump them easily.
Thanks to Sachin Sant and Leonard Crestez for reports, logs and a
reproducer.
Fixes: cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
Reported-by: Sachin Sant <sachinp@linux.ibm.com>
Reported-by: Leonard Crestez <cdleonard@gmail.com>
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/unix/af_unix.c | 47 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49f6626330c3..526b872cc710 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -119,6 +119,8 @@
#include "scm.h"
static atomic_long_t unix_nr_socks;
+static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
+static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
/* SMP locking strategy:
* hash table is protected with spinlock.
@@ -328,6 +330,24 @@ static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}
+static void unix_insert_bsd_socket(struct sock *sk)
+{
+ spin_lock(&bsd_socket_locks[sk->sk_hash]);
+ sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
+ spin_unlock(&bsd_socket_locks[sk->sk_hash]);
+}
+
+static void unix_remove_bsd_socket(struct sock *sk)
+{
+ if (!hlist_unhashed(&sk->sk_bind_node)) {
+ spin_lock(&bsd_socket_locks[sk->sk_hash]);
+ __sk_del_bind_node(sk);
+ spin_unlock(&bsd_socket_locks[sk->sk_hash]);
+
+ sk_node_init(&sk->sk_bind_node);
+ }
+}
+
static struct sock *__unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
int len, unsigned int hash)
@@ -358,22 +378,22 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
return s;
}
-static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+static struct sock *unix_find_socket_byinode(struct inode *i)
{
unsigned int hash = unix_bsd_hash(i);
struct sock *s;
- spin_lock(&net->unx.table.locks[hash]);
- sk_for_each(s, &net->unx.table.buckets[hash]) {
+ spin_lock(&bsd_socket_locks[hash]);
+ sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
- spin_unlock(&net->unx.table.locks[hash]);
+ spin_unlock(&bsd_socket_locks[hash]);
return s;
}
}
- spin_unlock(&net->unx.table.locks[hash]);
+ spin_unlock(&bsd_socket_locks[hash]);
return NULL;
}
@@ -577,6 +597,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
int state;
unix_remove_socket(sock_net(sk), sk);
+ unix_remove_bsd_socket(sk);
/* Clear state */
unix_state_lock(sk);
@@ -988,8 +1009,8 @@ static int unix_release(struct socket *sock)
return 0;
}
-static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
- int addr_len, int type)
+static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
+ int type)
{
struct inode *inode;
struct path path;
@@ -1010,7 +1031,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
if (!S_ISSOCK(inode->i_mode))
goto path_put;
- sk = unix_find_socket_byinode(net, inode);
+ sk = unix_find_socket_byinode(inode);
if (!sk)
goto path_put;
@@ -1058,7 +1079,7 @@ static struct sock *unix_find_other(struct net *net,
struct sock *sk;
if (sunaddr->sun_path[0])
- sk = unix_find_bsd(net, sunaddr, addr_len, type);
+ sk = unix_find_bsd(sunaddr, addr_len, type);
else
sk = unix_find_abstract(net, sunaddr, addr_len, type);
@@ -1179,6 +1200,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
u->path.dentry = dget(dentry);
__unix_set_addr_hash(net, sk, addr, new_hash);
unix_table_double_unlock(net, old_hash, new_hash);
+ unix_insert_bsd_socket(sk);
mutex_unlock(&u->bindlock);
done_path_create(&parent, dentry);
return 0;
@@ -3682,10 +3704,15 @@ static void __init bpf_iter_register(void)
static int __init af_unix_init(void)
{
- int rc = -1;
+ int i, rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
+ for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
+ spin_lock_init(&bsd_socket_locks[i]);
+ INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
+ }
+
rc = proto_register(&unix_dgram_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
--
2.30.2
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 net-next 2/2] selftests: net: af_unix: Test connect() with different netns.
2022-07-02 1:44 [PATCH v2 net-next 0/2] af_unix: Fix regression by the per-netns hash table series Kuniyuki Iwashima
2022-07-02 1:44 ` [PATCH v2 net-next 1/2] af_unix: Put a named socket in the global hash table Kuniyuki Iwashima
@ 2022-07-02 1:44 ` Kuniyuki Iwashima
1 sibling, 0 replies; 3+ messages in thread
From: Kuniyuki Iwashima @ 2022-07-02 1:44 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Sachin Sant, Leonard Crestez, Nathan Chancellor,
Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
This patch add a test that checks connect()ivity between two sockets:
unnamed socket -> bound socket
* SOCK_STREAM or SOCK_DGRAM
* pathname or abstract
* same or different netns
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
tools/testing/selftests/net/.gitignore | 1 +
tools/testing/selftests/net/af_unix/Makefile | 3 +-
.../selftests/net/af_unix/unix_connect.c | 149 ++++++++++++++++++
3 files changed, 152 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/net/af_unix/unix_connect.c
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index a29f79618934..1257baa79286 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -37,3 +37,4 @@ gro
ioam6_parser
toeplitz
cmsg_sender
+unix_connect
\ No newline at end of file
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index df341648f818..969620ae9928 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,2 +1,3 @@
-TEST_GEN_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob unix_connect
+
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
new file mode 100644
index 000000000000..5b231d8c4683
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(unix_connect)
+{
+ int server, client;
+ int family;
+};
+
+FIXTURE_VARIANT(unix_connect)
+{
+ int type;
+ char sun_path[8];
+ int len;
+ int flags;
+ int err;
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = CLONE_NEWNET,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = CLONE_NEWNET,
+ .err = ECONNREFUSED,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = CLONE_NEWNET,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = CLONE_NEWNET,
+ .err = ECONNREFUSED,
+};
+
+FIXTURE_SETUP(unix_connect)
+{
+ self->family = AF_UNIX;
+}
+
+FIXTURE_TEARDOWN(unix_connect)
+{
+ close(self->server);
+ close(self->client);
+
+ if (variant->sun_path[0])
+ remove("test");
+}
+
+#define offsetof(type, member) ((size_t)&((type *)0)->member)
+
+TEST_F(unix_connect, test)
+{
+ socklen_t addrlen;
+ struct sockaddr_un addr = {
+ .sun_family = self->family,
+ };
+ int err;
+
+ self->server = socket(self->family, variant->type, 0);
+ ASSERT_NE(-1, self->server);
+
+ addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len;
+ memcpy(&addr.sun_path, variant->sun_path, variant->len);
+
+ err = bind(self->server, (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, err);
+
+ if (variant->type == SOCK_STREAM) {
+ err = listen(self->server, 32);
+ ASSERT_EQ(0, err);
+ }
+
+ err = unshare(variant->flags);
+ ASSERT_EQ(0, err);
+
+ self->client = socket(self->family, variant->type, 0);
+ ASSERT_LT(0, self->client);
+
+ err = connect(self->client, (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(variant->err, err == -1 ? errno : 0);
+}
+
+TEST_HARNESS_MAIN
--
2.30.2
^ permalink raw reply related [flat|nested] 3+ messages in thread