From: Christian Brauner <brauner@kernel.org>
To: Eric Dumazet <edumazet@google.com>,
Kuniyuki Iwashima <kuniyu@amazon.com>,
Oleg Nesterov <oleg@redhat.com>,
linux-fsdevel@vger.kernel.org, Jann Horn <jannh@google.com>
Cc: "David S. Miller" <davem@davemloft.net>,
"Alexander Viro" <viro@zeniv.linux.org.uk>,
"Daan De Meyer" <daan.j.demeyer@gmail.com>,
"David Rheinsberg" <david@readahead.eu>,
"Jakub Kicinski" <kuba@kernel.org>, "Jan Kara" <jack@suse.cz>,
"Lennart Poettering" <lennart@poettering.net>,
"Luca Boccassi" <bluca@debian.org>, "Mike Yuan" <me@yhndnzj.com>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>,
"Zbigniew Jędrzejewski-Szmek" <zbyszek@in.waw.pl>,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
"Christian Brauner" <brauner@kernel.org>,
"Alexander Mikhalitsyn" <alexander@mihalicyn.com>
Subject: [PATCH RFC v3 03/10] net: reserve prefix
Date: Mon, 05 May 2025 13:13:41 +0200 [thread overview]
Message-ID: <20250505-work-coredump-socket-v3-3-e1832f0e1eae@kernel.org> (raw)
In-Reply-To: <20250505-work-coredump-socket-v3-0-e1832f0e1eae@kernel.org>
Add the reserved "linuxafsk/" prefix for AF_UNIX sockets and require
CAP_NET_ADMIN in the owning user namespace of the network namespace to
bind it. This will be used in later patches of this series to support the
coredump socket, but it is a generally useful concept.
The collision risk is so low that we can just start using it. Userspace
must already be prepared to retry if a given abstract address isn't
usable anyway.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
include/uapi/linux/un.h | 2 ++
net/unix/af_unix.c | 45 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/un.h b/include/uapi/linux/un.h
index 0ad59dc8b686..bbd5ad508dfa 100644
--- a/include/uapi/linux/un.h
+++ b/include/uapi/linux/un.h
@@ -5,6 +5,8 @@
#include <linux/socket.h>
#define UNIX_PATH_MAX 108
+/* reserved AF_UNIX socket namespace. */
+#define UNIX_SOCKET_NAMESPACE "linuxafsk/"
struct sockaddr_un {
__kernel_sa_family_t sun_family; /* AF_UNIX */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 472f8aa9ea15..edc2f143f401 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,9 @@ static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
+static struct sockaddr_un linuxafsk_addr;
+static size_t linuxafsk_addr_len;
+
/* SMP locking strategy:
* hash table is protected with spinlock.
* each socket state is protected by separate spinlock.
@@ -436,6 +439,30 @@ static struct sock *__unix_find_socket_byname(struct net *net,
return NULL;
}
+static int unix_may_bind_name(struct net *net, struct sockaddr_un *sunname,
+ int len, unsigned int hash)
+{
+ struct sock *s;
+
+ s = __unix_find_socket_byname(net, sunname, len, hash);
+ if (s)
+ return -EADDRINUSE;
+
+ /*
+ * Names starting with the reserved prefix (family, leading NUL and
+ * UNIX_SOCKET_NAMESPACE) may only be bound with CAP_NET_ADMIN in
+ * the user namespace owning @net; both refusals are -ECONNREFUSED.
+ */
+ if (linuxafsk_addr_len <= len &&
+ !memcmp(&linuxafsk_addr, sunname, linuxafsk_addr_len)) {
+ /* An exact-length match is the bare prefix: never bindable. */
+ if (linuxafsk_addr_len == len)
+ return -ECONNREFUSED;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -ECONNREFUSED;
+ }
+ return 0;
+}
+
static inline struct sock *unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
int len, unsigned int hash)
@@ -1258,10 +1285,10 @@ static int unix_autobind(struct sock *sk)
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
+ if (unix_may_bind_name(net, addr->name, addr->len, new_hash)) {
unix_table_double_unlock(net, old_hash, new_hash);
- /* __unix_find_socket_byname() may take long time if many names
+ /* unix_may_bind_name() may take long time if many names
* are already in use.
*/
cond_resched();
@@ -1379,7 +1406,8 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
+ err = unix_may_bind_name(net, addr->name, addr->len, new_hash);
+ if (err)
goto out_spin;
__unix_set_addr_hash(net, sk, addr, new_hash);
@@ -1389,7 +1417,6 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
out_spin:
unix_table_double_unlock(net, old_hash, new_hash);
- err = -EADDRINUSE;
out_mutex:
mutex_unlock(&u->bindlock);
out:
@@ -3841,6 +3868,16 @@ static int __init af_unix_init(void)
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
+ /*
+ * We need a leading NUL byte for the abstract namespace. Just
+ * use the trailing one given by sizeof().
+ */
+ linuxafsk_addr_len = offsetof(struct sockaddr_un, sun_path) + sizeof(UNIX_SOCKET_NAMESPACE);
+ linuxafsk_addr.sun_family = AF_UNIX;
+ memcpy(linuxafsk_addr.sun_path + 1, UNIX_SOCKET_NAMESPACE, sizeof(UNIX_SOCKET_NAMESPACE) - 1);
+ /* Technically not needed, but let's be explicit. */
+ linuxafsk_addr.sun_path[0] = '\0';
+
for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
spin_lock_init(&bsd_socket_locks[i]);
INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
--
2.47.2
next prev parent reply other threads:[~2025-05-05 11:14 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-05 11:13 [PATCH RFC v3 00/10] coredump: add coredump socket Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 01/10] coredump: massage format_corname() Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 02/10] coredump: massage do_coredump() Christian Brauner
2025-05-05 11:13 ` Christian Brauner [this message]
2025-05-05 11:13 ` [PATCH RFC v3 04/10] coredump: add coredump socket Christian Brauner
2025-05-05 12:55 ` Jann Horn
2025-05-05 13:06 ` Luca Boccassi
2025-05-05 14:46 ` Christian Brauner
2025-05-05 18:48 ` Kuniyuki Iwashima
2025-05-06 8:24 ` Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 05/10] coredump: validate socket name as it is written Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 06/10] coredump: show supported coredump modes Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 07/10] pidfs, coredump: add PIDFD_INFO_COREDUMP Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 08/10] net, pidfs, coredump: only allow coredumping tasks to connect to coredump socket Christian Brauner
2025-05-05 13:08 ` Jann Horn
2025-05-05 14:06 ` Christian Brauner
2025-05-05 18:40 ` Kuniyuki Iwashima
2025-05-05 19:10 ` Jann Horn
2025-05-05 19:35 ` Kuniyuki Iwashima
2025-05-05 19:44 ` Kuniyuki Iwashima
2025-05-05 19:55 ` Jann Horn
2025-05-05 20:41 ` Kuniyuki Iwashima
2025-05-06 7:39 ` Christian Brauner
2025-05-06 14:51 ` Jann Horn
2025-05-06 15:16 ` Christian Brauner
2025-05-06 19:28 ` Kuniyuki Iwashima
2025-05-07 11:50 ` Mickaël Salaün
2025-05-05 19:55 ` Jann Horn
2025-05-05 20:30 ` Kuniyuki Iwashima
2025-05-06 8:06 ` Christian Brauner
2025-05-06 14:37 ` Jann Horn
2025-05-06 19:18 ` Kuniyuki Iwashima
2025-05-07 11:51 ` Mickaël Salaün
2025-05-07 14:22 ` Lennart Poettering
2025-05-07 22:10 ` Paul Moore
2025-05-05 11:13 ` [PATCH RFC v3 09/10] selftests/pidfd: add PIDFD_INFO_COREDUMP infrastructure Christian Brauner
2025-05-05 11:13 ` [PATCH RFC v3 10/10] selftests/coredump: add tests for AF_UNIX coredumps Christian Brauner
2025-05-05 14:41 ` [PATCH RFC v3 00/10] coredump: add coredump socket Mickaël Salaün
2025-05-05 14:56 ` Christian Brauner
2025-05-05 15:38 ` Mickaël Salaün
2025-05-05 14:59 ` Jann Horn
2025-05-05 15:39 ` Mickaël Salaün
2025-05-05 18:33 ` Kuniyuki Iwashima
2025-05-06 7:33 ` Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250505-work-coredump-socket-v3-3-e1832f0e1eae@kernel.org \
--to=brauner@kernel.org \
--cc=alexander@mihalicyn.com \
--cc=bluca@debian.org \
--cc=daan.j.demeyer@gmail.com \
--cc=davem@davemloft.net \
--cc=david@readahead.eu \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=jack@suse.cz \
--cc=jannh@google.com \
--cc=kuba@kernel.org \
--cc=kuniyu@amazon.com \
--cc=lennart@poettering.net \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=me@yhndnzj.com \
--cc=netdev@vger.kernel.org \
--cc=oleg@redhat.com \
--cc=pabeni@redhat.com \
--cc=viro@zeniv.linux.org.uk \
--cc=zbyszek@in.waw.pl \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.