linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christian Brauner <brauner@kernel.org>
To: Kuniyuki Iwashima <kuniyu@amazon.com>,
	linux-fsdevel@vger.kernel.org,  Jann Horn <jannh@google.com>
Cc: "Eric Dumazet" <edumazet@google.com>,
	"Oleg Nesterov" <oleg@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Daan De Meyer" <daan.j.demeyer@gmail.com>,
	"David Rheinsberg" <david@readahead.eu>,
	"Jakub Kicinski" <kuba@kernel.org>, "Jan Kara" <jack@suse.cz>,
	"Lennart Poettering" <lennart@poettering.net>,
	"Luca Boccassi" <bluca@debian.org>, "Mike Yuan" <me@yhndnzj.com>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Simon Horman" <horms@kernel.org>,
	"Zbigniew Jędrzejewski-Szmek" <zbyszek@in.waw.pl>,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	"Christian Brauner" <brauner@kernel.org>,
	"Alexander Mikhalitsyn" <alexander@mihalicyn.com>
Subject: [PATCH v4 04/11] net: reserve prefix
Date: Wed, 07 May 2025 18:13:37 +0200	[thread overview]
Message-ID: <20250507-work-coredump-socket-v4-4-af0ef317b2d0@kernel.org> (raw)
In-Reply-To: <20250507-work-coredump-socket-v4-0-af0ef317b2d0@kernel.org>

Add the reserved "linuxafsk/" prefix for abstract AF_UNIX sockets and
require CAP_NET_ADMIN in the owning user namespace of the network
namespace to bind a name under it. Binding the bare prefix itself is
always refused. This will be used in subsequent patches to support the
coredump socket but is a generally useful concept.

The risk of colliding with abstract names that existing userspace
already binds is so low that we can just start using it. Userspace
must already be prepared to retry if a given abstract address isn't
usable anyway.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/un.h |  2 ++
 net/unix/af_unix.c      | 39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/un.h b/include/uapi/linux/un.h
index 0ad59dc8b686..bbd5ad508dfa 100644
--- a/include/uapi/linux/un.h
+++ b/include/uapi/linux/un.h
@@ -5,6 +5,8 @@
 #include <linux/socket.h>
 
 #define UNIX_PATH_MAX	108
+/* reserved AF_UNIX socket namespace. */
+#define UNIX_SOCKET_NAMESPACE "linuxafsk/"
 
 struct sockaddr_un {
 	__kernel_sa_family_t sun_family; /* AF_UNIX */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 472f8aa9ea15..148d008862e7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,13 @@ static atomic_long_t unix_nr_socks;
 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
 
+static const struct sockaddr_un linuxafsk_addr = {
+	.sun_family = AF_UNIX,
+	.sun_path = "\0"UNIX_SOCKET_NAMESPACE,
+};
+
+#define UNIX_SOCKET_NAMESPACE_ADDR_LEN (offsetof(struct sockaddr_un, sun_path) + sizeof(UNIX_SOCKET_NAMESPACE))
+
 /* SMP locking strategy:
  *    hash table is protected with spinlock.
  *    each socket state is protected by separate spinlock.
@@ -436,6 +443,30 @@ static struct sock *__unix_find_socket_byname(struct net *net,
 	return NULL;
 }
 
+static int unix_may_bind_name(struct net *net, struct sockaddr_un *sunname,
+			      int len, unsigned int hash)
+{
+	struct sock *s;
+
+	s = __unix_find_socket_byname(net, sunname, len, hash);
+	if (s)
+		return -EADDRINUSE;
+
+	/*
+	 * Check whether this is our reserved prefix and if so ensure
+	 * that only privileged processes can bind it.
+	 */
+	if (UNIX_SOCKET_NAMESPACE_ADDR_LEN <= len &&
+	    !memcmp(&linuxafsk_addr, sunname, UNIX_SOCKET_NAMESPACE_ADDR_LEN)) {
+		/* Don't bind the namespace itself. */
+		if (UNIX_SOCKET_NAMESPACE_ADDR_LEN == len)
+			return -ECONNREFUSED;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			return -ECONNREFUSED;
+	}
+	return 0;
+}
+
 static inline struct sock *unix_find_socket_byname(struct net *net,
 						   struct sockaddr_un *sunname,
 						   int len, unsigned int hash)
@@ -1258,10 +1289,10 @@ static int unix_autobind(struct sock *sk)
 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
 	unix_table_double_lock(net, old_hash, new_hash);
 
-	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
+	if (unix_may_bind_name(net, addr->name, addr->len, new_hash)) {
 		unix_table_double_unlock(net, old_hash, new_hash);
 
-		/* __unix_find_socket_byname() may take long time if many names
+		/* unix_may_bind_name() may take long time if many names
 		 * are already in use.
 		 */
 		cond_resched();
@@ -1379,7 +1410,8 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
 	unix_table_double_lock(net, old_hash, new_hash);
 
-	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
+	err = unix_may_bind_name(net, addr->name, addr->len, new_hash);
+	if (err)
 		goto out_spin;
 
 	__unix_set_addr_hash(net, sk, addr, new_hash);
@@ -1389,7 +1421,6 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
 
 out_spin:
 	unix_table_double_unlock(net, old_hash, new_hash);
-	err = -EADDRINUSE;
 out_mutex:
 	mutex_unlock(&u->bindlock);
 out:

-- 
2.47.2


  parent reply	other threads:[~2025-05-07 16:14 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-07 16:13 [PATCH v4 00/11] coredump: add coredump socket Christian Brauner
2025-05-07 16:13 ` [PATCH v4 01/11] coredump: massage format_corname() Christian Brauner
2025-05-07 16:13 ` [PATCH v4 02/11] coredump: massage do_coredump() Christian Brauner
2025-05-07 16:13 ` [PATCH v4 03/11] coredump: reflow dump helpers a little Christian Brauner
2025-05-07 16:13 ` Christian Brauner [this message]
2025-05-07 22:45   ` [PATCH v4 04/11] net: reserve prefix Kuniyuki Iwashima
2025-05-08  6:16     ` Christian Brauner
2025-05-08 21:47       ` Kuniyuki Iwashima
2025-05-09  5:54         ` Christian Brauner
2025-05-09  8:07           ` Daniel Borkmann
2025-05-07 16:13 ` [PATCH v4 05/11] coredump: add coredump socket Christian Brauner
2025-05-07 16:13 ` [PATCH v4 06/11] coredump: validate socket name as it is written Christian Brauner
2025-05-07 16:13 ` [PATCH v4 07/11] coredump: show supported coredump modes Christian Brauner
2025-05-07 16:13 ` [PATCH v4 08/11] pidfs, coredump: add PIDFD_INFO_COREDUMP Christian Brauner
2025-05-07 16:13 ` [PATCH v4 09/11] pidfs, coredump: allow to verify coredump connection Christian Brauner
2025-05-07 18:34   ` Mickaël Salaün
2025-05-07 16:13 ` [PATCH v4 10/11] selftests/pidfd: add PIDFD_INFO_COREDUMP infrastructure Christian Brauner
2025-05-07 16:13 ` [PATCH v4 11/11] selftests/coredump: add tests for AF_UNIX coredumps Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250507-work-coredump-socket-v4-4-af0ef317b2d0@kernel.org \
    --to=brauner@kernel.org \
    --cc=alexander@mihalicyn.com \
    --cc=bluca@debian.org \
    --cc=daan.j.demeyer@gmail.com \
    --cc=davem@davemloft.net \
    --cc=david@readahead.eu \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=jack@suse.cz \
    --cc=jannh@google.com \
    --cc=kuba@kernel.org \
    --cc=kuniyu@amazon.com \
    --cc=lennart@poettering.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=me@yhndnzj.com \
    --cc=netdev@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=pabeni@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=zbyszek@in.waw.pl \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).