netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Lee Jones <lee@kernel.org>
To: lee@kernel.org, "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Kuniyuki Iwashima <kuniyu@amazon.com>,
	Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>,
	Michal Luczaj <mhal@rbox.co>, Rao Shoaib <Rao.Shoaib@oracle.com>,
	Pavel Begunkov <asml.silence@gmail.com>,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: stable@vger.kernel.org
Subject: [PATCH v6.6 07/26] af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
Date: Wed, 21 May 2025 14:45:15 +0000	[thread overview]
Message-ID: <20250521144803.2050504-8-lee@kernel.org> (raw)
In-Reply-To: <20250521144803.2050504-1-lee@kernel.org>

From: Kuniyuki Iwashima <kuniyu@amazon.com>

[ Upstream commit 1fbfdfaa590248c1d86407f578e40e5c65136330 ]

We will replace the garbage collection algorithm for AF_UNIX, where
we will consider each inflight AF_UNIX socket as a vertex and its file
descriptor as an edge in a directed graph.

This patch introduces a new struct unix_vertex representing a vertex
in the graph and adds its pointer to struct unix_sock.

When we send a fd using the SCM_RIGHTS message, we allocate struct
scm_fp_list to struct scm_cookie in scm_fp_copy().  Then, we bump
each refcount of the inflight fds' struct file and save them in
scm_fp_list.fp.

After that, unix_attach_fds() inexplicably clones scm_fp_list of
scm_cookie and sets it to skb.  (We will remove this part after
replacing GC.)

Here, we add a new function call in unix_attach_fds() to preallocate
struct unix_vertex per inflight AF_UNIX fd and link each vertex to
skb's scm_fp_list.vertices.

When sendmsg() succeeds later, if the socket of the inflight fd is
still not inflight yet, we will set the preallocated vertex to struct
unix_sock.vertex and link it to a global list unix_unvisited_vertices
under spin_lock(&unix_gc_lock).

If the socket is already inflight, we free the preallocated vertex.
This is to avoid taking the lock unnecessarily when sendmsg() could
fail later.

In the following patch, we will similarly allocate another struct
per edge, which will finally be linked to the inflight socket's
unix_vertex.edges.

And then, we will count the number of edges as unix_vertex.out_degree.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240325202425.60930-2-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 1fbfdfaa590248c1d86407f578e40e5c65136330)
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/net/af_unix.h |  9 +++++++++
 include/net/scm.h     |  3 +++
 net/core/scm.c        |  7 +++++++
 net/unix/af_unix.c    |  6 ++++++
 net/unix/garbage.c    | 38 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 63 insertions(+)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 3dee0b2721aa4..07f0f698c9490 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -22,9 +22,17 @@ extern unsigned int unix_tot_inflight;
 
 void unix_inflight(struct user_struct *user, struct file *fp);
 void unix_notinflight(struct user_struct *user, struct file *fp);
+int unix_prepare_fpl(struct scm_fp_list *fpl);
+void unix_destroy_fpl(struct scm_fp_list *fpl);
 void unix_gc(void);
 void wait_for_unix_gc(struct scm_fp_list *fpl);
 
+struct unix_vertex {
+	struct list_head edges;
+	struct list_head entry;
+	unsigned long out_degree;
+};
+
 struct sock *unix_peer_get(struct sock *sk);
 
 #define UNIX_HASH_MOD	(256 - 1)
@@ -62,6 +70,7 @@ struct unix_sock {
 	struct path		path;
 	struct mutex		iolock, bindlock;
 	struct sock		*peer;
+	struct unix_vertex	*vertex;
 	struct list_head	link;
 	unsigned long		inflight;
 	spinlock_t		lock;
diff --git a/include/net/scm.h b/include/net/scm.h
index 1ff6a28550644..11e86e55f332d 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -26,6 +26,9 @@ struct scm_fp_list {
 	short			count;
 	short			count_unix;
 	short			max;
+#ifdef CONFIG_UNIX
+	struct list_head	vertices;
+#endif
 	struct user_struct	*user;
 	struct file		*fp[SCM_MAX_FD];
 };
diff --git a/net/core/scm.c b/net/core/scm.c
index 574607b1c2d96..27e5634c958e8 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -89,6 +89,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 		fpl->count_unix = 0;
 		fpl->max = SCM_MAX_FD;
 		fpl->user = NULL;
+#if IS_ENABLED(CONFIG_UNIX)
+		INIT_LIST_HEAD(&fpl->vertices);
+#endif
 	}
 	fpp = &fpl->fp[fpl->count];
 
@@ -376,8 +379,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 	if (new_fpl) {
 		for (i = 0; i < fpl->count; i++)
 			get_file(fpl->fp[i]);
+
 		new_fpl->max = new_fpl->count;
 		new_fpl->user = get_uid(fpl->user);
+#if IS_ENABLED(CONFIG_UNIX)
+		INIT_LIST_HEAD(&new_fpl->vertices);
+#endif
 	}
 	return new_fpl;
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 78758af2c6f38..6d62fa5b0e68d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -979,6 +979,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	sk->sk_destruct		= unix_sock_destructor;
 	u = unix_sk(sk);
 	u->inflight = 0;
+	u->vertex = NULL;
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
 	spin_lock_init(&u->lock);
@@ -1782,6 +1783,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	for (i = scm->fp->count - 1; i >= 0; i--)
 		unix_inflight(scm->fp->user, scm->fp->fp[i]);
 
+	if (unix_prepare_fpl(UNIXCB(skb).fp))
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -1792,6 +1796,8 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	scm->fp = UNIXCB(skb).fp;
 	UNIXCB(skb).fp = NULL;
 
+	unix_destroy_fpl(scm->fp);
+
 	for (i = scm->fp->count - 1; i >= 0; i--)
 		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
 }
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 0104be9d47045..8ea7640e032e8 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -101,6 +101,44 @@ struct unix_sock *unix_get_socket(struct file *filp)
 	return NULL;
 }
 
+static void unix_free_vertices(struct scm_fp_list *fpl)
+{
+	struct unix_vertex *vertex, *next_vertex;
+
+	list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
+		list_del(&vertex->entry);
+		kfree(vertex);
+	}
+}
+
+int unix_prepare_fpl(struct scm_fp_list *fpl)
+{
+	struct unix_vertex *vertex;
+	int i;
+
+	if (!fpl->count_unix)
+		return 0;
+
+	for (i = 0; i < fpl->count_unix; i++) {
+		vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
+		if (!vertex)
+			goto err;
+
+		list_add(&vertex->entry, &fpl->vertices);
+	}
+
+	return 0;
+
+err:
+	unix_free_vertices(fpl);
+	return -ENOMEM;
+}
+
+void unix_destroy_fpl(struct scm_fp_list *fpl)
+{
+	unix_free_vertices(fpl);
+}
+
 DEFINE_SPINLOCK(unix_gc_lock);
 unsigned int unix_tot_inflight;
 static LIST_HEAD(gc_candidates);
-- 
2.49.0.1112.g889b7c5bd8-goog


  parent reply	other threads:[~2025-05-21 14:50 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-21 14:45 [PATCH v6.6 00/26] af_unix: Align with upstream to avoid a potential UAF Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 01/26] af_unix: Return struct unix_sock from unix_get_socket() Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 02/26] af_unix: Run GC on only one CPU Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 03/26] af_unix: Try to run GC async Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 04/26] af_unix: Replace BUG_ON() with WARN_ON_ONCE() Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 05/26] af_unix: Remove io_uring code for GC Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 06/26] af_unix: Remove CONFIG_UNIX_SCM Lee Jones
2025-05-21 14:45 ` Lee Jones [this message]
2025-05-21 14:45 ` [PATCH v6.6 08/26] af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 09/26] af_unix: Link struct unix_edge when queuing skb Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 10/26] af_unix: Bulk update unix_tot_inflight/unix_inflight " Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 11/26] af_unix: Iterate all vertices by DFS Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 12/26] af_unix: Detect Strongly Connected Components Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 13/26] af_unix: Save listener for embryo socket Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 14/26] af_unix: Fix up unix_edge.successor " Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 15/26] af_unix: Save O(n) setup of Tarjan's algo Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 16/26] af_unix: Skip GC if no cycle exists Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 17/26] af_unix: Avoid Tarjan's algorithm if unnecessary Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 18/26] af_unix: Assign a unique index to SCC Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 19/26] af_unix: Detect dead SCC Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 20/26] af_unix: Replace garbage collection algorithm Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 21/26] af_unix: Remove lock dance in unix_peek_fds() Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 22/26] af_unix: Try not to hold unix_gc_lock during accept() Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 23/26] af_unix: Don't access successor in unix_del_edges() during GC Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 24/26] af_unix: Add dead flag to struct scm_fp_list Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 25/26] af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS Lee Jones
2025-05-21 14:45 ` [PATCH v6.6 26/26] af_unix: Fix uninit-value in __unix_walk_scc() Lee Jones
2025-05-29 12:26 ` [PATCH v6.6 00/26] af_unix: Align with upstream to avoid a potential UAF Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250521144803.2050504-8-lee@kernel.org \
    --to=lee@kernel.org \
    --cc=Rao.Shoaib@oracle.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=kuniyu@amazon.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhal@rbox.co \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).