From: Christian Brauner <brauner@kernel.org>
To: Oleg Nesterov <oleg@redhat.com>,
Kuniyuki Iwashima <kuniyu@amazon.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
netdev@vger.kernel.org, David Rheinsberg <david@readahead.eu>,
Jan Kara <jack@suse.cz>,
Alexander Mikhalitsyn <alexander@mihalicyn.com>,
Luca Boccassi <bluca@debian.org>,
Lennart Poettering <lennart@poettering.net>,
Daan De Meyer <daan.j.demeyer@gmail.com>,
Mike Yuan <me@yhndnzj.com>,
Christian Brauner <brauner@kernel.org>
Subject: [PATCH v2 3/4] pidfs: get rid of __pidfd_prepare()
Date: Fri, 25 Apr 2025 10:11:32 +0200 [thread overview]
Message-ID: <20250425-work-pidfs-net-v2-3-450a19461e75@kernel.org> (raw)
In-Reply-To: <20250425-work-pidfs-net-v2-0-450a19461e75@kernel.org>
Fold it into pidfd_prepare() and rename PIDFD_CLONE to PIDFD_STALE to
indicate that the passed pid might not have task linkage and no explicit
check for that should be performed.
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/pidfs.c | 22 +++++++-----
include/linux/pid.h | 2 +-
include/uapi/linux/pidfd.h | 2 +-
kernel/fork.c | 83 ++++++++++++++++------------------------------
4 files changed, 44 insertions(+), 65 deletions(-)
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 308792d4b11a..0afaffd5a18a 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -768,7 +768,7 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
{
enum pid_type type;
- if (flags & PIDFD_CLONE)
+ if (flags & PIDFD_STALE)
return true;
/*
@@ -777,10 +777,14 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
* pidfd has been allocated perform another check that the pid
* is still alive. If it is exit information is available even
* if the task gets reaped before the pidfd is returned to
- * userspace. The only exception is PIDFD_CLONE where no task
- * linkage has been established for @pid yet and the kernel is
- * in the middle of process creation so there's nothing for
- * pidfs to miss.
+ * userspace. The only exception are indicated by PIDFD_STALE:
+ *
+ * (1) The kernel is in the middle of task creation and thus no
+ * task linkage has been established yet.
+ * (2) The caller knows @pid has been registered in pidfs at a
+ * time when the task was still alive.
+ *
+ * In both cases exit information will have been reported.
*/
if (flags & PIDFD_THREAD)
type = PIDTYPE_PID;
@@ -874,11 +878,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
int ret;
/*
- * Ensure that PIDFD_CLONE can be passed as a flag without
+ * Ensure that PIDFD_STALE can be passed as a flag without
* overloading other uapi pidfd flags.
*/
- BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD);
- BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK);
+ BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
+ BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
if (ret < 0)
@@ -887,7 +891,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
if (!pidfs_pid_valid(pid, &path, flags))
return ERR_PTR(-ESRCH);
- flags &= ~PIDFD_CLONE;
+ flags &= ~PIDFD_STALE;
pidfd_file = dentry_open(&path, flags, current_cred());
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
if (!IS_ERR(pidfd_file))
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 311ecebd7d56..453ae6d8a68d 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,7 +77,7 @@ struct file;
struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file);
void do_notify_pidfd(struct task_struct *task);
static inline struct pid *get_pid(struct pid *pid)
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 2970ef44655a..8c1511edd0e9 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -12,7 +12,7 @@
#define PIDFD_THREAD O_EXCL
#ifdef __KERNEL__
#include <linux/sched.h>
-#define PIDFD_CLONE CLONE_PIDFD
+#define PIDFD_STALE CLONE_PIDFD
#endif
/* Flags for pidfd_send_signal(). */
diff --git a/kernel/fork.c b/kernel/fork.c
index f7403e1fb0d4..1d95f4dae327 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2035,55 +2035,11 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
-/**
- * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
- * @pid: the struct pid for which to create a pidfd
- * @flags: flags of the new @pidfd
- * @ret: Where to return the file for the pidfd.
- *
- * Allocate a new file that stashes @pid and reserve a new pidfd number in the
- * caller's file descriptor table. The pidfd is reserved but not installed yet.
- *
- * The helper doesn't perform checks on @pid which makes it useful for pidfds
- * created via CLONE_PIDFD where @pid has no task attached when the pidfd and
- * pidfd file are prepared.
- *
- * If this function returns successfully the caller is responsible to either
- * call fd_install() passing the returned pidfd and pidfd file as arguments in
- * order to install the pidfd into its file descriptor table or they must use
- * put_unused_fd() and fput() on the returned pidfd and pidfd file
- * respectively.
- *
- * This function is useful when a pidfd must already be reserved but there
- * might still be points of failure afterwards and the caller wants to ensure
- * that no pidfd is leaked into its file descriptor table.
- *
- * Return: On success, a reserved pidfd is returned from the function and a new
- * pidfd file is returned in the last argument to the function. On
- * error, a negative error code is returned from the function and the
- * last argument remains unchanged.
- */
-static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
-{
- struct file *pidfd_file;
-
- CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
- if (pidfd < 0)
- return pidfd;
-
- pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR);
- if (IS_ERR(pidfd_file))
- return PTR_ERR(pidfd_file);
-
- *ret = pidfd_file;
- return take_fd(pidfd);
-}
-
/**
* pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd
- * @ret: Where to return the pidfd.
+ * @ret_file: return the new pidfs file
*
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
@@ -2106,16 +2062,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
* error, a negative error code is returned from the function and the
* last argument remains unchanged.
*/
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file)
{
+ struct file *pidfs_file;
+
/*
- * While holding the pidfd waitqueue lock removing the task
- * linkage for the thread-group leader pid (PIDTYPE_TGID) isn't
- * possible. Thus, if there's still task linkage for PIDTYPE_PID
- * not having thread-group leader linkage for the pid means it
- * wasn't a thread-group leader in the first place.
+ * PIDFD_STALE is only allowed to be passed if the caller knows
+ * that @pid is already registered in pidfs and thus
+ * PIDFD_INFO_EXIT information is guaranteed to be available.
*/
- scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
+ if (!(flags & PIDFD_STALE)) {
+ /*
+ * While holding the pidfd waitqueue lock removing the
+ * task linkage for the thread-group leader pid
+ * (PIDTYPE_TGID) isn't possible. Thus, if there's still
+ * task linkage for PIDTYPE_PID not having thread-group
+ * leader linkage for the pid means it wasn't a
+ * thread-group leader in the first place.
+ */
+ guard(spinlock_irq)(&pid->wait_pidfd.lock);
+
/* Task has already been reaped. */
if (!pid_has_task(pid, PIDTYPE_PID))
return -ESRCH;
@@ -2128,7 +2094,16 @@ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
return -ENOENT;
}
- return __pidfd_prepare(pid, flags, ret);
+ CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
+ if (pidfd < 0)
+ return pidfd;
+
+ pidfs_file = pidfs_alloc_file(pid, flags | O_RDWR);
+ if (IS_ERR(pidfs_file))
+ return PTR_ERR(pidfs_file);
+
+ *ret_file = pidfs_file;
+ return take_fd(pidfd);
}
static void __delayed_free_task(struct rcu_head *rhp)
@@ -2477,7 +2452,7 @@ __latent_entropy struct task_struct *copy_process(
* Note that no task has been attached to @pid yet indicate
* that via CLONE_PIDFD.
*/
- retval = __pidfd_prepare(pid, flags | PIDFD_CLONE, &pidfile);
+ retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
--
2.47.2
next prev parent reply other threads:[~2025-04-25 8:11 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-25 8:11 [PATCH v2 0/4] net, pidfs: enable handing out pidfds for reaped sk->sk_peer_pid Christian Brauner
2025-04-25 8:11 ` [PATCH v2 1/4] pidfs: register pid in pidfs Christian Brauner
2025-04-25 8:11 ` [PATCH v2 2/4] net, pidfs: prepare for handing out pidfds for reaped sk->sk_peer_pid Christian Brauner
2025-04-25 20:46 ` Kuniyuki Iwashima
2025-04-25 8:11 ` Christian Brauner [this message]
2025-04-25 8:11 ` [PATCH v2 4/4] net, pidfs: enable " Christian Brauner
2025-04-25 20:47 ` Kuniyuki Iwashima
2025-04-25 8:38 ` [PATCH v2 0/4] " David Rheinsberg
2025-04-26 6:28 ` Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250425-work-pidfs-net-v2-3-450a19461e75@kernel.org \
--to=brauner@kernel.org \
--cc=alexander@mihalicyn.com \
--cc=bluca@debian.org \
--cc=daan.j.demeyer@gmail.com \
--cc=davem@davemloft.net \
--cc=david@readahead.eu \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=jack@suse.cz \
--cc=kuba@kernel.org \
--cc=kuniyu@amazon.com \
--cc=lennart@poettering.net \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=me@yhndnzj.com \
--cc=netdev@vger.kernel.org \
--cc=oleg@redhat.com \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).