From: Oleg Nesterov <oleg@redhat.com>
To: Tycho Andersen <tycho@tycho.pizza>
Cc: Christian Brauner <brauner@kernel.org>,
linux-kernel@vger.kernel.org, linux-api@vger.kernel.org,
Tycho Andersen <tandersen@netflix.com>,
"Eric W. Biederman" <ebiederm@xmission.com>
Subject: [RFC PATCH] pidfd: implement PIDFD_THREAD flag for pidfd_open()
Date: Mon, 29 Jan 2024 12:23:15 +0100 [thread overview]
Message-ID: <20240129112313.GA11635@redhat.com> (raw)
In-Reply-To: <20240127210634.GE13787@redhat.com>
On 01/27, Oleg Nesterov wrote:
>
> I'll (hopefully) send v2 on top of
>
> pidfd: cleanup the usage of __pidfd_prepare's flags
> pidfd: don't do_notify_pidfd() if !thread_group_empty()
>
> on Monday
Sorry, I don't have time to finish v2 today, I need to update the comments
and write the changelog.
But the patch itself is ready, I am sending it for review.
Tycho, Christian, any comments?
Oleg.
From c31780f6c1136a72048d24701ac6d8401fc1afda Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 27 Jan 2024 16:59:18 +0100
Subject: [PATCH] pidfd: implement PIDFD_THREAD flag for pidfd_open()
---
include/uapi/linux/pidfd.h | 3 ++-
kernel/exit.c | 7 +++++++
kernel/fork.c | 29 +++++++++++++++++++++++++++--
kernel/pid.c | 2 +-
kernel/signal.c | 4 +++-
5 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 5406fbc13074..2e6461459877 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -7,6 +7,7 @@
#include <linux/fcntl.h>
/* Flags for pidfd_open(). */
-#define PIDFD_NONBLOCK O_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#define PIDFD_THREAD O_EXCL
#endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index dfb963d2f862..74fe6bfb9577 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -739,6 +739,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
tsk->exit_state = EXIT_ZOMBIE;
+ /*
+ * sub-thread or delay_group_leader(), wake up the PIDFD_THREAD
+ * waiters.
+ */
+ if (!thread_group_empty(tsk))
+ do_notify_pidfd(tsk);
+
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
diff --git a/kernel/fork.c b/kernel/fork.c
index 347641398f9d..977b58c0eac6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,7 @@
#include <linux/user_events.h>
#include <linux/iommu.h>
#include <linux/rseq.h>
+#include <uapi/linux/pidfd.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
seq_put_decimal_ll(m, "Pid:\t", nr);
+ /* TODO: report PIDFD_THREAD */
+
#ifdef CONFIG_PID_NS
seq_put_decimal_ll(m, "\nNSpid:\t", nr);
if (nr > 0) {
@@ -2068,12 +2071,27 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
}
#endif
+static bool pidfd_task_exited(struct pid *pid, bool thread)
+{
+ struct task_struct *task;
+ bool exited;
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ exited = !task ||
+ (READ_ONCE(task->exit_state) && (thread || thread_group_empty(task)));
+ rcu_read_unlock();
+
+ return exited;
+}
+
/*
* Poll support for process exit notification.
*/
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct pid *pid = file->private_data;
+ bool thread = file->f_flags & PIDFD_THREAD;
__poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
@@ -2083,7 +2101,7 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
* If the thread group leader exits before all other threads in the
* group, then poll(2) should block, similar to the wait(2) family.
*/
- if (thread_group_exited(pid))
+ if (pidfd_task_exited(pid, thread))
poll_flags = EPOLLIN | EPOLLRDNORM;
return poll_flags;
@@ -2141,6 +2159,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
return PTR_ERR(pidfd_file);
}
get_pid(pid); /* held by pidfd_file now */
+ /*
+ * anon_inode_getfile() ignores everything outside of the
+ * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually.
+ */
+ pidfd_file->f_flags |= (flags & PIDFD_THREAD);
*ret = pidfd_file;
return pidfd;
}
@@ -2173,7 +2196,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
*/
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
- if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
+ bool thread = flags & PIDFD_THREAD;
+
+ if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID))
return -EINVAL;
return __pidfd_prepare(pid, flags, ret);
diff --git a/kernel/pid.c b/kernel/pid.c
index c7a3e359f8f5..04bdd5ecf183 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -629,7 +629,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
int fd;
struct pid *p;
- if (flags & ~PIDFD_NONBLOCK)
+ if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD))
return -EINVAL;
if (pid <= 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index 9561a3962ca6..919cd33a0405 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2051,7 +2051,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
/*
- * tsk is a group leader and has no threads, wake up the pidfd waiters.
+ * tsk is a group leader and has no threads, wake up the !PIDFD_THREAD
+ * waiters.
*/
if (thread_group_empty(tsk))
do_notify_pidfd(tsk);
@@ -3926,6 +3927,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
prepare_kill_siginfo(sig, &kinfo);
}
+ /* TODO: respect PIDFD_THREAD */
ret = kill_pid_info(sig, &kinfo, pid);
err:
--
2.25.1.362.g51ebf55
next prev parent reply other threads:[~2024-01-29 11:24 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-23 15:34 [PATCH v3 0/3] pidfds for non thread group leaders Tycho Andersen
2024-01-23 15:34 ` [PATCH v3 1/3] pidfd: allow pidfd_open() on non-thread-group leaders Tycho Andersen
2024-01-23 19:56 ` Oleg Nesterov
2024-01-23 21:10 ` Tycho Andersen
2024-01-23 22:22 ` Oleg Nesterov
2024-01-24 1:25 ` Oleg Nesterov
2024-01-25 14:08 ` Oleg Nesterov
2024-01-25 17:17 ` Christian Brauner
2024-01-25 17:51 ` Oleg Nesterov
2024-01-25 18:03 ` Tycho Andersen
2024-01-25 18:25 ` Oleg Nesterov
2024-01-25 18:30 ` Oleg Nesterov
2024-01-25 18:36 ` Tycho Andersen
2024-01-26 9:49 ` Christian Brauner
2024-01-26 9:42 ` Christian Brauner
2024-01-26 14:33 ` Oleg Nesterov
2024-01-26 9:47 ` Christian Brauner
2024-01-26 14:33 ` Oleg Nesterov
2024-01-27 14:26 ` Christian Brauner
2024-01-26 21:50 ` Tycho Andersen
2024-01-27 10:54 ` Oleg Nesterov
2024-01-27 14:33 ` Christian Brauner
2024-01-27 15:55 ` Tycho Andersen
2024-01-27 16:31 ` Oleg Nesterov
2024-01-27 17:20 ` Tycho Andersen
2024-01-27 19:31 ` Oleg Nesterov
2024-01-27 20:44 ` Tycho Andersen
2024-01-27 21:10 ` Oleg Nesterov
2024-01-29 11:23 ` Oleg Nesterov [this message]
2024-01-29 13:41 ` [RFC PATCH] pidfd: implement PIDFD_THREAD flag for pidfd_open() Christian Brauner
2024-01-29 14:31 ` Tycho Andersen
2024-01-29 15:14 ` Christian Brauner
2024-01-30 11:21 ` Oleg Nesterov
2024-01-31 18:11 ` Andy Lutomirski
2024-01-31 18:48 ` Oleg Nesterov
2024-01-31 19:14 ` Oleg Nesterov
2024-01-31 19:24 ` Andy Lutomirski
2024-01-31 19:46 ` Christian Brauner
2024-01-31 19:50 ` Andy Lutomirski
2024-02-01 13:30 ` Christian Brauner
2024-02-01 13:39 ` Christian Brauner
2024-02-01 19:33 ` Andy Lutomirski
2024-01-23 15:34 ` [PATCH v3 2/3] selftests/pidfd: add non-thread-group leader tests Tycho Andersen
2024-01-23 15:34 ` [PATCH v3 3/3] clone: allow CLONE_THREAD | CLONE_PIDFD together Tycho Andersen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240129112313.GA11635@redhat.com \
--to=oleg@redhat.com \
--cc=brauner@kernel.org \
--cc=ebiederm@xmission.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tandersen@netflix.com \
--cc=tycho@tycho.pizza \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).