* [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
@ 2022-01-03 21:32 ` Eric W. Biederman
2022-01-04 7:38 ` Christoph Hellwig
2022-01-07 3:48 ` Al Viro
2022-01-03 21:32 ` [PATCH 02/17] exit: Coredumps reach do_group_exit Eric W. Biederman
` (17 subsequent siblings)
18 siblings, 2 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:32 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
When I say remove I mean remove. All profile_task_exit and
profile_munmap do is call a blocking notifier chain. The helpers
profile_task_register and profile_task_unregister are not called
anywhere in the tree. Which means this is all dead code.
So remove the dead code and make it easier to read do_exit.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/profile.h | 26 ---------------------
kernel/exit.c | 1 -
kernel/profile.c | 50 -----------------------------------------
mm/mmap.c | 1 -
4 files changed, 78 deletions(-)
diff --git a/include/linux/profile.h b/include/linux/profile.h
index fd18ca96f557..f7eb2b57d890 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -31,11 +31,6 @@ static inline int create_proc_profile(void)
}
#endif
-enum profile_type {
- PROFILE_TASK_EXIT,
- PROFILE_MUNMAP
-};
-
#ifdef CONFIG_PROFILING
extern int prof_on __read_mostly;
@@ -66,23 +61,14 @@ static inline void profile_hit(int type, void *ip)
struct task_struct;
struct mm_struct;
-/* task is in do_exit() */
-void profile_task_exit(struct task_struct * task);
-
/* task is dead, free task struct ? Returns 1 if
* the task was taken, 0 if the task should be freed.
*/
int profile_handoff_task(struct task_struct * task);
-/* sys_munmap */
-void profile_munmap(unsigned long addr);
-
int task_handoff_register(struct notifier_block * n);
int task_handoff_unregister(struct notifier_block * n);
-int profile_event_register(enum profile_type, struct notifier_block * n);
-int profile_event_unregister(enum profile_type, struct notifier_block * n);
-
#else
#define prof_on 0
@@ -117,19 +103,7 @@ static inline int task_handoff_unregister(struct notifier_block * n)
return -ENOSYS;
}
-static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-#define profile_task_exit(a) do { } while (0)
#define profile_handoff_task(a) (0)
-#define profile_munmap(a) do { } while (0)
#endif /* CONFIG_PROFILING */
diff --git a/kernel/exit.c b/kernel/exit.c
index e7104f803be0..b5c35b520fda 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -737,7 +737,6 @@ void __noreturn do_exit(long code)
WARN_ON(blk_needs_flush_plug(tsk));
- profile_task_exit(tsk);
kcov_task_exit(tsk);
coredump_task_exit(tsk);
diff --git a/kernel/profile.c b/kernel/profile.c
index eb9c7f0f5ac5..9355cc934a96 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -135,14 +135,7 @@ int __ref profile_init(void)
/* Profile event notifications */
-static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
-static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
-
-void profile_task_exit(struct task_struct *task)
-{
- blocking_notifier_call_chain(&task_exit_notifier, 0, task);
-}
int profile_handoff_task(struct task_struct *task)
{
@@ -151,11 +144,6 @@ int profile_handoff_task(struct task_struct *task)
return (ret == NOTIFY_OK) ? 1 : 0;
}
-void profile_munmap(unsigned long addr)
-{
- blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
-}
-
int task_handoff_register(struct notifier_block *n)
{
return atomic_notifier_chain_register(&task_free_notifier, n);
@@ -168,44 +156,6 @@ int task_handoff_unregister(struct notifier_block *n)
}
EXPORT_SYMBOL_GPL(task_handoff_unregister);
-int profile_event_register(enum profile_type type, struct notifier_block *n)
-{
- int err = -EINVAL;
-
- switch (type) {
- case PROFILE_TASK_EXIT:
- err = blocking_notifier_chain_register(
- &task_exit_notifier, n);
- break;
- case PROFILE_MUNMAP:
- err = blocking_notifier_chain_register(
- &munmap_notifier, n);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_register);
-
-int profile_event_unregister(enum profile_type type, struct notifier_block *n)
-{
- int err = -EINVAL;
-
- switch (type) {
- case PROFILE_TASK_EXIT:
- err = blocking_notifier_chain_unregister(
- &task_exit_notifier, n);
- break;
- case PROFILE_MUNMAP:
- err = blocking_notifier_chain_unregister(
- &munmap_notifier, n);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_unregister);
-
#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
/*
* Each cpu has a pair of open-addressed hashtables for pending
diff --git a/mm/mmap.c b/mm/mmap.c
index bfb0ea164a90..70318c2a47c3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2928,7 +2928,6 @@ EXPORT_SYMBOL(vm_munmap);
SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
addr = untagged_addr(addr);
- profile_munmap(addr);
return __vm_munmap(addr, len, true);
}
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* Re: [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap
2022-01-03 21:32 ` [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap Eric W. Biederman
@ 2022-01-04 7:38 ` Christoph Hellwig
2022-01-07 3:48 ` Al Viro
1 sibling, 0 replies; 95+ messages in thread
From: Christoph Hellwig @ 2022-01-04 7:38 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro,
Kees Cook, linux-api
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap
2022-01-03 21:32 ` [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap Eric W. Biederman
2022-01-04 7:38 ` Christoph Hellwig
@ 2022-01-07 3:48 ` Al Viro
2022-01-08 16:10 ` Eric W. Biederman
1 sibling, 1 reply; 95+ messages in thread
From: Al Viro @ 2022-01-07 3:48 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-arch, Linus Torvalds, Oleg Nesterov,
Kees Cook, linux-api
On Mon, Jan 03, 2022 at 03:32:56PM -0600, Eric W. Biederman wrote:
> When I say remove I mean remove. All profile_task_exit and
> profile_munmap do is call a blocking notifier chain. The helpers
> profile_task_register and profile_task_unregister are not called
> anywhere in the tree. Which means this is all dead code.
>
> So remove the dead code and make it easier to read do_exit.
How about doing the same to profile_handoff_task() and
task_handoff_register()/task_handoff_unregister(),
while we are at it? Combined diff would be this:
diff --git a/include/linux/profile.h b/include/linux/profile.h
index fd18ca96f5574..6aa64730298a0 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -31,11 +31,6 @@ static inline int create_proc_profile(void)
}
#endif
-enum profile_type {
- PROFILE_TASK_EXIT,
- PROFILE_MUNMAP
-};
-
#ifdef CONFIG_PROFILING
extern int prof_on __read_mostly;
@@ -63,26 +58,6 @@ static inline void profile_hit(int type, void *ip)
profile_hits(type, ip, 1);
}
-struct task_struct;
-struct mm_struct;
-
-/* task is in do_exit() */
-void profile_task_exit(struct task_struct * task);
-
-/* task is dead, free task struct ? Returns 1 if
- * the task was taken, 0 if the task should be freed.
- */
-int profile_handoff_task(struct task_struct * task);
-
-/* sys_munmap */
-void profile_munmap(unsigned long addr);
-
-int task_handoff_register(struct notifier_block * n);
-int task_handoff_unregister(struct notifier_block * n);
-
-int profile_event_register(enum profile_type, struct notifier_block * n);
-int profile_event_unregister(enum profile_type, struct notifier_block * n);
-
#else
#define prof_on 0
@@ -107,30 +82,6 @@ static inline void profile_hit(int type, void *ip)
return;
}
-static inline int task_handoff_register(struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int task_handoff_unregister(struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-#define profile_task_exit(a) do { } while (0)
-#define profile_handoff_task(a) (0)
-#define profile_munmap(a) do { } while (0)
-
#endif /* CONFIG_PROFILING */
#endif /* _LINUX_PROFILE_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index f702a6a63686e..5086a5e9d02de 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -765,7 +765,6 @@ void __noreturn do_exit(long code)
preempt_count_set(PREEMPT_ENABLED);
}
- profile_task_exit(tsk);
kcov_task_exit(tsk);
coredump_task_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 3244cc56b697d..496c0b6c8cb83 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -754,9 +754,7 @@ void __put_task_struct(struct task_struct *tsk)
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
sched_core_free(tsk);
-
- if (!profile_handoff_task(tsk))
- free_task(tsk);
+ free_task(tsk);
}
EXPORT_SYMBOL_GPL(__put_task_struct);
diff --git a/kernel/profile.c b/kernel/profile.c
index eb9c7f0f5ac52..37640a0bd8a3c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -133,79 +133,6 @@ int __ref profile_init(void)
return -ENOMEM;
}
-/* Profile event notifications */
-
-static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
-static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
-static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
-
-void profile_task_exit(struct task_struct *task)
-{
- blocking_notifier_call_chain(&task_exit_notifier, 0, task);
-}
-
-int profile_handoff_task(struct task_struct *task)
-{
- int ret;
- ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
- return (ret == NOTIFY_OK) ? 1 : 0;
-}
-
-void profile_munmap(unsigned long addr)
-{
- blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
-}
-
-int task_handoff_register(struct notifier_block *n)
-{
- return atomic_notifier_chain_register(&task_free_notifier, n);
-}
-EXPORT_SYMBOL_GPL(task_handoff_register);
-
-int task_handoff_unregister(struct notifier_block *n)
-{
- return atomic_notifier_chain_unregister(&task_free_notifier, n);
-}
-EXPORT_SYMBOL_GPL(task_handoff_unregister);
-
-int profile_event_register(enum profile_type type, struct notifier_block *n)
-{
- int err = -EINVAL;
-
- switch (type) {
- case PROFILE_TASK_EXIT:
- err = blocking_notifier_chain_register(
- &task_exit_notifier, n);
- break;
- case PROFILE_MUNMAP:
- err = blocking_notifier_chain_register(
- &munmap_notifier, n);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_register);
-
-int profile_event_unregister(enum profile_type type, struct notifier_block *n)
-{
- int err = -EINVAL;
-
- switch (type) {
- case PROFILE_TASK_EXIT:
- err = blocking_notifier_chain_unregister(
- &task_exit_notifier, n);
- break;
- case PROFILE_MUNMAP:
- err = blocking_notifier_chain_unregister(
- &munmap_notifier, n);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_unregister);
-
#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
/*
* Each cpu has a pair of open-addressed hashtables for pending
diff --git a/mm/mmap.c b/mm/mmap.c
index bfb0ea164a90a..70318c2a47c39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2928,7 +2928,6 @@ EXPORT_SYMBOL(vm_munmap);
SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
addr = untagged_addr(addr);
- profile_munmap(addr);
return __vm_munmap(addr, len, true);
}
^ permalink raw reply related [flat|nested] 95+ messages in thread
* Re: [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap
2022-01-07 3:48 ` Al Viro
@ 2022-01-08 16:10 ` Eric W. Biederman
0 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-08 16:10 UTC (permalink / raw)
To: Al Viro
Cc: linux-kernel, linux-arch, Linus Torvalds, Oleg Nesterov,
Kees Cook, linux-api
Al Viro <viro@zeniv.linux.org.uk> writes:
> On Mon, Jan 03, 2022 at 03:32:56PM -0600, Eric W. Biederman wrote:
>> When I say remove I mean remove. All profile_task_exit and
>> profile_munmap do is call a blocking notifier chain. The helpers
>> profile_task_register and profile_task_unregister are not called
>> anywhere in the tree. Which means this is all dead code.
>>
>> So remove the dead code and make it easier to read do_exit.
>
> How about doing the same to profile_handoff_task() and
> task_handoff_register()/task_handoff_unregister(),
> while we are at it? Combined diff would be this:
A very good idea. I have added this incremental patch to my queue.
Eric
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 8 Jan 2022 10:03:24 -0600
Subject: [PATCH] exit: Remove profile_handoff_task
All profile_handoff_task does is notify the task_free_notifier chain.
The helpers task_handoff_register and task_handoff_unregister are used
to add and delete entries from that chain and are never called.
So remove the dead code and make it much easier to read and reason
about __put_task_struct.
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/profile.h | 19 -------------------
kernel/fork.c | 4 +---
kernel/profile.c | 23 -----------------------
3 files changed, 1 insertion(+), 45 deletions(-)
diff --git a/include/linux/profile.h b/include/linux/profile.h
index f7eb2b57d890..11db1ec516e2 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -61,14 +61,6 @@ static inline void profile_hit(int type, void *ip)
struct task_struct;
struct mm_struct;
-/* task is dead, free task struct ? Returns 1 if
- * the task was taken, 0 if the task should be freed.
- */
-int profile_handoff_task(struct task_struct * task);
-
-int task_handoff_register(struct notifier_block * n);
-int task_handoff_unregister(struct notifier_block * n);
-
#else
#define prof_on 0
@@ -93,17 +85,6 @@ static inline void profile_hit(int type, void *ip)
return;
}
-static inline int task_handoff_register(struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int task_handoff_unregister(struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-#define profile_handoff_task(a) (0)
#endif /* CONFIG_PROFILING */
diff --git a/kernel/fork.c b/kernel/fork.c
index 6f0293cb29c9..494539ecb6d3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -754,9 +754,7 @@ void __put_task_struct(struct task_struct *tsk)
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
sched_core_free(tsk);
-
- if (!profile_handoff_task(tsk))
- free_task(tsk);
+ free_task(tsk);
}
EXPORT_SYMBOL_GPL(__put_task_struct);
diff --git a/kernel/profile.c b/kernel/profile.c
index 9355cc934a96..37640a0bd8a3 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -133,29 +133,6 @@ int __ref profile_init(void)
return -ENOMEM;
}
-/* Profile event notifications */
-
-static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
-
-int profile_handoff_task(struct task_struct *task)
-{
- int ret;
- ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
- return (ret == NOTIFY_OK) ? 1 : 0;
-}
-
-int task_handoff_register(struct notifier_block *n)
-{
- return atomic_notifier_chain_register(&task_free_notifier, n);
-}
-EXPORT_SYMBOL_GPL(task_handoff_register);
-
-int task_handoff_unregister(struct notifier_block *n)
-{
- return atomic_notifier_chain_unregister(&task_free_notifier, n);
-}
-EXPORT_SYMBOL_GPL(task_handoff_unregister);
-
#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
/*
* Each cpu has a pair of open-addressed hashtables for pending
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 02/17] exit: Coredumps reach do_group_exit
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
2022-01-03 21:32 ` [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap Eric W. Biederman
@ 2022-01-03 21:32 ` Eric W. Biederman
2022-01-03 21:32 ` [PATCH 03/17] exit: Fix the exit_code for wait_task_zombie Eric W. Biederman
` (16 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:32 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
The comment about coredumps not reaching do_group_exit and the
corresponding BUG_ON are bogus.
What happens and has happened for years is that get_signal calls
do_coredump (which sets SIGNAL_GROUP_EXIT and group_exit_code) and
then do_group_exit passing the signal number. Then do_group_exit
ignores the exit_code it is passed and uses signal->group_exit_code
from the coredump.
The comment and BUG_ON were correct when they were added during the
2.5 development cycle, but became obsolete and incorrect when
get_signal was changed to fall through to do_group_exit after
do_coredump in 2.6.10-rc2.
So remove the stale comment and BUG_ON
Fixes: 63bd6144f191 ("[PATCH] Invalid BUG_ONs in signal.c")
History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
kernel/exit.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index b5c35b520fda..34c43037450f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -904,8 +904,6 @@ do_group_exit(int exit_code)
{
struct signal_struct *sig = current->signal;
- BUG_ON(exit_code & 0x80); /* core dumps don't get here */
-
if (sig->flags & SIGNAL_GROUP_EXIT)
exit_code = sig->group_exit_code;
else if (sig->group_exec_task)
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 03/17] exit: Fix the exit_code for wait_task_zombie
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
2022-01-03 21:32 ` [PATCH 01/17] exit: Remove profile_task_exit & profile_munmap Eric W. Biederman
2022-01-03 21:32 ` [PATCH 02/17] exit: Coredumps reach do_group_exit Eric W. Biederman
@ 2022-01-03 21:32 ` Eric W. Biederman
2022-01-03 21:32 ` [PATCH 04/17] exit: Use the correct exit_code in /proc/<pid>/stat Eric W. Biederman
` (15 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:32 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
The function wait_task_zombie is defined to always returns the process not
thread exit status. Unfortunately when process group exit support
was added to wait_task_zombie the WNOWAIT case was overlooked.
Usually tsk->exit_code and tsk->signal->group_exit_code will be in sync
so fixing this is bug probably has no effect in practice. But fix
it anyway so that people aren't scratching their heads about why
the two code paths are different.
History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Fixes: 2c66151cbc2c ("[PATCH] sys_exit() threading improvements, BK-curr")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
kernel/exit.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index 34c43037450f..7121db37c411 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1011,7 +1011,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
return 0;
if (unlikely(wo->wo_flags & WNOWAIT)) {
- status = p->exit_code;
+ status = (p->signal->flags & SIGNAL_GROUP_EXIT)
+ ? p->signal->group_exit_code : p->exit_code;
get_task_struct(p);
read_unlock(&tasklist_lock);
sched_annotate_sleep();
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 04/17] exit: Use the correct exit_code in /proc/<pid>/stat
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (2 preceding siblings ...)
2022-01-03 21:32 ` [PATCH 03/17] exit: Fix the exit_code for wait_task_zombie Eric W. Biederman
@ 2022-01-03 21:32 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 05/17] taskstats: Cleanup the use of task->exit_code Eric W. Biederman
` (14 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:32 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
Since do_proc_statt was modified to return process wide values instead
of per task values the exit_code calculation has never been updated.
Update it now to return the process wide exit_code when it is requested
and available.
History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Fixes: bf719d26a5c1 ("[PATCH] distinct tgid/tid CPU usage")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/proc/array.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ff869a66b34e..43a7abde9e42 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -468,6 +468,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
u64 cgtime, gtime;
unsigned long rsslim = 0;
unsigned long flags;
+ int exit_code = task->exit_code;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -531,6 +532,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
maj_flt += sig->maj_flt;
thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
+
+ if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
+ exit_code = sig->group_exit_code;
}
sid = task_session_nr_ns(task, ns);
@@ -630,7 +634,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_puts(m, " 0 0 0 0 0 0 0");
if (permitted)
- seq_put_decimal_ll(m, " ", task->exit_code);
+ seq_put_decimal_ll(m, " ", exit_code);
else
seq_puts(m, " 0");
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 05/17] taskstats: Cleanup the use of task->exit_code
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (3 preceding siblings ...)
2022-01-03 21:32 ` [PATCH 04/17] exit: Use the correct exit_code in /proc/<pid>/stat Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 06/17] ptrace: Remove second setting of PT_SEIZED in ptrace_attach Eric W. Biederman
` (13 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman, Balbir Singh
In the function bacct_add_task the code reading task->exit_code was
introduced in commit f3cef7a99469 ("[PATCH] csa: basic accounting over
taskstats"), and it is not entirely clear what the taskstats interface
is trying to return as only returning the exit_code of the first task
in a process doesn't make a lot of sense.
As best as I can figure the intent is to return task->exit_code after
a task exits. The field is returned with per task fields, so the
exit_code of the entire process is not wanted. Only the value of the
first task is returned so this is not a useful way to get the per task
ptrace stop code. The ordinary case of returning this value is
returning after a task exits, which also precludes use for getting
a ptrace value.
It is common to for the first task of a process to also be the last
task of a process so this field may have done something reasonable by
accident in testing.
Make ac_exitcode a reliable per task value by always returning it for
every exited task.
Setting ac_exitcode in a sensible mannter makes it possible to continue
to provide this value going forward.
Cc: Balbir Singh <bsingharora@gmail.com>
Fixes: f3cef7a99469 ("[PATCH] csa: basic accounting over taskstats")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
kernel/tsacct.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index f00de83d0246..1d261fbe367b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -38,11 +38,10 @@ void bacct_add_tsk(struct user_namespace *user_ns,
stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
stats->ac_btime64 = btime;
- if (thread_group_leader(tsk)) {
+ if (tsk->flags & PF_EXITING)
stats->ac_exitcode = tsk->exit_code;
- if (tsk->flags & PF_FORKNOEXEC)
- stats->ac_flag |= AFORK;
- }
+ if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC))
+ stats->ac_flag |= AFORK;
if (tsk->flags & PF_SUPERPRIV)
stats->ac_flag |= ASU;
if (tsk->flags & PF_DUMPCORE)
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 06/17] ptrace: Remove second setting of PT_SEIZED in ptrace_attach
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (4 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 05/17] taskstats: Cleanup the use of task->exit_code Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 07/17] ptrace: Remove unused regs argument from ptrace_report_syscall Eric W. Biederman
` (12 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
The code is totally redundant remove it.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
kernel/ptrace.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f8589bf8d7dc..eea265082e97 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -419,8 +419,6 @@ static int ptrace_attach(struct task_struct *task, long request,
if (task->ptrace)
goto unlock_tasklist;
- if (seize)
- flags |= PT_SEIZED;
task->ptrace = flags;
ptrace_link(task, current);
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 07/17] ptrace: Remove unused regs argument from ptrace_report_syscall
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (5 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 06/17] ptrace: Remove second setting of PT_SEIZED in ptrace_attach Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall Eric W. Biederman
` (11 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/tracehook.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 2564b7434b4d..88c007ab5ebc 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -54,8 +54,7 @@ struct linux_binprm;
/*
* ptrace report for syscall entry and exit looks identical.
*/
-static inline int ptrace_report_syscall(struct pt_regs *regs,
- unsigned long message)
+static inline int ptrace_report_syscall(unsigned long message)
{
int ptrace = current->ptrace;
@@ -102,7 +101,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs,
static inline __must_check int tracehook_report_syscall_entry(
struct pt_regs *regs)
{
- return ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_ENTRY);
+ return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
}
/**
@@ -127,7 +126,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
if (step)
user_single_step_report(regs);
else
- ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_EXIT);
+ ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
}
/**
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (6 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 07/17] ptrace: Remove unused regs argument from ptrace_report_syscall Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-10 15:26 ` Geert Uytterhoeven
2022-01-03 21:33 ` [PATCH 09/17] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
` (10 subsequent siblings)
18 siblings, 1 reply; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman, Geert Uytterhoeven
The generic function ptrace_report_syscall does a little more
than syscall_trace on m68k. The function ptrace_report_syscall
stops early if PT_TRACED is not set, it sets ptrace_message,
and returns the result of fatal_signal_pending.
Setting ptrace_message to a passed in value of 0 is effectively not
setting ptrace_message, making that additional work a noop.
Returning the result of fatal_signal_pending and letting the caller
ignore the result becomes a noop in this change.
When a process is ptraced, the flag PT_PTRACED is always set in
current->ptrace. Testing for PT_PTRACED in ptrace_report_syscall is
just an optimization to fail early if the process is not ptraced.
Later on in ptrace_notify, ptrace_stop will test current->ptrace under
tasklist_lock and skip performing any work if the task is not ptraced.
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
arch/m68k/kernel/ptrace.c | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 94b3b274186d..aa3a0b8d07e9 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -273,17 +273,7 @@ long arch_ptrace(struct task_struct *child, long request,
asmlinkage void syscall_trace(void)
{
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
+ ptrace_report_syscall(0);
}
#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-03 21:33 ` [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall Eric W. Biederman
@ 2022-01-10 15:26 ` Geert Uytterhoeven
2022-01-10 16:20 ` Al Viro
0 siblings, 1 reply; 95+ messages in thread
From: Geert Uytterhoeven @ 2022-01-10 15:26 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Linux Kernel Mailing List, Linux-Arch, Linus Torvalds,
Oleg Nesterov, Al Viro, Kees Cook, Linux API
On Mon, Jan 3, 2022 at 10:33 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> The generic function ptrace_report_syscall does a little more
> than syscall_trace on m68k. The function ptrace_report_syscall
> stops early if PT_TRACED is not set, it sets ptrace_message,
> and returns the result of fatal_signal_pending.
>
> Setting ptrace_message to a passed in value of 0 is effectively not
> setting ptrace_message, making that additional work a noop.
>
> Returning the result of fatal_signal_pending and letting the caller
> ignore the result becomes a noop in this change.
>
> When a process is ptraced, the flag PT_PTRACED is always set in
> current->ptrace. Testing for PT_PTRACED in ptrace_report_syscall is
> just an optimization to fail early if the process is not ptraced.
> Later on in ptrace_notify, ptrace_stop will test current->ptrace under
> tasklist_lock and skip performing any work if the task is not ptraced.
>
> Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
As this depends on the removal of a parameter from
ptrace_report_syscall() earlier in this series:
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-10 15:26 ` Geert Uytterhoeven
@ 2022-01-10 16:20 ` Al Viro
2022-01-10 16:25 ` Al Viro
2022-01-10 17:54 ` Geert Uytterhoeven
0 siblings, 2 replies; 95+ messages in thread
From: Al Viro @ 2022-01-10 16:20 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Eric W. Biederman, Linux Kernel Mailing List, Linux-Arch,
Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API
On Mon, Jan 10, 2022 at 04:26:57PM +0100, Geert Uytterhoeven wrote:
> On Mon, Jan 3, 2022 at 10:33 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> > The generic function ptrace_report_syscall does a little more
> > than syscall_trace on m68k. The function ptrace_report_syscall
> > stops early if PT_TRACED is not set, it sets ptrace_message,
> > and returns the result of fatal_signal_pending.
> >
> > Setting ptrace_message to a passed in value of 0 is effectively not
> > setting ptrace_message, making that additional work a noop.
> >
> > Returning the result of fatal_signal_pending and letting the caller
> > ignore the result becomes a noop in this change.
> >
> > When a process is ptraced, the flag PT_PTRACED is always set in
> > current->ptrace. Testing for PT_PTRACED in ptrace_report_syscall is
> > just an optimization to fail early if the process is not ptraced.
> > Later on in ptrace_notify, ptrace_stop will test current->ptrace under
> > tasklist_lock and skip performing any work if the task is not ptraced.
> >
> > Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>
> As this depends on the removal of a parameter from
> ptrace_report_syscall() earlier in this series:
> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
FWIW, I would suggest taking it a bit further: make syscall_trace_enter()
and syscall_trace_leave() in m68k ptrace.c unconditional, replace the
calls of syscall_trace() in entry.S with syscall_trace_enter() and
syscall_trace_leave() resp. and remove syscall_trace().
Geert, do you see any problems with that? The only difference is that
current->ptrace_message would be set to 1 for ptrace stop on entry and
2 - on leave. Currently m68k just has it 0 all along.
It is user-visible (the whole point is to let the tracer see which
stop it is - entry or exit one), so somebody using PTRACE_GETEVENTMSG
on syscall stops would start seeing 1 or 2 instead of "0 all along".
That's how it works on all other architectures (including m68k-nommu),
and I doubt that anything in userland will get broken.
Behaviour of PTRACE_GETEVENTMSG for other stops (fork, etc.) remains
as-is, of course.
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-10 16:20 ` Al Viro
@ 2022-01-10 16:25 ` Al Viro
2022-01-10 17:54 ` Geert Uytterhoeven
1 sibling, 0 replies; 95+ messages in thread
From: Al Viro @ 2022-01-10 16:25 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Eric W. Biederman, Linux Kernel Mailing List, Linux-Arch,
Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API
On Mon, Jan 10, 2022 at 04:20:03PM +0000, Al Viro wrote:
> Geert, do you see any problems with that? The only difference is that
> current->ptrace_message would be set to 1 for ptrace stop on entry and
> 2 - on leave. Currently m68k just has it 0 all along.
>
> It is user-visible (the whole point is to let the tracer see which
> stop it is - entry or exit one), so somebody using PTRACE_GETEVENTMSG
> on syscall stops would start seeing 1 or 2 instead of "0 all along".
> That's how it works on all other architectures (including m68k-nommu),
> and I doubt that anything in userland will get broken.
>
> Behaviour of PTRACE_GETEVENTMSG for other stops (fork, etc.) remains
> as-is, of course.
Actually, the current behaviour is "report what the last PTRACE_GETEVENTMSG
has reported, whatever kind of stop that used to be for". So I very much
doubt that anything could break there.
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-10 16:20 ` Al Viro
2022-01-10 16:25 ` Al Viro
@ 2022-01-10 17:54 ` Geert Uytterhoeven
2022-01-10 20:37 ` Al Viro
2022-01-11 1:33 ` Michael Schmitz
1 sibling, 2 replies; 95+ messages in thread
From: Geert Uytterhoeven @ 2022-01-10 17:54 UTC (permalink / raw)
To: Al Viro
Cc: Eric W. Biederman, Linux Kernel Mailing List, Linux-Arch,
Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API,
Michael Schmitz, linux-m68k
Hi Al,
CC Michael/m68k,
On Mon, Jan 10, 2022 at 5:20 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Mon, Jan 10, 2022 at 04:26:57PM +0100, Geert Uytterhoeven wrote:
> > On Mon, Jan 3, 2022 at 10:33 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> > > The generic function ptrace_report_syscall does a little more
> > > than syscall_trace on m68k. The function ptrace_report_syscall
> > > stops early if PT_TRACED is not set, it sets ptrace_message,
> > > and returns the result of fatal_signal_pending.
> > >
> > > Setting ptrace_message to a passed in value of 0 is effectively not
> > > setting ptrace_message, making that additional work a noop.
> > >
> > > Returning the result of fatal_signal_pending and letting the caller
> > > ignore the result becomes a noop in this change.
> > >
> > > When a process is ptraced, the flag PT_PTRACED is always set in
> > > current->ptrace. Testing for PT_PTRACED in ptrace_report_syscall is
> > > just an optimization to fail early if the process is not ptraced.
> > > Later on in ptrace_notify, ptrace_stop will test current->ptrace under
> > > tasklist_lock and skip performing any work if the task is not ptraced.
> > >
> > > Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> > > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >
> > As this depends on the removal of a parameter from
> > ptrace_report_syscall() earlier in this series:
> > Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
>
> FWIW, I would suggest taking it a bit further: make syscall_trace_enter()
> and syscall_trace_leave() in m68k ptrace.c unconditional, replace the
> calls of syscall_trace() in entry.S with syscall_trace_enter() and
> syscall_trace_leave() resp. and remove syscall_trace().
>
> Geert, do you see any problems with that? The only difference is that
> current->ptrace_message would be set to 1 for ptrace stop on entry and
> 2 - on leave. Currently m68k just has it 0 all along.
>
> It is user-visible (the whole point is to let the tracer see which
> stop it is - entry or exit one), so somebody using PTRACE_GETEVENTMSG
> on syscall stops would start seeing 1 or 2 instead of "0 all along".
> That's how it works on all other architectures (including m68k-nommu),
> and I doubt that anything in userland will get broken.
>
> Behaviour of PTRACE_GETEVENTMSG for other stops (fork, etc.) remains
> as-is, of course.
In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
syscall_trace_enter/leave for m68k"[1], but that's still stuck...
[1] https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-10 17:54 ` Geert Uytterhoeven
@ 2022-01-10 20:37 ` Al Viro
2022-01-10 21:18 ` Eric W. Biederman
2022-01-11 1:33 ` Michael Schmitz
1 sibling, 1 reply; 95+ messages in thread
From: Al Viro @ 2022-01-10 20:37 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Eric W. Biederman, Linux Kernel Mailing List, Linux-Arch,
Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API,
Michael Schmitz, linux-m68k
On Mon, Jan 10, 2022 at 06:54:57PM +0100, Geert Uytterhoeven wrote:
> In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
> syscall_trace_enter/leave for m68k"[1], but that's still stuck...
>
> [1] https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
Looks sane, but I'd split it in two - switch to calling syscall_trace_{enter,leave}
and then handling the return values...
The former would keep the current behaviour (modulo reporting enter vs. leave
via PTRACE_GETEVENTMSG), the latter would allow syscall number change by tracer
and/or handling of seccomp/audit/whatnot.
For exit+signal work the former would suffice, and IMO it would be a good idea
to put that one into a shared branch to be pulled both by seccomp and by signal
series. Would reduce the conflicts...
Objections?
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-10 20:37 ` Al Viro
@ 2022-01-10 21:18 ` Eric W. Biederman
0 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-10 21:18 UTC (permalink / raw)
To: Al Viro
Cc: Geert Uytterhoeven, Linux Kernel Mailing List, Linux-Arch,
Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API,
Michael Schmitz, linux-m68k
Al Viro <viro@zeniv.linux.org.uk> writes:
> On Mon, Jan 10, 2022 at 06:54:57PM +0100, Geert Uytterhoeven wrote:
>
>> In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
>> syscall_trace_enter/leave for m68k"[1], but that's still stuck...
>>
>> [1] https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
>
> Looks sane, but I'd split it in two - switch to calling syscall_trace_{enter,leave}
> and then handling the return values...
>
> The former would keep the current behaviour (modulo reporting enter vs. leave
> via PTRACE_GETEVENTMSG), the latter would allow syscall number change by tracer
> and/or handling of seccomp/audit/whatnot.
>
> For exit+signal work the former would suffice, and IMO it would be a good idea
> to put that one into a shared branch to be pulled both by seccomp and by signal
> series. Would reduce the conflicts...
>
> Objections?
I have the version that Geert ack'ed queued up for v5.17 in my
signal-for-v5.17 branch, along with a couple others prior fixes in this
series of changes where it was clear they were just obviously correct
bug fixes. No need to delay the removal of profiling bits for example.
I would love to see the m68k perform syscall_trace_{enter,leave} but
just getting as far as ptrace_report_syscall will be enough to avoid any
dependencies on my side.
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-10 17:54 ` Geert Uytterhoeven
2022-01-10 20:37 ` Al Viro
@ 2022-01-11 1:33 ` Michael Schmitz
2022-01-11 22:42 ` Finn Thain
1 sibling, 1 reply; 95+ messages in thread
From: Michael Schmitz @ 2022-01-11 1:33 UTC (permalink / raw)
To: Geert Uytterhoeven, Al Viro
Cc: Eric W. Biederman, Linux Kernel Mailing List, Linux-Arch,
Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API, linux-m68k
Hi Geert,
Am 11.01.2022 um 06:54 schrieb Geert Uytterhoeven:
> Hi Al,
>
> CC Michael/m68k,
>
> On Mon, Jan 10, 2022 at 5:20 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>> On Mon, Jan 10, 2022 at 04:26:57PM +0100, Geert Uytterhoeven wrote:
>>> On Mon, Jan 3, 2022 at 10:33 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>>>> The generic function ptrace_report_syscall does a little more
>>>> than syscall_trace on m68k. The function ptrace_report_syscall
>>>> stops early if PT_TRACED is not set, it sets ptrace_message,
>>>> and returns the result of fatal_signal_pending.
>>>>
>>>> Setting ptrace_message to a passed in value of 0 is effectively not
>>>> setting ptrace_message, making that additional work a noop.
>>>>
>>>> Returning the result of fatal_signal_pending and letting the caller
>>>> ignore the result becomes a noop in this change.
>>>>
>>>> When a process is ptraced, the flag PT_PTRACED is always set in
>>>> current->ptrace. Testing for PT_PTRACED in ptrace_report_syscall is
>>>> just an optimization to fail early if the process is not ptraced.
>>>> Later on in ptrace_notify, ptrace_stop will test current->ptrace under
>>>> tasklist_lock and skip performing any work if the task is not ptraced.
>>>>
>>>> Cc: Geert Uytterhoeven <geert@linux-m68k.org>
>>>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>>>
>>> As this depends on the removal of a parameter from
>>> ptrace_report_syscall() earlier in this series:
>>> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
>>
>> FWIW, I would suggest taking it a bit further: make syscall_trace_enter()
>> and syscall_trace_leave() in m68k ptrace.c unconditional, replace the
>> calls of syscall_trace() in entry.S with syscall_trace_enter() and
>> syscall_trace_leave() resp. and remove syscall_trace().
>>
>> Geert, do you see any problems with that? The only difference is that
>> current->ptrace_message would be set to 1 for ptrace stop on entry and
>> 2 - on leave. Currently m68k just has it 0 all along.
>>
>> It is user-visible (the whole point is to let the tracer see which
>> stop it is - entry or exit one), so somebody using PTRACE_GETEVENTMSG
>> on syscall stops would start seeing 1 or 2 instead of "0 all along".
>> That's how it works on all other architectures (including m68k-nommu),
>> and I doubt that anything in userland will get broken.
>>
>> Behaviour of PTRACE_GETEVENTMSG for other stops (fork, etc.) remains
>> as-is, of course.
>
> In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
> syscall_trace_enter/leave for m68k"[1], but that's still stuck...
>
> [1] https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
That patch (for reasons I never found out) did interact badly with
Christoph Hellwig's 'remove set_fs' patches (and Al's signal fixes which
Christoph's patches are based upon). Caused format errors under memory
stress tests quite reliably, on my 030 hardware.
Probably needs a fresh look - the signal return path got changed by Al's
patches IIRC, and I might have relied on offsets to data on the stack
that are no longer correct with these patches. Or there's a race between
the syscall trap and signal handling when returning from interrupt
context ...
Still school hols over here so I won't have much peace and quiet until
February.
Cheers,
Michael
>
> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
> -- Linus Torvalds
>
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-11 1:33 ` Michael Schmitz
@ 2022-01-11 22:42 ` Finn Thain
2022-01-12 0:20 ` Michael Schmitz
0 siblings, 1 reply; 95+ messages in thread
From: Finn Thain @ 2022-01-11 22:42 UTC (permalink / raw)
To: Michael Schmitz
Cc: Geert Uytterhoeven, Al Viro, Eric W. Biederman,
Linux Kernel Mailing List, Linux-Arch, Linus Torvalds,
Oleg Nesterov, Kees Cook, Linux API, linux-m68k
On Tue, 11 Jan 2022, Michael Schmitz wrote:
> Am 11.01.2022 um 06:54 schrieb Geert Uytterhoeven:
> > Hi Al,
> >
> > CC Michael/m68k,
> >
> > On Mon, Jan 10, 2022 at 5:20 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> >> On Mon, Jan 10, 2022 at 04:26:57PM +0100, Geert Uytterhoeven wrote:
> >>> On Mon, Jan 3, 2022 at 10:33 PM Eric W. Biederman <ebiederm@xmission.com>
> >>> wrote:
> >>>> The generic function ptrace_report_syscall does a little more
> >>>> than syscall_trace on m68k. The function ptrace_report_syscall
> >>>> stops early if PT_TRACED is not set, it sets ptrace_message,
> >>>> and returns the result of fatal_signal_pending.
> >>>>
> >>>> Setting ptrace_message to a passed in value of 0 is effectively not
> >>>> setting ptrace_message, making that additional work a noop.
> >>>>
> >>>> Returning the result of fatal_signal_pending and letting the caller
> >>>> ignore the result becomes a noop in this change.
> >>>>
> >>>> When a process is ptraced, the flag PT_PTRACED is always set in
> >>>> current->ptrace. Testing for PT_PTRACED in ptrace_report_syscall is
> >>>> just an optimization to fail early if the process is not ptraced.
> >>>> Later on in ptrace_notify, ptrace_stop will test current->ptrace under
> >>>> tasklist_lock and skip performing any work if the task is not ptraced.
> >>>>
> >>>> Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> >>>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >>>
> >>> As this depends on the removal of a parameter from
> >>> ptrace_report_syscall() earlier in this series:
> >>> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
> >>
> >> FWIW, I would suggest taking it a bit further: make syscall_trace_enter()
> >> and syscall_trace_leave() in m68k ptrace.c unconditional, replace the
> >> calls of syscall_trace() in entry.S with syscall_trace_enter() and
> >> syscall_trace_leave() resp. and remove syscall_trace().
> >>
> >> Geert, do you see any problems with that? The only difference is that
> >> current->ptrace_message would be set to 1 for ptrace stop on entry and
> >> 2 - on leave. Currently m68k just has it 0 all along.
> >>
> >> It is user-visible (the whole point is to let the tracer see which
> >> stop it is - entry or exit one), so somebody using PTRACE_GETEVENTMSG
> >> on syscall stops would start seeing 1 or 2 instead of "0 all along".
> >> That's how it works on all other architectures (including m68k-nommu),
> >> and I doubt that anything in userland will get broken.
> >>
> >> Behaviour of PTRACE_GETEVENTMSG for other stops (fork, etc.) remains
> >> as-is, of course.
> >
> > In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
> > syscall_trace_enter/leave for m68k"[1], but that's still stuck...
> >
> > [1]
> > https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
>
> That patch (for reasons I never found out) did interact badly with
> Christoph Hellwig's 'remove set_fs' patches (and Al's signal fixes which
> Christoph's patches are based upon). Caused format errors under memory
> stress tests quite reliably, on my 030 hardware.
>
Those patches have since been merged, BTW.
> Probably needs a fresh look - the signal return path got changed by Al's
> patches IIRC, and I might have relied on offsets to data on the stack
> that are no longer correct with these patches. Or there's a race between
> the syscall trap and signal handling when returning from interrupt
> context ...
>
> Still school hols over here so I won't have much peace and quiet until
> February.
>
So the patch works okay with Aranym 68040 but not Motorola 68030? Since
there is at least one known issue affecting both Motorola 68030 and Hatari
68030, perhaps this patch is not the problem. In anycase, Al's suggestion
to split the patch into two may help in that testing two smaller patches
might narrow down the root cause.
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-11 22:42 ` Finn Thain
@ 2022-01-12 0:20 ` Michael Schmitz
2022-01-12 3:32 ` Finn Thain
2022-01-12 7:55 ` Geert Uytterhoeven
0 siblings, 2 replies; 95+ messages in thread
From: Michael Schmitz @ 2022-01-12 0:20 UTC (permalink / raw)
To: Finn Thain
Cc: Geert Uytterhoeven, Al Viro, Eric W. Biederman,
Linux Kernel Mailing List, Linux-Arch, Linus Torvalds,
Oleg Nesterov, Kees Cook, Linux API, linux-m68k
Hi Finn,
Am 12.01.2022 um 11:42 schrieb Finn Thain:
> On Tue, 11 Jan 2022, Michael Schmitz wrote:
>>> In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
>>> syscall_trace_enter/leave for m68k"[1], but that's still stuck...
>>>
>>> [1]
>>> https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
>>
>> That patch (for reasons I never found out) did interact badly with
>> Christoph Hellwig's 'remove set_fs' patches (and Al's signal fixes which
>> Christoph's patches are based upon). Caused format errors under memory
>> stress tests quite reliably, on my 030 hardware.
>>
>
> Those patches have since been merged, BTW.
Yes, that's why I advised caution with mine.
>
>> Probably needs a fresh look - the signal return path got changed by Al's
>> patches IIRC, and I might have relied on offsets to data on the stack
>> that are no longer correct with these patches. Or there's a race between
>> the syscall trap and signal handling when returning from interrupt
>> context ...
>>
>> Still school hols over here so I won't have much peace and quiet until
>> February.
>>
>
> So the patch works okay with Aranym 68040 but not Motorola 68030? Since
Correct - I seem to recall we also tested those on your 040 and there
was no regression there, but I may be misremembering that.
> there is at least one known issue affecting both Motorola 68030 and Hatari
> 68030, perhaps this patch is not the problem. In anycase, Al's suggestion
I hadn't ever made that connection, but it might be another explanation,
yes.
> to split the patch into two may help in that testing two smaller patches
> might narrow down the root cause.
That's certainly true.
What's the other reason these patches are still stuck, Geert? Did we
ever settle the dispute about what return code ought to abort a syscall
(in the seccomp context)?
Cheers,
Michael
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-12 0:20 ` Michael Schmitz
@ 2022-01-12 3:32 ` Finn Thain
2022-01-12 7:54 ` Michael Schmitz
2022-01-12 7:55 ` Geert Uytterhoeven
1 sibling, 1 reply; 95+ messages in thread
From: Finn Thain @ 2022-01-12 3:32 UTC (permalink / raw)
To: Michael Schmitz
Cc: Geert Uytterhoeven, Al Viro, Eric W. Biederman,
Linux Kernel Mailing List, Linux-Arch, Linus Torvalds,
Oleg Nesterov, Kees Cook, Linux API, linux-m68k
On Wed, 12 Jan 2022, Michael Schmitz wrote:
>
> I seem to recall we also tested those on your 040 and there was no
> regression there, but I may be misremembering that.
>
I abandoned that regression testing exercise when unpatched mainline
kernels began failing on that machine. I'm in the process of setting up a
different 68040 machine.
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-12 3:32 ` Finn Thain
@ 2022-01-12 7:54 ` Michael Schmitz
0 siblings, 0 replies; 95+ messages in thread
From: Michael Schmitz @ 2022-01-12 7:54 UTC (permalink / raw)
To: Finn Thain
Cc: Geert Uytterhoeven, Al Viro, Eric W. Biederman,
Linux Kernel Mailing List, Linux-Arch, Linus Torvalds,
Oleg Nesterov, Kees Cook, Linux API, linux-m68k
Hi Finn,
Am 12.01.2022 um 16:32 schrieb Finn Thain:
> On Wed, 12 Jan 2022, Michael Schmitz wrote:
>
>>
>> I seem to recall we also tested those on your 040 and there was no
>> regression there, but I may be misremembering that.
>>
>
> I abandoned that regression testing exercise when unpatched mainline
> kernels began failing on that machine. I'm in the process of setting up a
> different 68040 machine.
>
Thanks for refreshing my memory!
Splitting my first patch as suggested by Al in order to defer handling
of the syscall_trace_enter() return code would achieve what Geert
suggested (eliminate m68k syscall_trace() altogether) without risk of
regression. This would need to replace Eric's patch 8.
Do you want me to send such a version based on my old patch series, or
would you rather prepare that yourself, Eric?
Cheers,
Michael
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-12 0:20 ` Michael Schmitz
2022-01-12 3:32 ` Finn Thain
@ 2022-01-12 7:55 ` Geert Uytterhoeven
2022-01-12 8:05 ` Michael Schmitz
1 sibling, 1 reply; 95+ messages in thread
From: Geert Uytterhoeven @ 2022-01-12 7:55 UTC (permalink / raw)
To: Michael Schmitz
Cc: Finn Thain, Al Viro, Eric W. Biederman, Linux Kernel Mailing List,
Linux-Arch, Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API,
linux-m68k
Hi Michael,
On Wed, Jan 12, 2022 at 1:20 AM Michael Schmitz <schmitzmic@gmail.com> wrote:
> Am 12.01.2022 um 11:42 schrieb Finn Thain:
> > On Tue, 11 Jan 2022, Michael Schmitz wrote:
> >>> In fact Michael did so in "[PATCH v7 1/2] m68k/kernel - wire up
> >>> syscall_trace_enter/leave for m68k"[1], but that's still stuck...
> >>>
> >>> [1]
> >>> https://lore.kernel.org/r/1624924520-17567-2-git-send-email-schmitzmic@gmail.com/
> >>
> >> That patch (for reasons I never found out) did interact badly with
> >> Christoph Hellwig's 'remove set_fs' patches (and Al's signal fixes which
> >> Christoph's patches are based upon). Caused format errors under memory
> >> stress tests quite reliably, on my 030 hardware.
> >>
> >
> > Those patches have since been merged, BTW.
>
> Yes, that's why I advised caution with mine.
>
> >
> >> Probably needs a fresh look - the signal return path got changed by Al's
> >> patches IIRC, and I might have relied on offsets to data on the stack
> >> that are no longer correct with these patches. Or there's a race between
> >> the syscall trap and signal handling when returning from interrupt
> >> context ...
> >>
> >> Still school hols over here so I won't have much peace and quiet until
> >> February.
> >>
> >
> > So the patch works okay with Aranym 68040 but not Motorola 68030? Since
>
> Correct - I seem to recall we also tested those on your 040 and there
> was no regression there, but I may be misremembering that.
>
> > there is at least one known issue affecting both Motorola 68030 and Hatari
> > 68030, perhaps this patch is not the problem. In anycase, Al's suggestion
>
> I hadn't ever made that connection, but it might be another explanation,
> yes.
>
> > to split the patch into two may help in that testing two smaller patches
> > might narrow down the root cause.
>
> That's certainly true.
>
> What's the other reason these patches are still stuck, Geert? Did we
> ever settle the dispute about what return code ought to abort a syscall
> (in the seccomp context)?
IIRC, some (self)tests were still failing?
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall
2022-01-12 7:55 ` Geert Uytterhoeven
@ 2022-01-12 8:05 ` Michael Schmitz
0 siblings, 0 replies; 95+ messages in thread
From: Michael Schmitz @ 2022-01-12 8:05 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Finn Thain, Al Viro, Eric W. Biederman, Linux Kernel Mailing List,
Linux-Arch, Linus Torvalds, Oleg Nesterov, Kees Cook, Linux API,
linux-m68k
Hi Geert,
Am 12.01.2022 um 20:55 schrieb Geert Uytterhoeven:
> Hi Michael,
>
>> What's the other reason these patches are still stuck, Geert? Did we
>> ever settle the dispute about what return code ought to abort a syscall
>> (in the seccomp context)?
>
> IIRC, some (self)tests were still failing?
Too true - but I don't think my way of building the testsuite was
entirely according to the book. And I'm not sure I ran the testsuite
with more than one of the return code options. In all honesty, I had
been waiting for Adrian Glaubitz to test the patches with his seccomp
library port instead of relying on the testsuite.
Still, reason enough to split off the removal of syscall_trace() from
the seccomp stuff if it helps with Eric's patch series.
Cheers,
Michael
>
> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
> -- Linus Torvalds
>
^ permalink raw reply [flat|nested] 95+ messages in thread
* [PATCH 09/17] ptrace: Move setting/clearing ptrace_message into ptrace_stop
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (7 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 08/17] ptrace/m68k: Stop open coding ptrace_report_syscall Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 10/17] ptrace: Return the signal to continue with from ptrace_stop Eric W. Biederman
` (9 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
Today ptrace_message is easy to overlook as it not a core part of
ptrace_stop. It has been overlooked so much that there are places
that set ptrace_message and don't clear it, and places that never set
it. So if you get an unlucky sequence of events the ptracer may be
able to read a ptrace_message that does not apply to the current
ptrace stop.
Move setting of ptrace_message into ptrace_stop so that it always gets
set before the stop, and always gets cleared after the stop. This
prevents non-sense from being reported to userspace and makes
ptrace_message more visible in the ptrace API so that kernel
developers can see it.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/ptrace.h | 5 ++---
include/linux/tracehook.h | 6 ++----
kernel/signal.c | 19 +++++++++++--------
3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 8aee2945ff08..06f27736c6f8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
-extern void ptrace_notify(int exit_code);
+extern void ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent,
const struct cred *ptracer_cred);
@@ -155,8 +155,7 @@ static inline bool ptrace_event_enabled(struct task_struct *task, int event)
static inline void ptrace_event(int event, unsigned long message)
{
if (unlikely(ptrace_event_enabled(current, event))) {
- current->ptrace_message = message;
- ptrace_notify((event << 8) | SIGTRAP);
+ ptrace_notify((event << 8) | SIGTRAP, message);
} else if (event == PTRACE_EVENT_EXEC) {
/* legacy EXEC report via SIGTRAP */
if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 88c007ab5ebc..5e60af8a11fc 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -61,8 +61,7 @@ static inline int ptrace_report_syscall(unsigned long message)
if (!(ptrace & PT_PTRACED))
return 0;
- current->ptrace_message = message;
- ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
+ ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message);
/*
* this isn't the same as continuing with a signal, but it will do
@@ -74,7 +73,6 @@ static inline int ptrace_report_syscall(unsigned long message)
current->exit_code = 0;
}
- current->ptrace_message = 0;
return fatal_signal_pending(current);
}
@@ -143,7 +141,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
static inline void tracehook_signal_handler(int stepping)
{
if (stepping)
- ptrace_notify(SIGTRAP);
+ ptrace_notify(SIGTRAP, 0);
}
/**
diff --git a/kernel/signal.c b/kernel/signal.c
index 802acca0207b..75bb062d8534 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2197,7 +2197,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* If we actually decide not to stop at all because the tracer
* is gone, we keep current->exit_code unless clear_code.
*/
-static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t *info)
+static void ptrace_stop(int exit_code, int why, int clear_code,
+ unsigned long message, kernel_siginfo_t *info)
__releases(¤t->sighand->siglock)
__acquires(¤t->sighand->siglock)
{
@@ -2243,6 +2244,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
smp_wmb();
+ current->ptrace_message = message;
current->last_siginfo = info;
current->exit_code = exit_code;
@@ -2321,6 +2323,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
spin_lock_irq(¤t->sighand->siglock);
current->last_siginfo = NULL;
+ current->ptrace_message = 0;
/* LISTENING can be set only during STOP traps, clear it */
current->jobctl &= ~JOBCTL_LISTENING;
@@ -2333,7 +2336,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
recalc_sigpending_tsk(current);
}
-static void ptrace_do_notify(int signr, int exit_code, int why)
+static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
{
kernel_siginfo_t info;
@@ -2344,17 +2347,17 @@ static void ptrace_do_notify(int signr, int exit_code, int why)
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
/* Let the debugger run. */
- ptrace_stop(exit_code, why, 1, &info);
+ ptrace_stop(exit_code, why, 1, message, &info);
}
-void ptrace_notify(int exit_code)
+void ptrace_notify(int exit_code, unsigned long message)
{
BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
if (unlikely(current->task_works))
task_work_run();
spin_lock_irq(¤t->sighand->siglock);
- ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED);
+ ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message);
spin_unlock_irq(¤t->sighand->siglock);
}
@@ -2510,10 +2513,10 @@ static void do_jobctl_trap(void)
signr = SIGTRAP;
WARN_ON_ONCE(!signr);
ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8),
- CLD_STOPPED);
+ CLD_STOPPED, 0);
} else {
WARN_ON_ONCE(!signr);
- ptrace_stop(signr, CLD_STOPPED, 0, NULL);
+ ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL);
current->exit_code = 0;
}
}
@@ -2567,7 +2570,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
* comment in dequeue_signal().
*/
current->jobctl |= JOBCTL_STOP_DEQUEUED;
- ptrace_stop(signr, CLD_TRAPPED, 0, info);
+ ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
/* We're back. Did the debugger cancel the sig? */
signr = current->exit_code;
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 10/17] ptrace: Return the signal to continue with from ptrace_stop
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (8 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 09/17] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 11/17] ptrace: Separate task->ptrace_code out from task->exit_code Eric W. Biederman
` (8 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
The signal a task should continue with after a ptrace stop is
inconsistently read, cleared, and sent. Solve this by reading and
clearing the signal to be sent in ptrace_stop.
In an ideal world everything except ptrace_signal would share a common
implementation of continuing with the signal, so ptracers could count
on the signal they ask to continue with actually being delivered. For
now retain bug compatibility and just return with the signal number
the ptracer requested the code continue with.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/ptrace.h | 2 +-
include/linux/tracehook.h | 10 +++++-----
kernel/signal.c | 31 ++++++++++++++++++-------------
3 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 06f27736c6f8..323c9950e705 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
-extern void ptrace_notify(int exit_code, unsigned long message);
+extern int ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent,
const struct cred *ptracer_cred);
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 5e60af8a11fc..2fd0bfe866c0 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -57,21 +57,21 @@ struct linux_binprm;
static inline int ptrace_report_syscall(unsigned long message)
{
int ptrace = current->ptrace;
+ int signr;
if (!(ptrace & PT_PTRACED))
return 0;
- ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message);
+ signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0),
+ message);
/*
* this isn't the same as continuing with a signal, but it will do
* for normal use. strace only continues with a signal if the
* stopping signal is not SIGTRAP. -brl
*/
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
+ if (signr)
+ send_sig(signr, current, 1);
return fatal_signal_pending(current);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 75bb062d8534..9903ff12e581 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2194,15 +2194,17 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* That makes it a way to test a stopped process for
* being ptrace-stopped vs being job-control-stopped.
*
- * If we actually decide not to stop at all because the tracer
- * is gone, we keep current->exit_code unless clear_code.
+ * Returns the signal the ptracer requested the code resume
+ * with. If the code did not stop because the tracer is gone,
+ * the stop signal remains unchanged unless clear_code.
*/
-static void ptrace_stop(int exit_code, int why, int clear_code,
+static int ptrace_stop(int exit_code, int why, int clear_code,
unsigned long message, kernel_siginfo_t *info)
__releases(¤t->sighand->siglock)
__acquires(¤t->sighand->siglock)
{
bool gstop_done = false;
+ bool read_code = true;
if (arch_ptrace_stop_needed()) {
/*
@@ -2311,8 +2313,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
/* tasklist protects us from ptrace_freeze_traced() */
__set_current_state(TASK_RUNNING);
+ read_code = false;
if (clear_code)
- current->exit_code = 0;
+ exit_code = 0;
read_unlock(&tasklist_lock);
}
@@ -2322,8 +2325,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
* any signal-sending on another CPU that wants to examine it.
*/
spin_lock_irq(¤t->sighand->siglock);
+ if (read_code) exit_code = current->exit_code;
current->last_siginfo = NULL;
current->ptrace_message = 0;
+ current->exit_code = 0;
/* LISTENING can be set only during STOP traps, clear it */
current->jobctl &= ~JOBCTL_LISTENING;
@@ -2334,9 +2339,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
* This sets TIF_SIGPENDING, but never clears it.
*/
recalc_sigpending_tsk(current);
+ return exit_code;
}
-static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
+static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
{
kernel_siginfo_t info;
@@ -2347,18 +2353,21 @@ static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long me
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
/* Let the debugger run. */
- ptrace_stop(exit_code, why, 1, message, &info);
+ return ptrace_stop(exit_code, why, 1, message, &info);
}
-void ptrace_notify(int exit_code, unsigned long message)
+int ptrace_notify(int exit_code, unsigned long message)
{
+ int signr;
+
BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
if (unlikely(current->task_works))
task_work_run();
spin_lock_irq(¤t->sighand->siglock);
- ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message);
+ signr = ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message);
spin_unlock_irq(¤t->sighand->siglock);
+ return signr;
}
/**
@@ -2517,7 +2526,6 @@ static void do_jobctl_trap(void)
} else {
WARN_ON_ONCE(!signr);
ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL);
- current->exit_code = 0;
}
}
@@ -2570,15 +2578,12 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
* comment in dequeue_signal().
*/
current->jobctl |= JOBCTL_STOP_DEQUEUED;
- ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
+ signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
/* We're back. Did the debugger cancel the sig? */
- signr = current->exit_code;
if (signr == 0)
return signr;
- current->exit_code = 0;
-
/*
* Update the siginfo structure if the signal has
* changed. If the debugger wanted something
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 11/17] ptrace: Separate task->ptrace_code out from task->exit_code
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (9 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 10/17] ptrace: Return the signal to continue with from ptrace_stop Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 12/17] signal: Compute the process exit_code in get_signal Eric W. Biederman
` (7 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
A process can be marked for death by setting SIGNAL_GROUP_EXIT and
group_exit_code, long before do_exit is called. Unfortunately because
of PTRACE_EVENT_EXIT residing in do_exit this same tactic can not be
used for task death.
Correct this by adding a new task field task->ptrace_code that holds
the code for ptrace stops. This allows task->exit_code to be set to
the exit code long before the PTRACE_EVENT_EXIT ptrace stop.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/proc/array.c | 3 +++
include/linux/sched.h | 1 +
kernel/exit.c | 2 +-
kernel/ptrace.c | 12 ++++++------
kernel/signal.c | 18 +++++++++---------
5 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 43a7abde9e42..3042015c11ad 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -519,6 +519,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
cgtime = sig->cgtime;
rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
+ if (task_is_traced(task) && !(task->jobctl & JOBCTL_LISTENING))
+ exit_code = task->ptrace_code;
+
/* add up live thread stats at the group level */
if (whole) {
struct task_struct *t = task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52f2fdffa3ab..c3d732bf7833 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1174,6 +1174,7 @@ struct task_struct {
/* Ptrace state: */
unsigned long ptrace_message;
kernel_siginfo_t *last_siginfo;
+ int ptrace_code;
struct task_io_accounting ioac;
#ifdef CONFIG_PSI
diff --git a/kernel/exit.c b/kernel/exit.c
index 7121db37c411..aedefe5eb0eb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1134,7 +1134,7 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
if (ptrace) {
if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING))
- return &p->exit_code;
+ return &p->ptrace_code;
} else {
if (p->signal->flags & SIGNAL_STOP_STOPPED)
return &p->signal->group_exit_code;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index eea265082e97..8bbd73ab9a34 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -172,7 +172,7 @@ void __ptrace_unlink(struct task_struct *child)
static bool looks_like_a_spurious_pid(struct task_struct *task)
{
- if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
+ if (task->ptrace_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
return false;
if (task_pid_vnr(task) == task->ptrace_message)
@@ -573,7 +573,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
* tasklist_lock avoids the race with wait_task_stopped(), see
* the comment in ptrace_resume().
*/
- child->exit_code = data;
+ child->ptrace_code = data;
__ptrace_detach(current, child);
write_unlock_irq(&tasklist_lock);
@@ -863,11 +863,11 @@ static int ptrace_resume(struct task_struct *child, long request,
}
/*
- * Change ->exit_code and ->state under siglock to avoid the race
- * with wait_task_stopped() in between; a non-zero ->exit_code will
+ * Change ->ptrace_code and ->state under siglock to avoid the race
+ * with wait_task_stopped() in between; a non-zero ->ptrace_code will
* wrongly look like another report from tracee.
*
- * Note that we need siglock even if ->exit_code == data and/or this
+ * Note that we need siglock even if ->ptrace_code == data and/or this
* status was not reported yet, the new status must not be cleared by
* wait_task_stopped() after resume.
*
@@ -878,7 +878,7 @@ static int ptrace_resume(struct task_struct *child, long request,
need_siglock = data && !thread_group_empty(current);
if (need_siglock)
spin_lock_irq(&child->sighand->siglock);
- child->exit_code = data;
+ child->ptrace_code = data;
wake_up_state(child, __TASK_TRACED);
if (need_siglock)
spin_unlock_irq(&child->sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 9903ff12e581..fd3c404de8b6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2168,7 +2168,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
info.si_status = tsk->signal->group_exit_code & 0x7f;
break;
case CLD_TRAPPED:
- info.si_status = tsk->exit_code & 0x7f;
+ info.si_status = tsk->ptrace_code & 0x7f;
break;
default:
BUG();
@@ -2198,7 +2198,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* with. If the code did not stop because the tracer is gone,
* the stop signal remains unchanged unless clear_code.
*/
-static int ptrace_stop(int exit_code, int why, int clear_code,
+static int ptrace_stop(int code, int why, int clear_code,
unsigned long message, kernel_siginfo_t *info)
__releases(¤t->sighand->siglock)
__acquires(¤t->sighand->siglock)
@@ -2248,7 +2248,7 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
current->ptrace_message = message;
current->last_siginfo = info;
- current->exit_code = exit_code;
+ current->ptrace_code = code;
/*
* If @why is CLD_STOPPED, we're trapping to participate in a group
@@ -2315,7 +2315,7 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
__set_current_state(TASK_RUNNING);
read_code = false;
if (clear_code)
- exit_code = 0;
+ code = 0;
read_unlock(&tasklist_lock);
}
@@ -2325,10 +2325,10 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
* any signal-sending on another CPU that wants to examine it.
*/
spin_lock_irq(¤t->sighand->siglock);
- if (read_code) exit_code = current->exit_code;
+ if (read_code) code = current->ptrace_code;
current->last_siginfo = NULL;
current->ptrace_message = 0;
- current->exit_code = 0;
+ current->ptrace_code = 0;
/* LISTENING can be set only during STOP traps, clear it */
current->jobctl &= ~JOBCTL_LISTENING;
@@ -2339,7 +2339,7 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
* This sets TIF_SIGPENDING, but never clears it.
*/
recalc_sigpending_tsk(current);
- return exit_code;
+ return code;
}
static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
@@ -2501,11 +2501,11 @@ static bool do_signal_stop(int signr)
*
* When PT_SEIZED, it's used for both group stop and explicit
* SEIZE/INTERRUPT traps. Both generate PTRACE_EVENT_STOP trap with
- * accompanying siginfo. If stopped, lower eight bits of exit_code contain
+ * accompanying siginfo. If stopped, lower eight bits of ptrace_code contain
* the stop signal; otherwise, %SIGTRAP.
*
* When !PT_SEIZED, it's used only for group stop trap with stop signal
- * number as exit_code and no siginfo.
+ * number as ptrace_code and no siginfo.
*
* CONTEXT:
* Must be called with @current->sighand->siglock held, which may be
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 12/17] signal: Compute the process exit_code in get_signal
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (10 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 11/17] ptrace: Separate task->ptrace_code out from task->exit_code Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 13/17] signal: Make individual tasks exiting a first class concept Eric W. Biederman
` (6 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
In prepartion for moving the work of sys_exit and sys_group_exit into
get_signal compute exit_code in get_signal, make PF_SIGNALED depend on
the exit_code and pass the exit_code to do_group_exit.
Anytime there is a group exit the exit_code may differ from the signal
number.
To match the historical precedent as best I can make the exit_code 0
during exec. (The exit_code field would not have been set but probably
would have been left at a value of 0).
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
kernel/signal.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index fd3c404de8b6..2a24cca00ca1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2707,6 +2707,7 @@ bool get_signal(struct ksignal *ksig)
for (;;) {
struct k_sigaction *ka;
enum pid_type type;
+ int exit_code;
/* Has this task already been marked for death? */
if ((signal->flags & SIGNAL_GROUP_EXIT) ||
@@ -2716,6 +2717,10 @@ bool get_signal(struct ksignal *ksig)
trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
&sighand->action[SIGKILL - 1]);
recalc_sigpending();
+ if (signal->flags & SIGNAL_GROUP_EXIT)
+ exit_code = signal->group_exit_code;
+ else
+ exit_code = 0;
goto fatal;
}
@@ -2837,15 +2842,17 @@ bool get_signal(struct ksignal *ksig)
continue;
}
+ /*
+ * Anything else is fatal, maybe with a core dump.
+ */
+ exit_code = signr;
fatal:
spin_unlock_irq(&sighand->siglock);
if (unlikely(cgroup_task_frozen(current)))
cgroup_leave_frozen(true);
- /*
- * Anything else is fatal, maybe with a core dump.
- */
- current->flags |= PF_SIGNALED;
+ if (exit_code & 0x7f)
+ current->flags |= PF_SIGNALED;
if (sig_kernel_coredump(signr)) {
if (print_fatal_signals)
@@ -2873,7 +2880,7 @@ bool get_signal(struct ksignal *ksig)
/*
* Death signals, no core dump.
*/
- do_group_exit(ksig->info.si_signo);
+ do_group_exit(exit_code);
/* NOTREACHED */
}
spin_unlock_irq(&sighand->siglock);
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 13/17] signal: Make individual tasks exiting a first class concept
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (11 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 12/17] signal: Compute the process exit_code in get_signal Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 14/17] signal: Remove zap_other_threads Eric W. Biederman
` (5 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
Add a helper schedule_task_exit_locked that is equivalent to
asynchronously calling exit(2) except for not having an exit code.
This is a generalization of what happens in de_thread, zap_process,
prepare_signal, complete_signal, and zap_other_threads when individual
tasks are asked to shutdown.
The various code paths optimize away the setting sigaddset and
signal_wake_up based on different conditions. Neither sigaddset nor
signal_wake_up are needed if the task has already started running
do_exit. So skip the work if PF_POSTCOREDUMP is set. Which is the
earliest any of the original hand rolled implementations used.
Update get_signal to detect either signal group exit or a single task
exit by testing for __fatal_signal_pending. This works because the
all of the tasks in group exits are killed with
schedule_task_exit_locked.
For clarity the code in get_signal has been updated to call do_exit
instead of do_group_exit when a single task is exiting.
While this schedule_task_exit_locked is a generalization of what
happens in prepare_signal I do not change prepare_signal to use
schedule_task_exit_locked to deliver SIGKILL to a coredumping process.
This keeps all of the specialness delivering a signal to a coredumping
process limited to prepare_signal and the coredump code itself.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/coredump.c | 7 ++-----
include/linux/sched/signal.h | 2 ++
kernel/signal.c | 36 +++++++++++++++++++++---------------
3 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/fs/coredump.c b/fs/coredump.c
index 09302a6a0d80..9559e29daada 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -358,12 +358,9 @@ static int zap_process(struct task_struct *start, int exit_code)
start->signal->group_stop_count = 0;
for_each_thread(start, t) {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
+ schedule_task_exit_locked(t);
+ if (t != current && !(t->flags & PF_POSTCOREDUMP))
nr++;
- }
}
return nr;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b6ecb9fc4cd2..7c62b7c29cc0 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -427,6 +427,8 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
}
+void schedule_task_exit_locked(struct task_struct *task);
+
void task_join_group_stop(struct task_struct *task);
#ifdef TIF_RESTORE_SIGMASK
diff --git a/kernel/signal.c b/kernel/signal.c
index 2a24cca00ca1..cbfb9020368e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1056,9 +1056,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
signal->group_stop_count = 0;
t = p;
do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
+ schedule_task_exit_locked(t);
} while_each_thread(p, t);
return;
}
@@ -1363,6 +1361,16 @@ int force_sig_info(struct kernel_siginfo *info)
return force_sig_info_to_task(info, current, HANDLER_CURRENT);
}
+void schedule_task_exit_locked(struct task_struct *task)
+{
+ task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
+ /* Only bother with threads that might be alive */
+ if (!(task->flags & PF_POSTCOREDUMP)) {
+ sigaddset(&task->pending.signal, SIGKILL);
+ signal_wake_up(task, 1);
+ }
+}
+
/*
* Nuke all other threads in the group.
*/
@@ -1374,16 +1382,9 @@ int zap_other_threads(struct task_struct *p)
p->signal->group_stop_count = 0;
while_each_thread(p, t) {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
count++;
-
- /* Don't bother with already dead threads */
- if (t->exit_state)
- continue;
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
+ schedule_task_exit_locked(t);
}
-
return count;
}
@@ -2706,12 +2707,12 @@ bool get_signal(struct ksignal *ksig)
for (;;) {
struct k_sigaction *ka;
+ bool group_exit = true;
enum pid_type type;
int exit_code;
/* Has this task already been marked for death? */
- if ((signal->flags & SIGNAL_GROUP_EXIT) ||
- signal->group_exec_task) {
+ if (__fatal_signal_pending(current)) {
ksig->info.si_signo = signr = SIGKILL;
sigdelset(¤t->pending.signal, SIGKILL);
trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
@@ -2719,8 +2720,10 @@ bool get_signal(struct ksignal *ksig)
recalc_sigpending();
if (signal->flags & SIGNAL_GROUP_EXIT)
exit_code = signal->group_exit_code;
- else
+ else {
exit_code = 0;
+ group_exit = false;
+ }
goto fatal;
}
@@ -2880,7 +2883,10 @@ bool get_signal(struct ksignal *ksig)
/*
* Death signals, no core dump.
*/
- do_group_exit(exit_code);
+ if (group_exit)
+ do_group_exit(exit_code);
+ else
+ do_exit(exit_code);
/* NOTREACHED */
}
spin_unlock_irq(&sighand->siglock);
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 14/17] signal: Remove zap_other_threads
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (12 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 13/17] signal: Make individual tasks exiting a first class concept Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 15/17] signal: Add JOBCTL_WILL_EXIT to mark exiting tasks Eric W. Biederman
` (4 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
The two callers of zap_other_threads want different things. The
function do_group_exit wants to set the exit code and it does not care
about the number of threads exiting. In de_thread the current thread
is not exiting so there is not really an exit code.
Since schedule_task_exit_locked factors out the tricky bits stop
sharing the loop in zap_other_threads between de_thread and
do_group_exit.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/exec.c | 12 +++++++++---
include/linux/sched/signal.h | 1 -
kernel/exit.c | 9 ++++++++-
kernel/signal.c | 17 -----------------
4 files changed, 17 insertions(+), 22 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 82db656ca709..b9f646fddc51 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1037,6 +1037,7 @@ static int de_thread(struct task_struct *tsk)
struct signal_struct *sig = tsk->signal;
struct sighand_struct *oldsighand = tsk->sighand;
spinlock_t *lock = &oldsighand->siglock;
+ struct task_struct *t;
if (thread_group_empty(tsk))
goto no_thread_group;
@@ -1055,9 +1056,14 @@ static int de_thread(struct task_struct *tsk)
}
sig->group_exec_task = tsk;
- sig->notify_count = zap_other_threads(tsk);
- if (!thread_group_leader(tsk))
- sig->notify_count--;
+ sig->group_stop_count = 0;
+ sig->notify_count = 0;
+ __for_each_thread(sig, t) {
+ if (t == tsk)
+ continue;
+ sig->notify_count++;
+ schedule_task_exit_locked(t);
+ }
while (sig->notify_count) {
__set_current_state(TASK_KILLABLE);
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 7c62b7c29cc0..eed54f9ea2fc 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -343,7 +343,6 @@ extern void force_sig(int);
extern void force_fatal_sig(int);
extern void force_exit_sig(int);
extern int send_sig(int, struct task_struct *, int);
-extern int zap_other_threads(struct task_struct *p);
extern struct sigqueue *sigqueue_alloc(void);
extern void sigqueue_free(struct sigqueue *);
extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
diff --git a/kernel/exit.c b/kernel/exit.c
index aedefe5eb0eb..27bc0ccfea78 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -918,9 +918,16 @@ do_group_exit(int exit_code)
else if (sig->group_exec_task)
exit_code = 0;
else {
+ struct task_struct *t;
+
sig->group_exit_code = exit_code;
sig->flags = SIGNAL_GROUP_EXIT;
- zap_other_threads(current);
+ sig->group_stop_count = 0;
+ __for_each_thread(sig, t) {
+ if (t == current)
+ continue;
+ schedule_task_exit_locked(t);
+ }
}
spin_unlock_irq(&sighand->siglock);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index cbfb9020368e..b0201e05be40 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1371,23 +1371,6 @@ void schedule_task_exit_locked(struct task_struct *task)
}
}
-/*
- * Nuke all other threads in the group.
- */
-int zap_other_threads(struct task_struct *p)
-{
- struct task_struct *t = p;
- int count = 0;
-
- p->signal->group_stop_count = 0;
-
- while_each_thread(p, t) {
- count++;
- schedule_task_exit_locked(t);
- }
- return count;
-}
-
struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
unsigned long *flags)
{
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 15/17] signal: Add JOBCTL_WILL_EXIT to mark exiting tasks
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (13 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 14/17] signal: Remove zap_other_threads Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 16/17] signal: Record the exit_code when an exit is scheduled Eric W. Biederman
` (3 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
Mark tasks that need to exit with JOBCTL_WILL_EXIT instead of reusing
the per thread SIGKILL.
This removes the double meaning of the per thread SIGKILL and makes it
possible to detect when a task has already been scheduled for exiting
and to skip unnecessary work if the task is already scheduled to exit.
A jobctl flag was choosen for this purpose because jobctl changes are
protected by siglock, and updates are already careful not to change or
clear other bits in jobctl. Protection by a lock when changing the
value is necessary as JOBCTL_WILL_EXIT will not be limited to being
set by the current task. That task->jobctl is protected by siglock is
convenient as siglock is already held everywhere I want to set or reset
JOBCTL_WILL_EXIT.
Teach wants_signal and retarget_shared_pending to use
JOBCTL_TASK_EXITING to detect threads that have an exit pending and so
will not be processing any more signals.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/coredump.c | 6 ++++--
include/linux/sched/jobctl.h | 2 ++
include/linux/sched/signal.h | 2 +-
kernel/exit.c | 4 ++--
kernel/signal.c | 19 +++++++++----------
5 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/fs/coredump.c b/fs/coredump.c
index 9559e29daada..4e82ee51633d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -352,7 +352,6 @@ static int zap_process(struct task_struct *start, int exit_code)
struct task_struct *t;
int nr = 0;
- /* Allow SIGKILL, see prepare_signal() */
start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -376,9 +375,11 @@ static int zap_threads(struct task_struct *tsk,
if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
signal->core_state = core_state;
nr = zap_process(tsk, exit_code);
+ atomic_set(&core_state->nr_threads, nr);
+ /* Allow SIGKILL, see prepare_signal() */
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
tsk->flags |= PF_DUMPCORE;
- atomic_set(&core_state->nr_threads, nr);
+ tsk->jobctl &= ~JOBCTL_WILL_EXIT;
}
spin_unlock_irq(&tsk->sighand->siglock);
return nr;
@@ -425,6 +426,7 @@ static void coredump_finish(bool core_dumped)
current->signal->group_exit_code |= 0x80;
next = current->signal->core_state->dumper.next;
current->signal->core_state = NULL;
+ current->jobctl |= JOBCTL_WILL_EXIT;
spin_unlock_irq(¤t->sighand->siglock);
while ((curr = next) != NULL) {
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index fa067de9f1a9..9887d737ccfb 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -19,6 +19,7 @@ struct task_struct;
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
+#define JOBCTL_WILL_EXIT_BIT 31 /* task will exit */
#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
@@ -28,6 +29,7 @@ struct task_struct;
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
+#define JOBCTL_WILL_EXIT (1UL << JOBCTL_WILL_EXIT_BIT)
#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index eed54f9ea2fc..989bb665f107 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -373,7 +373,7 @@ static inline int signal_pending(struct task_struct *p)
static inline int __fatal_signal_pending(struct task_struct *p)
{
- return unlikely(sigismember(&p->pending.signal, SIGKILL));
+ return unlikely(p->jobctl & JOBCTL_WILL_EXIT);
}
static inline int fatal_signal_pending(struct task_struct *p)
diff --git a/kernel/exit.c b/kernel/exit.c
index 27bc0ccfea78..7a7a0cbac28e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -906,7 +906,7 @@ do_group_exit(int exit_code)
if (sig->flags & SIGNAL_GROUP_EXIT)
exit_code = sig->group_exit_code;
- else if (sig->group_exec_task)
+ else if (current->jobctl & JOBCTL_WILL_EXIT)
exit_code = 0;
else if (!thread_group_empty(current)) {
struct sighand_struct *const sighand = current->sighand;
@@ -915,7 +915,7 @@ do_group_exit(int exit_code)
if (sig->flags & SIGNAL_GROUP_EXIT)
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
- else if (sig->group_exec_task)
+ else if (current->jobctl & JOBCTL_WILL_EXIT)
exit_code = 0;
else {
struct task_struct *t;
diff --git a/kernel/signal.c b/kernel/signal.c
index b0201e05be40..6179e34ce666 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -153,7 +153,8 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
static bool recalc_sigpending_tsk(struct task_struct *t)
{
- if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
+ if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE |
+ JOBCTL_WILL_EXIT)) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked) ||
cgroup_task_frozen(t)) {
@@ -911,7 +912,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
if (core_state) {
if (sig == SIGKILL) {
struct task_struct *dumper = core_state->dumper.task;
- sigaddset(&dumper->pending.signal, SIGKILL);
+ dumper->jobctl |= JOBCTL_WILL_EXIT;
signal_wake_up(dumper, 1);
}
}
@@ -985,7 +986,7 @@ static inline bool wants_signal(int sig, struct task_struct *p)
if (sigismember(&p->blocked, sig))
return false;
- if (p->flags & PF_EXITING)
+ if (p->jobctl & JOBCTL_WILL_EXIT)
return false;
if (sig == SIGKILL)
@@ -1363,10 +1364,9 @@ int force_sig_info(struct kernel_siginfo *info)
void schedule_task_exit_locked(struct task_struct *task)
{
- task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
- /* Only bother with threads that might be alive */
- if (!(task->flags & PF_POSTCOREDUMP)) {
- sigaddset(&task->pending.signal, SIGKILL);
+ if (!(task->jobctl & JOBCTL_WILL_EXIT)) {
+ task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
+ task->jobctl |= JOBCTL_WILL_EXIT;
signal_wake_up(task, 1);
}
}
@@ -2695,9 +2695,8 @@ bool get_signal(struct ksignal *ksig)
int exit_code;
/* Has this task already been marked for death? */
- if (__fatal_signal_pending(current)) {
+ if (current->jobctl & JOBCTL_WILL_EXIT) {
ksig->info.si_signo = signr = SIGKILL;
- sigdelset(¤t->pending.signal, SIGKILL);
trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
&sighand->action[SIGKILL - 1]);
recalc_sigpending();
@@ -2935,7 +2934,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
t = tsk;
while_each_thread(tsk, t) {
- if (t->flags & PF_EXITING)
+ if (t->jobctl & JOBCTL_WILL_EXIT)
continue;
if (!has_pending_signals(&retarget, &t->blocked))
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 16/17] signal: Record the exit_code when an exit is scheduled
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (14 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 15/17] signal: Add JOBCTL_WILL_EXIT to mark exiting tasks Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-01-03 21:33 ` [PATCH 17/17] signal: Always set SIGNAL_GROUP_EXIT on process exit Eric W. Biederman
` (2 subsequent siblings)
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
With ptrace_stop no longer using task->exit_code it is safe
to set task->exit_code when an exit is scheduled.
Use the bit JOBCTL_WILL_EXIT to detect when a exit is first scheduled
and only set exit_code the first time. Only use the code provided
to do_exit if the task has not yet been schedled to exit.
In get_signal and do_grup_exit when JOBCTL_WILL_EXIT is set read the
recored exit_code from current->exit_code, instead of assuming
exit_code will always be 0.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/coredump.c | 2 +-
fs/exec.c | 2 +-
include/linux/sched/signal.h | 2 +-
kernel/exit.c | 12 ++++++++----
kernel/signal.c | 7 ++++---
5 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/fs/coredump.c b/fs/coredump.c
index 4e82ee51633d..c54b502bf648 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -357,7 +357,7 @@ static int zap_process(struct task_struct *start, int exit_code)
start->signal->group_stop_count = 0;
for_each_thread(start, t) {
- schedule_task_exit_locked(t);
+ schedule_task_exit_locked(t, exit_code);
if (t != current && !(t->flags & PF_POSTCOREDUMP))
nr++;
}
diff --git a/fs/exec.c b/fs/exec.c
index b9f646fddc51..3203605e54cb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1062,7 +1062,7 @@ static int de_thread(struct task_struct *tsk)
if (t == tsk)
continue;
sig->notify_count++;
- schedule_task_exit_locked(t);
+ schedule_task_exit_locked(t, 0);
}
while (sig->notify_count) {
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 989bb665f107..e8034ecaee84 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -426,7 +426,7 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
}
-void schedule_task_exit_locked(struct task_struct *task);
+void schedule_task_exit_locked(struct task_struct *task, int exit_code);
void task_join_group_stop(struct task_struct *task);
diff --git a/kernel/exit.c b/kernel/exit.c
index 7a7a0cbac28e..e95500e2d27c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -735,6 +735,11 @@ void __noreturn do_exit(long code)
struct task_struct *tsk = current;
int group_dead;
+ spin_lock_irq(&tsk->sighand->siglock);
+ schedule_task_exit_locked(tsk, code);
+ code = tsk->exit_code;
+ spin_unlock_irq(&tsk->sighand->siglock);
+
WARN_ON(blk_needs_flush_plug(tsk));
kcov_task_exit(tsk);
@@ -773,7 +778,6 @@ void __noreturn do_exit(long code)
tty_audit_exit();
audit_free(tsk);
- tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
exit_mm();
@@ -907,7 +911,7 @@ do_group_exit(int exit_code)
if (sig->flags & SIGNAL_GROUP_EXIT)
exit_code = sig->group_exit_code;
else if (current->jobctl & JOBCTL_WILL_EXIT)
- exit_code = 0;
+ exit_code = current->exit_code;
else if (!thread_group_empty(current)) {
struct sighand_struct *const sighand = current->sighand;
@@ -916,7 +920,7 @@ do_group_exit(int exit_code)
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
else if (current->jobctl & JOBCTL_WILL_EXIT)
- exit_code = 0;
+ exit_code = current->exit_code;
else {
struct task_struct *t;
@@ -926,7 +930,7 @@ do_group_exit(int exit_code)
__for_each_thread(sig, t) {
if (t == current)
continue;
- schedule_task_exit_locked(t);
+ schedule_task_exit_locked(t, exit_code);
}
}
spin_unlock_irq(&sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 6179e34ce666..e8fac8a3c935 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1057,7 +1057,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
signal->group_stop_count = 0;
t = p;
do {
- schedule_task_exit_locked(t);
+ schedule_task_exit_locked(t, sig);
} while_each_thread(p, t);
return;
}
@@ -1362,11 +1362,12 @@ int force_sig_info(struct kernel_siginfo *info)
return force_sig_info_to_task(info, current, HANDLER_CURRENT);
}
-void schedule_task_exit_locked(struct task_struct *task)
+void schedule_task_exit_locked(struct task_struct *task, int exit_code)
{
if (!(task->jobctl & JOBCTL_WILL_EXIT)) {
task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
task->jobctl |= JOBCTL_WILL_EXIT;
+ task->exit_code = exit_code;
signal_wake_up(task, 1);
}
}
@@ -2703,7 +2704,7 @@ bool get_signal(struct ksignal *ksig)
if (signal->flags & SIGNAL_GROUP_EXIT)
exit_code = signal->group_exit_code;
else {
- exit_code = 0;
+ exit_code = current->exit_code;
group_exit = false;
}
goto fatal;
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 17/17] signal: Always set SIGNAL_GROUP_EXIT on process exit
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (15 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 16/17] signal: Record the exit_code when an exit is scheduled Eric W. Biederman
@ 2022-01-03 21:33 ` Eric W. Biederman
2022-03-09 0:13 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
2022-03-09 0:15 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
18 siblings, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-01-03 21:33 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, Linus Torvalds, Oleg Nesterov, Al Viro, Kees Cook,
linux-api, Eric W. Biederman
Track how many threads have not started exiting and when
the last thread starts exiting set SIGNAL_GROUP_EXIT.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/coredump.c | 4 ----
include/linux/sched/signal.h | 1 +
kernel/exit.c | 8 +-------
kernel/fork.c | 2 ++
kernel/signal.c | 10 +++++++---
5 files changed, 11 insertions(+), 14 deletions(-)
diff --git a/fs/coredump.c b/fs/coredump.c
index c54b502bf648..029d0f98aa90 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -352,10 +352,6 @@ static int zap_process(struct task_struct *start, int exit_code)
struct task_struct *t;
int nr = 0;
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
for_each_thread(start, t) {
schedule_task_exit_locked(t, exit_code);
if (t != current && !(t->flags & PF_POSTCOREDUMP))
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e8034ecaee84..bd9435e934a1 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -94,6 +94,7 @@ struct signal_struct {
refcount_t sigcnt;
atomic_t live;
int nr_threads;
+ int quick_threads;
struct list_head thread_head;
wait_queue_head_t wait_chldexit; /* for wait4() */
diff --git a/kernel/exit.c b/kernel/exit.c
index e95500e2d27c..be867a12de65 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -924,14 +924,8 @@ do_group_exit(int exit_code)
else {
struct task_struct *t;
- sig->group_exit_code = exit_code;
- sig->flags = SIGNAL_GROUP_EXIT;
- sig->group_stop_count = 0;
- __for_each_thread(sig, t) {
- if (t == current)
- continue;
+ __for_each_thread(sig, t)
schedule_task_exit_locked(t, exit_code);
- }
}
spin_unlock_irq(&sighand->siglock);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 6f0293cb29c9..d973189a4014 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1644,6 +1644,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM;
sig->nr_threads = 1;
+ sig->quick_threads = 1;
atomic_set(&sig->live, 1);
refcount_set(&sig->sigcnt, 1);
@@ -2383,6 +2384,7 @@ static __latent_entropy struct task_struct *copy_process(
__this_cpu_inc(process_counts);
} else {
current->signal->nr_threads++;
+ current->signal->quick_threads++;
atomic_inc(¤t->signal->live);
refcount_inc(¤t->signal->sigcnt);
task_join_group_stop(p);
diff --git a/kernel/signal.c b/kernel/signal.c
index e8fac8a3c935..9bd835fcb1dc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1052,9 +1052,6 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
* running and doing things after a slower
* thread has the fatal signal pending.
*/
- signal->flags = SIGNAL_GROUP_EXIT;
- signal->group_exit_code = sig;
- signal->group_stop_count = 0;
t = p;
do {
schedule_task_exit_locked(t, sig);
@@ -1365,10 +1362,17 @@ int force_sig_info(struct kernel_siginfo *info)
void schedule_task_exit_locked(struct task_struct *task, int exit_code)
{
if (!(task->jobctl & JOBCTL_WILL_EXIT)) {
+ struct signal_struct *signal = task->signal;
task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
task->jobctl |= JOBCTL_WILL_EXIT;
task->exit_code = exit_code;
signal_wake_up(task, 1);
+ signal->quick_threads--;
+ if (signal->quick_threads == 0) {
+ signal->flags = SIGNAL_GROUP_EXIT;
+ signal->group_exit_code = exit_code;
+ signal->group_stop_count = 0;
+ }
}
}
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* [PATCH 00/13] Removing tracehook.h
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (16 preceding siblings ...)
2022-01-03 21:33 ` [PATCH 17/17] signal: Always set SIGNAL_GROUP_EXIT on process exit Eric W. Biederman
@ 2022-03-09 0:13 ` Eric W. Biederman
2022-03-09 21:05 ` Jens Axboe
2022-03-15 23:18 ` [PATCH 0/2] ptrace: Making the ptrace changes atomic Eric W. Biederman
2022-03-09 0:15 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
18 siblings, 2 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-09 0:13 UTC (permalink / raw)
To: linux-kernel
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Kees Cook, Al Viro, linux-api, Jens Axboe
While working on cleaning up do_exit I have been having to deal with the
code in tracehook.h. Unfortunately the code in tracehook.h does not
make sense as organized.
This set of changes reorganizes things so that tracehook.h no longer
exists, and so that it's current contents are organized in a fashion
that is a little easier to understand.
The biggest change is that I lean into the fact that get_signal
always calls task_work_run and removes the logic that tried to
be smart and decouple task_work_run and get_signal as it has proven
to not be effective.
This is a conservative change and I am not changing the how things
like signal_pending operate (although it is probably justified).
A new header resume_user_mode.h is added to hold resume_user_mode_work
which was previously known as tracehook_notify_resume.
Eric W. Biederman (13):
ptrace: Move ptrace_report_syscall into ptrace.h
ptrace/arm: Rename tracehook_report_syscall report_syscall
ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h
ptrace: Remove arch_syscall_{enter,exit}_tracehook
ptrace: Remove tracehook_signal_handler
task_work: Remove unnecessary include from posix_timers.h
task_work: Introduce task_work_pending
task_work: Call tracehook_notify_signal from get_signal on all architectures
task_work: Decouple TIF_NOTIFY_SIGNAL and task_work
signal: Move set_notify_signal and clear_notify_signal into sched/signal.h
resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume
resume_user_mode: Move to resume_user_mode.h
tracehook: Remove tracehook.h
MAINTAINERS | 1 -
arch/Kconfig | 5 +-
arch/alpha/kernel/ptrace.c | 5 +-
arch/alpha/kernel/signal.c | 4 +-
arch/arc/kernel/ptrace.c | 5 +-
arch/arc/kernel/signal.c | 4 +-
arch/arm/kernel/ptrace.c | 12 +-
arch/arm/kernel/signal.c | 4 +-
arch/arm64/kernel/ptrace.c | 14 +--
arch/arm64/kernel/signal.c | 4 +-
arch/csky/kernel/ptrace.c | 5 +-
arch/csky/kernel/signal.c | 4 +-
arch/h8300/kernel/ptrace.c | 5 +-
arch/h8300/kernel/signal.c | 4 +-
arch/hexagon/kernel/process.c | 4 +-
arch/hexagon/kernel/signal.c | 1 -
arch/hexagon/kernel/traps.c | 6 +-
arch/ia64/kernel/process.c | 4 +-
arch/ia64/kernel/ptrace.c | 6 +-
arch/ia64/kernel/signal.c | 1 -
arch/m68k/kernel/ptrace.c | 6 +-
arch/m68k/kernel/signal.c | 4 +-
arch/microblaze/kernel/ptrace.c | 5 +-
arch/microblaze/kernel/signal.c | 4 +-
arch/mips/kernel/ptrace.c | 5 +-
arch/mips/kernel/signal.c | 4 +-
arch/nds32/include/asm/syscall.h | 2 +-
arch/nds32/kernel/ptrace.c | 5 +-
arch/nds32/kernel/signal.c | 4 +-
arch/nios2/kernel/ptrace.c | 5 +-
arch/nios2/kernel/signal.c | 4 +-
arch/openrisc/kernel/ptrace.c | 5 +-
arch/openrisc/kernel/signal.c | 4 +-
arch/parisc/kernel/ptrace.c | 7 +-
arch/parisc/kernel/signal.c | 4 +-
arch/powerpc/kernel/ptrace/ptrace.c | 8 +-
arch/powerpc/kernel/signal.c | 4 +-
arch/riscv/kernel/ptrace.c | 5 +-
arch/riscv/kernel/signal.c | 4 +-
arch/s390/include/asm/entry-common.h | 1 -
arch/s390/kernel/ptrace.c | 1 -
arch/s390/kernel/signal.c | 5 +-
arch/sh/kernel/ptrace_32.c | 5 +-
arch/sh/kernel/signal_32.c | 4 +-
arch/sparc/kernel/ptrace_32.c | 5 +-
arch/sparc/kernel/ptrace_64.c | 5 +-
arch/sparc/kernel/signal32.c | 1 -
arch/sparc/kernel/signal_32.c | 4 +-
arch/sparc/kernel/signal_64.c | 4 +-
arch/um/kernel/process.c | 4 +-
arch/um/kernel/ptrace.c | 5 +-
arch/x86/kernel/ptrace.c | 1 -
arch/x86/kernel/signal.c | 5 +-
arch/x86/mm/tlb.c | 1 +
arch/xtensa/kernel/ptrace.c | 5 +-
arch/xtensa/kernel/signal.c | 4 +-
block/blk-cgroup.c | 2 +-
fs/coredump.c | 1 -
fs/exec.c | 1 -
fs/io-wq.c | 6 +-
fs/io_uring.c | 11 +-
fs/proc/array.c | 1 -
fs/proc/base.c | 1 -
include/asm-generic/syscall.h | 2 +-
include/linux/entry-common.h | 47 +-------
include/linux/entry-kvm.h | 2 +-
include/linux/posix-timers.h | 1 -
include/linux/ptrace.h | 78 ++++++++++++
include/linux/resume_user_mode.h | 64 ++++++++++
include/linux/sched/signal.h | 17 +++
include/linux/task_work.h | 5 +
include/linux/tracehook.h | 226 -----------------------------------
include/uapi/linux/ptrace.h | 2 +-
kernel/entry/common.c | 19 +--
kernel/entry/kvm.c | 9 +-
kernel/exit.c | 3 +-
kernel/livepatch/transition.c | 1 -
kernel/seccomp.c | 1 -
kernel/signal.c | 23 ++--
kernel/task_work.c | 4 +-
kernel/time/posix-cpu-timers.c | 1 +
mm/memcontrol.c | 2 +-
security/apparmor/domain.c | 1 -
security/selinux/hooks.c | 1 -
84 files changed, 317 insertions(+), 462 deletions(-)
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 00/13] Removing tracehook.h
2022-03-09 0:13 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
@ 2022-03-09 21:05 ` Jens Axboe
2022-03-15 23:18 ` [PATCH 0/2] ptrace: Making the ptrace changes atomic Eric W. Biederman
1 sibling, 0 replies; 95+ messages in thread
From: Jens Axboe @ 2022-03-09 21:05 UTC (permalink / raw)
To: Eric W. Biederman, linux-kernel
Cc: Linus Torvalds, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, linux-api
On 3/8/22 5:13 PM, Eric W. Biederman wrote:
>
> While working on cleaning up do_exit I have been having to deal with the
> code in tracehook.h. Unfortunately the code in tracehook.h does not
> make sense as organized.
>
> This set of changes reorganizes things so that tracehook.h no longer
> exists, and so that it's current contents are organized in a fashion
> that is a little easier to understand.
>
> The biggest change is that I lean into the fact that get_signal
> always calls task_work_run and removes the logic that tried to
> be smart and decouple task_work_run and get_signal as it has proven
> to not be effective.
>
> This is a conservative change and I am not changing the how things
> like signal_pending operate (although it is probably justified).
>
> A new header resume_user_mode.h is added to hold resume_user_mode_work
> which was previously known as tracehook_notify_resume.
This is a nice cleanup. I did bother me adding the TIF_NOTIFY_SIGNAL
bits and work hooks to something unrelated, but that's where other
things resided then. This makes it a lot better.
--
Jens Axboe
^ permalink raw reply [flat|nested] 95+ messages in thread
* [PATCH 0/2] ptrace: Making the ptrace changes atomic
2022-03-09 0:13 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
2022-03-09 21:05 ` Jens Axboe
@ 2022-03-15 23:18 ` Eric W. Biederman
2022-03-15 23:21 ` [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
` (2 more replies)
1 sibling, 3 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-15 23:18 UTC (permalink / raw)
To: linux-kernel
Cc: Linus Torvalds, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, linux-api, Jens Axboe
While working on cleaning up the exit path it have had occasion to look
at what guarantees are provided for setting and reading the fields that
are provided in task_struct for ptraces. Namely exit_code,
ptrace_message, and last_siginfo. It turns out as the ptrace interface
in the kernel was extended in the kernel the old existing interfaces
in the kernel were just wrapped and not properly updated to handle
the new functionality. This lead to races and inconsistencies.
This fixes the reason for the races and inconsistencies by moving the
work of maintaining the ptrace fields into ptrace_stop.
The inconsistency that results in some ptrace_stop points continuing
with a signal while others will not I have left alone as it appears to
be part of our userspace ABI, and changing that risks breaking
userspace.
Eric W. Biederman (2):
ptrace: Move setting/clearing ptrace_message into ptrace_stop
ptrace: Return the signal to continue with from ptrace_stop
include/linux/ptrace.h | 17 +++++++----------
include/uapi/linux/ptrace.h | 2 +-
kernel/signal.c | 40 ++++++++++++++++++++++++----------------
3 files changed, 32 insertions(+), 27 deletions(-)
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop
2022-03-15 23:18 ` [PATCH 0/2] ptrace: Making the ptrace changes atomic Eric W. Biederman
@ 2022-03-15 23:21 ` Eric W. Biederman
2022-03-17 17:46 ` Oleg Nesterov
2022-03-17 19:10 ` Kees Cook
2022-03-15 23:22 ` [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop Eric W. Biederman
2022-03-28 23:56 ` [GIT PULL] ptrace: Cleanups for v5.18 Eric W. Biederman
2 siblings, 2 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-15 23:21 UTC (permalink / raw)
To: linux-kernel
Cc: Linus Torvalds, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, linux-api, Jens Axboe
Today ptrace_message is easy to overlook as it not a core part of
ptrace_stop. It has been overlooked so much that there are places
that set ptrace_message and don't clear it, and places that never set
it. So if you get an unlucky sequence of events the ptracer may be
able to read a ptrace_message that does not apply to the current
ptrace stop.
Move setting of ptrace_message into ptrace_stop so that it always gets
set before the stop, and always gets cleared after the stop. This
prevents non-sense from being reported to userspace and makes
ptrace_message more visible in the ptrace helper functions so that
kernel developers can see it.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/ptrace.h | 9 +++------
include/uapi/linux/ptrace.h | 2 +-
kernel/signal.c | 21 ++++++++++++---------
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 5310f43e4762..3e6b46e2b7be 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
-extern void ptrace_notify(int exit_code);
+extern void ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent,
const struct cred *ptracer_cred);
@@ -155,8 +155,7 @@ static inline bool ptrace_event_enabled(struct task_struct *task, int event)
static inline void ptrace_event(int event, unsigned long message)
{
if (unlikely(ptrace_event_enabled(current, event))) {
- current->ptrace_message = message;
- ptrace_notify((event << 8) | SIGTRAP);
+ ptrace_notify((event << 8) | SIGTRAP, message);
} else if (event == PTRACE_EVENT_EXEC) {
/* legacy EXEC report via SIGTRAP */
if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
@@ -424,8 +423,7 @@ static inline int ptrace_report_syscall(unsigned long message)
if (!(ptrace & PT_PTRACED))
return 0;
- current->ptrace_message = message;
- ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
+ ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message);
/*
* this isn't the same as continuing with a signal, but it will do
@@ -437,7 +435,6 @@ static inline int ptrace_report_syscall(unsigned long message)
current->exit_code = 0;
}
- current->ptrace_message = 0;
return fatal_signal_pending(current);
}
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index b7af92e07d1f..195ae64a8c87 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -114,7 +114,7 @@ struct ptrace_rseq_configuration {
/*
* These values are stored in task->ptrace_message
- * by ptrace_report_syscall_* to describe the current syscall-stop.
+ * by ptrace_stop to describe the current syscall-stop.
*/
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
diff --git a/kernel/signal.c b/kernel/signal.c
index c2dee5420567..a49ac7149256 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2191,7 +2191,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* If we actually decide not to stop at all because the tracer
* is gone, we keep current->exit_code unless clear_code.
*/
-static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t *info)
+static void ptrace_stop(int exit_code, int why, int clear_code,
+ unsigned long message, kernel_siginfo_t *info)
__releases(¤t->sighand->siglock)
__acquires(¤t->sighand->siglock)
{
@@ -2237,6 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
smp_wmb();
+ current->ptrace_message = message;
current->last_siginfo = info;
current->exit_code = exit_code;
@@ -2315,6 +2317,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
spin_lock_irq(¤t->sighand->siglock);
current->last_siginfo = NULL;
+ current->ptrace_message = 0;
/* LISTENING can be set only during STOP traps, clear it */
current->jobctl &= ~JOBCTL_LISTENING;
@@ -2327,7 +2330,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
recalc_sigpending_tsk(current);
}
-static void ptrace_do_notify(int signr, int exit_code, int why)
+static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
{
kernel_siginfo_t info;
@@ -2338,17 +2341,17 @@ static void ptrace_do_notify(int signr, int exit_code, int why)
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
/* Let the debugger run. */
- ptrace_stop(exit_code, why, 1, &info);
+ ptrace_stop(exit_code, why, 1, message, &info);
}
-void ptrace_notify(int exit_code)
+void ptrace_notify(int exit_code, unsigned long message)
{
BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
if (unlikely(task_work_pending(current)))
task_work_run();
spin_lock_irq(¤t->sighand->siglock);
- ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED);
+ ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message);
spin_unlock_irq(¤t->sighand->siglock);
}
@@ -2504,10 +2507,10 @@ static void do_jobctl_trap(void)
signr = SIGTRAP;
WARN_ON_ONCE(!signr);
ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8),
- CLD_STOPPED);
+ CLD_STOPPED, 0);
} else {
WARN_ON_ONCE(!signr);
- ptrace_stop(signr, CLD_STOPPED, 0, NULL);
+ ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL);
current->exit_code = 0;
}
}
@@ -2561,7 +2564,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
* comment in dequeue_signal().
*/
current->jobctl |= JOBCTL_STOP_DEQUEUED;
- ptrace_stop(signr, CLD_TRAPPED, 0, info);
+ ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
/* We're back. Did the debugger cancel the sig? */
signr = current->exit_code;
@@ -2891,7 +2894,7 @@ static void signal_delivered(struct ksignal *ksig, int stepping)
if (current->sas_ss_flags & SS_AUTODISARM)
sas_ss_reset(current);
if (stepping)
- ptrace_notify(SIGTRAP);
+ ptrace_notify(SIGTRAP, 0);
}
void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* Re: [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop
2022-03-15 23:21 ` [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
@ 2022-03-17 17:46 ` Oleg Nesterov
2022-03-17 19:10 ` Kees Cook
1 sibling, 0 replies; 95+ messages in thread
From: Oleg Nesterov @ 2022-03-17 17:46 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Kees Cook, Al Viro, linux-api, Jens Axboe
On 03/15, Eric W. Biederman wrote:
>
> there are places
> that set ptrace_message and don't clear it, and places that never set
> it.
Yes, I too never understood this.
So I obviously like this change. The only problem (as usual) is that we
can never know if something depends on this old (and strange) behaviour.
Acked-by: Oleg Nesterov <oleg@redhat.com>
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop
2022-03-15 23:21 ` [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
2022-03-17 17:46 ` Oleg Nesterov
@ 2022-03-17 19:10 ` Kees Cook
2022-03-18 14:44 ` Eric W. Biederman
1 sibling, 1 reply; 95+ messages in thread
From: Kees Cook @ 2022-03-17 19:10 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Al Viro, linux-api, Jens Axboe
On Tue, Mar 15, 2022 at 06:21:08PM -0500, Eric W. Biederman wrote:
>
> Today ptrace_message is easy to overlook as it not a core part of
> ptrace_stop. It has been overlooked so much that there are places
> that set ptrace_message and don't clear it, and places that never set
> it. So if you get an unlucky sequence of events the ptracer may be
> able to read a ptrace_message that does not apply to the current
> ptrace stop.
>
> Move setting of ptrace_message into ptrace_stop so that it always gets
> set before the stop, and always gets cleared after the stop. This
> prevents non-sense from being reported to userspace and makes
> ptrace_message more visible in the ptrace helper functions so that
> kernel developers can see it.
>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
This looks good to me. Did you happen to run the seccomp selftests
before/after these changes?
Reviewed-by: Kees Cook <keescook@chromium.org>
--
Kees Cook
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop
2022-03-17 19:10 ` Kees Cook
@ 2022-03-18 14:44 ` Eric W. Biederman
2022-03-18 17:20 ` Kees Cook
0 siblings, 1 reply; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-18 14:44 UTC (permalink / raw)
To: Kees Cook
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Al Viro, linux-api, Jens Axboe
Kees Cook <keescook@chromium.org> writes:
> On Tue, Mar 15, 2022 at 06:21:08PM -0500, Eric W. Biederman wrote:
>>
>> Today ptrace_message is easy to overlook as it not a core part of
>> ptrace_stop. It has been overlooked so much that there are places
>> that set ptrace_message and don't clear it, and places that never set
>> it. So if you get an unlucky sequence of events the ptracer may be
>> able to read a ptrace_message that does not apply to the current
>> ptrace stop.
>>
>> Move setting of ptrace_message into ptrace_stop so that it always gets
>> set before the stop, and always gets cleared after the stop. This
>> prevents non-sense from being reported to userspace and makes
>> ptrace_message more visible in the ptrace helper functions so that
>> kernel developers can see it.
>>
>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>
> This looks good to me. Did you happen to run the seccomp selftests
> before/after these changes?
I did not. This is a pure ptrace change. Do you see a way that seccomp
could be involved?
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop
2022-03-18 14:44 ` Eric W. Biederman
@ 2022-03-18 17:20 ` Kees Cook
0 siblings, 0 replies; 95+ messages in thread
From: Kees Cook @ 2022-03-18 17:20 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Al Viro, linux-api, Jens Axboe
On Fri, Mar 18, 2022 at 09:44:30AM -0500, Eric W. Biederman wrote:
> Kees Cook <keescook@chromium.org> writes:
>
> > On Tue, Mar 15, 2022 at 06:21:08PM -0500, Eric W. Biederman wrote:
> >>
> >> Today ptrace_message is easy to overlook as it not a core part of
> >> ptrace_stop. It has been overlooked so much that there are places
> >> that set ptrace_message and don't clear it, and places that never set
> >> it. So if you get an unlucky sequence of events the ptracer may be
> >> able to read a ptrace_message that does not apply to the current
> >> ptrace stop.
> >>
> >> Move setting of ptrace_message into ptrace_stop so that it always gets
> >> set before the stop, and always gets cleared after the stop. This
> >> prevents non-sense from being reported to userspace and makes
> >> ptrace_message more visible in the ptrace helper functions so that
> >> kernel developers can see it.
> >>
> >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >
> > This looks good to me. Did you happen to run the seccomp selftests
> > before/after these changes?
>
> I did not. This is a pure ptrace change. Do you see a way that seccomp
> could be involved?
Sorry, that wasn't clear: seccomp includes a number of ptrace tests as
well, especially involving handling process death, messages, and
signals. I'll give it a spin; so far it seems fine.
--
Kees Cook
^ permalink raw reply [flat|nested] 95+ messages in thread
* [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-15 23:18 ` [PATCH 0/2] ptrace: Making the ptrace changes atomic Eric W. Biederman
2022-03-15 23:21 ` [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
@ 2022-03-15 23:22 ` Eric W. Biederman
2022-03-17 18:08 ` Oleg Nesterov
2022-03-17 19:13 ` Kees Cook
2022-03-28 23:56 ` [GIT PULL] ptrace: Cleanups for v5.18 Eric W. Biederman
2 siblings, 2 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-15 23:22 UTC (permalink / raw)
To: linux-kernel
Cc: Linus Torvalds, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, linux-api, Jens Axboe
The signal a task should continue with after a ptrace stop is
inconsistently read, cleared, and sent. Solve this by reading and
clearing the signal to be sent in ptrace_stop.
In an ideal world everything except ptrace_signal would share a common
implementation of continuing with the signal, so ptracers could count
on the signal they ask to continue with actually being delivered. For
now retain bug compatibility and just return with the signal number
the ptracer requested the code continue with.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
include/linux/ptrace.h | 12 ++++++------
kernel/signal.c | 31 ++++++++++++++++++-------------
2 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 3e6b46e2b7be..15b3d176b6b4 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
-extern void ptrace_notify(int exit_code, unsigned long message);
+extern int ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent,
const struct cred *ptracer_cred);
@@ -419,21 +419,21 @@ extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oa
static inline int ptrace_report_syscall(unsigned long message)
{
int ptrace = current->ptrace;
+ int signr;
if (!(ptrace & PT_PTRACED))
return 0;
- ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message);
+ signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0),
+ message);
/*
* this isn't the same as continuing with a signal, but it will do
* for normal use. strace only continues with a signal if the
* stopping signal is not SIGTRAP. -brl
*/
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
+ if (signr)
+ send_sig(signr, current, 1);
return fatal_signal_pending(current);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index a49ac7149256..e53ee84b9021 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2188,15 +2188,17 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* That makes it a way to test a stopped process for
* being ptrace-stopped vs being job-control-stopped.
*
- * If we actually decide not to stop at all because the tracer
- * is gone, we keep current->exit_code unless clear_code.
+ * Returns the signal the ptracer requested the code resume
+ * with. If the code did not stop because the tracer is gone,
+ * the stop signal remains unchanged unless clear_code.
*/
-static void ptrace_stop(int exit_code, int why, int clear_code,
+static int ptrace_stop(int exit_code, int why, int clear_code,
unsigned long message, kernel_siginfo_t *info)
__releases(¤t->sighand->siglock)
__acquires(¤t->sighand->siglock)
{
bool gstop_done = false;
+ bool read_code = true;
if (arch_ptrace_stop_needed()) {
/*
@@ -2305,8 +2307,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
/* tasklist protects us from ptrace_freeze_traced() */
__set_current_state(TASK_RUNNING);
+ read_code = false;
if (clear_code)
- current->exit_code = 0;
+ exit_code = 0;
read_unlock(&tasklist_lock);
}
@@ -2316,8 +2319,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
* any signal-sending on another CPU that wants to examine it.
*/
spin_lock_irq(¤t->sighand->siglock);
+ if (read_code) exit_code = current->exit_code;
current->last_siginfo = NULL;
current->ptrace_message = 0;
+ current->exit_code = 0;
/* LISTENING can be set only during STOP traps, clear it */
current->jobctl &= ~JOBCTL_LISTENING;
@@ -2328,9 +2333,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
* This sets TIF_SIGPENDING, but never clears it.
*/
recalc_sigpending_tsk(current);
+ return exit_code;
}
-static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
+static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
{
kernel_siginfo_t info;
@@ -2341,18 +2347,21 @@ static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long me
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
/* Let the debugger run. */
- ptrace_stop(exit_code, why, 1, message, &info);
+ return ptrace_stop(exit_code, why, 1, message, &info);
}
-void ptrace_notify(int exit_code, unsigned long message)
+int ptrace_notify(int exit_code, unsigned long message)
{
+ int signr;
+
BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
if (unlikely(task_work_pending(current)))
task_work_run();
spin_lock_irq(¤t->sighand->siglock);
- ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message);
+ signr = ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message);
spin_unlock_irq(¤t->sighand->siglock);
+ return signr;
}
/**
@@ -2511,7 +2520,6 @@ static void do_jobctl_trap(void)
} else {
WARN_ON_ONCE(!signr);
ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL);
- current->exit_code = 0;
}
}
@@ -2564,15 +2572,12 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
* comment in dequeue_signal().
*/
current->jobctl |= JOBCTL_STOP_DEQUEUED;
- ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
+ signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
/* We're back. Did the debugger cancel the sig? */
- signr = current->exit_code;
if (signr == 0)
return signr;
- current->exit_code = 0;
-
/*
* Update the siginfo structure if the signal has
* changed. If the debugger wanted something
--
2.29.2
^ permalink raw reply related [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-15 23:22 ` [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop Eric W. Biederman
@ 2022-03-17 18:08 ` Oleg Nesterov
2022-03-17 18:31 ` Eric W. Biederman
2022-03-18 14:40 ` Eric W. Biederman
2022-03-17 19:13 ` Kees Cook
1 sibling, 2 replies; 95+ messages in thread
From: Oleg Nesterov @ 2022-03-17 18:08 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Kees Cook, Al Viro, linux-api, Jens Axboe
Not sure I understand this patch, I can't apply it. I guess I need to
clone your tree first, will do later.
Just one question right now,
On 03/15, Eric W. Biederman wrote:
>
> +static int ptrace_stop(int exit_code, int why, int clear_code,
> unsigned long message, kernel_siginfo_t *info)
> __releases(¤t->sighand->siglock)
> __acquires(¤t->sighand->siglock)
> {
> bool gstop_done = false;
> + bool read_code = true;
>
> if (arch_ptrace_stop_needed()) {
> /*
> @@ -2305,8 +2307,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
>
> /* tasklist protects us from ptrace_freeze_traced() */
> __set_current_state(TASK_RUNNING);
> + read_code = false;
> if (clear_code)
> - current->exit_code = 0;
> + exit_code = 0;
> read_unlock(&tasklist_lock);
> }
>
> @@ -2316,8 +2319,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
> * any signal-sending on another CPU that wants to examine it.
> */
> spin_lock_irq(¤t->sighand->siglock);
> + if (read_code) exit_code = current->exit_code;
style ;)
> current->last_siginfo = NULL;
> current->ptrace_message = 0;
> + current->exit_code = 0;
OK, I like it... but can't we remove the ugly "int clear_code" arg?
Oleg.
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-17 18:08 ` Oleg Nesterov
@ 2022-03-17 18:31 ` Eric W. Biederman
2022-03-18 19:43 ` Oleg Nesterov
2022-03-18 14:40 ` Eric W. Biederman
1 sibling, 1 reply; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-17 18:31 UTC (permalink / raw)
To: Oleg Nesterov
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Kees Cook, Al Viro, linux-api, Jens Axboe
Oleg Nesterov <oleg@redhat.com> writes:
> Not sure I understand this patch, I can't apply it. I guess I need to
> clone your tree first, will do later.
>
> Just one question right now,
>
> On 03/15, Eric W. Biederman wrote:
>>
>> +static int ptrace_stop(int exit_code, int why, int clear_code,
>> unsigned long message, kernel_siginfo_t *info)
>> __releases(¤t->sighand->siglock)
>> __acquires(¤t->sighand->siglock)
>> {
>> bool gstop_done = false;
>> + bool read_code = true;
>>
>> if (arch_ptrace_stop_needed()) {
>> /*
>> @@ -2305,8 +2307,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
>>
>> /* tasklist protects us from ptrace_freeze_traced() */
>> __set_current_state(TASK_RUNNING);
>> + read_code = false;
>> if (clear_code)
>> - current->exit_code = 0;
>> + exit_code = 0;
>> read_unlock(&tasklist_lock);
>> }
>>
>> @@ -2316,8 +2319,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code,
>> * any signal-sending on another CPU that wants to examine it.
>> */
>> spin_lock_irq(¤t->sighand->siglock);
>> + if (read_code) exit_code = current->exit_code;
>
> style ;)
>
>> current->last_siginfo = NULL;
>> current->ptrace_message = 0;
>> + current->exit_code = 0;
>
> OK, I like it... but can't we remove the ugly "int clear_code" arg?
The flag clear_code controls what happens if a ptrace_stop does not
stop. In particular clear_code means do not continue with
a signal if we can not stop.
For do_jobctl_trap calling ptrace_stop it clearly does not matter.
For ptrace_signal it would be a change in behavior, that would
cause the signal not to be delivered.
For ptrace_do_notify we don't have a signal to deliver unless userspace
gives us one so clear_code makes sense at that point.
Which is a long way of saying that no we can not remove clear_stop
because the behavior it is used to implement makes sense and userspace
can reasonably depend upon it.
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-17 18:31 ` Eric W. Biederman
@ 2022-03-18 19:43 ` Oleg Nesterov
0 siblings, 0 replies; 95+ messages in thread
From: Oleg Nesterov @ 2022-03-18 19:43 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Kees Cook, Al Viro, linux-api, Jens Axboe
On 03/17, Eric W. Biederman wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > OK, I like it... but can't we remove the ugly "int clear_code" arg?
>
> The flag clear_code controls what happens if a ptrace_stop does not
> stop. In particular clear_code means do not continue with
> a signal if we can not stop.
>
> For do_jobctl_trap calling ptrace_stop it clearly does not matter.
>
> For ptrace_signal it would be a change in behavior, that would
> cause the signal not to be delivered.
Well I meant that "clear_code" should be false, iirc only
ptrace_report_syscall() should be updated to void the spurious
send_sig() if debugger exits... Nevermind, pleasee forget, this is
not as trivial as I thought.
Acked-by: Oleg Nesterov <oleg@redhat.com>
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-17 18:08 ` Oleg Nesterov
2022-03-17 18:31 ` Eric W. Biederman
@ 2022-03-18 14:40 ` Eric W. Biederman
1 sibling, 0 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-18 14:40 UTC (permalink / raw)
To: Oleg Nesterov
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Kees Cook, Al Viro, linux-api, Jens Axboe
Oleg Nesterov <oleg@redhat.com> writes:
> Not sure I understand this patch, I can't apply it. I guess I need to
> clone your tree first, will do later.
Yes this series is on top of:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git kill-tracehook-for-v5.18
Nothing particularly interesting happens in my kill tracehook series
but I do get rid of tracehook.h and so everything gets moved and
renamed.
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-15 23:22 ` [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop Eric W. Biederman
2022-03-17 18:08 ` Oleg Nesterov
@ 2022-03-17 19:13 ` Kees Cook
2022-03-18 14:52 ` Eric W. Biederman
1 sibling, 1 reply; 95+ messages in thread
From: Kees Cook @ 2022-03-17 19:13 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Al Viro, linux-api, Jens Axboe
On Tue, Mar 15, 2022 at 06:22:26PM -0500, Eric W. Biederman wrote:
>
> The signal a task should continue with after a ptrace stop is
> inconsistently read, cleared, and sent. Solve this by reading and
> clearing the signal to be sent in ptrace_stop.
>
> In an ideal world everything except ptrace_signal would share a common
> implementation of continuing with the signal, so ptracers could count
> on the signal they ask to continue with actually being delivered. For
> now retain bug compatibility and just return with the signal number
> the ptracer requested the code continue with.
>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
> include/linux/ptrace.h | 12 ++++++------
> kernel/signal.c | 31 ++++++++++++++++++-------------
> 2 files changed, 24 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
> index 3e6b46e2b7be..15b3d176b6b4 100644
> --- a/include/linux/ptrace.h
> +++ b/include/linux/ptrace.h
> @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
> extern void ptrace_disable(struct task_struct *);
> extern int ptrace_request(struct task_struct *child, long request,
> unsigned long addr, unsigned long data);
> -extern void ptrace_notify(int exit_code, unsigned long message);
> +extern int ptrace_notify(int exit_code, unsigned long message);
> [...]
> -static void ptrace_stop(int exit_code, int why, int clear_code,
> +static int ptrace_stop(int exit_code, int why, int clear_code,
> unsigned long message, kernel_siginfo_t *info)
> [...]
> -static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
> +static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
> [...]
> -void ptrace_notify(int exit_code, unsigned long message)
> +int ptrace_notify(int exit_code, unsigned long message)
Just for robustness, how about marking the functions that have switched
from void to int return as __must_check (or at least just ptrace_notify)?
With that and the style nit Oleg already mentioned, yeah, this looks
good too.
Reviewed-by: Kees Cook <keescook@chromium.org>
--
Kees Cook
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-17 19:13 ` Kees Cook
@ 2022-03-18 14:52 ` Eric W. Biederman
2022-03-18 17:28 ` Kees Cook
0 siblings, 1 reply; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-18 14:52 UTC (permalink / raw)
To: Kees Cook
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Al Viro, linux-api, Jens Axboe
Kees Cook <keescook@chromium.org> writes:
> On Tue, Mar 15, 2022 at 06:22:26PM -0500, Eric W. Biederman wrote:
>>
>> The signal a task should continue with after a ptrace stop is
>> inconsistently read, cleared, and sent. Solve this by reading and
>> clearing the signal to be sent in ptrace_stop.
>>
>> In an ideal world everything except ptrace_signal would share a common
>> implementation of continuing with the signal, so ptracers could count
>> on the signal they ask to continue with actually being delivered. For
>> now retain bug compatibility and just return with the signal number
>> the ptracer requested the code continue with.
>>
>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> ---
>> include/linux/ptrace.h | 12 ++++++------
>> kernel/signal.c | 31 ++++++++++++++++++-------------
>> 2 files changed, 24 insertions(+), 19 deletions(-)
>>
>> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
>> index 3e6b46e2b7be..15b3d176b6b4 100644
>> --- a/include/linux/ptrace.h
>> +++ b/include/linux/ptrace.h
>> @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
>> extern void ptrace_disable(struct task_struct *);
>> extern int ptrace_request(struct task_struct *child, long request,
>> unsigned long addr, unsigned long data);
>> -extern void ptrace_notify(int exit_code, unsigned long message);
>> +extern int ptrace_notify(int exit_code, unsigned long message);
>> [...]
>> -static void ptrace_stop(int exit_code, int why, int clear_code,
>> +static int ptrace_stop(int exit_code, int why, int clear_code,
>> unsigned long message, kernel_siginfo_t *info)
>> [...]
>> -static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
>> +static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
>> [...]
>> -void ptrace_notify(int exit_code, unsigned long message)
>> +int ptrace_notify(int exit_code, unsigned long message)
>
> Just for robustness, how about marking the functions that have switched
> from void to int return as __must_check (or at least just ptrace_notify)?
We can't. There are historical cases that simply don't check if a
signal should be sent after the function, and they exist for every
function that is modified.
If we can modify the code so that everyone is checking the return value
than certainly, but that just doesn't happen to reflect how this
ptrace helper is being used today.
> With that and the style nit Oleg already mentioned, yeah, this looks
> good too.
Alright style nit fixed.
> Reviewed-by: Kees Cook <keescook@chromium.org>
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop
2022-03-18 14:52 ` Eric W. Biederman
@ 2022-03-18 17:28 ` Kees Cook
0 siblings, 0 replies; 95+ messages in thread
From: Kees Cook @ 2022-03-18 17:28 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Al Viro, linux-api, Jens Axboe
On Fri, Mar 18, 2022 at 09:52:46AM -0500, Eric W. Biederman wrote:
> Kees Cook <keescook@chromium.org> writes:
>
> > On Tue, Mar 15, 2022 at 06:22:26PM -0500, Eric W. Biederman wrote:
> >>
> >> The signal a task should continue with after a ptrace stop is
> >> inconsistently read, cleared, and sent. Solve this by reading and
> >> clearing the signal to be sent in ptrace_stop.
> >>
> >> In an ideal world everything except ptrace_signal would share a common
> >> implementation of continuing with the signal, so ptracers could count
> >> on the signal they ask to continue with actually being delivered. For
> >> now retain bug compatibility and just return with the signal number
> >> the ptracer requested the code continue with.
> >>
> >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >> ---
> >> include/linux/ptrace.h | 12 ++++++------
> >> kernel/signal.c | 31 ++++++++++++++++++-------------
> >> 2 files changed, 24 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
> >> index 3e6b46e2b7be..15b3d176b6b4 100644
> >> --- a/include/linux/ptrace.h
> >> +++ b/include/linux/ptrace.h
> >> @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned
> >> extern void ptrace_disable(struct task_struct *);
> >> extern int ptrace_request(struct task_struct *child, long request,
> >> unsigned long addr, unsigned long data);
> >> -extern void ptrace_notify(int exit_code, unsigned long message);
> >> +extern int ptrace_notify(int exit_code, unsigned long message);
> >> [...]
> >> -static void ptrace_stop(int exit_code, int why, int clear_code,
> >> +static int ptrace_stop(int exit_code, int why, int clear_code,
> >> unsigned long message, kernel_siginfo_t *info)
> >> [...]
> >> -static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
> >> +static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message)
> >> [...]
> >> -void ptrace_notify(int exit_code, unsigned long message)
> >> +int ptrace_notify(int exit_code, unsigned long message)
> >
> > Just for robustness, how about marking the functions that have switched
> > from void to int return as __must_check (or at least just ptrace_notify)?
>
> We can't. There are historical cases that simply don't check if a
> signal should be sent after the function, and they exist for every
> function that is modified.
This seems at least worth documenting with a comment, otherwise we're
just trading one kind of "weirdness" (setting/clearing
current->exit_code) with another (ignoring the signal returned by
ptrace_notify()).
I see only two cases that would need comments:
static inline void ptrace_event(int event, unsigned long message)
{
if (unlikely(ptrace_event_enabled(current, event))) {
ptrace_notify((event << 8) | SIGTRAP, message);
} else if (event == PTRACE_EVENT_EXEC) {
/* legacy EXEC report via SIGTRAP */
if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
send_sig(SIGTRAP, current, 0);
}
}
static void signal_delivered(struct ksignal *ksig, int stepping)
{
...
if (stepping)
ptrace_notify(SIGTRAP, 0);
}
--
Kees Cook
^ permalink raw reply [flat|nested] 95+ messages in thread
* [GIT PULL] ptrace: Cleanups for v5.18
2022-03-15 23:18 ` [PATCH 0/2] ptrace: Making the ptrace changes atomic Eric W. Biederman
2022-03-15 23:21 ` [PATCH 1/2] ptrace: Move setting/clearing ptrace_message into ptrace_stop Eric W. Biederman
2022-03-15 23:22 ` [PATCH 2/2] ptrace: Return the signal to continue with from ptrace_stop Eric W. Biederman
@ 2022-03-28 23:56 ` Eric W. Biederman
2022-03-29 0:03 ` Jens Axboe
` (2 more replies)
2 siblings, 3 replies; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-28 23:56 UTC (permalink / raw)
To: Linus Torvalds
Cc: Alexey Gladkov, Kyle Huey, Oleg Nesterov, Kees Cook, Al Viro,
linux-api, Jens Axboe, linux-kernel
Linus,
Please pull the ptrace-cleanups-for-v5.18 tag from the git tree:
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git ptrace-cleanups-for-v5.18
HEAD: dcbc65aac28360df5f5a3b613043ccc0e81da3cf ptrace: Remove duplicated include in ptrace.c
This set of changes removes tracehook.h, moves modification of all of
the ptrace fields inside of siglock to remove races, adds a missing
permission check to ptrace.c
The removal of tracehook.h is quite significant as it has been a major
source of confusion in recent years. Much of that confusion was
around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled
making the semantics clearer).
For people who don't know tracehook.h is a vestiage of an attempt to
implement uprobes like functionality that was never fully merged, and
was later superseeded by uprobes when uprobes was merged. For many
years now we have been removing what tracehook functionaly a little
bit at a time. To the point where now anything left in tracehook.h is
some weird strange thing that is difficult to understand.
Eric W. Biederman (15):
ptrace: Move ptrace_report_syscall into ptrace.h
ptrace/arm: Rename tracehook_report_syscall report_syscall
ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h
ptrace: Remove arch_syscall_{enter,exit}_tracehook
ptrace: Remove tracehook_signal_handler
task_work: Remove unnecessary include from posix_timers.h
task_work: Introduce task_work_pending
task_work: Call tracehook_notify_signal from get_signal on all architectures
task_work: Decouple TIF_NOTIFY_SIGNAL and task_work
signal: Move set_notify_signal and clear_notify_signal into sched/signal.h
resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume
resume_user_mode: Move to resume_user_mode.h
tracehook: Remove tracehook.h
ptrace: Move setting/clearing ptrace_message into ptrace_stop
ptrace: Return the signal to continue with from ptrace_stop
Jann Horn (1):
ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
Yang Li (1):
ptrace: Remove duplicated include in ptrace.c
MAINTAINERS | 1 -
arch/Kconfig | 5 +-
arch/alpha/kernel/ptrace.c | 5 +-
arch/alpha/kernel/signal.c | 4 +-
arch/arc/kernel/ptrace.c | 5 +-
arch/arc/kernel/signal.c | 4 +-
arch/arm/kernel/ptrace.c | 12 +-
arch/arm/kernel/signal.c | 4 +-
arch/arm64/kernel/ptrace.c | 14 +--
arch/arm64/kernel/signal.c | 4 +-
arch/csky/kernel/ptrace.c | 5 +-
arch/csky/kernel/signal.c | 4 +-
arch/h8300/kernel/ptrace.c | 5 +-
arch/h8300/kernel/signal.c | 4 +-
arch/hexagon/kernel/process.c | 4 +-
arch/hexagon/kernel/signal.c | 1 -
arch/hexagon/kernel/traps.c | 6 +-
arch/ia64/kernel/process.c | 4 +-
arch/ia64/kernel/ptrace.c | 6 +-
arch/ia64/kernel/signal.c | 1 -
arch/m68k/kernel/ptrace.c | 5 +-
arch/m68k/kernel/signal.c | 4 +-
arch/microblaze/kernel/ptrace.c | 5 +-
arch/microblaze/kernel/signal.c | 4 +-
arch/mips/kernel/ptrace.c | 5 +-
arch/mips/kernel/signal.c | 4 +-
arch/nds32/include/asm/syscall.h | 2 +-
arch/nds32/kernel/ptrace.c | 5 +-
arch/nds32/kernel/signal.c | 4 +-
arch/nios2/kernel/ptrace.c | 5 +-
arch/nios2/kernel/signal.c | 4 +-
arch/openrisc/kernel/ptrace.c | 5 +-
arch/openrisc/kernel/signal.c | 4 +-
arch/parisc/kernel/ptrace.c | 7 +-
arch/parisc/kernel/signal.c | 4 +-
arch/powerpc/kernel/ptrace/ptrace.c | 8 +-
arch/powerpc/kernel/signal.c | 4 +-
arch/riscv/kernel/ptrace.c | 5 +-
arch/riscv/kernel/signal.c | 4 +-
arch/s390/include/asm/entry-common.h | 1 -
arch/s390/kernel/ptrace.c | 1 -
arch/s390/kernel/signal.c | 5 +-
arch/sh/kernel/ptrace_32.c | 5 +-
arch/sh/kernel/signal_32.c | 4 +-
arch/sparc/kernel/ptrace_32.c | 5 +-
arch/sparc/kernel/ptrace_64.c | 5 +-
arch/sparc/kernel/signal32.c | 1 -
arch/sparc/kernel/signal_32.c | 4 +-
arch/sparc/kernel/signal_64.c | 4 +-
arch/um/kernel/process.c | 4 +-
arch/um/kernel/ptrace.c | 5 +-
arch/x86/kernel/ptrace.c | 1 -
arch/x86/kernel/signal.c | 5 +-
arch/x86/mm/tlb.c | 1 +
arch/xtensa/kernel/ptrace.c | 5 +-
arch/xtensa/kernel/signal.c | 4 +-
block/blk-cgroup.c | 2 +-
fs/coredump.c | 1 -
fs/exec.c | 1 -
fs/io-wq.c | 6 +-
fs/io_uring.c | 11 +-
fs/proc/array.c | 1 -
fs/proc/base.c | 1 -
include/asm-generic/syscall.h | 2 +-
include/linux/entry-common.h | 47 +-------
include/linux/entry-kvm.h | 2 +-
include/linux/posix-timers.h | 1 -
include/linux/ptrace.h | 81 ++++++++++++-
include/linux/resume_user_mode.h | 64 ++++++++++
include/linux/sched/signal.h | 17 +++
include/linux/task_work.h | 5 +
include/linux/tracehook.h | 226 -----------------------------------
include/uapi/linux/ptrace.h | 2 +-
kernel/entry/common.c | 19 +--
kernel/entry/kvm.c | 9 +-
kernel/exit.c | 3 +-
kernel/livepatch/transition.c | 1 -
kernel/ptrace.c | 47 +++++---
kernel/seccomp.c | 1 -
kernel/signal.c | 62 +++++-----
kernel/task_work.c | 4 +-
kernel/time/posix-cpu-timers.c | 1 +
mm/memcontrol.c | 2 +-
security/apparmor/domain.c | 1 -
security/selinux/hooks.c | 1 -
85 files changed, 372 insertions(+), 495 deletions(-)
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-28 23:56 ` [GIT PULL] ptrace: Cleanups for v5.18 Eric W. Biederman
@ 2022-03-29 0:03 ` Jens Axboe
2022-03-29 0:33 ` Linus Torvalds
2022-03-29 0:35 ` pr-tracker-bot
2 siblings, 0 replies; 95+ messages in thread
From: Jens Axboe @ 2022-03-29 0:03 UTC (permalink / raw)
To: Eric W. Biederman, Linus Torvalds
Cc: Alexey Gladkov, Kyle Huey, Oleg Nesterov, Kees Cook, Al Viro,
linux-api, linux-kernel
On 3/28/22 5:56 PM, Eric W. Biederman wrote:
>
> Linus,
>
> Please pull the ptrace-cleanups-for-v5.18 tag from the git tree:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git ptrace-cleanups-for-v5.18
>
> HEAD: dcbc65aac28360df5f5a3b613043ccc0e81da3cf ptrace: Remove duplicated include in ptrace.c
>
> This set of changes removes tracehook.h, moves modification of all of
> the ptrace fields inside of siglock to remove races, adds a missing
> permission check to ptrace.c
>
> The removal of tracehook.h is quite significant as it has been a major
> source of confusion in recent years. Much of that confusion was
> around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled
> making the semantics clearer).
>
> For people who don't know tracehook.h is a vestiage of an attempt to
> implement uprobes like functionality that was never fully merged, and
> was later superseeded by uprobes when uprobes was merged. For many
> years now we have been removing what tracehook functionaly a little
> bit at a time. To the point where now anything left in tracehook.h is
> some weird strange thing that is difficult to understand.
FWIW, the notify/task_work/io_uring changes look good to me. Thanks for
cleaning this up, Eric.
--
Jens Axboe
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-28 23:56 ` [GIT PULL] ptrace: Cleanups for v5.18 Eric W. Biederman
2022-03-29 0:03 ` Jens Axboe
@ 2022-03-29 0:33 ` Linus Torvalds
2022-03-29 0:53 ` Stephen Rothwell
2022-03-29 3:37 ` Eric W. Biederman
2022-03-29 0:35 ` pr-tracker-bot
2 siblings, 2 replies; 95+ messages in thread
From: Linus Torvalds @ 2022-03-29 0:33 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Alexey Gladkov, Kyle Huey, Oleg Nesterov, Kees Cook, Al Viro,
Linux API, Jens Axboe, Linux Kernel Mailing List
On Mon, Mar 28, 2022 at 4:56 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> The removal of tracehook.h is quite significant as it has been a major
> source of confusion in recent years. Much of that confusion was
> around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled
> making the semantics clearer).
Hmm. I love removing tracehook.c, but this looks like it hasn't been
in linux-next.
The header file changes messes with other changes, and we have
kernel/sched/fair.c:2884:9: error: implicit declaration of function
‘init_task_work’; did you mean ‘init_irq_work’?
[-Werror=implicit-function-declaration]
2884 | init_task_work(&p->numa_work, task_numa_work);
| ^~~~~~~~~~~~~~
as a result (also a few other things in that same file).
Now, this is trivial to fix - just add an include for
<linux/task_work.h> from that file - and that's the right thing to do
anyway.
But I'm a bit unhappy that this was either not tested in linux-next,
or if it was, I wasn't notified about the semantic in the pull
request.
So I've pulled this, and fixed up things in my merge, but I'm a bit
worried that there might be other situations like this where some
header file is no longer included and it was included implicitly
before through that disgusting tracehook.h header..
I *hope* it was just the scheduler header file updates that ended up
having this effect, and nothing else is affected.
Let's see if the test robots start complaining about non-x86
architecture-specific stuff that I don't build test.
Linus
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-29 0:33 ` Linus Torvalds
@ 2022-03-29 0:53 ` Stephen Rothwell
2022-03-29 0:58 ` Linus Torvalds
2022-03-29 3:37 ` Eric W. Biederman
1 sibling, 1 reply; 95+ messages in thread
From: Stephen Rothwell @ 2022-03-29 0:53 UTC (permalink / raw)
To: Linus Torvalds
Cc: Eric W. Biederman, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, Linux API, Jens Axboe,
Linux Kernel Mailing List
[-- Attachment #1: Type: text/plain, Size: 657 bytes --]
Hi Linus,
On Mon, 28 Mar 2022 17:33:52 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Mon, Mar 28, 2022 at 4:56 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> >
> > The removal of tracehook.h is quite significant as it has been a major
> > source of confusion in recent years. Much of that confusion was
> > around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled
> > making the semantics clearer).
>
> Hmm. I love removing tracehook.c, but this looks like it hasn't been
> in linux-next.
See https://lore.kernel.org/lkml/20220316165612.4f50faad@canb.auug.org.au/
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-29 0:53 ` Stephen Rothwell
@ 2022-03-29 0:58 ` Linus Torvalds
0 siblings, 0 replies; 95+ messages in thread
From: Linus Torvalds @ 2022-03-29 0:58 UTC (permalink / raw)
To: Stephen Rothwell
Cc: Eric W. Biederman, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, Linux API, Jens Axboe,
Linux Kernel Mailing List
On Mon, Mar 28, 2022 at 5:53 PM Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> See https://lore.kernel.org/lkml/20220316165612.4f50faad@canb.auug.org.au/
Ok, so it was known, just not reported to me.
And the good news seems to be that at least there isn't anything
hiding elsewhere.
Linus
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-29 0:33 ` Linus Torvalds
2022-03-29 0:53 ` Stephen Rothwell
@ 2022-03-29 3:37 ` Eric W. Biederman
2022-03-29 4:49 ` Linus Torvalds
1 sibling, 1 reply; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-29 3:37 UTC (permalink / raw)
To: Linus Torvalds
Cc: Alexey Gladkov, Kyle Huey, Oleg Nesterov, Kees Cook, Al Viro,
Linux API, Jens Axboe, Linux Kernel Mailing List
Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Mon, Mar 28, 2022 at 4:56 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>>
>> The removal of tracehook.h is quite significant as it has been a major
>> source of confusion in recent years. Much of that confusion was
>> around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled
>> making the semantics clearer).
>
> Hmm. I love removing tracehook.c, but this looks like it hasn't been
> in linux-next.
>
> The header file changes messes with other changes, and we have
>
> kernel/sched/fair.c:2884:9: error: implicit declaration of function
> ‘init_task_work’; did you mean ‘init_irq_work’?
> [-Werror=implicit-function-declaration]
> 2884 | init_task_work(&p->numa_work, task_numa_work);
> | ^~~~~~~~~~~~~~
>
> as a result (also a few other things in that same file).
>
> Now, this is trivial to fix - just add an include for
> <linux/task_work.h> from that file - and that's the right thing to do
> anyway.
>
> But I'm a bit unhappy that this was either not tested in linux-next,
> or if it was, I wasn't notified about the semantic in the pull
> request.
>
> So I've pulled this, and fixed up things in my merge, but I'm a bit
> worried that there might be other situations like this where some
> header file is no longer included and it was included implicitly
> before through that disgusting tracehook.h header..
>
> I *hope* it was just the scheduler header file updates that ended up
> having this effect, and nothing else is affected.
>
> Let's see if the test robots start complaining about non-x86
> architecture-specific stuff that I don't build test.
Sorry for not mentioning that. I had tracked it down. It was
fundamentally in the scheduler headers changes removing an include of
task_work.h, so it didn't feel like there was anything I could do in my
tree. I asked Ingo if he could fix his tree and unfortunately forgot
about it.
For the record there were also a couple of other pretty trivial
conflicts, the removal of nds32, some block_cgroup header where
an adjacent line was modified to what I was changing. But it thankfully
looks like none of those caused you any problems.
Sorry about all of that I am about that. I am running pretty weak this
last couple of days as a cold has been running through the household.
Dumb question because this seems to burning a few extra creativity
points. Is there any way to create a signed tag and a branch with the
same name? Or in general is there a good way to manage topic branches
and then tag them at the end before I send them?
Having a tag and a branch with the same name seems to completely confuse
git and it just tells me no I won't push anything to another git tree,
because what you are asking me to do is ambiguous. So now I am having
to come up with two names for each topic branch, even if I only push the
tags upstream.
I feel like there is a best practice on how to manage tags and topic
branches and I just haven't seen it yet.
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-29 3:37 ` Eric W. Biederman
@ 2022-03-29 4:49 ` Linus Torvalds
2022-03-29 5:20 ` Linus Torvalds
0 siblings, 1 reply; 95+ messages in thread
From: Linus Torvalds @ 2022-03-29 4:49 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Alexey Gladkov, Kyle Huey, Oleg Nesterov, Kees Cook, Al Viro,
Linux API, Jens Axboe, Linux Kernel Mailing List
On Mon, Mar 28, 2022 at 8:38 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> Dumb question because this seems to burning a few extra creativity
> points. Is there any way to create a signed tag and a branch with the
> same name?
Oh, absolutely.
But you may find it annoying, because as you noticed:
> Having a tag and a branch with the same name seems to completely confuse
> git and it just tells me no I won't push anything to another git tree,
> because what you are asking me to do is ambiguous.
Not at all.
git is not at all confused by the situation, git is in fact very aware
of it indeed.
But as git then tells you, exactly *because* it is aware that you have
picked the same name for both a branch and a tag, it will keep warning
you about the ambiguity of said name (but after warning, will
generally then preferentially use the tag of that name over the branch
of the same name).
So if you have both a branch and a tag named 'xyz', you generally need
to disambiguate them when you use them. That will make git happy,
because now it's not ambiguous any more.
(Technical detail: some git operations work on specific namespaces, so
"git branch -d xyz" should never remove a _tag_ called 'xyz', and as
such the name isn't ambiguous in the context of that git command)
And disambiguating them is simple, but I suspect you'll find it's
annoying enough that you simply don't want to use the same name for
tags and branches.
The full name of a tag 'x' is actually 'refs/tags/x', and the full
unambiguous name of a branch 'x' is 'refs/heads/x'.
So technically a tag and a branch can never _really_ have the same
name, because internally they have longer unambiguous names.
You would almost never actually use that full name, it's mostly an
internal git thing. Because even if you have ambiguous branch and tag
names, you'd then still shorten it to just 'tags/x' and 'heads/x'
respectively.
Git has other "namespaces" for these refs, too, notably
'refs/remotes/'. In fact, I guess you could make up your own namespace
too if you really wanted, although I don't think anybody really has
ever had a good reason for it.
(There is also the non-namespaced special refs like HEAD and
FETCH_HEAD and MERGE_HEAD for "current branch", "what you fetched" and
"what you are merging")
So you *can* do this:
# create and check out the branch 'xyz'
$ git checkout -b xyz master
# create a tag called 'xyz', but to confuse everybody, make it
point somewhere else
$ git tag xyz master~3
# look at what 'xyz' means:
$ git rev-parse xyz
warning: refname 'xyz' is ambiguous.
cffb2b72d3ed47f5093d128bd44d9ce136b6b5af
# Hmm. it warned about it being ambiguous
$ git rev-parse heads/xyz
1930a6e739c4b4a654a69164dbe39e554d228915
# Ok, it clearly took the tag, not the branch:
$ git rev-parse tags/xyz
cffb2b72d3ed47f5093d128bd44d9ce136b6b5af
so as you can see, yes, you can work with a tag and a branch that have
the same 'short name', but having to disambiguate them all the time
will likely just drive you mad.
And it's worth pointing out that the name does not imply a
relationship. So the branch called 'xyz' (ie refs/heads/xyz) has
absolutely *no* relationship to the tag called 'xyz' (ie
refs/tags/xyz) except for that ambiguous short name. So updating the
branch - by perhaps committing more to it - will in no way affect the
tag.
Also note that branches and tags are both "just refs" as far as git is
concerned, but git *does* give some semantic meaning to the
namespaces.
So the branch namespace (it those 'refs/heads/*' things) are things
that get updated automatically as you make new commits.
In contrast, the tag namespace is something you *can* update, but it's
considered odd, and if you want to overwrite an existing tag you
generally need to do something special (eg "git tag -f" to force
overwriting a new tag rather than telling you that you already have
one).
So in a very real way, to git a ref is a ref is a ref, with very
little to no real *technical* distinction. They are just ways to point
to the SHA1 hash of an object. But there are basically some common
semantic rules that are based on the namespaces, and all those git
operations that use certain namespaces by default.
Linus
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-29 4:49 ` Linus Torvalds
@ 2022-03-29 5:20 ` Linus Torvalds
0 siblings, 0 replies; 95+ messages in thread
From: Linus Torvalds @ 2022-03-29 5:20 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Alexey Gladkov, Kyle Huey, Oleg Nesterov, Kees Cook, Al Viro,
Linux API, Jens Axboe, Linux Kernel Mailing List
On Mon, Mar 28, 2022 at 9:49 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> So if you have both a branch and a tag named 'xyz', you generally need
> to disambiguate them when you use them. That will make git happy,
> because now it's not ambiguous any more.
On a similar but very different issue: this is not the only kind of
naming ambiguity you can have.
For example, try this insane thing:
git tag Makefile
that creates a tag called 'Makefile' (pointing to your current HEAD, of course).
Now, guess what happens when you then do
git log Makefile
that's right - git once again notices that you are doing something ambiguous.
In fact, git will be *so* unhappy about this kind of ambiguity that
(unlike the 'tag vs branch' case) it will not prefer one version of
reality over the other, and simply consider this to be a fatal error,
and say
fatal: ambiguous argument 'Makefile': both revision and filename
Use '--' to separate paths from revisions, like this:
'git <command> [<revision>...] -- [<file>...]'
and as a result you will hopefully go "Oh, I didn't mean to have that
tag at all" and just remove the bogus tagname.
Because you probably made it by mistake.
But you *can* choose to say
git log Makefile --
or
git log refs/tags/Makefile
to make it clear that no, 'Makefile' is not the pathname in the
repository, you really mean the tag 'Makefile'.
Or use
git log -- Makefile
or
git log ./Makefile
to say "yeah, I meant the _pathname_ 'Makefile', not the tag".
Or, if you just like playing games, do
git log Makefile -- Makefile
if you want to track the history of the path 'Makefile' starting at
the tag 'Makefile'.
But don't actually do this for real. There's a reason git notices these things.
Using ambiguous branch names (whether ambiguous with tag-names or with
filenames) is a pain that is not worth it in practice. Git will
notice, and git will allow you to work around it, but it's just a
BadIdea(tm) despite that.
But you probably want to be aware of issues like this if you script
things around git, though.
That "--" form in particular is generally the one you want to use for
scripting, if you want to allow people to do anything they damn well
please. It's the "Give them rope" syntax.
Of course, a much more common reason for "--" for pathname separation,
and one you may already be familiar with, is that you want to see the
history of a pathname that is not *currently* a pathname, but was one
at some point in the past.
So in my current kernel tree, doing
$ git log arch/nds32/
will actually cause an error, because of "unknown revision or path not
in the working tree".
But if you really want to see the history of that now-deleted
architecture, you do
$ git log -- arch/nds32/
and it's all good.
This concludes today's sermon on git,
Linus
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [GIT PULL] ptrace: Cleanups for v5.18
2022-03-28 23:56 ` [GIT PULL] ptrace: Cleanups for v5.18 Eric W. Biederman
2022-03-29 0:03 ` Jens Axboe
2022-03-29 0:33 ` Linus Torvalds
@ 2022-03-29 0:35 ` pr-tracker-bot
2 siblings, 0 replies; 95+ messages in thread
From: pr-tracker-bot @ 2022-03-29 0:35 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Linus Torvalds, Alexey Gladkov, Kyle Huey, Oleg Nesterov,
Kees Cook, Al Viro, linux-api, Jens Axboe, linux-kernel
The pull request you sent on Mon, 28 Mar 2022 18:56:46 -0500:
> git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git ptrace-cleanups-for-v5.18
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1930a6e739c4b4a654a69164dbe39e554d228915
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 95+ messages in thread
* [PATCH 00/13] Removing tracehook.h
2022-01-03 21:30 ` [PATCH 00/17] exit: Making task exiting a first class concept Eric W. Biederman
` (17 preceding siblings ...)
2022-03-09 0:13 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
@ 2022-03-09 0:15 ` Eric W. Biederman
2022-03-09 20:58 ` Linus Torvalds
18 siblings, 1 reply; 95+ messages in thread
From: Eric W. Biederman @ 2022-03-09 0:15 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, Linus Torvalds, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Kees Cook, Al Viro, linux-api, Jens Axboe
While working on cleaning up do_exit I have been having to deal with the
code in tracehook.h. Unfortunately the code in tracehook.h does not
make sense as organized.
This set of changes reorganizes things so that tracehook.h no longer
exists, and so that it's current contents are organized in a fashion
that is a little easier to understand.
The biggest change is that I lean into the fact that get_signal
always calls task_work_run and removes the logic that tried to
be smart and decouple task_work_run and get_signal as it has proven
to not be effective.
This is a conservative change and I am not changing the how things
like signal_pending operate (although it is probably justified).
A new header resume_user_mode.h is added to hold resume_user_mode_work
which was previously known as tracehook_notify_resume.
Eric W. Biederman (13):
ptrace: Move ptrace_report_syscall into ptrace.h
ptrace/arm: Rename tracehook_report_syscall report_syscall
ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h
ptrace: Remove arch_syscall_{enter,exit}_tracehook
ptrace: Remove tracehook_signal_handler
task_work: Remove unnecessary include from posix_timers.h
task_work: Introduce task_work_pending
task_work: Call tracehook_notify_signal from get_signal on all architectures
task_work: Decouple TIF_NOTIFY_SIGNAL and task_work
signal: Move set_notify_signal and clear_notify_signal into sched/signal.h
resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume
resume_user_mode: Move to resume_user_mode.h
tracehook: Remove tracehook.h
MAINTAINERS | 1 -
arch/Kconfig | 5 +-
arch/alpha/kernel/ptrace.c | 5 +-
arch/alpha/kernel/signal.c | 4 +-
arch/arc/kernel/ptrace.c | 5 +-
arch/arc/kernel/signal.c | 4 +-
arch/arm/kernel/ptrace.c | 12 +-
arch/arm/kernel/signal.c | 4 +-
arch/arm64/kernel/ptrace.c | 14 +--
arch/arm64/kernel/signal.c | 4 +-
arch/csky/kernel/ptrace.c | 5 +-
arch/csky/kernel/signal.c | 4 +-
arch/h8300/kernel/ptrace.c | 5 +-
arch/h8300/kernel/signal.c | 4 +-
arch/hexagon/kernel/process.c | 4 +-
arch/hexagon/kernel/signal.c | 1 -
arch/hexagon/kernel/traps.c | 6 +-
arch/ia64/kernel/process.c | 4 +-
arch/ia64/kernel/ptrace.c | 6 +-
arch/ia64/kernel/signal.c | 1 -
arch/m68k/kernel/ptrace.c | 6 +-
arch/m68k/kernel/signal.c | 4 +-
arch/microblaze/kernel/ptrace.c | 5 +-
arch/microblaze/kernel/signal.c | 4 +-
arch/mips/kernel/ptrace.c | 5 +-
arch/mips/kernel/signal.c | 4 +-
arch/nds32/include/asm/syscall.h | 2 +-
arch/nds32/kernel/ptrace.c | 5 +-
arch/nds32/kernel/signal.c | 4 +-
arch/nios2/kernel/ptrace.c | 5 +-
arch/nios2/kernel/signal.c | 4 +-
arch/openrisc/kernel/ptrace.c | 5 +-
arch/openrisc/kernel/signal.c | 4 +-
arch/parisc/kernel/ptrace.c | 7 +-
arch/parisc/kernel/signal.c | 4 +-
arch/powerpc/kernel/ptrace/ptrace.c | 8 +-
arch/powerpc/kernel/signal.c | 4 +-
arch/riscv/kernel/ptrace.c | 5 +-
arch/riscv/kernel/signal.c | 4 +-
arch/s390/include/asm/entry-common.h | 1 -
arch/s390/kernel/ptrace.c | 1 -
arch/s390/kernel/signal.c | 5 +-
arch/sh/kernel/ptrace_32.c | 5 +-
arch/sh/kernel/signal_32.c | 4 +-
arch/sparc/kernel/ptrace_32.c | 5 +-
arch/sparc/kernel/ptrace_64.c | 5 +-
arch/sparc/kernel/signal32.c | 1 -
arch/sparc/kernel/signal_32.c | 4 +-
arch/sparc/kernel/signal_64.c | 4 +-
arch/um/kernel/process.c | 4 +-
arch/um/kernel/ptrace.c | 5 +-
arch/x86/kernel/ptrace.c | 1 -
arch/x86/kernel/signal.c | 5 +-
arch/x86/mm/tlb.c | 1 +
arch/xtensa/kernel/ptrace.c | 5 +-
arch/xtensa/kernel/signal.c | 4 +-
block/blk-cgroup.c | 2 +-
fs/coredump.c | 1 -
fs/exec.c | 1 -
fs/io-wq.c | 6 +-
fs/io_uring.c | 11 +-
fs/proc/array.c | 1 -
fs/proc/base.c | 1 -
include/asm-generic/syscall.h | 2 +-
include/linux/entry-common.h | 47 +-------
include/linux/entry-kvm.h | 2 +-
include/linux/posix-timers.h | 1 -
include/linux/ptrace.h | 78 ++++++++++++
include/linux/resume_user_mode.h | 64 ++++++++++
include/linux/sched/signal.h | 17 +++
include/linux/task_work.h | 5 +
include/linux/tracehook.h | 226 -----------------------------------
include/uapi/linux/ptrace.h | 2 +-
kernel/entry/common.c | 19 +--
kernel/entry/kvm.c | 9 +-
kernel/exit.c | 3 +-
kernel/livepatch/transition.c | 1 -
kernel/seccomp.c | 1 -
kernel/signal.c | 23 ++--
kernel/task_work.c | 4 +-
kernel/time/posix-cpu-timers.c | 1 +
mm/memcontrol.c | 2 +-
security/apparmor/domain.c | 1 -
security/selinux/hooks.c | 1 -
84 files changed, 317 insertions(+), 462 deletions(-)
Eric
^ permalink raw reply [flat|nested] 95+ messages in thread
* Re: [PATCH 00/13] Removing tracehook.h
2022-03-09 0:15 ` [PATCH 00/13] Removing tracehook.h Eric W. Biederman
@ 2022-03-09 20:58 ` Linus Torvalds
0 siblings, 0 replies; 95+ messages in thread
From: Linus Torvalds @ 2022-03-09 20:58 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, Linux Kernel Mailing List, Alexey Gladkov, Kyle Huey,
Oleg Nesterov, Kees Cook, Al Viro, Linux API, Jens Axboe
On Tue, Mar 8, 2022 at 4:16 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> While working on cleaning up do_exit I have been having to deal with the
> code in tracehook.h. Unfortunately the code in tracehook.h does not
> make sense as organized. [...]
Thanks, that odd naming has tripped me up several times, this looks
like an improvement.
Linus
^ permalink raw reply [flat|nested] 95+ messages in thread