From: Jens Axboe <axboe@kernel.dk>
To: Sergei Trofimovich <slyich@gmail.com>
Cc: linux-ia64@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: 5.11 regression: "ia64: add support for TIF_NOTIFY_SIGNAL" breaks ia64 boot
Date: Tue, 02 Mar 2021 22:31:13 +0000 [thread overview]
Message-ID: <9cfaede7-d885-88a5-0bff-91b99b30a8d4@kernel.dk> (raw)
In-Reply-To: <20210302220716.0b6f72ae@sf>
On 3/2/21 3:07 PM, Sergei Trofimovich wrote:
> On Tue, 23 Feb 2021 08:08:30 +0000
> Sergei Trofimovich <slyich@gmail.com> wrote:
>
>> On Mon, 22 Feb 2021 17:43:58 -0700
>> Jens Axboe <axboe@kernel.dk> wrote:
>>
>>> On 2/22/21 5:41 PM, Jens Axboe wrote:
>>>> On 2/22/21 5:34 PM, Jens Axboe wrote:
>>>>> On 2/22/21 4:53 PM, Sergei Trofimovich wrote:
>>>>>> On Mon, 22 Feb 2021 16:34:50 -0700
>>>>>> Jens Axboe <axboe@kernel.dk> wrote:
>>>>>>
>>>>>>> On 2/22/21 4:05 PM, Sergei Trofimovich wrote:
>>>>>>>> Hia Jens!
>>>>>>>>
>>>>>>>> Tried 5.11 on rx3600 box and noticed it has
>>>>>>>> a problem handling init (5.10 booted fine):
>>>>>>>>
>>>>>>>> INIT: version 2.98 booting
>>>>>>>>
>>>>>>>> OpenRC 0.42.1 is starting up Gentoo Linux (ia64)
>>>>>>>>
>>>>>>>> mkdir `/run/openrc': Read-only file system
>>>>>>>> mkdir `/run/openrc/starting': No such file or directory
>>>>>>>> mkdir `/run/openrc/started': No such file or directory
>>>>>>>> mkdir `/run/openrc/stopping': No such file or directory
>>>>>>>> mkdir `/run/openrc/inactive': No such file or directory
>>>>>>>> mkdir `/run/openrc/wasinactive': No such file or directory
>>>>>>>> mkdir `/run/openrc/failed': No such file or directory
>>>>>>>> mkdir `/run/openrc/hotplugged': No such file or directory
>>>>>>>> mkdir `/run/openrc/daemons': No such file or directory
>>>>>>>> mkdir `/run[ 14.595059] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>>>> [ 14.599059] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>>>
>>>>>>>> I suspect we build bad signal stack frame for userspace.
>>>>>>>>
>>>>>>>> With a bit of #define DEBUG_SIG 1 enabled the signals are SIGCHLD:
>>>>>>>>
>>>>>>>> [ 34.969771] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6aeaa0 ip=a000000000040740 handler=000000004b4c59b6
>>>>>>>> [ 34.969948] SIG deliver (init:1): sig=17 sp=60000fffff1ccc50 ip=a000000000040740 handler=000000004638b9e5
>>>>>>>> [ 34.969948] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6adf90 ip=a000000000040740 handler=000000004b4c59b6
>>>>>>>> [ 34.973948] SIG deliver (init:1): sig=17 sp=60000fffff1cc140 ip=a000000000040740 handler=000000004638b9e5
>>>>>>>> [ 34.973948] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>>>> [ 34.973948] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6ad480 ip=a000000000040740 handler=000000004b4c59b6
>>>>>>>> [ 34.973948] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>>>
>>>>>>>> Bisect points at:
>>>>>>>>
>>>>>>>> commit b269c229b0e89aedb7943c06673b56b6052cf5e5
>>>>>>>> Author: Jens Axboe <axboe@kernel.dk>
>>>>>>>> Date: Fri Oct 9 14:49:43 2020 -0600
>>>>>>>>
>>>>>>>> ia64: add support for TIF_NOTIFY_SIGNAL
>>>>>>>>
>>>>>>>> Wire up TIF_NOTIFY_SIGNAL handling for ia64.
>>>>>>>>
>>>>>>>> Cc: linux-ia64@vger.kernel.org
>>>>>>>> [axboe: added fixes from Mike Rapoport <rppt@kernel.org>]
>>>>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>>>>>>>
>>>>>>>> diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
>>>>>>>> index 64a1011f6812..51d20cb37706 100644
>>>>>>>> --- a/arch/ia64/include/asm/thread_info.h
>>>>>>>> +++ b/arch/ia64/include/asm/thread_info.h
>>>>>>>> @@ -103,6 +103,7 @@ struct thread_info {
>>>>>>>> #define TIF_SYSCALL_TRACE 2 /* syscall trace active */
>>>>>>>> #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
>>>>>>>> #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
>>>>>>>> +#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */
>>>>>>>> #define TIF_NOTIFY_RESUME 6 /* resumption notification requested */
>>>>>>>> #define TIF_MEMDIE 17 /* is terminating due to OOM killer */
>>>>>>>> #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */
>>>>>>>> @@ -115,6 +116,7 @@ struct thread_info {
>>>>>>>> #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
>>>>>>>> #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
>>>>>>>> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
>>>>>>>> +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
>>>>>>>> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
>>>>>>>> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
>>>>>>>> #define _TIF_MCA_INIT (1 << TIF_MCA_INIT)
>>>>>>>> @@ -124,7 +126,7 @@ struct thread_info {
>>>>>>>>
>>>>>>>> /* "work to do on user-return" bits */
>>>>>>>> #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
>>>>>>>> - _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE)
>>>>>>>> + _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL)
>>>>>>>> /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
>>>>>>>> #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
>>>>>>>>
>>>>>>>> diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
>>>>>>>> index 6b61a703bcf5..8d4e1cab9190 100644
>>>>>>>> --- a/arch/ia64/kernel/process.c
>>>>>>>> +++ b/arch/ia64/kernel/process.c
>>>>>>>> @@ -171,7 +171,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
>>>>>>>> }
>>>>>>>>
>>>>>>>> /* deal with pending signal delivery */
>>>>>>>> - if (test_thread_flag(TIF_SIGPENDING)) {
>>>>>>>> + if (test_thread_flag(TIF_SIGPENDING) ||
>>>>>>>> + test_thread_flag(TIF_NOTIFY_SIGNAL)) {
>>>>>>>> local_irq_enable(); /* force interrupt enable */
>>>>>>>> ia64_do_signal(scr, in_syscall);
>>>>>>>>
>>>>>>>> which looks benign, but it enables a bit of conditional
>>>>>>>> TIF_NOTIFY_SIGNAL handling I don't understand.
>>>>>>>>
>>>>>>>> Can you help me get what is the interaction between
>>>>>>>> TIF_NOTIFY_SIGNAL and TIF_SIGPENDING for
>>>>>>>> simple processes without io_uring use case?
>>>>>>>>
>>>>>>>> I wonder if it's ia64_do_signal()' generates a signal
>>>>>>>> delivery when it should not.
>>>>>>>
>>>>>>> Can you test:
>>>>>>>
>>>>>>> https://marc.info/?l=linux-ia64&m=161187407609443&w=1
>>>>>>>
>>>>>>> with the addition mentioned here:
>>>>>>
>>>>>> Not enough:
>>>>>>
>>>>>> mkdir `/run/openrc': Read-only file system
>>>>>> mkdir `/run/openrc/starting': No such file or directory
>>>>>> mkdir `/run/openrc/started': No such file or directory
>>>>>> mkdir `/run/openrc/stopping': No such file or directory
>>>>>> mkdir `/run/openrc/inactive': No such file or directory
>>>>>> mkdir `/run/openrc/wasinactive': No such file or directory
>>>>>> mkdir `/run/openrc/failed': No such file or directory
>>>>>> mkdir `/run/openrc/hotplugged': No such file or directory
>>>>>> mkdir `/run/openrc/daemons': No such file or directory
>>>>>> [ 14.554357] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>> [ 14.554357] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>> mkdir `/run/openrc/options': No such file or directory
>>>>>> mkdir `/run/openrc/exclusive': No such file or directory
>>>>>> mkdir `/run/openrc/scheduled': No such file or directory
>>>>>> mkdir `/run/openrc/tmp': No such file or directory
>>>>>>
>>>>>>> https://marc.info/?l=linux-ia64&m=161187470709706&w=1
>>>>>>>
>>>>>>> if needed?
>>>>>>
>>>>>> Two patches above do fix the boot \o/ But have a lot of spam about
>>>>>> 'signal 0' delivery to a bunch of processes:
>>>>>>
>>>>>> * Mounting /proc ...
>>>>>> [ ok ]
>>>>>> * Mounting /run ...
>>>>>> * /run/openrc: creating directory
>>>>>> * /run/lock: creating directory
>>>>>> * /run/lock: correcting owner
>>>>>> * Caching service dependencies ...
>>>>>> [ ok ]
>>>>>> * Mounting /sys ...
>>>>>> [ ok ]
>>>>>> * Mounting debug filesystem ...
>>>>>> [ ok ]
>>>>>> * Mounting efivarfs filesystem ...
>>>>>> [ ok ]
>>>>>> * sysfs: caught unknown signal 0
>>>>>> * openrc: caught unknown signal 0
>>>>>> * Mounting cgroup filesystem ...
>>>>>
>>>>> That's an improvement! Let me take a look at this tonight and see if I
>>>>> can figure out what's going on. But yes, it's the ia64 signal delivery
>>>>> being just different enough from the norm that it apparently triggers
>>>>> some weirdness.
>>>>
>>>> Is this any better?
>>>
>>> And if that one works, can you try this basic variant?
>>
>> Both patches boot successfully without 'caught unknown signal 0' spam \o/
>>
>>> diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
>>> index e67b22fc3c60..c1b299760bf7 100644
>>> --- a/arch/ia64/kernel/signal.c
>>> +++ b/arch/ia64/kernel/signal.c
>>> @@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
>>> * need to push through a forced SIGSEGV.
>>> */
>>> while (1) {
>>> - get_signal(&ksig);
>>> + if (!get_signal(&ksig))
>>> + break;
>>>
>>> /*
>>> * get_signal() may have run a debugger (via notify_parent())
>>>
>
> Should I send the patch in `git am`-able form or the patch
> already queued up in some other form?
I'll be happy to queue it up. I take it you tried the above one-liner and it
works for you. Just trying to get some clarity on what was tried so I can
queue it up appropriately.
--
Jens Axboe
next prev parent reply other threads:[~2021-03-02 22:31 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20210222230519.73f3e239@sf>
2021-02-22 23:34 ` 5.11 regression: "ia64: add support for TIF_NOTIFY_SIGNAL" breaks ia64 boot Jens Axboe
2021-02-22 23:55 ` John Paul Adrian Glaubitz
[not found] ` <20210223083507.43b5a6dd@sf>
[not found] ` <51cbf584-07ef-1e62-7a3b-81494a04faa6@physik.fu-berlin.de>
2021-02-23 12:36 ` John Paul Adrian Glaubitz
[not found] ` <20210223192743.0198d4a9@sf>
[not found] ` <20210302222630.5056f243@sf>
2021-03-02 22:31 ` John Paul Adrian Glaubitz
2021-03-03 0:22 ` [bisected] 5.12-rc1 hpsa regression: "scsi: hpsa: Correct dev cmds outstanding for retried cmds" bre Sergei Trofimovich
2021-03-03 8:55 ` [bisected] 5.12-rc1 hpsa regression: "scsi: hpsa: Correct dev cmds outstanding for retried cmds" Sergei Trofimovich
2021-03-03 17:33 ` Don.Brace
[not found] ` <20210303220401.501449e5@sf>
2021-03-04 17:00 ` Don.Brace
2021-03-05 13:26 ` Tomas Henzl
2021-03-12 22:27 ` [PATCH] hpsa: fix boot on ia64 (atomic_t alignment) Sergei Trofimovich
2021-03-16 16:30 ` Don.Brace
2021-03-16 18:28 ` Arnd Bergmann
2021-03-17 2:25 ` Martin K. Petersen
2021-03-17 13:19 ` David Laight
2021-03-17 19:06 ` Don.Brace
2021-03-17 17:28 ` John Paul Adrian Glaubitz
2021-03-27 10:24 ` Sergei Trofimovich
2021-03-24 7:08 ` John Paul Adrian Glaubitz
2021-03-24 18:37 ` Don.Brace
2021-03-29 11:25 ` John Paul Adrian Glaubitz
2021-03-29 14:22 ` Arnd Bergmann
2021-03-30 3:02 ` Martin K. Petersen
2021-03-30 7:19 ` [PATCH v2 1/3] hpsa: use __packed on individual structs, not header-wide Sergei Trofimovich
2021-03-30 7:19 ` [PATCH v2 2/3] hpsa: fix boot on ia64 (atomic_t alignment) Sergei Trofimovich
2021-03-30 7:19 ` [PATCH v2 3/3] hpsa: add an assert to prevent from __packed reintroduction Sergei Trofimovich
2021-03-30 7:34 ` Arnd Bergmann
2021-04-02 14:40 ` Elliott, Robert (Servers)
2021-04-03 14:51 ` Sergei Trofimovich
2021-03-30 7:30 ` [PATCH v2 1/3] hpsa: use __packed on individual structs, not header-wide Arnd Bergmann
2021-03-30 7:43 ` Arnd Bergmann
2021-04-02 3:54 ` Martin K. Petersen
2021-04-15 18:41 ` Don.Brace
2021-03-05 9:22 ` [bisected] 5.12-rc1 hpsa regression: "scsi: hpsa: Correct dev cmds outstanding for retried cmds" Geert Uytterhoeven
2021-03-05 13:31 ` Arnd Bergmann
2021-03-05 20:45 ` Don.Brace
2021-03-03 15:42 ` Don.Brace
2021-03-17 17:42 ` 5.11 regression: "ia64: add support for TIF_NOTIFY_SIGNAL" breaks ia64 boot John Paul Adrian Glaubitz
2021-03-17 17:53 ` John Paul Adrian Glaubitz
[not found] ` <20210222235359.75d1a912@sf>
2021-02-23 0:34 ` Jens Axboe
2021-02-23 0:41 ` Jens Axboe
2021-02-23 0:43 ` Jens Axboe
[not found] ` <20210223080830.23bccdbf@sf>
2021-03-02 22:07 ` Sergei Trofimovich
2021-03-02 22:31 ` Jens Axboe [this message]
[not found] ` <20210302232716.353ed49b@sf>
2021-03-03 0:34 ` Jens Axboe
2021-03-03 3:51 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9cfaede7-d885-88a5-0bff-91b99b30a8d4@kernel.dk \
--to=axboe@kernel.dk \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=slyich@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox