From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jens Axboe Date: Tue, 02 Mar 2021 22:31:13 +0000 Subject: Re: 5.11 regression: "ia64: add support for TIF_NOTIFY_SIGNAL" breaks ia64 boot Message-Id: <9cfaede7-d885-88a5-0bff-91b99b30a8d4@kernel.dk> List-Id: References: <20210222230519.73f3e239@sf> <20210222235359.75d1a912@sf> <30a833d8-44a0-284d-4fe4-e9a52f407043@kernel.dk> <20210223080830.23bccdbf@sf> <20210302220716.0b6f72ae@sf> In-Reply-To: <20210302220716.0b6f72ae@sf> MIME-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable To: Sergei Trofimovich Cc: linux-ia64@vger.kernel.org, linux-kernel@vger.kernel.org On 3/2/21 3:07 PM, Sergei Trofimovich wrote: > On Tue, 23 Feb 2021 08:08:30 +0000 > Sergei Trofimovich wrote: >=20 >> On Mon, 22 Feb 2021 17:43:58 -0700 >> Jens Axboe wrote: >> >>> On 2/22/21 5:41 PM, Jens Axboe wrote: =20 >>>> On 2/22/21 5:34 PM, Jens Axboe wrote: =20 >>>>> On 2/22/21 4:53 PM, Sergei Trofimovich wrote: =20 >>>>>> On Mon, 22 Feb 2021 16:34:50 -0700 >>>>>> Jens Axboe wrote: >>>>>> =20 >>>>>>> On 2/22/21 4:05 PM, Sergei Trofimovich wrote: =20 >>>>>>>> Hia Jens! 
>>>>>>>> >>>>>>>> Tried 5.11 on rx3600 box and noticed it has >>>>>>>> a problem handling init (5.10 booted fine): >>>>>>>> >>>>>>>> INIT: version 2.98 booting >>>>>>>> >>>>>>>> OpenRC 0.42.1 is starting up Gentoo Linux (ia64) >>>>>>>> >>>>>>>> mkdir `/run/openrc': Read-only file system >>>>>>>> mkdir `/run/openrc/starting': No such file or directory >>>>>>>> mkdir `/run/openrc/started': No such file or directory >>>>>>>> mkdir `/run/openrc/stopping': No such file or directory >>>>>>>> mkdir `/run/openrc/inactive': No such file or directory >>>>>>>> mkdir `/run/openrc/wasinactive': No such file or directory >>>>>>>> mkdir `/run/openrc/failed': No such file or directory >>>>>>>> mkdir `/run/openrc/hotplugged': No such file or directory >>>>>>>> mkdir `/run/openrc/daemons': No such file or directory >>>>>>>> mkdir `/run[ 14.595059] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b >>>>>>>> [ 14.599059] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- >>>>>>>> >>>>>>>> I suspect we build bad signal stack frame for userspace. >>>>>>>> >>>>>>>> With a bit of #define DEBUG_SIG 1 enabled the signals are SIGCHLD: >>>>>>>> >>>>>>>> [ 34.969771] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6aeaa0 ip=A000000000040740 handler=000000004b4c59b6 >>>>>>>> [ 34.969948] SIG deliver (init:1): sig=17 sp=60000fffff1ccc50 ip=A000000000040740 handler=000000004638b9e5 >>>>>>>> [ 34.969948] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6adf90 ip=A000000000040740 handler=000000004b4c59b6 >>>>>>>> [ 34.973948] SIG deliver (init:1): sig=17 sp=60000fffff1cc140 ip=A000000000040740 handler=000000004638b9e5 >>>>>>>> [ 34.973948] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b >>>>>>>> [ 34.973948] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6ad480 ip=A000000000040740 handler=000000004b4c59b6 >>>>>>>> [ 34.973948] ---[ end Kernel panic - not syncing: Attempted to kill init!
exitcode=0x0000000b ]--- >>>>>>>> >>>>>>>> Bisect points at: >>>>>>>> >>>>>>>> commit b269c229b0e89aedb7943c06673b56b6052cf5e5 >>>>>>>> Author: Jens Axboe >>>>>>>> Date: Fri Oct 9 14:49:43 2020 -0600 >>>>>>>> >>>>>>>> ia64: add support for TIF_NOTIFY_SIGNAL >>>>>>>> >>>>>>>> Wire up TIF_NOTIFY_SIGNAL handling for ia64. >>>>>>>> >>>>>>>> Cc: linux-ia64@vger.kernel.org >>>>>>>> [axboe: added fixes from Mike Rapoport ] >>>>>>>> Signed-off-by: Jens Axboe >>>>>>>> >>>>>>>> diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h >>>>>>>> index 64a1011f6812..51d20cb37706 100644 >>>>>>>> --- a/arch/ia64/include/asm/thread_info.h >>>>>>>> +++ b/arch/ia64/include/asm/thread_info.h >>>>>>>> @@ -103,6 +103,7 @@ struct thread_info { >>>>>>>> #define TIF_SYSCALL_TRACE 2 /* syscall trace active */ >>>>>>>> #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ >>>>>>>> #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ >>>>>>>> +#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */ >>>>>>>> #define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ >>>>>>>> #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ >>>>>>>> #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ >>>>>>>> @@ -115,6 +116,7 @@ struct thread_info { >>>>>>>> #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) >>>>>>>> #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) >>>>>>>> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) >>>>>>>> +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) >>>>>>>> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) >>>>>>>> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) >>>>>>>> #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) >>>>>>>> @@ -124,7 +126,7 @@ struct thread_info { >>>>>>>> >>>>>>>> /* "work to do on user-return" bits */ >>>>>>>> #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ >>>>>>>> -
_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE) >>>>>>>> + _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL) >>>>>>>> /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ >>>>>>>> #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) >>>>>>>> >>>>>>>> diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c >>>>>>>> index 6b61a703bcf5..8d4e1cab9190 100644 >>>>>>>> --- a/arch/ia64/kernel/process.c >>>>>>>> +++ b/arch/ia64/kernel/process.c >>>>>>>> @@ -171,7 +171,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) >>>>>>>> } >>>>>>>> >>>>>>>> /* deal with pending signal delivery */ >>>>>>>> - if (test_thread_flag(TIF_SIGPENDING)) { >>>>>>>> + if (test_thread_flag(TIF_SIGPENDING) || >>>>>>>> + test_thread_flag(TIF_NOTIFY_SIGNAL)) { >>>>>>>> local_irq_enable(); /* force interrupt enable */ >>>>>>>> ia64_do_signal(scr, in_syscall); >>>>>>>> >>>>>>>> which looks benign, but it enables a bit of conditional >>>>>>>> TIF_NOTIFY_SIGNAL handling I don't understand. >>>>>>>> >>>>>>>> Can you help me get what is the interaction between >>>>>>>> TIF_NOTIFY_SIGNAL and TIF_SIGPENDING for >>>>>>>> simple processes without io_uring use case? >>>>>>>> >>>>>>>> I wonder if it's ia64_do_signal()' generates a signal >>>>>>>> delivery when it should not.
>>>>>>> >>>>>>> Can you test: >>>>>>> >>>>>>> https://marc.info/?l=linux-ia64&m=161187407609443&w=1 >>>>>>> >>>>>>> with the addition mentioned here: >>>>>> >>>>>> Not enough: >>>>>> >>>>>> mkdir `/run/openrc': Read-only file system >>>>>> mkdir `/run/openrc/starting': No such file or directory >>>>>> mkdir `/run/openrc/started': No such file or directory >>>>>> mkdir `/run/openrc/stopping': No such file or directory >>>>>> mkdir `/run/openrc/inactive': No such file or directory >>>>>> mkdir `/run/openrc/wasinactive': No such file or directory >>>>>> mkdir `/run/openrc/failed': No such file or directory >>>>>> mkdir `/run/openrc/hotplugged': No such file or directory >>>>>> mkdir `/run/openrc/daemons': No such file or directory >>>>>> [ 14.554357] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b >>>>>> [ 14.554357] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- >>>>>> mkdir `/run/openrc/options': No such file or directory >>>>>> mkdir `/run/openrc/exclusive': No such file or directory >>>>>> mkdir `/run/openrc/scheduled': No such file or directory >>>>>> mkdir `/run/openrc/tmp': No such file or directory >>>>>> >>>>>>> https://marc.info/?l=linux-ia64&m=161187470709706&w=1 >>>>>>> >>>>>>> if needed? >>>>>> >>>>>> Two patches above do fix the boot \o/ But have a lot of spam about >>>>>> 'signal 0' delivery to a bunch of processes: >>>>>> >>>>>> * Mounting /proc ... >>>>>> [ ok ] >>>>>> * Mounting /run ... >>>>>> * /run/openrc: creating directory >>>>>> * /run/lock: creating directory >>>>>> * /run/lock: correcting owner >>>>>> * Caching service dependencies ... >>>>>> [ ok ] >>>>>> * Mounting /sys ... >>>>>> [ ok ] >>>>>> * Mounting debug filesystem ... >>>>>> [ ok ] >>>>>> * Mounting efivarfs filesystem ... >>>>>> [ ok ] >>>>>> * sysfs: caught unknown signal 0 >>>>>> * openrc: caught unknown signal 0 >>>>>> * Mounting cgroup filesystem ...
>>>>> >>>>> That's an improvement! Let me take a look at this tonight and see if I >>>>> can figure out what's going on. But yes, it's the ia64 signal delivery >>>>> being just different enough from the norm that it apparently triggers >>>>> some weirdness. >>>> >>>> Is this any better? >>> >>> And if that one works, can you try this basic variant? >> >> Both patches boot successfully without 'caught unknown signal 0' spam \o/ >> >>> diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c >>> index e67b22fc3c60..c1b299760bf7 100644 >>> --- a/arch/ia64/kernel/signal.c >>> +++ b/arch/ia64/kernel/signal.c >>> @@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) >>> * need to push through a forced SIGSEGV. >>> */ >>> while (1) { >>> - get_signal(&ksig); >>> + if (!get_signal(&ksig)) >>> + break; >>> >>> /* >>> * get_signal() may have run a debugger (via notify_parent()) >>> > > Should I send the patch in `git am`-able form or the patch > already queued up in some other form? I'll be happy to queue it up. I take it you tried the above one-liner and it works for you. Just trying to get some clarity on what was tried so I can queue it up appropriately. -- Jens Axboe