Re: 5.11 regression: "ia64: add support for TIF_NOTIFY_SIGNAL" breaks ia64 boot

From: Jens Axboe
Date: Wed Mar 03 2021 - 06:41:01 EST


On 3/2/21 5:34 PM, Jens Axboe wrote:
> On 3/2/21 4:27 PM, Sergei Trofimovich wrote:
>> On Tue, 2 Mar 2021 15:31:13 -0700
>> Jens Axboe <axboe@xxxxxxxxx> wrote:
>>
>>> On 3/2/21 3:07 PM, Sergei Trofimovich wrote:
>>>> On Tue, 23 Feb 2021 08:08:30 +0000
>>>> Sergei Trofimovich <slyich@xxxxxxxxx> wrote:
>>>>
>>>>> On Mon, 22 Feb 2021 17:43:58 -0700
>>>>> Jens Axboe <axboe@xxxxxxxxx> wrote:
>>>>>
>>>>>> On 2/22/21 5:41 PM, Jens Axboe wrote:
>>>>>>> On 2/22/21 5:34 PM, Jens Axboe wrote:
>>>>>>>> On 2/22/21 4:53 PM, Sergei Trofimovich wrote:
>>>>>>>>> On Mon, 22 Feb 2021 16:34:50 -0700
>>>>>>>>> Jens Axboe <axboe@xxxxxxxxx> wrote:
>>>>>>>>>
>>>>>>>>>> On 2/22/21 4:05 PM, Sergei Trofimovich wrote:
>>>>>>>>>>> Hia Jens!
>>>>>>>>>>>
>>>>>>>>>>> Tried 5.11 on rx3600 box and noticed it has
>>>>>>>>>>> a problem handling init (5.10 booted fine):
>>>>>>>>>>>
>>>>>>>>>>> INIT: version 2.98 booting
>>>>>>>>>>>
>>>>>>>>>>> OpenRC 0.42.1 is starting up Gentoo Linux (ia64)
>>>>>>>>>>>
>>>>>>>>>>> mkdir `/run/openrc': Read-only file system
>>>>>>>>>>> mkdir `/run/openrc/starting': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/started': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/stopping': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/inactive': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/wasinactive': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/failed': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/hotplugged': No such file or directory
>>>>>>>>>>> mkdir `/run/openrc/daemons': No such file or directory
>>>>>>>>>>> mkdir `/run[ 14.595059] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>>>>>>> [ 14.599059] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>>>>>>
>>>>>>>>>>> I suspect we build bad signal stack frame for userspace.
>>>>>>>>>>>
>>>>>>>>>>> With a bit of #define DEBUG_SIG 1 enabled the signals are SIGCHLD:
>>>>>>>>>>>
>>>>>>>>>>> [ 34.969771] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6aeaa0 ip=a000000000040740 handler=000000004b4c59b6
>>>>>>>>>>> [ 34.969948] SIG deliver (init:1): sig=17 sp=60000fffff1ccc50 ip=a000000000040740 handler=000000004638b9e5
>>>>>>>>>>> [ 34.969948] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6adf90 ip=a000000000040740 handler=000000004b4c59b6
>>>>>>>>>>> [ 34.973948] SIG deliver (init:1): sig=17 sp=60000fffff1cc140 ip=a000000000040740 handler=000000004638b9e5
>>>>>>>>>>> [ 34.973948] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>>>>>>> [ 34.973948] SIG deliver (gendepends.sh:69): sig=17 sp=60000fffff6ad480 ip=a000000000040740 handler=000000004b4c59b6
>>>>>>>>>>> [ 34.973948] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>>>>>>
>>>>>>>>>>> Bisect points at:
>>>>>>>>>>>
>>>>>>>>>>> commit b269c229b0e89aedb7943c06673b56b6052cf5e5
>>>>>>>>>>> Author: Jens Axboe <axboe@xxxxxxxxx>
>>>>>>>>>>> Date: Fri Oct 9 14:49:43 2020 -0600
>>>>>>>>>>>
>>>>>>>>>>> ia64: add support for TIF_NOTIFY_SIGNAL
>>>>>>>>>>>
>>>>>>>>>>> Wire up TIF_NOTIFY_SIGNAL handling for ia64.
>>>>>>>>>>>
>>>>>>>>>>> Cc: linux-ia64@xxxxxxxxxxxxxxx
>>>>>>>>>>> [axboe: added fixes from Mike Rapoport <rppt@xxxxxxxxxx>]
>>>>>>>>>>> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
>>>>>>>>>>> index 64a1011f6812..51d20cb37706 100644
>>>>>>>>>>> --- a/arch/ia64/include/asm/thread_info.h
>>>>>>>>>>> +++ b/arch/ia64/include/asm/thread_info.h
>>>>>>>>>>> @@ -103,6 +103,7 @@ struct thread_info {
>>>>>>>>>>> #define TIF_SYSCALL_TRACE 2 /* syscall trace active */
>>>>>>>>>>> #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
>>>>>>>>>>> #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
>>>>>>>>>>> +#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */
>>>>>>>>>>> #define TIF_NOTIFY_RESUME 6 /* resumption notification requested */
>>>>>>>>>>> #define TIF_MEMDIE 17 /* is terminating due to OOM killer */
>>>>>>>>>>> #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */
>>>>>>>>>>> @@ -115,6 +116,7 @@ struct thread_info {
>>>>>>>>>>> #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
>>>>>>>>>>> #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
>>>>>>>>>>> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
>>>>>>>>>>> +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
>>>>>>>>>>> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
>>>>>>>>>>> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
>>>>>>>>>>> #define _TIF_MCA_INIT (1 << TIF_MCA_INIT)
>>>>>>>>>>> @@ -124,7 +126,7 @@ struct thread_info {
>>>>>>>>>>>
>>>>>>>>>>> /* "work to do on user-return" bits */
>>>>>>>>>>> #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
>>>>>>>>>>> - _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE)
>>>>>>>>>>> + _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL)
>>>>>>>>>>> /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
>>>>>>>>>>> #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
>>>>>>>>>>> index 6b61a703bcf5..8d4e1cab9190 100644
>>>>>>>>>>> --- a/arch/ia64/kernel/process.c
>>>>>>>>>>> +++ b/arch/ia64/kernel/process.c
>>>>>>>>>>> @@ -171,7 +171,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
>>>>>>>>>>> }
>>>>>>>>>>>
>>>>>>>>>>> /* deal with pending signal delivery */
>>>>>>>>>>> - if (test_thread_flag(TIF_SIGPENDING)) {
>>>>>>>>>>> + if (test_thread_flag(TIF_SIGPENDING) ||
>>>>>>>>>>> + test_thread_flag(TIF_NOTIFY_SIGNAL)) {
>>>>>>>>>>> local_irq_enable(); /* force interrupt enable */
>>>>>>>>>>> ia64_do_signal(scr, in_syscall);
>>>>>>>>>>>
>>>>>>>>>>> which looks benign, but it enables a bit of conditional
>>>>>>>>>>> TIF_NOTIFY_SIGNAL handling I don't understand.
>>>>>>>>>>>
>>>>>>>>>>> Can you help me understand the interaction between
>>>>>>>>>>> TIF_NOTIFY_SIGNAL and TIF_SIGPENDING for
>>>>>>>>>>> simple processes that do not use io_uring?
>>>>>>>>>>>
>>>>>>>>>>> I wonder if ia64_do_signal() generates a signal
>>>>>>>>>>> delivery when it should not.
>>>>>>>>>>
>>>>>>>>>> Can you test:
>>>>>>>>>>
>>>>>>>>>> https://marc.info/?l=linux-ia64&m=161187407609443&w=1
>>>>>>>>>>
>>>>>>>>>> with the addition mentioned here:
>>>>>>>>>
>>>>>>>>> Not enough:
>>>>>>>>>
>>>>>>>>> mkdir `/run/openrc': Read-only file system
>>>>>>>>> mkdir `/run/openrc/starting': No such file or directory
>>>>>>>>> mkdir `/run/openrc/started': No such file or directory
>>>>>>>>> mkdir `/run/openrc/stopping': No such file or directory
>>>>>>>>> mkdir `/run/openrc/inactive': No such file or directory
>>>>>>>>> mkdir `/run/openrc/wasinactive': No such file or directory
>>>>>>>>> mkdir `/run/openrc/failed': No such file or directory
>>>>>>>>> mkdir `/run/openrc/hotplugged': No such file or directory
>>>>>>>>> mkdir `/run/openrc/daemons': No such file or directory
>>>>>>>>> [ 14.554357] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>>>>> [ 14.554357] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>>>> mkdir `/run/openrc/options': No such file or directory
>>>>>>>>> mkdir `/run/openrc/exclusive': No such file or directory
>>>>>>>>> mkdir `/run/openrc/scheduled': No such file or directory
>>>>>>>>> mkdir `/run/openrc/tmp': No such file or directory
>>>>>>>>>
>>>>>>>>>> https://marc.info/?l=linux-ia64&m=161187470709706&w=1
>>>>>>>>>>
>>>>>>>>>> if needed?
>>>>>>>>>
>>>>>>>>> The two patches above do fix the boot \o/ But there is a lot of spam about
>>>>>>>>> 'signal 0' delivery to a bunch of processes:
>>>>>>>>>
>>>>>>>>> * Mounting /proc ...
>>>>>>>>> [ ok ]
>>>>>>>>> * Mounting /run ...
>>>>>>>>> * /run/openrc: creating directory
>>>>>>>>> * /run/lock: creating directory
>>>>>>>>> * /run/lock: correcting owner
>>>>>>>>> * Caching service dependencies ...
>>>>>>>>> [ ok ]
>>>>>>>>> * Mounting /sys ...
>>>>>>>>> [ ok ]
>>>>>>>>> * Mounting debug filesystem ...
>>>>>>>>> [ ok ]
>>>>>>>>> * Mounting efivarfs filesystem ...
>>>>>>>>> [ ok ]
>>>>>>>>> * sysfs: caught unknown signal 0
>>>>>>>>> * openrc: caught unknown signal 0
>>>>>>>>> * Mounting cgroup filesystem ...
>>>>>>>>
>>>>>>>> That's an improvement! Let me take a look at this tonight and see if I
>>>>>>>> can figure out what's going on. But yes, it's the ia64 signal delivery
>>>>>>>> being just different enough from the norm that it apparently triggers
>>>>>>>> some weirdness.
>>>>>>>
>>>>>>> Is this any better?
>>>>>>
>>>>>> And if that one works, can you try this basic variant?
>>>>>
>>>>> Both patches boot successfully without 'caught unknown signal 0' spam \o/
>>>>>
>>>>>> diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
>>>>>> index e67b22fc3c60..c1b299760bf7 100644
>>>>>> --- a/arch/ia64/kernel/signal.c
>>>>>> +++ b/arch/ia64/kernel/signal.c
>>>>>> @@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
>>>>>> * need to push through a forced SIGSEGV.
>>>>>> */
>>>>>> while (1) {
>>>>>> - get_signal(&ksig);
>>>>>> + if (!get_signal(&ksig))
>>>>>> + break;
>>>>>>
>>>>>> /*
>>>>>> * get_signal() may have run a debugger (via notify_parent())
>>>>>>
>>>>
>>>> Should I send the patch in `git am`-able form, or is the patch
>>>> already queued up in some other form?
>>>
>>> I'll be happy to queue it up. I take it you tried the above one-liner and it
>>> works for you. Just trying to get some clarity on what was tried so I can
>>> queue it up appropriately.
>>
>> Yes, 1.5 liner with
>>
>>>>>> - get_signal(&ksig);
>>>>>> + if (!get_signal(&ksig))
>>>>>> + break;
>>
>> made 5.11 boot successfully.
>>
>> Tested-by: Sergei Trofimovich <slyich@xxxxxxxxx>
>
> Awesome! Queued it up:
>
> https://git.kernel.dk/cgit/linux-block/commit/?h=misc-5.12&id=f5f4fc4649ae542b1a25670b17aaf3cbb6187acc

This is now in Linus's tree.

--
Jens Axboe