From: james.morse@arm.com (James Morse)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2] arm64: Introduce IRQ stack
Date: Fri, 18 Sep 2015 14:46:50 +0100 [thread overview]
Message-ID: <55FC15CA.7040009@arm.com> (raw)
In-Reply-To: <1442155337-7020-1-git-send-email-jungseoklee85@gmail.com>
Hi Jungseok Lee,
I gave this a go on a Juno board, while generating usb/network interrupts:
Tested-by: James Morse <james.morse@arm.com>
On 13/09/15 15:42, Jungseok Lee wrote:
> Currently, kernel context and interrupts are handled using a single
> kernel stack navigated by sp_el1. This forces many systems to use
> 16KB stack, not 8KB one. Low memory platforms naturally suffer from
> memory pressure accompanied by performance degradation.
>
> This patch addresses the issue as introducing a separate percpu IRQ
> stack to handle both hard and soft interrupts with two ground rules:
>
> - Utilize sp_el0 in EL1 context, which is not used currently
> - Do not complicate current_thread_info calculation
>
> It is a core concept to trace struct thread_info using sp_el0 instead
> of sp_el1. This approach helps arm64 align with other architectures
> regarding object_is_on_stack() without additional complexity.
I think you are missing a 'mov <reg>, sp; msr sp_el0, <reg>' in
kernel/sleep.S:cpu_resume():175. This code finds the saved stack pointer
from 'sleep_save_sp', and is called when the cpu wakes up from suspend.
It didn't show up in testing, because the wake-up is always under the idle
task, which evidently doesn't call current_thread_info() after wake-up.
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 4306c93..c156540 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -88,7 +88,7 @@
>
> .if \el == 0
> mrs x21, sp_el0
> - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
> + get_thread_info \el, tsk // Ensure MDSCR_EL1.SS is clear,
> ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
> disable_step_tsk x19, x20 // exceptions when scheduling.
> .else
> @@ -105,6 +105,8 @@
> .if \el == 0
> mvn x21, xzr
> str x21, [sp, #S_SYSCALLNO]
> + mov x25, sp
> + msr sp_el0, x25
> .endif
>
> /*
> @@ -163,9 +165,45 @@ alternative_endif
> eret // return to kernel
> .endm
>
> - .macro get_thread_info, rd
> + .macro get_thread_info, el, rd
> + .if \el == 0
Why does \el matter here?
If \el==0, we interrupted an el0 thread, and set sp_el0 in kernel_entry()
to the el1 stack.
If \el==1, we interrupted an el1 thread, didn't overwrite its sp_el0, so
sp_el0 & ~(THREAD_SIZE-1) will give us the struct thread_info of the
interrupted task.
So either way, sp_el0 is correct...
> mov \rd, sp
> - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack
> + .else
> + mrs \rd, sp_el0
> + .endif
> + and \rd, \rd, #~(THREAD_SIZE - 1) // bottom of thread stack
> + .endm
> +
> + .macro get_irq_stack
> + adr_l x21, irq_stacks
> + mrs x22, tpidr_el1
> + add x21, x21, x22
> + .endm
> +
> + .macro irq_stack_entry
> + get_irq_stack
> + ldr w23, [x21, #IRQ_COUNT]
> + cbnz w23, 1f // check irq recursion
> + mov x23, sp
> + str x23, [x21, #IRQ_THREAD_SP]
> + ldr x23, [x21, #IRQ_STACK]
> + mov sp, x23
> + mov x23, xzr
> +1: add w23, w23, #1
> + str w23, [x21, #IRQ_COUNT]
A (largely untested) example for the 'compare the high-order bits' way of
doing this:
.macro irq_stack_entry
get_irq_stack
ldr x22, [x21, #IRQ_STACK]
and x23, x22, #~(THREAD_SIZE -1)
mov x24, sp
and x24, x24, #~(THREAD_SIZE -1)
cmp x23, x24 // irq_recursion?
mov x24, sp
csel x23, x24, x22, eq
mov sp, x23
.endm
/* preserve x24 between irq_stack_entry/irq_stack_exit */
.macro irq_stack_exit
mov sp, x24
.endm
This would let you remove IRQ_COUNT and IRQ_THREAD_SP, and avoid the two
stores and a conditional-branch in irq_stack_entry/irq_stack_exit.
Thoughts?
> + .endm
> +
> + .macro irq_stack_exit
> + get_irq_stack
> + ldr w23, [x21, #IRQ_COUNT]
> + sub w23, w23, #1
> + cbnz w23, 1f // check irq recursion
> + mov x23, sp
> + str x23, [x21, #IRQ_STACK]
> + ldr x23, [x21, #IRQ_THREAD_SP]
> + mov sp, x23
> + mov x23, xzr
> +1: str w23, [x21, #IRQ_COUNT]
> .endm
>
> /*
> @@ -183,10 +221,11 @@ tsk .req x28 // current thread_info
> * Interrupt handling.
> */
> .macro irq_handler
> - adrp x1, handle_arch_irq
> - ldr x1, [x1, #:lo12:handle_arch_irq]
> + ldr_l x1, handle_arch_irq
> mov x0, sp
> + irq_stack_entry
> blr x1
> + irq_stack_exit
> .endm
>
> .text
> @@ -361,7 +400,7 @@ el1_irq:
> irq_handler
>
> #ifdef CONFIG_PREEMPT
> - get_thread_info tsk
> + get_thread_info 1, tsk
> ldr w24, [tsk, #TI_PREEMPT] // get preempt count
> cbnz w24, 1f // preempt count != 0
> ldr x0, [tsk, #TI_FLAGS] // get flags
> @@ -597,6 +636,7 @@ ENTRY(cpu_switch_to)
> ldp x29, x9, [x8], #16
> ldr lr, [x8]
> mov sp, x9
> + msr sp_el0, x9
> ret
> ENDPROC(cpu_switch_to)
>
> @@ -655,7 +695,7 @@ ENTRY(ret_from_fork)
> cbz x19, 1f // not a kernel thread
> mov x0, x20
> blr x19
> -1: get_thread_info tsk
> +1: get_thread_info 1, tsk
> b ret_to_user
> ENDPROC(ret_from_fork)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index a055be6..cb13290 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -441,6 +441,8 @@ __mmap_switched:
> b 1b
> 2:
> adr_l sp, initial_sp, x4
> + mov x4, sp
There should probably a comment explaining why sp_el0 is being set (for the
changes outside entry.S). Something like:
msr sp_el0, x4 // stash struct thread_info
> + msr sp_el0, x4
> str_l x21, __fdt_pointer, x5 // Save FDT pointer
> str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
> mov x29, #0
> @@ -613,6 +615,7 @@ ENDPROC(secondary_startup)
> ENTRY(__secondary_switched)
> ldr x0, [x21] // get secondary_data.stack
> mov sp, x0
> + msr sp_el0, x0
> mov x29, #0
> b secondary_start_kernel
> ENDPROC(__secondary_switched)
Thanks,
James
WARNING: multiple messages have this Message-ID (diff)
From: James Morse <james.morse@arm.com>
To: Jungseok Lee <jungseoklee85@gmail.com>
Cc: Catalin Marinas <Catalin.Marinas@arm.com>,
Will Deacon <Will.Deacon@arm.com>,
"linux-arm-kernel@lists.infradead.org"
<linux-arm-kernel@lists.infradead.org>,
"takahiro.akashi@linaro.org" <takahiro.akashi@linaro.org>,
Mark Rutland <Mark.Rutland@arm.com>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2] arm64: Introduce IRQ stack
Date: Fri, 18 Sep 2015 14:46:50 +0100 [thread overview]
Message-ID: <55FC15CA.7040009@arm.com> (raw)
In-Reply-To: <1442155337-7020-1-git-send-email-jungseoklee85@gmail.com>
Hi Jungseok Lee,
I gave this a go on a Juno board, while generating usb/network interrupts:
Tested-by: James Morse <james.morse@arm.com>
On 13/09/15 15:42, Jungseok Lee wrote:
> Currently, kernel context and interrupts are handled using a single
> kernel stack navigated by sp_el1. This forces many systems to use
> 16KB stack, not 8KB one. Low memory platforms naturally suffer from
> memory pressure accompanied by performance degradation.
>
> This patch addresses the issue as introducing a separate percpu IRQ
> stack to handle both hard and soft interrupts with two ground rules:
>
> - Utilize sp_el0 in EL1 context, which is not used currently
> - Do not complicate current_thread_info calculation
>
> It is a core concept to trace struct thread_info using sp_el0 instead
> of sp_el1. This approach helps arm64 align with other architectures
> regarding object_is_on_stack() without additional complexity.
I think you are missing a 'mov <reg>, sp; msr sp_el0, <reg>' in
kernel/sleep.S:cpu_resume():175. This code finds the saved stack pointer
from 'sleep_save_sp', and is called when the cpu wakes up from suspend.
It didn't show up in testing, because the wake-up is always under the idle
task, which evidently doesn't call current_thread_info() after wake-up.
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 4306c93..c156540 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -88,7 +88,7 @@
>
> .if \el == 0
> mrs x21, sp_el0
> - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
> + get_thread_info \el, tsk // Ensure MDSCR_EL1.SS is clear,
> ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
> disable_step_tsk x19, x20 // exceptions when scheduling.
> .else
> @@ -105,6 +105,8 @@
> .if \el == 0
> mvn x21, xzr
> str x21, [sp, #S_SYSCALLNO]
> + mov x25, sp
> + msr sp_el0, x25
> .endif
>
> /*
> @@ -163,9 +165,45 @@ alternative_endif
> eret // return to kernel
> .endm
>
> - .macro get_thread_info, rd
> + .macro get_thread_info, el, rd
> + .if \el == 0
Why does \el matter here?
If \el==0, we interrupted an el0 thread, and set sp_el0 in kernel_entry()
to the el1 stack.
If \el==1, we interrupted an el1 thread, didn't overwrite its sp_el0, so
sp_el0 & ~(THREAD_SIZE-1) will give us the struct thread_info of the
interrupted task.
So either way, sp_el0 is correct...
> mov \rd, sp
> - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack
> + .else
> + mrs \rd, sp_el0
> + .endif
> + and \rd, \rd, #~(THREAD_SIZE - 1) // bottom of thread stack
> + .endm
> +
> + .macro get_irq_stack
> + adr_l x21, irq_stacks
> + mrs x22, tpidr_el1
> + add x21, x21, x22
> + .endm
> +
> + .macro irq_stack_entry
> + get_irq_stack
> + ldr w23, [x21, #IRQ_COUNT]
> + cbnz w23, 1f // check irq recursion
> + mov x23, sp
> + str x23, [x21, #IRQ_THREAD_SP]
> + ldr x23, [x21, #IRQ_STACK]
> + mov sp, x23
> + mov x23, xzr
> +1: add w23, w23, #1
> + str w23, [x21, #IRQ_COUNT]
A (largely untested) example for the 'compare the high-order bits' way of
doing this:
.macro irq_stack_entry
get_irq_stack
ldr x22, [x21, #IRQ_STACK]
and x23, x22, #~(THREAD_SIZE -1)
mov x24, sp
and x24, x24, #~(THREAD_SIZE -1)
cmp x23, x24 // irq_recursion?
mov x24, sp
csel x23, x24, x22, eq
mov sp, x23
.endm
/* preserve x24 between irq_stack_entry/irq_stack_exit */
.macro irq_stack_exit
mov sp, x24
.endm
This would let you remove IRQ_COUNT and IRQ_THREAD_SP, and avoid the two
stores and a conditional-branch in irq_stack_entry/irq_stack_exit.
Thoughts?
> + .endm
> +
> + .macro irq_stack_exit
> + get_irq_stack
> + ldr w23, [x21, #IRQ_COUNT]
> + sub w23, w23, #1
> + cbnz w23, 1f // check irq recursion
> + mov x23, sp
> + str x23, [x21, #IRQ_STACK]
> + ldr x23, [x21, #IRQ_THREAD_SP]
> + mov sp, x23
> + mov x23, xzr
> +1: str w23, [x21, #IRQ_COUNT]
> .endm
>
> /*
> @@ -183,10 +221,11 @@ tsk .req x28 // current thread_info
> * Interrupt handling.
> */
> .macro irq_handler
> - adrp x1, handle_arch_irq
> - ldr x1, [x1, #:lo12:handle_arch_irq]
> + ldr_l x1, handle_arch_irq
> mov x0, sp
> + irq_stack_entry
> blr x1
> + irq_stack_exit
> .endm
>
> .text
> @@ -361,7 +400,7 @@ el1_irq:
> irq_handler
>
> #ifdef CONFIG_PREEMPT
> - get_thread_info tsk
> + get_thread_info 1, tsk
> ldr w24, [tsk, #TI_PREEMPT] // get preempt count
> cbnz w24, 1f // preempt count != 0
> ldr x0, [tsk, #TI_FLAGS] // get flags
> @@ -597,6 +636,7 @@ ENTRY(cpu_switch_to)
> ldp x29, x9, [x8], #16
> ldr lr, [x8]
> mov sp, x9
> + msr sp_el0, x9
> ret
> ENDPROC(cpu_switch_to)
>
> @@ -655,7 +695,7 @@ ENTRY(ret_from_fork)
> cbz x19, 1f // not a kernel thread
> mov x0, x20
> blr x19
> -1: get_thread_info tsk
> +1: get_thread_info 1, tsk
> b ret_to_user
> ENDPROC(ret_from_fork)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index a055be6..cb13290 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -441,6 +441,8 @@ __mmap_switched:
> b 1b
> 2:
> adr_l sp, initial_sp, x4
> + mov x4, sp
There should probably a comment explaining why sp_el0 is being set (for the
changes outside entry.S). Something like:
msr sp_el0, x4 // stash struct thread_info
> + msr sp_el0, x4
> str_l x21, __fdt_pointer, x5 // Save FDT pointer
> str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
> mov x29, #0
> @@ -613,6 +615,7 @@ ENDPROC(secondary_startup)
> ENTRY(__secondary_switched)
> ldr x0, [x21] // get secondary_data.stack
> mov sp, x0
> + msr sp_el0, x0
> mov x29, #0
> b secondary_start_kernel
> ENDPROC(__secondary_switched)
Thanks,
James
next prev parent reply other threads:[~2015-09-18 13:46 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-13 14:42 [PATCH v2] arm64: Introduce IRQ stack Jungseok Lee
2015-09-13 14:42 ` Jungseok Lee
2015-09-16 11:25 ` Will Deacon
2015-09-16 11:25 ` Will Deacon
2015-09-17 10:33 ` James Morse
2015-09-17 10:33 ` James Morse
2015-09-17 10:48 ` Catalin Marinas
2015-09-17 10:48 ` Catalin Marinas
2015-09-17 12:36 ` Jungseok Lee
2015-09-17 12:36 ` Jungseok Lee
2015-09-17 17:07 ` Catalin Marinas
2015-09-17 17:07 ` Catalin Marinas
2015-09-18 13:02 ` Jungseok Lee
2015-09-18 13:02 ` Jungseok Lee
2015-09-17 11:17 ` Catalin Marinas
2015-09-17 11:17 ` Catalin Marinas
2015-09-17 13:17 ` Jungseok Lee
2015-09-17 13:17 ` Jungseok Lee
2015-09-17 13:22 ` Jungseok Lee
2015-09-17 13:22 ` Jungseok Lee
2015-09-17 16:21 ` Catalin Marinas
2015-09-17 16:21 ` Catalin Marinas
2015-09-18 12:57 ` Jungseok Lee
2015-09-18 12:57 ` Jungseok Lee
2015-09-18 13:44 ` James Morse
2015-09-18 13:44 ` James Morse
2015-09-18 15:03 ` Catalin Marinas
2015-09-18 15:03 ` Catalin Marinas
2015-09-18 15:31 ` Catalin Marinas
2015-09-18 15:31 ` Catalin Marinas
2015-09-19 8:44 ` Jungseok Lee
2015-09-19 8:44 ` Jungseok Lee
2015-09-21 9:25 ` Catalin Marinas
2015-09-21 9:25 ` Catalin Marinas
2015-09-21 12:14 ` Jungseok Lee
2015-09-21 12:14 ` Jungseok Lee
2015-09-18 13:46 ` James Morse [this message]
2015-09-18 13:46 ` James Morse
2015-09-19 8:20 ` Jungseok Lee
2015-09-19 8:20 ` Jungseok Lee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55FC15CA.7040009@arm.com \
--to=james.morse@arm.com \
--cc=linux-arm-kernel@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.