All of lore.kernel.org
 help / color / mirror / Atom feed
From: james.morse@arm.com (James Morse)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2] arm64: Introduce IRQ stack
Date: Fri, 18 Sep 2015 14:46:50 +0100	[thread overview]
Message-ID: <55FC15CA.7040009@arm.com> (raw)
In-Reply-To: <1442155337-7020-1-git-send-email-jungseoklee85@gmail.com>

Hi Jungseok Lee,

I gave this a go on a Juno board, while generating usb/network interrupts:

Tested-by: James Morse <james.morse@arm.com>


On 13/09/15 15:42, Jungseok Lee wrote:
> Currently, kernel context and interrupts are handled using a single
> kernel stack navigated by sp_el1. This forces many systems to use
> 16KB stack, not 8KB one. Low memory platforms naturally suffer from
> memory pressure accompanied by performance degradation.
> 
> This patch addresses the issue as introducing a separate percpu IRQ
> stack to handle both hard and soft interrupts with two ground rules:
> 
>   - Utilize sp_el0 in EL1 context, which is not used currently
>   - Do not complicate current_thread_info calculation
> 
> It is a core concept to trace struct thread_info using sp_el0 instead
> of sp_el1. This approach helps arm64 align with other architectures
> regarding object_is_on_stack() without additional complexity.

I think you are missing a 'mov <reg>, sp; msr sp_el0, <reg>' in
kernel/sleep.S:cpu_resume():175. This code finds the saved stack pointer
from 'sleep_save_sp', and is called when the cpu wakes up from suspend.

It didn't show up in testing, because the wake-up is always under the idle
task, which evidently doesn't call current_thread_info() after wake-up.


> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 4306c93..c156540 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -88,7 +88,7 @@
>  
>  	.if	\el == 0
>  	mrs	x21, sp_el0
> -	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
> +	get_thread_info \el, tsk		// Ensure MDSCR_EL1.SS is clear,
>  	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
>  	disable_step_tsk x19, x20		// exceptions when scheduling.
>  	.else
> @@ -105,6 +105,8 @@
>  	.if	\el == 0
>  	mvn	x21, xzr
>  	str	x21, [sp, #S_SYSCALLNO]
> +	mov	x25, sp
> +	msr	sp_el0, x25
>  	.endif
>  
>  	/*
> @@ -163,9 +165,45 @@ alternative_endif
>  	eret					// return to kernel
>  	.endm
>  
> -	.macro	get_thread_info, rd
> +	.macro	get_thread_info, el, rd
> +	.if	\el == 0

Why does \el matter here?
If \el==0, we interrupted an el0 thread, and set sp_el0 in kernel_entry()
to the el1 stack.
If \el==1, we interrupted an el1 thread, didn't overwrite its sp_el0, so
sp_el0 & ~(THREAD_SIZE-1) will give us the struct thread_info of the
interrupted task.

So either way, sp_el0 is correct...


>  	mov	\rd, sp
> -	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
> +	.else
> +	mrs	\rd, sp_el0
> +	.endif
> +	and	\rd, \rd, #~(THREAD_SIZE - 1)	// bottom of thread stack
> +	.endm
> +
> +	.macro	get_irq_stack
> +	adr_l	x21, irq_stacks
> +	mrs	x22, tpidr_el1
> +	add	x21, x21, x22
> +	.endm
> +
> +	.macro	irq_stack_entry
> +	get_irq_stack
> +	ldr	w23, [x21, #IRQ_COUNT]
> +	cbnz	w23, 1f				// check irq recursion
> +	mov	x23, sp
> +	str	x23, [x21, #IRQ_THREAD_SP]
> +	ldr	x23, [x21, #IRQ_STACK]
> +	mov	sp, x23
> +	mov	x23, xzr
> +1:	add	w23, w23, #1
> +	str	w23, [x21, #IRQ_COUNT]

A (largely untested) example for the 'compare the high-order bits' way of
doing this:

	.macro	irq_stack_entry
	get_irq_stack
	ldr	x22, [x21, #IRQ_STACK]
	and	x23, x22, #~(THREAD_SIZE -1)
	mov	x24, sp
	and	x24, x24, #~(THREAD_SIZE -1)
	cmp	x23, x24	// irq_recursion?
	mov	x24, sp
	csel	x23, x24, x22, eq
	mov	sp, x23
	.endm

	/* preserve x24 between irq_stack_entry/irq_stack_exit */

	.macro	irq_stack_exit
	mov	sp, x24
	.endm

This would let you remove IRQ_COUNT and IRQ_THREAD_SP, and avoid the two
stores and a conditional-branch in irq_stack_entry/irq_stack_exit.

Thoughts?


> +	.endm
> +
> +	.macro	irq_stack_exit
> +	get_irq_stack
> +	ldr	w23, [x21, #IRQ_COUNT]
> +	sub	w23, w23, #1
> +	cbnz	w23, 1f				// check irq recursion
> +	mov	x23, sp
> +	str	x23, [x21, #IRQ_STACK]
> +	ldr	x23, [x21, #IRQ_THREAD_SP]
> +	mov	sp, x23
> +	mov	x23, xzr
> +1:	str	w23, [x21, #IRQ_COUNT]
>  	.endm
>  
>  /*
> @@ -183,10 +221,11 @@ tsk	.req	x28		// current thread_info
>   * Interrupt handling.
>   */
>  	.macro	irq_handler
> -	adrp	x1, handle_arch_irq
> -	ldr	x1, [x1, #:lo12:handle_arch_irq]
> +	ldr_l	x1, handle_arch_irq
>  	mov	x0, sp
> +	irq_stack_entry
>  	blr	x1
> +	irq_stack_exit
>  	.endm
>  
>  	.text
> @@ -361,7 +400,7 @@ el1_irq:
>  	irq_handler
>  
>  #ifdef CONFIG_PREEMPT
> -	get_thread_info tsk
> +	get_thread_info 1, tsk
>  	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
>  	cbnz	w24, 1f				// preempt count != 0
>  	ldr	x0, [tsk, #TI_FLAGS]		// get flags
> @@ -597,6 +636,7 @@ ENTRY(cpu_switch_to)
>  	ldp	x29, x9, [x8], #16
>  	ldr	lr, [x8]
>  	mov	sp, x9
> +	msr	sp_el0, x9
>  	ret
>  ENDPROC(cpu_switch_to)
>  
> @@ -655,7 +695,7 @@ ENTRY(ret_from_fork)
>  	cbz	x19, 1f				// not a kernel thread
>  	mov	x0, x20
>  	blr	x19
> -1:	get_thread_info tsk
> +1:	get_thread_info 1, tsk
>  	b	ret_to_user
>  ENDPROC(ret_from_fork)
>  
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index a055be6..cb13290 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -441,6 +441,8 @@ __mmap_switched:
>  	b	1b
>  2:
>  	adr_l	sp, initial_sp, x4
> +	mov	x4, sp

There should probably a comment explaining why sp_el0 is being set (for the
changes outside entry.S). Something like:

	msr	sp_el0, x4		// stash struct thread_info


> +	msr	sp_el0, x4
>  	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
>  	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
>  	mov	x29, #0
> @@ -613,6 +615,7 @@ ENDPROC(secondary_startup)
>  ENTRY(__secondary_switched)
>  	ldr	x0, [x21]			// get secondary_data.stack
>  	mov	sp, x0
> +	msr	sp_el0, x0
>  	mov	x29, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)


Thanks,

James

WARNING: multiple messages have this Message-ID (diff)
From: James Morse <james.morse@arm.com>
To: Jungseok Lee <jungseoklee85@gmail.com>
Cc: Catalin Marinas <Catalin.Marinas@arm.com>,
	Will Deacon <Will.Deacon@arm.com>,
	"linux-arm-kernel@lists.infradead.org" 
	<linux-arm-kernel@lists.infradead.org>,
	"takahiro.akashi@linaro.org" <takahiro.akashi@linaro.org>,
	Mark Rutland <Mark.Rutland@arm.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2] arm64: Introduce IRQ stack
Date: Fri, 18 Sep 2015 14:46:50 +0100	[thread overview]
Message-ID: <55FC15CA.7040009@arm.com> (raw)
In-Reply-To: <1442155337-7020-1-git-send-email-jungseoklee85@gmail.com>

Hi Jungseok Lee,

I gave this a go on a Juno board, while generating usb/network interrupts:

Tested-by: James Morse <james.morse@arm.com>


On 13/09/15 15:42, Jungseok Lee wrote:
> Currently, kernel context and interrupts are handled using a single
> kernel stack navigated by sp_el1. This forces many systems to use
> 16KB stack, not 8KB one. Low memory platforms naturally suffer from
> memory pressure accompanied by performance degradation.
> 
> This patch addresses the issue as introducing a separate percpu IRQ
> stack to handle both hard and soft interrupts with two ground rules:
> 
>   - Utilize sp_el0 in EL1 context, which is not used currently
>   - Do not complicate current_thread_info calculation
> 
> It is a core concept to trace struct thread_info using sp_el0 instead
> of sp_el1. This approach helps arm64 align with other architectures
> regarding object_is_on_stack() without additional complexity.

I think you are missing a 'mov <reg>, sp; msr sp_el0, <reg>' in
kernel/sleep.S:cpu_resume():175. This code finds the saved stack pointer
from 'sleep_save_sp', and is called when the cpu wakes up from suspend.

It didn't show up in testing, because the wake-up is always under the idle
task, which evidently doesn't call current_thread_info() after wake-up.


> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 4306c93..c156540 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -88,7 +88,7 @@
>  
>  	.if	\el == 0
>  	mrs	x21, sp_el0
> -	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
> +	get_thread_info \el, tsk		// Ensure MDSCR_EL1.SS is clear,
>  	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
>  	disable_step_tsk x19, x20		// exceptions when scheduling.
>  	.else
> @@ -105,6 +105,8 @@
>  	.if	\el == 0
>  	mvn	x21, xzr
>  	str	x21, [sp, #S_SYSCALLNO]
> +	mov	x25, sp
> +	msr	sp_el0, x25
>  	.endif
>  
>  	/*
> @@ -163,9 +165,45 @@ alternative_endif
>  	eret					// return to kernel
>  	.endm
>  
> -	.macro	get_thread_info, rd
> +	.macro	get_thread_info, el, rd
> +	.if	\el == 0

Why does \el matter here?
If \el==0, we interrupted an el0 thread, and set sp_el0 in kernel_entry()
to the el1 stack.
If \el==1, we interrupted an el1 thread, didn't overwrite its sp_el0, so
sp_el0 & ~(THREAD_SIZE-1) will give us the struct thread_info of the
interrupted task.

So either way, sp_el0 is correct...


>  	mov	\rd, sp
> -	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
> +	.else
> +	mrs	\rd, sp_el0
> +	.endif
> +	and	\rd, \rd, #~(THREAD_SIZE - 1)	// bottom of thread stack
> +	.endm
> +
> +	.macro	get_irq_stack
> +	adr_l	x21, irq_stacks
> +	mrs	x22, tpidr_el1
> +	add	x21, x21, x22
> +	.endm
> +
> +	.macro	irq_stack_entry
> +	get_irq_stack
> +	ldr	w23, [x21, #IRQ_COUNT]
> +	cbnz	w23, 1f				// check irq recursion
> +	mov	x23, sp
> +	str	x23, [x21, #IRQ_THREAD_SP]
> +	ldr	x23, [x21, #IRQ_STACK]
> +	mov	sp, x23
> +	mov	x23, xzr
> +1:	add	w23, w23, #1
> +	str	w23, [x21, #IRQ_COUNT]

A (largely untested) example for the 'compare the high-order bits' way of
doing this:

	.macro	irq_stack_entry
	get_irq_stack
	ldr	x22, [x21, #IRQ_STACK]
	and	x23, x22, #~(THREAD_SIZE -1)
	mov	x24, sp
	and	x24, x24, #~(THREAD_SIZE -1)
	cmp	x23, x24	// irq_recursion?
	mov	x24, sp
	csel	x23, x24, x22, eq
	mov	sp, x23
	.endm

	/* preserve x24 between irq_stack_entry/irq_stack_exit */

	.macro	irq_stack_exit
	mov	sp, x24
	.endm

This would let you remove IRQ_COUNT and IRQ_THREAD_SP, and avoid the two
stores and a conditional-branch in irq_stack_entry/irq_stack_exit.

Thoughts?


> +	.endm
> +
> +	.macro	irq_stack_exit
> +	get_irq_stack
> +	ldr	w23, [x21, #IRQ_COUNT]
> +	sub	w23, w23, #1
> +	cbnz	w23, 1f				// check irq recursion
> +	mov	x23, sp
> +	str	x23, [x21, #IRQ_STACK]
> +	ldr	x23, [x21, #IRQ_THREAD_SP]
> +	mov	sp, x23
> +	mov	x23, xzr
> +1:	str	w23, [x21, #IRQ_COUNT]
>  	.endm
>  
>  /*
> @@ -183,10 +221,11 @@ tsk	.req	x28		// current thread_info
>   * Interrupt handling.
>   */
>  	.macro	irq_handler
> -	adrp	x1, handle_arch_irq
> -	ldr	x1, [x1, #:lo12:handle_arch_irq]
> +	ldr_l	x1, handle_arch_irq
>  	mov	x0, sp
> +	irq_stack_entry
>  	blr	x1
> +	irq_stack_exit
>  	.endm
>  
>  	.text
> @@ -361,7 +400,7 @@ el1_irq:
>  	irq_handler
>  
>  #ifdef CONFIG_PREEMPT
> -	get_thread_info tsk
> +	get_thread_info 1, tsk
>  	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
>  	cbnz	w24, 1f				// preempt count != 0
>  	ldr	x0, [tsk, #TI_FLAGS]		// get flags
> @@ -597,6 +636,7 @@ ENTRY(cpu_switch_to)
>  	ldp	x29, x9, [x8], #16
>  	ldr	lr, [x8]
>  	mov	sp, x9
> +	msr	sp_el0, x9
>  	ret
>  ENDPROC(cpu_switch_to)
>  
> @@ -655,7 +695,7 @@ ENTRY(ret_from_fork)
>  	cbz	x19, 1f				// not a kernel thread
>  	mov	x0, x20
>  	blr	x19
> -1:	get_thread_info tsk
> +1:	get_thread_info 1, tsk
>  	b	ret_to_user
>  ENDPROC(ret_from_fork)
>  
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index a055be6..cb13290 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -441,6 +441,8 @@ __mmap_switched:
>  	b	1b
>  2:
>  	adr_l	sp, initial_sp, x4
> +	mov	x4, sp

There should probably a comment explaining why sp_el0 is being set (for the
changes outside entry.S). Something like:

	msr	sp_el0, x4		// stash struct thread_info


> +	msr	sp_el0, x4
>  	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
>  	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
>  	mov	x29, #0
> @@ -613,6 +615,7 @@ ENDPROC(secondary_startup)
>  ENTRY(__secondary_switched)
>  	ldr	x0, [x21]			// get secondary_data.stack
>  	mov	sp, x0
> +	msr	sp_el0, x0
>  	mov	x29, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)


Thanks,

James

  parent reply	other threads:[~2015-09-18 13:46 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-13 14:42 [PATCH v2] arm64: Introduce IRQ stack Jungseok Lee
2015-09-13 14:42 ` Jungseok Lee
2015-09-16 11:25 ` Will Deacon
2015-09-16 11:25   ` Will Deacon
2015-09-17 10:33   ` James Morse
2015-09-17 10:33     ` James Morse
2015-09-17 10:48     ` Catalin Marinas
2015-09-17 10:48       ` Catalin Marinas
2015-09-17 12:36     ` Jungseok Lee
2015-09-17 12:36       ` Jungseok Lee
2015-09-17 17:07       ` Catalin Marinas
2015-09-17 17:07         ` Catalin Marinas
2015-09-18 13:02         ` Jungseok Lee
2015-09-18 13:02           ` Jungseok Lee
2015-09-17 11:17 ` Catalin Marinas
2015-09-17 11:17   ` Catalin Marinas
2015-09-17 13:17   ` Jungseok Lee
2015-09-17 13:17     ` Jungseok Lee
2015-09-17 13:22     ` Jungseok Lee
2015-09-17 13:22       ` Jungseok Lee
2015-09-17 16:21       ` Catalin Marinas
2015-09-17 16:21         ` Catalin Marinas
2015-09-18 12:57         ` Jungseok Lee
2015-09-18 12:57           ` Jungseok Lee
2015-09-18 13:44           ` James Morse
2015-09-18 13:44             ` James Morse
2015-09-18 15:03           ` Catalin Marinas
2015-09-18 15:03             ` Catalin Marinas
2015-09-18 15:31             ` Catalin Marinas
2015-09-18 15:31               ` Catalin Marinas
2015-09-19  8:44               ` Jungseok Lee
2015-09-19  8:44                 ` Jungseok Lee
2015-09-21  9:25                 ` Catalin Marinas
2015-09-21  9:25                   ` Catalin Marinas
2015-09-21 12:14                   ` Jungseok Lee
2015-09-21 12:14                     ` Jungseok Lee
2015-09-18 13:46 ` James Morse [this message]
2015-09-18 13:46   ` James Morse
2015-09-19  8:20   ` Jungseok Lee
2015-09-19  8:20     ` Jungseok Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55FC15CA.7040009@arm.com \
    --to=james.morse@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.