* [PATCH v8 0/3] Cortex-M3 series once more
@ 2013-01-17  8:59 Uwe Kleine-König
  2013-01-17  8:59 ` [PATCH v8 1/3] ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15 Uwe Kleine-König
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Uwe Kleine-König @ 2013-01-17  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

Based on Jonny's comments I changed a few more details; the diff
relative to v7 can be found below, after the diffstat. The changes are:

 - describe why a #define for cr_alignment is OK in both the
   commit log and a comment in the header file
 - a workaround for erratum 752419 that newer gcc warns about
 - add a few more comments for better readability
 - a whitespace fix

I think the updates are minor enough to still allow the patches to go
into v3.9-rc1. Russell, what do you think? You can pull them from

	git://git.pengutronix.de/git/ukl/linux.git for-next

still based on v3.7-rc1 + your commit "ARM: fix oops on initial entry to
userspace with Thumb2 kernels".

Catalin Marinas (1):
  Cortex-M3: Add base support for Cortex-M3

Uwe Kleine-König (2):
  ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15
  Cortex-M3: Add support for exception handling

 arch/arm/include/asm/assembler.h   |   13 ++-
 arch/arm/include/asm/cp15.h        |   16 +++-
 arch/arm/include/asm/cputype.h     |    3 +
 arch/arm/include/asm/glue-cache.h  |   25 ++++++
 arch/arm/include/asm/glue-df.h     |    8 ++
 arch/arm/include/asm/glue-proc.h   |    9 ++
 arch/arm/include/asm/irqflags.h    |   22 +++--
 arch/arm/include/asm/processor.h   |    7 ++
 arch/arm/include/asm/ptrace.h      |    8 ++
 arch/arm/include/asm/system_info.h |    1 +
 arch/arm/include/uapi/asm/ptrace.h |   36 ++++++--
 arch/arm/kernel/asm-offsets.c      |    3 +
 arch/arm/kernel/entry-common.S     |    4 +
 arch/arm/kernel/entry-header.S     |  148 +++++++++++++++++++++++++++++++++
 arch/arm/kernel/entry-v7m.S        |  134 ++++++++++++++++++++++++++++++
 arch/arm/kernel/head-common.S      |    9 +-
 arch/arm/kernel/head-nommu.S       |    9 +-
 arch/arm/kernel/process.c          |    4 +
 arch/arm/kernel/ptrace.c           |    3 +
 arch/arm/kernel/setup.c            |   13 ++-
 arch/arm/kernel/traps.c            |    2 +
 arch/arm/mm/alignment.c            |    2 +
 arch/arm/mm/mmu.c                  |   17 ++++
 arch/arm/mm/nommu.c                |    2 +
 arch/arm/mm/proc-v7m.S             |  161 ++++++++++++++++++++++++++++++++++++
 25 files changed, 637 insertions(+), 22 deletions(-)
 create mode 100644 arch/arm/kernel/entry-v7m.S
 create mode 100644 arch/arm/mm/proc-v7m.S

The incremental changes since v7 are:

diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index d814435..1f3262e 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -86,6 +86,11 @@ static inline void set_copro_access(unsigned int val)
 
 #else /* ifdef CONFIG_CPU_CP15 */
 
+/*
+ * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the
+ * minds of the developers). Yielding 0 for machines without a cp15 (and making
+ * it read-only) is fine for most cases and saves quite some #ifdeffery.
+ */
 #define cr_no_alignment	UL(0)
 #define cr_alignment	UL(0)
 
diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
index 2ae7d1b..d3be66e 100644
--- a/arch/arm/include/uapi/asm/ptrace.h
+++ b/arch/arm/include/uapi/asm/ptrace.h
@@ -144,7 +144,7 @@ struct pt_regs {
 #define ARM_r1		uregs[1]
 #define ARM_r0		uregs[0]
 #define ARM_ORIG_r0	uregs[17]
-#define ARM_EXC_RET    uregs[18]
+#define ARM_EXC_RET	uregs[18]
 
 /*
  * The size of the user-visible VFP state as seen by PTRACE_GET/SETVFPREGS
diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
index a0991dc..842394c 100644
--- a/arch/arm/kernel/entry-v7m.S
+++ b/arch/arm/kernel/entry-v7m.S
@@ -102,8 +102,8 @@ ENTRY(__switch_to)
 	mov	ip, r4
 	mov	r0, r5
 	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
-	ldr	sp, [ip], #4
-	ldr	pc, [ip]
+	ldr	sp, [ip]
+	ldr	pc, [ip, #4]!
 	.fnend
 ENDPROC(__switch_to)
 
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 2f560c5..5b391a6 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -117,7 +117,7 @@ __mmap_switched_data:
 #ifdef CONFIG_CPU_CP15
 	.long	cr_alignment			@ r7
 #else
-	.long	0
+	.long	0				@ r7
 #endif
 	.long	init_thread_union + THREAD_START_SP @ sp
 	.size	__mmap_switched_data, . - __mmap_switched_data
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b675918..92abdb8 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -196,7 +196,7 @@ void adjust_cr(unsigned long mask, unsigned long set)
 }
 #endif
 
-#else
+#else /* ifdef CONFIG_CPU_CP15 */
 
 static int __init early_cachepolicy(char *p)
 {
@@ -210,7 +210,7 @@ static int __init noalign_setup(char *__unused)
 }
 __setup("noalign", noalign_setup);
 
-#endif
+#endif /* ifdef CONFIG_CPU_CP15 / else */
 
 #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

-- 
1.7.10.4


* [PATCH v8 1/3] ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15
  2013-01-17  8:59 [PATCH v8 0/3] Cortex-M3 series once more Uwe Kleine-König
@ 2013-01-17  8:59 ` Uwe Kleine-König
  2013-01-17  8:59 ` [PATCH v8 2/3] Cortex-M3: Add base support for Cortex-M3 Uwe Kleine-König
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Uwe Kleine-König @ 2013-01-17  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

This makes cr_alignment a constant 0 when CONFIG_CPU_CP15 isn't defined,
to break code that tries to modify the value, as such code is likely
built on wrong assumptions. For code that only reads the value, 0 is a
more or less fine value to report. As suggested by Nicolas, this is
cleaner than #ifdef'ing out all uses.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/arm/include/asm/cp15.h   |   16 +++++++++++++++-
 arch/arm/kernel/head-common.S |    9 +++++++--
 arch/arm/mm/alignment.c       |    2 ++
 arch/arm/mm/mmu.c             |   17 +++++++++++++++++
 4 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 5ef4d80..1f3262e 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -42,6 +42,8 @@
 #define vectors_high()	(0)
 #endif
 
+#ifdef CONFIG_CPU_CP15
+
 extern unsigned long cr_no_alignment;	/* defined in entry-armv.S */
 extern unsigned long cr_alignment;	/* defined in entry-armv.S */
 
@@ -82,6 +84,18 @@ static inline void set_copro_access(unsigned int val)
 	isb();
 }
 
-#endif
+#else /* ifdef CONFIG_CPU_CP15 */
+
+/*
+ * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the
+ * minds of the developers). Yielding 0 for machines without a cp15 (and making
+ * it read-only) is fine for most cases and saves quite some #ifdeffery.
+ */
+#define cr_no_alignment	UL(0)
+#define cr_alignment	UL(0)
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
+
+#endif /* ifndef __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 854bd22..5b391a6 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -98,8 +98,9 @@ __mmap_switched:
 	str	r9, [r4]			@ Save processor ID
 	str	r1, [r5]			@ Save machine type
 	str	r2, [r6]			@ Save atags pointer
-	bic	r4, r0, #CR_A			@ Clear 'A' bit
-	stmia	r7, {r0, r4}			@ Save control register values
+	cmp	r7, #0
+	bicne	r4, r0, #CR_A			@ Clear 'A' bit
+	stmneia	r7, {r0, r4}			@ Save control register values
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
@@ -113,7 +114,11 @@ __mmap_switched_data:
 	.long	processor_id			@ r4
 	.long	__machine_arch_type		@ r5
 	.long	__atags_pointer			@ r6
+#ifdef CONFIG_CPU_CP15
 	.long	cr_alignment			@ r7
+#else
+	.long	0				@ r7
+#endif
 	.long	init_thread_union + THREAD_START_SP @ sp
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b9f60eb..5748094 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -962,12 +962,14 @@ static int __init alignment_init(void)
 		return -ENOMEM;
 #endif
 
+#ifdef CONFIG_CPU_CP15
 	if (cpu_is_v6_unaligned()) {
 		cr_alignment &= ~CR_A;
 		cr_no_alignment &= ~CR_A;
 		set_cr(cr_alignment);
 		ai_usermode = safe_usermode(ai_usermode, false);
 	}
+#endif
 
 	hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,
 			"alignment exception");
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 941dfb9..92abdb8 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -97,6 +97,7 @@ static struct cachepolicy cache_policies[] __initdata = {
 	}
 };
 
+#ifdef CONFIG_CPU_CP15
 /*
  * These are useful for identifying cache coherency
  * problems by allowing the cache or the cache and
@@ -195,6 +196,22 @@ void adjust_cr(unsigned long mask, unsigned long set)
 }
 #endif
 
+#else /* ifdef CONFIG_CPU_CP15 */
+
+static int __init early_cachepolicy(char *p)
+{
+	pr_warning("cachepolicy kernel parameter not supported without cp15\n");
+}
+early_param("cachepolicy", early_cachepolicy);
+
+static int __init noalign_setup(char *__unused)
+{
+	pr_warning("noalign kernel parameter not supported without cp15\n");
+}
+__setup("noalign", noalign_setup);
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
+
 #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
-- 
1.7.10.4


* [PATCH v8 2/3] Cortex-M3: Add base support for Cortex-M3
  2013-01-17  8:59 [PATCH v8 0/3] Cortex-M3 series once more Uwe Kleine-König
  2013-01-17  8:59 ` [PATCH v8 1/3] ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15 Uwe Kleine-König
@ 2013-01-17  8:59 ` Uwe Kleine-König
  2013-01-18 18:23   ` Jonathan Austin
  2013-01-17  8:59 ` [PATCH v8 3/3] Cortex-M3: Add support for exception handling Uwe Kleine-König
  2013-01-24 18:46 ` [PATCH v8 0/3] Cortex-M3 series once more Jonathan Austin
  3 siblings, 1 reply; 7+ messages in thread
From: Uwe Kleine-König @ 2013-01-17  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch adds the base support for the Cortex-M3 processor (ARMv7-M
architecture). It consists of the corresponding arch/arm/mm/ files and
various #ifdef's around the kernel. Exception handling is implemented by
a subsequent patch.

[ukleinek: squash in some changes originating from commit

b5717ba (Cortex-M3: Add support for the Microcontroller Prototyping System)

from the v2.6.33-arm1 patch stack, port to post 3.6, drop zImage
support, drop reorganisation of pt_regs, assert CONFIG_V7M doesn't leak
into installed headers and a few cosmetic changes]

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/arm/include/asm/assembler.h   |   13 ++-
 arch/arm/include/asm/cputype.h     |    3 +
 arch/arm/include/asm/glue-cache.h  |   25 ++++++
 arch/arm/include/asm/glue-df.h     |    8 ++
 arch/arm/include/asm/glue-proc.h   |    9 ++
 arch/arm/include/asm/irqflags.h    |   22 +++--
 arch/arm/include/asm/processor.h   |    7 ++
 arch/arm/include/asm/ptrace.h      |    8 ++
 arch/arm/include/asm/system_info.h |    1 +
 arch/arm/include/uapi/asm/ptrace.h |   36 ++++++--
 arch/arm/kernel/asm-offsets.c      |    3 +
 arch/arm/kernel/head-nommu.S       |    9 +-
 arch/arm/kernel/setup.c            |   13 ++-
 arch/arm/kernel/traps.c            |    2 +
 arch/arm/mm/nommu.c                |    2 +
 arch/arm/mm/proc-v7m.S             |  161 ++++++++++++++++++++++++++++++++++++
 16 files changed, 303 insertions(+), 19 deletions(-)
 create mode 100644 arch/arm/mm/proc-v7m.S

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 2ef9581..ab7c02c 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -136,7 +136,11 @@
  * assumes FIQs are enabled, and that the processor is in SVC mode.
  */
 	.macro	save_and_disable_irqs, oldcpsr
+#ifdef CONFIG_CPU_V7M
+	mrs	\oldcpsr, primask
+#else
 	mrs	\oldcpsr, cpsr
+#endif
 	disable_irq
 	.endm
 
@@ -150,7 +154,11 @@
  * guarantee that this will preserve the flags.
  */
 	.macro	restore_irqs_notrace, oldcpsr
+#ifdef CONFIG_CPU_V7M
+	msr	primask, \oldcpsr
+#else
 	msr	cpsr_c, \oldcpsr
+#endif
 	.endm
 
 	.macro restore_irqs, oldcpsr
@@ -229,7 +237,10 @@
 #endif
 	.endm
 
-#ifdef CONFIG_THUMB2_KERNEL
+#if defined(CONFIG_CPU_V7M)
+	.macro	setmode, mode, reg
+	.endm
+#elif defined(CONFIG_THUMB2_KERNEL)
 	.macro	setmode, mode, reg
 	mov	\reg, #\mode
 	msr	cpsr_c, \reg
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index cb47d28..5bd8cb6 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -46,6 +46,9 @@ extern unsigned int processor_id;
 		    : "cc");						\
 		__val;							\
 	})
+#elif defined(CONFIG_CPU_V7M)
+#define read_cpuid(reg) (*(unsigned int *)0xe000ed00)
+#define read_cpuid_ext(reg) 0
 #else
 #define read_cpuid(reg) (processor_id)
 #define read_cpuid_ext(reg) 0
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index cca9f15..ea98658 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -125,10 +125,35 @@
 # endif
 #endif
 
+#if defined(CONFIG_CPU_V7M)
+# ifdef _CACHE
+#  error "Multi-cache not supported on ARMv7-M"
+# else
+#  define _CACHE nop
+# endif
+#endif
+
 #if !defined(_CACHE) && !defined(MULTI_CACHE)
 #error Unknown cache maintenance model
 #endif
 
+#ifndef __ASSEMBLER__
+static inline void nop_flush_icache_all(void) { }
+static inline void nop_flush_kern_cache_all(void) { }
+static inline void nop_flush_kern_cache_louis(void) { }
+static inline void nop_flush_user_cache_all(void) { }
+static inline void nop_flush_user_cache_range(unsigned long a, unsigned long b, unsigned int c) { }
+
+static inline void nop_coherent_kern_range(unsigned long a, unsigned long b) { }
+static inline int nop_coherent_user_range(unsigned long a, unsigned long b) { return 0; }
+static inline void nop_flush_kern_dcache_area(void *a, size_t s) { }
+
+static inline void nop_dma_flush_range(const void *a, const void *b) { }
+
+static inline void nop_dma_map_area(const void *s, size_t l, int f) { }
+static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { }
+#endif
+
 #ifndef MULTI_CACHE
 #define __cpuc_flush_icache_all		__glue(_CACHE,_flush_icache_all)
 #define __cpuc_flush_kern_all		__glue(_CACHE,_flush_kern_cache_all)
diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h
index 8cacbcd..1f2339c 100644
--- a/arch/arm/include/asm/glue-df.h
+++ b/arch/arm/include/asm/glue-df.h
@@ -95,6 +95,14 @@
 # endif
 #endif
 
+#ifdef CONFIG_CPU_ABRT_NOMMU
+# ifdef CPU_DABORT_HANDLER
+#  define MULTI_DABORT 1
+# else
+#  define CPU_DABORT_HANDLER nommu_early_abort
+# endif
+#endif
+
 #ifndef CPU_DABORT_HANDLER
 #error Unknown data abort handler type
 #endif
diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h
index ac1dd54..f2f39bc 100644
--- a/arch/arm/include/asm/glue-proc.h
+++ b/arch/arm/include/asm/glue-proc.h
@@ -230,6 +230,15 @@
 # endif
 #endif
 
+#ifdef CONFIG_CPU_V7M
+# ifdef CPU_NAME
+#  undef  MULTI_CPU
+#  define MULTI_CPU
+# else
+#  define CPU_NAME cpu_v7m
+# endif
+#endif
+
 #ifndef MULTI_CPU
 #define cpu_proc_init			__glue(CPU_NAME,_proc_init)
 #define cpu_proc_fin			__glue(CPU_NAME,_proc_fin)
diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h
index 1e6cca5..3b763d6 100644
--- a/arch/arm/include/asm/irqflags.h
+++ b/arch/arm/include/asm/irqflags.h
@@ -8,6 +8,16 @@
 /*
  * CPU interrupt mask handling.
  */
+#ifdef CONFIG_CPU_V7M
+#define IRQMASK_REG_NAME_R "primask"
+#define IRQMASK_REG_NAME_W "primask"
+#define IRQMASK_I_BIT	1
+#else
+#define IRQMASK_REG_NAME_R "cpsr"
+#define IRQMASK_REG_NAME_W "cpsr_c"
+#define IRQMASK_I_BIT	PSR_I_BIT
+#endif
+
 #if __LINUX_ARM_ARCH__ >= 6
 
 static inline unsigned long arch_local_irq_save(void)
@@ -15,7 +25,7 @@ static inline unsigned long arch_local_irq_save(void)
 	unsigned long flags;
 
 	asm volatile(
-		"	mrs	%0, cpsr	@ arch_local_irq_save\n"
+		"	mrs	%0, " IRQMASK_REG_NAME_R "	@ arch_local_irq_save\n"
 		"	cpsid	i"
 		: "=r" (flags) : : "memory", "cc");
 	return flags;
@@ -129,7 +139,7 @@ static inline unsigned long arch_local_save_flags(void)
 {
 	unsigned long flags;
 	asm volatile(
-		"	mrs	%0, cpsr	@ local_save_flags"
+		"	mrs	%0, " IRQMASK_REG_NAME_R "	@ local_save_flags"
 		: "=r" (flags) : : "memory", "cc");
 	return flags;
 }
@@ -140,7 +150,7 @@ static inline unsigned long arch_local_save_flags(void)
 static inline void arch_local_irq_restore(unsigned long flags)
 {
 	asm volatile(
-		"	msr	cpsr_c, %0	@ local_irq_restore"
+		"	msr	" IRQMASK_REG_NAME_W ", %0	@ local_irq_restore"
 		:
 		: "r" (flags)
 		: "memory", "cc");
@@ -148,8 +158,8 @@ static inline void arch_local_irq_restore(unsigned long flags)
 
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-	return flags & PSR_I_BIT;
+	return flags & IRQMASK_I_BIT;
 }
 
-#endif
-#endif
+#endif /* ifdef __KERNEL__ */
+#endif /* ifndef __ASM_ARM_IRQFLAGS_H */
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 06e7d50..5e61b88 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -49,7 +49,14 @@ struct thread_struct {
 #ifdef CONFIG_MMU
 #define nommu_start_thread(regs) do { } while (0)
 #else
+#ifndef CONFIG_CPU_V7M
 #define nommu_start_thread(regs) regs->ARM_r10 = current->mm->start_data
+#else
+#define nommu_start_thread(regs) do {					\
+	regs->ARM_r10 = current->mm->start_data;			\
+	regs->ARM_EXC_RET = 0xfffffffdL;				\
+} while (0)
+#endif
 #endif
 
 #define start_thread(regs,pc,sp)					\
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 3d52ee1..67661e8 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -14,7 +14,11 @@
 
 #ifndef __ASSEMBLY__
 struct pt_regs {
+#ifdef CONFIG_CPU_V7M
+	unsigned long uregs[20];
+#else
 	unsigned long uregs[18];
+#endif
 };
 
 #define user_mode(regs)	\
@@ -45,6 +49,7 @@ struct pt_regs {
  */
 static inline int valid_user_regs(struct pt_regs *regs)
 {
+#ifndef CONFIG_CPU_V7M
 	unsigned long mode = regs->ARM_cpsr & MODE_MASK;
 
 	/*
@@ -67,6 +72,9 @@ static inline int valid_user_regs(struct pt_regs *regs)
 		regs->ARM_cpsr |= USR_MODE;
 
 	return 0;
+#else /* ifndef CONFIG_CPU_V7M */
+	return 1;
+#endif
 }
 
 static inline long regs_return_value(struct pt_regs *regs)
diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h
index dfd386d..720ea03 100644
--- a/arch/arm/include/asm/system_info.h
+++ b/arch/arm/include/asm/system_info.h
@@ -11,6 +11,7 @@
 #define CPU_ARCH_ARMv5TEJ	7
 #define CPU_ARCH_ARMv6		8
 #define CPU_ARCH_ARMv7		9
+#define CPU_ARCH_ARMv7M		10
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
index 96ee092..d3be66e 100644
--- a/arch/arm/include/uapi/asm/ptrace.h
+++ b/arch/arm/include/uapi/asm/ptrace.h
@@ -34,28 +34,47 @@
 
 /*
  * PSR bits
+ * Note on V7M there is no mode contained in the PSR
  */
 #define USR26_MODE	0x00000000
 #define FIQ26_MODE	0x00000001
 #define IRQ26_MODE	0x00000002
 #define SVC26_MODE	0x00000003
+#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M)
+/*
+ * Use 0 here to get code right that creates a userspace
+ * or kernel space thread.
+ */
+#define USR_MODE	0x00000000
+#define SVC_MODE	0x00000000
+#else
 #define USR_MODE	0x00000010
+#define SVC_MODE	0x00000013
+#endif
 #define FIQ_MODE	0x00000011
 #define IRQ_MODE	0x00000012
-#define SVC_MODE	0x00000013
 #define ABT_MODE	0x00000017
 #define HYP_MODE	0x0000001a
 #define UND_MODE	0x0000001b
 #define SYSTEM_MODE	0x0000001f
 #define MODE32_BIT	0x00000010
 #define MODE_MASK	0x0000001f
-#define PSR_T_BIT	0x00000020
-#define PSR_F_BIT	0x00000040
-#define PSR_I_BIT	0x00000080
-#define PSR_A_BIT	0x00000100
-#define PSR_E_BIT	0x00000200
-#define PSR_J_BIT	0x01000000
-#define PSR_Q_BIT	0x08000000
+
+#define V4_PSR_T_BIT	0x00000020	/* >= V4T, but not V7M */
+#define V7M_PSR_T_BIT	0x01000000
+#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M)
+#define PSR_T_BIT	V7M_PSR_T_BIT
+#else
+/* for compatibility */
+#define PSR_T_BIT	V4_PSR_T_BIT
+#endif
+
+#define PSR_F_BIT	0x00000040	/* >= V4, but not V7M */
+#define PSR_I_BIT	0x00000080	/* >= V4, but not V7M */
+#define PSR_A_BIT	0x00000100	/* >= V6, but not V7M */
+#define PSR_E_BIT	0x00000200	/* >= V6, but not V7M */
+#define PSR_J_BIT	0x01000000	/* >= V5J, but not V7M */
+#define PSR_Q_BIT	0x08000000	/* >= V5E, including V7M */
 #define PSR_V_BIT	0x10000000
 #define PSR_C_BIT	0x20000000
 #define PSR_Z_BIT	0x40000000
@@ -125,6 +144,7 @@ struct pt_regs {
 #define ARM_r1		uregs[1]
 #define ARM_r0		uregs[0]
 #define ARM_ORIG_r0	uregs[17]
+#define ARM_EXC_RET	uregs[18]
 
 /*
  * The size of the user-visible VFP state as seen by PTRACE_GET/SETVFPREGS
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index c985b48..5fe9ace 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -93,6 +93,9 @@ int main(void)
   DEFINE(S_PC,			offsetof(struct pt_regs, ARM_pc));
   DEFINE(S_PSR,			offsetof(struct pt_regs, ARM_cpsr));
   DEFINE(S_OLD_R0,		offsetof(struct pt_regs, ARM_ORIG_r0));
+#ifdef CONFIG_CPU_V7M
+  DEFINE(S_EXC_RET,		offsetof(struct pt_regs, ARM_EXC_RET));
+#endif
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
 #ifdef CONFIG_CACHE_L2X0
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 278cfc1..c391c05 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -44,10 +44,13 @@ ENTRY(stext)
 
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
-#ifndef CONFIG_CPU_CP15
-	ldr	r9, =CONFIG_PROCESSOR_ID
-#else
+#if defined(CONFIG_CPU_CP15)
 	mrc	p15, 0, r9, c0, c0		@ get processor id
+#elif defined(CONFIG_CPU_V7M)
+	ldr	r9, =0xe000ed00			@ CPUID register address
+	ldr	r9, [r9]
+#else
+	ldr	r9, =CONFIG_PROCESSOR_ID
 #endif
 	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
 	movs	r10, r5				@ invalid processor (r5=0)?
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index da1d1aa..3cca0c8 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -128,7 +128,9 @@ struct stack {
 	u32 und[3];
 } ____cacheline_aligned;
 
+#ifndef CONFIG_CPU_V7M
 static struct stack stacks[NR_CPUS];
+#endif
 
 char elf_platform[ELF_PLATFORM_SIZE];
 EXPORT_SYMBOL(elf_platform);
@@ -207,7 +209,7 @@ static const char *proc_arch[] = {
 	"5TEJ",
 	"6TEJ",
 	"7",
-	"?(11)",
+	"7M",
 	"?(12)",
 	"?(13)",
 	"?(14)",
@@ -216,6 +218,12 @@ static const char *proc_arch[] = {
 	"?(17)",
 };
 
+#ifdef CONFIG_CPU_V7M
+static int __get_cpu_architecture(void)
+{
+	return CPU_ARCH_ARMv7M;
+}
+#else
 static int __get_cpu_architecture(void)
 {
 	int cpu_arch;
@@ -248,6 +256,7 @@ static int __get_cpu_architecture(void)
 
 	return cpu_arch;
 }
+#endif
 
 int __pure cpu_architecture(void)
 {
@@ -375,6 +384,7 @@ static void __init feat_v6_fixup(void)
  */
 void cpu_init(void)
 {
+#ifndef CONFIG_CPU_V7M
 	unsigned int cpu = smp_processor_id();
 	struct stack *stk = &stacks[cpu];
 
@@ -419,6 +429,7 @@ void cpu_init(void)
 	      "I" (offsetof(struct stack, und[0])),
 	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
 	    : "r14");
+#endif
 }
 
 int __cpu_logical_map[NR_CPUS];
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index b0179b8..12d976b 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -819,6 +819,7 @@ static void __init kuser_get_tls_init(unsigned long vectors)
 
 void __init early_trap_init(void *vectors_base)
 {
+#ifndef CONFIG_CPU_V7M
 	unsigned long vectors = (unsigned long)vectors_base;
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
@@ -850,4 +851,5 @@ void __init early_trap_init(void *vectors_base)
 
 	flush_icache_range(vectors, vectors + PAGE_SIZE);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
+#endif
 }
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index d51225f..4bc8ae5 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -20,12 +20,14 @@
 
 void __init arm_mm_memblock_reserve(void)
 {
+#ifndef CONFIG_CPU_V7M
 	/*
 	 * Register the exception vector page.
 	 * some architectures which the DRAM is the exception vector to trap,
 	 * alloc_page breaks with error, although it is not NULL, but "0."
 	 */
 	memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE);
+#endif
 }
 
 void __init sanity_check_meminfo(void)
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
new file mode 100644
index 0000000..2b8eb97
--- /dev/null
+++ b/arch/arm/mm/proc-v7m.S
@@ -0,0 +1,161 @@
+/*
+ *  linux/arch/arm/mm/proc-v7m.S
+ *
+ *  Copyright (C) 2008 ARM Ltd.
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7-M processor support.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ENTRY(cpu_v7m_proc_init)
+	mov	pc, lr
+ENDPROC(cpu_v7m_proc_init)
+
+ENTRY(cpu_v7m_proc_fin)
+	mov	pc, lr
+ENDPROC(cpu_v7m_proc_fin)
+
+/*
+ *	cpu_v7m_reset(loc)
+ *
+ *	Perform a soft reset of the system.  Put the CPU into the
+ *	same state as it would be if it had been reset, and branch
+ *	to what would be the reset vector.
+ *
+ *	- loc   - location to jump to for soft reset
+ */
+	.align	5
+ENTRY(cpu_v7m_reset)
+	mov	pc, r0
+ENDPROC(cpu_v7m_reset)
+
+/*
+ *	cpu_v7m_do_idle()
+ *
+ *	Idle the processor (eg, wait for interrupt).
+ *
+ *	IRQs are already disabled.
+ */
+ENTRY(cpu_v7m_do_idle)
+	wfi
+	mov	pc, lr
+ENDPROC(cpu_v7m_do_idle)
+
+ENTRY(cpu_v7m_dcache_clean_area)
+	mov	pc, lr
+ENDPROC(cpu_v7m_dcache_clean_area)
+
+/*
+ * There is no MMU, so there is nothing to do here.
+ */
+ENTRY(cpu_v7m_switch_mm)
+	mov	pc, lr
+ENDPROC(cpu_v7m_switch_mm)
+
+cpu_v7m_name:
+	.ascii	"ARMv7-M Processor"
+	.align
+
+	.section ".text.init", #alloc, #execinstr
+
+/*
+ *	__v7m_setup
+ *
+ *	This should be able to cover all ARMv7-M cores.
+ */
+__v7m_setup:
+	@ Configure the vector table base address
+	ldr	r0, =0xe000ed08		@ vector table base address
+	ldr	r12, =vector_table
+	str	r12, [r0]
+
+	@ Lower the priority of the SVC and PendSV exceptions
+	ldr	r0, =0xe000ed1c
+	mov	r5, #0x80000000
+	str	r5, [r0]		@ set SVC priority
+	ldr	r0, =0xe000ed20
+	mov	r5, #0x00800000
+	str	r5, [r0]		@ set PendSV priority
+
+	@ SVC to run the kernel in this mode
+	adr	r0, BSYM(1f)
+	ldr	r5, [r12, #11 * 4]	@ read the SVC vector entry
+	str	r0, [r12, #11 * 4]	@ write the temporary SVC vector entry
+	mov	r6, lr			@ save LR
+	mov	r7, sp			@ save SP
+	ldr	sp, =__v7m_setup_stack_top
+	cpsie	i
+	svc	#0
+1:	cpsid	i
+	str	r5, [r12, #11 * 4]	@ restore the original SVC vector entry
+	mov	lr, r6			@ restore LR
+	mov	sp, r7			@ restore SP
+
+	@ Special-purpose control register
+	mov	r0, #1
+	msr	control, r0		@ Thread mode has unprivileged access
+
+	@ Configure the System Control Register
+	ldr	r0, =0xe000ed14		@ system control register
+	ldr	r12, [r0]
+	orr	r12, #1 << 9		@ STKALIGN
+	str	r12, [r0]
+	mov	pc, lr
+ENDPROC(__v7m_setup)
+
+	.align	2
+	.type	v7m_processor_functions, #object
+ENTRY(v7m_processor_functions)
+	.word	nommu_early_abort
+	.word	cpu_v7m_proc_init
+	.word	cpu_v7m_proc_fin
+	.word	cpu_v7m_reset
+	.word	cpu_v7m_do_idle
+	.word	cpu_v7m_dcache_clean_area
+	.word	cpu_v7m_switch_mm
+	.word	0			@ cpu_v7m_set_pte_ext
+	.word	legacy_pabort
+	.size	v7m_processor_functions, . - v7m_processor_functions
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv7m"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v7m"
+	.size	cpu_elf_name, . - cpu_elf_name
+	.align
+
+	.section ".proc.info.init", #alloc, #execinstr
+
+	/*
+	 * Match any ARMv7-M processor core.
+	 */
+	.type	__v7m_proc_info, #object
+__v7m_proc_info:
+	.long	0x000f0000		@ Required ID value
+	.long	0x000f0000		@ Mask for ID
+	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
+	.long   0			@ proc_info_list.__cpu_io_mmu_flags
+	b	__v7m_setup		@ proc_info_list.__cpu_flush
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_v7m_name
+	.long	v7m_processor_functions	@ proc_info_list.proc
+	.long	0			@ proc_info_list.tlb
+	.long	0			@ proc_info_list.user
+	.long	0			@ proc_info_list.cache
+	.size	__v7m_proc_info, . - __v7m_proc_info
+
+__v7m_setup_stack:
+	.space	4 * 8				@ 8 registers
+__v7m_setup_stack_top:
-- 
1.7.10.4


* [PATCH v8 3/3] Cortex-M3: Add support for exception handling
  2013-01-17  8:59 [PATCH v8 0/3] Cortex-M3 series once more Uwe Kleine-König
  2013-01-17  8:59 ` [PATCH v8 1/3] ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15 Uwe Kleine-König
  2013-01-17  8:59 ` [PATCH v8 2/3] Cortex-M3: Add base support for Cortex-M3 Uwe Kleine-König
@ 2013-01-17  8:59 ` Uwe Kleine-König
  2013-01-24 18:21   ` Jonathan Austin
  2013-01-24 18:46 ` [PATCH v8 0/3] Cortex-M3 series once more Jonathan Austin
  3 siblings, 1 reply; 7+ messages in thread
From: Uwe Kleine-König @ 2013-01-17  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

This patch implements the exception handling for the ARMv7-M
architecture, which is quite different from the A and R profiles.

It is based on work done earlier by Catalin for 2.6.33, but was nearly
completely rewritten to use a pt_regs layout compatible with the A
profile.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/arm/kernel/entry-common.S |    4 ++
 arch/arm/kernel/entry-header.S |  148 ++++++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/entry-v7m.S    |  134 ++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/process.c      |    4 ++
 arch/arm/kernel/ptrace.c       |    3 +
 5 files changed, 293 insertions(+)
 create mode 100644 arch/arm/kernel/entry-v7m.S

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 3471175..48d8dc0 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -339,6 +339,9 @@ ENDPROC(ftrace_stub)
 
 	.align	5
 ENTRY(vector_swi)
+#ifdef CONFIG_CPU_V7M
+	v7m_exception_entry
+#else
 	sub	sp, sp, #S_FRAME_SIZE
 	stmia	sp, {r0 - r12}			@ Calling r0 - r12
  ARM(	add	r8, sp, #S_PC		)
@@ -349,6 +352,7 @@ ENTRY(vector_swi)
 	str	lr, [sp, #S_PC]			@ Save calling PC
 	str	r8, [sp, #S_PSR]		@ Save CPSR
 	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
+#endif
 	zero_fp
 
 	/*
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9a8531e..33d9900 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -44,6 +44,145 @@
 #endif
 	.endm
 
+#ifdef CONFIG_CPU_V7M
+/*
+ * ARMv7-M exception entry/exit macros.
+ *
+ * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are
+ * automatically saved on the current stack (32 words) before
+ * switching to the exception stack (SP_main).
+ *
+ * If an exception is taken while in user mode, SP_main is
+ * empty. Otherwise, SP_main is aligned to 64 bit automatically
+ * (CCR.STKALIGN set).
+ *
+ * Linux assumes that the interrupts are disabled when entering an
+ * exception handler and it may BUG if this is not the case. Interrupts
+ * are disabled during entry and reenabled in the exit macro.
+ *
+ * v7m_exception_fast_exit is used when returning from interrupts.
+ *
+ * v7m_exception_slow_exit is used when returning from SVC or PendSV.
+ * When returning to kernel mode, we don't return from exception.
+ */
+	.macro	v7m_exception_entry
+	@ determine the location of the registers saved by the core during
+	@ exception entry. Depending on the mode the cpu was in when the
+	@ exception happened, that is either on the main or the process stack.
+	@ Bit 2 of EXC_RETURN stored in the lr register specifies which stack
+	@ was used.
+	tst	lr, #0x4
+	mrsne	r12, psp
+	moveq	r12, sp
+
+	@ we cannot rely on r0-r3 and r12 matching the value saved in the
+	@ exception frame because of tail-chaining. So these have to be
+	@ reloaded.
+	ldmia	r12!, {r0-r3}
+
+	@ Linux expects to have irqs off. Do it here before taking stack space
+	cpsid	i
+
+	sub	sp, #S_FRAME_SIZE-S_IP
+	stmdb	sp!, {r0-r11}
+
+	@ load saved r12, lr, return address and xPSR.
+	@ r0-r7 are used for signals and never touched from now on. Clobbering
+	@ r8-r12 is OK.
+	mov	r9, r12
+	ldmia	r9!, {r8, r10-r12}
+
+	@ calculate the original stack pointer value.
+	@ r9 currently points to the memory location just above the auto saved
+	@ xPSR. If the FP extension is implemented and bit 4 of EXC_RETURN is 0
+	@ then space was allocated for FP state. That is space for 18 32-bit
+	@ values. (If FP extension is unimplemented, bit 4 is 1.)
+	@ Additionally the cpu might automatically 8-byte align the stack. Bit 9
+	@ of the saved xPSR specifies if stack aligning took place. In this case
+	@ another 32-bit value is included in the stack.
+
+	tst	lr, #0x10
+	addeq	r9, r9, #72		@ 18 32-bit values, 4 bytes each
+
+	tst	r12, #0x100
+	addne	r9, r9, #4
+
+	@ store saved r12 using str to have a register to hold the base for stm
+	str	r8, [sp, #S_IP]
+	add	r8, sp, #S_SP
+	@ store r13-r15, xPSR
+	stmia	r8!, {r9-r12}
+	@ store r0 once more and EXC_RETURN
+	stmia	r8, {r0, lr}
+	.endm
+
+	.macro	v7m_exception_fast_exit
+	@ registers r0-r3 and r12 are automatically restored on exception
+	@ return. r4-r7 were not clobbered in v7m_exception_entry so for
+	@ correctness they don't need to be restored. So only r8-r11 must be
+	@ restored here. The easiest way to do so is to restore r0-r7, too.
+	ldmia	sp!, {r0-r11}
+	add	sp, #S_FRAME_SIZE-S_IP
+	cpsie	i
+	bx	lr
+	.endm
+
+	.macro	v7m_exception_slow_exit ret_r0
+	cpsid	i
+	ldr	lr, [sp, #S_EXC_RET]	@ read exception LR
+	tst     lr, #0x8
+	bne	1f			@ go to thread mode using exception return
+
+	/*
+	 * return to kernel thread
+	 * sp is already set up (and might be unset in pt_regs), so only
+	 * restore r0-r12 and pc
+	 */
+	ldmia	sp, {r0-r12}
+	ldr	lr, [sp, #S_PC]
+	add	sp, sp, #S_FRAME_SIZE
+	cpsie	i
+	bx	lr
+
+1:	/*
+	 * return to userspace
+	 */
+
+	@ read original r12, sp, lr, pc and xPSR
+	add	r12, sp, #S_IP
+	ldmia	r12, {r1-r5}
+
+	@ handle stack aligning
+	tst	r5, #0x100
+	subne	r2, r2, #4
+
+	@ skip over stack space for fp saving
+	tst	lr, #0x10
+	subeq	r2, r2, #72
+
+	@ write basic exception frame
+	stmdb	r2!, {r1, r3-r5}
+	ldmia	sp, {r1, r3-r5}
+	.if	\ret_r0
+	stmdb	r2!, {r0, r3-r5}
+	.else
+	stmdb	r2!, {r1, r3-r5}
+	.endif
+
+	@ restore process sp
+	msr	psp, r2
+
+	@ restore original r4-r11
+	ldmia	sp!, {r0-r11}
+
+	@ restore main sp
+	add	sp, sp, #S_FRAME_SIZE-S_IP
+
+	cpsie	i
+	bx	lr
+	.endm
+#endif	/* CONFIG_CPU_V7M */
+
 	@
 	@ Store/load the USER SP and LR registers by switching to the SYS
 	@ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not
@@ -131,6 +270,14 @@
 	rfeia	sp!
 	.endm
 
+#ifdef CONFIG_CPU_V7M
+	.macro	restore_user_regs, fast = 0, offset = 0
+	.if	\offset
+	add	sp, #\offset
+	.endif
+	v7m_exception_slow_exit ret_r0 = \fast
+	.endm
+#else	/* !CONFIG_CPU_V7M */
 	.macro	restore_user_regs, fast = 0, offset = 0
 	clrex					@ clear the exclusive monitor
 	mov	r2, sp
@@ -147,6 +294,7 @@
 	add	sp, sp, #S_FRAME_SIZE - S_SP
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
+#endif	/* CONFIG_CPU_V7M */
 
 	.macro	get_thread_info, rd
 	mov	\rd, sp
diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
new file mode 100644
index 0000000..842394c
--- /dev/null
+++ b/arch/arm/kernel/entry-v7m.S
@@ -0,0 +1,134 @@
+/*
+ * linux/arch/arm/kernel/entry-v7m.S
+ *
+ * Copyright (C) 2008 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Low-level vector interface routines for the ARMv7-M architecture
+ */
+#include <asm/memory.h>
+#include <asm/glue.h>
+#include <asm/thread_notify.h>
+
+#include <mach/entry-macro.S>
+
+#include "entry-header.S"
+
+#ifdef CONFIG_PREEMPT
+#error "CONFIG_PREEMPT not supported on the current ARMv7M implementation"
+#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation"
+#endif
+
+__invalid_entry:
+	v7m_exception_entry
+	adr	r0, strerr
+	mrs	r1, ipsr
+	mov	r2, lr
+	bl	printk
+	mov	r0, sp
+	bl	show_regs
+1:	b	1b
+ENDPROC(__invalid_entry)
+
+strerr:	.asciz	"\nUnhandled exception: IPSR = %08lx LR = %08lx\n"
+
+	.align	2
+__irq_entry:
+	v7m_exception_entry
+
+	@
+	@ Invoke the IRQ handler
+	@
+	mrs	r0, ipsr
+	and	r0, #0xff
+	sub	r0, #16			@ IRQ number
+	mov	r1, sp
+	@ routine called with r0 = irq number, r1 = struct pt_regs *
+	bl	asm_do_IRQ
+
+	@
+	@ Check for any pending work if returning to user
+	@
+	ldr	lr, [sp, #S_EXC_RET]
+	tst	lr, #0x8		@ check the return stack
+	beq	2f			@ returning to handler mode
+	get_thread_info tsk
+	ldr	r1, [tsk, #TI_FLAGS]
+	tst	r1, #_TIF_WORK_MASK
+	beq	2f			@ no work pending
+	ldr	r1, =0xe000ed04		@ ICSR
+	mov	r0, #1 << 28		@ ICSR.PENDSVSET
+	str	r0, [r1]		@ raise PendSV
+
+2:
+	v7m_exception_fast_exit
+ENDPROC(__irq_entry)
+
+__pendsv_entry:
+	v7m_exception_entry
+
+	ldr	r1, =0xe000ed04		@ ICSR
+	mov	r0, #1 << 27		@ ICSR.PENDSVCLR
+	str	r0, [r1]		@ clear PendSV
+
+	@ execute the pending work, including reschedule
+	get_thread_info tsk
+	mov	why, #0
+	b	ret_to_user
+ENDPROC(__pendsv_entry)
+
+/*
+ * Register switch for ARMv7-M processors.
+ * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
+ * previous and next are guaranteed not to be the same.
+ */
+ENTRY(__switch_to)
+	.fnstart
+	.cantunwind
+	add	ip, r1, #TI_CPU_SAVE
+	stmia	ip!, {r4 - r11}		@ Store most regs on stack
+	str	sp, [ip], #4
+	str	lr, [ip], #4
+	mov	r5, r0
+	add	r4, r2, #TI_CPU_SAVE
+	ldr	r0, =thread_notify_head
+	mov	r1, #THREAD_NOTIFY_SWITCH
+	bl	atomic_notifier_call_chain
+	mov	ip, r4
+	mov	r0, r5
+	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
+	ldr	sp, [ip]
+	ldr	pc, [ip, #4]!
+	.fnend
+ENDPROC(__switch_to)
+
+	.data
+	.align	8
+/*
+ * Vector table (64 words => 256 bytes natural alignment)
+ */
+ENTRY(vector_table)
+	.long	0			@ 0 - Reset stack pointer
+	.long	__invalid_entry		@ 1 - Reset
+	.long	__invalid_entry		@ 2 - NMI
+	.long	__invalid_entry		@ 3 - HardFault
+	.long	__invalid_entry		@ 4 - MemManage
+	.long	__invalid_entry		@ 5 - BusFault
+	.long	__invalid_entry		@ 6 - UsageFault
+	.long	__invalid_entry		@ 7 - Reserved
+	.long	__invalid_entry		@ 8 - Reserved
+	.long	__invalid_entry		@ 9 - Reserved
+	.long	__invalid_entry		@ 10 - Reserved
+	.long	vector_swi		@ 11 - SVCall
+	.long	__invalid_entry		@ 12 - Debug Monitor
+	.long	__invalid_entry		@ 13 - Reserved
+	.long	__pendsv_entry		@ 14 - PendSV
+	.long	__invalid_entry		@ 15 - SysTick
+	.rept	64 - 16
+	.long	__irq_entry		@ 16..63 - External Interrupts
+	.endr
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 90084a6..3d745d4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -387,6 +387,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 		*childregs = *regs;
 		childregs->ARM_r0 = 0;
 		childregs->ARM_sp = stack_start;
+#if defined CONFIG_CPU_V7M
+		/* Return to Thread mode with Process stack */
+		childregs->ARM_EXC_RET = 0xfffffffdUL;
+#endif
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		thread->cpu_context.r4 = stk_sz;
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 739db3a..55df1d5 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -87,6 +87,9 @@ static const struct pt_regs_offset regoffset_table[] = {
 	REG_OFFSET_NAME(pc),
 	REG_OFFSET_NAME(cpsr),
 	REG_OFFSET_NAME(ORIG_r0),
+#ifdef CONFIG_CPU_V7M
+	REG_OFFSET_NAME(EXC_RET),
+#endif
 	REG_OFFSET_END,
 };
 
-- 
1.7.10.4


* [PATCH v8 2/3] Cortex-M3: Add base support for Cortex-M3
  2013-01-17  8:59 ` [PATCH v8 2/3] Cortex-M3: Add base support for Cortex-M3 Uwe Kleine-König
@ 2013-01-18 18:23   ` Jonathan Austin
  0 siblings, 0 replies; 7+ messages in thread
From: Jonathan Austin @ 2013-01-18 18:23 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Uwe,

I've got a few questions about this patch, as well as some suggestions
for changes that I think would increase its quality.

The most obvious thing, and one that applies throughout, is that there
are quite a few magic constants that could really do with #defines...
I've pulled out a few obvious ones inline below.

On 17/01/13 08:59, Uwe Kleine-König wrote:
> From: Catalin Marinas <catalin.marinas@arm.com>
>
> This patch adds the base support for the Cortex-M3 processor (ARMv7-M
> architecture). It consists of the corresponding arch/arm/mm/ files and
> various #ifdef's around the kernel. Exception handling is implemented by
> a subsequent patch.
>
> [ukleinek: squash in some changes originating from commit
>
> b5717ba (Cortex-M3: Add support for the Microcontroller Prototyping System)
>
> from the v2.6.33-arm1 patch stack, port to post 3.6, drop zImage
> support, drop reorganisation of pt_regs, assert CONFIG_V7M doesn't leak
> into installed headers and a few cosmetic changes]
>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
[...]
> -#ifdef CONFIG_THUMB2_KERNEL
> +#if defined(CONFIG_CPU_V7M)
> +       .macro  setmode, mode, reg
> +       .endm
> +#elif defined(CONFIG_THUMB2_KERNEL)

Is it really okay to leave setmode doing nothing? My understanding of
the reason we need it normally is that we can't rely on the bootloader
entering the kernel in the right mode.

As far as M goes, it *looks* to me as though it would be possible to
enter the kernel in 'handler' mode, as well as in privileged thread mode
(which I presume is the 'right' mode). Is that possible? Likely? Should
we mitigate against that?
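For the sake of discussion, a minimal check at kernel entry would at
least make the failure mode explicit (completely untested sketch, not
something that's in the patch):

	@ sketch: catch entry in handler mode early
	mrs	r0, ipsr
	cmp	r0, #0			@ non-zero IPSR -> handler mode
	bne	.			@ cannot demote to thread mode here, so hang

Getting back to thread mode would need a faked exception return, so
just detecting the situation might be the pragmatic option.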

>          .macro  setmode, mode, reg
>          mov     \reg, #\mode
>          msr     cpsr_c, \reg
> diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
> index cb47d28..5bd8cb6 100644
> --- a/arch/arm/include/asm/cputype.h
> +++ b/arch/arm/include/asm/cputype.h
> @@ -46,6 +46,9 @@ extern unsigned int processor_id;
>                      : "cc");                                            \
>                  __val;                                                  \
>          })
> +#elif defined(CONFIG_CPU_V7M)
> +#define read_cpuid(reg) (*(unsigned int *)0xe000ed00)

Here's the first example of magic constants that aren't ideal, but I've 
said more about magic constants below (I'd much prefer #defines)

> +#define read_cpuid_ext(reg) 0
>   #else
>   #define read_cpuid(reg) (processor_id)
>   #define read_cpuid_ext(reg) 0
> diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
> index cca9f15..ea98658 100644
> --- a/arch/arm/include/asm/glue-cache.h
> +++ b/arch/arm/include/asm/glue-cache.h
> @@ -125,10 +125,35 @@
>   # endif
>   #endif
>
> +#if defined(CONFIG_CPU_V7M)
> +# ifdef _CACHE
> +#  error "Multi-cache not supported on ARMv7-M"
> +# else
> +#  define _CACHE nop
> +# endif
> +#endif
> +

What's the difficulty with MULTI_CACHE that's not encountered with
MULTI_CPU? Or what's the reason that you've got MULTI_CPU supported but
not MULTI_CACHE?

I had a look at the mechanisms and couldn't see what would stop your
nop_cache_fns from being listed in __v7m_proc_info.

It seems sensible to do this right from the start, especially given the 
scarcity of eyes for review of NOMMU code...
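
Concretely, something of this shape (untested sketch; the nop_*
functions would have to move out of line, and cpu_cache_fns is the
struct from asm/cacheflush.h):

	struct cpu_cache_fns nop_cache_fns = {
		.flush_icache_all	= nop_flush_icache_all,
		.flush_kern_all		= nop_flush_kern_cache_all,
		.flush_kern_louis	= nop_flush_kern_cache_louis,
		.flush_user_all		= nop_flush_user_cache_all,
		.flush_user_range	= nop_flush_user_cache_range,
		.coherent_kern_range	= nop_coherent_kern_range,
		.coherent_user_range	= nop_coherent_user_range,
		.flush_kern_dcache_area	= nop_flush_kern_dcache_area,
		.dma_map_area		= nop_dma_map_area,
		.dma_unmap_area		= nop_dma_unmap_area,
		.dma_flush_range	= nop_dma_flush_range,
	};

with __v7m_proc_info's cache entry pointing at it.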

>   #if !defined(_CACHE) && !defined(MULTI_CACHE)
>   #error Unknown cache maintenance model
>   #endif
>
> +#ifndef __ASSEMBLER__
> +static inline void nop_flush_icache_all(void) { }
> +static inline void nop_flush_kern_cache_all(void) { }
> +static inline void nop_flush_kern_cache_louis(void) { }
> +static inline void nop_flush_user_cache_all(void) { }
> +static inline void nop_flush_user_cache_range(unsigned long a, unsigned long b, unsigned int c) { }
> +
> +static inline void nop_coherent_kern_range(unsigned long a, unsigned long b) { }
> +static inline int nop_coherent_user_range(unsigned long a, unsigned long b) { return 0; }
> +static inline void nop_flush_kern_dcache_area(void *a, size_t s) { }
> +
> +static inline void nop_dma_flush_range(const void *a, const void *b) { }
> +
> +static inline void nop_dma_map_area(const void *s, size_t l, int f) { }
> +static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { }
> +#endif
> +
>   #ifndef MULTI_CACHE
>   #define __cpuc_flush_icache_all                __glue(_CACHE,_flush_icache_all)
>   #define __cpuc_flush_kern_all          __glue(_CACHE,_flush_kern_cache_all)
> diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h
> index 8cacbcd..1f2339c 100644
> --- a/arch/arm/include/asm/glue-df.h
> +++ b/arch/arm/include/asm/glue-df.h
> @@ -95,6 +95,14 @@
>   # endif
>   #endif
>
> +#ifdef CONFIG_CPU_ABRT_NOMMU
> +# ifdef CPU_DABORT_HANDLER
> +#  define MULTI_DABORT 1
> +# else
> +#  define CPU_DABORT_HANDLER nommu_early_abort
> +# endif
> +#endif
> +

You haven't added this to the list of models further up in the file - 
looks like until now that list has been maintained so surely we should 
keep it up-to-date?

>   #ifndef CPU_DABORT_HANDLER
>   #error Unknown data abort handler type
>   #endif
> diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h
> index ac1dd54..f2f39bc 100644
> --- a/arch/arm/include/asm/glue-proc.h
> +++ b/arch/arm/include/asm/glue-proc.h
> @@ -230,6 +230,15 @@
>   # endif
>   #endif
>
> +#ifdef CONFIG_CPU_V7M
> +# ifdef CPU_NAME
> +#  undef  MULTI_CPU
> +#  define MULTI_CPU
> +# else
> +#  define CPU_NAME cpu_v7m
> +# endif
> +#endif
> +

See my question above about MULTI_CPU and not MULTI_CACHE - strikes me 
as strange...

>   #ifndef MULTI_CPU
>   #define cpu_proc_init                  __glue(CPU_NAME,_proc_init)
>   #define cpu_proc_fin                   __glue(CPU_NAME,_proc_fin)
> diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h
> index 1e6cca5..3b763d6 100644
> --- a/arch/arm/include/asm/irqflags.h
> +++ b/arch/arm/include/asm/irqflags.h
> @@ -8,6 +8,16 @@
>   /*
>    * CPU interrupt mask handling.
>    */
> +#ifdef CONFIG_CPU_V7M
> +#define IRQMASK_REG_NAME_R "primask"
> +#define IRQMASK_REG_NAME_W "primask"
> +#define IRQMASK_I_BIT  1
> +#else
> +#define IRQMASK_REG_NAME_R "cpsr"
> +#define IRQMASK_REG_NAME_W "cpsr_c"
> +#define IRQMASK_I_BIT  PSR_I_BIT
> +#endif
> +
>   #if __LINUX_ARM_ARCH__ >= 6
>
>   static inline unsigned long arch_local_irq_save(void)
> @@ -15,7 +25,7 @@ static inline unsigned long arch_local_irq_save(void)
>          unsigned long flags;
>
>          asm volatile(
> -               "       mrs     %0, cpsr        @ arch_local_irq_save\n"
> +               "       mrs     %0, " IRQMASK_REG_NAME_R "      @ arch_local_irq_save\n"
>                  "       cpsid   i"
>                  : "=r" (flags) : : "memory", "cc");
>          return flags;
> @@ -129,7 +139,7 @@ static inline unsigned long arch_local_save_flags(void)
>   {
>          unsigned long flags;
>          asm volatile(
> -               "       mrs     %0, cpsr        @ local_save_flags"
> +               "       mrs     %0, " IRQMASK_REG_NAME_R "      @ local_save_flags"
>                  : "=r" (flags) : : "memory", "cc");
>          return flags;
>   }
> @@ -140,7 +150,7 @@ static inline unsigned long arch_local_save_flags(void)
>   static inline void arch_local_irq_restore(unsigned long flags)
>   {
>          asm volatile(
> -               "       msr     cpsr_c, %0      @ local_irq_restore"
> +               "       msr     " IRQMASK_REG_NAME_W ", %0      @ local_irq_restore"
>                  :
>                  : "r" (flags)
>                  : "memory", "cc");
> @@ -148,8 +158,8 @@ static inline void arch_local_irq_restore(unsigned long flags)
>
>   static inline int arch_irqs_disabled_flags(unsigned long flags)
>   {
> -       return flags & PSR_I_BIT;
> +       return flags & IRQMASK_I_BIT;
>   }
>
> -#endif
> -#endif
> +#endif /* ifdef __KERNEL__ */
> +#endif /* ifndef __ASM_ARM_IRQFLAGS_H */
> diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
> index 06e7d50..5e61b88 100644
> --- a/arch/arm/include/asm/processor.h
> +++ b/arch/arm/include/asm/processor.h
> @@ -49,7 +49,14 @@ struct thread_struct {
>   #ifdef CONFIG_MMU
>   #define nommu_start_thread(regs) do { } while (0)
>   #else
> +#ifndef CONFIG_CPU_V7M
>   #define nommu_start_thread(regs) regs->ARM_r10 = current->mm->start_data
> +#else
> +#define nommu_start_thread(regs) do {                                  \
> +       regs->ARM_r10 = current->mm->start_data;                        \

This one isn't really a comment on your patch, because you're 
duplicating the normal nommu behaviour, but why don't we do anything 
special with r9? Isn't that the PIC offset base register exclusively in 
uClinux?

Does someone know - is this legacy from before the uClinux stuff was 
EABI compliant and used r10, or is this doing something else?

> +       regs->ARM_EXC_RET = 0xfffffffdL;                                \

So, here's another magic constant...

This corresponds to 'Return to thread mode with process stack'

I think it'd be much clearer to #define these somewhere where the FP 
versions can be added later..

how about
#define ER_THREAD_PROCESS_BASIC 0xFFFFFFFDL

Which I think gives us scope for adding all the FP options (Table 8.8, 
8.9 in V7M ARMARM)

That said, as discussed in the next comment, and on IRC, we should 
probably get rid of this particular instance (regs->ARM_EXC_RET) and 
derive this value based on other saved information.

> +} while (0)
> +#endif
>   #endif
>
>   #define start_thread(regs,pc,sp)                                       \
> diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
> index 3d52ee1..67661e8 100644
> --- a/arch/arm/include/asm/ptrace.h
> +++ b/arch/arm/include/asm/ptrace.h
> @@ -14,7 +14,11 @@
>
>   #ifndef __ASSEMBLY__
>   struct pt_regs {
> +#ifdef CONFIG_CPU_V7M
> +       unsigned long uregs[20];

This increase in pt_regs size is due to the addition of ARM_EXC_RET to 
pt_regs (for the kernel only, not userspace)

However, I think we can avoid doing this, which is nicer because we then
a) don't have a register in pt_regs that isn't actually a register, and
b) have less #ifdef'd code, so the chance of NOMMU getting
inadvertently broken is lower.

ARM_EXC_RET can be one of the following (assuming we always use the 
basic stack frame)

EXC_RETURN	Return to	Return stack
0xFFFFFFF1	Handler mode	Main
0xFFFFFFF9	Thread mode	Main
0xFFFFFFFD	Thread mode	Process

But we never return to thread mode with the main stack (right?) - so
really, to calculate what ARM_EXC_RET should be, we just need to know
whether we are in handler mode or thread mode. We can know this from
the IPSR, and as the xPSR gets saved both by the core at exception time
and then by us into the pt_regs struct, we shouldn't need to *also*
store EXC_RET out of the lr. This would, of course, also need
appropriate changes to v7m_exception_entry and v7m_exception_{fast,
slow}_exit and __irq_entry, where we also load EXC_RET into lr.

This way, only when some change or new feature for the kernel actually 
needs to be storing ARM_EXC_RET do we need to think about storing it 
somewhere, be that in the thread_info or pt_regs.
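
To illustrate (untested, conditional-execution/IT details elided as in
the rest of the series):

	@ sketch: derive EXC_RETURN from the saved xPSR instead of
	@ storing it; a non-zero exception number means handler mode
	ldr	r1, [sp, #S_PSR]
	tst	r1, #0xff		@ enough for a 64-entry vector table
	ldrne	lr, =0xfffffff1		@ return to handler mode, main stack
	ldreq	lr, =0xfffffffd		@ return to thread mode, process stack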

> +#else
>          unsigned long uregs[18];
> +#endif
>   };
>
>   #define user_mode(regs)        \
> @@ -45,6 +49,7 @@ struct pt_regs {
>    */
>   static inline int valid_user_regs(struct pt_regs *regs)
>   {
> +#ifndef CONFIG_CPU_V7M
>          unsigned long mode = regs->ARM_cpsr & MODE_MASK;
>
>          /*
> @@ -67,6 +72,9 @@ static inline int valid_user_regs(struct pt_regs *regs)
>                  regs->ARM_cpsr |= USR_MODE;
>
>          return 0;
> +#else /* ifndef CONFIG_CPU_V7M */
> +       return 1;
> +#endif
>   }
>
>   static inline long regs_return_value(struct pt_regs *regs)
> diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h
> index dfd386d..720ea03 100644
> --- a/arch/arm/include/asm/system_info.h
> +++ b/arch/arm/include/asm/system_info.h
> @@ -11,6 +11,7 @@
>   #define CPU_ARCH_ARMv5TEJ      7
>   #define CPU_ARCH_ARMv6         8
>   #define CPU_ARCH_ARMv7         9
> +#define CPU_ARCH_ARMv7M                10
>
>   #ifndef __ASSEMBLY__
>
> diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
> index 96ee092..d3be66e 100644
> --- a/arch/arm/include/uapi/asm/ptrace.h
> +++ b/arch/arm/include/uapi/asm/ptrace.h
> @@ -34,28 +34,47 @@
>
>   /*
>    * PSR bits
> + * Note on V7M there is no mode contained in the PSR
>    */
>   #define USR26_MODE     0x00000000
>   #define FIQ26_MODE     0x00000001
>   #define IRQ26_MODE     0x00000002
>   #define SVC26_MODE     0x00000003
> +#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M)
> +/*
> + * Use 0 here to get code right that creates a userspace
> + * or kernel space thread.
> + */
> +#define USR_MODE       0x00000000
> +#define SVC_MODE       0x00000000
> +#else
>   #define USR_MODE       0x00000010
> +#define SVC_MODE       0x00000013
> +#endif
>   #define FIQ_MODE       0x00000011
>   #define IRQ_MODE       0x00000012
> -#define SVC_MODE       0x00000013
>   #define ABT_MODE       0x00000017
>   #define HYP_MODE       0x0000001a
>   #define UND_MODE       0x0000001b
>   #define SYSTEM_MODE    0x0000001f
>   #define MODE32_BIT     0x00000010
>   #define MODE_MASK      0x0000001f
> -#define PSR_T_BIT      0x00000020
> -#define PSR_F_BIT      0x00000040
> -#define PSR_I_BIT      0x00000080
> -#define PSR_A_BIT      0x00000100
> -#define PSR_E_BIT      0x00000200
> -#define PSR_J_BIT      0x01000000
> -#define PSR_Q_BIT      0x08000000
> +
> +#define V4_PSR_T_BIT   0x00000020      /* >= V4T, but not V7M */
> +#define V7M_PSR_T_BIT  0x01000000
> +#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M)
> +#define PSR_T_BIT      V7M_PSR_T_BIT
> +#else
> +/* for compatibility */
> +#define PSR_T_BIT      V4_PSR_T_BIT
> +#endif
> +
> +#define PSR_F_BIT      0x00000040      /* >= V4, but not V7M */
> +#define PSR_I_BIT      0x00000080      /* >= V4, but not V7M */
> +#define PSR_A_BIT      0x00000100      /* >= V6, but not V7M */
> +#define PSR_E_BIT      0x00000200      /* >= V6, but not V7M */
> +#define PSR_J_BIT      0x01000000      /* >= V5J, but not V7M */
> +#define PSR_Q_BIT      0x08000000      /* >= V5E, including V7M */
>   #define PSR_V_BIT      0x10000000
>   #define PSR_C_BIT      0x20000000
>   #define PSR_Z_BIT      0x40000000
> @@ -125,6 +144,7 @@ struct pt_regs {
>   #define ARM_r1         uregs[1]
>   #define ARM_r0         uregs[0]
>   #define ARM_ORIG_r0    uregs[17]
> +#define ARM_EXC_RET    uregs[18]
>

If we can get rid of the pt_regs change, then this can also go away - 
that'd be nice because I'm not sure about it... I don't know what the 
implications of having this defined for userspace are, given that you've got

#ifndef __KERNEL__
struct pt_regs {
         long uregs[18];
};
#endif /* __KERNEL__ */

Should that last definition for ARM_EXC_RET be #ifdef __KERNEL__ too?
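
i.e. something like

	#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M)
	#define ARM_EXC_RET	uregs[18]
	#endif

assuming the define survives at all, given the discussion above.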

>   /*
>    * The size of the user-visible VFP state as seen by PTRACE_GET/SETVFPREGS
> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
> index c985b48..5fe9ace 100644
> --- a/arch/arm/kernel/asm-offsets.c
> +++ b/arch/arm/kernel/asm-offsets.c
> @@ -93,6 +93,9 @@ int main(void)
>     DEFINE(S_PC,                 offsetof(struct pt_regs, ARM_pc));
>     DEFINE(S_PSR,                        offsetof(struct pt_regs, ARM_cpsr));
>     DEFINE(S_OLD_R0,             offsetof(struct pt_regs, ARM_ORIG_r0));
> +#ifdef CONFIG_CPU_V7M
> +  DEFINE(S_EXC_RET,            offsetof(struct pt_regs, ARM_EXC_RET));
> +#endif
>     DEFINE(S_FRAME_SIZE,         sizeof(struct pt_regs));
>     BLANK();
>   #ifdef CONFIG_CACHE_L2X0
> diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
> index 278cfc1..c391c05 100644
> --- a/arch/arm/kernel/head-nommu.S
> +++ b/arch/arm/kernel/head-nommu.S
> @@ -44,10 +44,13 @@ ENTRY(stext)
>
>          setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
>                                                  @ and irqs disabled
> -#ifndef CONFIG_CPU_CP15
> -       ldr     r9, =CONFIG_PROCESSOR_ID
> -#else
> +#if defined(CONFIG_CPU_CP15)
>          mrc     p15, 0, r9, c0, c0              @ get processor id
> +#elif defined(CONFIG_CPU_V7M)
> +       ldr     r9, =0xe000ed00                 @ CPUID register address

Another case where the magic constant makes things less clear...

As these things are architecturally defined (ARMARMv7-M), can we 
please #define them with their register names?

ldr	r9, =V7M_CPUID

reads much more cleanly.
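
Something like the following in a central place (a new asm/v7m.h, say - 
the file and macro names are just my suggestion):

#define V7M_CPUID	0xe000ed00	/* CPUID base register in the SCS */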

> +       ldr     r9, [r9]
> +#else
> +       ldr     r9, =CONFIG_PROCESSOR_ID
>   #endif
>          bl      __lookup_processor_type         @ r5=procinfo r9=cpuid
>          movs    r10, r5                         @ invalid processor (r5=0)?
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index da1d1aa..3cca0c8 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -128,7 +128,9 @@ struct stack {
>          u32 und[3];
>   } ____cacheline_aligned;
>
> +#ifndef CONFIG_CPU_V7M
>   static struct stack stacks[NR_CPUS];
> +#endif
>
>   char elf_platform[ELF_PLATFORM_SIZE];
>   EXPORT_SYMBOL(elf_platform);
> @@ -207,7 +209,7 @@ static const char *proc_arch[] = {
>          "5TEJ",
>          "6TEJ",
>          "7",
> -       "?(11)",
> +       "7M",
>          "?(12)",
>          "?(13)",
>          "?(14)",
> @@ -216,6 +218,12 @@ static const char *proc_arch[] = {
>          "?(17)",
>   };
>
> +#ifdef CONFIG_CPU_V7M
> +static int __get_cpu_architecture(void)
> +{
> +       return CPU_ARCH_ARMv7M;
> +}
> +#else
>   static int __get_cpu_architecture(void)
>   {
>          int cpu_arch;

You've wired up read_cpuid_id() and a stub read_cpuid_ext() for V7M so it 
would be better to get rid of this #ifdef.

I'm guessing the problem that caused you to do this was the inline asm 
in __get_cpu_architecture that uses cp15 access.

If you used read_cpuid_ext(CPUID_EXT_MMFR0) instead of that asm then you'd
a) clean up the code
b) avoid a possible bug, as that asm is not predicated on CONFIG_CPU_CP15

I reckon that could be a separate patch, perhaps?

Then you don't need the #ifdef anymore, right?
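
Untested, but I'd expect that branch of __get_cpu_architecture to reduce 
to something like this (read_cpuid_ext()/CPUID_EXT_MMFR0 already exist 
in asm/cputype.h):

	} else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
		/* Revised CPUID format. Read the Memory Model Feature
		 * Register 0 and check for VMSAv7 or PMSAv7 */
		unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);

		if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
		    (mmfr0 & 0x000000f0) >= 0x00000030)
			cpu_arch = CPU_ARCH_ARMv7;
		else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
			 (mmfr0 & 0x000000f0) == 0x00000020)
			cpu_arch = CPU_ARCH_ARMv6;
		else
			cpu_arch = CPU_ARCH_UNKNOWN;
	}

(I haven't checked what your V7M read_cpuid_ext() stub then makes of the 
MMFR0 checks, mind.)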

> @@ -248,6 +256,7 @@ static int __get_cpu_architecture(void)
>
>          return cpu_arch;
>   }
> +#endif
>
>   int __pure cpu_architecture(void)
>   {
> @@ -375,6 +384,7 @@ static void __init feat_v6_fixup(void)
>    */
>   void cpu_init(void)
>   {
> +#ifndef CONFIG_CPU_V7M

What is it in here that we're avoiding? Couldn't the #ifdefs go around 
the __asm__ in this function instead of the whole thing? Or at most the 
initialisation of stk and the __asm__ block?

cpu_proc_init() is wired (albeit as a stub) for v7m, and I don't see 
why smp_processor_id() should hurt (it seems like it is defined away in 
the !SMP case in include/linux/smp.h). It could also get quite confusing 
to debug now that we've got a function we can't execute.

Yes, we execute a few more instructions, but reducing the 'delta' 
between M and A/R seems to be a worthwhile pay-off to me...

If there's something hiding in there that I've missed then apologies...
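
Just to sketch the shape I have in mind (untested):

void cpu_init(void)
{
	unsigned int cpu = smp_processor_id();

	/* ... cpu sanity check as before ... */

	cpu_proc_init();

#ifndef CONFIG_CPU_V7M
	/* only the stk initialisation and the mode-switching __asm__
	 * block that fills in the banked IRQ/ABT/UND stacks need to be
	 * hidden from V7M */
#endif
}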

>          unsigned int cpu = smp_processor_id();
>          struct stack *stk = &stacks[cpu];
>
> @@ -419,6 +429,7 @@ void cpu_init(void)
>                "I" (offsetof(struct stack, und[0])),
>                PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
>              : "r14");
> +#endif
>   }
>
>   int __cpu_logical_map[NR_CPUS];
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index b0179b8..12d976b 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -819,6 +819,7 @@ static void __init kuser_get_tls_init(unsigned long vectors)
>
>   void __init early_trap_init(void *vectors_base)
>   {
> +#ifndef CONFIG_CPU_V7M
>          unsigned long vectors = (unsigned long)vectors_base;
>          extern char __stubs_start[], __stubs_end[];
>          extern char __vectors_start[], __vectors_end[];
> @@ -850,4 +851,5 @@ void __init early_trap_init(void *vectors_base)
>
>          flush_icache_range(vectors, vectors + PAGE_SIZE);
>          modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
> +#endif

This looks scary enough ("What, we don't install any vectors!?") that it 
could probably do with a comment along the lines of 'V7M allows us to 
point to a vector table inside the kernel image'

Also helps explain why the kuser helpers aren't going to be around.
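
Perhaps, roughly:

void __init early_trap_init(void *vectors_base)
{
#ifndef CONFIG_CPU_V7M
	/* ... existing copying of vectors, stubs and kuser helpers ... */
#else
	/*
	 * On V7M the hardware vector table lives in the kernel image
	 * (VTOR is pointed at it in __v7m_setup), so there is no vector
	 * page to set up - and hence no kuser helpers either.
	 */
#endif
}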

>   }
> diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
> index d51225f..4bc8ae5 100644
> --- a/arch/arm/mm/nommu.c
> +++ b/arch/arm/mm/nommu.c
> @@ -20,12 +20,14 @@
>
>   void __init arm_mm_memblock_reserve(void)
>   {
> +#ifndef CONFIG_CPU_V7M
>          /*
>           * Register the exception vector page.
>           * some architectures which the DRAM is the exception vector to trap,
>           * alloc_page breaks with error, although it is not NULL, but "0."
>           */
>          memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE);
> +#endif
>   }
>
>   void __init sanity_check_meminfo(void)
> diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
> new file mode 100644
> index 0000000..2b8eb97
> --- /dev/null
> +++ b/arch/arm/mm/proc-v7m.S
> @@ -0,0 +1,161 @@
> +/*
> + *  linux/arch/arm/mm/proc-v7m.S
> + *
> + *  Copyright (C) 2008 ARM Ltd.
> + *  Copyright (C) 2001 Deep Blue Solutions Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + *  This is the "shell" of the ARMv7-M processor support.
> + */
> +#include <linux/linkage.h>
> +#include <asm/assembler.h>
> +
> +ENTRY(cpu_v7m_proc_init)
> +       mov     pc, lr
> +ENDPROC(cpu_v7m_proc_init)
> +
> +ENTRY(cpu_v7m_proc_fin)
> +       mov     pc, lr
> +ENDPROC(cpu_v7m_proc_fin)
> +
> +/*
> + *     cpu_v7m_reset(loc)
> + *
> + *     Perform a soft reset of the system.  Put the CPU into the
> + *     same state as it would be if it had been reset, and branch
> + *     to what would be the reset vector.
> + *
> + *     - loc   - location to jump to for soft reset
> + */
> +       .align  5
> +ENTRY(cpu_v7m_reset)
> +       mov     pc, r0
> +ENDPROC(cpu_v7m_reset)
> +
> +/*
> + *     cpu_v7m_do_idle()
> + *
> + *     Idle the processor (eg, wait for interrupt).
> + *
> + *     IRQs are already disabled.
> + */
> +ENTRY(cpu_v7m_do_idle)
> +       wfi
> +       mov     pc, lr
> +ENDPROC(cpu_v7m_do_idle)
> +
> +ENTRY(cpu_v7m_dcache_clean_area)
> +       mov     pc, lr
> +ENDPROC(cpu_v7m_dcache_clean_area)
> +
> +/*
> + * There is no MMU, so here is nothing to do.
> + */
> +ENTRY(cpu_v7m_switch_mm)
> +       mov     pc, lr
> +ENDPROC(cpu_v7m_switch_mm)
> +
> +cpu_v7m_name:
> +       .ascii  "ARMv7-M Processor"
> +       .align
> +
> +       .section ".text.init", #alloc, #execinstr
> +
> +/*
> + *     __v7m_setup
> + *
> + *     This should be able to cover all ARMv7-M cores.
> + */
> +__v7m_setup:
> +       @ Configure the vector table base address
> +       ldr     r0, =0xe000ed08         @ vector table base address

Here's the next case of using a 'magic constant register'

In this case, using the register name (say V7M_VTOR) also gives us the 
joy of being able to trivially search in the ARMARM to understand what's 
going on.

I'll stop pointing out the magic registers now, but there are a bunch 
more - I think a search for e000 should pick most of them up (though 
the SVC and PendSV priorities below don't conform to that).
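
With such a define (V7M_VTOR for 0xe000ed08, say), the start of 
__v7m_setup would read:

	@ Configure the vector table base address
	ldr	r0, =V7M_VTOR
	ldr	r12, =vector_table
	str	r12, [r0]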

> +       ldr     r12, =vector_table
> +       str     r12, [r0]
> +
> +       @ Lower the priority of the SVC and PendSV exceptions
> +       ldr     r0, =0xe000ed1c
> +       mov     r5, #0x80000000
> +       str     r5, [r0]                @ set SVC priority
> +       ldr     r0, =0xe000ed20
> +       mov     r5, #0x00800000
> +       str     r5, [r0]                @ set PendSV priority
> +
> +       @ SVC to run the kernel in this mode
> +       adr     r0, BSYM(1f)
> +       ldr     r5, [r12, #11 * 4]      @ read the SVC vector entry
> +       str     r0, [r12, #11 * 4]      @ write the temporary SVC vector entry
> +       mov     r6, lr                  @ save LR
> +       mov     r7, sp                  @ save SP
> +       ldr     sp, =__v7m_setup_stack_top
> +       cpsie   i
> +       svc     #0
> +1:     cpsid   i
> +       str     r5, [r12, #11 * 4]      @ restore the original SVC vector entry
> +       mov     lr, r6                  @ restore LR
> +       mov     sp, r7                  @ restore SP
> +
> +       @ Special-purpose control register
> +       mov     r0, #1
> +       msr     control, r0             @ Thread mode has unprivileged access
> +
> +       @ Configure the System Control Register
> +       ldr     r0, =0xe000ed14         @ system control register
> +       ldr     r12, [r0]
> +       orr     r12, #1 << 9            @ STKALIGN
> +       str     r12, [r0]
> +       mov     pc, lr
> +ENDPROC(__v7m_setup)
> +
> +       .align  2
> +       .type   v7m_processor_functions, #object
> +ENTRY(v7m_processor_functions)
> +       .word   nommu_early_abort
> +       .word   cpu_v7m_proc_init
> +       .word   cpu_v7m_proc_fin
> +       .word   cpu_v7m_reset
> +       .word   cpu_v7m_do_idle
> +       .word   cpu_v7m_dcache_clean_area
> +       .word   cpu_v7m_switch_mm
> +       .word   0                       @ cpu_v7m_set_pte_ext
> +       .word   legacy_pabort
> +       .size   v7m_processor_functions, . - v7m_processor_functions
> +
> +       .type   cpu_arch_name, #object
> +cpu_arch_name:
> +       .asciz  "armv7m"
> +       .size   cpu_arch_name, . - cpu_arch_name
> +
> +       .type   cpu_elf_name, #object
> +cpu_elf_name:
> +       .asciz  "v7m"
> +       .size   cpu_elf_name, . - cpu_elf_name
> +       .align
> +
> +       .section ".proc.info.init", #alloc, #execinstr
> +
> +       /*
> +        * Match any ARMv7-M processor core.
> +        */
> +       .type   __v7m_proc_info, #object
> +__v7m_proc_info:
> +       .long   0x000f0000              @ Required ID value
> +       .long   0x000f0000              @ Mask for ID
> +       .long   0                       @ proc_info_list.__cpu_mm_mmu_flags
> +       .long   0                       @ proc_info_list.__cpu_io_mmu_flags
> +       b       __v7m_setup             @ proc_info_list.__cpu_flush
> +       .long   cpu_arch_name
> +       .long   cpu_elf_name
> +       .long   HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
> +       .long   cpu_v7m_name
> +       .long   v7m_processor_functions @ proc_info_list.proc
> +       .long   0                       @ proc_info_list.tlb
> +       .long   0                       @ proc_info_list.user
> +       .long   0                       @ proc_info_list.cache

Surely here's where we could specify the nop_cache_fns for MULTI_CACHE?

> +       .size   __v7m_proc_info, . - __v7m_proc_info
> +
> +__v7m_setup_stack:
> +       .space  4 * 8                           @ 8 registers
> +__v7m_setup_stack_top:
> --

Hope that's helpful!

Jonny

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v8 3/3] Cortex-M3: Add support for exception handling
  2013-01-17  8:59 ` [PATCH v8 3/3] Cortex-M3: Add support for exception handling Uwe Kleine-König
@ 2013-01-24 18:21   ` Jonathan Austin
  0 siblings, 0 replies; 7+ messages in thread
From: Jonathan Austin @ 2013-01-24 18:21 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Uwe,

Here's the next bunch of comments on this series...

This patch, like the other one, introduces another lot of magic 
constants that I'd really rather have #defined as it is much cleaner to 
read.

Culprits for this are things in the 0xe000XXXX range, as well as 
bitmasks for certain parts of special registers.

Do we need something like arch/arm/include/asm/cp15.h for M?
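
To make that concrete, a first pass at such a header might look like the 
below - the addresses are the architected System Control Space ones, the 
V7M_SCB_* names are only my suggestion:

/* sketch for a new arch/arm/include/asm/v7m.h */
#define V7M_SCB_CPUID		0xe000ed00	/* CPUID base register */
#define V7M_SCB_ICSR		0xe000ed04	/* interrupt control/state */
#define V7M_SCB_ICSR_PENDSVSET	(1 << 28)
#define V7M_SCB_ICSR_PENDSVCLR	(1 << 27)
#define V7M_SCB_VTOR		0xe000ed08	/* vector table offset */
#define V7M_SCB_CCR		0xe000ed14	/* configuration and control */
#define V7M_SCB_CCR_STKALIGN	(1 << 9)
#define V7M_SCB_SHPR2		0xe000ed1c	/* SVCall priority */
#define V7M_SCB_SHPR3		0xe000ed20	/* PendSV/SysTick priority */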

The second major comment that arises from this patch is that the FP 
stuff looks untested/incomplete for M. I would prefer that there was no 
FP stuff (for example in this patch the optional skipping of the extra 
space in the FP stack-frame) and we added something later. For example, 
I imagine that when we come to implement FP for v7-M we might well find 
we don't want to use the larger stack-frame every time anyway!

These comments follow below, along with a few related to the idea from 
the previous patch of deriving EXC_RETURN rather than storing it in pt_regs.

The final thing that I would like to think a bit more about is whether 
we can align our fast and slow exception exits better with what happens 
for A-class. At the moment we're quite different (and for good reason, 
too), but is there a way to map the M-case more closely on to the 
existing code?

On 17/01/13 08:59, Uwe Kleine-König wrote:
> This patch implements the exception handling for the ARMv7-M
> architecture (pretty different from the A or R profiles).
>
> It bases on work done earlier by Catalin for 2.6.33 but was nearly
> completely rewritten to use a pt_regs layout compatible to the A
> profile.
>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> ---
>   arch/arm/kernel/entry-common.S |    4 ++
>   arch/arm/kernel/entry-header.S |  148 ++++++++++++++++++++++++++++++++++++++++
>   arch/arm/kernel/entry-v7m.S    |  134 ++++++++++++++++++++++++++++++++++++
>   arch/arm/kernel/process.c      |    4 ++
>   arch/arm/kernel/ptrace.c       |    3 +
>   5 files changed, 293 insertions(+)
>   create mode 100644 arch/arm/kernel/entry-v7m.S
>
> diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
> index 3471175..48d8dc0 100644
> --- a/arch/arm/kernel/entry-common.S
> +++ b/arch/arm/kernel/entry-common.S
> @@ -339,6 +339,9 @@ ENDPROC(ftrace_stub)
>
>   	.align	5
>   ENTRY(vector_swi)
> +#ifdef CONFIG_CPU_V7M
> +	v7m_exception_entry
> +#else
>   	sub	sp, sp, #S_FRAME_SIZE
>   	stmia	sp, {r0 - r12}			@ Calling r0 - r12
>    ARM(	add	r8, sp, #S_PC		)
> @@ -349,6 +352,7 @@ ENTRY(vector_swi)
>   	str	lr, [sp, #S_PC]			@ Save calling PC
>   	str	r8, [sp, #S_PSR]		@ Save CPSR
>   	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
> +#endif
>   	zero_fp
>
>   	/*
> diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
> index 9a8531e..33d9900 100644
> --- a/arch/arm/kernel/entry-header.S
> +++ b/arch/arm/kernel/entry-header.S
> @@ -44,6 +44,145 @@
>   #endif
>   	.endm
>
> +#ifdef CONFIG_CPU_V7M
> +/*
> + * ARMv7-M exception entry/exit macros.
> + *
> + * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are
> + * automatically saved on the current stack (32 words) before
> + * switching to the exception stack (SP_main).
> + *
> + * If exception is taken while in user mode, SP_main is
> + * empty. Otherwise, SP_main is aligned to 64 bit automatically
> + * (CCR.STKALIGN set).
> + *
> + * Linux assumes that the interrupts are disabled when entering an
> + * exception handler and it may BUG if this is not the case. Interrupts
> + * are disabled during entry and reenabled in the exit macro.
> + *
> + * v7m_exception_fast_exit is used when returning from interrupts.
> + *
> + * v7m_exception_slow_exit is used when returning from SVC or PendSV.
> + * When returning to kernel mode, we don't return from exception.
> + */
> +	.macro	v7m_exception_entry
> +	@ determine the location of the registers saved by the core during
> +	@ exception entry. Depending on the mode the cpu was in when the
> +	@ exception happened that is either on the main or the process stack.
> +	@ Bit 2 of EXC_RETURN stored in the lr register specifies which stack
> +	@ was used.
> +	tst	lr, #0x4
> +	mrsne	r12, psp
> +	moveq	r12, sp
> +
> +	@ we cannot rely on r0-r3 and r12 matching the value saved in the
> +	@ exception frame because of tail-chaining. So these have to be
> +	@ reloaded.
> +	ldmia	r12!, {r0-r3}
> +
> +	@ Linux expects to have irqs off. Do it here before taking stack space
> +	cpsid	i
> +
> +	sub	sp, #S_FRAME_SIZE-S_IP
> +	stmdb	sp!, {r0-r11}
> +
> +	@ load saved r12, lr, return address and xPSR.
> +	@ r0-r7 are used for signals and never touched from now on. Clobbering
> +	@ r8-r12 is OK.
> +	mov	r9, r12
> +	ldmia	r9!, {r8, r10-r12}
> +
> +	@ calculate the original stack pointer value.
> +	@ r9 currently points to the memory location just above the auto saved
> +	@ xPSR. If the FP extension is implemented and bit 4 of EXC_RETURN is 0
> +	@ then space was allocated for FP state. That is space for 18 32-bit
> +	@ values. (If FP extension is unimplemented, bit 4 is 1.)
> +	@ Additionally the cpu might automatically 8-byte align the stack. Bit 9
> +	@ of the saved xPSR specifies if stack aligning took place. In this case
> +	@ another 32-bit value is included in the stack.
> +
> +	tst	lr, #0x10

Can we name these constants please as the bitmasks are architecturally 
defined. I think if we name the constants the code is more 'self 
documenting' ;)

> +	addeq	r9, r9, #576

Okay, this looks weird, and I don't think it gets tested in the code 
path for efm32 as there's no FP support there, correct?

Do we want to add 576 to r9? It looks more like we should be adding 
4*18, not 32*18...

I think I probably don't need to go on about them in this patch too, but 
this is another bit of magic that I'd like to see #defined. If this was

32*FP_STACK_SIZE

then this would be clearer.
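
So perhaps something along these lines (the names are only illustrative; 
the extended frame adds 18 words - s0-s15, FPSCR and a reserved word):

#define EXC_RET_BASIC_FRAME	0x10		/* clear: extended (FP) frame */
#define FP_FRAME_SIZE		(18 * 4)	/* bytes, not bits! */

	tst	lr, #EXC_RET_BASIC_FRAME
	addeq	r9, r9, #FP_FRAME_SIZE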

> +
> +	tst	r12, 0x100
> +	addne	r9, r9, #4
> +
> +	@ store saved r12 using str to have a register to hold the base for stm
> +	str	r8, [sp, #S_IP]
> +	add	r8, sp, #S_SP
> +	@ store r13-r15, xPSR
> +	stmia	r8!, {r9-r12}
> +	@ store r0 once more and EXC_RETURN
> +	stmia	r8, {r0, lr}
> +	.endm
> +
> +	.macro	v7m_exception_fast_exit
> +	@ registers r0-r3 and r12 are automatically restored on exception
> +	@ return. r4-r7 were not clobbered in v7m_exception_entry so for
> +	@ correctness they don't need to be restored. So only r8-r11 must be
> +	@ restored here. The easiest way to do so is to restore r0-r7, too.
> +	ldmia	sp!, {r0-r11}
> +	add	sp, #S_FRAME_SIZE-S_IP
> +	cpsie	i
> +	bx	lr
> +	.endm

We don't set lr here - we seem to rely on whatever is calling 
v7m_exception_fast_exit to do that.

Is that safe? So far it works because we only do a fast exit from a few 
locations, but as the port grows I worry about whether that will always 
be true...

Secondly, I think we need a clrex somewhere in this path:

The ARM ARM states that if a strex is performed to any address other 
than the last one from which a ldrex was done AND the exclusive monitor 
is in the exclusive access state, then it is implementation defined 
whether the store succeeds. In ARMv7-M this must be treated as a 
programming error.

I think without a clrex here we can provoke this:

        (A)        (B)
        ...        ...
     ldrex(a)
        ...
-------context switch--------
                  ...
                ldrex(b)
                  ...
-------context switch--------
     strex(a)


And I believe the same is required in the slow path, as this could be 
needed after returning from a PendSV or even when switching between 
kernel threads if we've got a preemptible kernel.

And that leads me to think perhaps we should have some unified part of 
the exception return path?
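
To sketch what I mean for the fast path (the same clrex, or a shared 
macro containing it, would cover the slow path too):

	.macro	v7m_exception_fast_exit
	clrex				@ reset the local monitor so a strex
					@ after tail-chaining/a context switch
					@ cannot spuriously succeed
	ldmia	sp!, {r0-r11}
	add	sp, #S_FRAME_SIZE-S_IP
	cpsie	i
	bx	lr
	.endm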

> +
> +	.macro	v7m_exception_slow_exit ret_r0
> +	cpsid	i
> +	ldr	lr, [sp, #S_EXC_RET]	@ read exception LR

So, if we chose to remove ARM_EXC_RET from pt_regs we could instead load 
S_PSR, and check if the IPSR part of this is zero to decide whether 
we're going back to handler or thread mode. Put the appropriate EXC_RET 
in to lr and carry on as usual...

(typed in editor/intended for explanation, not compilation)

ldr	lr, [sp, #S_PSR]
ands	lr, lr, #IPSR_MASK
ldreq	lr, =ER_THREAD_PROCESS_BASIC	@ 0xfffffffd: back to thread mode
ldrne	lr, =ER_HANDLER_BASIC		@ 0xfffffff1: back to handler mode

Where IPSR_MASK is 0x1FF (bits 0-8 of the xPSR) and the ER_* values are 
the architected EXC_RETURN constants.

> +	tst     lr, #0x8
> +	bne	1f			@ go to thread mode using exception return
> +
> +	/*
> +	 * return to kernel thread
> +	 * sp is already set up (and might be unset in pt_regs), so only
> +	 * restore r0-r12 and pc
> +	 */
> +	ldmia	sp, {r0-r12}
> +	ldr	lr, [sp, #S_PC]
> +	add	sp, sp, #S_FRAME_SIZE
> +	cpsie	i

As I mentioned above, this path needs a clrex too (or we should have 
some unified return path with a clrex)

> +	bx	lr
> +
> +1:	/*
> +	 * return to userspace
> +	 */
> +
> +	@ read original r12, sp, lr, pc and xPSR
> +	add	r12, sp, #S_IP
> +	ldmia	r12, {r1-r5}
> +
> +	@ handle stack aligning
> +	tst	r5, #0x100
> +	subne	r2, r2, #4
> +
> +	@ skip over stack space for fp saving
> +	tst	lr, #0x10
> +	subeq	r2, r2, #576
> +

This is another bit/byte error, I presume?

We could simplify this code quite a lot by enforcing that we only ever 
use the standard (basic) exception frame size...

Given the extra overhead of saving all the FP registers, what do you 
think about doing that in a lazy fashion, a bit like we do for A-class?

This way we could always disable FP on return to userspace, and we'd not 
need to worry about the different-sized frames.

> +	@ write basic exception frame
> +	stmdb	r2!, {r1, r3-r5}
> +	ldmia	sp, {r1, r3-r5}
> +	.if	\ret_r0
> +	stmdb	r2!, {r0, r3-r5}
> +	.else
> +	stmdb	r2!, {r1, r3-r5}
> +	.endif
> +
> +	@ restore process sp
> +	msr	psp, r2
> +
> +	@ restore original r4-r11
> +	ldmia	sp!, {r0-r11}

The comment and the code don't match here.

Didn't we just set up a stack frame so that the hardware would restore 
r0-r3 for us? I guess perhaps you're doing this to save an instruction, 
but I had a quick chat with the hardware guys and they reckon we're 
better off fixing up sp with an add first, then ldmia-ing fewer registers.

Is there something I'm missing about why you've written back r0-r3 too?

> +
> +	@ restore main sp
> +	add	sp, sp, #S_FRAME_SIZE-S_IP
> +
> +	cpsie	i
> +	bx	lr
> +	.endm
> +#endif	/* CONFIG_CPU_V7M */
> +
>   	@
>   	@ Store/load the USER SP and LR registers by switching to the SYS
>   	@ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not
> @@ -131,6 +270,14 @@
>   	rfeia	sp!
>   	.endm
>
> +#ifdef CONFIG_CPU_V7M
> +	.macro	restore_user_regs, fast = 0, offset = 0
> +	.if	\offset
> +	add	sp, #\offset
> +	.endif
> +	v7m_exception_slow_exit ret_r0 = \fast
> +	.endm
> +#else	/* !CONFIG_CPU_V7M */
>   	.macro	restore_user_regs, fast = 0, offset = 0
>   	clrex					@ clear the exclusive monitor
>   	mov	r2, sp
> @@ -147,6 +294,7 @@
>   	add	sp, sp, #S_FRAME_SIZE - S_SP
>   	movs	pc, lr				@ return & move spsr_svc into cpsr
>   	.endm
> +#endif	/* CONFIG_CPU_V7M */
>
>   	.macro	get_thread_info, rd
>   	mov	\rd, sp
> diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
> new file mode 100644
> index 0000000..842394c
> --- /dev/null
> +++ b/arch/arm/kernel/entry-v7m.S
> @@ -0,0 +1,134 @@
> +/*
> + * linux/arch/arm/kernel/entry-v7m.S
> + *
> + * Copyright (C) 2008 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * Low-level vector interface routines for the ARMv7-M architecture
> + */
> +#include <asm/memory.h>
> +#include <asm/glue.h>
> +#include <asm/thread_notify.h>
> +
> +#include <mach/entry-macro.S>
> +
> +#include "entry-header.S"
> +
> +#ifdef CONFIG_PREEMPT
> +#error "CONFIG_PREEMPT not supported on the current ARMv7M implementation"
> +#endif
> +#ifdef CONFIG_TRACE_IRQFLAGS
> +#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation"
> +#endif
> +
> +__invalid_entry:
> +	v7m_exception_entry
> +	adr	r0, strerr
> +	mrs	r1, ipsr
> +	mov	r2, lr
> +	bl	printk
> +	mov	r0, sp
> +	bl	show_regs
> +1:	b	1b
> +ENDPROC(__invalid_entry)
> +
> +strerr:	.asciz	"\nUnhandled exception: IPSR = %08lx LR = %08lx\n"
> +
> +	.align	2
> +__irq_entry:
> +	v7m_exception_entry
> +
> +	@
> +	@ Invoke the IRQ handler
> +	@
> +	mrs	r0, ipsr
> +	and	r0, #0xff
> +	sub	r0, #16			@ IRQ number
> +	mov	r1, sp
> +	@ routine called with r0 = irq number, r1 = struct pt_regs *
> +	bl	asm_do_IRQ
> +
> +	@
> +	@ Check for any pending work if returning to user
> +	@
> +	ldr	lr, [sp, #S_EXC_RET]

Same kind of change as I describe above might work here, too.

It also struck me as a bit weird that we do this in __irq_entry and 
*not* in the fast exit path. What's the reasoning behind that? I know 
that at the moment the only user of the fast exit is the IRQ handler, 
but is that always going to be true?


> +	tst	lr, #0x8		@ check the return stack
> +	beq	2f			@ returning to handler mode
> +	get_thread_info tsk
> +	ldr	r1, [tsk, #TI_FLAGS]
> +	tst	r1, #_TIF_WORK_MASK
> +	beq	2f			@ no work pending
> +	ldr	r1, =0xe000ed04		@ ICSR
> +	mov	r0, #1 << 28		@ ICSR.PENDSVSET
> +	str	r0, [r1]		@ raise PendSV
> +
> +2:
> +	v7m_exception_fast_exit

At the moment this is the only place that we use fast_exit, and I wonder 
if there is scope to inline *some* of fast_exit here and split the rest 
of it off into a common exit path.

For example

fast_exit becomes
* new stuff at the end of __irq_entry
* common_exit

slow_exit becomes
* slow_exit
* common_exit

But if you have plans to use v7m_exception_fast_exit more broadly in the 
future then disregard this...

> +ENDPROC(__irq_entry)
> +
> +__pendsv_entry:
> +	v7m_exception_entry
> +
> +	ldr	r1, =0xe000ed04		@ ICSR
> +	mov	r0, #1 << 27		@ ICSR.PENDSVCLR
> +	str	r0, [r1]		@ clear PendSV
> +
> +	@ execute the pending work, including reschedule
> +	get_thread_info tsk
> +	mov	why, #0
> +	b	ret_to_user
> +ENDPROC(__pendsv_entry)
> +
> +/*
> + * Register switch for ARMv7-M processors.
> + * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
> + * previous and next are guaranteed not to be the same.
> + */
> +ENTRY(__switch_to)
> +	.fnstart
> +	.cantunwind
> +	add	ip, r1, #TI_CPU_SAVE

Out of curiosity, why ip here and not r12, as later in the code?

> +	stmia	ip!, {r4 - r11}		@ Store most regs on stack
> +	str	sp, [ip], #4
> +	str	lr, [ip], #4
> +	mov	r5, r0
> +	add	r4, r2, #TI_CPU_SAVE
> +	ldr	r0, =thread_notify_head
> +	mov	r1, #THREAD_NOTIFY_SWITCH
> +	bl	atomic_notifier_call_chain
> +	mov	ip, r4
> +	mov	r0, r5
> +	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
> +	ldr	sp, [ip]
> +	ldr	pc, [ip, #4]!
> +	.fnend
> +ENDPROC(__switch_to)
> +
> +	.data
> +	.align	8
> +/*
> + * Vector table (64 words => 256 bytes natural alignment)
> + */
> +ENTRY(vector_table)
> +	.long	0			@ 0 - Reset stack pointer
> +	.long	__invalid_entry		@ 1 - Reset
> +	.long	__invalid_entry		@ 2 - NMI
> +	.long	__invalid_entry		@ 3 - HardFault
> +	.long	__invalid_entry		@ 4 - MemManage
> +	.long	__invalid_entry		@ 5 - BusFault
> +	.long	__invalid_entry		@ 6 - UsageFault
> +	.long	__invalid_entry		@ 7 - Reserved
> +	.long	__invalid_entry		@ 8 - Reserved
> +	.long	__invalid_entry		@ 9 - Reserved
> +	.long	__invalid_entry		@ 10 - Reserved
> +	.long	vector_swi		@ 11 - SVCall
> +	.long	__invalid_entry		@ 12 - Debug Monitor
> +	.long	__invalid_entry		@ 13 - Reserved
> +	.long	__pendsv_entry		@ 14 - PendSV
> +	.long	__invalid_entry		@ 15 - SysTick
> +	.rept	64 - 16
> +	.long	__irq_entry		@ 16..64 - External Interrupts
> +	.endr
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 90084a6..3d745d4 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -387,6 +387,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   		*childregs = *regs;
>   		childregs->ARM_r0 = 0;
>   		childregs->ARM_sp = stack_start;
> +#if defined CONFIG_CPU_V7M
> +		/* Return to Thread mode with Process stack */
> +		childregs->ARM_EXC_RET = 0xfffffffdUL;
> +#endif

Another justification for deriving this instead of 
saving/storing/copying it - we could remove another #ifdef block.

>   	} else {
>   		memset(childregs, 0, sizeof(struct pt_regs));
>   		thread->cpu_context.r4 = stk_sz;
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 739db3a..55df1d5 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -87,6 +87,9 @@ static const struct pt_regs_offset regoffset_table[] = {
>   	REG_OFFSET_NAME(pc),
>   	REG_OFFSET_NAME(cpsr),
>   	REG_OFFSET_NAME(ORIG_r0),
> +#ifdef CONFIG_CPU_V7M
> +	REG_OFFSET_NAME(EXC_RET),
> +#endif
>   	REG_OFFSET_END,
>   };
>
>

Hope you find these comments useful!

Jonny

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v8 0/3] Cortex-M3 series once more
  2013-01-17  8:59 [PATCH v8 0/3] Cortex-M3 series once more Uwe Kleine-König
                   ` (2 preceding siblings ...)
  2013-01-17  8:59 ` [PATCH v8 3/3] Cortex-M3: Add support for exception handling Uwe Kleine-König
@ 2013-01-24 18:46 ` Jonathan Austin
  3 siblings, 0 replies; 7+ messages in thread
From: Jonathan Austin @ 2013-01-24 18:46 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Uwe,

On 17/01/13 08:59, Uwe Kleine-König wrote:
> Based on Jonny's comments I changed a few more details. The diff to v7
> can be found below after the diffstat. These are:
>
>   - describe why a #define for cr_alignment is OK in both the
>     commit log and a comment in the header file
>   - a work around for errata 752419 that newer gcc warns about
>   - add a few more comments for better readability
>   - a whitespace fix
>

Thanks for making these changes.

As you will see I've now reviewed the other two patches in the series and 
there are quite a few smaller things in there...

> I think the updates are minor enough to still allow the patches to go
> into v3.9-rc1. Russell, what do you think? You can pull them from
>

I think we should assess this once you've responded to the review of the 
other two patches.

As far as merging these patches goes, as well as the review comments on 
each patch, I still have the same reservations that I raised around v6 
of these about the fact that NVIC support and a platform are missing. 
This means there's no way to actually *use* the V7M code. I understand 
that you're prepared to commit to getting this in shape and ready to be 
merged once the base support goes in - is that right?

What sort of time-frame do you expect to be able to do this in?

I'm happy to help review/work on NVIC support if you need more eyes (and 
perhaps even minds!) on the problem, as well as test on the efm32 board.

> 	git://git.pengutronix.de/git/ukl/linux.git for-next
>
> still based on v3.7-rc1 + your commit "ARM: fix oops on initial entry to
> userspace with Thumb2 kernels".

Does it trivially re-base to v3.8-rc4?

>
> Catalin Marinas (1):
>    Cortex-M3: Add base support for Cortex-M3
>
> Uwe Kleine-König (2):
>    ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15
>    Cortex-M3: Add support for exception handling
>
>   arch/arm/include/asm/assembler.h   |   13 ++-
>   arch/arm/include/asm/cp15.h        |   16 +++-
>   arch/arm/include/asm/cputype.h     |    3 +
>   arch/arm/include/asm/glue-cache.h  |   25 ++++++
>   arch/arm/include/asm/glue-df.h     |    8 ++
>   arch/arm/include/asm/glue-proc.h   |    9 ++
>   arch/arm/include/asm/irqflags.h    |   22 +++--
>   arch/arm/include/asm/processor.h   |    7 ++
>   arch/arm/include/asm/ptrace.h      |    8 ++
>   arch/arm/include/asm/system_info.h |    1 +
>   arch/arm/include/uapi/asm/ptrace.h |   36 ++++++--
>   arch/arm/kernel/asm-offsets.c      |    3 +
>   arch/arm/kernel/entry-common.S     |    4 +
>   arch/arm/kernel/entry-header.S     |  148 +++++++++++++++++++++++++++++++++
>   arch/arm/kernel/entry-v7m.S        |  134 ++++++++++++++++++++++++++++++
>   arch/arm/kernel/head-common.S      |    9 +-
>   arch/arm/kernel/head-nommu.S       |    9 +-
>   arch/arm/kernel/process.c          |    4 +
>   arch/arm/kernel/ptrace.c           |    3 +
>   arch/arm/kernel/setup.c            |   13 ++-
>   arch/arm/kernel/traps.c            |    2 +
>   arch/arm/mm/alignment.c            |    2 +
>   arch/arm/mm/mmu.c                  |   17 ++++
>   arch/arm/mm/nommu.c                |    2 +
>   arch/arm/mm/proc-v7m.S             |  161 ++++++++++++++++++++++++++++++++++++
>   25 files changed, 637 insertions(+), 22 deletions(-)
>   create mode 100644 arch/arm/kernel/entry-v7m.S
>   create mode 100644 arch/arm/mm/proc-v7m.S
>
> The incremental changes since v7 are:
>
> diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
> index d814435..1f3262e 100644
> --- a/arch/arm/include/asm/cp15.h
> +++ b/arch/arm/include/asm/cp15.h
> @@ -86,6 +86,11 @@ static inline void set_copro_access(unsigned int val)
>
>   #else /* ifdef CONFIG_CPU_CP15 */
>
> +/*
> + * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the
> + * minds of the developers). Yielding 0 for machines without a cp15 (and making
> + * it read-only) is fine for most cases and saves quite some #ifdeffery.
> + */
>   #define cr_no_alignment	UL(0)
>   #define cr_alignment	UL(0)
>
> diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
> index 2ae7d1b..d3be66e 100644
> --- a/arch/arm/include/uapi/asm/ptrace.h
> +++ b/arch/arm/include/uapi/asm/ptrace.h
> @@ -144,7 +144,7 @@ struct pt_regs {
>   #define ARM_r1		uregs[1]
>   #define ARM_r0		uregs[0]
>   #define ARM_ORIG_r0	uregs[17]
> -#define ARM_EXC_RET    uregs[18]
> +#define ARM_EXC_RET	uregs[18]
>
>   /*
>    * The size of the user-visible VFP state as seen by PTRACE_GET/SETVFPREGS
> diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
> index a0991dc..842394c 100644
> --- a/arch/arm/kernel/entry-v7m.S
> +++ b/arch/arm/kernel/entry-v7m.S
> @@ -102,8 +102,8 @@ ENTRY(__switch_to)
>   	mov	ip, r4
>   	mov	r0, r5
>   	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
> -	ldr	sp, [ip], #4
> -	ldr	pc, [ip]
> +	ldr	sp, [ip]
> +	ldr	pc, [ip, #4]!
>   	.fnend
>   ENDPROC(__switch_to)
>
> diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
> index 2f560c5..5b391a6 100644
> --- a/arch/arm/kernel/head-common.S
> +++ b/arch/arm/kernel/head-common.S
> @@ -117,7 +117,7 @@ __mmap_switched_data:
>   #ifdef CONFIG_CPU_CP15
>   	.long	cr_alignment			@ r7
>   #else
> -	.long	0
> +	.long	0				@ r7
>   #endif
>   	.long	init_thread_union + THREAD_START_SP @ sp
>   	.size	__mmap_switched_data, . - __mmap_switched_data
> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index b675918..92abdb8 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -196,7 +196,7 @@ void adjust_cr(unsigned long mask, unsigned long set)
>   }
>   #endif
>
> -#else
> +#else /* ifdef CONFIG_CPU_CP15 */
>
>   static int __init early_cachepolicy(char *p)
>   {
> @@ -210,7 +210,7 @@ static int __init noalign_setup(char *__unused)
>   }
>   __setup("noalign", noalign_setup);
>
> -#endif
> +#endif /* ifdef CONFIG_CPU_CP15 / else */
>
>   #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
>   #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-01-24 18:46 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-17  8:59 [PATCH v8 0/3] Cortex-M3 series once more Uwe Kleine-König
2013-01-17  8:59 ` [PATCH v8 1/3] ARM: make cr_alignment read-only #ifndef CONFIG_CPU_CP15 Uwe Kleine-König
2013-01-17  8:59 ` [PATCH v8 2/3] Cortex-M3: Add base support for Cortex-M3 Uwe Kleine-König
2013-01-18 18:23   ` Jonathan Austin
2013-01-17  8:59 ` [PATCH v8 3/3] Cortex-M3: Add support for exception handling Uwe Kleine-König
2013-01-24 18:21   ` Jonathan Austin
2013-01-24 18:46 ` [PATCH v8 0/3] Cortex-M3 series once more Jonathan Austin
