* [PATCH 00/15] Initial MSA support
@ 2014-01-27 15:22 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:22 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This series introduces initial support for the MIPS SIMD Architecture
(MSA) ASE introduced with MIPSr5. This support allows MSA to be
detected & enabled for tasks which use it, and allows vector registers
(which are aliased with FP registers) to be context switched. MSA
implementations with vector register partitioning are not handled since
none currently exist, and a mechanism for exposing the vector registers
via ptrace remains to be added later.
The series applies atop the current mips-for-linux-next branch. An
earlier version of the first patch has been sent to the list before, but
is now included in this series to make its intent clearer.
Paul Burton (15):
mips: simplify FP context access
mips: update outdated comment
mips: move & rename fpu_emulator_{save,restore}_context
mips: don't require FPU on sigcontext setup/restore
mips: replace hardcoded 32 with NUM_FPU_REGS in ptrace
mips: clear upper bits of FP registers on emulator writes
mips: don't assume 64-bit FP registers for dump_{,task_}fpu
mips: don't assume 64-bit FP registers for FP regset
mips: don't assume 64-bit FP registers for context switch
mips: add MSA register definitions & access
mips: detect the MSA ASE
mips: basic MSA context switching support
mips: dumb MSA FP exception handler
mips: panic if vector register partitioning is implemented
mips: save/restore MSA context around signals
arch/mips/Kconfig | 20 ++
arch/mips/Makefile | 5 +
arch/mips/include/asm/asmmacro-32.h | 128 ++++++-------
arch/mips/include/asm/asmmacro.h | 319 +++++++++++++++++++++++++-------
arch/mips/include/asm/cpu-features.h | 6 +
arch/mips/include/asm/cpu-info.h | 1 +
arch/mips/include/asm/cpu.h | 1 +
arch/mips/include/asm/fpu.h | 2 +-
arch/mips/include/asm/mipsregs.h | 1 +
arch/mips/include/asm/msa.h | 199 ++++++++++++++++++++
arch/mips/include/asm/processor.h | 45 ++++-
arch/mips/include/asm/sigcontext.h | 2 +
arch/mips/include/asm/switch_to.h | 22 ++-
arch/mips/include/asm/thread_info.h | 4 +
arch/mips/include/uapi/asm/sigcontext.h | 8 +
arch/mips/kernel/asm-offsets.c | 69 +++++++
arch/mips/kernel/cpu-probe.c | 26 +++
arch/mips/kernel/genex.S | 2 +
arch/mips/kernel/proc.c | 1 +
arch/mips/kernel/process.c | 23 ++-
arch/mips/kernel/ptrace.c | 85 ++++++---
arch/mips/kernel/ptrace32.c | 25 +--
arch/mips/kernel/r4k_fpu.S | 213 +++++++++++++++++++++
arch/mips/kernel/r4k_switch.S | 58 ++++--
arch/mips/kernel/signal.c | 136 +++++++++++---
arch/mips/kernel/signal32.c | 134 ++++++++++++--
arch/mips/kernel/traps.c | 113 ++++++++++-
arch/mips/math-emu/cp1emu.c | 51 +++--
arch/mips/math-emu/kernel_linkage.c | 76 +-------
29 files changed, 1433 insertions(+), 342 deletions(-)
create mode 100644 arch/mips/include/asm/msa.h
--
1.8.5.3
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 01/15] mips: simplify FP context access
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch replaces the fpureg_t typedef with a "union fpureg" enabling
easier access to 32 & 64 bit values. This allows the access macros used
in cp1emu.c to be simplified somewhat. It will also make it easier to
expand the width of the FP registers as will be done in a future
patch in order to support the 128 bit registers introduced with MSA.
No behavioural change is intended by this patch.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
Changes in v2:
- Remove extraneous braces in SIFROMREG, SITOREG macros.
---
arch/mips/include/asm/fpu.h | 2 +-
arch/mips/include/asm/processor.h | 31 ++++++++++++++++++++++++++---
arch/mips/kernel/ptrace.c | 39 ++++++++++++++++---------------------
arch/mips/kernel/ptrace32.c | 25 ++++++++----------------
arch/mips/math-emu/cp1emu.c | 36 +++++++++++++++++++++-------------
arch/mips/math-emu/kernel_linkage.c | 21 +++++++++++---------
6 files changed, 88 insertions(+), 66 deletions(-)
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index cfe092f..8d57b71 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -178,7 +178,7 @@ static inline void restore_fp(struct task_struct *tsk)
_restore_fp(tsk);
}
-static inline fpureg_t *get_fpu_regs(struct task_struct *tsk)
+static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
{
if (tsk == current) {
preempt_disable();
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 3605b84..49a61be 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,8 +96,33 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
+#define FPU_REG_WIDTH 64
-typedef __u64 fpureg_t;
+union fpureg {
+ __u32 val32[FPU_REG_WIDTH / 32];
+ __u64 val64[FPU_REG_WIDTH / 64];
+};
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# define FPR_IDX(width, idx) (idx)
+#else
+# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+#endif
+
+#define BUILD_FPR_ACCESS(width) \
+static inline u##width get_fpr##width(union fpureg *fpr, unsigned idx) \
+{ \
+ return fpr->val##width[FPR_IDX(width, idx)]; \
+} \
+ \
+static inline void set_fpr##width(union fpureg *fpr, unsigned idx, \
+ u##width val) \
+{ \
+ fpr->val##width[FPR_IDX(width, idx)] = val; \
+}
+
+BUILD_FPR_ACCESS(32)
+BUILD_FPR_ACCESS(64)
/*
* It would be nice to add some more fields for emulator statistics, but there
@@ -107,7 +132,7 @@ typedef __u64 fpureg_t;
*/
struct mips_fpu_struct {
- fpureg_t fpr[NUM_FPU_REGS];
+ union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
};
@@ -284,7 +309,7 @@ struct thread_struct {
* Saved FPU/FPU emulator stuff \
*/ \
.fpu = { \
- .fpr = {0,}, \
+ .fpr = {{{0,},},}, \
.fcr31 = 0, \
}, \
/* \
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 7da9b76..624773e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -120,9 +120,10 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
return -EIO;
if (tsk_used_math(child)) {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
for (i = 0; i < 32; i++)
- __put_user(fregs[i], i + (__u64 __user *) data);
+ __put_user(get_fpr64(&fregs[i], 0),
+ i + (__u64 __user *)data);
} else {
for (i = 0; i < 32; i++)
__put_user((__u64) -1, i + (__u64 __user *) data);
@@ -158,7 +159,8 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
{
- fpureg_t *fregs;
+ union fpureg *fregs;
+ u64 fpr_val;
int i;
if (!access_ok(VERIFY_READ, data, 33 * 8))
@@ -166,8 +168,10 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
fregs = get_fpu_regs(child);
- for (i = 0; i < 32; i++)
- __get_user(fregs[i], i + (__u64 __user *) data);
+ for (i = 0; i < 32; i++) {
+ __get_user(fpr_val, i + (__u64 __user *)data);
+ set_fpr64(&fregs[i], 0, fpr_val);
+ }
__get_user(child->thread.fpu.fcr31, data + 64);
@@ -408,7 +412,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned long tmp = 0;
regs = task_pt_regs(child);
@@ -433,14 +437,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
#endif
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -548,7 +550,7 @@ long arch_ptrace(struct task_struct *child, long request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -563,19 +565,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
#endif
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index b8aa2dd..c394d8f 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -80,7 +80,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned int tmp;
regs = task_pt_regs(child);
@@ -103,13 +103,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -233,7 +231,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -247,18 +245,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 506925b..9144842 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -876,20 +876,28 @@ static inline int cop1_64bit(struct pt_regs *xcp)
#endif
}
-#define SIFROMREG(si, x) ((si) = cop1_64bit(xcp) || !(x & 1) ? \
- (int)ctx->fpr[x] : (int)(ctx->fpr[x & ~1] >> 32))
-
-#define SITOREG(si, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = \
- cop1_64bit(xcp) || !(x & 1) ? \
- ctx->fpr[x & ~1] >> 32 << 32 | (u32)(si) : \
- ctx->fpr[x & ~1] << 32 >> 32 | (u64)(si) << 32)
-
-#define SIFROMHREG(si, x) ((si) = (int)(ctx->fpr[x] >> 32))
-#define SITOHREG(si, x) (ctx->fpr[x] = \
- ctx->fpr[x] << 32 >> 32 | (u64)(si) << 32)
-
-#define DIFROMREG(di, x) ((di) = ctx->fpr[x & ~(cop1_64bit(xcp) == 0)])
-#define DITOREG(di, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = (di))
+#define SIFROMREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ (si) = get_fpr32(&ctx->fpr[x], 0); \
+ else \
+ (si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \
+} while (0)
+
+#define SITOREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ set_fpr32(&ctx->fpr[x], 0, si); \
+ else \
+ set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+} while (0)
+
+#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
+#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
+
+#define DIFROMREG(di, x) \
+ ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+
+#define DITOREG(di, x) \
+ set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 3aeae07..9b46213 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -40,9 +40,8 @@ void fpu_emulator_init_fpu(void)
}
current->thread.fpu.fcr31 = 0;
- for (i = 0; i < 32; i++) {
- current->thread.fpu.fpr[i] = SIGNALLING_NAN;
- }
+ for (i = 0; i < 32; i++)
+ set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
}
@@ -59,7 +58,8 @@ int fpu_emulator_save_context(struct sigcontext __user *sc)
for (i = 0; i < 32; i++) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -70,10 +70,11 @@ int fpu_emulator_restore_context(struct sigcontext __user *sc)
{
int i;
int err = 0;
+ u64 fpr_val;
for (i = 0; i < 32; i++) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -93,7 +94,8 @@ int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
for (i = 0; i < 32; i += inc) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -105,10 +107,11 @@ int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
int i;
int err = 0;
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+ u64 fpr_val;
for (i = 0; i < 32; i += inc) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 01/15] mips: simplify FP context access
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch replaces the fpureg_t typedef with a "union fpureg" enabling
easier access to 32 & 64 bit values. This allows the access macros used
in cp1emu.c to be simplified somewhat. It will also make it easier to
expand the width of the FP registers as will be done in a future
patch in order to support the 128 bit registers introduced with MSA.
No behavioural change is intended by this patch.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
Changes in v2:
- Remove extraneous braces in SIFROMREG, SITOREG macros.
---
arch/mips/include/asm/fpu.h | 2 +-
arch/mips/include/asm/processor.h | 31 ++++++++++++++++++++++++++---
arch/mips/kernel/ptrace.c | 39 ++++++++++++++++---------------------
arch/mips/kernel/ptrace32.c | 25 ++++++++----------------
arch/mips/math-emu/cp1emu.c | 36 +++++++++++++++++++++-------------
arch/mips/math-emu/kernel_linkage.c | 21 +++++++++++---------
6 files changed, 88 insertions(+), 66 deletions(-)
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index cfe092f..8d57b71 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -178,7 +178,7 @@ static inline void restore_fp(struct task_struct *tsk)
_restore_fp(tsk);
}
-static inline fpureg_t *get_fpu_regs(struct task_struct *tsk)
+static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
{
if (tsk == current) {
preempt_disable();
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 3605b84..49a61be 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,8 +96,33 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
+#define FPU_REG_WIDTH 64
-typedef __u64 fpureg_t;
+union fpureg {
+ __u32 val32[FPU_REG_WIDTH / 32];
+ __u64 val64[FPU_REG_WIDTH / 64];
+};
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# define FPR_IDX(width, idx) (idx)
+#else
+# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+#endif
+
+#define BUILD_FPR_ACCESS(width) \
+static inline u##width get_fpr##width(union fpureg *fpr, unsigned idx) \
+{ \
+ return fpr->val##width[FPR_IDX(width, idx)]; \
+} \
+ \
+static inline void set_fpr##width(union fpureg *fpr, unsigned idx, \
+ u##width val) \
+{ \
+ fpr->val##width[FPR_IDX(width, idx)] = val; \
+}
+
+BUILD_FPR_ACCESS(32)
+BUILD_FPR_ACCESS(64)
/*
* It would be nice to add some more fields for emulator statistics, but there
@@ -107,7 +132,7 @@ typedef __u64 fpureg_t;
*/
struct mips_fpu_struct {
- fpureg_t fpr[NUM_FPU_REGS];
+ union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
};
@@ -284,7 +309,7 @@ struct thread_struct {
* Saved FPU/FPU emulator stuff \
*/ \
.fpu = { \
- .fpr = {0,}, \
+ .fpr = {{{0,},},}, \
.fcr31 = 0, \
}, \
/* \
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 7da9b76..624773e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -120,9 +120,10 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
return -EIO;
if (tsk_used_math(child)) {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
for (i = 0; i < 32; i++)
- __put_user(fregs[i], i + (__u64 __user *) data);
+ __put_user(get_fpr64(&fregs[i], 0),
+ i + (__u64 __user *)data);
} else {
for (i = 0; i < 32; i++)
__put_user((__u64) -1, i + (__u64 __user *) data);
@@ -158,7 +159,8 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
{
- fpureg_t *fregs;
+ union fpureg *fregs;
+ u64 fpr_val;
int i;
if (!access_ok(VERIFY_READ, data, 33 * 8))
@@ -166,8 +168,10 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
fregs = get_fpu_regs(child);
- for (i = 0; i < 32; i++)
- __get_user(fregs[i], i + (__u64 __user *) data);
+ for (i = 0; i < 32; i++) {
+ __get_user(fpr_val, i + (__u64 __user *)data);
+ set_fpr64(&fregs[i], 0, fpr_val);
+ }
__get_user(child->thread.fpu.fcr31, data + 64);
@@ -408,7 +412,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned long tmp = 0;
regs = task_pt_regs(child);
@@ -433,14 +437,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
#endif
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -548,7 +550,7 @@ long arch_ptrace(struct task_struct *child, long request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -563,19 +565,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
#endif
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index b8aa2dd..c394d8f 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -80,7 +80,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned int tmp;
regs = task_pt_regs(child);
@@ -103,13 +103,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -233,7 +231,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -247,18 +245,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 506925b..9144842 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -876,20 +876,28 @@ static inline int cop1_64bit(struct pt_regs *xcp)
#endif
}
-#define SIFROMREG(si, x) ((si) = cop1_64bit(xcp) || !(x & 1) ? \
- (int)ctx->fpr[x] : (int)(ctx->fpr[x & ~1] >> 32))
-
-#define SITOREG(si, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = \
- cop1_64bit(xcp) || !(x & 1) ? \
- ctx->fpr[x & ~1] >> 32 << 32 | (u32)(si) : \
- ctx->fpr[x & ~1] << 32 >> 32 | (u64)(si) << 32)
-
-#define SIFROMHREG(si, x) ((si) = (int)(ctx->fpr[x] >> 32))
-#define SITOHREG(si, x) (ctx->fpr[x] = \
- ctx->fpr[x] << 32 >> 32 | (u64)(si) << 32)
-
-#define DIFROMREG(di, x) ((di) = ctx->fpr[x & ~(cop1_64bit(xcp) == 0)])
-#define DITOREG(di, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = (di))
+#define SIFROMREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ (si) = get_fpr32(&ctx->fpr[x], 0); \
+ else \
+ (si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \
+} while (0)
+
+#define SITOREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ set_fpr32(&ctx->fpr[x], 0, si); \
+ else \
+ set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+} while (0)
+
+#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
+#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
+
+#define DIFROMREG(di, x) \
+ ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+
+#define DITOREG(di, x) \
+ set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 3aeae07..9b46213 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -40,9 +40,8 @@ void fpu_emulator_init_fpu(void)
}
current->thread.fpu.fcr31 = 0;
- for (i = 0; i < 32; i++) {
- current->thread.fpu.fpr[i] = SIGNALLING_NAN;
- }
+ for (i = 0; i < 32; i++)
+ set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
}
@@ -59,7 +58,8 @@ int fpu_emulator_save_context(struct sigcontext __user *sc)
for (i = 0; i < 32; i++) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -70,10 +70,11 @@ int fpu_emulator_restore_context(struct sigcontext __user *sc)
{
int i;
int err = 0;
+ u64 fpr_val;
for (i = 0; i < 32; i++) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -93,7 +94,8 @@ int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
for (i = 0; i < 32; i += inc) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -105,10 +107,11 @@ int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
int i;
int err = 0;
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+ u64 fpr_val;
for (i = 0; i < 32; i += inc) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v3 01/15] mips: simplify FP context access
@ 2014-02-13 11:26 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-02-13 11:26 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton, Ralf Baechle
This patch replaces the fpureg_t typedef with a "union fpureg" enabling
easier access to 32 & 64 bit values. This allows the access macros used
in cp1emu.c to be simplified somewhat. It will also make it easier to
expand the width of the FP registers as will be done in a future
patch in order to support the 128 bit registers introduced with MSA.
No behavioural change is intended by this patch.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
---
Changes in v3:
- Fix the l_fmt case for __mips64 builds in fpu_emu.
Changes in v2:
- Remove extraneous braces in SIFROMREG, SITOREG macros.
---
arch/mips/include/asm/fpu.h | 2 +-
arch/mips/include/asm/processor.h | 31 ++++++++++++++++++++++++++---
arch/mips/kernel/ptrace.c | 39 ++++++++++++++++---------------------
arch/mips/kernel/ptrace32.c | 25 ++++++++----------------
arch/mips/math-emu/cp1emu.c | 37 ++++++++++++++++++++++-------------
arch/mips/math-emu/kernel_linkage.c | 21 +++++++++++---------
6 files changed, 90 insertions(+), 65 deletions(-)
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index cfe092f..8d57b71 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -178,7 +178,7 @@ static inline void restore_fp(struct task_struct *tsk)
_restore_fp(tsk);
}
-static inline fpureg_t *get_fpu_regs(struct task_struct *tsk)
+static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
{
if (tsk == current) {
preempt_disable();
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 3605b84..49a61be 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,8 +96,33 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
+#define FPU_REG_WIDTH 64
-typedef __u64 fpureg_t;
+union fpureg {
+ __u32 val32[FPU_REG_WIDTH / 32];
+ __u64 val64[FPU_REG_WIDTH / 64];
+};
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# define FPR_IDX(width, idx) (idx)
+#else
+# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+#endif
+
+#define BUILD_FPR_ACCESS(width) \
+static inline u##width get_fpr##width(union fpureg *fpr, unsigned idx) \
+{ \
+ return fpr->val##width[FPR_IDX(width, idx)]; \
+} \
+ \
+static inline void set_fpr##width(union fpureg *fpr, unsigned idx, \
+ u##width val) \
+{ \
+ fpr->val##width[FPR_IDX(width, idx)] = val; \
+}
+
+BUILD_FPR_ACCESS(32)
+BUILD_FPR_ACCESS(64)
/*
* It would be nice to add some more fields for emulator statistics, but there
@@ -107,7 +132,7 @@ typedef __u64 fpureg_t;
*/
struct mips_fpu_struct {
- fpureg_t fpr[NUM_FPU_REGS];
+ union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
};
@@ -284,7 +309,7 @@ struct thread_struct {
* Saved FPU/FPU emulator stuff \
*/ \
.fpu = { \
- .fpr = {0,}, \
+ .fpr = {{{0,},},}, \
.fcr31 = 0, \
}, \
/* \
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 7da9b76..624773e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -120,9 +120,10 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
return -EIO;
if (tsk_used_math(child)) {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
for (i = 0; i < 32; i++)
- __put_user(fregs[i], i + (__u64 __user *) data);
+ __put_user(get_fpr64(&fregs[i], 0),
+ i + (__u64 __user *)data);
} else {
for (i = 0; i < 32; i++)
__put_user((__u64) -1, i + (__u64 __user *) data);
@@ -158,7 +159,8 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
{
- fpureg_t *fregs;
+ union fpureg *fregs;
+ u64 fpr_val;
int i;
if (!access_ok(VERIFY_READ, data, 33 * 8))
@@ -166,8 +168,10 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
fregs = get_fpu_regs(child);
- for (i = 0; i < 32; i++)
- __get_user(fregs[i], i + (__u64 __user *) data);
+ for (i = 0; i < 32; i++) {
+ __get_user(fpr_val, i + (__u64 __user *)data);
+ set_fpr64(&fregs[i], 0, fpr_val);
+ }
__get_user(child->thread.fpu.fcr31, data + 64);
@@ -408,7 +412,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned long tmp = 0;
regs = task_pt_regs(child);
@@ -433,14 +437,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
#endif
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -548,7 +550,7 @@ long arch_ptrace(struct task_struct *child, long request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -563,19 +565,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
#endif
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index b8aa2dd..c394d8f 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -80,7 +80,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned int tmp;
regs = task_pt_regs(child);
@@ -103,13 +103,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -233,7 +231,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -247,18 +245,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 506925b..196cf1a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -876,20 +876,28 @@ static inline int cop1_64bit(struct pt_regs *xcp)
#endif
}
-#define SIFROMREG(si, x) ((si) = cop1_64bit(xcp) || !(x & 1) ? \
- (int)ctx->fpr[x] : (int)(ctx->fpr[x & ~1] >> 32))
+#define SIFROMREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ (si) = get_fpr32(&ctx->fpr[x], 0); \
+ else \
+ (si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \
+} while (0)
-#define SITOREG(si, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = \
- cop1_64bit(xcp) || !(x & 1) ? \
- ctx->fpr[x & ~1] >> 32 << 32 | (u32)(si) : \
- ctx->fpr[x & ~1] << 32 >> 32 | (u64)(si) << 32)
+#define SITOREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ set_fpr32(&ctx->fpr[x], 0, si); \
+ else \
+ set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+} while (0)
-#define SIFROMHREG(si, x) ((si) = (int)(ctx->fpr[x] >> 32))
-#define SITOHREG(si, x) (ctx->fpr[x] = \
- ctx->fpr[x] << 32 >> 32 | (u64)(si) << 32)
+#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
+#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
-#define DIFROMREG(di, x) ((di) = ctx->fpr[x & ~(cop1_64bit(xcp) == 0)])
-#define DITOREG(di, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = (di))
+#define DIFROMREG(di, x) \
+ ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+
+#define DITOREG(di, x) \
+ set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
@@ -1960,15 +1968,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
#if defined(__mips64)
case l_fmt:{
+ u64 bits;
+ DIFROMREG(bits, MIPSInst_FS(ir));
+
switch (MIPSInst_FUNC(ir)) {
case fcvts_op:
/* convert long to single precision real */
- rv.s = ieee754sp_flong(ctx->fpr[MIPSInst_FS(ir)]);
+ rv.s = ieee754sp_flong(bits);
rfmt = s_fmt;
goto copcsr;
case fcvtd_op:
/* convert long to double precision real */
- rv.d = ieee754dp_flong(ctx->fpr[MIPSInst_FS(ir)]);
+ rv.d = ieee754dp_flong(bits);
rfmt = d_fmt;
goto copcsr;
default:
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 3aeae07..9b46213 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -40,9 +40,8 @@ void fpu_emulator_init_fpu(void)
}
current->thread.fpu.fcr31 = 0;
- for (i = 0; i < 32; i++) {
- current->thread.fpu.fpr[i] = SIGNALLING_NAN;
- }
+ for (i = 0; i < 32; i++)
+ set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
}
@@ -59,7 +58,8 @@ int fpu_emulator_save_context(struct sigcontext __user *sc)
for (i = 0; i < 32; i++) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -70,10 +70,11 @@ int fpu_emulator_restore_context(struct sigcontext __user *sc)
{
int i;
int err = 0;
+ u64 fpr_val;
for (i = 0; i < 32; i++) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -93,7 +94,8 @@ int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
for (i = 0; i < 32; i += inc) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -105,10 +107,11 @@ int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
int i;
int err = 0;
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+ u64 fpr_val;
for (i = 0; i < 32; i += inc) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v3 01/15] mips: simplify FP context access
@ 2014-02-13 11:26 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-02-13 11:26 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton, Ralf Baechle
This patch replaces the fpureg_t typedef with a "union fpureg" enabling
easier access to 32 & 64 bit values. This allows the access macros used
in cp1emu.c to be simplified somewhat. It will also make it easier to
expand the width of the FP registers as will be done in a future
patch in order to support the 128 bit registers introduced with MSA.
No behavioural change is intended by this patch.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
---
Changes in v3:
- Fix the l_fmt case for __mips64 builds in fpu_emu.
Changes in v2:
- Remove extraneous braces in SIFROMREG, SITOREG macros.
---
arch/mips/include/asm/fpu.h | 2 +-
arch/mips/include/asm/processor.h | 31 ++++++++++++++++++++++++++---
arch/mips/kernel/ptrace.c | 39 ++++++++++++++++---------------------
arch/mips/kernel/ptrace32.c | 25 ++++++++----------------
arch/mips/math-emu/cp1emu.c | 37 ++++++++++++++++++++++-------------
arch/mips/math-emu/kernel_linkage.c | 21 +++++++++++---------
6 files changed, 90 insertions(+), 65 deletions(-)
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index cfe092f..8d57b71 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -178,7 +178,7 @@ static inline void restore_fp(struct task_struct *tsk)
_restore_fp(tsk);
}
-static inline fpureg_t *get_fpu_regs(struct task_struct *tsk)
+static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
{
if (tsk == current) {
preempt_disable();
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 3605b84..49a61be 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,8 +96,33 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
+#define FPU_REG_WIDTH 64
-typedef __u64 fpureg_t;
+union fpureg {
+ __u32 val32[FPU_REG_WIDTH / 32];
+ __u64 val64[FPU_REG_WIDTH / 64];
+};
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# define FPR_IDX(width, idx) (idx)
+#else
+# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+#endif
+
+#define BUILD_FPR_ACCESS(width) \
+static inline u##width get_fpr##width(union fpureg *fpr, unsigned idx) \
+{ \
+ return fpr->val##width[FPR_IDX(width, idx)]; \
+} \
+ \
+static inline void set_fpr##width(union fpureg *fpr, unsigned idx, \
+ u##width val) \
+{ \
+ fpr->val##width[FPR_IDX(width, idx)] = val; \
+}
+
+BUILD_FPR_ACCESS(32)
+BUILD_FPR_ACCESS(64)
/*
* It would be nice to add some more fields for emulator statistics, but there
@@ -107,7 +132,7 @@ typedef __u64 fpureg_t;
*/
struct mips_fpu_struct {
- fpureg_t fpr[NUM_FPU_REGS];
+ union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
};
@@ -284,7 +309,7 @@ struct thread_struct {
* Saved FPU/FPU emulator stuff \
*/ \
.fpu = { \
- .fpr = {0,}, \
+ .fpr = {{{0,},},}, \
.fcr31 = 0, \
}, \
/* \
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 7da9b76..624773e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -120,9 +120,10 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
return -EIO;
if (tsk_used_math(child)) {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
for (i = 0; i < 32; i++)
- __put_user(fregs[i], i + (__u64 __user *) data);
+ __put_user(get_fpr64(&fregs[i], 0),
+ i + (__u64 __user *)data);
} else {
for (i = 0; i < 32; i++)
__put_user((__u64) -1, i + (__u64 __user *) data);
@@ -158,7 +159,8 @@ int ptrace_getfpregs(struct task_struct *child, __u32 __user *data)
int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
{
- fpureg_t *fregs;
+ union fpureg *fregs;
+ u64 fpr_val;
int i;
if (!access_ok(VERIFY_READ, data, 33 * 8))
@@ -166,8 +168,10 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
fregs = get_fpu_regs(child);
- for (i = 0; i < 32; i++)
- __get_user(fregs[i], i + (__u64 __user *) data);
+ for (i = 0; i < 32; i++) {
+ __get_user(fpr_val, i + (__u64 __user *)data);
+ set_fpr64(&fregs[i], 0, fpr_val);
+ }
__get_user(child->thread.fpu.fcr31, data + 64);
@@ -408,7 +412,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned long tmp = 0;
regs = task_pt_regs(child);
@@ -433,14 +437,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
#endif
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -548,7 +550,7 @@ long arch_ptrace(struct task_struct *child, long request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -563,19 +565,12 @@ long arch_ptrace(struct task_struct *child, long request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
#endif
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index b8aa2dd..c394d8f 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -80,7 +80,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
/* Read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
struct pt_regs *regs;
- fpureg_t *fregs;
+ union fpureg *fregs;
unsigned int tmp;
regs = task_pt_regs(child);
@@ -103,13 +103,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1)
- tmp = fregs[(addr & ~1) - 32] >> 32;
- else
- tmp = fregs[addr - 32];
+ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1);
break;
}
- tmp = fregs[addr - FPR_BASE];
+ tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -233,7 +231,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
regs->regs[addr] = data;
break;
case FPR_BASE ... FPR_BASE + 31: {
- fpureg_t *fregs = get_fpu_regs(child);
+ union fpureg *fregs = get_fpu_regs(child);
if (!tsk_used_math(child)) {
/* FP not yet used */
@@ -247,18 +245,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* order bits of the values stored in the even
* registers - unless we're using r2k_switch.S.
*/
- if (addr & 1) {
- fregs[(addr & ~1) - FPR_BASE] &=
- 0xffffffff;
- fregs[(addr & ~1) - FPR_BASE] |=
- ((u64)data) << 32;
- } else {
- fregs[addr - FPR_BASE] &= ~0xffffffffLL;
- fregs[addr - FPR_BASE] |= data;
- }
+ set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
+ addr & 1, data);
break;
}
- fregs[addr - FPR_BASE] = data;
+ set_fpr64(&fregs[addr - FPR_BASE], 0, data);
break;
}
case PC:
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 506925b..196cf1a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -876,20 +876,28 @@ static inline int cop1_64bit(struct pt_regs *xcp)
#endif
}
-#define SIFROMREG(si, x) ((si) = cop1_64bit(xcp) || !(x & 1) ? \
- (int)ctx->fpr[x] : (int)(ctx->fpr[x & ~1] >> 32))
+#define SIFROMREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ (si) = get_fpr32(&ctx->fpr[x], 0); \
+ else \
+ (si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \
+} while (0)
-#define SITOREG(si, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = \
- cop1_64bit(xcp) || !(x & 1) ? \
- ctx->fpr[x & ~1] >> 32 << 32 | (u32)(si) : \
- ctx->fpr[x & ~1] << 32 >> 32 | (u64)(si) << 32)
+#define SITOREG(si, x) do { \
+ if (cop1_64bit(xcp)) \
+ set_fpr32(&ctx->fpr[x], 0, si); \
+ else \
+ set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+} while (0)
-#define SIFROMHREG(si, x) ((si) = (int)(ctx->fpr[x] >> 32))
-#define SITOHREG(si, x) (ctx->fpr[x] = \
- ctx->fpr[x] << 32 >> 32 | (u64)(si) << 32)
+#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
+#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
-#define DIFROMREG(di, x) ((di) = ctx->fpr[x & ~(cop1_64bit(xcp) == 0)])
-#define DITOREG(di, x) (ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = (di))
+#define DIFROMREG(di, x) \
+ ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+
+#define DITOREG(di, x) \
+ set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
@@ -1960,15 +1968,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
#if defined(__mips64)
case l_fmt:{
+ u64 bits;
+ DIFROMREG(bits, MIPSInst_FS(ir));
+
switch (MIPSInst_FUNC(ir)) {
case fcvts_op:
/* convert long to single precision real */
- rv.s = ieee754sp_flong(ctx->fpr[MIPSInst_FS(ir)]);
+ rv.s = ieee754sp_flong(bits);
rfmt = s_fmt;
goto copcsr;
case fcvtd_op:
/* convert long to double precision real */
- rv.d = ieee754dp_flong(ctx->fpr[MIPSInst_FS(ir)]);
+ rv.d = ieee754dp_flong(bits);
rfmt = d_fmt;
goto copcsr;
default:
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 3aeae07..9b46213 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -40,9 +40,8 @@ void fpu_emulator_init_fpu(void)
}
current->thread.fpu.fcr31 = 0;
- for (i = 0; i < 32; i++) {
- current->thread.fpu.fpr[i] = SIGNALLING_NAN;
- }
+ for (i = 0; i < 32; i++)
+ set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
}
@@ -59,7 +58,8 @@ int fpu_emulator_save_context(struct sigcontext __user *sc)
for (i = 0; i < 32; i++) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -70,10 +70,11 @@ int fpu_emulator_restore_context(struct sigcontext __user *sc)
{
int i;
int err = 0;
+ u64 fpr_val;
for (i = 0; i < 32; i++) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -93,7 +94,8 @@ int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
for (i = 0; i < 32; i += inc) {
err |=
- __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
}
err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
@@ -105,10 +107,11 @@ int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
int i;
int err = 0;
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+ u64 fpr_val;
for (i = 0; i < 32; i += inc) {
- err |=
- __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 02/15] mips: update outdated comment
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
The hard-coded offsets mentioned in this comment seem to not exist
anymore, so remove mention of them from the comment.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
arch/mips/include/asm/processor.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 49a61be..50cf4c3 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -125,10 +125,9 @@ BUILD_FPR_ACCESS(32)
BUILD_FPR_ACCESS(64)
/*
- * It would be nice to add some more fields for emulator statistics, but there
- * are a number of fixed offsets in offset.h and elsewhere that would have to
- * be recalculated by hand. So the additional information will be private to
- * the FPU emulator for now. See asm-mips/fpu_emulator.h.
+ * It would be nice to add some more fields for emulator statistics,
+ * the additional information is private to the FPU emulator for now.
+ * See arch/mips/include/asm/fpu_emulator.h.
*/
struct mips_fpu_struct {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 02/15] mips: update outdated comment
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
The hard-coded offsets mentioned in this comment seem to not exist
anymore, so remove mention of them from the comment.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
arch/mips/include/asm/processor.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 49a61be..50cf4c3 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -125,10 +125,9 @@ BUILD_FPR_ACCESS(32)
BUILD_FPR_ACCESS(64)
/*
- * It would be nice to add some more fields for emulator statistics, but there
- * are a number of fixed offsets in offset.h and elsewhere that would have to
- * be recalculated by hand. So the additional information will be private to
- * the FPU emulator for now. See asm-mips/fpu_emulator.h.
+ * It would be nice to add some more fields for emulator statistics,
+ * the additional information is private to the FPU emulator for now.
+ * See arch/mips/include/asm/fpu_emulator.h.
*/
struct mips_fpu_struct {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 03/15] mips: move & rename fpu_emulator_{save,restore}_context
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
These functions aren't directly related to the FPU emulator at all, they
simply copy between a thread's saved context & a sigcontext. Thus move
them to the appropriate signal files & rename them accordingly. This
makes it clearer that the functions don't require the FPU emulator in
any way.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
arch/mips/kernel/signal.c | 45 ++++++++++++++++++----
arch/mips/kernel/signal32.c | 43 ++++++++++++++++++---
arch/mips/math-emu/kernel_linkage.c | 75 -------------------------------------
3 files changed, 76 insertions(+), 87 deletions(-)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5199563..b7e4614 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -46,9 +46,6 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
-extern asmlinkage int fpu_emulator_save_context(struct sigcontext __user *sc);
-extern asmlinkage int fpu_emulator_restore_context(struct sigcontext __user *sc);
-
struct sigframe {
u32 sf_ass[4]; /* argument save space for o32 */
u32 sf_pad[2]; /* Was: signal trampoline */
@@ -64,6 +61,40 @@ struct rt_sigframe {
};
/*
+ * Thread saved context copy to/from a signal context presumed to be on the
+ * user stack, and therefore accessed with appropriate macros from uaccess.h.
+ */
+static int copy_fp_to_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < 32; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 fpr_val;
+
+ for (i = 0; i < 32; i++) {
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+ }
+ err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+/*
* Helper routines
*/
static int protected_save_fp_context(struct sigcontext __user *sc)
@@ -595,14 +626,14 @@ static int smp_save_fp_context(struct sigcontext __user *sc)
{
return raw_cpu_has_fpu
? _save_fp_context(sc)
- : fpu_emulator_save_context(sc);
+ : copy_fp_to_sigcontext(sc);
}
static int smp_restore_fp_context(struct sigcontext __user *sc)
{
return raw_cpu_has_fpu
? _restore_fp_context(sc)
- : fpu_emulator_restore_context(sc);
+ : copy_fp_from_sigcontext(sc);
}
#endif
@@ -617,8 +648,8 @@ static int signal_setup(void)
save_fp_context = _save_fp_context;
restore_fp_context = _restore_fp_context;
} else {
- save_fp_context = fpu_emulator_save_context;
- restore_fp_context = fpu_emulator_restore_context;
+ save_fp_context = copy_fp_to_sigcontext;
+ restore_fp_context = copy_fp_from_sigcontext;
}
#endif
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 3d60f77..dc09206 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -42,9 +42,6 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
-extern asmlinkage int fpu_emulator_save_context32(struct sigcontext32 __user *sc);
-extern asmlinkage int fpu_emulator_restore_context32(struct sigcontext32 __user *sc);
-
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -78,6 +75,42 @@ struct rt_sigframe32 {
};
/*
+ * Thread saved context copy to/from a signal context presumed to be on the
+ * user stack, and therefore accessed with appropriate macros from uaccess.h.
+ */
+static int copy_fp_to_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+
+ for (i = 0; i < 32; i += inc) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+ u64 fpr_val;
+
+ for (i = 0; i < 32; i += inc) {
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+ }
+ err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+/*
* sigcontext handlers
*/
static int protected_save_fp_context32(struct sigcontext32 __user *sc)
@@ -566,8 +599,8 @@ static int signal32_init(void)
save_fp_context32 = _save_fp_context32;
restore_fp_context32 = _restore_fp_context32;
} else {
- save_fp_context32 = fpu_emulator_save_context32;
- restore_fp_context32 = fpu_emulator_restore_context32;
+ save_fp_context32 = copy_fp_to_sigcontext32;
+ restore_fp_context32 = copy_fp_from_sigcontext32;
}
return 0;
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 9b46213..eb58a85 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -43,78 +43,3 @@ void fpu_emulator_init_fpu(void)
for (i = 0; i < 32; i++)
set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
}
-
-
-/*
- * Emulator context save/restore to/from a signal context
- * presumed to be on the user stack, and therefore accessed
- * with appropriate macros from uaccess.h
- */
-
-int fpu_emulator_save_context(struct sigcontext __user *sc)
-{
- int i;
- int err = 0;
-
- for (i = 0; i < 32; i++) {
- err |=
- __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
- &sc->sc_fpregs[i]);
- }
- err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-
-int fpu_emulator_restore_context(struct sigcontext __user *sc)
-{
- int i;
- int err = 0;
- u64 fpr_val;
-
- for (i = 0; i < 32; i++) {
- err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
- set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
- }
- err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-
-#ifdef CONFIG_64BIT
-/*
- * This is the o32 version
- */
-
-int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
-{
- int i;
- int err = 0;
- int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
-
- for (i = 0; i < 32; i += inc) {
- err |=
- __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
- &sc->sc_fpregs[i]);
- }
- err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-
-int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
-{
- int i;
- int err = 0;
- int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
- u64 fpr_val;
-
- for (i = 0; i < 32; i += inc) {
- err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
- set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
- }
- err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-#endif
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 03/15] mips: move & rename fpu_emulator_{save,restore}_context
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
These functions aren't directly related to the FPU emulator at all, they
simply copy between a thread's saved context & a sigcontext. Thus move
them to the appropriate signal files & rename them accordingly. This
makes it clearer that the functions don't require the FPU emulator in
any way.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
arch/mips/kernel/signal.c | 45 ++++++++++++++++++----
arch/mips/kernel/signal32.c | 43 ++++++++++++++++++---
arch/mips/math-emu/kernel_linkage.c | 75 -------------------------------------
3 files changed, 76 insertions(+), 87 deletions(-)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5199563..b7e4614 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -46,9 +46,6 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
-extern asmlinkage int fpu_emulator_save_context(struct sigcontext __user *sc);
-extern asmlinkage int fpu_emulator_restore_context(struct sigcontext __user *sc);
-
struct sigframe {
u32 sf_ass[4]; /* argument save space for o32 */
u32 sf_pad[2]; /* Was: signal trampoline */
@@ -64,6 +61,40 @@ struct rt_sigframe {
};
/*
+ * Thread saved context copy to/from a signal context presumed to be on the
+ * user stack, and therefore accessed with appropriate macros from uaccess.h.
+ */
+static int copy_fp_to_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < 32; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 fpr_val;
+
+ for (i = 0; i < 32; i++) {
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+ }
+ err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+/*
* Helper routines
*/
static int protected_save_fp_context(struct sigcontext __user *sc)
@@ -595,14 +626,14 @@ static int smp_save_fp_context(struct sigcontext __user *sc)
{
return raw_cpu_has_fpu
? _save_fp_context(sc)
- : fpu_emulator_save_context(sc);
+ : copy_fp_to_sigcontext(sc);
}
static int smp_restore_fp_context(struct sigcontext __user *sc)
{
return raw_cpu_has_fpu
? _restore_fp_context(sc)
- : fpu_emulator_restore_context(sc);
+ : copy_fp_from_sigcontext(sc);
}
#endif
@@ -617,8 +648,8 @@ static int signal_setup(void)
save_fp_context = _save_fp_context;
restore_fp_context = _restore_fp_context;
} else {
- save_fp_context = fpu_emulator_save_context;
- restore_fp_context = fpu_emulator_restore_context;
+ save_fp_context = copy_fp_to_sigcontext;
+ restore_fp_context = copy_fp_from_sigcontext;
}
#endif
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 3d60f77..dc09206 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -42,9 +42,6 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
-extern asmlinkage int fpu_emulator_save_context32(struct sigcontext32 __user *sc);
-extern asmlinkage int fpu_emulator_restore_context32(struct sigcontext32 __user *sc);
-
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -78,6 +75,42 @@ struct rt_sigframe32 {
};
/*
+ * Thread saved context copy to/from a signal context presumed to be on the
+ * user stack, and therefore accessed with appropriate macros from uaccess.h.
+ */
+static int copy_fp_to_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+
+ for (i = 0; i < 32; i += inc) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &sc->sc_fpregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
+ u64 fpr_val;
+
+ for (i = 0; i < 32; i += inc) {
+ err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+ }
+ err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
+
+ return err;
+}
+
+/*
* sigcontext handlers
*/
static int protected_save_fp_context32(struct sigcontext32 __user *sc)
@@ -566,8 +599,8 @@ static int signal32_init(void)
save_fp_context32 = _save_fp_context32;
restore_fp_context32 = _restore_fp_context32;
} else {
- save_fp_context32 = fpu_emulator_save_context32;
- restore_fp_context32 = fpu_emulator_restore_context32;
+ save_fp_context32 = copy_fp_to_sigcontext32;
+ restore_fp_context32 = copy_fp_from_sigcontext32;
}
return 0;
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 9b46213..eb58a85 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -43,78 +43,3 @@ void fpu_emulator_init_fpu(void)
for (i = 0; i < 32; i++)
set_fpr64(&current->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
}
-
-
-/*
- * Emulator context save/restore to/from a signal context
- * presumed to be on the user stack, and therefore accessed
- * with appropriate macros from uaccess.h
- */
-
-int fpu_emulator_save_context(struct sigcontext __user *sc)
-{
- int i;
- int err = 0;
-
- for (i = 0; i < 32; i++) {
- err |=
- __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
- &sc->sc_fpregs[i]);
- }
- err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-
-int fpu_emulator_restore_context(struct sigcontext __user *sc)
-{
- int i;
- int err = 0;
- u64 fpr_val;
-
- for (i = 0; i < 32; i++) {
- err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
- set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
- }
- err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-
-#ifdef CONFIG_64BIT
-/*
- * This is the o32 version
- */
-
-int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
-{
- int i;
- int err = 0;
- int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
-
- for (i = 0; i < 32; i += inc) {
- err |=
- __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
- &sc->sc_fpregs[i]);
- }
- err |= __put_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-
-int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
-{
- int i;
- int err = 0;
- int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
- u64 fpr_val;
-
- for (i = 0; i < 32; i += inc) {
- err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
- set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
- }
- err |= __get_user(current->thread.fpu.fcr31, &sc->sc_fpc_csr);
-
- return err;
-}
-#endif
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 04/15] mips: don't require FPU on sigcontext setup/restore
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
When a task which has used the FPU at some point in its past takes a
signal the kernel would previously always require the task to take
ownership of the FPU whilst setting up or restoring from the sigcontext.
That means that if the task has not used the FPU within this timeslice
then the kernel would enable the FPU, restore the task's FP context into
FPU registers and then save them into the sigcontext. This seems
inefficient, and if the signal handler doesn't use FP then enabling the
FPU & the extra memory accesses are entirely wasted work.
This patch modifies the sigcontext setup & restore code to copy directly
between the task's saved FP context & the sigcontext for any tasks which
have used FP in the past but are not currently the FPU owner (i.e. have
not used FP in this timeslice).
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
arch/mips/kernel/signal.c | 22 ++++++++++++++--------
arch/mips/kernel/signal32.c | 22 ++++++++++++++--------
2 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index b7e4614..e0178e1 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -102,10 +102,13 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
int err;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(1);
- if (!err)
- err = save_fp_context(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = save_fp_context(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_to_sigcontext(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
@@ -123,10 +126,13 @@ static int protected_restore_fp_context(struct sigcontext __user *sc)
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(0);
- if (!err)
- err = restore_fp_context(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = restore_fp_context(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_from_sigcontext(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index dc09206..aec5821 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -118,10 +118,13 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
int err;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(1);
- if (!err)
- err = save_fp_context32(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = save_fp_context32(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_to_sigcontext32(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
@@ -139,10 +142,13 @@ static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(0);
- if (!err)
- err = restore_fp_context32(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = restore_fp_context32(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_from_sigcontext32(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* [PATCH 04/15] mips: don't require FPU on sigcontext setup/restore
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
When a task which has used the FPU at some point in its past takes a
signal the kernel would previously always require the task to take
ownership of the FPU whilst setting up or restoring from the sigcontext.
That means that if the task has not used the FPU within this timeslice
then the kernel would enable the FPU, restore the task's FP context into
FPU registers and then save them into the sigcontext. This seems
inefficient, and if the signal handler doesn't use FP then enabling the
FPU & the extra memory accesses are entirely wasted work.
This patch modifies the sigcontext setup & restore code to copy directly
between the task's saved FP context & the sigcontext for any tasks which
have used FP in the past but are not currently the FPU owner (i.e. have
not used FP in this timeslice).
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Qais Yousef <qais.yousef@imgtec.com>
---
arch/mips/kernel/signal.c | 22 ++++++++++++++--------
arch/mips/kernel/signal32.c | 22 ++++++++++++++--------
2 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index b7e4614..e0178e1 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -102,10 +102,13 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
int err;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(1);
- if (!err)
- err = save_fp_context(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = save_fp_context(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_to_sigcontext(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
@@ -123,10 +126,13 @@ static int protected_restore_fp_context(struct sigcontext __user *sc)
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(0);
- if (!err)
- err = restore_fp_context(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = restore_fp_context(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_from_sigcontext(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index dc09206..aec5821 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -118,10 +118,13 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
int err;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(1);
- if (!err)
- err = save_fp_context32(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = save_fp_context32(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_to_sigcontext32(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
@@ -139,10 +142,13 @@ static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
- err = own_fpu_inatomic(0);
- if (!err)
- err = restore_fp_context32(sc); /* this might fail */
- unlock_fpu_owner();
+ if (is_fpu_owner()) {
+ err = restore_fp_context32(sc);
+ unlock_fpu_owner();
+ } else {
+ unlock_fpu_owner();
+ err = copy_fp_from_sigcontext32(sc);
+ }
if (likely(!err))
break;
/* touch the sigcontext and try again */
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 05/15] mips: replace hardcoded 32 with NUM_FPU_REGS in ptrace
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
NUM_FPU_REGS just makes it clearer what's going on, rather than the
magic hard coded 32.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/signal.c | 4 ++--
arch/mips/kernel/signal32.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index e0178e1..0f97c7d 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -69,7 +69,7 @@ static int copy_fp_to_sigcontext(struct sigcontext __user *sc)
int i;
int err = 0;
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < NUM_FPU_REGS; i++) {
err |=
__put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
&sc->sc_fpregs[i]);
@@ -85,7 +85,7 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
int err = 0;
u64 fpr_val;
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < NUM_FPU_REGS; i++) {
err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index aec5821..bae2e6e 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -84,7 +84,7 @@ static int copy_fp_to_sigcontext32(struct sigcontext32 __user *sc)
int err = 0;
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
- for (i = 0; i < 32; i += inc) {
+ for (i = 0; i < NUM_FPU_REGS; i += inc) {
err |=
__put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
&sc->sc_fpregs[i]);
@@ -101,7 +101,7 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
u64 fpr_val;
- for (i = 0; i < 32; i += inc) {
+ for (i = 0; i < NUM_FPU_REGS; i += inc) {
err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* [PATCH 05/15] mips: replace hardcoded 32 with NUM_FPU_REGS in ptrace
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
NUM_FPU_REGS just makes it clearer what's going on, rather than the
magic hard coded 32.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/signal.c | 4 ++--
arch/mips/kernel/signal32.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index e0178e1..0f97c7d 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -69,7 +69,7 @@ static int copy_fp_to_sigcontext(struct sigcontext __user *sc)
int i;
int err = 0;
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < NUM_FPU_REGS; i++) {
err |=
__put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
&sc->sc_fpregs[i]);
@@ -85,7 +85,7 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
int err = 0;
u64 fpr_val;
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < NUM_FPU_REGS; i++) {
err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index aec5821..bae2e6e 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -84,7 +84,7 @@ static int copy_fp_to_sigcontext32(struct sigcontext32 __user *sc)
int err = 0;
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
- for (i = 0; i < 32; i += inc) {
+ for (i = 0; i < NUM_FPU_REGS; i += inc) {
err |=
__put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
&sc->sc_fpregs[i]);
@@ -101,7 +101,7 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
u64 fpr_val;
- for (i = 0; i < 32; i += inc) {
+ for (i = 0; i < NUM_FPU_REGS; i += inc) {
err |= __get_user(fpr_val, &sc->sc_fpregs[i]);
set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 06/15] mips: clear upper bits of FP registers on emulator writes
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
The upper bits of an FP register are architecturally defined as
unpredictable following an instruction which only writes the lower
bits. The prior behaviour of the kernel is to leave them unmodified.
This patch modifies that to clear the upper bits to zero. This is what
the MSA architecture reference manual specifies should happen for its
wider registers and is still permissible for scalar FP instructions
given the bits' unpredictability there.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9144842..c484f5f 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
} while (0)
#define SITOREG(si, x) do { \
- if (cop1_64bit(xcp)) \
+ if (cop1_64bit(xcp)) { \
+ unsigned i; \
set_fpr32(&ctx->fpr[x], 0, si); \
- else \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+ } else { \
set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+ } \
} while (0)
#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
-#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
+
+#define SITOHREG(si, x) do { \
+ unsigned i; \
+ set_fpr32(&ctx->fpr[x], 1, si); \
+ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+} while (0)
#define DIFROMREG(di, x) \
((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
-#define DITOREG(di, x) \
- set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
+#define DITOREG(di, x) do { \
+ unsigned fpr, i; \
+ fpr = (x) & ~(cop1_64bit(xcp) == 0); \
+ set_fpr64(&ctx->fpr[fpr], 0, di); \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
+ set_fpr64(&ctx->fpr[fpr], i, 0); \
+} while (0)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* [PATCH 06/15] mips: clear upper bits of FP registers on emulator writes
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
The upper bits of an FP register are architecturally defined as
unpredictable following an instruction which only writes the lower
bits. The prior behaviour of the kernel is to leave them unmodified.
This patch modifies that to clear the upper bits to zero. This is what
the MSA architecture reference manual specifies should happen for its
wider registers and is still permissible for scalar FP instructions
given the bits' unpredictability there.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9144842..c484f5f 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
} while (0)
#define SITOREG(si, x) do { \
- if (cop1_64bit(xcp)) \
+ if (cop1_64bit(xcp)) { \
+ unsigned i; \
set_fpr32(&ctx->fpr[x], 0, si); \
- else \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+ } else { \
set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+ } \
} while (0)
#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
-#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
+
+#define SITOHREG(si, x) do { \
+ unsigned i; \
+ set_fpr32(&ctx->fpr[x], 1, si); \
+ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+} while (0)
#define DIFROMREG(di, x) \
((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
-#define DITOREG(di, x) \
- set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
+#define DITOREG(di, x) do { \
+ unsigned fpr, i; \
+ fpr = (x) & ~(cop1_64bit(xcp) == 0); \
+ set_fpr64(&ctx->fpr[fpr], 0, di); \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
+ set_fpr64(&ctx->fpr[fpr], i, 0); \
+} while (0)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* Re: [PATCH 06/15] mips: clear upper bits of FP registers on emulator writes
2014-01-27 15:23 ` Paul Burton
(?)
@ 2014-01-27 18:06 ` Sergei Shtylyov
2014-01-27 17:11 ` Paul Burton
2014-01-27 17:14 ` Paul Burton
-1 siblings, 2 replies; 52+ messages in thread
From: Sergei Shtylyov @ 2014-01-27 18:06 UTC (permalink / raw)
To: Paul Burton, linux-mips
Hello.
On 01/27/2014 06:23 PM, Paul Burton wrote:
> The upper bits of an FP register are architecturally defined as
> unpredictable following an instructions which only writes the lower
> bits. The prior behaviour of the kernel is to leave them unmodified.
> This patch modifies that to clear the upper bits to zero. This is what
> the MSA architecture reference manual specifies should happen for its
> wider registers and is still permissible for scalar FP instructions
> given the bits unpredictability there.
> Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> ---
> arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
> 1 file changed, 20 insertions(+), 5 deletions(-)
> diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
> index 9144842..c484f5f 100644
> --- a/arch/mips/math-emu/cp1emu.c
> +++ b/arch/mips/math-emu/cp1emu.c
> @@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
> } while (0)
>
> #define SITOREG(si, x) do { \
> - if (cop1_64bit(xcp)) \
> + if (cop1_64bit(xcp)) { \
> + unsigned i; \
> set_fpr32(&ctx->fpr[x], 0, si); \
> - else \
> + for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
> + set_fpr32(&ctx->fpr[x], i, 0); \
> + } else { \
> set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
> + } \
> } while (0)
>
> #define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
> -#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
> +
> +#define SITOHREG(si, x) do { \
> + unsigned i; \
> + set_fpr32(&ctx->fpr[x], 1, si); \
> + for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
> + set_fpr32(&ctx->fpr[x], i, 0); \
This line is over-indented, no? Compare the loop below...
> +} while (0)
>
> #define DIFROMREG(di, x) \
> ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
>
> -#define DITOREG(di, x) \
> - set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
> +#define DITOREG(di, x) do { \
> + unsigned fpr, i; \
> + fpr = (x) & ~(cop1_64bit(xcp) == 0); \
> + set_fpr64(&ctx->fpr[fpr], 0, di); \
> + for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
> + set_fpr64(&ctx->fpr[fpr], i, 0); \
> +} while (0)
WBR, Sergei
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [PATCH 06/15] mips: clear upper bits of FP registers on emulator writes
@ 2014-01-27 17:11 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 17:11 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: linux-mips
On Mon, Jan 27, 2014 at 09:06:52PM +0300, Sergei Shtylyov wrote:
> Hello.
>
> On 01/27/2014 06:23 PM, Paul Burton wrote:
>
> >The upper bits of an FP register are architecturally defined as
> >unpredictable following an instructions which only writes the lower
> >bits. The prior behaviour of the kernel is to leave them unmodified.
> >This patch modifies that to clear the upper bits to zero. This is what
> >the MSA architecture reference manual specifies should happen for its
> >wider registers and is still permissible for scalar FP instructions
> >given the bits unpredictability there.
>
> >Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >---
> > arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
> > 1 file changed, 20 insertions(+), 5 deletions(-)
>
> >diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
> >index 9144842..c484f5f 100644
> >--- a/arch/mips/math-emu/cp1emu.c
> >+++ b/arch/mips/math-emu/cp1emu.c
> >@@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
> > } while (0)
> >
> > #define SITOREG(si, x) do { \
> >- if (cop1_64bit(xcp)) \
> >+ if (cop1_64bit(xcp)) { \
> >+ unsigned i; \
> > set_fpr32(&ctx->fpr[x], 0, si); \
> >- else \
> >+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
> >+ set_fpr32(&ctx->fpr[x], i, 0); \
> >+ } else { \
> > set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
> >+ } \
> > } while (0)
> >
> > #define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
> >-#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
> >+
> >+#define SITOHREG(si, x) do { \
> >+ unsigned i; \
> >+ set_fpr32(&ctx->fpr[x], 1, si); \
> >+ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
> >+ set_fpr32(&ctx->fpr[x], i, 0); \
>
> This line is over-indented, no? Compare the loop below...
>
Indeed it is, well spotted :)
Thanks,
Paul
> >+} while (0)
> >
> > #define DIFROMREG(di, x) \
> > ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
> >
> >-#define DITOREG(di, x) \
> >- set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
> >+#define DITOREG(di, x) do { \
> >+ unsigned fpr, i; \
> >+ fpr = (x) & ~(cop1_64bit(xcp) == 0); \
> >+ set_fpr64(&ctx->fpr[fpr], 0, di); \
> >+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
> >+ set_fpr64(&ctx->fpr[fpr], i, 0); \
> >+} while (0)
>
> WBR, Sergei
>
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [PATCH 06/15] mips: clear upper bits of FP registers on emulator writes
@ 2014-01-27 17:11 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 17:11 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: linux-mips
On Mon, Jan 27, 2014 at 09:06:52PM +0300, Sergei Shtylyov wrote:
> Hello.
>
> On 01/27/2014 06:23 PM, Paul Burton wrote:
>
> >The upper bits of an FP register are architecturally defined as
> >unpredictable following an instructions which only writes the lower
> >bits. The prior behaviour of the kernel is to leave them unmodified.
> >This patch modifies that to clear the upper bits to zero. This is what
> >the MSA architecture reference manual specifies should happen for its
> >wider registers and is still permissible for scalar FP instructions
> >given the bits unpredictability there.
>
> >Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >---
> > arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
> > 1 file changed, 20 insertions(+), 5 deletions(-)
>
> >diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
> >index 9144842..c484f5f 100644
> >--- a/arch/mips/math-emu/cp1emu.c
> >+++ b/arch/mips/math-emu/cp1emu.c
> >@@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
> > } while (0)
> >
> > #define SITOREG(si, x) do { \
> >- if (cop1_64bit(xcp)) \
> >+ if (cop1_64bit(xcp)) { \
> >+ unsigned i; \
> > set_fpr32(&ctx->fpr[x], 0, si); \
> >- else \
> >+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
> >+ set_fpr32(&ctx->fpr[x], i, 0); \
> >+ } else { \
> > set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
> >+ } \
> > } while (0)
> >
> > #define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
> >-#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
> >+
> >+#define SITOHREG(si, x) do { \
> >+ unsigned i; \
> >+ set_fpr32(&ctx->fpr[x], 1, si); \
> >+ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
> >+ set_fpr32(&ctx->fpr[x], i, 0); \
>
> This line is over-indented, no? Compare the loop below...
>
Indeed it is, well spotted :)
Thanks,
Paul
> >+} while (0)
> >
> > #define DIFROMREG(di, x) \
> > ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
> >
> >-#define DITOREG(di, x) \
> >- set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
> >+#define DITOREG(di, x) do { \
> >+ unsigned fpr, i; \
> >+ fpr = (x) & ~(cop1_64bit(xcp) == 0); \
> >+ set_fpr64(&ctx->fpr[fpr], 0, di); \
> >+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
> >+ set_fpr64(&ctx->fpr[fpr], i, 0); \
> >+} while (0)
>
> WBR, Sergei
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 06/15] mips: clear upper bits of FP registers on emulator writes
@ 2014-01-27 17:14 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 17:14 UTC (permalink / raw)
To: linux-mips; +Cc: sergei.shtylyov, Paul Burton
The upper bits of an FP register are architecturally defined as
unpredictable following an instruction which only writes the lower
bits. The prior behaviour of the kernel is to leave them unmodified.
This patch modifies that to clear the upper bits to zero. This is what
the MSA architecture reference manual specifies should happen for its
wider registers and is still permissible for scalar FP instructions
given the bits' unpredictability there.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
Changes in v2:
- Fix indentation issue in SITOHREG spotted by Sergei Shtylyov.
---
arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9144842..873e9f0 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
} while (0)
#define SITOREG(si, x) do { \
- if (cop1_64bit(xcp)) \
+ if (cop1_64bit(xcp)) { \
+ unsigned i; \
set_fpr32(&ctx->fpr[x], 0, si); \
- else \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+ } else { \
set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+ } \
} while (0)
#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
-#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
+
+#define SITOHREG(si, x) do { \
+ unsigned i; \
+ set_fpr32(&ctx->fpr[x], 1, si); \
+ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+} while (0)
#define DIFROMREG(di, x) \
((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
-#define DITOREG(di, x) \
- set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
+#define DITOREG(di, x) do { \
+ unsigned fpr, i; \
+ fpr = (x) & ~(cop1_64bit(xcp) == 0); \
+ set_fpr64(&ctx->fpr[fpr], 0, di); \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
+ set_fpr64(&ctx->fpr[fpr], i, 0); \
+} while (0)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* [PATCH v2 06/15] mips: clear upper bits of FP registers on emulator writes
@ 2014-01-27 17:14 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 17:14 UTC (permalink / raw)
To: linux-mips; +Cc: sergei.shtylyov, Paul Burton
The upper bits of an FP register are architecturally defined as
unpredictable following an instruction which only writes the lower
bits. The prior behaviour of the kernel is to leave them unmodified.
This patch modifies that to clear the upper bits to zero. This is what
the MSA architecture reference manual specifies should happen for its
wider registers and is still permissible for scalar FP instructions
given the bits' unpredictability there.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
Changes in v2:
- Fix indentation issue in SITOHREG spotted by Sergei Shtylyov.
---
arch/mips/math-emu/cp1emu.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9144842..873e9f0 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -884,20 +884,35 @@ static inline int cop1_64bit(struct pt_regs *xcp)
} while (0)
#define SITOREG(si, x) do { \
- if (cop1_64bit(xcp)) \
+ if (cop1_64bit(xcp)) { \
+ unsigned i; \
set_fpr32(&ctx->fpr[x], 0, si); \
- else \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+ } else { \
set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \
+ } \
} while (0)
#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
-#define SITOHREG(si, x) set_fpr32(&ctx->fpr[x], 1, si)
+
+#define SITOHREG(si, x) do { \
+ unsigned i; \
+ set_fpr32(&ctx->fpr[x], 1, si); \
+ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
+ set_fpr32(&ctx->fpr[x], i, 0); \
+} while (0)
#define DIFROMREG(di, x) \
((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
-#define DITOREG(di, x) \
- set_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0, di)
+#define DITOREG(di, x) do { \
+ unsigned fpr, i; \
+ fpr = (x) & ~(cop1_64bit(xcp) == 0); \
+ set_fpr64(&ctx->fpr[fpr], 0, di); \
+ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
+ set_fpr64(&ctx->fpr[fpr], i, 0); \
+} while (0)
#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x)
#define SPTOREG(sp, x) SITOREG((sp).bits, x)
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 07/15] mips: don't assume 64-bit FP registers for dump_{,task_}fpu
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This code assumed that saved FP registers are 64 bits wide, an
assumption which will no longer be true once MSA is introduced. This
patch modifies the code to copy the lower 64 bits of each register in
turn, which is safe for any FP register width >= 64 bits.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/process.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 6ae540e..2f01f3d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -157,7 +157,13 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Fill in the fpu structure for a core dump.. */
int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
{
- memcpy(r, &current->thread.fpu, sizeof(current->thread.fpu));
+ int i;
+
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&r[i], &current->thread.fpu.fpr[i], sizeof(*r));
+
+ memcpy(&r[NUM_FPU_REGS], &current->thread.fpu.fcr31,
+ sizeof(current->thread.fpu.fcr31));
return 1;
}
@@ -192,7 +198,13 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
int dump_task_fpu(struct task_struct *t, elf_fpregset_t *fpr)
{
- memcpy(fpr, &t->thread.fpu, sizeof(current->thread.fpu));
+ int i;
+
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&fpr[i], &t->thread.fpu.fpr[i], sizeof(*fpr));
+
+ memcpy(&fpr[NUM_FPU_REGS], &t->thread.fpu.fcr31,
+ sizeof(t->thread.fpu.fcr31));
return 1;
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 07/15] mips: don't assume 64-bit FP registers for dump_{,task_}fpu
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This code assumed that saved FP registers are 64 bits wide, an
assumption which will no longer be true once MSA is introduced. This
patch modifies the code to copy the lower 64 bits of each register in
turn, which is safe for any FP register width >= 64 bits.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/process.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 6ae540e..2f01f3d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -157,7 +157,13 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Fill in the fpu structure for a core dump.. */
int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
{
- memcpy(r, &current->thread.fpu, sizeof(current->thread.fpu));
+ int i;
+
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&r[i], &current->thread.fpu.fpr[i], sizeof(*r));
+
+ memcpy(&r[NUM_FPU_REGS], &current->thread.fpu.fcr31,
+ sizeof(current->thread.fpu.fcr31));
return 1;
}
@@ -192,7 +198,13 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
int dump_task_fpu(struct task_struct *t, elf_fpregset_t *fpr)
{
- memcpy(fpr, &t->thread.fpu, sizeof(current->thread.fpu));
+ int i;
+
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&fpr[i], &t->thread.fpu.fpr[i], sizeof(*fpr));
+
+ memcpy(&fpr[NUM_FPU_REGS], &t->thread.fpu.fcr31,
+ sizeof(t->thread.fpu.fcr31));
return 1;
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 08/15] mips: don't assume 64-bit FP registers for FP regset
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
When we want to access 64-bit FP register values we can only treat
consecutive registers as being consecutive in memory when the width of
an FP register equals 64 bits. This assumption will not remain true once
MSA support is introduced, so provide a code path which copies each 64
bit FP register value in turn when the width of an FP register differs
from 64 bits.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/ptrace.c | 46 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 624773e..7bff8d3 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -304,10 +304,27 @@ static int fpr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ unsigned i;
+ int err;
+ u64 fpr_val;
+
/* XXX fcr31 */
+
+ if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu,
+ 0, sizeof(elf_fpregset_t));
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static int fpr_set(struct task_struct *target,
@@ -315,10 +332,27 @@ static int fpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ unsigned i;
+ int err;
+ u64 fpr_val;
+
/* XXX fcr31 */
+
+ if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu,
+ 0, sizeof(elf_fpregset_t));
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+ }
+
+ return 0;
}
enum mips_regset {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 08/15] mips: don't assume 64-bit FP registers for FP regset
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
When we want to access 64-bit FP register values we can only treat
consecutive registers as being consecutive in memory when the width of
an FP register equals 64 bits. This assumption will not remain true once
MSA support is introduced, so provide a code path which copies each 64
bit FP register value in turn when the width of an FP register differs
from 64 bits.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/ptrace.c | 46 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 624773e..7bff8d3 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -304,10 +304,27 @@ static int fpr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ unsigned i;
+ int err;
+ u64 fpr_val;
+
/* XXX fcr31 */
+
+ if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu,
+ 0, sizeof(elf_fpregset_t));
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static int fpr_set(struct task_struct *target,
@@ -315,10 +332,27 @@ static int fpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ unsigned i;
+ int err;
+ u64 fpr_val;
+
/* XXX fcr31 */
+
+ if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu,
+ 0, sizeof(elf_fpregset_t));
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+ }
+
+ return 0;
}
enum mips_regset {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 09/15] mips: don't assume 64-bit FP registers for context switch
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
When saving or restoring scalar FP context we want to access the least
significant 64 bits of each FP register. When the FP registers are 64
bits wide that is trivially the start of the register's value in memory.
However when the FP registers are wider this equivalence will no longer
be true for big endian systems. Define a new set of offset macros for
the least significant 64 bits of each saved FP register within thread
context, and make use of them when saving and restoring scalar FP
context.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/include/asm/asmmacro-32.h | 128 ++++++++++++++++++------------------
arch/mips/include/asm/asmmacro.h | 128 ++++++++++++++++++------------------
arch/mips/kernel/asm-offsets.c | 66 +++++++++++++++++++
3 files changed, 194 insertions(+), 128 deletions(-)
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index 70e1f17..e38c281 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -14,75 +14,75 @@
.macro fpu_save_single thread tmp=t0
cfc1 \tmp, fcr31
- swc1 $f0, THREAD_FPR0(\thread)
- swc1 $f1, THREAD_FPR1(\thread)
- swc1 $f2, THREAD_FPR2(\thread)
- swc1 $f3, THREAD_FPR3(\thread)
- swc1 $f4, THREAD_FPR4(\thread)
- swc1 $f5, THREAD_FPR5(\thread)
- swc1 $f6, THREAD_FPR6(\thread)
- swc1 $f7, THREAD_FPR7(\thread)
- swc1 $f8, THREAD_FPR8(\thread)
- swc1 $f9, THREAD_FPR9(\thread)
- swc1 $f10, THREAD_FPR10(\thread)
- swc1 $f11, THREAD_FPR11(\thread)
- swc1 $f12, THREAD_FPR12(\thread)
- swc1 $f13, THREAD_FPR13(\thread)
- swc1 $f14, THREAD_FPR14(\thread)
- swc1 $f15, THREAD_FPR15(\thread)
- swc1 $f16, THREAD_FPR16(\thread)
- swc1 $f17, THREAD_FPR17(\thread)
- swc1 $f18, THREAD_FPR18(\thread)
- swc1 $f19, THREAD_FPR19(\thread)
- swc1 $f20, THREAD_FPR20(\thread)
- swc1 $f21, THREAD_FPR21(\thread)
- swc1 $f22, THREAD_FPR22(\thread)
- swc1 $f23, THREAD_FPR23(\thread)
- swc1 $f24, THREAD_FPR24(\thread)
- swc1 $f25, THREAD_FPR25(\thread)
- swc1 $f26, THREAD_FPR26(\thread)
- swc1 $f27, THREAD_FPR27(\thread)
- swc1 $f28, THREAD_FPR28(\thread)
- swc1 $f29, THREAD_FPR29(\thread)
- swc1 $f30, THREAD_FPR30(\thread)
- swc1 $f31, THREAD_FPR31(\thread)
+ swc1 $f0, THREAD_FPR0_LS64(\thread)
+ swc1 $f1, THREAD_FPR1_LS64(\thread)
+ swc1 $f2, THREAD_FPR2_LS64(\thread)
+ swc1 $f3, THREAD_FPR3_LS64(\thread)
+ swc1 $f4, THREAD_FPR4_LS64(\thread)
+ swc1 $f5, THREAD_FPR5_LS64(\thread)
+ swc1 $f6, THREAD_FPR6_LS64(\thread)
+ swc1 $f7, THREAD_FPR7_LS64(\thread)
+ swc1 $f8, THREAD_FPR8_LS64(\thread)
+ swc1 $f9, THREAD_FPR9_LS64(\thread)
+ swc1 $f10, THREAD_FPR10_LS64(\thread)
+ swc1 $f11, THREAD_FPR11_LS64(\thread)
+ swc1 $f12, THREAD_FPR12_LS64(\thread)
+ swc1 $f13, THREAD_FPR13_LS64(\thread)
+ swc1 $f14, THREAD_FPR14_LS64(\thread)
+ swc1 $f15, THREAD_FPR15_LS64(\thread)
+ swc1 $f16, THREAD_FPR16_LS64(\thread)
+ swc1 $f17, THREAD_FPR17_LS64(\thread)
+ swc1 $f18, THREAD_FPR18_LS64(\thread)
+ swc1 $f19, THREAD_FPR19_LS64(\thread)
+ swc1 $f20, THREAD_FPR20_LS64(\thread)
+ swc1 $f21, THREAD_FPR21_LS64(\thread)
+ swc1 $f22, THREAD_FPR22_LS64(\thread)
+ swc1 $f23, THREAD_FPR23_LS64(\thread)
+ swc1 $f24, THREAD_FPR24_LS64(\thread)
+ swc1 $f25, THREAD_FPR25_LS64(\thread)
+ swc1 $f26, THREAD_FPR26_LS64(\thread)
+ swc1 $f27, THREAD_FPR27_LS64(\thread)
+ swc1 $f28, THREAD_FPR28_LS64(\thread)
+ swc1 $f29, THREAD_FPR29_LS64(\thread)
+ swc1 $f30, THREAD_FPR30_LS64(\thread)
+ swc1 $f31, THREAD_FPR31_LS64(\thread)
sw \tmp, THREAD_FCR31(\thread)
.endm
.macro fpu_restore_single thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
- lwc1 $f0, THREAD_FPR0(\thread)
- lwc1 $f1, THREAD_FPR1(\thread)
- lwc1 $f2, THREAD_FPR2(\thread)
- lwc1 $f3, THREAD_FPR3(\thread)
- lwc1 $f4, THREAD_FPR4(\thread)
- lwc1 $f5, THREAD_FPR5(\thread)
- lwc1 $f6, THREAD_FPR6(\thread)
- lwc1 $f7, THREAD_FPR7(\thread)
- lwc1 $f8, THREAD_FPR8(\thread)
- lwc1 $f9, THREAD_FPR9(\thread)
- lwc1 $f10, THREAD_FPR10(\thread)
- lwc1 $f11, THREAD_FPR11(\thread)
- lwc1 $f12, THREAD_FPR12(\thread)
- lwc1 $f13, THREAD_FPR13(\thread)
- lwc1 $f14, THREAD_FPR14(\thread)
- lwc1 $f15, THREAD_FPR15(\thread)
- lwc1 $f16, THREAD_FPR16(\thread)
- lwc1 $f17, THREAD_FPR17(\thread)
- lwc1 $f18, THREAD_FPR18(\thread)
- lwc1 $f19, THREAD_FPR19(\thread)
- lwc1 $f20, THREAD_FPR20(\thread)
- lwc1 $f21, THREAD_FPR21(\thread)
- lwc1 $f22, THREAD_FPR22(\thread)
- lwc1 $f23, THREAD_FPR23(\thread)
- lwc1 $f24, THREAD_FPR24(\thread)
- lwc1 $f25, THREAD_FPR25(\thread)
- lwc1 $f26, THREAD_FPR26(\thread)
- lwc1 $f27, THREAD_FPR27(\thread)
- lwc1 $f28, THREAD_FPR28(\thread)
- lwc1 $f29, THREAD_FPR29(\thread)
- lwc1 $f30, THREAD_FPR30(\thread)
- lwc1 $f31, THREAD_FPR31(\thread)
+ lwc1 $f0, THREAD_FPR0_LS64(\thread)
+ lwc1 $f1, THREAD_FPR1_LS64(\thread)
+ lwc1 $f2, THREAD_FPR2_LS64(\thread)
+ lwc1 $f3, THREAD_FPR3_LS64(\thread)
+ lwc1 $f4, THREAD_FPR4_LS64(\thread)
+ lwc1 $f5, THREAD_FPR5_LS64(\thread)
+ lwc1 $f6, THREAD_FPR6_LS64(\thread)
+ lwc1 $f7, THREAD_FPR7_LS64(\thread)
+ lwc1 $f8, THREAD_FPR8_LS64(\thread)
+ lwc1 $f9, THREAD_FPR9_LS64(\thread)
+ lwc1 $f10, THREAD_FPR10_LS64(\thread)
+ lwc1 $f11, THREAD_FPR11_LS64(\thread)
+ lwc1 $f12, THREAD_FPR12_LS64(\thread)
+ lwc1 $f13, THREAD_FPR13_LS64(\thread)
+ lwc1 $f14, THREAD_FPR14_LS64(\thread)
+ lwc1 $f15, THREAD_FPR15_LS64(\thread)
+ lwc1 $f16, THREAD_FPR16_LS64(\thread)
+ lwc1 $f17, THREAD_FPR17_LS64(\thread)
+ lwc1 $f18, THREAD_FPR18_LS64(\thread)
+ lwc1 $f19, THREAD_FPR19_LS64(\thread)
+ lwc1 $f20, THREAD_FPR20_LS64(\thread)
+ lwc1 $f21, THREAD_FPR21_LS64(\thread)
+ lwc1 $f22, THREAD_FPR22_LS64(\thread)
+ lwc1 $f23, THREAD_FPR23_LS64(\thread)
+ lwc1 $f24, THREAD_FPR24_LS64(\thread)
+ lwc1 $f25, THREAD_FPR25_LS64(\thread)
+ lwc1 $f26, THREAD_FPR26_LS64(\thread)
+ lwc1 $f27, THREAD_FPR27_LS64(\thread)
+ lwc1 $f28, THREAD_FPR28_LS64(\thread)
+ lwc1 $f29, THREAD_FPR29_LS64(\thread)
+ lwc1 $f30, THREAD_FPR30_LS64(\thread)
+ lwc1 $f31, THREAD_FPR31_LS64(\thread)
ctc1 \tmp, fcr31
.endm
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 3220c93..2aa713f 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -64,44 +64,44 @@
.macro fpu_save_16even thread tmp=t0
cfc1 \tmp, fcr31
- sdc1 $f0, THREAD_FPR0(\thread)
- sdc1 $f2, THREAD_FPR2(\thread)
- sdc1 $f4, THREAD_FPR4(\thread)
- sdc1 $f6, THREAD_FPR6(\thread)
- sdc1 $f8, THREAD_FPR8(\thread)
- sdc1 $f10, THREAD_FPR10(\thread)
- sdc1 $f12, THREAD_FPR12(\thread)
- sdc1 $f14, THREAD_FPR14(\thread)
- sdc1 $f16, THREAD_FPR16(\thread)
- sdc1 $f18, THREAD_FPR18(\thread)
- sdc1 $f20, THREAD_FPR20(\thread)
- sdc1 $f22, THREAD_FPR22(\thread)
- sdc1 $f24, THREAD_FPR24(\thread)
- sdc1 $f26, THREAD_FPR26(\thread)
- sdc1 $f28, THREAD_FPR28(\thread)
- sdc1 $f30, THREAD_FPR30(\thread)
+ sdc1 $f0, THREAD_FPR0_LS64(\thread)
+ sdc1 $f2, THREAD_FPR2_LS64(\thread)
+ sdc1 $f4, THREAD_FPR4_LS64(\thread)
+ sdc1 $f6, THREAD_FPR6_LS64(\thread)
+ sdc1 $f8, THREAD_FPR8_LS64(\thread)
+ sdc1 $f10, THREAD_FPR10_LS64(\thread)
+ sdc1 $f12, THREAD_FPR12_LS64(\thread)
+ sdc1 $f14, THREAD_FPR14_LS64(\thread)
+ sdc1 $f16, THREAD_FPR16_LS64(\thread)
+ sdc1 $f18, THREAD_FPR18_LS64(\thread)
+ sdc1 $f20, THREAD_FPR20_LS64(\thread)
+ sdc1 $f22, THREAD_FPR22_LS64(\thread)
+ sdc1 $f24, THREAD_FPR24_LS64(\thread)
+ sdc1 $f26, THREAD_FPR26_LS64(\thread)
+ sdc1 $f28, THREAD_FPR28_LS64(\thread)
+ sdc1 $f30, THREAD_FPR30_LS64(\thread)
sw \tmp, THREAD_FCR31(\thread)
.endm
.macro fpu_save_16odd thread
.set push
.set mips64r2
- sdc1 $f1, THREAD_FPR1(\thread)
- sdc1 $f3, THREAD_FPR3(\thread)
- sdc1 $f5, THREAD_FPR5(\thread)
- sdc1 $f7, THREAD_FPR7(\thread)
- sdc1 $f9, THREAD_FPR9(\thread)
- sdc1 $f11, THREAD_FPR11(\thread)
- sdc1 $f13, THREAD_FPR13(\thread)
- sdc1 $f15, THREAD_FPR15(\thread)
- sdc1 $f17, THREAD_FPR17(\thread)
- sdc1 $f19, THREAD_FPR19(\thread)
- sdc1 $f21, THREAD_FPR21(\thread)
- sdc1 $f23, THREAD_FPR23(\thread)
- sdc1 $f25, THREAD_FPR25(\thread)
- sdc1 $f27, THREAD_FPR27(\thread)
- sdc1 $f29, THREAD_FPR29(\thread)
- sdc1 $f31, THREAD_FPR31(\thread)
+ sdc1 $f1, THREAD_FPR1_LS64(\thread)
+ sdc1 $f3, THREAD_FPR3_LS64(\thread)
+ sdc1 $f5, THREAD_FPR5_LS64(\thread)
+ sdc1 $f7, THREAD_FPR7_LS64(\thread)
+ sdc1 $f9, THREAD_FPR9_LS64(\thread)
+ sdc1 $f11, THREAD_FPR11_LS64(\thread)
+ sdc1 $f13, THREAD_FPR13_LS64(\thread)
+ sdc1 $f15, THREAD_FPR15_LS64(\thread)
+ sdc1 $f17, THREAD_FPR17_LS64(\thread)
+ sdc1 $f19, THREAD_FPR19_LS64(\thread)
+ sdc1 $f21, THREAD_FPR21_LS64(\thread)
+ sdc1 $f23, THREAD_FPR23_LS64(\thread)
+ sdc1 $f25, THREAD_FPR25_LS64(\thread)
+ sdc1 $f27, THREAD_FPR27_LS64(\thread)
+ sdc1 $f29, THREAD_FPR29_LS64(\thread)
+ sdc1 $f31, THREAD_FPR31_LS64(\thread)
.set pop
.endm
@@ -117,44 +117,44 @@
.macro fpu_restore_16even thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
- ldc1 $f0, THREAD_FPR0(\thread)
- ldc1 $f2, THREAD_FPR2(\thread)
- ldc1 $f4, THREAD_FPR4(\thread)
- ldc1 $f6, THREAD_FPR6(\thread)
- ldc1 $f8, THREAD_FPR8(\thread)
- ldc1 $f10, THREAD_FPR10(\thread)
- ldc1 $f12, THREAD_FPR12(\thread)
- ldc1 $f14, THREAD_FPR14(\thread)
- ldc1 $f16, THREAD_FPR16(\thread)
- ldc1 $f18, THREAD_FPR18(\thread)
- ldc1 $f20, THREAD_FPR20(\thread)
- ldc1 $f22, THREAD_FPR22(\thread)
- ldc1 $f24, THREAD_FPR24(\thread)
- ldc1 $f26, THREAD_FPR26(\thread)
- ldc1 $f28, THREAD_FPR28(\thread)
- ldc1 $f30, THREAD_FPR30(\thread)
+ ldc1 $f0, THREAD_FPR0_LS64(\thread)
+ ldc1 $f2, THREAD_FPR2_LS64(\thread)
+ ldc1 $f4, THREAD_FPR4_LS64(\thread)
+ ldc1 $f6, THREAD_FPR6_LS64(\thread)
+ ldc1 $f8, THREAD_FPR8_LS64(\thread)
+ ldc1 $f10, THREAD_FPR10_LS64(\thread)
+ ldc1 $f12, THREAD_FPR12_LS64(\thread)
+ ldc1 $f14, THREAD_FPR14_LS64(\thread)
+ ldc1 $f16, THREAD_FPR16_LS64(\thread)
+ ldc1 $f18, THREAD_FPR18_LS64(\thread)
+ ldc1 $f20, THREAD_FPR20_LS64(\thread)
+ ldc1 $f22, THREAD_FPR22_LS64(\thread)
+ ldc1 $f24, THREAD_FPR24_LS64(\thread)
+ ldc1 $f26, THREAD_FPR26_LS64(\thread)
+ ldc1 $f28, THREAD_FPR28_LS64(\thread)
+ ldc1 $f30, THREAD_FPR30_LS64(\thread)
ctc1 \tmp, fcr31
.endm
.macro fpu_restore_16odd thread
.set push
.set mips64r2
- ldc1 $f1, THREAD_FPR1(\thread)
- ldc1 $f3, THREAD_FPR3(\thread)
- ldc1 $f5, THREAD_FPR5(\thread)
- ldc1 $f7, THREAD_FPR7(\thread)
- ldc1 $f9, THREAD_FPR9(\thread)
- ldc1 $f11, THREAD_FPR11(\thread)
- ldc1 $f13, THREAD_FPR13(\thread)
- ldc1 $f15, THREAD_FPR15(\thread)
- ldc1 $f17, THREAD_FPR17(\thread)
- ldc1 $f19, THREAD_FPR19(\thread)
- ldc1 $f21, THREAD_FPR21(\thread)
- ldc1 $f23, THREAD_FPR23(\thread)
- ldc1 $f25, THREAD_FPR25(\thread)
- ldc1 $f27, THREAD_FPR27(\thread)
- ldc1 $f29, THREAD_FPR29(\thread)
- ldc1 $f31, THREAD_FPR31(\thread)
+ ldc1 $f1, THREAD_FPR1_LS64(\thread)
+ ldc1 $f3, THREAD_FPR3_LS64(\thread)
+ ldc1 $f5, THREAD_FPR5_LS64(\thread)
+ ldc1 $f7, THREAD_FPR7_LS64(\thread)
+ ldc1 $f9, THREAD_FPR9_LS64(\thread)
+ ldc1 $f11, THREAD_FPR11_LS64(\thread)
+ ldc1 $f13, THREAD_FPR13_LS64(\thread)
+ ldc1 $f15, THREAD_FPR15_LS64(\thread)
+ ldc1 $f17, THREAD_FPR17_LS64(\thread)
+ ldc1 $f19, THREAD_FPR19_LS64(\thread)
+ ldc1 $f21, THREAD_FPR21_LS64(\thread)
+ ldc1 $f23, THREAD_FPR23_LS64(\thread)
+ ldc1 $f25, THREAD_FPR25_LS64(\thread)
+ ldc1 $f27, THREAD_FPR27_LS64(\thread)
+ ldc1 $f29, THREAD_FPR29_LS64(\thread)
+ ldc1 $f31, THREAD_FPR31_LS64(\thread)
.set pop
.endm
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 0c2e853..f454d7b 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -168,6 +168,72 @@ void output_thread_fpu_defines(void)
OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
+ /* the least significant 64 bits of each FP register */
+ OFFSET(THREAD_FPR0_LS64, task_struct,
+ thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR1_LS64, task_struct,
+ thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR2_LS64, task_struct,
+ thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR3_LS64, task_struct,
+ thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR4_LS64, task_struct,
+ thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR5_LS64, task_struct,
+ thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR6_LS64, task_struct,
+ thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR7_LS64, task_struct,
+ thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR8_LS64, task_struct,
+ thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR9_LS64, task_struct,
+ thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR10_LS64, task_struct,
+ thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR11_LS64, task_struct,
+ thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR12_LS64, task_struct,
+ thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR13_LS64, task_struct,
+ thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR14_LS64, task_struct,
+ thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR15_LS64, task_struct,
+ thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR16_LS64, task_struct,
+ thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR17_LS64, task_struct,
+ thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR18_LS64, task_struct,
+ thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR19_LS64, task_struct,
+ thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR20_LS64, task_struct,
+ thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR21_LS64, task_struct,
+ thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR22_LS64, task_struct,
+ thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR23_LS64, task_struct,
+ thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR24_LS64, task_struct,
+ thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR25_LS64, task_struct,
+ thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR26_LS64, task_struct,
+ thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR27_LS64, task_struct,
+ thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR28_LS64, task_struct,
+ thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR29_LS64, task_struct,
+ thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR30_LS64, task_struct,
+ thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR31_LS64, task_struct,
+ thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
+
OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
BLANK();
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 09/15] mips: don't assume 64-bit FP registers for context switch
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
When saving or restoring scalar FP context we want to access the least
significant 64 bits of each FP register. When the FP registers are 64
bits wide that is trivially the start of the register's value in memory.
However when the FP registers are wider this equivalence will no longer
be true for big endian systems. Define a new set of offset macros for
the least significant 64 bits of each saved FP register within thread
context, and make use of them when saving and restoring scalar FP
context.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/include/asm/asmmacro-32.h | 128 ++++++++++++++++++------------------
arch/mips/include/asm/asmmacro.h | 128 ++++++++++++++++++------------------
arch/mips/kernel/asm-offsets.c | 66 +++++++++++++++++++
3 files changed, 194 insertions(+), 128 deletions(-)
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index 70e1f17..e38c281 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -14,75 +14,75 @@
.macro fpu_save_single thread tmp=t0
cfc1 \tmp, fcr31
- swc1 $f0, THREAD_FPR0(\thread)
- swc1 $f1, THREAD_FPR1(\thread)
- swc1 $f2, THREAD_FPR2(\thread)
- swc1 $f3, THREAD_FPR3(\thread)
- swc1 $f4, THREAD_FPR4(\thread)
- swc1 $f5, THREAD_FPR5(\thread)
- swc1 $f6, THREAD_FPR6(\thread)
- swc1 $f7, THREAD_FPR7(\thread)
- swc1 $f8, THREAD_FPR8(\thread)
- swc1 $f9, THREAD_FPR9(\thread)
- swc1 $f10, THREAD_FPR10(\thread)
- swc1 $f11, THREAD_FPR11(\thread)
- swc1 $f12, THREAD_FPR12(\thread)
- swc1 $f13, THREAD_FPR13(\thread)
- swc1 $f14, THREAD_FPR14(\thread)
- swc1 $f15, THREAD_FPR15(\thread)
- swc1 $f16, THREAD_FPR16(\thread)
- swc1 $f17, THREAD_FPR17(\thread)
- swc1 $f18, THREAD_FPR18(\thread)
- swc1 $f19, THREAD_FPR19(\thread)
- swc1 $f20, THREAD_FPR20(\thread)
- swc1 $f21, THREAD_FPR21(\thread)
- swc1 $f22, THREAD_FPR22(\thread)
- swc1 $f23, THREAD_FPR23(\thread)
- swc1 $f24, THREAD_FPR24(\thread)
- swc1 $f25, THREAD_FPR25(\thread)
- swc1 $f26, THREAD_FPR26(\thread)
- swc1 $f27, THREAD_FPR27(\thread)
- swc1 $f28, THREAD_FPR28(\thread)
- swc1 $f29, THREAD_FPR29(\thread)
- swc1 $f30, THREAD_FPR30(\thread)
- swc1 $f31, THREAD_FPR31(\thread)
+ swc1 $f0, THREAD_FPR0_LS64(\thread)
+ swc1 $f1, THREAD_FPR1_LS64(\thread)
+ swc1 $f2, THREAD_FPR2_LS64(\thread)
+ swc1 $f3, THREAD_FPR3_LS64(\thread)
+ swc1 $f4, THREAD_FPR4_LS64(\thread)
+ swc1 $f5, THREAD_FPR5_LS64(\thread)
+ swc1 $f6, THREAD_FPR6_LS64(\thread)
+ swc1 $f7, THREAD_FPR7_LS64(\thread)
+ swc1 $f8, THREAD_FPR8_LS64(\thread)
+ swc1 $f9, THREAD_FPR9_LS64(\thread)
+ swc1 $f10, THREAD_FPR10_LS64(\thread)
+ swc1 $f11, THREAD_FPR11_LS64(\thread)
+ swc1 $f12, THREAD_FPR12_LS64(\thread)
+ swc1 $f13, THREAD_FPR13_LS64(\thread)
+ swc1 $f14, THREAD_FPR14_LS64(\thread)
+ swc1 $f15, THREAD_FPR15_LS64(\thread)
+ swc1 $f16, THREAD_FPR16_LS64(\thread)
+ swc1 $f17, THREAD_FPR17_LS64(\thread)
+ swc1 $f18, THREAD_FPR18_LS64(\thread)
+ swc1 $f19, THREAD_FPR19_LS64(\thread)
+ swc1 $f20, THREAD_FPR20_LS64(\thread)
+ swc1 $f21, THREAD_FPR21_LS64(\thread)
+ swc1 $f22, THREAD_FPR22_LS64(\thread)
+ swc1 $f23, THREAD_FPR23_LS64(\thread)
+ swc1 $f24, THREAD_FPR24_LS64(\thread)
+ swc1 $f25, THREAD_FPR25_LS64(\thread)
+ swc1 $f26, THREAD_FPR26_LS64(\thread)
+ swc1 $f27, THREAD_FPR27_LS64(\thread)
+ swc1 $f28, THREAD_FPR28_LS64(\thread)
+ swc1 $f29, THREAD_FPR29_LS64(\thread)
+ swc1 $f30, THREAD_FPR30_LS64(\thread)
+ swc1 $f31, THREAD_FPR31_LS64(\thread)
sw \tmp, THREAD_FCR31(\thread)
.endm
.macro fpu_restore_single thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
- lwc1 $f0, THREAD_FPR0(\thread)
- lwc1 $f1, THREAD_FPR1(\thread)
- lwc1 $f2, THREAD_FPR2(\thread)
- lwc1 $f3, THREAD_FPR3(\thread)
- lwc1 $f4, THREAD_FPR4(\thread)
- lwc1 $f5, THREAD_FPR5(\thread)
- lwc1 $f6, THREAD_FPR6(\thread)
- lwc1 $f7, THREAD_FPR7(\thread)
- lwc1 $f8, THREAD_FPR8(\thread)
- lwc1 $f9, THREAD_FPR9(\thread)
- lwc1 $f10, THREAD_FPR10(\thread)
- lwc1 $f11, THREAD_FPR11(\thread)
- lwc1 $f12, THREAD_FPR12(\thread)
- lwc1 $f13, THREAD_FPR13(\thread)
- lwc1 $f14, THREAD_FPR14(\thread)
- lwc1 $f15, THREAD_FPR15(\thread)
- lwc1 $f16, THREAD_FPR16(\thread)
- lwc1 $f17, THREAD_FPR17(\thread)
- lwc1 $f18, THREAD_FPR18(\thread)
- lwc1 $f19, THREAD_FPR19(\thread)
- lwc1 $f20, THREAD_FPR20(\thread)
- lwc1 $f21, THREAD_FPR21(\thread)
- lwc1 $f22, THREAD_FPR22(\thread)
- lwc1 $f23, THREAD_FPR23(\thread)
- lwc1 $f24, THREAD_FPR24(\thread)
- lwc1 $f25, THREAD_FPR25(\thread)
- lwc1 $f26, THREAD_FPR26(\thread)
- lwc1 $f27, THREAD_FPR27(\thread)
- lwc1 $f28, THREAD_FPR28(\thread)
- lwc1 $f29, THREAD_FPR29(\thread)
- lwc1 $f30, THREAD_FPR30(\thread)
- lwc1 $f31, THREAD_FPR31(\thread)
+ lwc1 $f0, THREAD_FPR0_LS64(\thread)
+ lwc1 $f1, THREAD_FPR1_LS64(\thread)
+ lwc1 $f2, THREAD_FPR2_LS64(\thread)
+ lwc1 $f3, THREAD_FPR3_LS64(\thread)
+ lwc1 $f4, THREAD_FPR4_LS64(\thread)
+ lwc1 $f5, THREAD_FPR5_LS64(\thread)
+ lwc1 $f6, THREAD_FPR6_LS64(\thread)
+ lwc1 $f7, THREAD_FPR7_LS64(\thread)
+ lwc1 $f8, THREAD_FPR8_LS64(\thread)
+ lwc1 $f9, THREAD_FPR9_LS64(\thread)
+ lwc1 $f10, THREAD_FPR10_LS64(\thread)
+ lwc1 $f11, THREAD_FPR11_LS64(\thread)
+ lwc1 $f12, THREAD_FPR12_LS64(\thread)
+ lwc1 $f13, THREAD_FPR13_LS64(\thread)
+ lwc1 $f14, THREAD_FPR14_LS64(\thread)
+ lwc1 $f15, THREAD_FPR15_LS64(\thread)
+ lwc1 $f16, THREAD_FPR16_LS64(\thread)
+ lwc1 $f17, THREAD_FPR17_LS64(\thread)
+ lwc1 $f18, THREAD_FPR18_LS64(\thread)
+ lwc1 $f19, THREAD_FPR19_LS64(\thread)
+ lwc1 $f20, THREAD_FPR20_LS64(\thread)
+ lwc1 $f21, THREAD_FPR21_LS64(\thread)
+ lwc1 $f22, THREAD_FPR22_LS64(\thread)
+ lwc1 $f23, THREAD_FPR23_LS64(\thread)
+ lwc1 $f24, THREAD_FPR24_LS64(\thread)
+ lwc1 $f25, THREAD_FPR25_LS64(\thread)
+ lwc1 $f26, THREAD_FPR26_LS64(\thread)
+ lwc1 $f27, THREAD_FPR27_LS64(\thread)
+ lwc1 $f28, THREAD_FPR28_LS64(\thread)
+ lwc1 $f29, THREAD_FPR29_LS64(\thread)
+ lwc1 $f30, THREAD_FPR30_LS64(\thread)
+ lwc1 $f31, THREAD_FPR31_LS64(\thread)
ctc1 \tmp, fcr31
.endm
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 3220c93..2aa713f 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -64,44 +64,44 @@
.macro fpu_save_16even thread tmp=t0
cfc1 \tmp, fcr31
- sdc1 $f0, THREAD_FPR0(\thread)
- sdc1 $f2, THREAD_FPR2(\thread)
- sdc1 $f4, THREAD_FPR4(\thread)
- sdc1 $f6, THREAD_FPR6(\thread)
- sdc1 $f8, THREAD_FPR8(\thread)
- sdc1 $f10, THREAD_FPR10(\thread)
- sdc1 $f12, THREAD_FPR12(\thread)
- sdc1 $f14, THREAD_FPR14(\thread)
- sdc1 $f16, THREAD_FPR16(\thread)
- sdc1 $f18, THREAD_FPR18(\thread)
- sdc1 $f20, THREAD_FPR20(\thread)
- sdc1 $f22, THREAD_FPR22(\thread)
- sdc1 $f24, THREAD_FPR24(\thread)
- sdc1 $f26, THREAD_FPR26(\thread)
- sdc1 $f28, THREAD_FPR28(\thread)
- sdc1 $f30, THREAD_FPR30(\thread)
+ sdc1 $f0, THREAD_FPR0_LS64(\thread)
+ sdc1 $f2, THREAD_FPR2_LS64(\thread)
+ sdc1 $f4, THREAD_FPR4_LS64(\thread)
+ sdc1 $f6, THREAD_FPR6_LS64(\thread)
+ sdc1 $f8, THREAD_FPR8_LS64(\thread)
+ sdc1 $f10, THREAD_FPR10_LS64(\thread)
+ sdc1 $f12, THREAD_FPR12_LS64(\thread)
+ sdc1 $f14, THREAD_FPR14_LS64(\thread)
+ sdc1 $f16, THREAD_FPR16_LS64(\thread)
+ sdc1 $f18, THREAD_FPR18_LS64(\thread)
+ sdc1 $f20, THREAD_FPR20_LS64(\thread)
+ sdc1 $f22, THREAD_FPR22_LS64(\thread)
+ sdc1 $f24, THREAD_FPR24_LS64(\thread)
+ sdc1 $f26, THREAD_FPR26_LS64(\thread)
+ sdc1 $f28, THREAD_FPR28_LS64(\thread)
+ sdc1 $f30, THREAD_FPR30_LS64(\thread)
sw \tmp, THREAD_FCR31(\thread)
.endm
.macro fpu_save_16odd thread
.set push
.set mips64r2
- sdc1 $f1, THREAD_FPR1(\thread)
- sdc1 $f3, THREAD_FPR3(\thread)
- sdc1 $f5, THREAD_FPR5(\thread)
- sdc1 $f7, THREAD_FPR7(\thread)
- sdc1 $f9, THREAD_FPR9(\thread)
- sdc1 $f11, THREAD_FPR11(\thread)
- sdc1 $f13, THREAD_FPR13(\thread)
- sdc1 $f15, THREAD_FPR15(\thread)
- sdc1 $f17, THREAD_FPR17(\thread)
- sdc1 $f19, THREAD_FPR19(\thread)
- sdc1 $f21, THREAD_FPR21(\thread)
- sdc1 $f23, THREAD_FPR23(\thread)
- sdc1 $f25, THREAD_FPR25(\thread)
- sdc1 $f27, THREAD_FPR27(\thread)
- sdc1 $f29, THREAD_FPR29(\thread)
- sdc1 $f31, THREAD_FPR31(\thread)
+ sdc1 $f1, THREAD_FPR1_LS64(\thread)
+ sdc1 $f3, THREAD_FPR3_LS64(\thread)
+ sdc1 $f5, THREAD_FPR5_LS64(\thread)
+ sdc1 $f7, THREAD_FPR7_LS64(\thread)
+ sdc1 $f9, THREAD_FPR9_LS64(\thread)
+ sdc1 $f11, THREAD_FPR11_LS64(\thread)
+ sdc1 $f13, THREAD_FPR13_LS64(\thread)
+ sdc1 $f15, THREAD_FPR15_LS64(\thread)
+ sdc1 $f17, THREAD_FPR17_LS64(\thread)
+ sdc1 $f19, THREAD_FPR19_LS64(\thread)
+ sdc1 $f21, THREAD_FPR21_LS64(\thread)
+ sdc1 $f23, THREAD_FPR23_LS64(\thread)
+ sdc1 $f25, THREAD_FPR25_LS64(\thread)
+ sdc1 $f27, THREAD_FPR27_LS64(\thread)
+ sdc1 $f29, THREAD_FPR29_LS64(\thread)
+ sdc1 $f31, THREAD_FPR31_LS64(\thread)
.set pop
.endm
@@ -117,44 +117,44 @@
.macro fpu_restore_16even thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
- ldc1 $f0, THREAD_FPR0(\thread)
- ldc1 $f2, THREAD_FPR2(\thread)
- ldc1 $f4, THREAD_FPR4(\thread)
- ldc1 $f6, THREAD_FPR6(\thread)
- ldc1 $f8, THREAD_FPR8(\thread)
- ldc1 $f10, THREAD_FPR10(\thread)
- ldc1 $f12, THREAD_FPR12(\thread)
- ldc1 $f14, THREAD_FPR14(\thread)
- ldc1 $f16, THREAD_FPR16(\thread)
- ldc1 $f18, THREAD_FPR18(\thread)
- ldc1 $f20, THREAD_FPR20(\thread)
- ldc1 $f22, THREAD_FPR22(\thread)
- ldc1 $f24, THREAD_FPR24(\thread)
- ldc1 $f26, THREAD_FPR26(\thread)
- ldc1 $f28, THREAD_FPR28(\thread)
- ldc1 $f30, THREAD_FPR30(\thread)
+ ldc1 $f0, THREAD_FPR0_LS64(\thread)
+ ldc1 $f2, THREAD_FPR2_LS64(\thread)
+ ldc1 $f4, THREAD_FPR4_LS64(\thread)
+ ldc1 $f6, THREAD_FPR6_LS64(\thread)
+ ldc1 $f8, THREAD_FPR8_LS64(\thread)
+ ldc1 $f10, THREAD_FPR10_LS64(\thread)
+ ldc1 $f12, THREAD_FPR12_LS64(\thread)
+ ldc1 $f14, THREAD_FPR14_LS64(\thread)
+ ldc1 $f16, THREAD_FPR16_LS64(\thread)
+ ldc1 $f18, THREAD_FPR18_LS64(\thread)
+ ldc1 $f20, THREAD_FPR20_LS64(\thread)
+ ldc1 $f22, THREAD_FPR22_LS64(\thread)
+ ldc1 $f24, THREAD_FPR24_LS64(\thread)
+ ldc1 $f26, THREAD_FPR26_LS64(\thread)
+ ldc1 $f28, THREAD_FPR28_LS64(\thread)
+ ldc1 $f30, THREAD_FPR30_LS64(\thread)
ctc1 \tmp, fcr31
.endm
.macro fpu_restore_16odd thread
.set push
.set mips64r2
- ldc1 $f1, THREAD_FPR1(\thread)
- ldc1 $f3, THREAD_FPR3(\thread)
- ldc1 $f5, THREAD_FPR5(\thread)
- ldc1 $f7, THREAD_FPR7(\thread)
- ldc1 $f9, THREAD_FPR9(\thread)
- ldc1 $f11, THREAD_FPR11(\thread)
- ldc1 $f13, THREAD_FPR13(\thread)
- ldc1 $f15, THREAD_FPR15(\thread)
- ldc1 $f17, THREAD_FPR17(\thread)
- ldc1 $f19, THREAD_FPR19(\thread)
- ldc1 $f21, THREAD_FPR21(\thread)
- ldc1 $f23, THREAD_FPR23(\thread)
- ldc1 $f25, THREAD_FPR25(\thread)
- ldc1 $f27, THREAD_FPR27(\thread)
- ldc1 $f29, THREAD_FPR29(\thread)
- ldc1 $f31, THREAD_FPR31(\thread)
+ ldc1 $f1, THREAD_FPR1_LS64(\thread)
+ ldc1 $f3, THREAD_FPR3_LS64(\thread)
+ ldc1 $f5, THREAD_FPR5_LS64(\thread)
+ ldc1 $f7, THREAD_FPR7_LS64(\thread)
+ ldc1 $f9, THREAD_FPR9_LS64(\thread)
+ ldc1 $f11, THREAD_FPR11_LS64(\thread)
+ ldc1 $f13, THREAD_FPR13_LS64(\thread)
+ ldc1 $f15, THREAD_FPR15_LS64(\thread)
+ ldc1 $f17, THREAD_FPR17_LS64(\thread)
+ ldc1 $f19, THREAD_FPR19_LS64(\thread)
+ ldc1 $f21, THREAD_FPR21_LS64(\thread)
+ ldc1 $f23, THREAD_FPR23_LS64(\thread)
+ ldc1 $f25, THREAD_FPR25_LS64(\thread)
+ ldc1 $f27, THREAD_FPR27_LS64(\thread)
+ ldc1 $f29, THREAD_FPR29_LS64(\thread)
+ ldc1 $f31, THREAD_FPR31_LS64(\thread)
.set pop
.endm
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 0c2e853..f454d7b 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -168,6 +168,72 @@ void output_thread_fpu_defines(void)
OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
+ /* the least significant 64 bits of each FP register */
+ OFFSET(THREAD_FPR0_LS64, task_struct,
+ thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR1_LS64, task_struct,
+ thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR2_LS64, task_struct,
+ thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR3_LS64, task_struct,
+ thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR4_LS64, task_struct,
+ thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR5_LS64, task_struct,
+ thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR6_LS64, task_struct,
+ thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR7_LS64, task_struct,
+ thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR8_LS64, task_struct,
+ thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR9_LS64, task_struct,
+ thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR10_LS64, task_struct,
+ thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR11_LS64, task_struct,
+ thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR12_LS64, task_struct,
+ thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR13_LS64, task_struct,
+ thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR14_LS64, task_struct,
+ thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR15_LS64, task_struct,
+ thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR16_LS64, task_struct,
+ thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR17_LS64, task_struct,
+ thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR18_LS64, task_struct,
+ thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR19_LS64, task_struct,
+ thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR20_LS64, task_struct,
+ thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR21_LS64, task_struct,
+ thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR22_LS64, task_struct,
+ thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR23_LS64, task_struct,
+ thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR24_LS64, task_struct,
+ thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR25_LS64, task_struct,
+ thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR26_LS64, task_struct,
+ thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR27_LS64, task_struct,
+ thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR28_LS64, task_struct,
+ thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR29_LS64, task_struct,
+ thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR30_LS64, task_struct,
+ thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
+ OFFSET(THREAD_FPR31_LS64, task_struct,
+ thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
+
OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
BLANK();
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 10/15] mips: add MSA register definitions & access
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch introduces definitions for the MSA control registers and
functions which allow access to both the control & vector registers. If
the toolchain being used to build the kernel includes support for MSA
then this patch will make use of that support & use MSA instructions
directly. However toolchain support for MSA is very new & far from a
point where it can be reasonably expected that everyone building the
kernel uses a toolchain with support. Thus fallbacks using .word
assembler directives are also provided for now as a temporary measure.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/Makefile | 5 ++
arch/mips/include/asm/asmmacro.h | 121 +++++++++++++++++++++++++++
arch/mips/include/asm/mipsregs.h | 1 +
arch/mips/include/asm/msa.h | 171 +++++++++++++++++++++++++++++++++++++++
4 files changed, 298 insertions(+)
create mode 100644 arch/mips/include/asm/msa.h
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 873a0ca..86522e5 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -119,6 +119,11 @@ cflags-$(CONFIG_CPU_MICROMIPS) += $(call cc-option,-mmicromips)
cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \
-fno-omit-frame-pointer
+ifeq ($(CONFIG_CPU_HAS_MSA),y)
+toolchain-msa := $(call cc-option-yn,-mhard-float -mfp64 -mmsa)
+cflags-$(toolchain-msa) += -DTOOLCHAIN_SUPPORTS_MSA
+endif
+
#
# CPU-dependent compiler/assembler options for optimization.
#
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 2aa713f..c759501 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -196,4 +196,125 @@
.word 0x41800000 | (\rt << 16) | (\rd << 11) | (\u << 5) | (\sel)
.endm
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+ .macro ld_d wd, off, base
+ .set push
+ .set mips32r2
+ .set msa
+ ld.d $w\wd, \off(\base)
+ .set pop
+ .endm
+
+ .macro st_d wd, off, base
+ .set push
+ .set mips32r2
+ .set msa
+ st.d $w\wd, \off(\base)
+ .set pop
+ .endm
+
+ .macro copy_u_w rd, ws, n
+ .set push
+ .set mips32r2
+ .set msa
+ copy_u.w \rd, $w\ws[\n]
+ .set pop
+ .endm
+
+ .macro copy_u_d rd, ws, n
+ .set push
+ .set mips64r2
+ .set msa
+ copy_u.d \rd, $w\ws[\n]
+ .set pop
+ .endm
+
+ .macro insert_w wd, n, rs
+ .set push
+ .set mips32r2
+ .set msa
+ insert.w $w\wd[\n], \rs
+ .set pop
+ .endm
+
+ .macro insert_d wd, n, rs
+ .set push
+ .set mips64r2
+ .set msa
+ insert.d $w\wd[\n], \rs
+ .set pop
+ .endm
+#else
+ /*
+ * Temporary until all toolchains in use include MSA support.
+ */
+ .macro cfcmsa rd, cs
+ .set push
+ .set noat
+ .word 0x787e0059 | (\cs << 11)
+ move \rd, $1
+ .set pop
+ .endm
+
+ .macro ctcmsa cd, rs
+ .set push
+ .set noat
+ move $1, \rs
+ .word 0x783e0819 | (\cd << 6)
+ .set pop
+ .endm
+
+ .macro ld_d wd, off, base /* fallback encoding of ld.d for assemblers without MSA support */
+ .set push
+ .set noat /* $1 ($at) is used explicitly below to hold the address */
+ addu $1, \base, \off /* addu, not add: address arithmetic must not trap on overflow */
+ .word 0x78000823 | (\wd << 6)
+ .set pop
+ .endm
+
+ .macro st_d wd, off, base /* fallback encoding of st.d for assemblers without MSA support */
+ .set push
+ .set noat /* $1 ($at) is used explicitly below to hold the address */
+ addu $1, \base, \off /* addu, not add: address arithmetic must not trap on overflow */
+ .word 0x78000827 | (\wd << 6)
+ .set pop
+ .endm
+
+ .macro copy_u_w rd, ws, n
+ .set push
+ .set noat
+ .word 0x78f00059 | (\n << 16) | (\ws << 11)
+ /* move triggers an assembler bug... */
+ or \rd, $1, zero
+ .set pop
+ .endm
+
+ .macro copy_u_d rd, ws, n
+ .set push
+ .set noat
+ .word 0x78f80059 | (\n << 16) | (\ws << 11)
+ /* move triggers an assembler bug... */
+ or \rd, $1, zero
+ .set pop
+ .endm
+
+ .macro insert_w wd, n, rs
+ .set push
+ .set noat
+ /* move triggers an assembler bug... */
+ or $1, \rs, zero
+ .word 0x79300819 | (\n << 16) | (\wd << 6)
+ .set pop
+ .endm
+
+ .macro insert_d wd, n, rs
+ .set push
+ .set noat
+ /* move triggers an assembler bug... */
+ or $1, \rs, zero
+ .word 0x79380819 | (\n << 16) | (\wd << 6)
+ .set pop
+ .endm
+#endif
+
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index bbc3dd4..f440c27 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1883,6 +1883,7 @@ change_c0_##name(unsigned int change, unsigned int newbits) \
__BUILD_SET_C0(status)
__BUILD_SET_C0(cause)
__BUILD_SET_C0(config)
+__BUILD_SET_C0(config5)
__BUILD_SET_C0(intcontrol)
__BUILD_SET_C0(intctl)
__BUILD_SET_C0(srsmap)
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
new file mode 100644
index 0000000..a306ea8
--- /dev/null
+++ b/arch/mips/include/asm/msa.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#ifndef _ASM_MSA_H
+#define _ASM_MSA_H
+
+#include <asm/mipsregs.h>
+
+static inline void enable_msa(void)
+{
+ if (cpu_has_msa)
+ set_c0_config5(MIPS_CONF5_MSAEN);
+}
+
+static inline void disable_msa(void)
+{
+ if (cpu_has_msa)
+ clear_c0_config5(MIPS_CONF5_MSAEN);
+}
+
+static inline int is_msa_enabled(void)
+{
+ if (!cpu_has_msa)
+ return 0;
+
+ return read_c0_config5() & MIPS_CONF5_MSAEN;
+}
+
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+
+#define __BUILD_MSA_CTL_REG(name, cs) /* emits read_msa_<name>() & write_msa_<name>() */ \
+static inline unsigned int read_msa_##name(void) /* returns MSA control register cs */ \
+{ \
+ unsigned int reg; \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set msa\n" \
+ " cfcmsa %0, $" #cs "\n" /* control register -> GPR */ \
+ " .set pop\n" \
+ : "=r"(reg)); \
+ return reg; \
+} \
+ \
+static inline void write_msa_##name(unsigned int val) /* stores val to MSA control register cs */ \
+{ \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set msa\n" \
+ " ctcmsa $" #cs ", %0\n" /* GPR -> control register: must be ctcmsa, not cfcmsa */ \
+ " .set pop\n" \
+ : : "r"(val)); \
+}
+
+#else /* !TOOLCHAIN_SUPPORTS_MSA */
+
+/*
+ * Define functions using .word for the c[ft]cmsa instructions in order to
+ * allow compilation with toolchains that do not support MSA. Once all
+ * toolchains in use support MSA these can be removed.
+ */
+
+#define __BUILD_MSA_CTL_REG(name, cs) \
+static inline unsigned int read_msa_##name(void) \
+{ \
+ unsigned int reg; \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set noat\n" \
+ " .word 0x787e0059 | (" #cs " << 11)\n" \
+ " move %0, $1\n" \
+ " .set pop\n" \
+ : "=r"(reg)); \
+ return reg; \
+} \
+ \
+static inline void write_msa_##name(unsigned int val) \
+{ \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set noat\n" \
+ " move $1, %0\n" \
+ " .word 0x783e0819 | (" #cs " << 6)\n" \
+ " .set pop\n" \
+ : : "r"(val)); \
+}
+
+#endif /* !TOOLCHAIN_SUPPORTS_MSA */
+
+#define MSA_IR 0
+#define MSA_CSR 1
+#define MSA_ACCESS 2
+#define MSA_SAVE 3
+#define MSA_MODIFY 4
+#define MSA_REQUEST 5
+#define MSA_MAP 6
+#define MSA_UNMAP 7
+
+__BUILD_MSA_CTL_REG(ir, 0)
+__BUILD_MSA_CTL_REG(csr, 1)
+__BUILD_MSA_CTL_REG(access, 2)
+__BUILD_MSA_CTL_REG(save, 3)
+__BUILD_MSA_CTL_REG(modify, 4)
+__BUILD_MSA_CTL_REG(request, 5)
+__BUILD_MSA_CTL_REG(map, 6)
+__BUILD_MSA_CTL_REG(unmap, 7)
+
+/* MSA Implementation Register (MSAIR) */
+#define MSA_IR_REVB 0
+#define MSA_IR_REVF (_ULCAST_(0xff) << MSA_IR_REVB)
+#define MSA_IR_PROCB 8
+#define MSA_IR_PROCF (_ULCAST_(0xff) << MSA_IR_PROCB)
+#define MSA_IR_WRPB 16
+#define MSA_IR_WRPF (_ULCAST_(0x1) << MSA_IR_WRPB)
+
+/* MSA Control & Status Register (MSACSR) */
+#define MSA_CSR_RMB 0
+#define MSA_CSR_RMF (_ULCAST_(0x3) << MSA_CSR_RMB)
+#define MSA_CSR_RM_NEAREST 0
+#define MSA_CSR_RM_TO_ZERO 1
+#define MSA_CSR_RM_TO_POS 2
+#define MSA_CSR_RM_TO_NEG 3
+#define MSA_CSR_FLAGSB 2
+#define MSA_CSR_FLAGSF (_ULCAST_(0x1f) << MSA_CSR_FLAGSB)
+#define MSA_CSR_FLAGS_IB 2
+#define MSA_CSR_FLAGS_IF (_ULCAST_(0x1) << MSA_CSR_FLAGS_IB)
+#define MSA_CSR_FLAGS_UB 3
+#define MSA_CSR_FLAGS_UF (_ULCAST_(0x1) << MSA_CSR_FLAGS_UB)
+#define MSA_CSR_FLAGS_OB 4
+#define MSA_CSR_FLAGS_OF (_ULCAST_(0x1) << MSA_CSR_FLAGS_OB)
+#define MSA_CSR_FLAGS_ZB 5
+#define MSA_CSR_FLAGS_ZF (_ULCAST_(0x1) << MSA_CSR_FLAGS_ZB)
+#define MSA_CSR_FLAGS_VB 6
+#define MSA_CSR_FLAGS_VF (_ULCAST_(0x1) << MSA_CSR_FLAGS_VB)
+#define MSA_CSR_ENABLESB 7
+#define MSA_CSR_ENABLESF (_ULCAST_(0x1f) << MSA_CSR_ENABLESB)
+#define MSA_CSR_ENABLES_IB 7
+#define MSA_CSR_ENABLES_IF (_ULCAST_(0x1) << MSA_CSR_ENABLES_IB)
+#define MSA_CSR_ENABLES_UB 8
+#define MSA_CSR_ENABLES_UF (_ULCAST_(0x1) << MSA_CSR_ENABLES_UB)
+#define MSA_CSR_ENABLES_OB 9
+#define MSA_CSR_ENABLES_OF (_ULCAST_(0x1) << MSA_CSR_ENABLES_OB)
+#define MSA_CSR_ENABLES_ZB 10
+#define MSA_CSR_ENABLES_ZF (_ULCAST_(0x1) << MSA_CSR_ENABLES_ZB)
+#define MSA_CSR_ENABLES_VB 11
+#define MSA_CSR_ENABLES_VF (_ULCAST_(0x1) << MSA_CSR_ENABLES_VB)
+#define MSA_CSR_CAUSEB 12
+#define MSA_CSR_CAUSEF (_ULCAST_(0x3f) << MSA_CSR_CAUSEB)
+#define MSA_CSR_CAUSE_IB 12
+#define MSA_CSR_CAUSE_IF (_ULCAST_(0x1) << MSA_CSR_CAUSE_IB)
+#define MSA_CSR_CAUSE_UB 13
+#define MSA_CSR_CAUSE_UF (_ULCAST_(0x1) << MSA_CSR_CAUSE_UB)
+#define MSA_CSR_CAUSE_OB 14
+#define MSA_CSR_CAUSE_OF (_ULCAST_(0x1) << MSA_CSR_CAUSE_OB)
+#define MSA_CSR_CAUSE_ZB 15
+#define MSA_CSR_CAUSE_ZF (_ULCAST_(0x1) << MSA_CSR_CAUSE_ZB)
+#define MSA_CSR_CAUSE_VB 16
+#define MSA_CSR_CAUSE_VF (_ULCAST_(0x1) << MSA_CSR_CAUSE_VB)
+#define MSA_CSR_CAUSE_EB 17
+#define MSA_CSR_CAUSE_EF (_ULCAST_(0x1) << MSA_CSR_CAUSE_EB)
+#define MSA_CSR_NXB 18
+#define MSA_CSR_NXF (_ULCAST_(0x1) << MSA_CSR_NXB)
+#define MSA_CSR_FSB 24
+#define MSA_CSR_FSF (_ULCAST_(0x1) << MSA_CSR_FSB)
+
+#endif /* _ASM_MSA_H */
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 10/15] mips: add MSA register definitions & access
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch introduces definitions for the MSA control registers and
functions which allow access to both the control & vector registers. If
the toolchain being used to build the kernel includes support for MSA
then this patch will make use of that support & use MSA instructions
directly. However toolchain support for MSA is very new & far from a
point where it can be reasonably expected that everyone building the
kernel uses a toolchain with support. Thus fallbacks using .word
assembler directives are also provided for now as a temporary measure.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/Makefile | 5 ++
arch/mips/include/asm/asmmacro.h | 121 +++++++++++++++++++++++++++
arch/mips/include/asm/mipsregs.h | 1 +
arch/mips/include/asm/msa.h | 171 +++++++++++++++++++++++++++++++++++++++
4 files changed, 298 insertions(+)
create mode 100644 arch/mips/include/asm/msa.h
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 873a0ca..86522e5 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -119,6 +119,11 @@ cflags-$(CONFIG_CPU_MICROMIPS) += $(call cc-option,-mmicromips)
cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \
-fno-omit-frame-pointer
+ifeq ($(CONFIG_CPU_HAS_MSA),y)
+toolchain-msa := $(call cc-option-yn,-mhard-float -mfp64 -mmsa)
+cflags-$(toolchain-msa) += -DTOOLCHAIN_SUPPORTS_MSA
+endif
+
#
# CPU-dependent compiler/assembler options for optimization.
#
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 2aa713f..c759501 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -196,4 +196,125 @@
.word 0x41800000 | (\rt << 16) | (\rd << 11) | (\u << 5) | (\sel)
.endm
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+ .macro ld_d wd, off, base
+ .set push
+ .set mips32r2
+ .set msa
+ ld.d $w\wd, \off(\base)
+ .set pop
+ .endm
+
+ .macro st_d wd, off, base
+ .set push
+ .set mips32r2
+ .set msa
+ st.d $w\wd, \off(\base)
+ .set pop
+ .endm
+
+ .macro copy_u_w rd, ws, n
+ .set push
+ .set mips32r2
+ .set msa
+ copy_u.w \rd, $w\ws[\n]
+ .set pop
+ .endm
+
+ .macro copy_u_d rd, ws, n
+ .set push
+ .set mips64r2
+ .set msa
+ copy_u.d \rd, $w\ws[\n]
+ .set pop
+ .endm
+
+ .macro insert_w wd, n, rs
+ .set push
+ .set mips32r2
+ .set msa
+ insert.w $w\wd[\n], \rs
+ .set pop
+ .endm
+
+ .macro insert_d wd, n, rs
+ .set push
+ .set mips64r2
+ .set msa
+ insert.d $w\wd[\n], \rs
+ .set pop
+ .endm
+#else
+ /*
+ * Temporary until all toolchains in use include MSA support.
+ */
+ .macro cfcmsa rd, cs
+ .set push
+ .set noat
+ .word 0x787e0059 | (\cs << 11)
+ move \rd, $1
+ .set pop
+ .endm
+
+ .macro ctcmsa cd, rs
+ .set push
+ .set noat
+ move $1, \rs
+ .word 0x783e0819 | (\cd << 6)
+ .set pop
+ .endm
+
+ .macro ld_d wd, off, base /* fallback encoding of ld.d for assemblers without MSA support */
+ .set push
+ .set noat /* $1 ($at) is used explicitly below to hold the address */
+ addu $1, \base, \off /* addu, not add: address arithmetic must not trap on overflow */
+ .word 0x78000823 | (\wd << 6)
+ .set pop
+ .endm
+
+ .macro st_d wd, off, base /* fallback encoding of st.d for assemblers without MSA support */
+ .set push
+ .set noat /* $1 ($at) is used explicitly below to hold the address */
+ addu $1, \base, \off /* addu, not add: address arithmetic must not trap on overflow */
+ .word 0x78000827 | (\wd << 6)
+ .set pop
+ .endm
+
+ .macro copy_u_w rd, ws, n
+ .set push
+ .set noat
+ .word 0x78f00059 | (\n << 16) | (\ws << 11)
+ /* move triggers an assembler bug... */
+ or \rd, $1, zero
+ .set pop
+ .endm
+
+ .macro copy_u_d rd, ws, n
+ .set push
+ .set noat
+ .word 0x78f80059 | (\n << 16) | (\ws << 11)
+ /* move triggers an assembler bug... */
+ or \rd, $1, zero
+ .set pop
+ .endm
+
+ .macro insert_w wd, n, rs
+ .set push
+ .set noat
+ /* move triggers an assembler bug... */
+ or $1, \rs, zero
+ .word 0x79300819 | (\n << 16) | (\wd << 6)
+ .set pop
+ .endm
+
+ .macro insert_d wd, n, rs
+ .set push
+ .set noat
+ /* move triggers an assembler bug... */
+ or $1, \rs, zero
+ .word 0x79380819 | (\n << 16) | (\wd << 6)
+ .set pop
+ .endm
+#endif
+
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index bbc3dd4..f440c27 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1883,6 +1883,7 @@ change_c0_##name(unsigned int change, unsigned int newbits) \
__BUILD_SET_C0(status)
__BUILD_SET_C0(cause)
__BUILD_SET_C0(config)
+__BUILD_SET_C0(config5)
__BUILD_SET_C0(intcontrol)
__BUILD_SET_C0(intctl)
__BUILD_SET_C0(srsmap)
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
new file mode 100644
index 0000000..a306ea8
--- /dev/null
+++ b/arch/mips/include/asm/msa.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#ifndef _ASM_MSA_H
+#define _ASM_MSA_H
+
+#include <asm/mipsregs.h>
+
+static inline void enable_msa(void)
+{
+ if (cpu_has_msa)
+ set_c0_config5(MIPS_CONF5_MSAEN);
+}
+
+static inline void disable_msa(void)
+{
+ if (cpu_has_msa)
+ clear_c0_config5(MIPS_CONF5_MSAEN);
+}
+
+static inline int is_msa_enabled(void)
+{
+ if (!cpu_has_msa)
+ return 0;
+
+ return read_c0_config5() & MIPS_CONF5_MSAEN;
+}
+
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+
+#define __BUILD_MSA_CTL_REG(name, cs) /* emits read_msa_<name>() & write_msa_<name>() */ \
+static inline unsigned int read_msa_##name(void) /* returns MSA control register cs */ \
+{ \
+ unsigned int reg; \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set msa\n" \
+ " cfcmsa %0, $" #cs "\n" /* control register -> GPR */ \
+ " .set pop\n" \
+ : "=r"(reg)); \
+ return reg; \
+} \
+ \
+static inline void write_msa_##name(unsigned int val) /* stores val to MSA control register cs */ \
+{ \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set msa\n" \
+ " ctcmsa $" #cs ", %0\n" /* GPR -> control register: must be ctcmsa, not cfcmsa */ \
+ " .set pop\n" \
+ : : "r"(val)); \
+}
+
+#else /* !TOOLCHAIN_SUPPORTS_MSA */
+
+/*
+ * Define functions using .word for the c[ft]cmsa instructions in order to
+ * allow compilation with toolchains that do not support MSA. Once all
+ * toolchains in use support MSA these can be removed.
+ */
+
+#define __BUILD_MSA_CTL_REG(name, cs) \
+static inline unsigned int read_msa_##name(void) \
+{ \
+ unsigned int reg; \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set noat\n" \
+ " .word 0x787e0059 | (" #cs " << 11)\n" \
+ " move %0, $1\n" \
+ " .set pop\n" \
+ : "=r"(reg)); \
+ return reg; \
+} \
+ \
+static inline void write_msa_##name(unsigned int val) \
+{ \
+ __asm__ __volatile__( \
+ " .set push\n" \
+ " .set noat\n" \
+ " move $1, %0\n" \
+ " .word 0x783e0819 | (" #cs " << 6)\n" \
+ " .set pop\n" \
+ : : "r"(val)); \
+}
+
+#endif /* !TOOLCHAIN_SUPPORTS_MSA */
+
+#define MSA_IR 0
+#define MSA_CSR 1
+#define MSA_ACCESS 2
+#define MSA_SAVE 3
+#define MSA_MODIFY 4
+#define MSA_REQUEST 5
+#define MSA_MAP 6
+#define MSA_UNMAP 7
+
+__BUILD_MSA_CTL_REG(ir, 0)
+__BUILD_MSA_CTL_REG(csr, 1)
+__BUILD_MSA_CTL_REG(access, 2)
+__BUILD_MSA_CTL_REG(save, 3)
+__BUILD_MSA_CTL_REG(modify, 4)
+__BUILD_MSA_CTL_REG(request, 5)
+__BUILD_MSA_CTL_REG(map, 6)
+__BUILD_MSA_CTL_REG(unmap, 7)
+
+/* MSA Implementation Register (MSAIR) */
+#define MSA_IR_REVB 0
+#define MSA_IR_REVF (_ULCAST_(0xff) << MSA_IR_REVB)
+#define MSA_IR_PROCB 8
+#define MSA_IR_PROCF (_ULCAST_(0xff) << MSA_IR_PROCB)
+#define MSA_IR_WRPB 16
+#define MSA_IR_WRPF (_ULCAST_(0x1) << MSA_IR_WRPB)
+
+/* MSA Control & Status Register (MSACSR) */
+#define MSA_CSR_RMB 0
+#define MSA_CSR_RMF (_ULCAST_(0x3) << MSA_CSR_RMB)
+#define MSA_CSR_RM_NEAREST 0
+#define MSA_CSR_RM_TO_ZERO 1
+#define MSA_CSR_RM_TO_POS 2
+#define MSA_CSR_RM_TO_NEG 3
+#define MSA_CSR_FLAGSB 2
+#define MSA_CSR_FLAGSF (_ULCAST_(0x1f) << MSA_CSR_FLAGSB)
+#define MSA_CSR_FLAGS_IB 2
+#define MSA_CSR_FLAGS_IF (_ULCAST_(0x1) << MSA_CSR_FLAGS_IB)
+#define MSA_CSR_FLAGS_UB 3
+#define MSA_CSR_FLAGS_UF (_ULCAST_(0x1) << MSA_CSR_FLAGS_UB)
+#define MSA_CSR_FLAGS_OB 4
+#define MSA_CSR_FLAGS_OF (_ULCAST_(0x1) << MSA_CSR_FLAGS_OB)
+#define MSA_CSR_FLAGS_ZB 5
+#define MSA_CSR_FLAGS_ZF (_ULCAST_(0x1) << MSA_CSR_FLAGS_ZB)
+#define MSA_CSR_FLAGS_VB 6
+#define MSA_CSR_FLAGS_VF (_ULCAST_(0x1) << MSA_CSR_FLAGS_VB)
+#define MSA_CSR_ENABLESB 7
+#define MSA_CSR_ENABLESF (_ULCAST_(0x1f) << MSA_CSR_ENABLESB)
+#define MSA_CSR_ENABLES_IB 7
+#define MSA_CSR_ENABLES_IF (_ULCAST_(0x1) << MSA_CSR_ENABLES_IB)
+#define MSA_CSR_ENABLES_UB 8
+#define MSA_CSR_ENABLES_UF (_ULCAST_(0x1) << MSA_CSR_ENABLES_UB)
+#define MSA_CSR_ENABLES_OB 9
+#define MSA_CSR_ENABLES_OF (_ULCAST_(0x1) << MSA_CSR_ENABLES_OB)
+#define MSA_CSR_ENABLES_ZB 10
+#define MSA_CSR_ENABLES_ZF (_ULCAST_(0x1) << MSA_CSR_ENABLES_ZB)
+#define MSA_CSR_ENABLES_VB 11
+#define MSA_CSR_ENABLES_VF (_ULCAST_(0x1) << MSA_CSR_ENABLES_VB)
+#define MSA_CSR_CAUSEB 12
+#define MSA_CSR_CAUSEF (_ULCAST_(0x3f) << MSA_CSR_CAUSEB)
+#define MSA_CSR_CAUSE_IB 12
+#define MSA_CSR_CAUSE_IF (_ULCAST_(0x1) << MSA_CSR_CAUSE_IB)
+#define MSA_CSR_CAUSE_UB 13
+#define MSA_CSR_CAUSE_UF (_ULCAST_(0x1) << MSA_CSR_CAUSE_UB)
+#define MSA_CSR_CAUSE_OB 14
+#define MSA_CSR_CAUSE_OF (_ULCAST_(0x1) << MSA_CSR_CAUSE_OB)
+#define MSA_CSR_CAUSE_ZB 15
+#define MSA_CSR_CAUSE_ZF (_ULCAST_(0x1) << MSA_CSR_CAUSE_ZB)
+#define MSA_CSR_CAUSE_VB 16
+#define MSA_CSR_CAUSE_VF (_ULCAST_(0x1) << MSA_CSR_CAUSE_VB)
+#define MSA_CSR_CAUSE_EB 17
+#define MSA_CSR_CAUSE_EF (_ULCAST_(0x1) << MSA_CSR_CAUSE_EB)
+#define MSA_CSR_NXB 18
+#define MSA_CSR_NXF (_ULCAST_(0x1) << MSA_CSR_NXB)
+#define MSA_CSR_FSB 24
+#define MSA_CSR_FSF (_ULCAST_(0x1) << MSA_CSR_FSB)
+
+#endif /* _ASM_MSA_H */
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 11/15] mips: detect the MSA ASE
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch adds support for probing the MSAP bit within the Config3
register in order to detect the presence of the MSA ASE. Presence of the
ASE will be indicated in /proc/cpuinfo. The value of the MSA
implementation register will be displayed at boot to aid debugging and
verification of a correct setup, as is done for the FPU.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/Kconfig | 19 +++++++++++++++++++
arch/mips/include/asm/cpu-features.h | 6 ++++++
arch/mips/include/asm/cpu-info.h | 1 +
arch/mips/include/asm/cpu.h | 1 +
arch/mips/kernel/cpu-probe.c | 22 ++++++++++++++++++++++
arch/mips/kernel/proc.c | 1 +
6 files changed, 50 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 87dc0c3..bb08f1a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1202,6 +1202,7 @@ config CPU_MIPS32_R2
select CPU_HAS_PREFETCH
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
+ select CPU_SUPPORTS_MSA
select HAVE_KVM
help
Choose this option to build a kernel for release 2 or later of the
@@ -1237,6 +1238,7 @@ config CPU_MIPS64_R2
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_SUPPORTS_MSA
help
Choose this option to build a kernel for release 2 or later of the
MIPS64 architecture. Many modern embedded systems with a 64-bit
@@ -2045,6 +2047,20 @@ config CPU_MICROMIPS
When this option is enabled the kernel will be built using the
microMIPS ISA
+config CPU_HAS_MSA
+ bool "Support for the MIPS SIMD Architecture"
+ depends on CPU_SUPPORTS_MSA
+ default y
+ help
+ MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers
+ and a set of SIMD instructions to operate on them. When this option
+ is enabled the kernel will support detection of the MSA ASE. If you
+ know that your kernel will only be running on CPUs which do not
+ support MSA then you may wish to say N here to reduce the size of
+ your kernel.
+
+ If unsure, say Y.
+
config CPU_HAS_WB
bool
@@ -2110,6 +2126,9 @@ config SYS_SUPPORTS_SMARTMIPS
config SYS_SUPPORTS_MICROMIPS
bool
+config CPU_SUPPORTS_MSA
+ bool
+
config ARCH_FLATMEM_ENABLE
def_bool y
depends on !NUMA && !CPU_LOONGSON2
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 6e70b03..390795d 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -299,4 +299,10 @@
#define cpu_has_vz (cpu_data[0].ases & MIPS_ASE_VZ)
#endif
+#if defined(CONFIG_CPU_HAS_MSA) && !defined(cpu_has_msa)
+# define cpu_has_msa (cpu_data[0].ases & MIPS_ASE_MSA)
+#elif !defined(cpu_has_msa)
+# define cpu_has_msa 0
+#endif
+
#endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 8f7adf0..359cea1 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -49,6 +49,7 @@ struct cpuinfo_mips {
unsigned long ases;
unsigned int processor_id;
unsigned int fpu_id;
+ unsigned int msa_id;
unsigned int cputype;
int isa_level;
int tlbsize;
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 76411df..0008277 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -370,5 +370,6 @@ enum cpu_type_enum {
#define MIPS_ASE_MIPSMT 0x00000020 /* CPU supports MIPS MT */
#define MIPS_ASE_DSP2P 0x00000040 /* Signal Processing ASE Rev 2 */
#define MIPS_ASE_VZ 0x00000080 /* Virtualization ASE */
+#define MIPS_ASE_MSA 0x00000100 /* MIPS SIMD Architecture */
#endif /* _ASM_CPU_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 530f832..852e085 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -23,6 +23,7 @@
#include <asm/cpu-type.h>
#include <asm/fpu.h>
#include <asm/mipsregs.h>
+#include <asm/msa.h>
#include <asm/watch.h>
#include <asm/elf.h>
#include <asm/spram.h>
@@ -126,6 +127,20 @@ static inline int __cpu_has_fpu(void)
return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE);
}
+static inline unsigned long cpu_get_msa_id(void)
+{
+ unsigned long status, conf5, msa_id;
+
+ status = read_c0_status();
+ __enable_fpu(FPU_64BIT);
+ conf5 = read_c0_config5();
+ enable_msa();
+ msa_id = read_msa_ir();
+ write_c0_config5(conf5);
+ write_c0_status(status);
+ return msa_id;
+}
+
static inline void cpu_probe_vmbits(struct cpuinfo_mips *c)
{
#ifdef __NEED_VMBITS_PROBE
@@ -301,6 +316,8 @@ static inline unsigned int decode_config3(struct cpuinfo_mips *c)
c->ases |= MIPS_ASE_VZ;
if (config3 & MIPS_CONF3_SC)
c->options |= MIPS_CPU_SEGMENTS;
+ if (config3 & MIPS_CONF3_MSA)
+ c->ases |= MIPS_ASE_MSA;
return config3 & MIPS_CONF_M;
}
@@ -1176,6 +1193,9 @@ void cpu_probe(void)
else
c->srsets = 1;
+ if (cpu_has_msa)
+ c->msa_id = cpu_get_msa_id();
+
cpu_probe_vmbits(c);
#ifdef CONFIG_64BIT
@@ -1192,4 +1212,6 @@ void cpu_report(void)
smp_processor_id(), c->processor_id, cpu_name_string());
if (c->options & MIPS_CPU_FPU)
printk(KERN_INFO "FPU revision is: %08x\n", c->fpu_id);
+ if (cpu_has_msa)
+ pr_info("MSA revision is: %08x\n", c->msa_id);
}
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 00d2097..ca1d48e 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -95,6 +95,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_mipsmt) seq_printf(m, "%s", " mt");
if (cpu_has_mmips) seq_printf(m, "%s", " micromips");
if (cpu_has_vz) seq_printf(m, "%s", " vz");
+ if (cpu_has_msa) seq_printf(m, "%s", " msa");
seq_printf(m, "\n");
if (cpu_has_mmips) {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 11/15] mips: detect the MSA ASE
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch adds support for probing the MSAP bit within the Config3
register in order to detect the presence of the MSA ASE. Presence of the
ASE will be indicated in /proc/cpuinfo. The value of the MSA
implementation register will be displayed at boot to aid debugging and
verification of a correct setup, as is done for the FPU.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/Kconfig | 19 +++++++++++++++++++
arch/mips/include/asm/cpu-features.h | 6 ++++++
arch/mips/include/asm/cpu-info.h | 1 +
arch/mips/include/asm/cpu.h | 1 +
arch/mips/kernel/cpu-probe.c | 22 ++++++++++++++++++++++
arch/mips/kernel/proc.c | 1 +
6 files changed, 50 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 87dc0c3..bb08f1a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1202,6 +1202,7 @@ config CPU_MIPS32_R2
select CPU_HAS_PREFETCH
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
+ select CPU_SUPPORTS_MSA
select HAVE_KVM
help
Choose this option to build a kernel for release 2 or later of the
@@ -1237,6 +1238,7 @@ config CPU_MIPS64_R2
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_SUPPORTS_MSA
help
Choose this option to build a kernel for release 2 or later of the
MIPS64 architecture. Many modern embedded systems with a 64-bit
@@ -2045,6 +2047,20 @@ config CPU_MICROMIPS
When this option is enabled the kernel will be built using the
microMIPS ISA
+config CPU_HAS_MSA
+ bool "Support for the MIPS SIMD Architecture"
+ depends on CPU_SUPPORTS_MSA
+ default y
+ help
+ MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers
+ and a set of SIMD instructions to operate on them. When this option
+ is enabled the kernel will support detection of the MSA ASE. If you
+ know that your kernel will only be running on CPUs which do not
+ support MSA then you may wish to say N here to reduce the size of
+ your kernel.
+
+ If unsure, say Y.
+
config CPU_HAS_WB
bool
@@ -2110,6 +2126,9 @@ config SYS_SUPPORTS_SMARTMIPS
config SYS_SUPPORTS_MICROMIPS
bool
+config CPU_SUPPORTS_MSA
+ bool
+
config ARCH_FLATMEM_ENABLE
def_bool y
depends on !NUMA && !CPU_LOONGSON2
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 6e70b03..390795d 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -299,4 +299,10 @@
#define cpu_has_vz (cpu_data[0].ases & MIPS_ASE_VZ)
#endif
+#if defined(CONFIG_CPU_HAS_MSA) && !defined(cpu_has_msa)
+# define cpu_has_msa (cpu_data[0].ases & MIPS_ASE_MSA)
+#elif !defined(cpu_has_msa)
+# define cpu_has_msa 0
+#endif
+
#endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 8f7adf0..359cea1 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -49,6 +49,7 @@ struct cpuinfo_mips {
unsigned long ases;
unsigned int processor_id;
unsigned int fpu_id;
+ unsigned int msa_id;
unsigned int cputype;
int isa_level;
int tlbsize;
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 76411df..0008277 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -370,5 +370,6 @@ enum cpu_type_enum {
#define MIPS_ASE_MIPSMT 0x00000020 /* CPU supports MIPS MT */
#define MIPS_ASE_DSP2P 0x00000040 /* Signal Processing ASE Rev 2 */
#define MIPS_ASE_VZ 0x00000080 /* Virtualization ASE */
+#define MIPS_ASE_MSA 0x00000100 /* MIPS SIMD Architecture */
#endif /* _ASM_CPU_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 530f832..852e085 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -23,6 +23,7 @@
#include <asm/cpu-type.h>
#include <asm/fpu.h>
#include <asm/mipsregs.h>
+#include <asm/msa.h>
#include <asm/watch.h>
#include <asm/elf.h>
#include <asm/spram.h>
@@ -126,6 +127,20 @@ static inline int __cpu_has_fpu(void)
return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE);
}
+static inline unsigned long cpu_get_msa_id(void)
+{
+ unsigned long status, conf5, msa_id;
+
+ status = read_c0_status();
+ __enable_fpu(FPU_64BIT);
+ conf5 = read_c0_config5();
+ enable_msa();
+ msa_id = read_msa_ir();
+ write_c0_config5(conf5);
+ write_c0_status(status);
+ return msa_id;
+}
+
static inline void cpu_probe_vmbits(struct cpuinfo_mips *c)
{
#ifdef __NEED_VMBITS_PROBE
@@ -301,6 +316,8 @@ static inline unsigned int decode_config3(struct cpuinfo_mips *c)
c->ases |= MIPS_ASE_VZ;
if (config3 & MIPS_CONF3_SC)
c->options |= MIPS_CPU_SEGMENTS;
+ if (config3 & MIPS_CONF3_MSA)
+ c->ases |= MIPS_ASE_MSA;
return config3 & MIPS_CONF_M;
}
@@ -1176,6 +1193,9 @@ void cpu_probe(void)
else
c->srsets = 1;
+ if (cpu_has_msa)
+ c->msa_id = cpu_get_msa_id();
+
cpu_probe_vmbits(c);
#ifdef CONFIG_64BIT
@@ -1192,4 +1212,6 @@ void cpu_report(void)
smp_processor_id(), c->processor_id, cpu_name_string());
if (c->options & MIPS_CPU_FPU)
printk(KERN_INFO "FPU revision is: %08x\n", c->fpu_id);
+ if (cpu_has_msa)
+ pr_info("MSA revision is: %08x\n", c->msa_id);
}
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 00d2097..ca1d48e 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -95,6 +95,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_mipsmt) seq_printf(m, "%s", " mt");
if (cpu_has_mmips) seq_printf(m, "%s", " micromips");
if (cpu_has_vz) seq_printf(m, "%s", " vz");
+ if (cpu_has_msa) seq_printf(m, "%s", " msa");
seq_printf(m, "\n");
if (cpu_has_mmips) {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 12/15] mips: basic MSA context switching support
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch adds support for context switching the MSA vector registers.
These 128 bit vector registers are aliased with the FP registers - an
FP register accesses the least significant bits of the vector register
with which it is aliased (ie. the register with the same index). Due to
both this & the requirement that the scalar FPU must be 64-bit (FR=1) if
enabled at the same time as MSA the kernel will enable MSA & scalar FP
at the same time for tasks which use MSA. If we restore the MSA vector
context then we might as well enable the scalar FPU since the reason it
was left disabled was to allow for lazy FP context restoring - but we
just restored the FP context as it's a subset of the vector context. If
we restore the FP context and have previously used MSA then we have to
restore the whole vector context anyway (see comment in
enable_restore_fp_context for details) so similarly we might as well
enable MSA.
Thus if a task does not use MSA then it will continue to behave as
without this patch - the scalar FP context will be saved & restored as
usual. But if a task executes an MSA instruction then it will save &
restore the vector context forever more.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/Kconfig | 9 ++--
arch/mips/include/asm/asmmacro.h | 70 +++++++++++++++++++++++++
arch/mips/include/asm/msa.h | 28 ++++++++++
arch/mips/include/asm/processor.h | 9 +++-
arch/mips/include/asm/switch_to.h | 22 ++++++--
arch/mips/include/asm/thread_info.h | 4 ++
arch/mips/kernel/genex.S | 1 +
arch/mips/kernel/process.c | 7 ++-
arch/mips/kernel/r4k_switch.S | 58 +++++++++++++++------
arch/mips/kernel/traps.c | 101 +++++++++++++++++++++++++++++++++---
10 files changed, 275 insertions(+), 34 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index bb08f1a..6f78eb3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2054,10 +2054,11 @@ config CPU_HAS_MSA
help
MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
- is enabled the kernel will support detection of the MSA ASE. If you
- know that your kernel will only be running on CPUs which do not
- support MSA then you may wish to say N here to reduce the size of
- your kernel.
+ is enabled the kernel will support allocating & switching MSA
+ vector register contexts. If you know that your kernel will only be
+ running on CPUs which do not support MSA or that your userland will
+ not be making use of it then you may wish to say N here to reduce
+ the size & complexity of your kernel.
If unsure, say Y.
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index c759501..c087963 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -317,4 +317,74 @@
.endm
#endif
+ .macro msa_save_all thread
+ st_d 0, THREAD_FPR0, \thread
+ st_d 1, THREAD_FPR1, \thread
+ st_d 2, THREAD_FPR2, \thread
+ st_d 3, THREAD_FPR3, \thread
+ st_d 4, THREAD_FPR4, \thread
+ st_d 5, THREAD_FPR5, \thread
+ st_d 6, THREAD_FPR6, \thread
+ st_d 7, THREAD_FPR7, \thread
+ st_d 8, THREAD_FPR8, \thread
+ st_d 9, THREAD_FPR9, \thread
+ st_d 10, THREAD_FPR10, \thread
+ st_d 11, THREAD_FPR11, \thread
+ st_d 12, THREAD_FPR12, \thread
+ st_d 13, THREAD_FPR13, \thread
+ st_d 14, THREAD_FPR14, \thread
+ st_d 15, THREAD_FPR15, \thread
+ st_d 16, THREAD_FPR16, \thread
+ st_d 17, THREAD_FPR17, \thread
+ st_d 18, THREAD_FPR18, \thread
+ st_d 19, THREAD_FPR19, \thread
+ st_d 20, THREAD_FPR20, \thread
+ st_d 21, THREAD_FPR21, \thread
+ st_d 22, THREAD_FPR22, \thread
+ st_d 23, THREAD_FPR23, \thread
+ st_d 24, THREAD_FPR24, \thread
+ st_d 25, THREAD_FPR25, \thread
+ st_d 26, THREAD_FPR26, \thread
+ st_d 27, THREAD_FPR27, \thread
+ st_d 28, THREAD_FPR28, \thread
+ st_d 29, THREAD_FPR29, \thread
+ st_d 30, THREAD_FPR30, \thread
+ st_d 31, THREAD_FPR31, \thread
+ .endm
+
+ .macro msa_restore_all thread
+ ld_d 0, THREAD_FPR0, \thread
+ ld_d 1, THREAD_FPR1, \thread
+ ld_d 2, THREAD_FPR2, \thread
+ ld_d 3, THREAD_FPR3, \thread
+ ld_d 4, THREAD_FPR4, \thread
+ ld_d 5, THREAD_FPR5, \thread
+ ld_d 6, THREAD_FPR6, \thread
+ ld_d 7, THREAD_FPR7, \thread
+ ld_d 8, THREAD_FPR8, \thread
+ ld_d 9, THREAD_FPR9, \thread
+ ld_d 10, THREAD_FPR10, \thread
+ ld_d 11, THREAD_FPR11, \thread
+ ld_d 12, THREAD_FPR12, \thread
+ ld_d 13, THREAD_FPR13, \thread
+ ld_d 14, THREAD_FPR14, \thread
+ ld_d 15, THREAD_FPR15, \thread
+ ld_d 16, THREAD_FPR16, \thread
+ ld_d 17, THREAD_FPR17, \thread
+ ld_d 18, THREAD_FPR18, \thread
+ ld_d 19, THREAD_FPR19, \thread
+ ld_d 20, THREAD_FPR20, \thread
+ ld_d 21, THREAD_FPR21, \thread
+ ld_d 22, THREAD_FPR22, \thread
+ ld_d 23, THREAD_FPR23, \thread
+ ld_d 24, THREAD_FPR24, \thread
+ ld_d 25, THREAD_FPR25, \thread
+ ld_d 26, THREAD_FPR26, \thread
+ ld_d 27, THREAD_FPR27, \thread
+ ld_d 28, THREAD_FPR28, \thread
+ ld_d 29, THREAD_FPR29, \thread
+ ld_d 30, THREAD_FPR30, \thread
+ ld_d 31, THREAD_FPR31, \thread
+ .endm
+
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index a306ea8..d7fd8e1 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h
@@ -12,6 +12,9 @@
#include <asm/mipsregs.h>
+extern void _save_msa(struct task_struct *);
+extern void _restore_msa(struct task_struct *);
+
static inline void enable_msa(void)
{
if (cpu_has_msa)
@@ -32,6 +35,31 @@ static inline int is_msa_enabled(void)
return read_c0_config5() & MIPS_CONF5_MSAEN;
}
+static inline int thread_msa_context_live(void)
+{
+ /*
+ * Check cpu_has_msa only if it's a constant. This will allow the
+ * compiler to optimise out code for CPUs without MSA without adding
+ * an extra redundant check for CPUs with MSA.
+ */
+ if (__builtin_constant_p(cpu_has_msa) && !cpu_has_msa)
+ return 0;
+
+ return test_thread_flag(TIF_MSA_CTX_LIVE);
+}
+
+static inline void save_msa(struct task_struct *t)
+{
+ if (cpu_has_msa)
+ _save_msa(t);
+}
+
+static inline void restore_msa(struct task_struct *t)
+{
+ if (cpu_has_msa)
+ _restore_msa(t);
+}
+
#ifdef TOOLCHAIN_SUPPORTS_MSA
#define __BUILD_MSA_CTL_REG(name, cs) \
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 50cf4c3..ad70cba 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,7 +96,12 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
-#define FPU_REG_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_MSA
+# define FPU_REG_WIDTH 128
+#else
+# define FPU_REG_WIDTH 64
+#endif
union fpureg {
__u32 val32[FPU_REG_WIDTH / 32];
@@ -133,6 +138,7 @@ BUILD_FPR_ACCESS(64)
struct mips_fpu_struct {
union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
+ unsigned int msacsr;
};
#define NUM_DSP_REGS 6
@@ -310,6 +316,7 @@ struct thread_struct {
.fpu = { \
.fpr = {{{0,},},}, \
.fcr31 = 0, \
+ .msacsr = 0, \
}, \
/* \
* FPU affinity state (null if not FPAFF) \
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 278d45a..495c104 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -16,22 +16,29 @@
#include <asm/watch.h>
#include <asm/dsp.h>
#include <asm/cop2.h>
+#include <asm/msa.h>
struct task_struct;
+enum {
+ FP_SAVE_NONE = 0,
+ FP_SAVE_VECTOR = -1,
+ FP_SAVE_SCALAR = 1,
+};
+
/**
* resume - resume execution of a task
* @prev: The task previously executed.
* @next: The task to begin executing.
* @next_ti: task_thread_info(next).
- * @usedfpu: Non-zero if prev's FP context should be saved.
+ * @fp_save: Which, if any, FP context to save for prev.
*
* This function is used whilst scheduling to save the context of prev & load
* the context of next. Returns prev.
*/
extern asmlinkage struct task_struct *resume(struct task_struct *prev,
struct task_struct *next, struct thread_info *next_ti,
- u32 usedfpu);
+ s32 fp_save);
extern unsigned int ll_bit;
extern struct task_struct *ll_task;
@@ -75,7 +82,8 @@ do { \
#define switch_to(prev, next, last) \
do { \
- u32 __usedfpu, __c0_stat; \
+ u32 __c0_stat; \
+ s32 __fpsave = FP_SAVE_NONE; \
__mips_mt_fpaff_switch_to(prev); \
if (cpu_has_dsp) \
__save_dsp(prev); \
@@ -88,8 +96,12 @@ do { \
write_c0_status(__c0_stat & ~ST0_CU2); \
} \
__clear_software_ll_bit(); \
- __usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \
- (last) = resume(prev, next, task_thread_info(next), __usedfpu); \
+ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \
+ __fpsave = FP_SAVE_SCALAR; \
+ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
+ __fpsave = FP_SAVE_VECTOR; \
+ (last) = resume(prev, next, task_thread_info(next), __fpsave); \
+ disable_msa(); \
} while (0)
#define finish_arch_switch(prev) \
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 24846f9..b18a4e2 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -116,6 +116,8 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_LOAD_WATCH 25 /* If set, load watch registers */
#define TIF_SYSCALL_TRACEPOINT 26 /* syscall tracepoint instrumentation */
#define TIF_32BIT_FPREGS 27 /* 32-bit floating point registers */
+#define TIF_USEDMSA 29 /* MSA has been used this quantum */
+#define TIF_MSA_CTX_LIVE 30 /* MSA context must be preserved */
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -133,6 +135,8 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
#define _TIF_32BIT_FPREGS (1<<TIF_32BIT_FPREGS)
+#define _TIF_USEDMSA (1<<TIF_USEDMSA)
+#define _TIF_MSA_CTX_LIVE (1<<TIF_MSA_CTX_LIVE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index d84f6a5..278a49b 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -477,6 +477,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER tr tr sti silent /* #13 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
+ BUILD_HANDLER msa msa sti silent /* #21 */
BUILD_HANDLER mdmx mdmx sti silent /* #22 */
#ifdef CONFIG_HARDWARE_WATCHPOINTS
/*
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 2f01f3d..60e39dc 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -32,6 +32,7 @@
#include <asm/cpu.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
#include <asm/processor.h>
@@ -65,6 +66,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
clear_used_math();
clear_fpu_owner();
init_dsp();
+ clear_thread_flag(TIF_MSA_CTX_LIVE);
+ disable_msa();
regs->cp0_epc = pc;
regs->regs[29] = sp;
}
@@ -89,7 +92,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
preempt_disable();
- if (is_fpu_owner())
+ if (is_msa_enabled())
+ save_msa(p);
+ else if (is_fpu_owner())
save_fp(p);
if (cpu_has_dsp)
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index cc78dd9..f938ecd 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -29,18 +29,8 @@
#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
/*
- * FPU context is saved iff the process has used it's FPU in the current
- * time slice as indicated by _TIF_USEDFPU. In any case, the CU1 bit for user
- * space STATUS register should be 0, so that a process *always* starts its
- * userland with FPU disabled after each context switch.
- *
- * FPU will be enabled as soon as the process accesses FPU again, through
- * do_cpu() trap.
- */
-
-/*
* task_struct *resume(task_struct *prev, task_struct *next,
- * struct thread_info *next_ti, int usedfpu)
+ * struct thread_info *next_ti, s32 fp_save)
*/
.align 5
LEAF(resume)
@@ -50,23 +40,37 @@
LONG_S ra, THREAD_REG31(a0)
/*
- * check if we need to save FPU registers
+ * Check whether we need to save any FP context. FP context is saved
+ * iff the process has used the context with the scalar FPU or the MSA
+ * ASE in the current time slice, as indicated by _TIF_USEDFPU and
+ * _TIF_USEDMSA respectively. switch_to will have set fp_save
+ * accordingly to an FP_SAVE_ enum value.
*/
+ beqz a3, 2f
- beqz a3, 1f
-
- PTR_L t3, TASK_THREAD_INFO(a0)
/*
- * clear saved user stack CU1 bit
+ * We do. Clear the saved CU1 bit for prev, such that next time it is
+ * scheduled it will start in userland with the FPU disabled. If the
+ * task uses the FPU then it will be enabled again via the do_cpu trap.
+ * This allows us to lazily restore the FP context.
*/
+ PTR_L t3, TASK_THREAD_INFO(a0)
LONG_L t0, ST_OFF(t3)
li t1, ~ST0_CU1
and t0, t0, t1
LONG_S t0, ST_OFF(t3)
+ /* Check whether we're saving scalar or vector context. */
+ bgtz a3, 1f
+
+ /* Save 128b MSA vector context. */
+ msa_save_all a0
+ b 2f
+
+1: /* Save 32b/64b scalar FP context. */
fpu_save_double a0 t0 t1 # c0_status passed in t0
# clobbers t1
-1:
+2:
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
PTR_LA t8, __stack_chk_guard
@@ -141,6 +145,26 @@ LEAF(_restore_fp)
jr ra
END(_restore_fp)
+#ifdef CONFIG_CPU_HAS_MSA
+
+/*
+ * Save a thread's MSA vector context.
+ */
+LEAF(_save_msa)
+ msa_save_all a0
+ jr ra
+ END(_save_msa)
+
+/*
+ * Restore a thread's MSA vector context.
+ */
+LEAF(_restore_msa)
+ msa_restore_all a0
+ jr ra
+ END(_restore_msa)
+
+#endif
+
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e0b4996..e609c89 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -47,6 +47,7 @@
#include <asm/mipsregs.h>
#include <asm/mipsmtregs.h>
#include <asm/module.h>
+#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
@@ -79,6 +80,7 @@ extern asmlinkage void handle_ov(void);
extern asmlinkage void handle_tr(void);
extern asmlinkage void handle_fpe(void);
extern asmlinkage void handle_ftlb(void);
+extern asmlinkage void handle_msa(void);
extern asmlinkage void handle_mdmx(void);
extern asmlinkage void handle_watch(void);
extern asmlinkage void handle_mt(void);
@@ -1074,6 +1076,76 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
return NOTIFY_OK;
}
+static int enable_restore_fp_context(int msa)
+{
+ int err, was_fpu_owner;
+
+ if (!used_math()) {
+ /* First time FP context user. */
+ err = init_fpu();
+ if (msa && !err)
+ enable_msa();
+ if (!err)
+ set_used_math();
+ return err;
+ }
+
+ /*
+ * This task has formerly used the FP context.
+ *
+ * If this thread has no live MSA vector context then we can simply
+ * restore the scalar FP context. If it has live MSA vector context
+ * (that is, it has or may have used MSA since last performing a
+ * function call) then we'll need to restore the vector context. This
+ * applies even if we're currently only executing a scalar FP
+ * instruction. This is because if we were to later execute an MSA
+ * instruction then we'd either have to:
+ *
+ * - Restore the vector context & clobber any registers modified by
+ * scalar FP instructions between now & then.
+ *
+ * or
+ *
+ * - Not restore the vector context & lose the most significant bits
+ * of all vector registers.
+ *
+ * Neither of those options is acceptable. We cannot restore the least
+ * significant bits of the registers now & only restore the most
+ * significant bits later because the most significant bits of any
+ * vector registers whose aliased FP register is modified now will have
+ * been zeroed. We'd have no way to know that when restoring the vector
+ * context & thus may load an outdated value for the most significant
+ * bits of a vector register.
+ */
+ if (!msa && !thread_msa_context_live())
+ return own_fpu(1);
+
+ /*
+ * This task is using or has previously used MSA. Thus we require
+ * that Status.FR == 1.
+ */
+ was_fpu_owner = is_fpu_owner();
+ err = own_fpu(0);
+ if (err)
+ return err;
+
+ enable_msa();
+ write_msa_csr(current->thread.fpu.msacsr);
+ set_thread_flag(TIF_USEDMSA);
+
+ /*
+ * If this is the first time that the task is using MSA and it has
+ * previously used scalar FP in this time slice then we already have
+ * FP context which we shouldn't clobber.
+ */
+ if (!test_and_set_thread_flag(TIF_MSA_CTX_LIVE) && was_fpu_owner)
+ return 0;
+
+ /* We need to restore the vector context. */
+ restore_msa(current);
+ return 0;
+}
+
asmlinkage void do_cpu(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -1153,12 +1225,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
/* Fall through. */
case 1:
- if (used_math()) /* Using the FPU again. */
- err = own_fpu(1);
- else { /* First time FPU user. */
- err = init_fpu();
- set_used_math();
- }
+ err = enable_restore_fp_context(0);
if (!raw_cpu_has_fpu || err) {
int sig;
@@ -1183,6 +1250,27 @@ out:
exception_exit(prev_state);
}
+asmlinkage void do_msa(struct pt_regs *regs)
+{
+ enum ctx_state prev_state;
+ int err;
+
+ prev_state = exception_enter();
+
+ if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) {
+ force_sig(SIGILL, current);
+ goto out;
+ }
+
+ die_if_kernel("do_msa invoked from kernel context!", regs);
+
+ err = enable_restore_fp_context(1);
+ if (err)
+ force_sig(SIGILL, current);
+out:
+ exception_exit(prev_state);
+}
+
asmlinkage void do_mdmx(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -2040,6 +2128,7 @@ void __init trap_init(void)
set_except_vector(15, handle_fpe);
set_except_vector(16, handle_ftlb);
+ set_except_vector(21, handle_msa);
set_except_vector(22, handle_mdmx);
if (cpu_has_mcheck)
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 12/15] mips: basic MSA context switching support
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch adds support for context switching the MSA vector registers.
These 128 bit vector registers are aliased with the FP registers - an
FP register accesses the least significant bits of the vector register
with which it is aliased (ie. the register with the same index). Due to
both this & the requirement that the scalar FPU must be 64-bit (FR=1) if
enabled at the same time as MSA the kernel will enable MSA & scalar FP
at the same time for tasks which use MSA. If we restore the MSA vector
context then we might as well enable the scalar FPU since the reason it
was left disabled was to allow for lazy FP context restoring - but we
just restored the FP context as it's a subset of the vector context. If
we restore the FP context and have previously used MSA then we have to
restore the whole vector context anyway (see comment in
enable_restore_fp_context for details) so similarly we might as well
enable MSA.
Thus if a task does not use MSA then it will continue to behave as
without this patch - the scalar FP context will be saved & restored as
usual. But if a task executes an MSA instruction then it will save &
restore the vector context forever more.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/Kconfig | 9 ++--
arch/mips/include/asm/asmmacro.h | 70 +++++++++++++++++++++++++
arch/mips/include/asm/msa.h | 28 ++++++++++
arch/mips/include/asm/processor.h | 9 +++-
arch/mips/include/asm/switch_to.h | 22 ++++++--
arch/mips/include/asm/thread_info.h | 4 ++
arch/mips/kernel/genex.S | 1 +
arch/mips/kernel/process.c | 7 ++-
arch/mips/kernel/r4k_switch.S | 58 +++++++++++++++------
arch/mips/kernel/traps.c | 101 +++++++++++++++++++++++++++++++++---
10 files changed, 275 insertions(+), 34 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index bb08f1a..6f78eb3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2054,10 +2054,11 @@ config CPU_HAS_MSA
help
MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
- is enabled the kernel will support detection of the MSA ASE. If you
- know that your kernel will only be running on CPUs which do not
- support MSA then you may wish to say N here to reduce the size of
- your kernel.
+ is enabled the kernel will support allocating & switching MSA
+ vector register contexts. If you know that your kernel will only be
+ running on CPUs which do not support MSA or that your userland will
+ not be making use of it then you may wish to say N here to reduce
+ the size & complexity of your kernel.
If unsure, say Y.
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index c759501..c087963 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -317,4 +317,74 @@
.endm
#endif
+ .macro msa_save_all thread
+ st_d 0, THREAD_FPR0, \thread
+ st_d 1, THREAD_FPR1, \thread
+ st_d 2, THREAD_FPR2, \thread
+ st_d 3, THREAD_FPR3, \thread
+ st_d 4, THREAD_FPR4, \thread
+ st_d 5, THREAD_FPR5, \thread
+ st_d 6, THREAD_FPR6, \thread
+ st_d 7, THREAD_FPR7, \thread
+ st_d 8, THREAD_FPR8, \thread
+ st_d 9, THREAD_FPR9, \thread
+ st_d 10, THREAD_FPR10, \thread
+ st_d 11, THREAD_FPR11, \thread
+ st_d 12, THREAD_FPR12, \thread
+ st_d 13, THREAD_FPR13, \thread
+ st_d 14, THREAD_FPR14, \thread
+ st_d 15, THREAD_FPR15, \thread
+ st_d 16, THREAD_FPR16, \thread
+ st_d 17, THREAD_FPR17, \thread
+ st_d 18, THREAD_FPR18, \thread
+ st_d 19, THREAD_FPR19, \thread
+ st_d 20, THREAD_FPR20, \thread
+ st_d 21, THREAD_FPR21, \thread
+ st_d 22, THREAD_FPR22, \thread
+ st_d 23, THREAD_FPR23, \thread
+ st_d 24, THREAD_FPR24, \thread
+ st_d 25, THREAD_FPR25, \thread
+ st_d 26, THREAD_FPR26, \thread
+ st_d 27, THREAD_FPR27, \thread
+ st_d 28, THREAD_FPR28, \thread
+ st_d 29, THREAD_FPR29, \thread
+ st_d 30, THREAD_FPR30, \thread
+ st_d 31, THREAD_FPR31, \thread
+ .endm
+
+ .macro msa_restore_all thread
+ ld_d 0, THREAD_FPR0, \thread
+ ld_d 1, THREAD_FPR1, \thread
+ ld_d 2, THREAD_FPR2, \thread
+ ld_d 3, THREAD_FPR3, \thread
+ ld_d 4, THREAD_FPR4, \thread
+ ld_d 5, THREAD_FPR5, \thread
+ ld_d 6, THREAD_FPR6, \thread
+ ld_d 7, THREAD_FPR7, \thread
+ ld_d 8, THREAD_FPR8, \thread
+ ld_d 9, THREAD_FPR9, \thread
+ ld_d 10, THREAD_FPR10, \thread
+ ld_d 11, THREAD_FPR11, \thread
+ ld_d 12, THREAD_FPR12, \thread
+ ld_d 13, THREAD_FPR13, \thread
+ ld_d 14, THREAD_FPR14, \thread
+ ld_d 15, THREAD_FPR15, \thread
+ ld_d 16, THREAD_FPR16, \thread
+ ld_d 17, THREAD_FPR17, \thread
+ ld_d 18, THREAD_FPR18, \thread
+ ld_d 19, THREAD_FPR19, \thread
+ ld_d 20, THREAD_FPR20, \thread
+ ld_d 21, THREAD_FPR21, \thread
+ ld_d 22, THREAD_FPR22, \thread
+ ld_d 23, THREAD_FPR23, \thread
+ ld_d 24, THREAD_FPR24, \thread
+ ld_d 25, THREAD_FPR25, \thread
+ ld_d 26, THREAD_FPR26, \thread
+ ld_d 27, THREAD_FPR27, \thread
+ ld_d 28, THREAD_FPR28, \thread
+ ld_d 29, THREAD_FPR29, \thread
+ ld_d 30, THREAD_FPR30, \thread
+ ld_d 31, THREAD_FPR31, \thread
+ .endm
+
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index a306ea8..d7fd8e1 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h
@@ -12,6 +12,9 @@
#include <asm/mipsregs.h>
+extern void _save_msa(struct task_struct *);
+extern void _restore_msa(struct task_struct *);
+
static inline void enable_msa(void)
{
if (cpu_has_msa)
@@ -32,6 +35,31 @@ static inline int is_msa_enabled(void)
return read_c0_config5() & MIPS_CONF5_MSAEN;
}
+static inline int thread_msa_context_live(void)
+{
+ /*
+ * Check cpu_has_msa only if it's a constant. This will allow the
+ * compiler to optimise out code for CPUs without MSA without adding
+ * an extra redundant check for CPUs with MSA.
+ */
+ if (__builtin_constant_p(cpu_has_msa) && !cpu_has_msa)
+ return 0;
+
+ return test_thread_flag(TIF_MSA_CTX_LIVE);
+}
+
+static inline void save_msa(struct task_struct *t)
+{
+ if (cpu_has_msa)
+ _save_msa(t);
+}
+
+static inline void restore_msa(struct task_struct *t)
+{
+ if (cpu_has_msa)
+ _restore_msa(t);
+}
+
#ifdef TOOLCHAIN_SUPPORTS_MSA
#define __BUILD_MSA_CTL_REG(name, cs) \
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 50cf4c3..ad70cba 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,7 +96,12 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
-#define FPU_REG_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_MSA
+# define FPU_REG_WIDTH 128
+#else
+# define FPU_REG_WIDTH 64
+#endif
union fpureg {
__u32 val32[FPU_REG_WIDTH / 32];
@@ -133,6 +138,7 @@ BUILD_FPR_ACCESS(64)
struct mips_fpu_struct {
union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
+ unsigned int msacsr;
};
#define NUM_DSP_REGS 6
@@ -310,6 +316,7 @@ struct thread_struct {
.fpu = { \
.fpr = {{{0,},},}, \
.fcr31 = 0, \
+ .msacsr = 0, \
}, \
/* \
* FPU affinity state (null if not FPAFF) \
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 278d45a..495c104 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -16,22 +16,29 @@
#include <asm/watch.h>
#include <asm/dsp.h>
#include <asm/cop2.h>
+#include <asm/msa.h>
struct task_struct;
+enum {
+ FP_SAVE_NONE = 0,
+ FP_SAVE_VECTOR = -1,
+ FP_SAVE_SCALAR = 1,
+};
+
/**
* resume - resume execution of a task
* @prev: The task previously executed.
* @next: The task to begin executing.
* @next_ti: task_thread_info(next).
- * @usedfpu: Non-zero if prev's FP context should be saved.
+ * @fp_save: Which, if any, FP context to save for prev.
*
* This function is used whilst scheduling to save the context of prev & load
* the context of next. Returns prev.
*/
extern asmlinkage struct task_struct *resume(struct task_struct *prev,
struct task_struct *next, struct thread_info *next_ti,
- u32 usedfpu);
+ s32 fp_save);
extern unsigned int ll_bit;
extern struct task_struct *ll_task;
@@ -75,7 +82,8 @@ do { \
#define switch_to(prev, next, last) \
do { \
- u32 __usedfpu, __c0_stat; \
+ u32 __c0_stat; \
+ s32 __fpsave = FP_SAVE_NONE; \
__mips_mt_fpaff_switch_to(prev); \
if (cpu_has_dsp) \
__save_dsp(prev); \
@@ -88,8 +96,12 @@ do { \
write_c0_status(__c0_stat & ~ST0_CU2); \
} \
__clear_software_ll_bit(); \
- __usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \
- (last) = resume(prev, next, task_thread_info(next), __usedfpu); \
+ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \
+ __fpsave = FP_SAVE_SCALAR; \
+ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
+ __fpsave = FP_SAVE_VECTOR; \
+ (last) = resume(prev, next, task_thread_info(next), __fpsave); \
+ disable_msa(); \
} while (0)
#define finish_arch_switch(prev) \
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 24846f9..b18a4e2 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -116,6 +116,8 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_LOAD_WATCH 25 /* If set, load watch registers */
#define TIF_SYSCALL_TRACEPOINT 26 /* syscall tracepoint instrumentation */
#define TIF_32BIT_FPREGS 27 /* 32-bit floating point registers */
+#define TIF_USEDMSA 29 /* MSA has been used this quantum */
+#define TIF_MSA_CTX_LIVE 30 /* MSA context must be preserved */
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -133,6 +135,8 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
#define _TIF_32BIT_FPREGS (1<<TIF_32BIT_FPREGS)
+#define _TIF_USEDMSA (1<<TIF_USEDMSA)
+#define _TIF_MSA_CTX_LIVE (1<<TIF_MSA_CTX_LIVE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index d84f6a5..278a49b 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -477,6 +477,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER tr tr sti silent /* #13 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
+ BUILD_HANDLER msa msa sti silent /* #21 */
BUILD_HANDLER mdmx mdmx sti silent /* #22 */
#ifdef CONFIG_HARDWARE_WATCHPOINTS
/*
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 2f01f3d..60e39dc 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -32,6 +32,7 @@
#include <asm/cpu.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
#include <asm/processor.h>
@@ -65,6 +66,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
clear_used_math();
clear_fpu_owner();
init_dsp();
+ clear_thread_flag(TIF_MSA_CTX_LIVE);
+ disable_msa();
regs->cp0_epc = pc;
regs->regs[29] = sp;
}
@@ -89,7 +92,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
preempt_disable();
- if (is_fpu_owner())
+ if (is_msa_enabled())
+ save_msa(p);
+ else if (is_fpu_owner())
save_fp(p);
if (cpu_has_dsp)
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index cc78dd9..f938ecd 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -29,18 +29,8 @@
#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
/*
- * FPU context is saved iff the process has used it's FPU in the current
- * time slice as indicated by _TIF_USEDFPU. In any case, the CU1 bit for user
- * space STATUS register should be 0, so that a process *always* starts its
- * userland with FPU disabled after each context switch.
- *
- * FPU will be enabled as soon as the process accesses FPU again, through
- * do_cpu() trap.
- */
-
-/*
* task_struct *resume(task_struct *prev, task_struct *next,
- * struct thread_info *next_ti, int usedfpu)
+ * struct thread_info *next_ti, s32 fp_save)
*/
.align 5
LEAF(resume)
@@ -50,23 +40,37 @@
LONG_S ra, THREAD_REG31(a0)
/*
- * check if we need to save FPU registers
+ * Check whether we need to save any FP context. FP context is saved
+ * iff the process has used the context with the scalar FPU or the MSA
+ * ASE in the current time slice, as indicated by _TIF_USEDFPU and
+ * _TIF_USEDMSA respectively. switch_to will have set fp_save
+ * accordingly to an FP_SAVE_ enum value.
*/
+ beqz a3, 2f
- beqz a3, 1f
-
- PTR_L t3, TASK_THREAD_INFO(a0)
/*
- * clear saved user stack CU1 bit
+ * We do. Clear the saved CU1 bit for prev, such that next time it is
+ * scheduled it will start in userland with the FPU disabled. If the
+ * task uses the FPU then it will be enabled again via the do_cpu trap.
+ * This allows us to lazily restore the FP context.
*/
+ PTR_L t3, TASK_THREAD_INFO(a0)
LONG_L t0, ST_OFF(t3)
li t1, ~ST0_CU1
and t0, t0, t1
LONG_S t0, ST_OFF(t3)
+ /* Check whether we're saving scalar or vector context. */
+ bgtz a3, 1f
+
+ /* Save 128b MSA vector context. */
+ msa_save_all a0
+ b 2f
+
+1: /* Save 32b/64b scalar FP context. */
fpu_save_double a0 t0 t1 # c0_status passed in t0
# clobbers t1
-1:
+2:
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
PTR_LA t8, __stack_chk_guard
@@ -141,6 +145,26 @@ LEAF(_restore_fp)
jr ra
END(_restore_fp)
+#ifdef CONFIG_CPU_HAS_MSA
+
+/*
+ * Save a thread's MSA vector context.
+ */
+LEAF(_save_msa)
+ msa_save_all a0
+ jr ra
+ END(_save_msa)
+
+/*
+ * Restore a thread's MSA vector context.
+ */
+LEAF(_restore_msa)
+ msa_restore_all a0
+ jr ra
+ END(_restore_msa)
+
+#endif
+
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e0b4996..e609c89 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -47,6 +47,7 @@
#include <asm/mipsregs.h>
#include <asm/mipsmtregs.h>
#include <asm/module.h>
+#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
@@ -79,6 +80,7 @@ extern asmlinkage void handle_ov(void);
extern asmlinkage void handle_tr(void);
extern asmlinkage void handle_fpe(void);
extern asmlinkage void handle_ftlb(void);
+extern asmlinkage void handle_msa(void);
extern asmlinkage void handle_mdmx(void);
extern asmlinkage void handle_watch(void);
extern asmlinkage void handle_mt(void);
@@ -1074,6 +1076,76 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
return NOTIFY_OK;
}
+static int enable_restore_fp_context(int msa)
+{
+ int err, was_fpu_owner;
+
+ if (!used_math()) {
+ /* First time FP context user. */
+ err = init_fpu();
+ if (msa && !err)
+ enable_msa();
+ if (!err)
+ set_used_math();
+ return err;
+ }
+
+ /*
+ * This task has formerly used the FP context.
+ *
+ * If this thread has no live MSA vector context then we can simply
+ * restore the scalar FP context. If it has live MSA vector context
+ * (that is, it has or may have used MSA since last performing a
+ * function call) then we'll need to restore the vector context. This
+ * applies even if we're currently only executing a scalar FP
+ * instruction. This is because if we were to later execute an MSA
+ * instruction then we'd either have to:
+ *
+ * - Restore the vector context & clobber any registers modified by
+ * scalar FP instructions between now & then.
+ *
+ * or
+ *
+ * - Not restore the vector context & lose the most significant bits
+ * of all vector registers.
+ *
+ * Neither of those options is acceptable. We cannot restore the least
+ * significant bits of the registers now & only restore the most
+ * significant bits later because the most significant bits of any
+ * vector registers whose aliased FP register is modified now will have
+ * been zeroed. We'd have no way to know that when restoring the vector
+ * context & thus may load an outdated value for the most significant
+ * bits of a vector register.
+ */
+ if (!msa && !thread_msa_context_live())
+ return own_fpu(1);
+
+ /*
+ * This task is using or has previously used MSA. Thus we require
+ * that Status.FR == 1.
+ */
+ was_fpu_owner = is_fpu_owner();
+ err = own_fpu(0);
+ if (err)
+ return err;
+
+ enable_msa();
+ write_msa_csr(current->thread.fpu.msacsr);
+ set_thread_flag(TIF_USEDMSA);
+
+ /*
+ * If this is the first time that the task is using MSA and it has
+ * previously used scalar FP in this time slice then we already have
+ * FP context which we shouldn't clobber.
+ */
+ if (!test_and_set_thread_flag(TIF_MSA_CTX_LIVE) && was_fpu_owner)
+ return 0;
+
+ /* We need to restore the vector context. */
+ restore_msa(current);
+ return 0;
+}
+
asmlinkage void do_cpu(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -1153,12 +1225,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
/* Fall through. */
case 1:
- if (used_math()) /* Using the FPU again. */
- err = own_fpu(1);
- else { /* First time FPU user. */
- err = init_fpu();
- set_used_math();
- }
+ err = enable_restore_fp_context(0);
if (!raw_cpu_has_fpu || err) {
int sig;
@@ -1183,6 +1250,27 @@ out:
exception_exit(prev_state);
}
+asmlinkage void do_msa(struct pt_regs *regs)
+{
+ enum ctx_state prev_state;
+ int err;
+
+ prev_state = exception_enter();
+
+ if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) {
+ force_sig(SIGILL, current);
+ goto out;
+ }
+
+ die_if_kernel("do_msa invoked from kernel context!", regs);
+
+ err = enable_restore_fp_context(1);
+ if (err)
+ force_sig(SIGILL, current);
+out:
+ exception_exit(prev_state);
+}
+
asmlinkage void do_mdmx(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -2040,6 +2128,7 @@ void __init trap_init(void)
set_except_vector(15, handle_fpe);
set_except_vector(16, handle_ftlb);
+ set_except_vector(21, handle_msa);
set_except_vector(22, handle_mdmx);
if (cpu_has_mcheck)
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 13/15] mips: dumb MSA FP exception handler
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch adds a simple handler for MSA FP exceptions which delivers a
SIGFPE to the running task. In the future it should probably be extended
to re-execute the instruction with the MSACSR.NX bit set in order to
generate results for any elements which did not cause an exception
before delivering the SIGFPE signal.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/genex.S | 1 +
arch/mips/kernel/traps.c | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 278a49b..7365cd6 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -475,6 +475,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER cpu cpu sti silent /* #11 */
BUILD_HANDLER ov ov sti silent /* #12 */
BUILD_HANDLER tr tr sti silent /* #13 */
+ BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
BUILD_HANDLER msa msa sti silent /* #21 */
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e609c89..88db702 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -78,6 +78,7 @@ extern asmlinkage void handle_ri_rdhwr(void);
extern asmlinkage void handle_cpu(void);
extern asmlinkage void handle_ov(void);
extern asmlinkage void handle_tr(void);
+extern asmlinkage void handle_msa_fpe(void);
extern asmlinkage void handle_fpe(void);
extern asmlinkage void handle_ftlb(void);
extern asmlinkage void handle_msa(void);
@@ -1250,6 +1251,16 @@ out:
exception_exit(prev_state);
}
+asmlinkage void do_msa_fpe(struct pt_regs *regs)
+{
+ enum ctx_state prev_state;
+
+ prev_state = exception_enter();
+ die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
+ force_sig(SIGFPE, current);
+ exception_exit(prev_state);
+}
+
asmlinkage void do_msa(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -2105,6 +2116,7 @@ void __init trap_init(void)
set_except_vector(11, handle_cpu);
set_except_vector(12, handle_ov);
set_except_vector(13, handle_tr);
+ set_except_vector(14, handle_msa_fpe);
if (current_cpu_type() == CPU_R6000 ||
current_cpu_type() == CPU_R6000A) {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 13/15] mips: dumb MSA FP exception handler
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch adds a simple handler for MSA FP exceptions which delivers a
SIGFPE to the running task. In the future it should probably be extended
to re-execute the instruction with the MSACSR.NX bit set in order to
generate results for any elements which did not cause an exception
before delivering the SIGFPE signal.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/genex.S | 1 +
arch/mips/kernel/traps.c | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 278a49b..7365cd6 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -475,6 +475,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER cpu cpu sti silent /* #11 */
BUILD_HANDLER ov ov sti silent /* #12 */
BUILD_HANDLER tr tr sti silent /* #13 */
+ BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
BUILD_HANDLER msa msa sti silent /* #21 */
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e609c89..88db702 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -78,6 +78,7 @@ extern asmlinkage void handle_ri_rdhwr(void);
extern asmlinkage void handle_cpu(void);
extern asmlinkage void handle_ov(void);
extern asmlinkage void handle_tr(void);
+extern asmlinkage void handle_msa_fpe(void);
extern asmlinkage void handle_fpe(void);
extern asmlinkage void handle_ftlb(void);
extern asmlinkage void handle_msa(void);
@@ -1250,6 +1251,16 @@ out:
exception_exit(prev_state);
}
+asmlinkage void do_msa_fpe(struct pt_regs *regs)
+{
+ enum ctx_state prev_state;
+
+ prev_state = exception_enter();
+ die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
+ force_sig(SIGFPE, current);
+ exception_exit(prev_state);
+}
+
asmlinkage void do_msa(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -2105,6 +2116,7 @@ void __init trap_init(void)
set_except_vector(11, handle_cpu);
set_except_vector(12, handle_ov);
set_except_vector(13, handle_tr);
+ set_except_vector(14, handle_msa_fpe);
if (current_cpu_type() == CPU_R6000 ||
current_cpu_type() == CPU_R6000A) {
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 14/15] mips: panic if vector register partitioning is implemented
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
No current systems implementing MSA include support for vector register
partitioning which makes it somewhat difficult to implement support for
it in the kernel. Thus for the moment the kernel includes no such
support. However if the kernel were to be run on a system which
implemented register partitioning then it would not function correctly,
mishandling MSA disabled exceptions. Calling panic when run on a system
with vector register partitioning implemented ensures that we're not
caught out by this later but instead reminded to implement support once
such a system is available.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/cpu-probe.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 852e085..003ba3c 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1193,9 +1193,13 @@ void cpu_probe(void)
else
c->srsets = 1;
- if (cpu_has_msa)
+ if (cpu_has_msa) {
c->msa_id = cpu_get_msa_id();
+ if (c->msa_id & MSA_IR_WRPF)
+ panic("Vector register partitioning unimplemented!");
+ }
+
cpu_probe_vmbits(c);
#ifdef CONFIG_64BIT
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 14/15] mips: panic if vector register partitioning is implemented
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
No current systems implementing MSA include support for vector register
partitioning which makes it somewhat difficult to implement support for
it in the kernel. Thus for the moment the kernel includes no such
support. However if the kernel were to be run on a system which
implemented register partitioning then it would not function correctly,
mishandling MSA disabled exceptions. Calling panic when run on a system
with vector register partitioning implemented ensures that we're not
caught out by this later but instead reminded to implement support once
such a system is available.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/kernel/cpu-probe.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 852e085..003ba3c 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1193,9 +1193,13 @@ void cpu_probe(void)
else
c->srsets = 1;
- if (cpu_has_msa)
+ if (cpu_has_msa) {
c->msa_id = cpu_get_msa_id();
+ if (c->msa_id & MSA_IR_WRPF)
+ panic("Vector register partitioning unimplemented!");
+ }
+
cpu_probe_vmbits(c);
#ifdef CONFIG_64BIT
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 14/15] mips: panic if vector register partitioning is implemented
2014-01-27 15:23 ` Paul Burton
(?)
@ 2014-01-27 18:38 ` David Daney
2014-01-27 19:39 ` Paul Burton
-1 siblings, 1 reply; 52+ messages in thread
From: David Daney @ 2014-01-27 18:38 UTC (permalink / raw)
To: Paul Burton; +Cc: linux-mips
On 01/27/2014 07:23 AM, Paul Burton wrote:
> No current systems implementing MSA include support for vector register
> partitioning which makes it somewhat difficult to implement support for
> it in the kernel. Thus for the moment the kernel includes no such
> support. However if the kernel were to be run on a system which
> implemented register partitioning then it would not function correctly,
> mishandling MSA disabled exceptions. Calling panic when run on a system
> with vector register partitioning implemented ensures that we're not
> caught out by this later but instead reminded to implement support once
> such a system is available.
>
> Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> ---
> arch/mips/kernel/cpu-probe.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
> index 852e085..003ba3c 100644
> --- a/arch/mips/kernel/cpu-probe.c
> +++ b/arch/mips/kernel/cpu-probe.c
> @@ -1193,9 +1193,13 @@ void cpu_probe(void)
> else
> c->srsets = 1;
>
> - if (cpu_has_msa)
> + if (cpu_has_msa) {
> c->msa_id = cpu_get_msa_id();
>
> + if (c->msa_id & MSA_IR_WRPF)
> + panic("Vector register partitioning unimplemented!");
You should probably use a WARN_ON() instead. There is no reason to
crash the kernel for this condition is there?
> + }
> +
> cpu_probe_vmbits(c);
>
> #ifdef CONFIG_64BIT
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 14/15] mips: panic if vector register partitioning is implemented
@ 2014-01-27 19:39 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 19:39 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Jan 27, 2014 at 10:38:45AM -0800, David Daney wrote:
> ....
> On 01/27/2014 07:23 AM, Paul Burton wrote:
> >No current systems implementing MSA include support for vector register
> >partitioning which makes it somewhat difficult to implement support for
> >it in the kernel. Thus for the moment the kernel includes no such
> >support. However if the kernel were to be run on a system which
> >implemented register partitioning then it would not function correctly,
> >mishandling MSA disabled exceptions. Calling panic when run on a system
> >with vector register partitioning implemented ensures that we're not
> >caught out by this later but instead reminded to implement support once
> >such a system is available.
> >
> >Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >---
> > arch/mips/kernel/cpu-probe.c | 6 +++++-
> > 1 file changed, 5 insertions(+), 1 deletion(-)
> >
> >diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
> >index 852e085..003ba3c 100644
> >--- a/arch/mips/kernel/cpu-probe.c
> >+++ b/arch/mips/kernel/cpu-probe.c
> >@@ -1193,9 +1193,13 @@ void cpu_probe(void)
> > else
> > c->srsets = 1;
> >
> >- if (cpu_has_msa)
> >+ if (cpu_has_msa) {
> > c->msa_id = cpu_get_msa_id();
> >
> >+ if (c->msa_id & MSA_IR_WRPF)
> >+ panic("Vector register partitioning unimplemented!");
>
> You should probably use a WARN_ON() instead. There is no reason to crash
> the kernel for this condition is there?
>
Well mapping vector registers reuses the MSA disabled exception, so if
the kernel were to continue with my current code & userland were to
execute an MSA instruction I believe it would appear to hang. There
would be an initial MSA disabled exception which would lead the kernel
to enable MSA & return to userland to re-execute the MSA instruction.
Then another MSA disabled exception would occur because we'd need to
map a vector register, but the kernel doesn't understand so would
attempt to enable MSA & return again. Then another MSA disabled
exception, etc etc. So if the kernel were to continue then it would
probably want to disable MSA support entirely to avoid userland
appearing to just hang. Additionally since vector registers & FP
registers are aliased the same would apply to scalar FP (cop1)
instructions too, so we'd also need to disable the FPU. To me that all
seems like a lot of hassle to allow a crippled kernel to run on a system
that doesn't exist yet, so personally I'd rather the kernel just panics
as a safeguard. Then once some target system implements vector register
partitioning, support for it can be implemented in the kernel.
Thanks,
Paul
> >+ }
> >+
> > cpu_probe_vmbits(c);
> >
> > #ifdef CONFIG_64BIT
> >
>
>
> To report this email as SPAM, please forward it to spam@websense.com
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 14/15] mips: panic if vector register partitioning is implemented
@ 2014-01-27 19:39 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 19:39 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Jan 27, 2014 at 10:38:45AM -0800, David Daney wrote:
> ....
> On 01/27/2014 07:23 AM, Paul Burton wrote:
> >No current systems implementing MSA include support for vector register
> >partitioning which makes it somewhat difficult to implement support for
> >it in the kernel. Thus for the moment the kernel includes no such
> >support. However if the kernel were to be run on a system which
> >implemented register partitioning then it would not function correctly,
> >mishandling MSA disabled exceptions. Calling panic when run on a system
> >with vector register partitioning implemented ensures that we're not
> >caught out by this later but instead reminded to implement support once
> >such a system is available.
> >
> >Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >---
> > arch/mips/kernel/cpu-probe.c | 6 +++++-
> > 1 file changed, 5 insertions(+), 1 deletion(-)
> >
> >diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
> >index 852e085..003ba3c 100644
> >--- a/arch/mips/kernel/cpu-probe.c
> >+++ b/arch/mips/kernel/cpu-probe.c
> >@@ -1193,9 +1193,13 @@ void cpu_probe(void)
> > else
> > c->srsets = 1;
> >
> >- if (cpu_has_msa)
> >+ if (cpu_has_msa) {
> > c->msa_id = cpu_get_msa_id();
> >
> >+ if (c->msa_id & MSA_IR_WRPF)
> >+ panic("Vector register partitioning unimplemented!");
>
> You should probably use a WARN_ON() instead. There is no reason to crash
> the kernel for this condition is there?
>
Well mapping vector registers reuses the MSA disabled exception, so if
the kernel were to continue with my current code & userland were to
execute an MSA instruction I believe it would appear to hang. There
would be an initial MSA disabled exception which would lead the kernel
to enable MSA & return to userland to re-execute the MSA instruction.
Then another MSA disabled exception would occur because we'd need to
map a vector register, but the kernel doesn't understand so would
attempt to enable MSA & return again. Then another MSA disabled
exception, etc etc. So if the kernel were to continue then it would
probably want to disable MSA support entirely to avoid userland
appearing to just hang. Additionally since vector registers & FP
registers are aliased the same would apply to scalar FP (cop1)
instructions too, so we'd also need to disable the FPU. To me that all
seems like a lot of hassle to allow a crippled kernel to run on a system
that doesn't exist yet, so personally I'd rather the kernel just panics
as a safeguard. Then once some target system implements vector register
partitioning, support for it can be implemented in the kernel.
Thanks,
Paul
> >+ }
> >+
> > cpu_probe_vmbits(c);
> >
> > #ifdef CONFIG_64BIT
> >
>
>
> To report this email as SPAM, please forward it to spam@websense.com
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 14/15] mips: panic if vector register partitioning is implemented
2014-01-27 19:39 ` Paul Burton
(?)
@ 2014-01-27 19:56 ` David Daney
2014-01-28 14:20 ` Paul Burton
-1 siblings, 1 reply; 52+ messages in thread
From: David Daney @ 2014-01-27 19:56 UTC (permalink / raw)
To: Paul Burton; +Cc: linux-mips
On 01/27/2014 11:39 AM, Paul Burton wrote:
> On Mon, Jan 27, 2014 at 10:38:45AM -0800, David Daney wrote:
>> ....
>> On 01/27/2014 07:23 AM, Paul Burton wrote:
>>> No current systems implementing MSA include support for vector register
>>> partitioning which makes it somewhat difficult to implement support for
>>> it in the kernel. Thus for the moment the kernel includes no such
>>> support. However if the kernel were to be run on a system which
>>> implemented register partitioning then it would not function correctly,
>>> mishandling MSA disabled exceptions. Calling panic when run on a system
>>> with vector register partitioning implemented ensures that we're not
>>> caught out by this later but instead reminded to implement support once
>>> such a system is available.
>>>
>>> Signed-off-by: Paul Burton <paul.burton@imgtec.com>
>>> ---
>>> arch/mips/kernel/cpu-probe.c | 6 +++++-
>>> 1 file changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
>>> index 852e085..003ba3c 100644
>>> --- a/arch/mips/kernel/cpu-probe.c
>>> +++ b/arch/mips/kernel/cpu-probe.c
>>> @@ -1193,9 +1193,13 @@ void cpu_probe(void)
>>> else
>>> c->srsets = 1;
>>>
>>> - if (cpu_has_msa)
>>> + if (cpu_has_msa) {
>>> c->msa_id = cpu_get_msa_id();
>>>
>>> + if (c->msa_id & MSA_IR_WRPF)
>>> + panic("Vector register partitioning unimplemented!");
>>
>> You should probably use a WARN_ON() instead. There is no reason to crash
>> the kernel for this condition is there?
>>
>
> Well mapping vector registers reuses the MSA disabled exception, so if
> the kernel were to continue with my current code & userland were to
> execute an MSA instruction I believe it would appear to hang. [...]
The CPU probing things are called so early that any panic() or BUG()
here will result in absolutely no console output as this code is called
before any console drivers are enabled.
So the choice is really:
panic(): No output on console and system is frozen/locked-up.
WARN(): Nice stack trace on console, theoretical lockup once userspace
code starts executing.
You can probably guess which I think is the better option.
>
> Thanks,
> Paul
>
>>> + }
>>> +
>>> cpu_probe_vmbits(c);
>>>
>>> #ifdef CONFIG_64BIT
>>>
>>
>>
>> To report this email as SPAM, please forward it to spam@websense.com
>
>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 14/15] mips: panic if vector register partitioning is implemented
@ 2014-01-28 14:20 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-28 14:20 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Jan 27, 2014 at 11:56:07AM -0800, David Daney wrote:
> ....
> On 01/27/2014 11:39 AM, Paul Burton wrote:
> >On Mon, Jan 27, 2014 at 10:38:45AM -0800, David Daney wrote:
> >>....
> >>On 01/27/2014 07:23 AM, Paul Burton wrote:
> >>>No current systems implementing MSA include support for vector register
> >>>partitioning which makes it somewhat difficult to implement support for
> >>>it in the kernel. Thus for the moment the kernel includes no such
> >>>support. However if the kernel were to be run on a system which
> >>>implemented register partitioning then it would not function correctly,
> >>>mishandling MSA disabled exceptions. Calling panic when run on a system
> >>>with vector register partitioning implemented ensures that we're not
> >>>caught out by this later but instead reminded to implement support once
> >>>such a system is available.
> >>>
> >>>Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >>>---
> >>> arch/mips/kernel/cpu-probe.c | 6 +++++-
> >>> 1 file changed, 5 insertions(+), 1 deletion(-)
> >>>
> >>>diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
> >>>index 852e085..003ba3c 100644
> >>>--- a/arch/mips/kernel/cpu-probe.c
> >>>+++ b/arch/mips/kernel/cpu-probe.c
> >>>@@ -1193,9 +1193,13 @@ void cpu_probe(void)
> >>> else
> >>> c->srsets = 1;
> >>>
> >>>- if (cpu_has_msa)
> >>>+ if (cpu_has_msa) {
> >>> c->msa_id = cpu_get_msa_id();
> >>>
> >>>+ if (c->msa_id & MSA_IR_WRPF)
> >>>+ panic("Vector register partitioning unimplemented!");
> >>
> >>You should probably use a WARN_ON() instead. There is no reason to crash
> >>the kernel for this condition is there?
> >>
> >
> >Well mapping vector registers reuses the MSA disabled exception, so if
> >the kernel were to continue with my current code & userland were to
> >execute an MSA instruction I believe it would appear to hang. [...]
>
> The CPU probing things are called so early that any panic() or BUG() here
> will result in absolutely no console output as this code is called before
> any console drivers are enabled.
Fair point, I'd overlooked that. v2 on its way.
Paul
>
> So the choice is really:
>
> panic(): No output on console and system is frozen/locked-up.
>
> WARN(): Nice stack trace on console, theoretical lockup once userspace code
> starts executing.
>
> You can probably guess which I think is the better option.
>
> >
> >Thanks,
> > Paul
> >
> >>>+ }
> >>>+
> >>> cpu_probe_vmbits(c);
> >>>
> >>> #ifdef CONFIG_64BIT
> >>>
> >>
> >>
> >>To report this email as SPAM, please forward it to spam@websense.com
> >
> >
> >
>
>
> To report this email as SPAM, please forward it to spam@websense.com
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 14/15] mips: panic if vector register partitioning is implemented
@ 2014-01-28 14:20 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-28 14:20 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Jan 27, 2014 at 11:56:07AM -0800, David Daney wrote:
> ....
> On 01/27/2014 11:39 AM, Paul Burton wrote:
> >On Mon, Jan 27, 2014 at 10:38:45AM -0800, David Daney wrote:
> >>....
> >>On 01/27/2014 07:23 AM, Paul Burton wrote:
> >>>No current systems implementing MSA include support for vector register
> >>>partitioning which makes it somewhat difficult to implement support for
> >>>it in the kernel. Thus for the moment the kernel includes no such
> >>>support. However if the kernel were to be run on a system which
> >>>implemented register partitioning then it would not function correctly,
> >>>mishandling MSA disabled exceptions. Calling panic when run on a system
> >>>with vector register partitioning implemented ensures that we're not
> >>>caught out by this later but instead reminded to implement support once
> >>>such a system is available.
> >>>
> >>>Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >>>---
> >>> arch/mips/kernel/cpu-probe.c | 6 +++++-
> >>> 1 file changed, 5 insertions(+), 1 deletion(-)
> >>>
> >>>diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
> >>>index 852e085..003ba3c 100644
> >>>--- a/arch/mips/kernel/cpu-probe.c
> >>>+++ b/arch/mips/kernel/cpu-probe.c
> >>>@@ -1193,9 +1193,13 @@ void cpu_probe(void)
> >>> else
> >>> c->srsets = 1;
> >>>
> >>>- if (cpu_has_msa)
> >>>+ if (cpu_has_msa) {
> >>> c->msa_id = cpu_get_msa_id();
> >>>
> >>>+ if (c->msa_id & MSA_IR_WRPF)
> >>>+ panic("Vector register partitioning unimplemented!");
> >>
> >>You should probably use a WARN_ON() instead. There is no reason to crash
> >>the kernel for this condition is there?
> >>
> >
> >Well mapping vector registers reuses the MSA disabled exception, so if
> >the kernel were to continue with my current code & userland were to
> >execute an MSA instruction I believe it would appear to hang. [...]
>
> The CPU probing things are called so early that any panic() or BUG() here
> will result in absolutely no console output as this code is called before
> any console drivers are enabled.
Fair point, I'd overlooked that. v2 on its way.
Paul
>
> So the choice is really:
>
> panic(): No output on console and system is frozen/locked-up.
>
> WARN(): Nice stack trace on console, theoretical lockup once userspace code
> starts executing.
>
> You can probably guess which I think is the better option.
>
> >
> >Thanks,
> > Paul
> >
> >>>+ }
> >>>+
> >>> cpu_probe_vmbits(c);
> >>>
> >>> #ifdef CONFIG_64BIT
> >>>
> >>
> >>
> >>To report this email as SPAM, please forward it to spam@websense.com
> >
> >
> >
>
>
> To report this email as SPAM, please forward it to spam@websense.com
^ permalink raw reply [flat|nested] 52+ messages in thread* [PATCH v2 14/15] mips: warn if vector register partitioning is implemented
@ 2014-01-28 14:28 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-28 14:28 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
No current systems implementing MSA include support for vector register
partitioning which makes it somewhat difficult to implement support for
it in the kernel. Thus for the moment the kernel includes no such
support. However if the kernel were to be run on a system which
implemented register partitioning then it would not function correctly,
mishandling MSA disabled exceptions. Print a warning if run on a system
with vector register partitioning implemented to indicate this problem
should it occur.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
Changes in v2:
- Switch from panic to WARN so that console output can be seen.
---
arch/mips/kernel/cpu-probe.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 852e085..8605eb6 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1193,8 +1193,11 @@ void cpu_probe(void)
else
c->srsets = 1;
- if (cpu_has_msa)
+ if (cpu_has_msa) {
c->msa_id = cpu_get_msa_id();
+ WARN(c->msa_id & MSA_IR_WRPF,
+ "Vector register partitioning unimplemented!");
+ }
cpu_probe_vmbits(c);
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* [PATCH v2 14/15] mips: warn if vector register partitioning is implemented
@ 2014-01-28 14:28 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-28 14:28 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
No current systems implementing MSA include support for vector register
partitioning which makes it somewhat difficult to implement support for
it in the kernel. Thus for the moment the kernel includes no such
support. However if the kernel were to be run on a system which
implemented register partitioning then it would not function correctly,
mishandling MSA disabled exceptions. Print a warning if run on a system
with vector register partitioning implemented to indicate this problem
should it occur.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
Changes in v2:
- Switch from panic to WARN so that console output can be seen.
---
arch/mips/kernel/cpu-probe.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 852e085..8605eb6 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1193,8 +1193,11 @@ void cpu_probe(void)
else
c->srsets = 1;
- if (cpu_has_msa)
+ if (cpu_has_msa) {
c->msa_id = cpu_get_msa_id();
+ WARN(c->msa_id & MSA_IR_WRPF,
+ "Vector register partitioning unimplemented!");
+ }
cpu_probe_vmbits(c);
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 15/15] mips: save/restore MSA context around signals
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch extends sigcontext in order to hold the most significant 64
bits of each vector register in addition to the MSA control & status
register. The least significant 64 bits are already saved as the scalar
FP context. This makes things a little awkward since the least & most
significant 64 bits of each vector register are not contiguous in
memory. Thus the copy_u & insert instructions are used to transfer the
values of the most significant 64 bits via GP registers.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/include/asm/sigcontext.h | 2 +
arch/mips/include/uapi/asm/sigcontext.h | 8 ++
arch/mips/kernel/asm-offsets.c | 3 +
arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
arch/mips/kernel/signal.c | 71 +++++++++--
arch/mips/kernel/signal32.c | 71 +++++++++--
6 files changed, 352 insertions(+), 16 deletions(-)
diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index eeeb0f4..f54bdbe 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -32,6 +32,8 @@ struct sigcontext32 {
__u32 sc_lo2;
__u32 sc_hi3;
__u32 sc_lo3;
+ __u64 sc_msaregs[32]; /* Most significant 64 bits */
+ __u32 sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
#endif /* _ASM_SIGCONTEXT_H */
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 6c9906f..681c176 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
#include <asm/sgidefs.h>
+/* Bits which may be set in sc_used_math */
+#define USEDMATH_FP (1 << 0)
+#define USEDMATH_MSA (1 << 1)
+
#if _MIPS_SIM == _MIPS_SIM_ABI32
/*
@@ -37,6 +41,8 @@ struct sigcontext {
unsigned long sc_lo2;
unsigned long sc_hi3;
unsigned long sc_lo3;
+ unsigned long long sc_msaregs[32]; /* Most significant 64 bits */
+ unsigned long sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
@@ -70,6 +76,8 @@ struct sigcontext {
__u32 sc_used_math;
__u32 sc_dsp;
__u32 sc_reserved;
+ __u64 sc_msaregs[32];
+ __u32 sc_msa_csr;
};
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index f454d7b..ace6814 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -294,6 +294,7 @@ void output_sc_defines(void)
OFFSET(SC_LO2, sigcontext, sc_lo2);
OFFSET(SC_HI3, sigcontext, sc_hi3);
OFFSET(SC_LO3, sigcontext, sc_lo3);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -308,6 +309,7 @@ void output_sc_defines(void)
OFFSET(SC_MDLO, sigcontext, sc_mdlo);
OFFSET(SC_PC, sigcontext, sc_pc);
OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -319,6 +321,7 @@ void output_sc32_defines(void)
OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs);
OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr);
OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir);
+ OFFSET(SC32_MSAREGS, sigcontext32, sc_msaregs);
BLANK();
}
#endif
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 253b2fb..752b50a 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -13,6 +13,7 @@
* Copyright (C) 1999, 2001 Silicon Graphics, Inc.
*/
#include <asm/asm.h>
+#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
@@ -245,6 +246,218 @@ LEAF(_restore_fp_context32)
END(_restore_fp_context32)
#endif
+#ifdef CONFIG_CPU_HAS_MSA
+
+ .macro save_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ copy_u_d \tmp, \wr, 1
+ EX sd \tmp, (\off+(\wr*8))(\sc)
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+#endif
+ .endm
+
+/*
+ * int _save_msa_context(struct sigcontext *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context)
+ save_sc_msareg 0, SC_MSAREGS, a0, t0
+ save_sc_msareg 1, SC_MSAREGS, a0, t0
+ save_sc_msareg 2, SC_MSAREGS, a0, t0
+ save_sc_msareg 3, SC_MSAREGS, a0, t0
+ save_sc_msareg 4, SC_MSAREGS, a0, t0
+ save_sc_msareg 5, SC_MSAREGS, a0, t0
+ save_sc_msareg 6, SC_MSAREGS, a0, t0
+ save_sc_msareg 7, SC_MSAREGS, a0, t0
+ save_sc_msareg 8, SC_MSAREGS, a0, t0
+ save_sc_msareg 9, SC_MSAREGS, a0, t0
+ save_sc_msareg 10, SC_MSAREGS, a0, t0
+ save_sc_msareg 11, SC_MSAREGS, a0, t0
+ save_sc_msareg 12, SC_MSAREGS, a0, t0
+ save_sc_msareg 13, SC_MSAREGS, a0, t0
+ save_sc_msareg 14, SC_MSAREGS, a0, t0
+ save_sc_msareg 15, SC_MSAREGS, a0, t0
+ save_sc_msareg 16, SC_MSAREGS, a0, t0
+ save_sc_msareg 17, SC_MSAREGS, a0, t0
+ save_sc_msareg 18, SC_MSAREGS, a0, t0
+ save_sc_msareg 19, SC_MSAREGS, a0, t0
+ save_sc_msareg 20, SC_MSAREGS, a0, t0
+ save_sc_msareg 21, SC_MSAREGS, a0, t0
+ save_sc_msareg 22, SC_MSAREGS, a0, t0
+ save_sc_msareg 23, SC_MSAREGS, a0, t0
+ save_sc_msareg 24, SC_MSAREGS, a0, t0
+ save_sc_msareg 25, SC_MSAREGS, a0, t0
+ save_sc_msareg 26, SC_MSAREGS, a0, t0
+ save_sc_msareg 27, SC_MSAREGS, a0, t0
+ save_sc_msareg 28, SC_MSAREGS, a0, t0
+ save_sc_msareg 29, SC_MSAREGS, a0, t0
+ save_sc_msareg 30, SC_MSAREGS, a0, t0
+ save_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _save_msa_context32(struct sigcontext32 *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context32)
+ save_sc_msareg 0, SC32_MSAREGS, a0, t0
+ save_sc_msareg 1, SC32_MSAREGS, a0, t0
+ save_sc_msareg 2, SC32_MSAREGS, a0, t0
+ save_sc_msareg 3, SC32_MSAREGS, a0, t0
+ save_sc_msareg 4, SC32_MSAREGS, a0, t0
+ save_sc_msareg 5, SC32_MSAREGS, a0, t0
+ save_sc_msareg 6, SC32_MSAREGS, a0, t0
+ save_sc_msareg 7, SC32_MSAREGS, a0, t0
+ save_sc_msareg 8, SC32_MSAREGS, a0, t0
+ save_sc_msareg 9, SC32_MSAREGS, a0, t0
+ save_sc_msareg 10, SC32_MSAREGS, a0, t0
+ save_sc_msareg 11, SC32_MSAREGS, a0, t0
+ save_sc_msareg 12, SC32_MSAREGS, a0, t0
+ save_sc_msareg 13, SC32_MSAREGS, a0, t0
+ save_sc_msareg 14, SC32_MSAREGS, a0, t0
+ save_sc_msareg 15, SC32_MSAREGS, a0, t0
+ save_sc_msareg 16, SC32_MSAREGS, a0, t0
+ save_sc_msareg 17, SC32_MSAREGS, a0, t0
+ save_sc_msareg 18, SC32_MSAREGS, a0, t0
+ save_sc_msareg 19, SC32_MSAREGS, a0, t0
+ save_sc_msareg 20, SC32_MSAREGS, a0, t0
+ save_sc_msareg 21, SC32_MSAREGS, a0, t0
+ save_sc_msareg 22, SC32_MSAREGS, a0, t0
+ save_sc_msareg 23, SC32_MSAREGS, a0, t0
+ save_sc_msareg 24, SC32_MSAREGS, a0, t0
+ save_sc_msareg 25, SC32_MSAREGS, a0, t0
+ save_sc_msareg 26, SC32_MSAREGS, a0, t0
+ save_sc_msareg 27, SC32_MSAREGS, a0, t0
+ save_sc_msareg 28, SC32_MSAREGS, a0, t0
+ save_sc_msareg 29, SC32_MSAREGS, a0, t0
+ save_sc_msareg 30, SC32_MSAREGS, a0, t0
+ save_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+ .macro restore_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ EX ld \tmp, (\off+(\wr*8))(\sc)
+ insert_d \wr, 1, \tmp
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 3, \tmp
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 3, \tmp
+#endif
+ .endm
+
+/*
+ * int _restore_msa_context(struct sigcontext *sc)
+ */
+LEAF(_restore_msa_context)
+ restore_sc_msareg 0, SC_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _restore_msa_context32(struct sigcontext32 *sc)
+ */
+LEAF(_restore_msa_context32)
+ restore_sc_msareg 0, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+#endif /* CONFIG_CPU_HAS_MSA */
+
.set reorder
.type fault@function
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 0f97c7d..6ccbc69 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -30,6 +30,7 @@
#include <linux/bitops.h>
#include <asm/cacheflush.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/sim.h>
#include <asm/ucontext.h>
#include <asm/cpu-features.h>
@@ -46,6 +47,9 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
+extern asmlinkage int _save_msa_context(struct sigcontext __user *sc);
+extern asmlinkage int _restore_msa_context(struct sigcontext __user *sc);
+
struct sigframe {
u32 sf_ass[4]; /* argument save space for o32 */
u32 sf_pad[2]; /* Was: signal trampoline */
@@ -95,19 +99,58 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* Helper routines
*/
-static int protected_save_fp_context(struct sigcontext __user *sc)
+static int protected_save_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = _save_msa_context(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_to_sigcontext(sc);
}
if (likely(!err))
break;
@@ -121,17 +164,27 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
return err;
}
-static int protected_restore_fp_context(struct sigcontext __user *sc)
+static int protected_restore_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context(sc);
+ if (!err && (used_math & USEDMATH_MSA)) {
+ enable_msa();
+ err = _restore_msa_context(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_from_sigcontext(sc);
}
if (likely(!err))
break;
@@ -172,7 +225,8 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -180,7 +234,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context(sc);
+ err |= protected_save_fp_context(sc, used_math);
}
return err;
}
@@ -205,14 +259,14 @@ int fpcsr_pending(unsigned int __user *fpcsr)
}
static int
-check_and_restore_fp_context(struct sigcontext __user *sc)
+check_and_restore_fp_context(struct sigcontext __user *sc, unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context(sc);
+ err |= protected_restore_fp_context(sc, used_math);
return err ?: sig;
}
@@ -252,9 +306,10 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context(sc);
+ err = check_and_restore_fp_context(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index bae2e6e..4c796b1 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -42,6 +42,9 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _save_msa_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _restore_msa_context32(struct sigcontext32 __user *sc);
+
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -111,19 +114,58 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* sigcontext handlers
*/
-static int protected_save_fp_context32(struct sigcontext32 __user *sc)
+static int protected_save_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context32(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = _save_msa_context32(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext32(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_to_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -137,17 +179,27 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
return err;
}
-static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
+static int protected_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context32(sc);
+ if (!err && (used_math & USEDMATH_MSA)) {
+ enable_msa();
+ err = _restore_msa_context32(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext32(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_from_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -186,7 +238,8 @@ static int setup_sigcontext32(struct pt_regs *regs,
err |= __put_user(mflo3(), &sc->sc_lo3);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -194,20 +247,21 @@ static int setup_sigcontext32(struct pt_regs *regs,
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context32(sc);
+ err |= protected_save_fp_context32(sc, used_math);
}
return err;
}
static int
-check_and_restore_fp_context32(struct sigcontext32 __user *sc)
+check_and_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context32(sc);
+ err |= protected_restore_fp_context32(sc, used_math);
return err ?: sig;
}
@@ -244,9 +298,10 @@ static int restore_sigcontext32(struct pt_regs *regs,
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context32(sc);
+ err = check_and_restore_fp_context32(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread* [PATCH 15/15] mips: save/restore MSA context around signals
@ 2014-01-27 15:23 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 15:23 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton
This patch extends sigcontext in order to hold the most significant 64
bits of each vector register in addition to the MSA control & status
register. The least significant 64 bits are already saved as the scalar
FP context. This makes things a little awkward since the least & most
significant 64 bits of each vector register are not contiguous in
memory. Thus the copy_u & insert instructions are used to transfer the
values of the most significant 64 bits via GP registers.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
arch/mips/include/asm/sigcontext.h | 2 +
arch/mips/include/uapi/asm/sigcontext.h | 8 ++
arch/mips/kernel/asm-offsets.c | 3 +
arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
arch/mips/kernel/signal.c | 71 +++++++++--
arch/mips/kernel/signal32.c | 71 +++++++++--
6 files changed, 352 insertions(+), 16 deletions(-)
diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index eeeb0f4..f54bdbe 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -32,6 +32,8 @@ struct sigcontext32 {
__u32 sc_lo2;
__u32 sc_hi3;
__u32 sc_lo3;
+ __u64 sc_msaregs[32]; /* Most significant 64 bits */
+ __u32 sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
#endif /* _ASM_SIGCONTEXT_H */
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 6c9906f..681c176 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
#include <asm/sgidefs.h>
+/* Bits which may be set in sc_used_math */
+#define USEDMATH_FP (1 << 0)
+#define USEDMATH_MSA (1 << 1)
+
#if _MIPS_SIM == _MIPS_SIM_ABI32
/*
@@ -37,6 +41,8 @@ struct sigcontext {
unsigned long sc_lo2;
unsigned long sc_hi3;
unsigned long sc_lo3;
+ unsigned long long sc_msaregs[32]; /* Most significant 64 bits */
+ unsigned long sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
@@ -70,6 +76,8 @@ struct sigcontext {
__u32 sc_used_math;
__u32 sc_dsp;
__u32 sc_reserved;
+ __u64 sc_msaregs[32];
+ __u32 sc_msa_csr;
};
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index f454d7b..ace6814 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -294,6 +294,7 @@ void output_sc_defines(void)
OFFSET(SC_LO2, sigcontext, sc_lo2);
OFFSET(SC_HI3, sigcontext, sc_hi3);
OFFSET(SC_LO3, sigcontext, sc_lo3);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -308,6 +309,7 @@ void output_sc_defines(void)
OFFSET(SC_MDLO, sigcontext, sc_mdlo);
OFFSET(SC_PC, sigcontext, sc_pc);
OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -319,6 +321,7 @@ void output_sc32_defines(void)
OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs);
OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr);
OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir);
+ OFFSET(SC32_MSAREGS, sigcontext32, sc_msaregs);
BLANK();
}
#endif
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 253b2fb..752b50a 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -13,6 +13,7 @@
* Copyright (C) 1999, 2001 Silicon Graphics, Inc.
*/
#include <asm/asm.h>
+#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
@@ -245,6 +246,218 @@ LEAF(_restore_fp_context32)
END(_restore_fp_context32)
#endif
+#ifdef CONFIG_CPU_HAS_MSA
+
+ .macro save_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ copy_u_d \tmp, \wr, 1
+ EX sd \tmp, (\off+(\wr*8))(\sc)
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+#endif
+ .endm
+
+/*
+ * int _save_msa_context(struct sigcontext *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context)
+ save_sc_msareg 0, SC_MSAREGS, a0, t0
+ save_sc_msareg 1, SC_MSAREGS, a0, t0
+ save_sc_msareg 2, SC_MSAREGS, a0, t0
+ save_sc_msareg 3, SC_MSAREGS, a0, t0
+ save_sc_msareg 4, SC_MSAREGS, a0, t0
+ save_sc_msareg 5, SC_MSAREGS, a0, t0
+ save_sc_msareg 6, SC_MSAREGS, a0, t0
+ save_sc_msareg 7, SC_MSAREGS, a0, t0
+ save_sc_msareg 8, SC_MSAREGS, a0, t0
+ save_sc_msareg 9, SC_MSAREGS, a0, t0
+ save_sc_msareg 10, SC_MSAREGS, a0, t0
+ save_sc_msareg 11, SC_MSAREGS, a0, t0
+ save_sc_msareg 12, SC_MSAREGS, a0, t0
+ save_sc_msareg 13, SC_MSAREGS, a0, t0
+ save_sc_msareg 14, SC_MSAREGS, a0, t0
+ save_sc_msareg 15, SC_MSAREGS, a0, t0
+ save_sc_msareg 16, SC_MSAREGS, a0, t0
+ save_sc_msareg 17, SC_MSAREGS, a0, t0
+ save_sc_msareg 18, SC_MSAREGS, a0, t0
+ save_sc_msareg 19, SC_MSAREGS, a0, t0
+ save_sc_msareg 20, SC_MSAREGS, a0, t0
+ save_sc_msareg 21, SC_MSAREGS, a0, t0
+ save_sc_msareg 22, SC_MSAREGS, a0, t0
+ save_sc_msareg 23, SC_MSAREGS, a0, t0
+ save_sc_msareg 24, SC_MSAREGS, a0, t0
+ save_sc_msareg 25, SC_MSAREGS, a0, t0
+ save_sc_msareg 26, SC_MSAREGS, a0, t0
+ save_sc_msareg 27, SC_MSAREGS, a0, t0
+ save_sc_msareg 28, SC_MSAREGS, a0, t0
+ save_sc_msareg 29, SC_MSAREGS, a0, t0
+ save_sc_msareg 30, SC_MSAREGS, a0, t0
+ save_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _save_msa_context32(struct sigcontext32 *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context32)
+ save_sc_msareg 0, SC32_MSAREGS, a0, t0
+ save_sc_msareg 1, SC32_MSAREGS, a0, t0
+ save_sc_msareg 2, SC32_MSAREGS, a0, t0
+ save_sc_msareg 3, SC32_MSAREGS, a0, t0
+ save_sc_msareg 4, SC32_MSAREGS, a0, t0
+ save_sc_msareg 5, SC32_MSAREGS, a0, t0
+ save_sc_msareg 6, SC32_MSAREGS, a0, t0
+ save_sc_msareg 7, SC32_MSAREGS, a0, t0
+ save_sc_msareg 8, SC32_MSAREGS, a0, t0
+ save_sc_msareg 9, SC32_MSAREGS, a0, t0
+ save_sc_msareg 10, SC32_MSAREGS, a0, t0
+ save_sc_msareg 11, SC32_MSAREGS, a0, t0
+ save_sc_msareg 12, SC32_MSAREGS, a0, t0
+ save_sc_msareg 13, SC32_MSAREGS, a0, t0
+ save_sc_msareg 14, SC32_MSAREGS, a0, t0
+ save_sc_msareg 15, SC32_MSAREGS, a0, t0
+ save_sc_msareg 16, SC32_MSAREGS, a0, t0
+ save_sc_msareg 17, SC32_MSAREGS, a0, t0
+ save_sc_msareg 18, SC32_MSAREGS, a0, t0
+ save_sc_msareg 19, SC32_MSAREGS, a0, t0
+ save_sc_msareg 20, SC32_MSAREGS, a0, t0
+ save_sc_msareg 21, SC32_MSAREGS, a0, t0
+ save_sc_msareg 22, SC32_MSAREGS, a0, t0
+ save_sc_msareg 23, SC32_MSAREGS, a0, t0
+ save_sc_msareg 24, SC32_MSAREGS, a0, t0
+ save_sc_msareg 25, SC32_MSAREGS, a0, t0
+ save_sc_msareg 26, SC32_MSAREGS, a0, t0
+ save_sc_msareg 27, SC32_MSAREGS, a0, t0
+ save_sc_msareg 28, SC32_MSAREGS, a0, t0
+ save_sc_msareg 29, SC32_MSAREGS, a0, t0
+ save_sc_msareg 30, SC32_MSAREGS, a0, t0
+ save_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+ .macro restore_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ EX ld \tmp, (\off+(\wr*8))(\sc)
+ insert_d \wr, 1, \tmp
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 3, \tmp
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 3, \tmp
+#endif
+ .endm
+
+/*
+ * int _restore_msa_context(struct sigcontext *sc)
+ */
+LEAF(_restore_msa_context)
+ restore_sc_msareg 0, SC_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _restore_msa_context32(struct sigcontext32 *sc)
+ */
+LEAF(_restore_msa_context32)
+ restore_sc_msareg 0, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+#endif /* CONFIG_CPU_HAS_MSA */
+
.set reorder
.type fault@function
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 0f97c7d..6ccbc69 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -30,6 +30,7 @@
#include <linux/bitops.h>
#include <asm/cacheflush.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/sim.h>
#include <asm/ucontext.h>
#include <asm/cpu-features.h>
@@ -46,6 +47,9 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
+extern asmlinkage int _save_msa_context(struct sigcontext __user *sc);
+extern asmlinkage int _restore_msa_context(struct sigcontext __user *sc);
+
struct sigframe {
u32 sf_ass[4]; /* argument save space for o32 */
u32 sf_pad[2]; /* Was: signal trampoline */
@@ -95,19 +99,58 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* Helper routines
*/
-static int protected_save_fp_context(struct sigcontext __user *sc)
+static int protected_save_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = _save_msa_context(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_to_sigcontext(sc);
}
if (likely(!err))
break;
@@ -121,17 +164,27 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
return err;
}
-static int protected_restore_fp_context(struct sigcontext __user *sc)
+static int protected_restore_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context(sc);
+ if (!err && (used_math & USEDMATH_MSA)) {
+ enable_msa();
+ err = _restore_msa_context(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_from_sigcontext(sc);
}
if (likely(!err))
break;
@@ -172,7 +225,8 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -180,7 +234,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context(sc);
+ err |= protected_save_fp_context(sc, used_math);
}
return err;
}
@@ -205,14 +259,14 @@ int fpcsr_pending(unsigned int __user *fpcsr)
}
static int
-check_and_restore_fp_context(struct sigcontext __user *sc)
+check_and_restore_fp_context(struct sigcontext __user *sc, unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context(sc);
+ err |= protected_restore_fp_context(sc, used_math);
return err ?: sig;
}
@@ -252,9 +306,10 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context(sc);
+ err = check_and_restore_fp_context(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index bae2e6e..4c796b1 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -42,6 +42,9 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _save_msa_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _restore_msa_context32(struct sigcontext32 __user *sc);
+
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -111,19 +114,58 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* sigcontext handlers
*/
-static int protected_save_fp_context32(struct sigcontext32 __user *sc)
+static int protected_save_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context32(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = _save_msa_context32(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext32(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_to_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -137,17 +179,27 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
return err;
}
-static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
+static int protected_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context32(sc);
+ if (!err && (used_math & USEDMATH_MSA)) {
+ enable_msa();
+ err = _restore_msa_context32(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext32(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_from_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -186,7 +238,8 @@ static int setup_sigcontext32(struct pt_regs *regs,
err |= __put_user(mflo3(), &sc->sc_lo3);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -194,20 +247,21 @@ static int setup_sigcontext32(struct pt_regs *regs,
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context32(sc);
+ err |= protected_save_fp_context32(sc, used_math);
}
return err;
}
static int
-check_and_restore_fp_context32(struct sigcontext32 __user *sc)
+check_and_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context32(sc);
+ err |= protected_restore_fp_context32(sc, used_math);
return err ?: sig;
}
@@ -244,9 +298,10 @@ static int restore_sigcontext32(struct pt_regs *regs,
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context32(sc);
+ err = check_and_restore_fp_context32(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 15/15] mips: save/restore MSA context around signals
2014-01-27 15:23 ` Paul Burton
(?)
@ 2014-01-27 19:50 ` David Daney
2014-01-27 20:06 ` Paul Burton
-1 siblings, 1 reply; 52+ messages in thread
From: David Daney @ 2014-01-27 19:50 UTC (permalink / raw)
To: Paul Burton; +Cc: linux-mips
On 01/27/2014 07:23 AM, Paul Burton wrote:
> This patch extends sigcontext in order to hold the most significant 64
> bits of each vector register in addition to the MSA control & status
> register. The least significant 64 bits are already saved as the scalar
> FP context. This makes things a little awkward since the least & most
> significant 64 bits of each vector register are not contiguous in
> memory. Thus the copy_u & insert instructions are used to transfer the
> values of the most significant 64 bits via GP registers.
>
Interesting.
This very much touches the userspace ABI of the kernel, so it merits
very careful consideration.
> Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> ---
> arch/mips/include/asm/sigcontext.h | 2 +
> arch/mips/include/uapi/asm/sigcontext.h | 8 ++
> arch/mips/kernel/asm-offsets.c | 3 +
> arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
> arch/mips/kernel/signal.c | 71 +++++++++--
> arch/mips/kernel/signal32.c | 71 +++++++++--
> 6 files changed, 352 insertions(+), 16 deletions(-)
>
[...]
> diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
> index 6c9906f..681c176 100644
> --- a/arch/mips/include/uapi/asm/sigcontext.h
> +++ b/arch/mips/include/uapi/asm/sigcontext.h
> @@ -12,6 +12,10 @@
> #include <linux/types.h>
> #include <asm/sgidefs.h>
>
> +/* Bits which may be set in sc_used_math */
> +#define USEDMATH_FP (1 << 0)
> +#define USEDMATH_MSA (1 << 1)
> +
How is this going to interact with existing userspace applications?
Is the current behavior to use / manipulate sc_used_math?
How will USEDMATH_MSA interact with existing code?
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 15/15] mips: save/restore MSA context around signals
@ 2014-01-27 20:06 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 20:06 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Jan 27, 2014 at 11:50:31AM -0800, David Daney wrote:
> ....
> On 01/27/2014 07:23 AM, Paul Burton wrote:
> >This patch extends sigcontext in order to hold the most significant 64
> >bits of each vector register in addition to the MSA control & status
> >register. The least significant 64 bits are already saved as the scalar
> >FP context. This makes things a little awkward since the least & most
> >significant 64 bits of each vector register are not contiguous in
> >memory. Thus the copy_u & insert instructions are used to transfer the
> >values of the most significant 64 bits via GP registers.
> >
>
> Interesting.
>
> This very much touches the userspace ABI of the kernel, so it merits very
> careful consideration.
>
Absolutely :)
>
> >Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >---
> > arch/mips/include/asm/sigcontext.h | 2 +
> > arch/mips/include/uapi/asm/sigcontext.h | 8 ++
> > arch/mips/kernel/asm-offsets.c | 3 +
> > arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
> > arch/mips/kernel/signal.c | 71 +++++++++--
> > arch/mips/kernel/signal32.c | 71 +++++++++--
> > 6 files changed, 352 insertions(+), 16 deletions(-)
> >
> [...]
> >diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
> >index 6c9906f..681c176 100644
> >--- a/arch/mips/include/uapi/asm/sigcontext.h
> >+++ b/arch/mips/include/uapi/asm/sigcontext.h
> >@@ -12,6 +12,10 @@
> > #include <linux/types.h>
> > #include <asm/sgidefs.h>
> >
> >+/* Bits which may be set in sc_used_math */
> >+#define USEDMATH_FP (1 << 0)
> >+#define USEDMATH_MSA (1 << 1)
> >+
>
> How is this going to interact with existing userspace applications?
>
> Is the current behavior to use / manipulate sc_used_math?
>
> How will USEDMATH_MSA interact with existing code?
>
My belief is that since previously sc_used_math was either 0 or 1, any
code using this is likely to simply check for it being non-zero and
continue to work just fine. The only issue would be if code explicitly
checks for sc_used_math==1. Even then it's only an issue if your program
also uses MSA, so there should certainly be no issue with old binaries.
If you can think of or find any uses of sc_used_math which would cause a
problem please do let me know.
Thanks,
Paul
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 15/15] mips: save/restore MSA context around signals
@ 2014-01-27 20:06 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-01-27 20:06 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Jan 27, 2014 at 11:50:31AM -0800, David Daney wrote:
> ....
> On 01/27/2014 07:23 AM, Paul Burton wrote:
> >This patch extends sigcontext in order to hold the most significant 64
> >bits of each vector register in addition to the MSA control & status
> >register. The least significant 64 bits are already saved as the scalar
> >FP context. This makes things a little awkward since the least & most
> >significant 64 bits of each vector register are not contiguous in
> >memory. Thus the copy_u & insert instructions are used to transfer the
> >values of the most significant 64 bits via GP registers.
> >
>
> Interesting.
>
> This very much touches the userspace ABI of the kernel, so it merits very
> careful consideration.
>
Absolutely :)
>
> >Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> >---
> > arch/mips/include/asm/sigcontext.h | 2 +
> > arch/mips/include/uapi/asm/sigcontext.h | 8 ++
> > arch/mips/kernel/asm-offsets.c | 3 +
> > arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
> > arch/mips/kernel/signal.c | 71 +++++++++--
> > arch/mips/kernel/signal32.c | 71 +++++++++--
> > 6 files changed, 352 insertions(+), 16 deletions(-)
> >
> [...]
> >diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
> >index 6c9906f..681c176 100644
> >--- a/arch/mips/include/uapi/asm/sigcontext.h
> >+++ b/arch/mips/include/uapi/asm/sigcontext.h
> >@@ -12,6 +12,10 @@
> > #include <linux/types.h>
> > #include <asm/sgidefs.h>
> >
> >+/* Bits which may be set in sc_used_math */
> >+#define USEDMATH_FP (1 << 0)
> >+#define USEDMATH_MSA (1 << 1)
> >+
>
> How is this going to interact with existing userspace applications?
>
> Is the current behavior to use / manipulate sc_used_math?
>
> How will USEDMATH_MSA interact with existing code?
>
My belief is that since previously sc_used_math was either 0 or 1, any
code using this is likely to simply check for it being non-zero and
continue to work just fine. The only issue would be if code explicitly
checks for sc_used_math==1. Even then it's only an issue if your program
also uses MSA, so there should certainly be no issue with old binaries.
If you can think of or find any uses of sc_used_math which would cause a
problem please do let me know.
Thanks,
Paul
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 15/15] mips: save/restore MSA context around signals
@ 2014-02-13 11:27 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-02-13 11:27 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton, Ralf Baechle
This patch extends sigcontext in order to hold the most significant 64
bits of each vector register in addition to the MSA control & status
register. The least significant 64 bits are already saved as the scalar
FP context. This makes things a little awkward since the least & most
significant 64 bits of each vector register are not contiguous in
memory. Thus the copy_u & insert instructions are used to transfer the
values of the most significant 64 bits via GP registers.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
---
Changes in v2:
- Conditionalise MSA sigcontext save/restore calls upon cpu_has_msa
in addition to used_math in order to avoid them for kernels without
MSA support. Fixes link errors such as:
arch/mips/built-in.o: In function `restore_sigcontext':
(.text+0x7500): undefined reference to `_restore_msa_context'
make: *** [vmlinux] Error 1
- Include asm/msa.h in signal32.c to avoid build errors such as:
CC arch/mips/kernel/signal32.o
arch/mips/kernel/signal32.c: In function ‘protected_restore_fp_context32’:
arch/mips/kernel/signal32.c:191:5: error: implicit declaration of function ‘enable_msa’ [-Werror=implicit-function-declaration]
arch/mips/kernel/signal32.c:195:5: error: implicit declaration of function ‘disable_msa’ [-Werror=implicit-function-declaration]
arch/mips/kernel/signal32.c: In function ‘setup_sigcontext32’:
arch/mips/kernel/signal32.c:242:2: error: implicit declaration of function ‘thread_msa_context_live’ [-Werror=implicit-function-declaration]
---
arch/mips/include/asm/sigcontext.h | 2 +
arch/mips/include/uapi/asm/sigcontext.h | 8 ++
arch/mips/kernel/asm-offsets.c | 3 +
arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
arch/mips/kernel/signal.c | 73 +++++++++--
arch/mips/kernel/signal32.c | 74 +++++++++--
6 files changed, 357 insertions(+), 16 deletions(-)
diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index eeeb0f4..f54bdbe 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -32,6 +32,8 @@ struct sigcontext32 {
__u32 sc_lo2;
__u32 sc_hi3;
__u32 sc_lo3;
+ __u64 sc_msaregs[32]; /* Most significant 64 bits */
+ __u32 sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
#endif /* _ASM_SIGCONTEXT_H */
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 6c9906f..681c176 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
#include <asm/sgidefs.h>
+/* Bits which may be set in sc_used_math */
+#define USEDMATH_FP (1 << 0)
+#define USEDMATH_MSA (1 << 1)
+
#if _MIPS_SIM == _MIPS_SIM_ABI32
/*
@@ -37,6 +41,8 @@ struct sigcontext {
unsigned long sc_lo2;
unsigned long sc_hi3;
unsigned long sc_lo3;
+ unsigned long long sc_msaregs[32]; /* Most significant 64 bits */
+ unsigned long sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
@@ -70,6 +76,8 @@ struct sigcontext {
__u32 sc_used_math;
__u32 sc_dsp;
__u32 sc_reserved;
+ __u64 sc_msaregs[32];
+ __u32 sc_msa_csr;
};
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index f454d7b..ace6814 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -294,6 +294,7 @@ void output_sc_defines(void)
OFFSET(SC_LO2, sigcontext, sc_lo2);
OFFSET(SC_HI3, sigcontext, sc_hi3);
OFFSET(SC_LO3, sigcontext, sc_lo3);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -308,6 +309,7 @@ void output_sc_defines(void)
OFFSET(SC_MDLO, sigcontext, sc_mdlo);
OFFSET(SC_PC, sigcontext, sc_pc);
OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -319,6 +321,7 @@ void output_sc32_defines(void)
OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs);
OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr);
OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir);
+ OFFSET(SC32_MSAREGS, sigcontext32, sc_msaregs);
BLANK();
}
#endif
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 253b2fb..752b50a 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -13,6 +13,7 @@
* Copyright (C) 1999, 2001 Silicon Graphics, Inc.
*/
#include <asm/asm.h>
+#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
@@ -245,6 +246,218 @@ LEAF(_restore_fp_context32)
END(_restore_fp_context32)
#endif
+#ifdef CONFIG_CPU_HAS_MSA
+
+ .macro save_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ copy_u_d \tmp, \wr, 1
+ EX sd \tmp, (\off+(\wr*8))(\sc)
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+#endif
+ .endm
+
+/*
+ * int _save_msa_context(struct sigcontext *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context)
+ save_sc_msareg 0, SC_MSAREGS, a0, t0
+ save_sc_msareg 1, SC_MSAREGS, a0, t0
+ save_sc_msareg 2, SC_MSAREGS, a0, t0
+ save_sc_msareg 3, SC_MSAREGS, a0, t0
+ save_sc_msareg 4, SC_MSAREGS, a0, t0
+ save_sc_msareg 5, SC_MSAREGS, a0, t0
+ save_sc_msareg 6, SC_MSAREGS, a0, t0
+ save_sc_msareg 7, SC_MSAREGS, a0, t0
+ save_sc_msareg 8, SC_MSAREGS, a0, t0
+ save_sc_msareg 9, SC_MSAREGS, a0, t0
+ save_sc_msareg 10, SC_MSAREGS, a0, t0
+ save_sc_msareg 11, SC_MSAREGS, a0, t0
+ save_sc_msareg 12, SC_MSAREGS, a0, t0
+ save_sc_msareg 13, SC_MSAREGS, a0, t0
+ save_sc_msareg 14, SC_MSAREGS, a0, t0
+ save_sc_msareg 15, SC_MSAREGS, a0, t0
+ save_sc_msareg 16, SC_MSAREGS, a0, t0
+ save_sc_msareg 17, SC_MSAREGS, a0, t0
+ save_sc_msareg 18, SC_MSAREGS, a0, t0
+ save_sc_msareg 19, SC_MSAREGS, a0, t0
+ save_sc_msareg 20, SC_MSAREGS, a0, t0
+ save_sc_msareg 21, SC_MSAREGS, a0, t0
+ save_sc_msareg 22, SC_MSAREGS, a0, t0
+ save_sc_msareg 23, SC_MSAREGS, a0, t0
+ save_sc_msareg 24, SC_MSAREGS, a0, t0
+ save_sc_msareg 25, SC_MSAREGS, a0, t0
+ save_sc_msareg 26, SC_MSAREGS, a0, t0
+ save_sc_msareg 27, SC_MSAREGS, a0, t0
+ save_sc_msareg 28, SC_MSAREGS, a0, t0
+ save_sc_msareg 29, SC_MSAREGS, a0, t0
+ save_sc_msareg 30, SC_MSAREGS, a0, t0
+ save_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _save_msa_context32(struct sigcontext32 *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context32)
+ save_sc_msareg 0, SC32_MSAREGS, a0, t0
+ save_sc_msareg 1, SC32_MSAREGS, a0, t0
+ save_sc_msareg 2, SC32_MSAREGS, a0, t0
+ save_sc_msareg 3, SC32_MSAREGS, a0, t0
+ save_sc_msareg 4, SC32_MSAREGS, a0, t0
+ save_sc_msareg 5, SC32_MSAREGS, a0, t0
+ save_sc_msareg 6, SC32_MSAREGS, a0, t0
+ save_sc_msareg 7, SC32_MSAREGS, a0, t0
+ save_sc_msareg 8, SC32_MSAREGS, a0, t0
+ save_sc_msareg 9, SC32_MSAREGS, a0, t0
+ save_sc_msareg 10, SC32_MSAREGS, a0, t0
+ save_sc_msareg 11, SC32_MSAREGS, a0, t0
+ save_sc_msareg 12, SC32_MSAREGS, a0, t0
+ save_sc_msareg 13, SC32_MSAREGS, a0, t0
+ save_sc_msareg 14, SC32_MSAREGS, a0, t0
+ save_sc_msareg 15, SC32_MSAREGS, a0, t0
+ save_sc_msareg 16, SC32_MSAREGS, a0, t0
+ save_sc_msareg 17, SC32_MSAREGS, a0, t0
+ save_sc_msareg 18, SC32_MSAREGS, a0, t0
+ save_sc_msareg 19, SC32_MSAREGS, a0, t0
+ save_sc_msareg 20, SC32_MSAREGS, a0, t0
+ save_sc_msareg 21, SC32_MSAREGS, a0, t0
+ save_sc_msareg 22, SC32_MSAREGS, a0, t0
+ save_sc_msareg 23, SC32_MSAREGS, a0, t0
+ save_sc_msareg 24, SC32_MSAREGS, a0, t0
+ save_sc_msareg 25, SC32_MSAREGS, a0, t0
+ save_sc_msareg 26, SC32_MSAREGS, a0, t0
+ save_sc_msareg 27, SC32_MSAREGS, a0, t0
+ save_sc_msareg 28, SC32_MSAREGS, a0, t0
+ save_sc_msareg 29, SC32_MSAREGS, a0, t0
+ save_sc_msareg 30, SC32_MSAREGS, a0, t0
+ save_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+ .macro restore_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ EX ld \tmp, (\off+(\wr*8))(\sc)
+ insert_d \wr, 1, \tmp
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 3, \tmp
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 3, \tmp
+#endif
+ .endm
+
+/*
+ * int _restore_msa_context(struct sigcontext *sc)
+ */
+LEAF(_restore_msa_context)
+ restore_sc_msareg 0, SC_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _restore_msa_context32(struct sigcontext32 *sc)
+ */
+LEAF(_restore_msa_context32)
+ restore_sc_msareg 0, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+#endif /* CONFIG_CPU_HAS_MSA */
+
.set reorder
.type fault@function
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 0f97c7d..fd61700 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -30,6 +30,7 @@
#include <linux/bitops.h>
#include <asm/cacheflush.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/sim.h>
#include <asm/ucontext.h>
#include <asm/cpu-features.h>
@@ -46,6 +47,9 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
+extern asmlinkage int _save_msa_context(struct sigcontext __user *sc);
+extern asmlinkage int _restore_msa_context(struct sigcontext __user *sc);
+
struct sigframe {
u32 sf_ass[4]; /* argument save space for o32 */
u32 sf_pad[2]; /* Was: signal trampoline */
@@ -95,19 +99,59 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* Helper routines
*/
-static int protected_save_fp_context(struct sigcontext __user *sc)
+static int protected_save_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err;
+ bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context(sc);
+ if (save_msa && !err)
+ err = _save_msa_context(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext(sc);
+ if (save_msa && !err)
+ err = copy_msa_to_sigcontext(sc);
}
if (likely(!err))
break;
@@ -121,17 +165,28 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
return err;
}
-static int protected_restore_fp_context(struct sigcontext __user *sc)
+static int protected_restore_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
+ bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context(sc);
+ if (restore_msa && !err) {
+ enable_msa();
+ err = _restore_msa_context(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_from_sigcontext(sc);
}
if (likely(!err))
break;
@@ -172,7 +227,8 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -180,7 +236,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context(sc);
+ err |= protected_save_fp_context(sc, used_math);
}
return err;
}
@@ -205,14 +261,14 @@ int fpcsr_pending(unsigned int __user *fpcsr)
}
static int
-check_and_restore_fp_context(struct sigcontext __user *sc)
+check_and_restore_fp_context(struct sigcontext __user *sc, unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context(sc);
+ err |= protected_restore_fp_context(sc, used_math);
return err ?: sig;
}
@@ -252,9 +308,10 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context(sc);
+ err = check_and_restore_fp_context(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index bae2e6e..299f956 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -30,6 +30,7 @@
#include <asm/sim.h>
#include <asm/ucontext.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/war.h>
#include <asm/vdso.h>
#include <asm/dsp.h>
@@ -42,6 +43,9 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _save_msa_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _restore_msa_context32(struct sigcontext32 __user *sc);
+
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -111,19 +115,59 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* sigcontext handlers
*/
-static int protected_save_fp_context32(struct sigcontext32 __user *sc)
+static int protected_save_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err;
+ bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context32(sc);
+ if (save_msa && !err)
+ err = _save_msa_context32(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext32(sc);
+ if (save_msa && !err)
+ err = copy_msa_to_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -137,17 +181,28 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
return err;
}
-static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
+static int protected_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
+ bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context32(sc);
+ if (restore_msa && !err) {
+ enable_msa();
+ err = _restore_msa_context32(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext32(sc);
+ if (restore_msa && !err)
+ err = copy_msa_from_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -186,7 +241,8 @@ static int setup_sigcontext32(struct pt_regs *regs,
err |= __put_user(mflo3(), &sc->sc_lo3);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -194,20 +250,21 @@ static int setup_sigcontext32(struct pt_regs *regs,
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context32(sc);
+ err |= protected_save_fp_context32(sc, used_math);
}
return err;
}
static int
-check_and_restore_fp_context32(struct sigcontext32 __user *sc)
+check_and_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context32(sc);
+ err |= protected_restore_fp_context32(sc, used_math);
return err ?: sig;
}
@@ -244,9 +301,10 @@ static int restore_sigcontext32(struct pt_regs *regs,
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context32(sc);
+ err = check_and_restore_fp_context32(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 15/15] mips: save/restore MSA context around signals
@ 2014-02-13 11:27 ` Paul Burton
0 siblings, 0 replies; 52+ messages in thread
From: Paul Burton @ 2014-02-13 11:27 UTC (permalink / raw)
To: linux-mips; +Cc: Paul Burton, Ralf Baechle
This patch extends sigcontext in order to hold the most significant 64
bits of each vector register in addition to the MSA control & status
register. The least significant 64 bits are already saved as the scalar
FP context. This makes things a little awkward since the least & most
significant 64 bits of each vector register are not contiguous in
memory. Thus the copy_u & insert instructions are used to transfer the
values of the most significant 64 bits via GP registers.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
---
Changes in v2:
- Conditionalise MSA sigcontext save/restore calls upon cpu_has_msa
in addition to used_math in order to avoid them for kernels without
MSA support. Fixes link errors such as:
arch/mips/built-in.o: In function `restore_sigcontext':
(.text+0x7500): undefined reference to `_restore_msa_context'
make: *** [vmlinux] Error 1
- Include asm/msa.h in signal32.c to avoid build errors such as:
CC arch/mips/kernel/signal32.o
arch/mips/kernel/signal32.c: In function ‘protected_restore_fp_context32’:
arch/mips/kernel/signal32.c:191:5: error: implicit declaration of function ‘enable_msa’ [-Werror=implicit-function-declaration]
arch/mips/kernel/signal32.c:195:5: error: implicit declaration of function ‘disable_msa’ [-Werror=implicit-function-declaration]
arch/mips/kernel/signal32.c: In function ‘setup_sigcontext32’:
arch/mips/kernel/signal32.c:242:2: error: implicit declaration of function ‘thread_msa_context_live’ [-Werror=implicit-function-declaration]
---
arch/mips/include/asm/sigcontext.h | 2 +
arch/mips/include/uapi/asm/sigcontext.h | 8 ++
arch/mips/kernel/asm-offsets.c | 3 +
arch/mips/kernel/r4k_fpu.S | 213 ++++++++++++++++++++++++++++++++
arch/mips/kernel/signal.c | 73 +++++++++--
arch/mips/kernel/signal32.c | 74 +++++++++--
6 files changed, 357 insertions(+), 16 deletions(-)
diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index eeeb0f4..f54bdbe 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -32,6 +32,8 @@ struct sigcontext32 {
__u32 sc_lo2;
__u32 sc_hi3;
__u32 sc_lo3;
+ __u64 sc_msaregs[32]; /* Most significant 64 bits */
+ __u32 sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
#endif /* _ASM_SIGCONTEXT_H */
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 6c9906f..681c176 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
#include <asm/sgidefs.h>
+/* Bits which may be set in sc_used_math */
+#define USEDMATH_FP (1 << 0)
+#define USEDMATH_MSA (1 << 1)
+
#if _MIPS_SIM == _MIPS_SIM_ABI32
/*
@@ -37,6 +41,8 @@ struct sigcontext {
unsigned long sc_lo2;
unsigned long sc_hi3;
unsigned long sc_lo3;
+ unsigned long long sc_msaregs[32]; /* Most significant 64 bits */
+ unsigned long sc_msa_csr;
};
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
@@ -70,6 +76,8 @@ struct sigcontext {
__u32 sc_used_math;
__u32 sc_dsp;
__u32 sc_reserved;
+ __u64 sc_msaregs[32];
+ __u32 sc_msa_csr;
};
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index f454d7b..ace6814 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -294,6 +294,7 @@ void output_sc_defines(void)
OFFSET(SC_LO2, sigcontext, sc_lo2);
OFFSET(SC_HI3, sigcontext, sc_hi3);
OFFSET(SC_LO3, sigcontext, sc_lo3);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -308,6 +309,7 @@ void output_sc_defines(void)
OFFSET(SC_MDLO, sigcontext, sc_mdlo);
OFFSET(SC_PC, sigcontext, sc_pc);
OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
+ OFFSET(SC_MSAREGS, sigcontext, sc_msaregs);
BLANK();
}
#endif
@@ -319,6 +321,7 @@ void output_sc32_defines(void)
OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs);
OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr);
OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir);
+ OFFSET(SC32_MSAREGS, sigcontext32, sc_msaregs);
BLANK();
}
#endif
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 253b2fb..752b50a 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -13,6 +13,7 @@
* Copyright (C) 1999, 2001 Silicon Graphics, Inc.
*/
#include <asm/asm.h>
+#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
@@ -245,6 +246,218 @@ LEAF(_restore_fp_context32)
END(_restore_fp_context32)
#endif
+#ifdef CONFIG_CPU_HAS_MSA
+
+ .macro save_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ copy_u_d \tmp, \wr, 1
+ EX sd \tmp, (\off+(\wr*8))(\sc)
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ copy_u_w \tmp, \wr, 2
+ EX sw \tmp, (\off+(\wr*8)+4)(\sc)
+ copy_u_w \tmp, \wr, 3
+ EX sw \tmp, (\off+(\wr*8)+0)(\sc)
+#endif
+ .endm
+
+/*
+ * int _save_msa_context(struct sigcontext *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context)
+ save_sc_msareg 0, SC_MSAREGS, a0, t0
+ save_sc_msareg 1, SC_MSAREGS, a0, t0
+ save_sc_msareg 2, SC_MSAREGS, a0, t0
+ save_sc_msareg 3, SC_MSAREGS, a0, t0
+ save_sc_msareg 4, SC_MSAREGS, a0, t0
+ save_sc_msareg 5, SC_MSAREGS, a0, t0
+ save_sc_msareg 6, SC_MSAREGS, a0, t0
+ save_sc_msareg 7, SC_MSAREGS, a0, t0
+ save_sc_msareg 8, SC_MSAREGS, a0, t0
+ save_sc_msareg 9, SC_MSAREGS, a0, t0
+ save_sc_msareg 10, SC_MSAREGS, a0, t0
+ save_sc_msareg 11, SC_MSAREGS, a0, t0
+ save_sc_msareg 12, SC_MSAREGS, a0, t0
+ save_sc_msareg 13, SC_MSAREGS, a0, t0
+ save_sc_msareg 14, SC_MSAREGS, a0, t0
+ save_sc_msareg 15, SC_MSAREGS, a0, t0
+ save_sc_msareg 16, SC_MSAREGS, a0, t0
+ save_sc_msareg 17, SC_MSAREGS, a0, t0
+ save_sc_msareg 18, SC_MSAREGS, a0, t0
+ save_sc_msareg 19, SC_MSAREGS, a0, t0
+ save_sc_msareg 20, SC_MSAREGS, a0, t0
+ save_sc_msareg 21, SC_MSAREGS, a0, t0
+ save_sc_msareg 22, SC_MSAREGS, a0, t0
+ save_sc_msareg 23, SC_MSAREGS, a0, t0
+ save_sc_msareg 24, SC_MSAREGS, a0, t0
+ save_sc_msareg 25, SC_MSAREGS, a0, t0
+ save_sc_msareg 26, SC_MSAREGS, a0, t0
+ save_sc_msareg 27, SC_MSAREGS, a0, t0
+ save_sc_msareg 28, SC_MSAREGS, a0, t0
+ save_sc_msareg 29, SC_MSAREGS, a0, t0
+ save_sc_msareg 30, SC_MSAREGS, a0, t0
+ save_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _save_msa_context32(struct sigcontext32 *sc)
+ *
+ * Save the upper 64 bits of each vector register along with the MSA_CSR
+ * register into sc. Returns zero on success, else non-zero.
+ */
+LEAF(_save_msa_context32)
+ save_sc_msareg 0, SC32_MSAREGS, a0, t0
+ save_sc_msareg 1, SC32_MSAREGS, a0, t0
+ save_sc_msareg 2, SC32_MSAREGS, a0, t0
+ save_sc_msareg 3, SC32_MSAREGS, a0, t0
+ save_sc_msareg 4, SC32_MSAREGS, a0, t0
+ save_sc_msareg 5, SC32_MSAREGS, a0, t0
+ save_sc_msareg 6, SC32_MSAREGS, a0, t0
+ save_sc_msareg 7, SC32_MSAREGS, a0, t0
+ save_sc_msareg 8, SC32_MSAREGS, a0, t0
+ save_sc_msareg 9, SC32_MSAREGS, a0, t0
+ save_sc_msareg 10, SC32_MSAREGS, a0, t0
+ save_sc_msareg 11, SC32_MSAREGS, a0, t0
+ save_sc_msareg 12, SC32_MSAREGS, a0, t0
+ save_sc_msareg 13, SC32_MSAREGS, a0, t0
+ save_sc_msareg 14, SC32_MSAREGS, a0, t0
+ save_sc_msareg 15, SC32_MSAREGS, a0, t0
+ save_sc_msareg 16, SC32_MSAREGS, a0, t0
+ save_sc_msareg 17, SC32_MSAREGS, a0, t0
+ save_sc_msareg 18, SC32_MSAREGS, a0, t0
+ save_sc_msareg 19, SC32_MSAREGS, a0, t0
+ save_sc_msareg 20, SC32_MSAREGS, a0, t0
+ save_sc_msareg 21, SC32_MSAREGS, a0, t0
+ save_sc_msareg 22, SC32_MSAREGS, a0, t0
+ save_sc_msareg 23, SC32_MSAREGS, a0, t0
+ save_sc_msareg 24, SC32_MSAREGS, a0, t0
+ save_sc_msareg 25, SC32_MSAREGS, a0, t0
+ save_sc_msareg 26, SC32_MSAREGS, a0, t0
+ save_sc_msareg 27, SC32_MSAREGS, a0, t0
+ save_sc_msareg 28, SC32_MSAREGS, a0, t0
+ save_sc_msareg 29, SC32_MSAREGS, a0, t0
+ save_sc_msareg 30, SC32_MSAREGS, a0, t0
+ save_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_save_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+ .macro restore_sc_msareg wr, off, sc, tmp
+#ifdef CONFIG_64BIT
+ EX ld \tmp, (\off+(\wr*8))(\sc)
+ insert_d \wr, 1, \tmp
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 3, \tmp
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ EX lw \tmp, (\off+(\wr*8)+4)(\sc)
+ insert_w \wr, 2, \tmp
+ EX lw \tmp, (\off+(\wr*8)+0)(\sc)
+ insert_w \wr, 3, \tmp
+#endif
+ .endm
+
+/*
+ * int _restore_msa_context(struct sigcontext *sc)
+ */
+LEAF(_restore_msa_context)
+ restore_sc_msareg 0, SC_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context)
+
+#ifdef CONFIG_MIPS32_COMPAT
+
+/*
+ * int _restore_msa_context32(struct sigcontext32 *sc)
+ */
+LEAF(_restore_msa_context32)
+ restore_sc_msareg 0, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 1, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 2, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 3, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 4, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 5, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 6, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 7, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 8, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 9, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 10, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 11, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 12, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 13, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 14, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 15, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 16, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 17, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 18, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 19, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 20, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 21, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 22, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 23, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 24, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 25, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 26, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 27, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 28, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 29, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 30, SC32_MSAREGS, a0, t0
+ restore_sc_msareg 31, SC32_MSAREGS, a0, t0
+ jr ra
+ li v0, 0
+ END(_restore_msa_context32)
+
+#endif /* CONFIG_MIPS32_COMPAT */
+
+#endif /* CONFIG_CPU_HAS_MSA */
+
.set reorder
.type fault@function
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 0f97c7d..fd61700 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -30,6 +30,7 @@
#include <linux/bitops.h>
#include <asm/cacheflush.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/sim.h>
#include <asm/ucontext.h>
#include <asm/cpu-features.h>
@@ -46,6 +47,9 @@ static int (*restore_fp_context)(struct sigcontext __user *sc);
extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
+extern asmlinkage int _save_msa_context(struct sigcontext __user *sc);
+extern asmlinkage int _restore_msa_context(struct sigcontext __user *sc);
+
struct sigframe {
u32 sf_ass[4]; /* argument save space for o32 */
u32 sf_pad[2]; /* Was: signal trampoline */
@@ -95,19 +99,59 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext(struct sigcontext __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* Helper routines
*/
-static int protected_save_fp_context(struct sigcontext __user *sc)
+static int protected_save_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err;
+ bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context(sc);
+ if (save_msa && !err)
+ err = _save_msa_context(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext(sc);
+ if (save_msa && !err)
+ err = copy_msa_to_sigcontext(sc);
}
if (likely(!err))
break;
@@ -121,17 +165,28 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
return err;
}
-static int protected_restore_fp_context(struct sigcontext __user *sc)
+static int protected_restore_fp_context(struct sigcontext __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
+ bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context(sc);
+ if (restore_msa && !err) {
+ enable_msa();
+ err = _restore_msa_context(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext(sc);
+ if (!err && (used_math & USEDMATH_MSA))
+ err = copy_msa_from_sigcontext(sc);
}
if (likely(!err))
break;
@@ -172,7 +227,8 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -180,7 +236,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context(sc);
+ err |= protected_save_fp_context(sc, used_math);
}
return err;
}
@@ -205,14 +261,14 @@ int fpcsr_pending(unsigned int __user *fpcsr)
}
static int
-check_and_restore_fp_context(struct sigcontext __user *sc)
+check_and_restore_fp_context(struct sigcontext __user *sc, unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context(sc);
+ err |= protected_restore_fp_context(sc, used_math);
return err ?: sig;
}
@@ -252,9 +308,10 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context(sc);
+ err = check_and_restore_fp_context(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index bae2e6e..299f956 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -30,6 +30,7 @@
#include <asm/sim.h>
#include <asm/ucontext.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/war.h>
#include <asm/vdso.h>
#include <asm/dsp.h>
@@ -42,6 +43,9 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc);
extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _save_msa_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _restore_msa_context32(struct sigcontext32 __user *sc);
+
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -111,19 +115,59 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc)
}
/*
+ * These functions will save only the upper 64 bits of the vector registers,
+ * since the lower 64 bits have already been saved as the scalar FP context.
+ */
+static int copy_msa_to_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |=
+ __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &sc->sc_msaregs[i]);
+ }
+ err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+static int copy_msa_from_sigcontext32(struct sigcontext32 __user *sc)
+{
+ int i;
+ int err = 0;
+ u64 val;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(val, &sc->sc_msaregs[i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, val);
+ }
+ err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr);
+
+ return err;
+}
+
+/*
* sigcontext handlers
*/
-static int protected_save_fp_context32(struct sigcontext32 __user *sc)
+static int protected_save_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err;
+ bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = save_fp_context32(sc);
+ if (save_msa && !err)
+ err = _save_msa_context32(sc);
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_to_sigcontext32(sc);
+ if (save_msa && !err)
+ err = copy_msa_to_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -137,17 +181,28 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
return err;
}
-static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
+static int protected_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, tmp __maybe_unused;
+ bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA);
while (1) {
lock_fpu_owner();
if (is_fpu_owner()) {
err = restore_fp_context32(sc);
+ if (restore_msa && !err) {
+ enable_msa();
+ err = _restore_msa_context32(sc);
+ } else {
+ /* signal handler may have used MSA */
+ disable_msa();
+ }
unlock_fpu_owner();
} else {
unlock_fpu_owner();
err = copy_fp_from_sigcontext32(sc);
+ if (restore_msa && !err)
+ err = copy_msa_from_sigcontext32(sc);
}
if (likely(!err))
break;
@@ -186,7 +241,8 @@ static int setup_sigcontext32(struct pt_regs *regs,
err |= __put_user(mflo3(), &sc->sc_lo3);
}
- used_math = !!used_math();
+ used_math = used_math() ? USEDMATH_FP : 0;
+ used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0;
err |= __put_user(used_math, &sc->sc_used_math);
if (used_math) {
@@ -194,20 +250,21 @@ static int setup_sigcontext32(struct pt_regs *regs,
* Save FPU state to signal context. Signal handler
* will "inherit" current FPU state.
*/
- err |= protected_save_fp_context32(sc);
+ err |= protected_save_fp_context32(sc, used_math);
}
return err;
}
static int
-check_and_restore_fp_context32(struct sigcontext32 __user *sc)
+check_and_restore_fp_context32(struct sigcontext32 __user *sc,
+ unsigned used_math)
{
int err, sig;
err = sig = fpcsr_pending(&sc->sc_fpc_csr);
if (err > 0)
err = 0;
- err |= protected_restore_fp_context32(sc);
+ err |= protected_restore_fp_context32(sc, used_math);
return err ?: sig;
}
@@ -244,9 +301,10 @@ static int restore_sigcontext32(struct pt_regs *regs,
if (used_math) {
/* restore fpu context if we have used it before */
if (!err)
- err = check_and_restore_fp_context32(sc);
+ err = check_and_restore_fp_context32(sc, used_math);
} else {
- /* signal handler may have used FPU. Give it up. */
+ /* signal handler may have used FPU or MSA. Disable them. */
+ disable_msa();
lose_fpu(0);
}
--
1.8.5.3
^ permalink raw reply related [flat|nested] 52+ messages in thread