[patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions

linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed

* [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
@ 2007-01-12  5:31 ebony.zhu
  2007-01-12  6:40 ` Kumar Gala
  2007-01-12  9:52 ` Christoph Hellwig
  0 siblings, 2 replies; 20+ messages in thread
From: ebony.zhu @ 2007-01-12  5:31 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev

Add the general support for Embedded Floating-Point instructions
to fully comply with IEEE-754.

Signed-off-by: Ebony Zhu <ebony.zhu@freescale.com>
---
 arch/powerpc/Makefile                |    5 +
 arch/powerpc/kernel/entry_32.S       |   50 ++++++
 arch/powerpc/kernel/head_booke.h     |    4 
 arch/powerpc/kernel/head_fsl_booke.S |   22 ++-
 arch/powerpc/kernel/traps.c          |   17 ++
 arch/powerpc/math-emu/Makefile       |   29 ++-
 arch/powerpc/math-emu/sfp-machine.h  |    4 
 arch/powerpc/sysdev/sigfpe_handler.c |  298 ++++++++++++++++++++++++++++++++++
 arch/powerpc/sysdev/Makefile         |    1 
 9 files changed, 421 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index a00fe72..dd0b4b8 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -134,6 +134,11 @@ core-y				+= arch/powerpc/kernel/ \
 				   arch/powerpc/lib/ \
 				   arch/powerpc/sysdev/ \
 				   arch/powerpc/platforms/
+ifeq ($(CONFIG_SPE),y)
+ifneq ($(CONFIG_MATH_EMULATION),y)
+core-y				+= arch/powerpc/math-emu/
+endif
+endif
 core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
 core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index c03e829..a3d4ece 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -616,6 +616,56 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
 	.long	ret_from_except
 #endif
 
+#ifdef CONFIG_SPE
+	.globl  ret_from_except_spe_full
+ret_from_except_spe_full:
+	REST_NVGPRS(r1)
+	/* fall through */
+	LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+	SYNC			/* Some chip revs have problems here... */
+	MTMSRD(r10)		/* disable interrupts */
+
+	lwz     r0,THREAD+THREAD_SPEFSCR(r2)
+	mtspr   SPRN_SPEFSCR,r0		/* restore SPEFSCR reg */
+
+	lwz     r0,GPR0(r1)
+	lwz     r2,GPR2(r1)
+	REST_4GPRS(3, r1)
+	REST_2GPRS(7, r1)
+	
+	lwz     r10,_XER(r1)
+	lwz     r11,_CTR(r1)
+	mtspr   SPRN_XER,r10
+	mtctr   r11
+
+	stwcx.  r0,0,r1                 /* to clear the reservation */
+
+	lwz     r11,_LINK(r1)
+	mtlr    r11
+	lwz     r10,_CCR(r1)
+	mtcrf   0xff,r10
+	REST_2GPRS(9, r1)
+
+	mtspr   SPRN_SPRG0,r11
+	mtspr   SPRN_SPRG1,r12
+	mfmsr   r11
+	oris  r11, r11, MSR_SPE@h
+	mtmsr   r11
+	mfspr   r12,SPRN_SPRG3
+	REST_32EVRS(0, r11,r12)
+	mfspr   r11,SPRN_SPRG0
+	mfspr   r12,SPRN_SPRG1
+
+	lwz     r11,_NIP(r1)
+	lwz     r12,_MSR(r1)
+	mtspr   SPRN_SRR0,r11
+	mtspr   SPRN_SRR1,r12
+	REST_2GPRS(11, r1)
+	lwz     r1,GPR1(r1)
+
+	rfi
+	b	.		/* prevent prefetch past rfi */
+#endif
 	.globl	ret_from_except_full
 ret_from_except_full:
 	REST_NVGPRS(r1)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 8536e76..1e14d3e 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -195,6 +195,10 @@ #define EXC_XFER_EE(n, hdlr)		\
 	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
 			  ret_from_except_full)
 
+#define EXC_XFER_EE_SPE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_spe_full)
+
 #define EXC_XFER_EE_LITE(n, hdlr)	\
 	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
 			  ret_from_except)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 66877bd..56200b6 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -699,7 +699,25 @@ #endif /* CONFIG_SPE */
 
 	/* SPE Floating Point Data */
 #ifdef CONFIG_SPE
-	EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+	START_EXCEPTION(SPEFloatingPointData)
+	mtspr   SPRN_SPRG0,r3
+	mtspr   SPRN_SPRG1,r4
+	mfmsr   r3
+	oris    r3, r3, MSR_SPE@h
+	mtmsr   r3
+	mfspr   r3, SPRN_SRR0
+	lwz     r3, 0(r3)
+	lis     r4, speinsn@ha
+	stw     r3, speinsn@l(r4)
+	mfspr   r4, SPRN_SPRG3
+	mfspr   r3, SPRN_SPEFSCR
+	stw     r3, THREAD_SPEFSCR(r4)
+	SAVE_32EVRS(0, r3, r4)
+	mfspr   r3, SPRN_SPRG0
+	mfspr   r4, SPRN_SPRG1
+	NORMAL_EXCEPTION_PROLOG
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE_SPE(0x2030, SPEFloatingPointException)
 #else
 	EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
 #endif /* CONFIG_SPE */
@@ -840,6 +858,8 @@ load_up_spe:
 	oris	r5,r5,MSR_SPE@h
 	mtmsr	r5			/* enable use of SPE now */
 	isync
+	li      r5,(SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE)
+	mtspr   SPRN_SPEFSCR,r5
 /*
  * For SMP, we don't do lazy SPE switching because it just gets too
  * horrendously complex, especially when a task switches from one CPU
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 535f506..68407d4 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -986,6 +986,22 @@ #endif /* CONFIG_FSL_BOOKE */
 #ifdef CONFIG_SPE
 void SPEFloatingPointException(struct pt_regs *regs)
 {
+#ifdef CONFIG_E500
+	extern int sigfpe_handler(struct pt_regs *regs);
+	int err;
+	if (current->thread.spefscr & ~0x3f) {
+		err = sigfpe_handler(regs);
+		if (err == 0) {
+			regs->nip += 4;
+			return;
+		} else {
+			current->thread.spefscr = 0x0;
+			return;
+		}
+	} else {
+		return;
+	}
+#else
 	unsigned long spefscr;
 	int fpexc_mode;
 	int code = 0;
@@ -1016,6 +1032,7 @@ void SPEFloatingPointException(struct pt
 
 	_exception(SIGFPE, regs, code, regs->nip);
 	return;
+#endif
 }
 #endif
 
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index 29bc912..dbb3e26 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -1,16 +1,29 @@
 
-obj-y				:= math.o fmr.o lfd.o stfd.o
-
-obj-$(CONFIG_MATH_EMULATION)	+= fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \
-					fctiw.o fctiwz.o fdiv.o fdivs.o \
+obj-y				:= fabs.o fadd.o fdiv.o fmul.o \
+					fneg.o fsub.o types.o udivmodti4.o
+					
+obj-$(CONFIG_MATH_EMULATION)	+= math.o fmr.o lfd.o stfd.o \
+					fadds.o fcmpo.o fcmpu.o \
+					fctiw.o fctiwz.o fdivs.o \
 					fmadd.o fmadds.o fmsub.o fmsubs.o \
-					fmul.o fmuls.o fnabs.o fneg.o types.o \
+					fmuls.o fnabs.o \
 					fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \
 					fres.o frsp.o frsqrte.o fsel.o lfs.o \
-					fsqrt.o	fsqrts.o fsub.o fsubs.o \
+					fsqrt.o	fsqrts.o fsubs.o \
 					mcrfs.o mffs.o mtfsb0.o mtfsb1.o \
-					mtfsf.o mtfsfi.o stfiwx.o stfs.o \
-					udivmodti4.o
+					mtfsf.o mtfsfi.o stfiwx.o stfs.o
+
+obj-$(CONFIG_SPE)		+= efsabs.o efsadd.o efscfd.o efscmpeq.o \
+					efscmpgt.o efscmplt.o efsctsf.o efsctsi.o \
+					efsctsiz.o efsctuf.o efsctui.o efsctuiz.o \
+					efsdiv.o efsmul.o efsnabs.o efsneg.o efssub.o \
+					evfsabs.o evfsadd.o evfscmpeq.o evfscmpgt.o \
+					evfscmplt.o evfsctsf.o evfsctsi.o evfsctsiz.o \
+					evfsctuf.o evfsctui.o evfsctuiz.o evfsdiv.o \
+					evfsmul.o evfsnabs.o evfsneg.o evfssub.o \
+					efdcfs.o efdcmpeq.o efdcmpgt.o efdcmplt.o efdctsf.o \
+					efdctsi.o efdctsidz.o efdctsiz.o efdctuf.o \
+					efdctui.o efdctuidz.o efdctuiz.o efdnabs.o
 
 CFLAGS_fabs.o = -fno-builtin-fabs
 CFLAGS_math.o = -fno-builtin-fabs
diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/math-emu/sfp-machine.h
index 4b17d83..313734d 100644
--- a/arch/powerpc/math-emu/sfp-machine.h
+++ b/arch/powerpc/math-emu/sfp-machine.h
@@ -166,7 +166,11 @@ #define __FP_PACK_RAW_2(fs, val, X)			\
 #include <linux/kernel.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_SPE
+#define __FPU_FPSCR	(current->thread.spefscr)
+#else
 #define __FPU_FPSCR	(current->thread.fpscr.val)
+#endif
 
 /* We only actually write to the destination register
  * if exceptions signalled (if any) will not trap.
diff --git a/arch/powerpc/sysdev/sigfpe_handler.c b/arch/powerpc/sysdev/sigfpe_handler.c
new file mode 100644
index 0000000..6e809b2
--- /dev/null
+++ b/arch/powerpc/sysdev/sigfpe_handler.c
@@ -0,0 +1,298 @@
+/*
+ * arch/powerpc/sysdev/sigfpe_handler.c
+ *
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Ebony Zhu, ebony.zhu@freescale.com
+ * 
+ * Derived from arch/powerpc/math-emu/math.c
+ * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
+ *
+ * Description:
+ * This file is the exception handler to make E500 SPE instructions
+ * fully comply with IEEE-754 floating point standard.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+
+#include <asm/uaccess.h>
+#include <asm/reg.h>
+
+#define SPEFUNC(x)	extern int x(void *, void *, void *, void *)
+#define efdabs	fabs
+#define efdadd	fadd
+#define efdsub	fsub
+#define efddiv	fdiv
+#define efdmul	fmul
+#define efdneg	fneg
+
+/* Scalar SPFP functions */
+SPEFUNC(efsabs);
+SPEFUNC(efsadd);
+SPEFUNC(efscfd);
+SPEFUNC(efscmpeq);
+SPEFUNC(efscmpgt);
+SPEFUNC(efscmplt);
+SPEFUNC(efsctsf);
+SPEFUNC(efsctsi);
+SPEFUNC(efsctsiz);
+SPEFUNC(efsctuf);
+SPEFUNC(efsctui);
+SPEFUNC(efsctuiz);
+SPEFUNC(efsdiv);
+SPEFUNC(efsmul);
+SPEFUNC(efsnabs);
+SPEFUNC(efsneg);
+SPEFUNC(efssub);
+
+/* Vector Floating-Point functions */
+SPEFUNC(evfsabs);
+SPEFUNC(evfsadd);
+SPEFUNC(evfscmpeq);
+SPEFUNC(evfscmpgt);
+SPEFUNC(evfscmplt);
+SPEFUNC(evfsctsf);
+SPEFUNC(evfsctsi);
+SPEFUNC(evfsctsiz);
+SPEFUNC(evfsctuf);
+SPEFUNC(evfsctui);
+SPEFUNC(evfsctuiz);
+SPEFUNC(evfsdiv);
+SPEFUNC(evfsmul);
+SPEFUNC(evfsnabs);
+SPEFUNC(evfsneg);
+SPEFUNC(evfssub);
+
+/* Scalar DPFP functions */
+SPEFUNC(efdabs);
+SPEFUNC(efdadd);
+SPEFUNC(efdcfs);
+SPEFUNC(efdcmpeq);
+SPEFUNC(efdcmpgt);
+SPEFUNC(efdcmplt);
+SPEFUNC(efdctsf);
+SPEFUNC(efdctsi);
+SPEFUNC(efdctsidz);
+SPEFUNC(efdctsiz);
+SPEFUNC(efdctuf);
+SPEFUNC(efdctui);
+SPEFUNC(efdctuidz);
+SPEFUNC(efdctuiz);
+SPEFUNC(efddiv);
+SPEFUNC(efdmul);
+SPEFUNC(efdnabs);
+SPEFUNC(efdneg);
+SPEFUNC(efdsub);
+
+#define VCT		0x4
+#define SPFP		0x6
+#define DPFP		0x7
+#define EFAPU		0x4
+
+#define EFSADD		0x2c0
+#define EFSSUB		0x2c1
+#define EFSABS		0x2c4
+#define EFSNABS		0x2c5
+#define EFSNEG		0x2c6
+#define EFSMUL		0x2c8
+#define EFSDIV		0x2c9
+#define EFSCMPGT	0x2cc
+#define EFSCMPLT	0x2cd
+#define EFSCMPEQ	0x2ce
+#define EFSCFD		0x2cf
+#define EFSCTUI		0x2d4
+#define EFSCTSI		0x2d5
+#define EFSCTUF		0x2d6
+#define EFSCTSF		0x2d7
+#define EFSCTUIZ	0x2d8
+#define EFSCTSIZ	0x2da
+
+#define EVFSADD		0x280
+#define EVFSSUB		0x281
+#define EVFSABS		0x284
+#define EVFSNABS	0x285
+#define EVFSNEG		0x286
+#define EVFSMUL		0x288
+#define EVFSDIV		0x289
+#define EVFSCMPGT	0x28c
+#define EVFSCMPLT	0x28d
+#define EVFSCMPEQ	0x28e
+#define EVFSCTUI	0x294
+#define EVFSCTSI	0x295
+#define EVFSCTUF	0x296
+#define EVFSCTSF	0x297
+#define EVFSCTUIZ	0x298
+#define EVFSCTSIZ	0x29a
+
+#define EFDADD		0x2e0
+#define EFDSUB		0x2e1
+#define EFDABS		0x2e4
+#define EFDNABS		0x2e5
+#define EFDNEG		0x2e6
+#define EFDMUL		0x2e8
+#define EFDDIV		0x2e9
+#define EFDCTUIDZ	0x2ea
+#define EFDCTSIDZ	0x2eb
+#define EFDCMPGT	0x2ec
+#define EFDCMPLT	0x2ed
+#define EFDCMPEQ	0x2ee
+#define EFDCFS		0x2ef
+#define EFDCTUI		0x2f4
+#define EFDCTSI		0x2f5
+#define EFDCTUF		0x2f6
+#define EFDCTSF		0x2f7
+#define EFDCTUIZ	0x2f8
+#define EFDCTSIZ	0x2fa
+
+#define AB	2
+#define XA	3
+#define XB	4
+#define XCR	5	
+
+static u64 fullgprs[32];
+u32 speinsn;
+
+int
+sigfpe_handler(struct pt_regs *regs)
+{
+	void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
+	int i;
+	int (*func)(void *, void *, void *, void *);
+	int type = 0;
+	int flag;
+	
+	switch ((speinsn >> 5) & 0x7 ) {
+	case SPFP:
+		for(i = 0; i < 32; i++) {
+			fullgprs[i] = regs->gpr[i];
+			fullgprs[i] = fullgprs[i] << 32 | current->thread.evr[i]; 
+		};
+		break;
+	default:
+		for(i = 0; i < 32; i++) {
+			fullgprs[i] = current->thread.evr[i];
+			fullgprs[i] = (fullgprs[i] << 32) | (regs->gpr[i]); 
+		};
+	}
+
+	switch (speinsn >> 26) {
+	
+	case EFAPU:
+		switch (speinsn & 0x7ff) {
+		case EFSABS:	func = efsabs;		type = XA;	break;
+		case EFSADD:	func = efsadd;		type = AB;      break;
+		case EFSCFD:	func = efscfd;		type = XB;	break;
+		case EFSCMPEQ:	func = efscmpeq;	type = XCR;	break;
+		case EFSCMPGT:	func = efscmpgt;	type = XCR;	break;
+		case EFSCMPLT:	func = efscmplt;	type = XCR;	break;
+		case EFSCTSF:	func = efsctsf;		type = XB;	break;
+		case EFSCTSI:	func = efsctsi;		type = XB;	break;
+		case EFSCTSIZ:	func = efsctsiz;	type = XB;	break;
+		case EFSCTUF:	func = efsctuf;		type = XB;	break;
+		case EFSCTUI:	func = efsctui;		type = XB;	break;
+		case EFSCTUIZ:	func = efsctuiz;	type = XB;	break;
+		case EFSDIV:	func = efsdiv;		type = AB;	break;
+		case EFSMUL:	func = efsmul;		type = AB;	break;
+		case EFSNABS:	func = efsnabs;		type = XA;	break;
+		case EFSNEG:	func = efsneg;		type = XA;	break;
+		case EFSSUB:	func = efssub;		type = AB;	break;
+
+		case EVFSABS:	func = evfsabs;		type = XA;	break;
+		case EVFSADD:	func = evfsadd;		type = AB;      break;
+		case EVFSCMPEQ:	func = evfscmpeq;	type = XCR;	break;
+		case EVFSCMPGT:	func = evfscmpgt;	type = XCR;	break;
+		case EVFSCMPLT:	func = evfscmplt;	type = XCR;	break;
+		case EVFSCTSF:	func = evfsctsf;	type = XB;	break;
+		case EVFSCTSI:	func = evfsctsi;	type = XB;	break;
+		case EVFSCTSIZ:	func = evfsctsiz;	type = XB;	break;
+		case EVFSCTUF:	func = evfsctuf;	type = XB;	break;
+		case EVFSCTUI:	func = evfsctui;	type = XB;	break;
+		case EVFSCTUIZ:	func = evfsctuiz;	type = XB;	break;
+		case EVFSDIV:	func = evfsdiv;		type = AB;	break;
+		case EVFSMUL:	func = evfsmul;		type = AB;	break;
+		case EVFSNABS:	func = evfsnabs;	type = XA;	break;
+		case EVFSNEG:	func = evfsneg;		type = XA;	break;
+		case EVFSSUB:	func = evfssub;		type = AB;	break;
+
+		case EFDABS:	func = efdabs;		type = XA;	break;
+		case EFDADD:	func = efdadd;		type = AB;	break;
+		case EFDCFS:	func = efdcfs;		type = XB;	break;
+		case EFDCMPEQ:	func = efdcmpeq;	type = XCR;	break;
+		case EFDCMPGT:	func = efdcmpgt;	type = XCR;	break;
+		case EFDCMPLT:	func = efdcmplt;	type = XCR;	break;
+		case EFDCTSF:	func = efdctsf;		type = XB;	break;
+		case EFDCTSI:	func = efdctsi;		type = XB;	break;
+		case EFDCTSIDZ:	func = efdctsidz;	type = XB;	break;
+		case EFDCTSIZ:	func = efdctsiz;	type = XB;	break;
+		case EFDCTUF:	func = efdctuf;		type = XB;	break;
+		case EFDCTUI:	func = efdctui;		type = XB;	break;
+		case EFDCTUIDZ:	func = efdctuidz;	type = XB;	break;
+		case EFDCTUIZ:	func = efdctuiz;	type = XB;	break;
+		case EFDDIV:	func = efddiv;		type = AB;	break;
+		case EFDMUL:	func = efdmul;		type = AB;	break;
+		case EFDNABS:	func = efdnabs;		type = XA;	break;
+		case EFDNEG:	func = efdneg;		type = XA;	break;
+		case EFDSUB:	func = efdsub;		type = AB;	break;		
+		default:
+			goto illegal; 
+		}
+		break;
+	default:
+		goto illegal;
+	}
+
+	switch (type) {
+	case AB:
+		op0 = &fullgprs[(speinsn >> 21) & 0x1f];
+		op1 = &fullgprs[(speinsn >> 16) & 0x1f];
+		op2 = &fullgprs[(speinsn >> 11) & 0x1f];
+		break;
+
+	case XA:
+		op0 = &fullgprs[(speinsn >> 21) & 0x1f];
+		op1 = &fullgprs[(speinsn >> 16) & 0x1f];
+		break;
+
+	case XB:
+		op0 = &fullgprs[(speinsn >> 21) & 0x1f];
+		op1 = &fullgprs[(speinsn >> 11) & 0x1f];
+		break;
+	
+	case XCR:
+		op0 = (void *)&regs->ccr;
+		op1 = (void *)((speinsn >> 23) & 0x7);
+		op2 = &fullgprs[(speinsn >> 16) & 0x1f];
+		op3 = &fullgprs[(speinsn >> 11) & 0x1f];
+		break;
+
+	default:
+		goto illegal;
+	}
+
+	flag = func(op0, op1, op2, op3);
+	
+	switch ((speinsn >> 5) & 0x7 ) {
+	case SPFP:
+		for (i = 0; i < 32; i++) {
+			regs->gpr[i] = fullgprs[i] >> 32;
+		};
+		break;
+	default:
+		for (i = 0; i < 32; i++) {
+			regs->gpr[i] = fullgprs[i];
+			current->thread.evr[i] = fullgprs[i] >> 32;
+		};
+	}
+	
+	current->thread.spefscr &= 0x3f;
+	return 0;
+
+illegal:
+	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\n");
+	return -ENOSYS;
+}
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 04d4917..1e74e15 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_QUICC_ENGINE)	+= qe_lib/
 obj-$(CONFIG_MTD)		+= rom.o
+obj-$(CONFIG_SPE)		+= sigfpe_handler.o
 
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-$(CONFIG_PPC_I8259)		+= i8259.o
-- 
1.4.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  5:31 [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions ebony.zhu
@ 2007-01-12  6:40 ` Kumar Gala
  2007-01-12  7:45   ` Zhu Ebony-r57400
  2007-01-12  9:52 ` Christoph Hellwig
  1 sibling, 1 reply; 20+ messages in thread
From: Kumar Gala @ 2007-01-12  6:40 UTC (permalink / raw)
  To: ebony.zhu; +Cc: linuxppc-dev, paulus


On Jan 11, 2007, at 11:31 PM, ebony.zhu@freescale.com wrote:

> Add the general support for Embedded Floating-Point instructions
> to fully comply with IEEE-754.
>
> Signed-off-by:Ebony Zhu <ebony.zhu@freescale.com>
> ---
>  arch/powerpc/Makefile                |    5 +
>  arch/powerpc/kernel/entry_32.S       |   50 ++++++
>  arch/powerpc/kernel/head_booke.h     |    4
>  arch/powerpc/kernel/head_fsl_booke.S |   22 ++-
>  arch/powerpc/kernel/traps.c          |   17 ++
>  arch/powerpc/math-emu/Makefile       |   29 ++-
>  arch/powerpc/math-emu/sfp-machine.h  |    4
>  arch/powerpc/sysdev/sigfpe_handler.c |  298 +++++++++++++++++++++++ 
> +++++++++++
>  arch/powerpc/sysdev/Makefile         |    1
>  9 files changed, 421 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> index a00fe72..dd0b4b8 100644
> --- a/arch/powerpc/Makefile
> +++ b/arch/powerpc/Makefile
> @@ -134,6 +134,11 @@ core-y				+= arch/powerpc/kernel/ \
>  				   arch/powerpc/lib/ \
>  				   arch/powerpc/sysdev/ \
>  				   arch/powerpc/platforms/
> +ifeq ($(CONFIG_SPE),y)
> +ifneq ($(CONFIG_MATH_EMULATION),y)
> +core-y				+= arch/powerpc/math-emu/
> +endif
> +endif
>  core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
>  core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
>
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/ 
> entry_32.S
> index c03e829..a3d4ece 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -616,6 +616,56 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
>  	.long	ret_from_except
>  #endif
>
> +#ifdef CONFIG_SPE
> +	.globl  ret_from_except_spe_full
> +ret_from_except_spe_full:
> +	REST_NVGPRS(r1)
> +	/* fall through */
> +	LOAD_MSR_KERNEL(r10,MSR_KERNEL)
> +	SYNC			/* Some chip revs have problems here... */
> +	MTMSRD(r10)		/* disable interrupts */
> +
> +	lwz     r0,THREAD+THREAD_SPEFSCR(r2)
> +	mtspr   SPRN_SPEFSCR,r0		/* restore SPEFSCR reg */
> +
> +	lwz     r0,GPR0(r1)
> +	lwz     r2,GPR2(r1)
> +	REST_4GPRS(3, r1)
> +	REST_2GPRS(7, r1)
> +	
> +	lwz     r10,_XER(r1)
> +	lwz     r11,_CTR(r1)
> +	mtspr   SPRN_XER,r10
> +	mtctr   r11
> +
> +	stwcx.  r0,0,r1                 /* to clear the reservation */
> +
> +	lwz     r11,_LINK(r1)
> +	mtlr    r11
> +	lwz     r10,_CCR(r1)
> +	mtcrf   0xff,r10
> +	REST_2GPRS(9, r1)
> +
> +	mtspr   SPRN_SPRG0,r11
> +	mtspr   SPRN_SPRG1,r12
> +	mfmsr   r11
> +	oris  r11, r11, MSR_SPE@h
> +	mtmsr   r11
> +	mfspr   r12,SPRN_SPRG3
> +	REST_32EVRS(0, r11,r12)
> +	mfspr   r11,SPRN_SPRG0
> +	mfspr   r12,SPRN_SPRG1
> +
> +	lwz     r11,_NIP(r1)
> +	lwz     r12,_MSR(r1)
> +	mtspr   SPRN_SRR0,r11
> +	mtspr   SPRN_SRR1,r12
> +	REST_2GPRS(11, r1)
> +	lwz     r1,GPR1(r1)
> +
> +	rfi
> +	b	.		/* prevent prefetch past rfi */

Why do we need a separate ret_from_except_spe_full?

> +#endif
>  	.globl	ret_from_except_full
>  ret_from_except_full:
>  	REST_NVGPRS(r1)
> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/ 
> head_booke.h
> index 8536e76..1e14d3e 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -195,6 +195,10 @@ #define EXC_XFER_EE(n, hdlr)		\
>  	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE,  
> transfer_to_handler_full, \
>  			  ret_from_except_full)
>
> +#define EXC_XFER_EE_SPE(n, hdlr)	\
> +	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE,  
> transfer_to_handler_full, \
> +			  ret_from_except_spe_full)
> +
>  #define EXC_XFER_EE_LITE(n, hdlr)	\
>  	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE,  
> transfer_to_handler, \
>  			  ret_from_except)
> diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/ 
> kernel/head_fsl_booke.S
> index 66877bd..56200b6 100644
> --- a/arch/powerpc/kernel/head_fsl_booke.S
> +++ b/arch/powerpc/kernel/head_fsl_booke.S
> @@ -699,7 +699,25 @@ #endif /* CONFIG_SPE */
>
>  	/* SPE Floating Point Data */
>  #ifdef CONFIG_SPE
> -	EXCEPTION(0x2030, SPEFloatingPointData,  
> SPEFloatingPointException, EXC_XFER_EE);
> +	START_EXCEPTION(SPEFloatingPointData)
> +	mtspr   SPRN_SPRG0,r3
> +	mtspr   SPRN_SPRG1,r4
> +	mfmsr   r3
> +	oris    r3, r3, MSR_SPE@h
> +	mtmsr   r3
> +	mfspr   r3, SPRN_SRR0
> +	lwz     r3, 0(r3)
> +	lis     r4, speinsn@ha
> +	stw     r3, speinsn@l(r4)
> +	mfspr   r4, SPRN_SPRG3
> +	mfspr   r3, SPRN_SPEFSCR
> +	stw     r3, THREAD_SPEFSCR(r4)
> +	SAVE_32EVRS(0, r3, r4)
> +	mfspr   r3, SPRN_SPRG0
> +	mfspr   r4, SPRN_SPRG1
> +	NORMAL_EXCEPTION_PROLOG
> +	addi    r3,r1,STACK_FRAME_OVERHEAD
> +	EXC_XFER_EE_SPE(0x2030, SPEFloatingPointException)

Why do we have to do any of this in the exception path?

SPEFloatingPointException can have something like:

	unsigned long pc = regs->nip;
	flush_spe_to_thread(current);

	if (get_user(insn, (u32 *)pc))
		return -EFAULT;

	...

>  #else
>  	EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception,  
> EXC_XFER_EE)
>  #endif /* CONFIG_SPE */
> @@ -840,6 +858,8 @@ load_up_spe:
>  	oris	r5,r5,MSR_SPE@h
>  	mtmsr	r5			/* enable use of SPE now */
>  	isync
> +	li      r5,(SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE |  
> SPEFSCR_FOVFE)
> +	mtspr   SPRN_SPEFSCR,r5

If you want to initialize spefscr so the enables are set, do it in  
INIT_THREAD.  I need to think more about whether we should even be  
doing that in the kernel.

>  /*
>   * For SMP, we don't do lazy SPE switching because it just gets too
>   * horrendously complex, especially when a task switches from one CPU
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 535f506..68407d4 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -986,6 +986,22 @@ #endif /* CONFIG_FSL_BOOKE */
>  #ifdef CONFIG_SPE
>  void SPEFloatingPointException(struct pt_regs *regs)
>  {
> +#ifdef CONFIG_E500
> +	extern int sigfpe_handler(struct pt_regs *regs);
> +	int err;
> +	if (current->thread.spefscr & ~0x3f) {
> +		err = sigfpe_handler(regs);
> +		if (err == 0) {
> +			regs->nip += 4;
> +			return;
> +		} else {
> +			current->thread.spefscr = 0x0;

Why do we clear spefscr and re-execute?

> +			return;
> +		}
> +	} else {
> +		return;

How else would we get here if not by having spefscr bit set?

> +	}
> +#else
>  	unsigned long spefscr;
>  	int fpexc_mode;
>  	int code = 0;
> @@ -1016,6 +1032,7 @@ void SPEFloatingPointException(struct pt
>
>  	_exception(SIGFPE, regs, code, regs->nip);
>  	return;

Your code should supersede the handling code that's currently there.

> +#endif
>  }
>  #endif
>
> diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/ 
> Makefile
> index 29bc912..dbb3e26 100644
> --- a/arch/powerpc/math-emu/Makefile
> +++ b/arch/powerpc/math-emu/Makefile
> @@ -1,16 +1,29 @@
>
> -obj-y				:= math.o fmr.o lfd.o stfd.o
> -
> -obj-$(CONFIG_MATH_EMULATION)	+= fabs.o fadd.o fadds.o fcmpo.o  
> fcmpu.o \
> -					fctiw.o fctiwz.o fdiv.o fdivs.o \
> +obj-y				:= fabs.o fadd.o fdiv.o fmul.o \
> +					fneg.o fsub.o types.o udivmodti4.o
> +					
> +obj-$(CONFIG_MATH_EMULATION)	+= math.o fmr.o lfd.o stfd.o \
> +					fadds.o fcmpo.o fcmpu.o \
> +					fctiw.o fctiwz.o fdivs.o \
>  					fmadd.o fmadds.o fmsub.o fmsubs.o \
> -					fmul.o fmuls.o fnabs.o fneg.o types.o \
> +					fmuls.o fnabs.o \
>  					fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \
>  					fres.o frsp.o frsqrte.o fsel.o lfs.o \
> -					fsqrt.o	fsqrts.o fsub.o fsubs.o \
> +					fsqrt.o	fsqrts.o fsubs.o \
>  					mcrfs.o mffs.o mtfsb0.o mtfsb1.o \
> -					mtfsf.o mtfsfi.o stfiwx.o stfs.o \
> -					udivmodti4.o
> +					mtfsf.o mtfsfi.o stfiwx.o stfs.o
> +
> +obj-$(CONFIG_SPE)		+= efsabs.o efsadd.o efscfd.o efscmpeq.o \
> +					efscmpgt.o efscmplt.o efsctsf.o efsctsi.o \
> +					efsctsiz.o efsctuf.o efsctui.o efsctuiz.o \
> +					efsdiv.o efsmul.o efsnabs.o efsneg.o efssub.o \
> +					evfsabs.o evfsadd.o evfscmpeq.o evfscmpgt.o \
> +					evfscmplt.o evfsctsf.o evfsctsi.o evfsctsiz.o \
> +					evfsctuf.o evfsctui.o evfsctuiz.o evfsdiv.o \
> +					evfsmul.o evfsnabs.o evfsneg.o evfssub.o \
> +					efdcfs.o efdcmpeq.o efdcmpgt.o efdcmplt.o efdctsf.o \
> +					efdctsi.o efdctsidz.o efdctsiz.o efdctuf.o \
> +					efdctui.o efdctuidz.o efdctuiz.o efdnabs.o
>
>  CFLAGS_fabs.o = -fno-builtin-fabs
>  CFLAGS_math.o = -fno-builtin-fabs
> diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/ 
> math-emu/sfp-machine.h
> index 4b17d83..313734d 100644
> --- a/arch/powerpc/math-emu/sfp-machine.h
> +++ b/arch/powerpc/math-emu/sfp-machine.h
> @@ -166,7 +166,11 @@ #define __FP_PACK_RAW_2(fs, val, X)			\
>  #include <linux/kernel.h>
>  #include <linux/sched.h>
>
> +#ifdef CONFIG_SPE
> +#define __FPU_FPSCR	(current->thread.spefscr)
> +#else
>  #define __FPU_FPSCR	(current->thread.fpscr.val)
> +#endif
>
>  /* We only actually write to the destination register
>   * if exceptions signalled (if any) will not trap.
> diff --git a/arch/powerpc/sysdev/sigfpe_handler.c b/arch/powerpc/ 
> sysdev/sigfpe_handler.c
> new file mode 100644
> index 0000000..6e809b2
> --- /dev/null
> +++ b/arch/powerpc/sysdev/sigfpe_handler.c
> @@ -0,0 +1,298 @@
> +/*
> + * arch/powerpc/sysdev/sigfpe_handler.c
> + *
> + * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights  
> reserved.
> + *
> + * Author: Ebony Zhu, ebony.zhu@freescale.com
> + *
> + * Derived from arch/powerpc/math-emu/math.c
> + * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
> + *
> + * Description:
> + * This file is the exception handler to make E500 SPE instructions
> + * fully comply with IEEE-754 floating point standard.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/types.h>
> +
> +#include <asm/uaccess.h>
> +#include <asm/reg.h>
> +
> +#define SPEFUNC(x)	extern int x(void *, void *, void *, void *)
> +#define efdabs	fabs
> +#define efdadd	fadd
> +#define efdsub	fsub
> +#define efddiv	fdiv
> +#define efdmul	fmul
> +#define efdneg	fneg
> +
> +/* Scalar SPFP functions */
> +SPEFUNC(efsabs);
> +SPEFUNC(efsadd);
> +SPEFUNC(efscfd);
> +SPEFUNC(efscmpeq);
> +SPEFUNC(efscmpgt);
> +SPEFUNC(efscmplt);
> +SPEFUNC(efsctsf);
> +SPEFUNC(efsctsi);
> +SPEFUNC(efsctsiz);
> +SPEFUNC(efsctuf);
> +SPEFUNC(efsctui);
> +SPEFUNC(efsctuiz);
> +SPEFUNC(efsdiv);
> +SPEFUNC(efsmul);
> +SPEFUNC(efsnabs);
> +SPEFUNC(efsneg);
> +SPEFUNC(efssub);
> +
> +/* Vector Floating-Point functions */
> +SPEFUNC(evfsabs);
> +SPEFUNC(evfsadd);
> +SPEFUNC(evfscmpeq);
> +SPEFUNC(evfscmpgt);
> +SPEFUNC(evfscmplt);
> +SPEFUNC(evfsctsf);
> +SPEFUNC(evfsctsi);
> +SPEFUNC(evfsctsiz);
> +SPEFUNC(evfsctuf);
> +SPEFUNC(evfsctui);
> +SPEFUNC(evfsctuiz);
> +SPEFUNC(evfsdiv);
> +SPEFUNC(evfsmul);
> +SPEFUNC(evfsnabs);
> +SPEFUNC(evfsneg);
> +SPEFUNC(evfssub);
> +
> +/* Scalar DPFP functions */
> +SPEFUNC(efdabs);
> +SPEFUNC(efdadd);
> +SPEFUNC(efdcfs);
> +SPEFUNC(efdcmpeq);
> +SPEFUNC(efdcmpgt);
> +SPEFUNC(efdcmplt);
> +SPEFUNC(efdctsf);
> +SPEFUNC(efdctsi);
> +SPEFUNC(efdctsidz);
> +SPEFUNC(efdctsiz);
> +SPEFUNC(efdctuf);
> +SPEFUNC(efdctui);
> +SPEFUNC(efdctuidz);
> +SPEFUNC(efdctuiz);
> +SPEFUNC(efddiv);
> +SPEFUNC(efdmul);
> +SPEFUNC(efdnabs);
> +SPEFUNC(efdneg);
> +SPEFUNC(efdsub);
> +
> +#define VCT		0x4

not used?

> +#define SPFP		0x6
> +#define DPFP		0x7
> +#define EFAPU		0x4
> +
> +#define EFSADD		0x2c0
> +#define EFSSUB		0x2c1
> +#define EFSABS		0x2c4
> +#define EFSNABS		0x2c5
> +#define EFSNEG		0x2c6
> +#define EFSMUL		0x2c8
> +#define EFSDIV		0x2c9
> +#define EFSCMPGT	0x2cc
> +#define EFSCMPLT	0x2cd
> +#define EFSCMPEQ	0x2ce
> +#define EFSCFD		0x2cf
> +#define EFSCTUI		0x2d4
> +#define EFSCTSI		0x2d5
> +#define EFSCTUF		0x2d6
> +#define EFSCTSF		0x2d7
> +#define EFSCTUIZ	0x2d8
> +#define EFSCTSIZ	0x2da
> +
> +#define EVFSADD		0x280
> +#define EVFSSUB		0x281
> +#define EVFSABS		0x284
> +#define EVFSNABS	0x285
> +#define EVFSNEG		0x286
> +#define EVFSMUL		0x288
> +#define EVFSDIV		0x289
> +#define EVFSCMPGT	0x28c
> +#define EVFSCMPLT	0x28d
> +#define EVFSCMPEQ	0x28e
> +#define EVFSCTUI	0x294
> +#define EVFSCTSI	0x295
> +#define EVFSCTUF	0x296
> +#define EVFSCTSF	0x297
> +#define EVFSCTUIZ	0x298
> +#define EVFSCTSIZ	0x29a
> +
> +#define EFDADD		0x2e0
> +#define EFDSUB		0x2e1
> +#define EFDABS		0x2e4
> +#define EFDNABS		0x2e5
> +#define EFDNEG		0x2e6
> +#define EFDMUL		0x2e8
> +#define EFDDIV		0x2e9
> +#define EFDCTUIDZ	0x2ea
> +#define EFDCTSIDZ	0x2eb
> +#define EFDCMPGT	0x2ec
> +#define EFDCMPLT	0x2ed
> +#define EFDCMPEQ	0x2ee
> +#define EFDCFS		0x2ef
> +#define EFDCTUI		0x2f4
> +#define EFDCTSI		0x2f5
> +#define EFDCTUF		0x2f6
> +#define EFDCTSF		0x2f7
> +#define EFDCTUIZ	0x2f8
> +#define EFDCTSIZ	0x2fa
> +
> +#define AB	2
> +#define XA	3
> +#define XB	4
> +#define XCR	5	
> +
> +static u64 fullgprs[32];
> +u32 speinsn;
> +
> +int
> +sigfpe_handler(struct pt_regs *regs)
> +{
> +	void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
> +	int i;
> +	int (*func)(void *, void *, void *, void *);
> +	int type = 0;
> +	int flag;
> +	
> +	switch ((speinsn >> 5) & 0x7 ) {
> +	case SPFP:
> +		for(i = 0; i < 32; i++) {
> +			fullgprs[i] = regs->gpr[i];
> +			fullgprs[i] = fullgprs[i] << 32 | current->thread.evr[i];
> +		};
> +		break;
> +	default:
> +		for(i = 0; i < 32; i++) {
> +			fullgprs[i] = current->thread.evr[i];
> +			fullgprs[i] = (fullgprs[i] << 32) | (regs->gpr[i]);
> +		};
> +	}
> +
> +	switch (speinsn >> 26) {
> +	
> +	case EFAPU:
> +		switch (speinsn & 0x7ff) {
> +		case EFSABS:	func = efsabs;		type = XA;	break;
> +		case EFSADD:	func = efsadd;		type = AB;      break;
> +		case EFSCFD:	func = efscfd;		type = XB;	break;
> +		case EFSCMPEQ:	func = efscmpeq;	type = XCR;	break;
> +		case EFSCMPGT:	func = efscmpgt;	type = XCR;	break;
> +		case EFSCMPLT:	func = efscmplt;	type = XCR;	break;
> +		case EFSCTSF:	func = efsctsf;		type = XB;	break;
> +		case EFSCTSI:	func = efsctsi;		type = XB;	break;
> +		case EFSCTSIZ:	func = efsctsiz;	type = XB;	break;
> +		case EFSCTUF:	func = efsctuf;		type = XB;	break;
> +		case EFSCTUI:	func = efsctui;		type = XB;	break;
> +		case EFSCTUIZ:	func = efsctuiz;	type = XB;	break;
> +		case EFSDIV:	func = efsdiv;		type = AB;	break;
> +		case EFSMUL:	func = efsmul;		type = AB;	break;
> +		case EFSNABS:	func = efsnabs;		type = XA;	break;
> +		case EFSNEG:	func = efsneg;		type = XA;	break;
> +		case EFSSUB:	func = efssub;		type = AB;	break;
> +
> +		case EVFSABS:	func = evfsabs;		type = XA;	break;
> +		case EVFSADD:	func = evfsadd;		type = AB;      break;
> +		case EVFSCMPEQ:	func = evfscmpeq;	type = XCR;	break;
> +		case EVFSCMPGT:	func = evfscmpgt;	type = XCR;	break;
> +		case EVFSCMPLT:	func = evfscmplt;	type = XCR;	break;
> +		case EVFSCTSF:	func = evfsctsf;	type = XB;	break;
> +		case EVFSCTSI:	func = evfsctsi;	type = XB;	break;
> +		case EVFSCTSIZ:	func = evfsctsiz;	type = XB;	break;
> +		case EVFSCTUF:	func = evfsctuf;	type = XB;	break;
> +		case EVFSCTUI:	func = evfsctui;	type = XB;	break;
> +		case EVFSCTUIZ:	func = evfsctuiz;	type = XB;	break;
> +		case EVFSDIV:	func = evfsdiv;		type = AB;	break;
> +		case EVFSMUL:	func = evfsmul;		type = AB;	break;
> +		case EVFSNABS:	func = evfsnabs;	type = XA;	break;
> +		case EVFSNEG:	func = evfsneg;		type = XA;	break;
> +		case EVFSSUB:	func = evfssub;		type = AB;	break;
> +
> +		case EFDABS:	func = efdabs;		type = XA;	break;
> +		case EFDADD:	func = efdadd;		type = AB;	break;
> +		case EFDCFS:	func = efdcfs;		type = XB;	break;
> +		case EFDCMPEQ:	func = efdcmpeq;	type = XCR;	break;
> +		case EFDCMPGT:	func = efdcmpgt;	type = XCR;	break;
> +		case EFDCMPLT:	func = efdcmplt;	type = XCR;	break;
> +		case EFDCTSF:	func = efdctsf;		type = XB;	break;
> +		case EFDCTSI:	func = efdctsi;		type = XB;	break;
> +		case EFDCTSIDZ:	func = efdctsidz;	type = XB;	break;
> +		case EFDCTSIZ:	func = efdctsiz;	type = XB;	break;
> +		case EFDCTUF:	func = efdctuf;		type = XB;	break;
> +		case EFDCTUI:	func = efdctui;		type = XB;	break;
> +		case EFDCTUIDZ:	func = efdctuidz;	type = XB;	break;
> +		case EFDCTUIZ:	func = efdctuiz;	type = XB;	break;
> +		case EFDDIV:	func = efddiv;		type = AB;	break;
> +		case EFDMUL:	func = efdmul;		type = AB;	break;
> +		case EFDNABS:	func = efdnabs;		type = XA;	break;
> +		case EFDNEG:	func = efdneg;		type = XA;	break;
> +		case EFDSUB:	func = efdsub;		type = AB;	break;		
> +		default:
> +			goto illegal;
> +		}
> +		break;
> +	default:
> +		goto illegal;
> +	}
> +
> +	switch (type) {
> +	case AB:
> +		op0 = &fullgprs[(speinsn >> 21) & 0x1f];
> +		op1 = &fullgprs[(speinsn >> 16) & 0x1f];
> +		op2 = &fullgprs[(speinsn >> 11) & 0x1f];
> +		break;
> +
> +	case XA:
> +		op0 = &fullgprs[(speinsn >> 21) & 0x1f];
> +		op1 = &fullgprs[(speinsn >> 16) & 0x1f];
> +		break;
> +
> +	case XB:
> +		op0 = &fullgprs[(speinsn >> 21) & 0x1f];
> +		op1 = &fullgprs[(speinsn >> 11) & 0x1f];
> +		break;
> +	
> +	case XCR:
> +		op0 = (void *)&regs->ccr;
> +		op1 = (void *)((speinsn >> 23) & 0x7);
> +		op2 = &fullgprs[(speinsn >> 16) & 0x1f];
> +		op3 = &fullgprs[(speinsn >> 11) & 0x1f];
> +		break;
> +
> +	default:
> +		goto illegal;
> +	}
> +
> +	flag = func(op0, op1, op2, op3);
> +	
> +	switch ((speinsn >> 5) & 0x7 ) {
> +	case SPFP:
> +		for (i = 0; i < 32; i++) {
> +			regs->gpr[i] = fullgprs[i] >> 32;
> +		};
> +		break;
> +	default:
> +		for (i = 0; i < 32; i++) {
> +			regs->gpr[i] = fullgprs[i];
> +			current->thread.evr[i] = fullgprs[i] >> 32;
> +		};
> +	}
> +	
> +	current->thread.spefscr &= 0x3f;

can't clear spefscr this way.

Need to do what the existing handler was doing here.

> +	return 0;
> +
> +illegal:
> +	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered  
> un-supported instruction.\n");
> +	return -ENOSYS;
> +}
> diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/ 
> Makefile
> index 04d4917..1e74e15 100644
> --- a/arch/powerpc/sysdev/Makefile
> +++ b/arch/powerpc/sysdev/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o
>  obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
>  obj-$(CONFIG_QUICC_ENGINE)	+= qe_lib/
>  obj-$(CONFIG_MTD)		+= rom.o
> +obj-$(CONFIG_SPE)		+= sigfpe_handler.o
>
>  ifeq ($(CONFIG_PPC_MERGE),y)
>  obj-$(CONFIG_PPC_I8259)		+= i8259.o
> -- 
> 1.4.0

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  6:40 ` Kumar Gala
@ 2007-01-12  7:45   ` Zhu Ebony-r57400
  2007-01-12 11:05     ` Benjamin Herrenschmidt
  2007-01-12 18:53     ` [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions Kumar Gala
  0 siblings, 2 replies; 20+ messages in thread
From: Zhu Ebony-r57400 @ 2007-01-12  7:45 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, paulus

Hi Kumar

Please see my inline comments.

Ebony

> -----Original Message-----
> From: Kumar Gala [mailto:galak@kernel.crashing.org]=20
> Sent: 2007年1月12日 14:41
> To: Zhu Ebony-r57400
> Cc: paulus@samba.org; linuxppc-dev@ozlabs.org
> Subject: Re: [patch][5/5] powerpc: Add the general support=20
> for Embedded Floating-Point instructions
>=20
>=20
> On Jan 11, 2007, at 11:31 PM, ebony.zhu@freescale.com wrote:
>=20
> > Add the general support for Embedded Floating-Point instructions to=20
> > fully comply with IEEE-754.
> >
> > Signed-off-by:Ebony Zhu <ebony.zhu@freescale.com>
> > ---
> >  arch/powerpc/Makefile                |    5 +
> >  arch/powerpc/kernel/entry_32.S       |   50 ++++++
> >  arch/powerpc/kernel/head_booke.h     |    4
> >  arch/powerpc/kernel/head_fsl_booke.S |   22 ++-
> >  arch/powerpc/kernel/traps.c          |   17 ++
> >  arch/powerpc/math-emu/Makefile       |   29 ++-
> >  arch/powerpc/math-emu/sfp-machine.h  |    4
> >  arch/powerpc/sysdev/sigfpe_handler.c |  298 +++++++++++++++++++++++
> > +++++++++++
> >  arch/powerpc/sysdev/Makefile         |    1
> >  9 files changed, 421 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index=20
> > a00fe72..dd0b4b8 100644
> > --- a/arch/powerpc/Makefile
> > +++ b/arch/powerpc/Makefile
> > @@ -134,6 +134,11 @@ core-y				+=3D=20
> arch/powerpc/kernel/ \
> >  				   arch/powerpc/lib/ \
> >  				   arch/powerpc/sysdev/ \
> >  				   arch/powerpc/platforms/
> > +ifeq ($(CONFIG_SPE),y)
> > +ifneq ($(CONFIG_MATH_EMULATION),y)
> > +core-y				+=3D arch/powerpc/math-emu/
> > +endif
> > +endif
> >  core-$(CONFIG_MATH_EMULATION)	+=3D arch/powerpc/math-emu/
> >  core-$(CONFIG_XMON)		+=3D arch/powerpc/xmon/
> >
> > diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/=20
> > entry_32.S index c03e829..a3d4ece 100644
> > --- a/arch/powerpc/kernel/entry_32.S
> > +++ b/arch/powerpc/kernel/entry_32.S
> > @@ -616,6 +616,56 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
> >  	.long	ret_from_except
> >  #endif
> >
> > +#ifdef CONFIG_SPE
> > +	.globl  ret_from_except_spe_full
> > +ret_from_except_spe_full:
> > +	REST_NVGPRS(r1)
> > +	/* fall through */
> > +	LOAD_MSR_KERNEL(r10,MSR_KERNEL)
> > +	SYNC			/* Some chip revs have problems=20
> here... */
> > +	MTMSRD(r10)		/* disable interrupts */
> > +
> > +	lwz     r0,THREAD+THREAD_SPEFSCR(r2)
> > +	mtspr   SPRN_SPEFSCR,r0		/* restore SPEFSCR reg */
> > +
> > +	lwz     r0,GPR0(r1)
> > +	lwz     r2,GPR2(r1)
> > +	REST_4GPRS(3, r1)
> > +	REST_2GPRS(7, r1)
> > +=09
> > +	lwz     r10,_XER(r1)
> > +	lwz     r11,_CTR(r1)
> > +	mtspr   SPRN_XER,r10
> > +	mtctr   r11
> > +
> > +	stwcx.  r0,0,r1                 /* to clear the reservation */
> > +
> > +	lwz     r11,_LINK(r1)
> > +	mtlr    r11
> > +	lwz     r10,_CCR(r1)
> > +	mtcrf   0xff,r10
> > +	REST_2GPRS(9, r1)
> > +
> > +	mtspr   SPRN_SPRG0,r11
> > +	mtspr   SPRN_SPRG1,r12
> > +	mfmsr   r11
> > +	oris  r11, r11, MSR_SPE@h
> > +	mtmsr   r11
> > +	mfspr   r12,SPRN_SPRG3
> > +	REST_32EVRS(0, r11,r12)
> > +	mfspr   r11,SPRN_SPRG0
> > +	mfspr   r12,SPRN_SPRG1
> > +
> > +	lwz     r11,_NIP(r1)
> > +	lwz     r12,_MSR(r1)
> > +	mtspr   SPRN_SRR0,r11
> > +	mtspr   SPRN_SRR1,r12
> > +	REST_2GPRS(11, r1)
> > +	lwz     r1,GPR1(r1)
> > +
> > +	rfi
> > +	b	.		/* prevent prefetch past rfi */
>=20
> Why do we need a separate ret_from_except_spe_full?

I'm not sure whether the kernel can return from exceptions in a multi-threaded
way, but the fact is that if the EVRs were restored in the existing
ret_from_except_full, some non-SPE exceptions would also try to restore the
EVRs. At that time MSR[SPE] may not be enabled, which would cause an error.

>=20
> > +#endif
> >  	.globl	ret_from_except_full
> >  ret_from_except_full:
> >  	REST_NVGPRS(r1)
> > diff --git a/arch/powerpc/kernel/head_booke.h=20
> b/arch/powerpc/kernel/=20
> > head_booke.h index 8536e76..1e14d3e 100644
> > --- a/arch/powerpc/kernel/head_booke.h
> > +++ b/arch/powerpc/kernel/head_booke.h
> > @@ -195,6 +195,10 @@ #define EXC_XFER_EE(n, hdlr)		\
> >  	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE,=20
> > transfer_to_handler_full, \
> >  			  ret_from_except_full)
> >
> > +#define EXC_XFER_EE_SPE(n, hdlr)	\
> > +	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE,
> > transfer_to_handler_full, \
> > +			  ret_from_except_spe_full)
> > +
> >  #define EXC_XFER_EE_LITE(n, hdlr)	\
> >  	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE,=20
> > transfer_to_handler, \
> >  			  ret_from_except)
> > diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/=20
> > kernel/head_fsl_booke.S index 66877bd..56200b6 100644
> > --- a/arch/powerpc/kernel/head_fsl_booke.S
> > +++ b/arch/powerpc/kernel/head_fsl_booke.S
> > @@ -699,7 +699,25 @@ #endif /* CONFIG_SPE */
> >
> >  	/* SPE Floating Point Data */
> >  #ifdef CONFIG_SPE
> > -	EXCEPTION(0x2030, SPEFloatingPointData, =20
> > SPEFloatingPointException, EXC_XFER_EE);
> > +	START_EXCEPTION(SPEFloatingPointData)
> > +	mtspr   SPRN_SPRG0,r3
> > +	mtspr   SPRN_SPRG1,r4
> > +	mfmsr   r3
> > +	oris    r3, r3, MSR_SPE@h
> > +	mtmsr   r3
> > +	mfspr   r3, SPRN_SRR0
> > +	lwz     r3, 0(r3)
> > +	lis     r4, speinsn@ha
> > +	stw     r3, speinsn@l(r4)
> > +	mfspr   r4, SPRN_SPRG3
> > +	mfspr   r3, SPRN_SPEFSCR
> > +	stw     r3, THREAD_SPEFSCR(r4)
> > +	SAVE_32EVRS(0, r3, r4)
> > +	mfspr   r3, SPRN_SPRG0
> > +	mfspr   r4, SPRN_SPRG1
> > +	NORMAL_EXCEPTION_PROLOG
> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
> > +	EXC_XFER_EE_SPE(0x2030, SPEFloatingPointException)
>=20
> Why do we have to do any of this in the exception path?
>=20
> SPEFloatingPointException can have something like:
>=20
> 	unsigned long pc =3D regs->nip;
> 	flush_spe_to_thread(current);
>=20
> 	if (get_user(insn, (u32 *)pc))
> 		return -EFAULT;
>=20
> 	...
>=20

Because I wanted to save the EVRs before entering SPEFloatingPointException.
Thanks for suggesting an alternative; I will try it and see if it works.

> >  #else
> >  	EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception,
> > EXC_XFER_EE)
> >  #endif /* CONFIG_SPE */
> > @@ -840,6 +858,8 @@ load_up_spe:
> >  	oris	r5,r5,MSR_SPE@h
> >  	mtmsr	r5			/* enable use of SPE now */
> >  	isync
> > +	li      r5,(SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | =20
> > SPEFSCR_FOVFE)
> > +	mtspr   SPRN_SPEFSCR,r5
>=20
> If you want to initialize spefscr so the enables are set do=20
> it in INIT_THREAD.  I need to think more about if we should=20
> even be doing that in the kernel.

Ok, let's keep discussing this.

>=20
> >  /*
> >   * For SMP, we don't do lazy SPE switching because it just gets too
> >   * horrendously complex, especially when a task switches=20
> from one CPU=20
> > diff --git a/arch/powerpc/kernel/traps.c=20
> b/arch/powerpc/kernel/traps.c=20
> > index 535f506..68407d4 100644
> > --- a/arch/powerpc/kernel/traps.c
> > +++ b/arch/powerpc/kernel/traps.c
> > @@ -986,6 +986,22 @@ #endif /* CONFIG_FSL_BOOKE */  #ifdef=20
> CONFIG_SPE =20
> > void SPEFloatingPointException(struct pt_regs *regs)  {
> > +#ifdef CONFIG_E500
> > +	extern int sigfpe_handler(struct pt_regs *regs);
> > +	int err;
> > +	if (current->thread.spefscr & ~0x3f) {
> > +		err =3D sigfpe_handler(regs);
> > +		if (err =3D=3D 0) {
> > +			regs->nip +=3D 4;
> > +			return;
> > +		} else {
> > +			current->thread.spefscr =3D 0x0;
>=20
> Why do clear spefscr and re-execute?

I tried to make the code more robust here. Currently, all the SPE instructions
that may cause an exception are handled. But if the instruction set is
extended someday with instructions that the software does not support, this
code makes sure we handle them with the PowerPC default values, which won't
crash the kernel.

>=20
> > +			return;
> > +		}
> > +	} else {
> > +		return;
>=20
> How else would we get here if not by having spefscr bit set?

As above, I wanted to make it more robust. If no unexpected condition can
occur, we can remove this.

>=20
> > +	}
> > +#else
> >  	unsigned long spefscr;
> >  	int fpexc_mode;
> >  	int code =3D 0;
> > @@ -1016,6 +1032,7 @@ void SPEFloatingPointException(struct pt
> >
> >  	_exception(SIGFPE, regs, code, regs->nip);
> >  	return;
>=20
> Your code should supersede the handling code that's currently there.

OK, I agree.

>=20
> > +#endif
> >  }
> >  #endif
> >
> > diff --git a/arch/powerpc/math-emu/Makefile=20
> b/arch/powerpc/math-emu/=20
> > Makefile index 29bc912..dbb3e26 100644
> > --- a/arch/powerpc/math-emu/Makefile
> > +++ b/arch/powerpc/math-emu/Makefile
> > @@ -1,16 +1,29 @@
> >
> > -obj-y				:=3D math.o fmr.o lfd.o stfd.o
> > -
> > -obj-$(CONFIG_MATH_EMULATION)	+=3D fabs.o fadd.o=20
> fadds.o fcmpo.o =20
> > fcmpu.o \
> > -					fctiw.o fctiwz.o fdiv.o=20
> fdivs.o \
> > +obj-y				:=3D fabs.o fadd.o fdiv.o fmul.o \
> > +					fneg.o fsub.o types.o=20
> udivmodti4.o
> > +				=09
> > +obj-$(CONFIG_MATH_EMULATION)	+=3D math.o fmr.o lfd.o stfd.o \
> > +					fadds.o fcmpo.o fcmpu.o \
> > +					fctiw.o fctiwz.o fdivs.o \
> >  					fmadd.o fmadds.o=20
> fmsub.o fmsubs.o \
> > -					fmul.o fmuls.o fnabs.o=20
> fneg.o types.o \
> > +					fmuls.o fnabs.o \
> >  					fnmadd.o fnmadds.o=20
> fnmsub.o fnmsubs.o \
> >  					fres.o frsp.o frsqrte.o=20
> fsel.o lfs.o \
> > -					fsqrt.o	fsqrts.o fsub.o=20
> fsubs.o \
> > +					fsqrt.o	fsqrts.o fsubs.o \
> >  					mcrfs.o mffs.o mtfsb0.o=20
> mtfsb1.o \
> > -					mtfsf.o mtfsfi.o=20
> stfiwx.o stfs.o \
> > -					udivmodti4.o
> > +					mtfsf.o mtfsfi.o stfiwx.o stfs.o
> > +
> > +obj-$(CONFIG_SPE)		+=3D efsabs.o efsadd.o efscfd.o=20
> efscmpeq.o \
> > +					efscmpgt.o efscmplt.o=20
> efsctsf.o efsctsi.o \
> > +					efsctsiz.o efsctuf.o=20
> efsctui.o efsctuiz.o \
> > +					efsdiv.o efsmul.o=20
> efsnabs.o efsneg.o efssub.o \
> > +					evfsabs.o evfsadd.o=20
> evfscmpeq.o evfscmpgt.o \
> > +					evfscmplt.o evfsctsf.o=20
> evfsctsi.o evfsctsiz.o \
> > +					evfsctuf.o evfsctui.o=20
> evfsctuiz.o evfsdiv.o \
> > +					evfsmul.o evfsnabs.o=20
> evfsneg.o evfssub.o \
> > +					efdcfs.o efdcmpeq.o=20
> efdcmpgt.o efdcmplt.o efdctsf.o \
> > +					efdctsi.o efdctsidz.o=20
> efdctsiz.o efdctuf.o \
> > +					efdctui.o efdctuidz.o=20
> efdctuiz.o efdnabs.o
> >
> >  CFLAGS_fabs.o =3D -fno-builtin-fabs
> >  CFLAGS_math.o =3D -fno-builtin-fabs
> > diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/=20
> > math-emu/sfp-machine.h index 4b17d83..313734d 100644
> > --- a/arch/powerpc/math-emu/sfp-machine.h
> > +++ b/arch/powerpc/math-emu/sfp-machine.h
> > @@ -166,7 +166,11 @@ #define __FP_PACK_RAW_2(fs, val, X)=09
> 		\
> >  #include <linux/kernel.h>
> >  #include <linux/sched.h>
> >
> > +#ifdef CONFIG_SPE
> > +#define __FPU_FPSCR	(current->thread.spefscr)
> > +#else
> >  #define __FPU_FPSCR	(current->thread.fpscr.val)
> > +#endif
> >
> >  /* We only actually write to the destination register
> >   * if exceptions signalled (if any) will not trap.
> > diff --git a/arch/powerpc/sysdev/sigfpe_handler.c b/arch/powerpc/=20
> > sysdev/sigfpe_handler.c new file mode 100644 index 0000000..6e809b2
> > --- /dev/null
> > +++ b/arch/powerpc/sysdev/sigfpe_handler.c
> > @@ -0,0 +1,298 @@
> > +/*
> > + * arch/powerpc/sysdev/sigfpe_handler.c
> > + *
> > + * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights
> > reserved.
> > + *
> > + * Author: Ebony Zhu, ebony.zhu@freescale.com
> > + *
> > + * Derived from arch/powerpc/math-emu/math.c
> > + * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
> > + *
> > + * Description:
> > + * This file is the exception handler to make E500 SPE instructions
> > + * fully comply with IEEE-754 floating point standard.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version
> > + * 2 of the License, or (at your option) any later version.
> > + */
> > +
> > +#include <linux/types.h>
> > +
> > +#include <asm/uaccess.h>
> > +#include <asm/reg.h>
> > +
> > +#define SPEFUNC(x)	extern int x(void *, void *, void *, void *)
> > +#define efdabs	fabs
> > +#define efdadd	fadd
> > +#define efdsub	fsub
> > +#define efddiv	fdiv
> > +#define efdmul	fmul
> > +#define efdneg	fneg
> > +
> > +/* Scalar SPFP functions */
> > +SPEFUNC(efsabs);
> > +SPEFUNC(efsadd);
> > +SPEFUNC(efscfd);
> > +SPEFUNC(efscmpeq);
> > +SPEFUNC(efscmpgt);
> > +SPEFUNC(efscmplt);
> > +SPEFUNC(efsctsf);
> > +SPEFUNC(efsctsi);
> > +SPEFUNC(efsctsiz);
> > +SPEFUNC(efsctuf);
> > +SPEFUNC(efsctui);
> > +SPEFUNC(efsctuiz);
> > +SPEFUNC(efsdiv);
> > +SPEFUNC(efsmul);
> > +SPEFUNC(efsnabs);
> > +SPEFUNC(efsneg);
> > +SPEFUNC(efssub);
> > +
> > +/* Vector Floating-Point functions */ SPEFUNC(evfsabs);=20
> > +SPEFUNC(evfsadd); SPEFUNC(evfscmpeq); SPEFUNC(evfscmpgt);=20
> > +SPEFUNC(evfscmplt); SPEFUNC(evfsctsf); SPEFUNC(evfsctsi);=20
> > +SPEFUNC(evfsctsiz); SPEFUNC(evfsctuf); SPEFUNC(evfsctui);=20
> > +SPEFUNC(evfsctuiz); SPEFUNC(evfsdiv); SPEFUNC(evfsmul);=20
> > +SPEFUNC(evfsnabs); SPEFUNC(evfsneg); SPEFUNC(evfssub);
> > +
> > +/* Scalar DPFP functions */
> > +SPEFUNC(efdabs);
> > +SPEFUNC(efdadd);
> > +SPEFUNC(efdcfs);
> > +SPEFUNC(efdcmpeq);
> > +SPEFUNC(efdcmpgt);
> > +SPEFUNC(efdcmplt);
> > +SPEFUNC(efdctsf);
> > +SPEFUNC(efdctsi);
> > +SPEFUNC(efdctsidz);
> > +SPEFUNC(efdctsiz);
> > +SPEFUNC(efdctuf);
> > +SPEFUNC(efdctui);
> > +SPEFUNC(efdctuidz);
> > +SPEFUNC(efdctuiz);
> > +SPEFUNC(efddiv);
> > +SPEFUNC(efdmul);
> > +SPEFUNC(efdnabs);
> > +SPEFUNC(efdneg);
> > +SPEFUNC(efdsub);
> > +
> > +#define VCT		0x4
>=20
> not used?

Currently not. We can remove it then.


>=20
> > +#define SPFP		0x6
> > +#define DPFP		0x7
> > +#define EFAPU		0x4
> > +
> > +#define EFSADD		0x2c0
> > +#define EFSSUB		0x2c1
> > +#define EFSABS		0x2c4
> > +#define EFSNABS		0x2c5
> > +#define EFSNEG		0x2c6
> > +#define EFSMUL		0x2c8
> > +#define EFSDIV		0x2c9
> > +#define EFSCMPGT	0x2cc
> > +#define EFSCMPLT	0x2cd
> > +#define EFSCMPEQ	0x2ce
> > +#define EFSCFD		0x2cf
> > +#define EFSCTUI		0x2d4
> > +#define EFSCTSI		0x2d5
> > +#define EFSCTUF		0x2d6
> > +#define EFSCTSF		0x2d7
> > +#define EFSCTUIZ	0x2d8
> > +#define EFSCTSIZ	0x2da
> > +
> > +#define EVFSADD		0x280
> > +#define EVFSSUB		0x281
> > +#define EVFSABS		0x284
> > +#define EVFSNABS	0x285
> > +#define EVFSNEG		0x286
> > +#define EVFSMUL		0x288
> > +#define EVFSDIV		0x289
> > +#define EVFSCMPGT	0x28c
> > +#define EVFSCMPLT	0x28d
> > +#define EVFSCMPEQ	0x28e
> > +#define EVFSCTUI	0x294
> > +#define EVFSCTSI	0x295
> > +#define EVFSCTUF	0x296
> > +#define EVFSCTSF	0x297
> > +#define EVFSCTUIZ	0x298
> > +#define EVFSCTSIZ	0x29a
> > +
> > +#define EFDADD		0x2e0
> > +#define EFDSUB		0x2e1
> > +#define EFDABS		0x2e4
> > +#define EFDNABS		0x2e5
> > +#define EFDNEG		0x2e6
> > +#define EFDMUL		0x2e8
> > +#define EFDDIV		0x2e9
> > +#define EFDCTUIDZ	0x2ea
> > +#define EFDCTSIDZ	0x2eb
> > +#define EFDCMPGT	0x2ec
> > +#define EFDCMPLT	0x2ed
> > +#define EFDCMPEQ	0x2ee
> > +#define EFDCFS		0x2ef
> > +#define EFDCTUI		0x2f4
> > +#define EFDCTSI		0x2f5
> > +#define EFDCTUF		0x2f6
> > +#define EFDCTSF		0x2f7
> > +#define EFDCTUIZ	0x2f8
> > +#define EFDCTSIZ	0x2fa
> > +
> > +#define AB	2
> > +#define XA	3
> > +#define XB	4
> > +#define XCR	5=09
> > +
> > +static u64 fullgprs[32];
> > +u32 speinsn;
> > +
> > +int
> > +sigfpe_handler(struct pt_regs *regs)
> > +{
> > +	void *op0 =3D 0, *op1 =3D 0, *op2 =3D 0, *op3 =3D 0;
> > +	int i;
> > +	int (*func)(void *, void *, void *, void *);
> > +	int type =3D 0;
> > +	int flag;
> > +=09
> > +	switch ((speinsn >> 5) & 0x7 ) {
> > +	case SPFP:
> > +		for(i =3D 0; i < 32; i++) {
> > +			fullgprs[i] =3D regs->gpr[i];
> > +			fullgprs[i] =3D fullgprs[i] << 32 |=20
> current->thread.evr[i];
> > +		};
> > +		break;
> > +	default:
> > +		for(i =3D 0; i < 32; i++) {
> > +			fullgprs[i] =3D current->thread.evr[i];
> > +			fullgprs[i] =3D (fullgprs[i] << 32) |=20
> (regs->gpr[i]);
> > +		};
> > +	}
> > +
> > +	switch (speinsn >> 26) {
> > +=09
> > +	case EFAPU:
> > +		switch (speinsn & 0x7ff) {
> > +		case EFSABS:	func =3D efsabs;		type =3D=20
> XA;	break;
> > +		case EFSADD:	func =3D efsadd;		type =3D=20
> AB;      break;
> > +		case EFSCFD:	func =3D efscfd;		type =3D=20
> XB;	break;
> > +		case EFSCMPEQ:	func =3D efscmpeq;	type =3D=20
> XCR;	break;
> > +		case EFSCMPGT:	func =3D efscmpgt;	type =3D=20
> XCR;	break;
> > +		case EFSCMPLT:	func =3D efscmplt;	type =3D=20
> XCR;	break;
> > +		case EFSCTSF:	func =3D efsctsf;		type =3D=20
> XB;	break;
> > +		case EFSCTSI:	func =3D efsctsi;		type =3D=20
> XB;	break;
> > +		case EFSCTSIZ:	func =3D efsctsiz;	type =3D=20
> XB;	break;
> > +		case EFSCTUF:	func =3D efsctuf;		type =3D=20
> XB;	break;
> > +		case EFSCTUI:	func =3D efsctui;		type =3D=20
> XB;	break;
> > +		case EFSCTUIZ:	func =3D efsctuiz;	type =3D=20
> XB;	break;
> > +		case EFSDIV:	func =3D efsdiv;		type =3D=20
> AB;	break;
> > +		case EFSMUL:	func =3D efsmul;		type =3D=20
> AB;	break;
> > +		case EFSNABS:	func =3D efsnabs;		type =3D=20
> XA;	break;
> > +		case EFSNEG:	func =3D efsneg;		type =3D=20
> XA;	break;
> > +		case EFSSUB:	func =3D efssub;		type =3D=20
> AB;	break;
> > +
> > +		case EVFSABS:	func =3D evfsabs;		type =3D=20
> XA;	break;
> > +		case EVFSADD:	func =3D evfsadd;		type =3D=20
> AB;      break;
> > +		case EVFSCMPEQ:	func =3D evfscmpeq;	type =3D=20
> XCR;	break;
> > +		case EVFSCMPGT:	func =3D evfscmpgt;	type =3D=20
> XCR;	break;
> > +		case EVFSCMPLT:	func =3D evfscmplt;	type =3D=20
> XCR;	break;
> > +		case EVFSCTSF:	func =3D evfsctsf;	type =3D=20
> XB;	break;
> > +		case EVFSCTSI:	func =3D evfsctsi;	type =3D=20
> XB;	break;
> > +		case EVFSCTSIZ:	func =3D evfsctsiz;	type =3D=20
> XB;	break;
> > +		case EVFSCTUF:	func =3D evfsctuf;	type =3D=20
> XB;	break;
> > +		case EVFSCTUI:	func =3D evfsctui;	type =3D=20
> XB;	break;
> > +		case EVFSCTUIZ:	func =3D evfsctuiz;	type =3D=20
> XB;	break;
> > +		case EVFSDIV:	func =3D evfsdiv;		type =3D=20
> AB;	break;
> > +		case EVFSMUL:	func =3D evfsmul;		type =3D=20
> AB;	break;
> > +		case EVFSNABS:	func =3D evfsnabs;	type =3D=20
> XA;	break;
> > +		case EVFSNEG:	func =3D evfsneg;		type =3D=20
> XA;	break;
> > +		case EVFSSUB:	func =3D evfssub;		type =3D=20
> AB;	break;
> > +
> > +		case EFDABS:	func =3D efdabs;		type =3D=20
> XA;	break;
> > +		case EFDADD:	func =3D efdadd;		type =3D=20
> AB;	break;
> > +		case EFDCFS:	func =3D efdcfs;		type =3D=20
> XB;	break;
> > +		case EFDCMPEQ:	func =3D efdcmpeq;	type =3D=20
> XCR;	break;
> > +		case EFDCMPGT:	func =3D efdcmpgt;	type =3D=20
> XCR;	break;
> > +		case EFDCMPLT:	func =3D efdcmplt;	type =3D=20
> XCR;	break;
> > +		case EFDCTSF:	func =3D efdctsf;		type =3D=20
> XB;	break;
> > +		case EFDCTSI:	func =3D efdctsi;		type =3D=20
> XB;	break;
> > +		case EFDCTSIDZ:	func =3D efdctsidz;	type =3D=20
> XB;	break;
> > +		case EFDCTSIZ:	func =3D efdctsiz;	type =3D=20
> XB;	break;
> > +		case EFDCTUF:	func =3D efdctuf;		type =3D=20
> XB;	break;
> > +		case EFDCTUI:	func =3D efdctui;		type =3D=20
> XB;	break;
> > +		case EFDCTUIDZ:	func =3D efdctuidz;	type =3D=20
> XB;	break;
> > +		case EFDCTUIZ:	func =3D efdctuiz;	type =3D=20
> XB;	break;
> > +		case EFDDIV:	func =3D efddiv;		type =3D=20
> AB;	break;
> > +		case EFDMUL:	func =3D efdmul;		type =3D=20
> AB;	break;
> > +		case EFDNABS:	func =3D efdnabs;		type =3D=20
> XA;	break;
> > +		case EFDNEG:	func =3D efdneg;		type =3D=20
> XA;	break;
> > +		case EFDSUB:	func =3D efdsub;		type =3D=20
> AB;	break;	=09
> > +		default:
> > +			goto illegal;
> > +		}
> > +		break;
> > +	default:
> > +		goto illegal;
> > +	}
> > +
> > +	switch (type) {
> > +	case AB:
> > +		op0 =3D &fullgprs[(speinsn >> 21) & 0x1f];
> > +		op1 =3D &fullgprs[(speinsn >> 16) & 0x1f];
> > +		op2 =3D &fullgprs[(speinsn >> 11) & 0x1f];
> > +		break;
> > +
> > +	case XA:
> > +		op0 =3D &fullgprs[(speinsn >> 21) & 0x1f];
> > +		op1 =3D &fullgprs[(speinsn >> 16) & 0x1f];
> > +		break;
> > +
> > +	case XB:
> > +		op0 =3D &fullgprs[(speinsn >> 21) & 0x1f];
> > +		op1 =3D &fullgprs[(speinsn >> 11) & 0x1f];
> > +		break;
> > +=09
> > +	case XCR:
> > +		op0 =3D (void *)&regs->ccr;
> > +		op1 =3D (void *)((speinsn >> 23) & 0x7);
> > +		op2 =3D &fullgprs[(speinsn >> 16) & 0x1f];
> > +		op3 =3D &fullgprs[(speinsn >> 11) & 0x1f];
> > +		break;
> > +
> > +	default:
> > +		goto illegal;
> > +	}
> > +
> > +	flag =3D func(op0, op1, op2, op3);
> > +=09
> > +	switch ((speinsn >> 5) & 0x7 ) {
> > +	case SPFP:
> > +		for (i =3D 0; i < 32; i++) {
> > +			regs->gpr[i] =3D fullgprs[i] >> 32;
> > +		};
> > +		break;
> > +	default:
> > +		for (i =3D 0; i < 32; i++) {
> > +			regs->gpr[i] =3D fullgprs[i];
> > +			current->thread.evr[i] =3D fullgprs[i] >> 32;
> > +		};
> > +	}
> > +=09
> > +	current->thread.spefscr &=3D 0x3f;
>=20
> can't clear spefscr this way.
>=20
> Need to do what the existing handler was doing here.

Do you mean I need to do it like this:

	unsigned long spefscr;
	spefscr =3D current->thread.spefscr;
	spefscr =3D 0x3f;
	current->spefscr =3D spefscr ;

I don't really understand...

>=20
> > +	return 0;
> > +
> > +illegal:
> > +	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler=20
> encountered
> > un-supported instruction.\n");
> > +	return -ENOSYS;
> > +}
> > diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/=20
> > Makefile index 04d4917..1e74e15 100644
> > --- a/arch/powerpc/sysdev/Makefile
> > +++ b/arch/powerpc/sysdev/Makefile
> > @@ -13,6 +13,7 @@ obj-$(CONFIG_FSL_SOC)		+=3D fsl_soc.o
> >  obj-$(CONFIG_TSI108_BRIDGE)	+=3D tsi108_pci.o tsi108_dev.o
> >  obj-$(CONFIG_QUICC_ENGINE)	+=3D qe_lib/
> >  obj-$(CONFIG_MTD)		+=3D rom.o
> > +obj-$(CONFIG_SPE)		+=3D sigfpe_handler.o
> >
> >  ifeq ($(CONFIG_PPC_MERGE),y)
> >  obj-$(CONFIG_PPC_I8259)		+=3D i8259.o
> > --
> > 1.4.0
>=20
>=20

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  5:31 [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions ebony.zhu
  2007-01-12  6:40 ` Kumar Gala
@ 2007-01-12  9:52 ` Christoph Hellwig
  2007-01-12 10:23   ` Zhu Ebony-r57400
  2007-01-12 18:57   ` Kumar Gala
  1 sibling, 2 replies; 20+ messages in thread
From: Christoph Hellwig @ 2007-01-12  9:52 UTC (permalink / raw)
  To: ebony.zhu; +Cc: linuxppc-dev, paulus

On Fri, Jan 12, 2007 at 01:31:02PM +0800, ebony.zhu@freescale.com wrote:
> +ifeq ($(CONFIG_SPE),y)
> +ifneq ($(CONFIG_MATH_EMULATION),y)
> +core-y				+= arch/powerpc/math-emu/
> +endif
> +endif
>  core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/

This statement doesn't make any sense.  I guess you always want to
build arch/powerpc/math-emu if CONFIG_SPE is set, right?  The proper
way to do that is to force CONFIG_MATH_EMULATION in the Kconfig.

Then again we need a really good explanation why CONFIG_SPE should
force the math emulation to be built.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  9:52 ` Christoph Hellwig
@ 2007-01-12 10:23   ` Zhu Ebony-r57400
  2007-01-12 12:36     ` Segher Boessenkool
  2007-01-12 18:57   ` Kumar Gala
  1 sibling, 1 reply; 20+ messages in thread
From: Zhu Ebony-r57400 @ 2007-01-12 10:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linuxppc-dev, paulus

=20

> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@lst.de]=20
> Sent: 2007年1月12日 17:52
> To: Zhu Ebony-r57400
> Cc: paulus@samba.org; linuxppc-dev@ozlabs.org
> Subject: Re: [patch][5/5] powerpc: Add the general support=20
> for Embedded Floating-Point instructions
>=20
> On Fri, Jan 12, 2007 at 01:31:02PM +0800,=20
> ebony.zhu@freescale.com wrote:
> > +ifeq ($(CONFIG_SPE),y)
> > +ifneq ($(CONFIG_MATH_EMULATION),y)
> > +core-y				+=3D arch/powerpc/math-emu/
> > +endif
> > +endif
> >  core-$(CONFIG_MATH_EMULATION)	+=3D arch/powerpc/math-emu/
>=20
> This statement doesn't make any sense.  I guess you always=20
> want to build arch/powerpc/math-emu if CONFIG_SPE is set,=20
> right?  The proper way to do that is to force=20
> CONFIG_MATH_EMULATION in the Kconfig.
>=20
> Then again we need a really good explanation why CONFIG_SPE=20
> should force the math emulation to be built.
>=20
>=20

Yes, you are right. If CONFIG_SPE is set, I want some files in
arch/powerpc/math-emu to be built. The original kernel builds the math
emulation only if CONFIG_MATH_EMULATION is set, and I don't want to break that.

CONFIG_SPE doesn't force the whole math emulation to be built. If CONFIG_SPE
is set, only the new SPFP/DPFP/VSPFP instructions in arch/powerpc/math-emu
will be built, while the existing FPU instructions won't. This is controlled
by the Makefile in arch/powerpc/math-emu.

Therefore, what I'm trying to do is build the arch/powerpc/math-emu directory
if either CONFIG_MATH_EMULATION or CONFIG_SPE is set. Do you have any ideas?

Thanks.
Ebony

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  7:45   ` Zhu Ebony-r57400
@ 2007-01-12 11:05     ` Benjamin Herrenschmidt
  2007-01-12 18:39       ` Kumar Gala
  2007-01-15  8:06       ` [patch][5/5] powerpc: Add the general support for EmbeddedFloating-Point instructions Zhu Ebony-r57400
  2007-01-12 18:53     ` [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions Kumar Gala
  1 sibling, 2 replies; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2007-01-12 11:05 UTC (permalink / raw)
  To: Zhu Ebony-r57400; +Cc: linuxppc-dev, paulus


> > Why do we need a separate ret_from_except_spe_full?
> 
> I'm not sure if the kernel will return from exception in a multi-thread
> way, but the truth is if restoring EVRs in exsiting ret_from_except_full,
> some non-SPE exception will try to retore EVR. At that time, the
> MSR[SPE] may not be enabled, which will cause error.

Hrm... you can restore them before returning from the exception if you
are careful about doing that with preempt/irqs off I suppose.

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 10:23   ` Zhu Ebony-r57400
@ 2007-01-12 12:36     ` Segher Boessenkool
  2007-01-15  7:58       ` Zhu Ebony-r57400
  0 siblings, 1 reply; 20+ messages in thread
From: Segher Boessenkool @ 2007-01-12 12:36 UTC (permalink / raw)
  To: Zhu Ebony-r57400; +Cc: linuxppc-dev, paulus

> CONFIG_SPE doesn't force the whole math emulation to be built. If 
> CONFIG_SPE is set,
> only the new SPFP/DPFP/VSPFP instructions in arch/powerpc/math-emu 
> will be built,
> while the previous FPU instructions won't.

And the other way around I suppose.

> Therefore, what I'm trying to do is to build directory 
> arch/powerpc/math-emu if
> CONFIG_MATH_EMULATION or CONFIG_SPE is set. Do you have any idea?

Maybe you should really have a separate CONFIG_SPE_MATH_EMU?
Or that might complicate things more than it solves, dunno.


Segher

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 11:05     ` Benjamin Herrenschmidt
@ 2007-01-12 18:39       ` Kumar Gala
  2007-01-12 20:52         ` Benjamin Herrenschmidt
  2007-01-15  8:06       ` [patch][5/5] powerpc: Add the general support for EmbeddedFloating-Point instructions Zhu Ebony-r57400
  1 sibling, 1 reply; 20+ messages in thread
From: Kumar Gala @ 2007-01-12 18:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, paulus


On Jan 12, 2007, at 5:05 AM, Benjamin Herrenschmidt wrote:

>
>>> Why do we need a separate ret_from_except_spe_full?
>>
>> I'm not sure if the kernel will return from exception in a multi- 
>> thread
>> way, but the truth is if restoring EVRs in exsiting  
>> ret_from_except_full,
>> some non-SPE exception will try to retore EVR. At that time, the
>> MSR[SPE] may not be enabled, which will cause error.
>
> Hrm... you can restore them before returning from the exception if you
> are careful about doing that with preempt/irqs off I suppose.

Which is what I thought having the exception be EXC_XFER_EE and  
flush_spe_to_thread should get you.

I don't see any reason to dirty up the 'exception' path for stuff we
can do in C code.

- k

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  7:45   ` Zhu Ebony-r57400
  2007-01-12 11:05     ` Benjamin Herrenschmidt
@ 2007-01-12 18:53     ` Kumar Gala
  2007-01-15  7:48       ` Zhu Ebony-r57400
  1 sibling, 1 reply; 20+ messages in thread
From: Kumar Gala @ 2007-01-12 18:53 UTC (permalink / raw)
  To: Zhu Ebony-r57400; +Cc: linuxppc-dev, paulus


On Jan 12, 2007, at 1:45 AM, Zhu Ebony-r57400 wrote:

> Hi Kumar
>
> Please see my inline comments.
>
> Ebony
>
>> -----Original Message-----
>> From: Kumar Gala [mailto:galak@kernel.crashing.org]
>> Sent: 2007年1月12日 14:41
>> To: Zhu Ebony-r57400
>> Cc: paulus@samba.org; linuxppc-dev@ozlabs.org
>> Subject: Re: [patch][5/5] powerpc: Add the general support
>> for Embedded Floating-Point instructions
>>
>>
>> On Jan 11, 2007, at 11:31 PM, ebony.zhu@freescale.com wrote:
>>
>>> Add the general support for Embedded Floating-Point instructions to
>>> fully comply with IEEE-754.
>>>
>>> Signed-off-by:Ebony Zhu <ebony.zhu@freescale.com>
>>> ---
>>>  arch/powerpc/Makefile                |    5 +
>>>  arch/powerpc/kernel/entry_32.S       |   50 ++++++
>>>  arch/powerpc/kernel/head_booke.h     |    4
>>>  arch/powerpc/kernel/head_fsl_booke.S |   22 ++-
>>>  arch/powerpc/kernel/traps.c          |   17 ++
>>>  arch/powerpc/math-emu/Makefile       |   29 ++-
>>>  arch/powerpc/math-emu/sfp-machine.h  |    4
>>>  arch/powerpc/sysdev/sigfpe_handler.c |  298 +++++++++++++++++++++++
>>> +++++++++++
>>>  arch/powerpc/sysdev/Makefile         |    1
>>>  9 files changed, 421 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index
>>> a00fe72..dd0b4b8 100644
>>> --- a/arch/powerpc/Makefile
>>> +++ b/arch/powerpc/Makefile
>>> @@ -134,6 +134,11 @@ core-y				+=3D
>> arch/powerpc/kernel/ \
>>>  				   arch/powerpc/lib/ \
>>>  				   arch/powerpc/sysdev/ \
>>>  				   arch/powerpc/platforms/
>>> +ifeq ($(CONFIG_SPE),y)
>>> +ifneq ($(CONFIG_MATH_EMULATION),y)
>>> +core-y				+=3D arch/powerpc/math-emu/
>>> +endif
>>> +endif
>>>  core-$(CONFIG_MATH_EMULATION)	+=3D arch/powerpc/math-emu/
>>>  core-$(CONFIG_XMON)		+=3D arch/powerpc/xmon/
>>>
>>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/
>>> entry_32.S index c03e829..a3d4ece 100644
>>> --- a/arch/powerpc/kernel/entry_32.S
>>> +++ b/arch/powerpc/kernel/entry_32.S
>>> @@ -616,6 +616,56 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
>>>  	.long	ret_from_except
>>>  #endif
>>>
>>> +#ifdef CONFIG_SPE
>>> +	.globl  ret_from_except_spe_full
>>> +ret_from_except_spe_full:
>>> +	REST_NVGPRS(r1)
>>> +	/* fall through */
>>> +	LOAD_MSR_KERNEL(r10,MSR_KERNEL)
>>> +	SYNC			/* Some chip revs have problems
>> here... */
>>> +	MTMSRD(r10)		/* disable interrupts */
>>> +
>>> +	lwz     r0,THREAD+THREAD_SPEFSCR(r2)
>>> +	mtspr   SPRN_SPEFSCR,r0		/* restore SPEFSCR reg */
>>> +
>>> +	lwz     r0,GPR0(r1)
>>> +	lwz     r2,GPR2(r1)
>>> +	REST_4GPRS(3, r1)
>>> +	REST_2GPRS(7, r1)
>>> +=09
>>> +	lwz     r10,_XER(r1)
>>> +	lwz     r11,_CTR(r1)
>>> +	mtspr   SPRN_XER,r10
>>> +	mtctr   r11
>>> +
>>> +	stwcx.  r0,0,r1                 /* to clear the reservation */
>>> +
>>> +	lwz     r11,_LINK(r1)
>>> +	mtlr    r11
>>> +	lwz     r10,_CCR(r1)
>>> +	mtcrf   0xff,r10
>>> +	REST_2GPRS(9, r1)
>>> +
>>> +	mtspr   SPRN_SPRG0,r11
>>> +	mtspr   SPRN_SPRG1,r12
>>> +	mfmsr   r11
>>> +	oris  r11, r11, MSR_SPE@h
>>> +	mtmsr   r11
>>> +	mfspr   r12,SPRN_SPRG3
>>> +	REST_32EVRS(0, r11,r12)
>>> +	mfspr   r11,SPRN_SPRG0
>>> +	mfspr   r12,SPRN_SPRG1
>>> +
>>> +	lwz     r11,_NIP(r1)
>>> +	lwz     r12,_MSR(r1)
>>> +	mtspr   SPRN_SRR0,r11
>>> +	mtspr   SPRN_SRR1,r12
>>> +	REST_2GPRS(11, r1)
>>> +	lwz     r1,GPR1(r1)
>>> +
>>> +	rfi
>>> +	b	.		/* prevent prefetch past rfi */
>>
>> Why do we need a separate ret_from_except_spe_full?
>
> I'm not sure if the kernel will return from exception in a multi-
> thread
> way, but the truth is if restoring EVRs in exsiting
> ret_from_except_full,
> some non-SPE exception will try to retore EVR. At that time, the
> MSR[SPE] may not be enabled, which will cause error.

Well, with EE disabled nothing is going to interrupt the exception
thread, so you can do all this in C code.

>>> +#endif
>>>  	.globl	ret_from_except_full
>>>  ret_from_except_full:
>>>  	REST_NVGPRS(r1)
>>> diff --git a/arch/powerpc/kernel/head_booke.h
>> b/arch/powerpc/kernel/
>>> head_booke.h index 8536e76..1e14d3e 100644
>>> --- a/arch/powerpc/kernel/head_booke.h
>>> +++ b/arch/powerpc/kernel/head_booke.h
>>> @@ -195,6 +195,10 @@ #define EXC_XFER_EE(n, hdlr)		\
>>>  	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE,
>>> transfer_to_handler_full, \
>>>  			  ret_from_except_full)
>>>
>>> +#define EXC_XFER_EE_SPE(n, hdlr)	\
>>> +	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE,
>>> transfer_to_handler_full, \
>>> +			  ret_from_except_spe_full)
>>> +
>>>  #define EXC_XFER_EE_LITE(n, hdlr)	\
>>>  	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE,
>>> transfer_to_handler, \
>>>  			  ret_from_except)
>>> diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/
>>> kernel/head_fsl_booke.S index 66877bd..56200b6 100644
>>> --- a/arch/powerpc/kernel/head_fsl_booke.S
>>> +++ b/arch/powerpc/kernel/head_fsl_booke.S
>>> @@ -699,7 +699,25 @@ #endif /* CONFIG_SPE */
>>>
>>>  	/* SPE Floating Point Data */
>>>  #ifdef CONFIG_SPE
>>> -	EXCEPTION(0x2030, SPEFloatingPointData,
>>> SPEFloatingPointException, EXC_XFER_EE);
>>> +	START_EXCEPTION(SPEFloatingPointData)
>>> +	mtspr   SPRN_SPRG0,r3
>>> +	mtspr   SPRN_SPRG1,r4
>>> +	mfmsr   r3
>>> +	oris    r3, r3, MSR_SPE@h
>>> +	mtmsr   r3
>>> +	mfspr   r3, SPRN_SRR0
>>> +	lwz     r3, 0(r3)
>>> +	lis     r4, speinsn@ha
>>> +	stw     r3, speinsn@l(r4)
>>> +	mfspr   r4, SPRN_SPRG3
>>> +	mfspr   r3, SPRN_SPEFSCR
>>> +	stw     r3, THREAD_SPEFSCR(r4)
>>> +	SAVE_32EVRS(0, r3, r4)
>>> +	mfspr   r3, SPRN_SPRG0
>>> +	mfspr   r4, SPRN_SPRG1
>>> +	NORMAL_EXCEPTION_PROLOG
>>> +	addi    r3,r1,STACK_FRAME_OVERHEAD
>>> +	EXC_XFER_EE_SPE(0x2030, SPEFloatingPointException)
>>
>> Why do we have to do any of this in the exception path?
>>
>> SPEFloatingPointException can have something like:
>>
>> 	unsigned long pc =3D regs->nip;
>> 	flush_spe_to_thread(current);
>>
>> 	if (get_user(insn, (u32 *)pc))
>> 		return -EFAULT;
>>
>> 	...
>>
>
> Since I want to save EVRs before entering the
> SPEFloatingPointException. Thanks
> for providing an alternative way, I can try to see if it works.
>
>>>  #else
>>>  	EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception,
>>> EXC_XFER_EE)
>>>  #endif /* CONFIG_SPE */
>>> @@ -840,6 +858,8 @@ load_up_spe:
>>>  	oris	r5,r5,MSR_SPE@h
>>>  	mtmsr	r5			/* enable use of SPE now */
>>>  	isync
>>> +	li      r5,(SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE |
>>> SPEFSCR_FOVFE)
>>> +	mtspr   SPRN_SPEFSCR,r5
>>
>> If you want to initialize spefscr so the enables are set do
>> it in INIT_THREAD.  I need to think more about if we should
>> even be doing that in the kernel.
>
> Ok, let's keep discussing this.
>
>>
>>>  /*
>>>   * For SMP, we don't do lazy SPE switching because it just gets too
>>>   * horrendously complex, especially when a task switches
>> from one CPU
>>> diff --git a/arch/powerpc/kernel/traps.c
>> b/arch/powerpc/kernel/traps.c
>>> index 535f506..68407d4 100644
>>> --- a/arch/powerpc/kernel/traps.c
>>> +++ b/arch/powerpc/kernel/traps.c
>>> @@ -986,6 +986,22 @@ #endif /* CONFIG_FSL_BOOKE */  #ifdef
>> CONFIG_SPE
>>> void SPEFloatingPointException(struct pt_regs *regs)  {
>>> +#ifdef CONFIG_E500
>>> +	extern int sigfpe_handler(struct pt_regs *regs);
>>> +	int err;
>>> +	if (current->thread.spefscr & ~0x3f) {
>>> +		err =3D sigfpe_handler(regs);
>>> +		if (err =3D=3D 0) {
>>> +			regs->nip +=3D 4;
>>> +			return;
>>> +		} else {
>>> +			current->thread.spefscr =3D 0x0;
>>
>> Why do clear spefscr and re-execute?
>
> I tried to make the code more robust here. Currently, all the SPE
> instructions
> that may cause execption are handled. But someday if the
> instruction set is extended and
> not supported by software, this code can make sure we handle it
> with powerpc
> default value, which won't make kernel crash.

Let's worry about that when it occurs; it's probably better that the
apps crash and we fix up the kernel when it happens.

>>> +			return;
>>> +		}
>>> +	} else {
>>> +		return;
>>
>> How else would we get here if not by having spefscr bit set?
>
> As above, I want to make it more robust. If no unexpected condition
> would happen, we can
> remove this off.

Let's remove it, since it shouldn't happen.

>>> +	}
>>> +#else
>>>  	unsigned long spefscr;
>>>  	int fpexc_mode;
>>>  	int code =3D 0;
>>> @@ -1016,6 +1032,7 @@ void SPEFloatingPointException(struct pt
>>>
>>>  	_exception(SIGFPE, regs, code, regs->nip);
>>>  	return;
>>
>> Your code should supersede the handling code that's currently there.
>
> OK, I agree.
>
>>
>>> +#endif
>>>  }
>>>  #endif
>>>
>>> diff --git a/arch/powerpc/math-emu/Makefile
>> b/arch/powerpc/math-emu/
>>> Makefile index 29bc912..dbb3e26 100644
>>> --- a/arch/powerpc/math-emu/Makefile
>>> +++ b/arch/powerpc/math-emu/Makefile
>>> @@ -1,16 +1,29 @@
>>>
>>> -obj-y				:=3D math.o fmr.o lfd.o stfd.o
>>> -
>>> -obj-$(CONFIG_MATH_EMULATION)	+=3D fabs.o fadd.o
>> fadds.o fcmpo.o
>>> fcmpu.o \
>>> -					fctiw.o fctiwz.o fdiv.o
>> fdivs.o \
>>> +obj-y				:=3D fabs.o fadd.o fdiv.o fmul.o =
\
>>> +					fneg.o fsub.o types.o
>> udivmodti4.o
>>> +				=09
>>> +obj-$(CONFIG_MATH_EMULATION)	+=3D math.o fmr.o lfd.o stfd.o \
>>> +					fadds.o fcmpo.o fcmpu.o \
>>> +					fctiw.o fctiwz.o fdivs.o \
>>>  					fmadd.o fmadds.o
>> fmsub.o fmsubs.o \
>>> -					fmul.o fmuls.o fnabs.o
>> fneg.o types.o \
>>> +					fmuls.o fnabs.o \
>>>  					fnmadd.o fnmadds.o
>> fnmsub.o fnmsubs.o \
>>>  					fres.o frsp.o frsqrte.o
>> fsel.o lfs.o \
>>> -					fsqrt.o	fsqrts.o fsub.o
>> fsubs.o \
>>> +					fsqrt.o	fsqrts.o fsubs.o \
>>>  					mcrfs.o mffs.o mtfsb0.o
>> mtfsb1.o \
>>> -					mtfsf.o mtfsfi.o
>> stfiwx.o stfs.o \
>>> -					udivmodti4.o
>>> +					mtfsf.o mtfsfi.o stfiwx.o stfs.o
>>> +
>>> +obj-$(CONFIG_SPE)		+=3D efsabs.o efsadd.o efscfd.o
>> efscmpeq.o \
>>> +					efscmpgt.o efscmplt.o
>> efsctsf.o efsctsi.o \
>>> +					efsctsiz.o efsctuf.o
>> efsctui.o efsctuiz.o \
>>> +					efsdiv.o efsmul.o
>> efsnabs.o efsneg.o efssub.o \
>>> +					evfsabs.o evfsadd.o
>> evfscmpeq.o evfscmpgt.o \
>>> +					evfscmplt.o evfsctsf.o
>> evfsctsi.o evfsctsiz.o \
>>> +					evfsctuf.o evfsctui.o
>> evfsctuiz.o evfsdiv.o \
>>> +					evfsmul.o evfsnabs.o
>> evfsneg.o evfssub.o \
>>> +					efdcfs.o efdcmpeq.o
>> efdcmpgt.o efdcmplt.o efdctsf.o \
>>> +					efdctsi.o efdctsidz.o
>> efdctsiz.o efdctuf.o \
>>> +					efdctui.o efdctuidz.o
>> efdctuiz.o efdnabs.o
>>>
>>>  CFLAGS_fabs.o =3D -fno-builtin-fabs
>>>  CFLAGS_math.o =3D -fno-builtin-fabs
>>> diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/
>>> math-emu/sfp-machine.h index 4b17d83..313734d 100644
>>> --- a/arch/powerpc/math-emu/sfp-machine.h
>>> +++ b/arch/powerpc/math-emu/sfp-machine.h
>>> @@ -166,7 +166,11 @@ #define __FP_PACK_RAW_2(fs, val, X)=09
>> 		\
>>>  #include <linux/kernel.h>
>>>  #include <linux/sched.h>
>>>
>>> +#ifdef CONFIG_SPE
>>> +#define __FPU_FPSCR	(current->thread.spefscr)
>>> +#else
>>>  #define __FPU_FPSCR	(current->thread.fpscr.val)
>>> +#endif
>>>
>>>  /* We only actually write to the destination register
>>>   * if exceptions signalled (if any) will not trap.
>>> diff --git a/arch/powerpc/sysdev/sigfpe_handler.c b/arch/powerpc/
>>> sysdev/sigfpe_handler.c new file mode 100644 index 0000000..6e809b2
>>> --- /dev/null
>>> +++ b/arch/powerpc/sysdev/sigfpe_handler.c
>>> @@ -0,0 +1,298 @@
>>> +/*
>>> + * arch/powerpc/sysdev/sigfpe_handler.c
>>> + *
>>> + * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights
>>> reserved.
>>> + *
>>> + * Author: Ebony Zhu, ebony.zhu@freescale.com
>>> + *
>>> + * Derived from arch/powerpc/math-emu/math.c
>>> + * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
>>> + *
>>> + * Description:
>>> + * This file is the exception handler to make E500 SPE instructions
>>> + * fully comply with IEEE-754 floating point standard.
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU General Public License
>>> + * as published by the Free Software Foundation; either version
>>> + * 2 of the License, or (at your option) any later version.
>>> + */
>>> +
>>> +#include <linux/types.h>
>>> +
>>> +#include <asm/uaccess.h>
>>> +#include <asm/reg.h>
>>> +
>>> +#define SPEFUNC(x)	extern int x(void *, void *, void *, void *)
>>> +#define efdabs	fabs
>>> +#define efdadd	fadd
>>> +#define efdsub	fsub
>>> +#define efddiv	fdiv
>>> +#define efdmul	fmul
>>> +#define efdneg	fneg
>>> +
>>> +/* Scalar SPFP functions */
>>> +SPEFUNC(efsabs);
>>> +SPEFUNC(efsadd);
>>> +SPEFUNC(efscfd);
>>> +SPEFUNC(efscmpeq);
>>> +SPEFUNC(efscmpgt);
>>> +SPEFUNC(efscmplt);
>>> +SPEFUNC(efsctsf);
>>> +SPEFUNC(efsctsi);
>>> +SPEFUNC(efsctsiz);
>>> +SPEFUNC(efsctuf);
>>> +SPEFUNC(efsctui);
>>> +SPEFUNC(efsctuiz);
>>> +SPEFUNC(efsdiv);
>>> +SPEFUNC(efsmul);
>>> +SPEFUNC(efsnabs);
>>> +SPEFUNC(efsneg);
>>> +SPEFUNC(efssub);
>>> +
>>> +/* Vector Floating-Point functions */ SPEFUNC(evfsabs);
>>> +SPEFUNC(evfsadd); SPEFUNC(evfscmpeq); SPEFUNC(evfscmpgt);
>>> +SPEFUNC(evfscmplt); SPEFUNC(evfsctsf); SPEFUNC(evfsctsi);
>>> +SPEFUNC(evfsctsiz); SPEFUNC(evfsctuf); SPEFUNC(evfsctui);
>>> +SPEFUNC(evfsctuiz); SPEFUNC(evfsdiv); SPEFUNC(evfsmul);
>>> +SPEFUNC(evfsnabs); SPEFUNC(evfsneg); SPEFUNC(evfssub);
>>> +
>>> +/* Scalar DPFP functions */
>>> +SPEFUNC(efdabs);
>>> +SPEFUNC(efdadd);
>>> +SPEFUNC(efdcfs);
>>> +SPEFUNC(efdcmpeq);
>>> +SPEFUNC(efdcmpgt);
>>> +SPEFUNC(efdcmplt);
>>> +SPEFUNC(efdctsf);
>>> +SPEFUNC(efdctsi);
>>> +SPEFUNC(efdctsidz);
>>> +SPEFUNC(efdctsiz);
>>> +SPEFUNC(efdctuf);
>>> +SPEFUNC(efdctui);
>>> +SPEFUNC(efdctuidz);
>>> +SPEFUNC(efdctuiz);
>>> +SPEFUNC(efddiv);
>>> +SPEFUNC(efdmul);
>>> +SPEFUNC(efdnabs);
>>> +SPEFUNC(efdneg);
>>> +SPEFUNC(efdsub);
>>> +
>>> +#define VCT		0x4
>>
>> not used?
>
> Currently not. We can remove it then.
>
>
>>
>>> +#define SPFP		0x6
>>> +#define DPFP		0x7
>>> +#define EFAPU		0x4
>>> +
>>> +#define EFSADD		0x2c0
>>> +#define EFSSUB		0x2c1
>>> +#define EFSABS		0x2c4
>>> +#define EFSNABS		0x2c5
>>> +#define EFSNEG		0x2c6
>>> +#define EFSMUL		0x2c8
>>> +#define EFSDIV		0x2c9
>>> +#define EFSCMPGT	0x2cc
>>> +#define EFSCMPLT	0x2cd
>>> +#define EFSCMPEQ	0x2ce
>>> +#define EFSCFD		0x2cf
>>> +#define EFSCTUI		0x2d4
>>> +#define EFSCTSI		0x2d5
>>> +#define EFSCTUF		0x2d6
>>> +#define EFSCTSF		0x2d7
>>> +#define EFSCTUIZ	0x2d8
>>> +#define EFSCTSIZ	0x2da
>>> +
>>> +#define EVFSADD		0x280
>>> +#define EVFSSUB		0x281
>>> +#define EVFSABS		0x284
>>> +#define EVFSNABS	0x285
>>> +#define EVFSNEG		0x286
>>> +#define EVFSMUL		0x288
>>> +#define EVFSDIV		0x289
>>> +#define EVFSCMPGT	0x28c
>>> +#define EVFSCMPLT	0x28d
>>> +#define EVFSCMPEQ	0x28e
>>> +#define EVFSCTUI	0x294
>>> +#define EVFSCTSI	0x295
>>> +#define EVFSCTUF	0x296
>>> +#define EVFSCTSF	0x297
>>> +#define EVFSCTUIZ	0x298
>>> +#define EVFSCTSIZ	0x29a
>>> +
>>> +#define EFDADD		0x2e0
>>> +#define EFDSUB		0x2e1
>>> +#define EFDABS		0x2e4
>>> +#define EFDNABS		0x2e5
>>> +#define EFDNEG		0x2e6
>>> +#define EFDMUL		0x2e8
>>> +#define EFDDIV		0x2e9
>>> +#define EFDCTUIDZ	0x2ea
>>> +#define EFDCTSIDZ	0x2eb
>>> +#define EFDCMPGT	0x2ec
>>> +#define EFDCMPLT	0x2ed
>>> +#define EFDCMPEQ	0x2ee
>>> +#define EFDCFS		0x2ef
>>> +#define EFDCTUI		0x2f4
>>> +#define EFDCTSI		0x2f5
>>> +#define EFDCTUF		0x2f6
>>> +#define EFDCTSF		0x2f7
>>> +#define EFDCTUIZ	0x2f8
>>> +#define EFDCTSIZ	0x2fa
>>> +
>>> +#define AB	2
>>> +#define XA	3
>>> +#define XB	4
>>> +#define XCR	5=09
>>> +
>>> +static u64 fullgprs[32];
>>> +u32 speinsn;
>>> +
>>> +int
>>> +sigfpe_handler(struct pt_regs *regs)
>>> +{
>>> +	void *op0 =3D 0, *op1 =3D 0, *op2 =3D 0, *op3 =3D 0;
>>> +	int i;
>>> +	int (*func)(void *, void *, void *, void *);
>>> +	int type =3D 0;
>>> +	int flag;
>>> +=09
>>> +	switch ((speinsn >> 5) & 0x7 ) {
>>> +	case SPFP:
>>> +		for(i =3D 0; i < 32; i++) {
>>> +			fullgprs[i] =3D regs->gpr[i];
>>> +			fullgprs[i] =3D fullgprs[i] << 32 |
>> current->thread.evr[i];
>>> +		};
>>> +		break;
>>> +	default:
>>> +		for(i =3D 0; i < 32; i++) {
>>> +			fullgprs[i] =3D current->thread.evr[i];
>>> +			fullgprs[i] =3D (fullgprs[i] << 32) |
>> (regs->gpr[i]);
>>> +		};
>>> +	}
>>> +
>>> +	switch (speinsn >> 26) {
>>> +=09
>>> +	case EFAPU:
>>> +		switch (speinsn & 0x7ff) {
>>> +		case EFSABS:	func =3D efsabs;		type =3D
>> XA;	break;
>>> +		case EFSADD:	func =3D efsadd;		type =3D
>> AB;      break;
>>> +		case EFSCFD:	func =3D efscfd;		type =3D
>> XB;	break;
>>> +		case EFSCMPEQ:	func =3D efscmpeq;	type =3D
>> XCR;	break;
>>> +		case EFSCMPGT:	func =3D efscmpgt;	type =3D
>> XCR;	break;
>>> +		case EFSCMPLT:	func =3D efscmplt;	type =3D
>> XCR;	break;
>>> +		case EFSCTSF:	func =3D efsctsf;		type =3D
>> XB;	break;
>>> +		case EFSCTSI:	func =3D efsctsi;		type =3D
>> XB;	break;
>>> +		case EFSCTSIZ:	func =3D efsctsiz;	type =3D
>> XB;	break;
>>> +		case EFSCTUF:	func =3D efsctuf;		type =3D
>> XB;	break;
>>> +		case EFSCTUI:	func =3D efsctui;		type =3D
>> XB;	break;
>>> +		case EFSCTUIZ:	func =3D efsctuiz;	type =3D
>> XB;	break;
>>> +		case EFSDIV:	func =3D efsdiv;		type =3D
>> AB;	break;
>>> +		case EFSMUL:	func =3D efsmul;		type =3D
>> AB;	break;
>>> +		case EFSNABS:	func =3D efsnabs;		type =3D
>> XA;	break;
>>> +		case EFSNEG:	func =3D efsneg;		type =3D
>> XA;	break;
>>> +		case EFSSUB:	func =3D efssub;		type =3D
>> AB;	break;
>>> +
>>> +		case EVFSABS:	func =3D evfsabs;		type =3D
>> XA;	break;
>>> +		case EVFSADD:	func =3D evfsadd;		type =3D
>> AB;      break;
>>> +		case EVFSCMPEQ:	func =3D evfscmpeq;	type =3D
>> XCR;	break;
>>> +		case EVFSCMPGT:	func =3D evfscmpgt;	type =3D
>> XCR;	break;
>>> +		case EVFSCMPLT:	func =3D evfscmplt;	type =3D
>> XCR;	break;
>>> +		case EVFSCTSF:	func =3D evfsctsf;	type =3D
>> XB;	break;
>>> +		case EVFSCTSI:	func =3D evfsctsi;	type =3D
>> XB;	break;
>>> +		case EVFSCTSIZ:	func =3D evfsctsiz;	type =3D
>> XB;	break;
>>> +		case EVFSCTUF:	func =3D evfsctuf;	type =3D
>> XB;	break;
>>> +		case EVFSCTUI:	func =3D evfsctui;	type =3D
>> XB;	break;
>>> +		case EVFSCTUIZ:	func =3D evfsctuiz;	type =3D
>> XB;	break;
>>> +		case EVFSDIV:	func =3D evfsdiv;		type =3D
>> AB;	break;
>>> +		case EVFSMUL:	func =3D evfsmul;		type =3D
>> AB;	break;
>>> +		case EVFSNABS:	func =3D evfsnabs;	type =3D
>> XA;	break;
>>> +		case EVFSNEG:	func =3D evfsneg;		type =3D
>> XA;	break;
>>> +		case EVFSSUB:	func =3D evfssub;		type =3D
>> AB;	break;
>>> +
>>> +		case EFDABS:	func =3D efdabs;		type =3D
>> XA;	break;
>>> +		case EFDADD:	func =3D efdadd;		type =3D
>> AB;	break;
>>> +		case EFDCFS:	func =3D efdcfs;		type =3D
>> XB;	break;
>>> +		case EFDCMPEQ:	func =3D efdcmpeq;	type =3D
>> XCR;	break;
>>> +		case EFDCMPGT:	func =3D efdcmpgt;	type =3D
>> XCR;	break;
>>> +		case EFDCMPLT:	func =3D efdcmplt;	type =3D
>> XCR;	break;
>>> +		case EFDCTSF:	func =3D efdctsf;		type =3D
>> XB;	break;
>>> +		case EFDCTSI:	func =3D efdctsi;		type =3D
>> XB;	break;
>>> +		case EFDCTSIDZ:	func =3D efdctsidz;	type =3D
>> XB;	break;
>>> +		case EFDCTSIZ:	func =3D efdctsiz;	type =3D
>> XB;	break;
>>> +		case EFDCTUF:	func =3D efdctuf;		type =3D
>> XB;	break;
>>> +		case EFDCTUI:	func =3D efdctui;		type =3D
>> XB;	break;
>>> +		case EFDCTUIDZ:	func =3D efdctuidz;	type =3D
>> XB;	break;
>>> +		case EFDCTUIZ:	func =3D efdctuiz;	type =3D
>> XB;	break;
>>> +		case EFDDIV:	func =3D efddiv;		type =3D
>> AB;	break;
>>> +		case EFDMUL:	func =3D efdmul;		type =3D
>> AB;	break;
>>> +		case EFDNABS:	func =3D efdnabs;		type =3D
>> XA;	break;
>>> +		case EFDNEG:	func =3D efdneg;		type =3D
>> XA;	break;
>>> +		case EFDSUB:	func =3D efdsub;		type =3D
>> AB;	break;	=09
>>> +		default:
>>> +			goto illegal;
>>> +		}
>>> +		break;
>>> +	default:
>>> +		goto illegal;
>>> +	}
>>> +
>>> +	switch (type) {
>>> +	case AB:
>>> +		op0 =3D &fullgprs[(speinsn >> 21) & 0x1f];
>>> +		op1 =3D &fullgprs[(speinsn >> 16) & 0x1f];
>>> +		op2 =3D &fullgprs[(speinsn >> 11) & 0x1f];
>>> +		break;
>>> +
>>> +	case XA:
>>> +		op0 =3D &fullgprs[(speinsn >> 21) & 0x1f];
>>> +		op1 =3D &fullgprs[(speinsn >> 16) & 0x1f];
>>> +		break;
>>> +
>>> +	case XB:
>>> +		op0 =3D &fullgprs[(speinsn >> 21) & 0x1f];
>>> +		op1 =3D &fullgprs[(speinsn >> 11) & 0x1f];
>>> +		break;
>>> +=09
>>> +	case XCR:
>>> +		op0 =3D (void *)&regs->ccr;
>>> +		op1 =3D (void *)((speinsn >> 23) & 0x7);
>>> +		op2 =3D &fullgprs[(speinsn >> 16) & 0x1f];
>>> +		op3 =3D &fullgprs[(speinsn >> 11) & 0x1f];
>>> +		break;
>>> +
>>> +	default:
>>> +		goto illegal;
>>> +	}
>>> +
>>> +	flag =3D func(op0, op1, op2, op3);
>>> +=09
>>> +	switch ((speinsn >> 5) & 0x7 ) {
>>> +	case SPFP:
>>> +		for (i =3D 0; i < 32; i++) {
>>> +			regs->gpr[i] =3D fullgprs[i] >> 32;
>>> +		};
>>> +		break;
>>> +	default:
>>> +		for (i =3D 0; i < 32; i++) {
>>> +			regs->gpr[i] =3D fullgprs[i];
>>> +			current->thread.evr[i] =3D fullgprs[i] >> 32;
>>> +		};
>>> +	}
>>> +=09
>>> +	current->thread.spefscr &=3D 0x3f;
>>
>> can't clear spefscr this way.
>>
>> Need to do what the existing handler was doing here.
>
> Do you mean I need to do it like this:
>
> 	unsigned long spefscr;
> 	spefscr =3D current->thread.spefscr;
> 	spefscr =3D 0x3f;
> 	current->spefscr =3D spefscr ;
>
> I'm not really understand...

What I meant is the existing handler uses the thread.fpexc_mode to
keep track of the "exception" flags that an application may want to
enable as part of the STND-C fpenv support (find a copy of the ANSI
spec and read up on Floating-point Environment).  The std c-lib
provides functions like fesetexceptflag(), fetestexcept(),
fesetround(), etc.

Some of this will end up calling into set_fpexc_mode() in the kernel
via the prctl system call.

The idea was we use thread.fpexc_mode to keep track of what the C
runtime env wants the flags to be, then in the exception handler we
can use those flags to decide if we should raise a software SIGFPE
exception on the process.

So when you emulate the instructions you need to handle determining
if a software SIGFPE should be sent or not.

I hope that makes sense.

- k

>>> +	return 0;
>>> +
>>> +illegal:
>>> +	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler
>> encountered
>>> un-supported instruction.\n");
>>> +	return -ENOSYS;
>>> +}
>>> diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/
>>> Makefile index 04d4917..1e74e15 100644
>>> --- a/arch/powerpc/sysdev/Makefile
>>> +++ b/arch/powerpc/sysdev/Makefile
>>> @@ -13,6 +13,7 @@ obj-$(CONFIG_FSL_SOC)		+=3D fsl_soc.o
>>>  obj-$(CONFIG_TSI108_BRIDGE)	+=3D tsi108_pci.o tsi108_dev.o
>>>  obj-$(CONFIG_QUICC_ENGINE)	+=3D qe_lib/
>>>  obj-$(CONFIG_MTD)		+=3D rom.o
>>> +obj-$(CONFIG_SPE)		+=3D sigfpe_handler.o
>>>
>>>  ifeq ($(CONFIG_PPC_MERGE),y)
>>>  obj-$(CONFIG_PPC_I8259)		+=3D i8259.o
>>> --
>>> 1.4.0
>>
>>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12  9:52 ` Christoph Hellwig
  2007-01-12 10:23   ` Zhu Ebony-r57400
@ 2007-01-12 18:57   ` Kumar Gala
  1 sibling, 0 replies; 20+ messages in thread
From: Kumar Gala @ 2007-01-12 18:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linuxppc-dev, paulus


On Jan 12, 2007, at 3:52 AM, Christoph Hellwig wrote:

> On Fri, Jan 12, 2007 at 01:31:02PM +0800, ebony.zhu@freescale.com  
> wrote:
>> +ifeq ($(CONFIG_SPE),y)
>> +ifneq ($(CONFIG_MATH_EMULATION),y)
>> +core-y				+= arch/powerpc/math-emu/
>> +endif
>> +endif
>>  core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
>
> This statement doesn't make any sense.  I guess you always want to
> build arch/powerpc/math-emu if CONFIG_SPE is set, right?  The proper
> way to do that is to force CONFIG_MATH_EMULATION in the Kconfig.
>
> Then again we need a really good explanation why CONFIG_SPE should
> force the math emulation to be built.

I'm a little confused if there is a question about the approach  
taken?  Are you asking is there some way of doing the exception  
handling w/o fully emulating the instruction?

Agree that the build system modifications in this patch set need some  
fixing, just trying to understand if there is a larger design query  
here or not?

- k

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 18:39       ` Kumar Gala
@ 2007-01-12 20:52         ` Benjamin Herrenschmidt
  2007-01-12 21:18           ` Kumar Gala
  0 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2007-01-12 20:52 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, paulus

On Fri, 2007-01-12 at 12:39 -0600, Kumar Gala wrote:
> On Jan 12, 2007, at 5:05 AM, Benjamin Herrenschmidt wrote:
> 
> >
> >>> Why do we need a separate ret_from_except_spe_full?
> >>
> >> I'm not sure if the kernel will return from exception in a multi- 
> >> thread
> >> way, but the truth is if restoring EVRs in exsiting  
> >> ret_from_except_full,
> >> some non-SPE exception will try to retore EVR. At that time, the
> >> MSR[SPE] may not be enabled, which will cause error.
> >
> > Hrm... you can restore them before returning from the exception if you
> > are careful about doing that with preempt/irqs off I suppose.
> 
> Which is what I thought having the exception be EXC_XFER_EE and  
> flush_spe_to_thread should get you.
> 
> I dont see any reason to dirty up the 'exception' path for stuff we  
> can do in C code.

Well, doing that means that you will flush the SPE to the thread struct
and disable it, return to userland with SPE disabled, and right away
take a new exception as soon as the next SPE instruction is reached.
Maybe not optimal...

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 20:52         ` Benjamin Herrenschmidt
@ 2007-01-12 21:18           ` Kumar Gala
  2007-01-12 21:27             ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 20+ messages in thread
From: Kumar Gala @ 2007-01-12 21:18 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, paulus


On Jan 12, 2007, at 2:52 PM, Benjamin Herrenschmidt wrote:

> On Fri, 2007-01-12 at 12:39 -0600, Kumar Gala wrote:
>> On Jan 12, 2007, at 5:05 AM, Benjamin Herrenschmidt wrote:
>>
>>>
>>>>> Why do we need a separate ret_from_except_spe_full?
>>>>
>>>> I'm not sure if the kernel will return from exception in a multi-
>>>> thread
>>>> way, but the truth is if restoring EVRs in exsiting
>>>> ret_from_except_full,
>>>> some non-SPE exception will try to retore EVR. At that time, the
>>>> MSR[SPE] may not be enabled, which will cause error.
>>>
>>> Hrm... you can restore them before returning from the exception  
>>> if you
>>> are careful about doing that with preempt/irqs off I suppose.
>>
>> Which is what I thought having the exception be EXC_XFER_EE and
>> flush_spe_to_thread should get you.
>>
>> I dont see any reason to dirty up the 'exception' path for stuff we
>> can do in C code.
>
> Well, doing that means that you will flush the SPE to the thread  
> struct
> and disable it, return to userland with SPE disabled, and right away
> take a new exception as soon as the next SPE instruction is reached.
> Maybe not optimal...

Well, we could reload the registers on exit if we wanted to.  Also, if
we want to be more efficient, we should only flush the registers we need.

I think it's a fair assumption that nothing is going to interrupt the
handling of the exception, so it's safe to assume the process causing
the interrupt will be the same one we return to.

- k

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 21:18           ` Kumar Gala
@ 2007-01-12 21:27             ` Benjamin Herrenschmidt
  2007-01-12 21:49               ` Kumar Gala
  0 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2007-01-12 21:27 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, paulus

> I think its a fair assumption that nothing is going to interrupt the  
> handling of the exception, so its safe to assume the process causing  
> the interrupt will be the same one we return to.

What about the get_user to get to the faulting instruction ? I suppose
if those processors are UP only and we use an exception with EE
disabled, there should be no way the page has been evicted since the
access, so that should work, but will that ever be true ?

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 21:27             ` Benjamin Herrenschmidt
@ 2007-01-12 21:49               ` Kumar Gala
  2007-01-12 22:02                 ` Benjamin Herrenschmidt
  2007-01-16  9:43                 ` Zhu Ebony-r57400
  0 siblings, 2 replies; 20+ messages in thread
From: Kumar Gala @ 2007-01-12 21:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, paulus


On Jan 12, 2007, at 3:27 PM, Benjamin Herrenschmidt wrote:

>> I think its a fair assumption that nothing is going to interrupt the
>> handling of the exception, so its safe to assume the process causing
>> the interrupt will be the same one we return to.
>
> What about the get_user to get to the faulting instruction ? I suppose
> if those processors are UP only and we use an exception with EE
> disabled, there should be no way the page has been evicted since the
> access, so that should work, but will that ever be true ?

For UP we are ok, since nothing else can cause an invalidate.

How do we handle getting a fault when we are emulating any other  
instruction?

- k

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 21:49               ` Kumar Gala
@ 2007-01-12 22:02                 ` Benjamin Herrenschmidt
  2007-01-16  9:43                 ` Zhu Ebony-r57400
  1 sibling, 0 replies; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2007-01-12 22:02 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, paulus

On Fri, 2007-01-12 at 15:49 -0600, Kumar Gala wrote:
> On Jan 12, 2007, at 3:27 PM, Benjamin Herrenschmidt wrote:
> 
> >> I think its a fair assumption that nothing is going to interrupt the
> >> handling of the exception, so its safe to assume the process causing
> >> the interrupt will be the same one we return to.
> >
> > What about the get_user to get to the faulting instruction ? I suppose
> > if those processors are UP only and we use an exception with EE
> > disabled, there should be no way the page has been evicted since the
> > access, so that should work, but will that ever be true ?
> 
> For UP we are ok, since nothing else can cause an invalidate.
> 
> How do we handle getting a fault when we are emulating any other  
> instruction?

Well, that's the reason why we had this discussion recently about moving
the local_irq_enable to before the emulation code :-)

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 18:53     ` [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions Kumar Gala
@ 2007-01-15  7:48       ` Zhu Ebony-r57400
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu Ebony-r57400 @ 2007-01-15  7:48 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, paulus


> >>
> >> Why do we need a separate ret_from_except_spe_full?
> >
> > I'm not sure if the kernel will return from exception in a multi-
> > thread way, but the truth is if restoring EVRs in existing
> > ret_from_except_full, some non-SPE exception will try to
> > restore EVR.
> > At that time, the MSR[SPE] may not be enabled, which will
> > cause error.
>
> Well, with EE disabled nothing is going to interrupt the
> exception thread, so you can do all this in C code.

Do you mean disabling MSR[EE] in handler C code before calling
ret_from_except_full?


> >>> diff --git a/arch/powerpc/kernel/traps.c
> >> b/arch/powerpc/kernel/traps.c
> >>> index 535f506..68407d4 100644
> >>> --- a/arch/powerpc/kernel/traps.c
> >>> +++ b/arch/powerpc/kernel/traps.c
> >>> @@ -986,6 +986,22 @@ #endif /* CONFIG_FSL_BOOKE */  #ifdef
> >> CONFIG_SPE
> >>> void SPEFloatingPointException(struct pt_regs *regs)  {
> >>> +#ifdef CONFIG_E500
> >>> +	extern int sigfpe_handler(struct pt_regs *regs);
> >>> +	int err;
> >>> +	if (current->thread.spefscr & ~0x3f) {
> >>> +		err = sigfpe_handler(regs);
> >>> +		if (err == 0) {
> >>> +			regs->nip += 4;
> >>> +			return;
> >>> +		} else {
> >>> +			current->thread.spefscr = 0x0;
> >>
> >> Why do clear spefscr and re-execute?
> >
> > I tried to make the code more robust here. Currently, all the SPE
> > instructions that may cause exception are handled. But someday if the
> > instruction set is extended and not supported by software, this code
> > can make sure we handle it with powerpc default value, which won't
> > make kernel crash.
>
> Let's worry about that when it occurs; it's probably better
> that the apps crash and we fix up the kernel when it happens.

Currently err != 0 won't happen on PQ3/PQ38 cores. So does it mean
that we can simplify the code here?


>
> >>> +			return;
> >>> +		}
> >>> +	} else {
> >>> +		return;
> >>
> >> How else would we get here if not by having spefscr bit set?
> >
> > As above, I want to make it more robust. If no unexpected condition
> > would happen, we can remove this.
>
> Let's remove it since it shouldn't happen
>
OK, I will.

> >>> +	current->thread.spefscr &= 0x3f;
> >>
> >> can't clear spefscr this way.
> >>
> >> Need to do what the existing handler was doing here.
> >
> > Do you mean I need to do it like this:
> >
> > 	unsigned long spefscr;
> > 	spefscr = current->thread.spefscr;
> > 	spefscr = 0x3f;
> > 	current->spefscr = spefscr ;
> >
> > I don't really understand...
>
> What I meant is the existing handler uses the
> thread.fpexc_mode to keep track of the "exception" flags that
> an application may want to enable as part of the STND-C fpenv
> support (find a copy of the ANSI spec and read up on
> Floating-point Environment).  The std c-lib provides
> functions like fesetexceptflag(), fetestexcept(), fesetround(), etc.
>
> Some of this will end up calling into set_fpexc_mode() in the
> kernel via the prctl system call.
>
> The idea was we use thread.fpexc_mode to keep track of what
> the C runtime env wants the flags to be, then in the
> exception handler we can use those flags to decide if we
> should raise a software SIGFPE exception on the process.
>
> So when you emulate the instructions you need to handle
> determining if a software SIGFPE should be sent or not.
>
> I hope that makes sense.
>
> - k
>

I see. This way we can synchronize the kernel and the runtime environment
for floating point flags.  Is it correct to use the existing handler
to track exception flags and set current->thread.spefscr, and then enter
sigfpe_handler()?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 12:36     ` Segher Boessenkool
@ 2007-01-15  7:58       ` Zhu Ebony-r57400
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu Ebony-r57400 @ 2007-01-15  7:58 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linuxppc-dev, paulus

=20

> -----Original Message-----
> From: Segher Boessenkool [mailto:segher@kernel.crashing.org]
> Sent: 2007年1月12日 20:37
> To: Zhu Ebony-r57400
> Cc: Christoph Hellwig; paulus@samba.org; linuxppc-dev@ozlabs.org
> Subject: Re: [patch][5/5] powerpc: Add the general support
> for Embedded Floating-Point instructions
>
> > CONFIG_SPE doesn't force the whole math emulation to be built. If
> > CONFIG_SPE is set, only the new SPFP/DPFP/VSPFP instructions in
> > arch/powerpc/math-emu will be built, while the previous FPU
> > instructions won't.
>
> And the other way around I suppose.
>
> > Therefore, what I'm trying to do is to build directory
> > arch/powerpc/math-emu if CONFIG_MATH_EMULATION or
> > CONFIG_SPE is set.
> > Do you have any idea?
>
> Maybe you should really have a separate CONFIG_SPE_MATH_EMU?
> Or that might complicate things more than it solves, dunno.
>
>
> Segher
>
>

If there were a statement that could express "CONFIG_MATH_EMULATION or
CONFIG_SPE", things would become simpler.

Ebony

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for EmbeddedFloating-Point instructions
  2007-01-12 11:05     ` Benjamin Herrenschmidt
  2007-01-12 18:39       ` Kumar Gala
@ 2007-01-15  8:06       ` Zhu Ebony-r57400
  1 sibling, 0 replies; 20+ messages in thread
From: Zhu Ebony-r57400 @ 2007-01-15  8:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, paulus

=20

> -----Original Message-----
> From: Benjamin Herrenschmidt [mailto:benh@kernel.crashing.org]
> Sent: 2007年1月12日 19:06
> To: Zhu Ebony-r57400
> Cc: Kumar Gala; linuxppc-dev@ozlabs.org; paulus@samba.org
> Subject: RE: [patch][5/5] powerpc: Add the general support
> for EmbeddedFloating-Point instructions
>
>
> > > Why do we need a separate ret_from_except_spe_full?
> >
> > I'm not sure if the kernel will return from exception in a
> > multi-thread way, but the truth is if restoring EVRs in existing
> > ret_from_except_full, some non-SPE exception will try to
> > restore EVR.
> > At that time, the MSR[SPE] may not be enabled, which will
> > cause error.
>
> Hrm... you can restore them before returning from the
> exception if you are careful about doing that with
> preempt/irqs off I suppose.
>
> Ben.

If so, the existing ret_from_except_full needs to be modified
since it only restores 32bit GPRs, and I'm afraid the instructions that
operate on 32bit GPRs will break the hi-words of 64bit GPRs.

Ebony

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-12 21:49               ` Kumar Gala
  2007-01-12 22:02                 ` Benjamin Herrenschmidt
@ 2007-01-16  9:43                 ` Zhu Ebony-r57400
  2007-01-16 21:54                   ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 20+ messages in thread
From: Zhu Ebony-r57400 @ 2007-01-16  9:43 UTC (permalink / raw)
  To: Kumar Gala, Benjamin Herrenschmidt; +Cc: linuxppc-dev, paulus

=20

> -----Original Message-----
> From: Kumar Gala [mailto:galak@kernel.crashing.org]
> Sent: 2007年1月13日 05:49
> To: Benjamin Herrenschmidt
> Cc: Zhu Ebony-r57400; linuxppc-dev@ozlabs.org; paulus@samba.org
> Subject: Re: [patch][5/5] powerpc: Add the general support
> for Embedded Floating-Point instructions
>
>
> On Jan 12, 2007, at 3:27 PM, Benjamin Herrenschmidt wrote:
>
> >> I think its a fair assumption that nothing is going to interrupt the
> >> handling of the exception, so its safe to assume the process causing
> >> the interrupt will be the same one we return to.
> >
> > What about the get_user to get to the faulting instruction ? I suppose
> > if those processors are UP only and we use an exception with EE
> > disabled, there should be no way the page has been evicted since the
> > access, so that should work, but will that ever be true ?
>
> For UP we are ok, since nothing else can cause an invalidate.
>
> How do we handle getting a fault when we are emulating any
> other instruction?
>
> - k
>

Do you think using SRR0 is safer since it contains the exact effective
address of the instruction causing the interrupt?

B.R.
Ebony

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions
  2007-01-16  9:43                 ` Zhu Ebony-r57400
@ 2007-01-16 21:54                   ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2007-01-16 21:54 UTC (permalink / raw)
  To: Zhu Ebony-r57400; +Cc: linuxppc-dev, paulus


> Do you think using SRR0 is safer since it contains the exact effective address
> of the instruction causing the interrupt?

The problem is reading the instruction itself.

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2007-01-16 21:57 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-12  5:31 [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions ebony.zhu
2007-01-12  6:40 ` Kumar Gala
2007-01-12  7:45   ` Zhu Ebony-r57400
2007-01-12 11:05     ` Benjamin Herrenschmidt
2007-01-12 18:39       ` Kumar Gala
2007-01-12 20:52         ` Benjamin Herrenschmidt
2007-01-12 21:18           ` Kumar Gala
2007-01-12 21:27             ` Benjamin Herrenschmidt
2007-01-12 21:49               ` Kumar Gala
2007-01-12 22:02                 ` Benjamin Herrenschmidt
2007-01-16  9:43                 ` Zhu Ebony-r57400
2007-01-16 21:54                   ` Benjamin Herrenschmidt
2007-01-15  8:06       ` [patch][5/5] powerpc: Add the general support for EmbeddedFloating-Point instructions Zhu Ebony-r57400
2007-01-12 18:53     ` [patch][5/5] powerpc: Add the general support for Embedded Floating-Point instructions Kumar Gala
2007-01-15  7:48       ` Zhu Ebony-r57400
2007-01-12  9:52 ` Christoph Hellwig
2007-01-12 10:23   ` Zhu Ebony-r57400
2007-01-12 12:36     ` Segher Boessenkool
2007-01-15  7:58       ` Zhu Ebony-r57400
2007-01-12 18:57   ` Kumar Gala

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).