Hi, The IA32 fpstate information is not being saved/restored during IA32 exception handling. The issue was first observed when an IA32 binary (which runs fine on an IA32 system) failed on an Itanium-based system. The binary was trying to access the fpstate information during an FPE and got a SEGV, because the fpstate was not being saved and the sigcontext->fpstate pointer was NULL. The attached patch fixes the issue. While fixing this, I also noticed a few bugs in the way sys32_ptrace saves and restores the IA32 fpstate. I am currently working on those and will soon send a patch fixing them. Please let me know if you need any more information on this. Thanks, -Venkatesh <> --- ia64-fpe/arch/ia64/ia32/ia32_signal.c.org Thu Sep 26 10:50:24 2002 +++ ia64-fpe/arch/ia64/ia32/ia32_signal.c Fri Oct 4 11:24:02 2002 @@ -39,6 +39,16 @@ #define __IA32_NR_sigreturn 119 #define __IA32_NR_rt_sigreturn 173 +register double f16 asm ("f16"); register double f17 asm ("f17"); +register double f18 asm ("f18"); register double f19 asm ("f19"); +register double f20 asm ("f20"); register double f21 asm ("f21"); +register double f22 asm ("f22"); register double f23 asm ("f23"); + +register double f24 asm ("f24"); register double f25 asm ("f25"); +register double f26 asm ("f26"); register double f27 asm ("f27"); +register double f28 asm ("f28"); register double f29 asm ("f29"); +register double f30 asm ("f30"); register double f31 asm ("f31"); + struct sigframe_ia32 { int pretcode; @@ -143,6 +153,274 @@ return err; } + +/* + * SAVE and RESTORE of ia32 fpstate info, from ia64 current state + * Used in exception handler to pass the fpstate to the user, and restore + * the fpstate while returning from the exception handler. 
+ * + * fpstate info and their mapping to IA64 regs: + * fpstate REG(BITS) Attribute Comments + * cw ar.fcr(0:12) with bits 7 and 6 not used + * sw ar.fsr(0:15) + * tag ar.fsr(16:31) with odd numbered bits not used + * (read returns 0, writes ignored) + * ipoff ar.fir(0:31) RO + * cssel ar.fir(32:47) RO + * dataoff ar.fdr(0:31) RO + * datasel ar.fdr(32:47) RO + * + * _st[(0+TOS)%8] f8 + * _st[(1+TOS)%8] f9 (f8, f9 from ptregs) + * : : : (f10..f15 from live reg) + * : : : + * _st[(7+TOS)%8] f15 TOS=sw.top(bits11:13) + * + * status Same as sw RO + * magic 0 as X86_FXSR_MAGIC in ia32 + * mxcsr Bits(7:15)=ar.fcr(39:47) + * Bits(0:5) =ar.fsr(32:37) with bit 6 reserved + * _xmm[0..7] f16..f31 (live registers) + * with _xmm[0] + * Bit(64:127)=f17(0:63) + * Bit(0:63)=f16(0:63) + * All other fields unused... + */ + +#define __ldfe(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + __asm__ __volatile__ ("ldfe %0=[%1] ;;" :"=f"(__f__): "r"(x)); \ +}) + +#define __ldf8(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + __asm__ __volatile__ ("ldf8 %0=[%1] ;;" :"=f"(__f__): "r"(x)); \ +}) + +#define __stfe(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + __asm__ __volatile__ ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define __stf8(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + __asm__ __volatile__ ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +static int +save_ia32_fpstate_live (struct _fpstate_ia32 *save) +{ + struct task_struct *tsk = current; + struct pt_regs *ptp; + struct _fpreg_ia32 *fpregp; + char buf[32]; + unsigned long fsr, fcr, fir, fdr; + unsigned long num128[2]; + unsigned long mxcsr=0; + int fp_tos, fr8_st_map; + + if (!access_ok(VERIFY_WRITE, save, sizeof(*save))) + return -EFAULT; + + /* Readin fsr, fcr, fir, fdr and copy onto fpstate */ + asm volatile ( "mov %0=ar.fsr;" : "=r"(fsr)); + asm volatile ( "mov %0=ar.fcr;" : "=r"(fcr)); + asm volatile ( "mov %0=ar.fir;" : 
"=r"(fir)); + asm volatile ( "mov %0=ar.fdr;" : "=r"(fdr)); + + __put_user(fcr & 0xffff, &save->cw); + __put_user(fsr & 0xffff, &save->sw); + __put_user((fsr>>16) & 0xffff, &save->tag); + __put_user(fir, &save->ipoff); + __put_user((fir>>32) & 0xffff, &save->cssel); + __put_user(fdr, &save->dataoff); + __put_user((fdr>>32) & 0xffff, &save->datasel); + __put_user(fsr & 0xffff, &save->status); + + mxcsr = ((fcr>>32) & 0xff80) | ((fsr>>32) & 0x3f); + __put_user(mxcsr & 0xffff, &save->mxcsr); + __put_user( 0, &save->magic); //#define X86_FXSR_MAGIC 0x0000 + + /* + * save f8 and f9 from pt_regs + * save f10..f15 from live register set + */ + /* + * Find the location where f8 has to go in fp reg stack + * This depends on TOP(11:13) field of sw. Other f reg continue + * sequentially from where f8 maps to. + */ + fp_tos = (fsr>>11)&0x7; + fr8_st_map = (8-fp_tos)&0x7; + ptp = ia64_task_regs(tsk); + fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); + ia64f2ia32f(fpregp, &ptp->f8); + copy_to_user(&save->_st[(0+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64f2ia32f(fpregp, &ptp->f9); + copy_to_user(&save->_st[(1+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + + __stfe(fpregp, 10); + copy_to_user(&save->_st[(2+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + __stfe(fpregp, 11); + copy_to_user(&save->_st[(3+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + __stfe(fpregp, 12); + copy_to_user(&save->_st[(4+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + __stfe(fpregp, 13); + copy_to_user(&save->_st[(5+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + __stfe(fpregp, 14); + copy_to_user(&save->_st[(6+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + __stfe(fpregp, 15); + copy_to_user(&save->_st[(7+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + + __stf8(&num128[0], 16); + __stf8(&num128[1], 17); + copy_to_user(&save->_xmm[0], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 18); + 
__stf8(&num128[1], 19); + copy_to_user(&save->_xmm[1], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 20); + __stf8(&num128[1], 21); + copy_to_user(&save->_xmm[2], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 22); + __stf8(&num128[1], 23); + copy_to_user(&save->_xmm[3], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 24); + __stf8(&num128[1], 25); + copy_to_user(&save->_xmm[4], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 26); + __stf8(&num128[1], 27); + copy_to_user(&save->_xmm[5], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 28); + __stf8(&num128[1], 29); + copy_to_user(&save->_xmm[6], num128, sizeof(struct _xmmreg_ia32)); + + __stf8(&num128[0], 30); + __stf8(&num128[1], 31); + copy_to_user(&save->_xmm[7], num128, sizeof(struct _xmmreg_ia32)); + return 0; +} + +static int +restore_ia32_fpstate_live (struct _fpstate_ia32 *save) +{ + struct task_struct *tsk = current; + struct pt_regs *ptp; + unsigned int lo, hi; + unsigned long num128[2]; + unsigned long num64, mxcsr; + struct _fpreg_ia32 *fpregp; + char buf[32]; + unsigned long fsr, fcr; + int fp_tos, fr8_st_map; + + if (!access_ok(VERIFY_READ, save, sizeof(*save))) + return(-EFAULT); + + /* + * Updating fsr, fcr, fir, fdr. + * Just a bit more complicated than save. + * - Need to make sure that we dont write any value other than the + * specific fpstate info + * - Need to make sure that the untouched part of frs, fdr, fir, fcr + * should remain same while writing. + * So, we do a read, change specific fields and write. 
+ */ + asm volatile ( "mov %0=ar.fsr;" : "=r"(fsr)); + asm volatile ( "mov %0=ar.fcr;" : "=r"(fcr)); + + __get_user(mxcsr, (unsigned int *)&save->mxcsr); + /* setting bits 0..5 8..12 with cw and 39..47 from mxcsr */ + __get_user(lo, (unsigned int *)&save->cw); + num64 = mxcsr & 0xff10; + num64 = (num64 << 32) | (lo & 0x1f3f); + fcr = (fcr & (~0xff1000001f3f)) | num64; + + /* setting bits 0..31 with sw and tag and 32..37 from mxcsr */ + __get_user(lo, (unsigned int *)&save->sw); + __get_user(hi, (unsigned int *)&save->tag); + num64 = mxcsr & 0x3f; + num64 = (num64 << 16) | (hi & 0xffff); + num64 = (num64 << 16) | (lo & 0xffff); + fsr = (fsr & (~0x3fffffffff)) | num64; + + asm volatile ( "mov ar.fsr=%0;" :: "r"(fsr)); + asm volatile ( "mov ar.fcr=%0;" :: "r"(fcr)); + /* + * restore f8, f9 onto pt_regs + * restore f10..f15 onto live registers + */ + /* + * Find the location where f8 has to go in fp reg stack + * This depends on TOP(11:13) field of sw. Other f reg continue + * sequentially from where f8 maps to. 
+ */ + fp_tos = (fsr>>11)&0x7; + fr8_st_map = (8-fp_tos)&0x7; + fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); + + ptp = ia64_task_regs(tsk); + copy_from_user(fpregp, &save->_st[(0+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia32f2ia64f(&ptp->f8, fpregp); + copy_from_user(fpregp, &save->_st[(1+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia32f2ia64f(&ptp->f9, fpregp); + + copy_from_user(fpregp, &save->_st[(2+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + __ldfe(10, fpregp); + copy_from_user(fpregp, &save->_st[(3+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + __ldfe(11, fpregp); + copy_from_user(fpregp, &save->_st[(4+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + __ldfe(12, fpregp); + copy_from_user(fpregp, &save->_st[(5+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + __ldfe(13, fpregp); + copy_from_user(fpregp, &save->_st[(6+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + __ldfe(14, fpregp); + copy_from_user(fpregp, &save->_st[(7+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + __ldfe(15, fpregp); + + copy_from_user(num128, &save->_xmm[0], sizeof(struct _xmmreg_ia32)); + __ldf8(16, &num128[0]); + __ldf8(17, &num128[1]); + + copy_from_user(num128, &save->_xmm[1], sizeof(struct _xmmreg_ia32)); + __ldf8(18, &num128[0]); + __ldf8(19, &num128[1]); + + copy_from_user(num128, &save->_xmm[2], sizeof(struct _xmmreg_ia32)); + __ldf8(20, &num128[0]); + __ldf8(21, &num128[1]); + + copy_from_user(num128, &save->_xmm[3], sizeof(struct _xmmreg_ia32)); + __ldf8(22, &num128[0]); + __ldf8(23, &num128[1]); + + copy_from_user(num128, &save->_xmm[4], sizeof(struct _xmmreg_ia32)); + __ldf8(24, &num128[0]); + __ldf8(25, &num128[1]); + + copy_from_user(num128, &save->_xmm[5], sizeof(struct _xmmreg_ia32)); + __ldf8(26, &num128[0]); + __ldf8(27, &num128[1]); + + copy_from_user(num128, &save->_xmm[6], sizeof(struct _xmmreg_ia32)); + __ldf8(28, &num128[0]); + __ldf8(29, &num128[1]); + + copy_from_user(num128, &save->_xmm[7], sizeof(struct _xmmreg_ia32)); + 
__ldf8(30, &num128[0]); + __ldf8(31, &num128[1]); + return 0; +} + static inline void sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int restorer) { @@ -371,6 +649,9 @@ int err = 0; unsigned long flag; + if (!access_ok(VERIFY_WRITE, sc, sizeof(*sc))) + return -EFAULT; + err |= __put_user((regs->r16 >> 32) & 0xffff, (unsigned int *)&sc->fs); err |= __put_user((regs->r16 >> 48) & 0xffff, (unsigned int *)&sc->gs); err |= __put_user((regs->r16 >> 16) & 0xffff, (unsigned int *)&sc->es); @@ -397,6 +678,11 @@ err |= __put_user(regs->r12, &sc->esp_at_signal); err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int *)&sc->ss); + if ( save_ia32_fpstate_live(fpstate) < 0 ) + err = -EFAULT; + else + err |= __put_user((u32)(u64)fpstate, &sc->fpstate); + #if 0 tmp = save_i387(fpstate); if (tmp < 0) @@ -418,6 +704,9 @@ { unsigned int err = 0; + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + return(-EFAULT); + #define COPY(ia64x, ia32x) err |= __get_user(regs->ia64x, &sc->ia32x) #define copyseg_gs(tmp) (regs->r16 |= (unsigned long) tmp << 48) @@ -477,6 +766,16 @@ regs->r1 = -1; /* disable syscall checks, r1 is orig_eax */ } + { + struct _fpstate_ia32 *buf = NULL; + u32 fpstate_ptr; + err |= get_user(fpstate_ptr, &(sc->fpstate)); + buf = (struct _fpstate_ia32 *)(u64)fpstate_ptr; + if (buf) { + err |= restore_ia32_fpstate_live(buf); + } + } + #if 0 { struct _fpstate * buf; --- ia64-fpe/arch/ia64/ia32/sys_ia32.c.org Fri Sep 27 14:53:07 2002 +++ ia64-fpe/arch/ia64/ia32/sys_ia32.c Mon Sep 30 17:51:02 2002 @@ -2843,20 +2843,6 @@ } } -static inline void -ia32f2ia64f (void *dst, void *src) -{ - asm volatile ("ldfe f6=[%1];; stf.spill [%0]=f6" :: "r"(dst), "r"(src) : "memory"); - return; -} - -static inline void -ia64f2ia32f (void *dst, void *src) -{ - asm volatile ("ldf.fill f6=[%1];; stfe [%0]=f6" :: "r"(dst), "r"(src) : "memory"); - return; -} - static void put_fpreg (int regno, struct _fpreg_ia32 *reg, struct pt_regs *ptp, struct switch_stack *swp, int 
tos) --- ia64-fpe/arch/ia64/kernel/signal.c.org Tue Oct 1 15:22:38 2002 +++ ia64-fpe/arch/ia64/kernel/signal.c Wed Oct 2 10:35:14 2002 @@ -41,6 +41,16 @@ extern long ia64_do_signal (sigset_t *, struct sigscratch *, long); /* forward decl */ +register double f16 asm ("f16"); register double f17 asm ("f17"); +register double f18 asm ("f18"); register double f19 asm ("f19"); +register double f20 asm ("f20"); register double f21 asm ("f21"); +register double f22 asm ("f22"); register double f23 asm ("f23"); + +register double f24 asm ("f24"); register double f25 asm ("f25"); +register double f26 asm ("f26"); register double f27 asm ("f27"); +register double f28 asm ("f28"); register double f29 asm ("f29"); +register double f30 asm ("f30"); register double f31 asm ("f31"); + long ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct sigscratch *scr) { --- ia64-fpe/include/asm-ia64/ia32.h.org Tue Oct 1 13:13:06 2002 +++ ia64-fpe/include/asm-ia64/ia32.h Wed Oct 2 13:54:04 2002 @@ -73,6 +73,17 @@ unsigned short exponent; }; +struct _fpxreg_ia32 { + unsigned short significand[4]; + unsigned short exponent; + unsigned short padding[3]; +}; + +struct _xmmreg_ia32 { + unsigned int element[4]; +}; + + struct _fpstate_ia32 { unsigned int cw, sw, @@ -82,7 +93,16 @@ dataoff, datasel; struct _fpreg_ia32 _st[8]; - unsigned int status; + unsigned short status; + unsigned short magic; /* 0xffff = regular FPU data only */ + + /* FXSR FPU environment */ + unsigned int _fxsr_env[6]; /* FXSR FPU env is ignored */ + unsigned int mxcsr; + unsigned int reserved; + struct _fpxreg_ia32 _fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct _xmmreg_ia32 _xmm[8]; + unsigned int padding[56]; }; struct sigcontext_ia32 { @@ -485,6 +505,18 @@ extern unsigned long ia32_do_mmap (struct file *, unsigned long, unsigned long, int, int, loff_t); extern void ia32_load_segment_descriptors (struct task_struct *task); +#define ia32f2ia64f(dst,src) \ + do { \ + register double f6 asm ("f6"); \ + asm 
volatile ("ldfe f6=[%2];; stf.spill [%1]=f6" : "=f"(f6): "r"(dst), "r"(src) : "memory"); \ + } while(0) + +#define ia64f2ia32f(dst,src) \ + do { \ + register double f6 asm ("f6"); \ + asm volatile ("ldf.fill f6=[%2];; stfe [%1]=f6" : "=f"(f6): "r"(dst), "r"(src) : "memory"); \ + } while(0) + #endif /* !CONFIG_IA32_SUPPORT */ #endif /* _ASM_IA64_IA32_H */