* [PATCH] Improve o32 syscall handling
@ 2004-11-21 16:45 Thiemo Seufer
2004-11-22 6:18 ` Ralf Baechle
2004-11-23 12:47 ` Gleb O. Raiko
0 siblings, 2 replies; 7+ messages in thread
From: Thiemo Seufer @ 2004-11-21 16:45 UTC (permalink / raw)
To: linux-mips; +Cc: ralf
Hello All,
this is a major cleanup for the o32 syscall handling.
For the 32bit kernel, it
- uses a more efficient syscall table layout, and reduces its size
- handles stack arguments also more efficiently, and allows for up
to 8 arguments. This gives an indirect fadvise64_64 syscall a
chance to work.
- Fixes several flaws in the indirect syscall path, like duplicated
user stack handling, and incomplete argument handling.
For the 64bit Kernel, it
- checks for unaligned user stack
- also allows now up to 8 arguments
- removes unused stackhandling cruft from the indirect syscall path
and does complete argument handling there.
Thiemo
Index: arch/mips/kernel/scall32-o32.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/kernel/scall32-o32.S,v
retrieving revision 1.15
diff -u -p -r1.15 scall32-o32.S
--- arch/mips/kernel/scall32-o32.S 15 Nov 2004 11:49:19 -0000 1.15
+++ arch/mips/kernel/scall32-o32.S 20 Nov 2004 16:46:39 -0000
@@ -5,6 +5,7 @@
*
* Copyright (C) 1995, 96, 97, 98, 99, 2000, 01, 02 by Ralf Baechle
* Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2004 Thiemo Seufer
*/
#include <linux/config.h>
#include <linux/errno.h>
@@ -32,26 +33,30 @@ NESTED(handle_sys, PT_SIZE, sp)
lw t1, PT_EPC(sp) # skip syscall on return
+#if defined(CONFIG_BINFMT_IRIX)
sltiu t0, v0, MAX_SYSCALL_NO + 1 # check syscall number
+#else
+ subu v0, v0, __NR_O32_Linux # check syscall number
+ sltiu t0, v0, __NR_O32_Linux_syscalls + 1
+#endif
addiu t1, 4 # skip to next instruction
sw t1, PT_EPC(sp)
beqz t0, illegal_syscall
- /* XXX Put both in one cacheline, should save a bit. */
- sll t0, v0, 2
- lw t2, sys_call_table(t0) # syscall routine
- lbu t3, sys_narg_table(v0) # number of arguments
- beqz t2, illegal_syscall;
+ sll t0, v0, 3
+ la t1, sys_call_table
+ addu t1, t0
+ lw t2, (t1) # syscall routine
+ lw t3, 4(t1) # >= 0 if we need stack arguments
+ beqz t2, illegal_syscall
- subu t0, t3, 5 # 5 or more arguments?
sw a3, PT_R26(sp) # save a3 for syscall restarting
- bgez t0, stackargs
+ bgez t3, stackargs
stack_done:
- sw a3, PT_R26(sp) # save for syscall restart
- LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
+ lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
- and t0, t1, t0
+ and t0, t1
bnez t0, syscall_trace_entry # -> yes
jalr t2 # Do The Real Thing (TM)
@@ -70,9 +75,9 @@ o32_syscall_exit:
local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
- LONG_L a2, TI_FLAGS($28) # current->work
+ lw a2, TI_FLAGS($28) # current->work
li t0, _TIF_ALLWORK_MASK
- and t0, a2, t0
+ and t0, a2
bnez t0, o32_syscall_exit_work
j restore_partial
@@ -117,49 +122,50 @@ syscall_trace_entry:
*/
stackargs:
lw t0, PT_R29(sp) # get old user stack pointer
- subu t3, 4
- sll t1, t3, 2 # stack valid?
-
- addu t1, t0 # end address
- or t0, t1
- bltz t0, bad_stack # -> sp is bad
-
- lw t0, PT_R29(sp) # get old user stack pointer
- PTR_LA t1, 4f # copy 1 to 3 arguments
- sll t3, t3, 4
- subu t1, t3
- jr t1
- /* Ok, copy the args from the luser stack to the kernel stack */
/*
- * I know Ralf doesn't like nops but this avoids code
- * duplication for R3000 targets (and this is the
- * only place where ".set reorder" doesn't help).
- * Harald.
+ * We intentionally keep the kernel stack a little below the top of
+ * userspace so we don't have to do a slower byte accurate check here.
*/
+ andi t1, t0, 7
+ lw t5, TI_ADDR_LIMIT($28)
+ bnez t1, bad_stack
+ addu t4, t0, 32
+ and t5, t4
+ bltz t5, bad_stack # -> sp is bad
+
+ /* Ok, copy the args from the luser stack to the kernel stack.
+ * t3 is the precomputed number of instruction bytes needed to
+ * load or store arguments 6-8.
+ */
+
+ la t1, 5f # load up to 3 arguments
+ subu t1, t3
+1: lw t5, 16(t0) # argument #5 from usp
.set push
.set noreorder
.set nomacro
-1: lw t1, 24(t0) # argument #7 from usp
- nop
- sw t1, 24(sp)
- nop
-2: lw t1, 20(t0) # argument #5 from usp
- nop
- sw t1, 20(sp)
- nop
-3: lw t1, 16(t0) # argument #5 from usp
- nop
- sw t1, 16(sp)
- nop
-4: .set pop
+ jr t1
+ addiu t1, 6f - 5f
- j stack_done # go back
+2: lw t8, 28(t0) # argument #8 from usp
+3: lw t7, 24(t0) # argument #7 from usp
+4: lw t6, 20(t0) # argument #6 from usp
+5: jr t1
+ sw t5, 16(sp) # argument #5 to ksp
+
+ sw t8, 28(sp) # argument #8 to ksp
+ sw t7, 24(sp) # argument #7 to ksp
+ sw t6, 20(sp) # argument #6 to ksp
+6: j stack_done # go back
+ nop
+ .set pop
.section __ex_table,"a"
PTR 1b,bad_stack
PTR 2b,bad_stack
PTR 3b,bad_stack
+ PTR 4b,bad_stack
.previous
/*
@@ -239,12 +245,12 @@ illegal_syscall:
sw v0, PT_R2(sp) # result
/* Success, so skip usual error handling garbage. */
- LONG_L a2, TI_FLAGS($28) # syscall tracing enabled?
+ lw a2, TI_FLAGS($28) # syscall tracing enabled?
li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
and t0, a2, t0
bnez t0, 1f
- b o32_syscall_exit
+ j o32_syscall_exit
1: SAVE_STATIC
move a0, sp
@@ -270,67 +276,47 @@ bad_alignment:
END(sys_sysmips)
LEAF(sys_syscall)
- lw t0, PT_R29(sp) # user sp
-
- sltu v0, a0, __NR_O32_Linux + __NR_O32_Linux_syscalls + 1
+#if defined(CONFIG_BINFMT_IRIX)
+ sltiu v0, a0, MAX_SYSCALL_NO + 1 # check syscall number
+#else
+ subu v0, a0, __NR_O32_Linux # check syscall number
+ sltiu v0, v0, __NR_O32_Linux_syscalls + 1
+#endif
beqz v0, enosys
- sll v0, a0, 2
- la v1, sys_syscall
- lw t2, sys_call_table(v0) # function pointer
- lbu t4, sys_narg_table(a0) # number of arguments
-
- li v0, -EINVAL
- beq t2, v1, out # do not recurse
+ sll t0, v0, 3
+ lw t2, sys_call_table(t0) # syscall routine
+ li v1, 4000 # nr of sys_syscall
beqz t2, enosys # null function pointer?
- andi v0, t0, 0x3 # unaligned stack pointer?
- bnez v0, sigsegv
+ li v0, -EINVAL
+ beq a0, v1, out # do not recurse
- addu v0, t0, 16 # v0 = usp + 16
- addu t1, v0, 12 # 3 32-bit arguments
- lw v1, TI_ADDR_LIMIT($28)
- or v0, v0, t1
- and v1, v1, v0
- bltz v1, efault
+ /* Some syscalls like execve get their arguments from struct pt_regs
+ and claim zero arguments in the syscall table. Thus we have to
+ assume the worst case and shuffle around all potential arguments.
+ If you want performance, don't use indirect syscalls. */
move a0, a1 # shift argument registers
move a1, a2
move a2, a3
-
-1: lw a3, 16(t0)
-2: lw t3, 20(t0)
-3: lw t4, 24(t0)
-
- .section __ex_table, "a"
- .word 1b, efault
- .word 2b, efault
- .word 3b, efault
- .previous
-
- sw t3, 16(sp) # put into new stackframe
- sw t4, 20(sp)
-
- bnez t4, 1f # zero arguments?
- addu a0, sp, 32 # then pass sp in a0
-1:
-
- sw t3, 16(sp)
- sw v1, 20(sp)
+ lw a3, 16(sp)
+ lw t4, 20(sp)
+ lw t5, 24(sp)
+ lw t6, 28(sp)
+ sw t4, 16(sp)
+ sw t5, 20(sp)
+ sw t6, 24(sp)
+ sw a0, PT_R4(sp) # .. and push back a0 - a3, some
+ sw a1, PT_R5(sp) # syscalls expect them there
+ sw a2, PT_R6(sp)
+ sw a3, PT_R7(sp)
+ sw a3, PT_R26(sp) # update a3 for syscall restarting
jr t2
/* Unreached */
enosys: li v0, -ENOSYS
- b out
-
-sigsegv:
- li a0, _SIGSEGV
- move a1, $28
- jal force_sig
- /* Fall through */
-
-efault: li v0, -EFAULT
out: jr ra
END(sys_syscall)
@@ -350,12 +336,14 @@ out: jr ra
.endm
.macro syscalltable
+#if defined(CONFIG_BINFMT_IRIX)
mille sys_ni_syscall 0 /* 0 - 999 SVR4 flavour */
- #include "irix5sys.h" /* 1000 - 1999 32-bit IRIX */
+# include "irix5sys.h" /* 1000 - 1999 32-bit IRIX */
mille sys_ni_syscall 0 /* 2000 - 2999 BSD43 flavour */
mille sys_ni_syscall 0 /* 3000 - 3999 POSIX flavour */
+#endif
- sys sys_syscall 0 /* 4000 */
+ sys sys_syscall 8 /* 4000 */
sys sys_exit 1
sys sys_fork 0
sys sys_read 3
@@ -641,19 +629,16 @@ out: jr ra
.endm
+ /* We pre-compute the number of _instruction_ bytes needed to
+ load or store the arguments 6-8. Negative values are ignored. */
+
.macro sys function, nargs
PTR \function
+ LONG (\nargs << 2) - (5 << 2)
.endm
.align 3
+ .type sys_call_table,@object
sys_call_table:
syscalltable
.size sys_call_table, . - sys_call_table
-
- .macro sys function, nargs
- .byte \nargs
- .endm
-
-sys_narg_table:
- syscalltable
- .size sys_narg_table, . - sys_narg_table
Index: arch/mips/kernel/scall64-o32.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/kernel/scall64-o32.S,v
retrieving revision 1.22
diff -u -p -r1.22 scall64-o32.S
--- arch/mips/kernel/scall64-o32.S 15 Nov 2004 11:49:19 -0000 1.22
+++ arch/mips/kernel/scall64-o32.S 20 Nov 2004 16:46:39 -0000
@@ -6,6 +6,7 @@
* Copyright (C) 1995 - 2000, 2001 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2004 Thiemo Seufer
*
* Hairy, the userspace application uses a different argument passing
* convention than the kernel, so we have to translate things from o32
@@ -43,6 +44,8 @@ NESTED(handle_sys, PT_SIZE, sp)
RESTORE_ALL
#endif
+ /* We don't want to stumble over broken sign extensions from
+ userland. O32 does never use the upper half. */
sll a0, a0, 0
sll a1, a1, 0
sll a2, a2, 0
@@ -62,17 +65,21 @@ NESTED(handle_sys, PT_SIZE, sp)
* userspace so we don't have to do a slower byte accurate check here.
*/
ld t0, PT_R29(sp) # get old user stack pointer
+ andi t3, t0, 7
+ bnez t3, bad_stack
daddu t1, t0, 32
bltz t1, bad_stack
1: lw a4, 16(t0) # argument #5 from usp
2: lw a5, 20(t0) # argument #6 from usp
3: lw a6, 24(t0) # argument #7 from usp
+4: lw a7, 28(t0) # argument #8 from usp (for indirect syscalls)
.section __ex_table,"a"
PTR 1b, bad_stack
PTR 2b, bad_stack
PTR 3b, bad_stack
+ PTR 4b, bad_stack
.previous
li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
@@ -91,7 +98,7 @@ NESTED(handle_sys, PT_SIZE, sp)
sd v0, PT_R0(sp) # flag for syscall restarting
1: sd v0, PT_R2(sp) # result
-FEXPORT(o32_syscall_exit)
+o32_syscall_exit:
local_irq_disable # make need_resched and
# signals dont change between
# sampling and return
@@ -109,12 +116,11 @@ o32_syscall_exit_work:
trace_a_syscall:
SAVE_STATIC
- sd a4, PT_R8(sp)
+ sd t2, PT_R1(sp)
+ sd a4, PT_R8(sp) # Save argument registers
sd a5, PT_R9(sp)
sd a6, PT_R10(sp)
- sd a7, PT_R11(sp)
-
- sd t2,PT_R1(sp)
+ sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
li a1, 0
jal do_syscall_trace
@@ -126,7 +132,8 @@ trace_a_syscall:
ld a3, PT_R7(sp)
ld a4, PT_R8(sp)
ld a5, PT_R9(sp)
- ld a6, PT_R10(sp) # For indirect syscalls
+ ld a6, PT_R10(sp)
+ ld a7, PT_R11(sp) # For indirect syscalls
jalr t2
li t0, -EMAXERRNO - 1 # error?
@@ -174,55 +181,40 @@ illegal_syscall:
END(handle_sys)
LEAF(sys32_syscall)
- ld t0, PT_R29(sp) # user sp
-
sltu v0, a0, __NR_O32_Linux + __NR_O32_Linux_syscalls + 1
beqz v0, enosys
dsll v0, a0, 3
- dla v1, sys32_syscall
ld t2, (sys_call_table - (__NR_O32_Linux * 8))(v0)
+ li v1, 4000 # indirect syscall number
li v0, -EINVAL
- beq t2, v1, out # do not recurse
+ beq a0, v1, out # do not recurse
beqz t2, enosys # null function pointer?
- andi v0, t0, 0x3 # unaligned stack pointer?
- bnez v0, sigsegv
-
- daddiu v0, t0, 16 # v0 = usp + 16
- daddu t1, v0, 12 # 3 32-bit arguments
- ld v1, TI_ADDR_LIMIT($28)
- or v0, v0, t1
- and v1, v1, v0
- bnez v1, efault
-
move a0, a1 # shift argument registers
move a1, a2
move a2, a3
move a3, a4
move a4, a5
move a5, a6
+ move a6, a7
+ sd a0, PT_R4(sp) # ... and push back a0 - a3, some
+ sd a1, PT_R5(sp) # syscalls expect them there
+ sd a2, PT_R6(sp)
+ sd a3, PT_R7(sp)
+ sd a3, PT_R26(sp) # update a3 for syscall restarting
jr t2
/* Unreached */
enosys: li v0, -ENOSYS
- b out
-
-sigsegv:
- li a0, _SIGSEGV
- move a1, $28
- jal force_sig
- /* Fall through */
-
-efault: li v0, -EFAULT
out: jr ra
END(sys32_syscall)
.align 3
- .type sys_call_table,@object;
+ .type sys_call_table,@object
sys_call_table:
PTR sys32_syscall /* 4000 */
PTR sys_exit
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Improve o32 syscall handling
2004-11-21 16:45 [PATCH] Improve o32 syscall handling Thiemo Seufer
@ 2004-11-22 6:18 ` Ralf Baechle
2004-11-22 7:00 ` Thiemo Seufer
2004-11-23 12:47 ` Gleb O. Raiko
1 sibling, 1 reply; 7+ messages in thread
From: Ralf Baechle @ 2004-11-22 6:18 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: linux-mips
On Sun, Nov 21, 2004 at 05:45:57PM +0100, Thiemo Seufer wrote:
> For the 64bit Kernel, it
> - checks for unaligned user stack
Why bother, the unaligned exception handler should take care of this.
> - also allows now up to 8 arguments
Quite frankly I'd prefer to see this being handled in userspace. For o32
it's too late to go for that but for N32 / N64 we still may have a chance.
> - LONG_L a2, TI_FLAGS($28) # current->work
> + lw a2, TI_FLAGS($28) # current->work
Flags is a long variable.
Ralf
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Improve o32 syscall handling
2004-11-22 6:18 ` Ralf Baechle
@ 2004-11-22 7:00 ` Thiemo Seufer
2004-11-22 7:13 ` Ralf Baechle
0 siblings, 1 reply; 7+ messages in thread
From: Thiemo Seufer @ 2004-11-22 7:00 UTC (permalink / raw)
To: Ralf Baechle; +Cc: linux-mips
Ralf Baechle wrote:
> On Sun, Nov 21, 2004 at 05:45:57PM +0100, Thiemo Seufer wrote:
>
> > For the 64bit Kernel, it
> > - checks for unaligned user stack
>
> Why bother, the unaligned exception handler should take care of this.
It really does so for unaligned accesses from kernel space?
> > - also allows now up to 8 arguments
>
> Quite frankly I'd prefer to see this being handle in userspace. For o32
> it's too late to go for that but for N32 / N64 we still may have a chance.
My changes are for O32 only. N32/N64 doesn't need more than 6 arguments.
> > - LONG_L a2, TI_FLAGS($28) # current->work
> > + lw a2, TI_FLAGS($28) # current->work
>
> Flags is a long variable.
"long" isn't a quantity the assembler knows about. :-)
The whole assembler file for O32 support in 32bit Kernels makes only
sense when it is compiled as 32bit code. In that case, the C "long"
has 4 bytes and is loaded with lw. Using a macro which abstracts for
32/64bit compilation hides this needlessly, and can even lead to the
erroneous impression the code would be useful for 64bit, too.
Thiemo
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Improve o32 syscall handling
2004-11-22 7:00 ` Thiemo Seufer
@ 2004-11-22 7:13 ` Ralf Baechle
2004-11-22 9:37 ` Thiemo Seufer
0 siblings, 1 reply; 7+ messages in thread
From: Ralf Baechle @ 2004-11-22 7:13 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: linux-mips
On Mon, Nov 22, 2004 at 08:00:04AM +0100, Thiemo Seufer wrote:
> > Why bother, the unaligned exception handler should take care of this.
>
> It really does so for unaligned accesses from kernel space?
Yes. In fact it's crucially important for this very case. TCP for example
may result in misalignment. And not everybody is using get_unaligned /
put_unaligned as they were intended. Relying on the unaligned handler
is preferable where we expect pointers to be properly aligned almost
always.
The MIPS ABI mandates at least 8 byte stack alignment and funny things
happen if that assumption is violated. So there is no motivation at all
to care about the performance of misalignment. Aside of me defining this
to be verboten by punishment of signal 9 ;-)
> has 4 bytes and is loaded with lw. Using a macro which abstracts for
> 32/64bit compilation hides this needlessly, and can even lead to the
> erraneous impression the code would be useful for 64bit, too.
I'm more following the religion of using such abstractions everywhere
because code tends to be copied around mindlessly ...
Ralf
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Improve o32 syscall handling
2004-11-22 7:13 ` Ralf Baechle
@ 2004-11-22 9:37 ` Thiemo Seufer
2004-11-27 0:29 ` Thiemo Seufer
0 siblings, 1 reply; 7+ messages in thread
From: Thiemo Seufer @ 2004-11-22 9:37 UTC (permalink / raw)
To: Ralf Baechle; +Cc: linux-mips
Ralf Baechle wrote:
> On Mon, Nov 22, 2004 at 08:00:04AM +0100, Thiemo Seufer wrote:
>
> > > Why bother, the unaligned exception handler should take care of this.
> >
> > It really does so for unaligned accesses from kernel space?
>
> Yes. In fact it's crucially important for this very case.
Ok, I'll update the patch accordingly when I'm back to better
connectivity than I have now.
[snip]
> > has 4 bytes and is loaded with lw. Using a macro which abstracts for
> > 32/64bit compilation hides this needlessly, and can even lead to the
> > erraneous impression the code would be useful for 64bit, too.
>
> I'm more following the religion of using such abstractions everywhere
> because code tends to be copied around mindlessly ...
I would agree if there was a roughly similar 64bit version of the code.
But due to the differences between 32bit and 64bit kernel there will
never be one, so it's IMHO best to make them as distinct as reasonable
in this case.
Thiemo
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Improve o32 syscall handling
2004-11-22 9:37 ` Thiemo Seufer
@ 2004-11-27 0:29 ` Thiemo Seufer
0 siblings, 0 replies; 7+ messages in thread
From: Thiemo Seufer @ 2004-11-27 0:29 UTC (permalink / raw)
To: linux-mips; +Cc: Ralf Baechle
Thiemo Seufer wrote:
> Ralf Baechle wrote:
> > On Mon, Nov 22, 2004 at 08:00:04AM +0100, Thiemo Seufer wrote:
> >
> > > > Why bother, the unaligned exception handler should take care of this.
> > >
> > > It really does so for unaligned accesses from kernel space?
> >
> > Yes. In fact it's crucially important for this very case.
>
> Ok, I'll update the patch accordingly when I'm back to better
> connectivity than I have now.
>
> [snip]
> > > has 4 bytes and is loaded with lw. Using a macro which abstracts for
> > > 32/64bit compilation hides this needlessly, and can even lead to the
> > > erraneous impression the code would be useful for 64bit, too.
> >
> > I'm more following the religion of using such abstractions everywhere
> > because code tends to be copied around mindlessly ...
>
> I would agree if there was a roughly similiar 64bit version of the code.
> But due to the differences between 32bit and 64bit kernel there will
> never be one, so it's IMHO best to make them as distinct as reasonable
> in this case.
The appended patch leaves unaligned stack handling alone and is updated
to newest CVS, including the recent ptrace fix.
Thiemo
Index: arch/mips/kernel/scall32-o32.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/kernel/scall32-o32.S,v
retrieving revision 1.17
diff -u -p -r1.17 scall32-o32.S
--- arch/mips/kernel/scall32-o32.S 25 Nov 2004 13:40:10 -0000 1.17
+++ arch/mips/kernel/scall32-o32.S 26 Nov 2004 23:15:47 -0000
@@ -5,6 +5,7 @@
*
* Copyright (C) 1995, 96, 97, 98, 99, 2000, 01, 02 by Ralf Baechle
* Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2004 Thiemo Seufer
*/
#include <linux/config.h>
#include <linux/errno.h>
@@ -32,26 +33,30 @@ NESTED(handle_sys, PT_SIZE, sp)
lw t1, PT_EPC(sp) # skip syscall on return
+#if defined(CONFIG_BINFMT_IRIX)
sltiu t0, v0, MAX_SYSCALL_NO + 1 # check syscall number
+#else
+ subu v0, v0, __NR_O32_Linux # check syscall number
+ sltiu t0, v0, __NR_O32_Linux_syscalls + 1
+#endif
addiu t1, 4 # skip to next instruction
sw t1, PT_EPC(sp)
beqz t0, illegal_syscall
- /* XXX Put both in one cacheline, should save a bit. */
- sll t0, v0, 2
- lw t2, sys_call_table(t0) # syscall routine
- lbu t3, sys_narg_table(v0) # number of arguments
- beqz t2, illegal_syscall;
+ sll t0, v0, 3
+ la t1, sys_call_table
+ addu t1, t0
+ lw t2, (t1) # syscall routine
+ lw t3, 4(t1) # >= 0 if we need stack arguments
+ beqz t2, illegal_syscall
- subu t0, t3, 5 # 5 or more arguments?
sw a3, PT_R26(sp) # save a3 for syscall restarting
- bgez t0, stackargs
+ bgez t3, stackargs
stack_done:
- sw a3, PT_R26(sp) # save for syscall restart
- LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
+ lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
- and t0, t1, t0
+ and t0, t1
bnez t0, syscall_trace_entry # -> yes
jalr t2 # Do The Real Thing (TM)
@@ -70,9 +75,9 @@ o32_syscall_exit:
local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
- LONG_L a2, TI_FLAGS($28) # current->work
+ lw a2, TI_FLAGS($28) # current->work
li t0, _TIF_ALLWORK_MASK
- and t0, a2, t0
+ and t0, a2
bnez t0, o32_syscall_exit_work
j restore_partial
@@ -116,49 +121,48 @@ syscall_trace_entry:
*/
stackargs:
lw t0, PT_R29(sp) # get old user stack pointer
- subu t3, 4
- sll t1, t3, 2 # stack valid?
-
- addu t1, t0 # end address
- or t0, t1
- bltz t0, bad_stack # -> sp is bad
-
- lw t0, PT_R29(sp) # get old user stack pointer
- PTR_LA t1, 4f # copy 1 to 3 arguments
- sll t3, t3, 4
- subu t1, t3
- jr t1
- /* Ok, copy the args from the luser stack to the kernel stack */
/*
- * I know Ralf doesn't like nops but this avoids code
- * duplication for R3000 targets (and this is the
- * only place where ".set reorder" doesn't help).
- * Harald.
+ * We intentionally keep the kernel stack a little below the top of
+ * userspace so we don't have to do a slower byte accurate check here.
*/
+ lw t5, TI_ADDR_LIMIT($28)
+ addu t4, t0, 32
+ and t5, t4
+ bltz t5, bad_stack # -> sp is bad
+
+ /* Ok, copy the args from the luser stack to the kernel stack.
+ * t3 is the precomputed number of instruction bytes needed to
+ * load or store arguments 6-8.
+ */
+
+ la t1, 5f # load up to 3 arguments
+ subu t1, t3
+1: lw t5, 16(t0) # argument #5 from usp
.set push
.set noreorder
.set nomacro
-1: lw t1, 24(t0) # argument #7 from usp
- nop
- sw t1, 24(sp)
- nop
-2: lw t1, 20(t0) # argument #5 from usp
- nop
- sw t1, 20(sp)
- nop
-3: lw t1, 16(t0) # argument #5 from usp
- nop
- sw t1, 16(sp)
- nop
-4: .set pop
+ jr t1
+ addiu t1, 6f - 5f
- j stack_done # go back
+2: lw t8, 28(t0) # argument #8 from usp
+3: lw t7, 24(t0) # argument #7 from usp
+4: lw t6, 20(t0) # argument #6 from usp
+5: jr t1
+ sw t5, 16(sp) # argument #5 to ksp
+
+ sw t8, 28(sp) # argument #8 to ksp
+ sw t7, 24(sp) # argument #7 to ksp
+ sw t6, 20(sp) # argument #6 to ksp
+6: j stack_done # go back
+ nop
+ .set pop
.section __ex_table,"a"
PTR 1b,bad_stack
PTR 2b,bad_stack
PTR 3b,bad_stack
+ PTR 4b,bad_stack
.previous
/*
@@ -238,12 +242,12 @@ illegal_syscall:
sw v0, PT_R2(sp) # result
/* Success, so skip usual error handling garbage. */
- LONG_L a2, TI_FLAGS($28) # syscall tracing enabled?
+ lw a2, TI_FLAGS($28) # syscall tracing enabled?
li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
and t0, a2, t0
bnez t0, 1f
- b o32_syscall_exit
+ j o32_syscall_exit
1: SAVE_STATIC
move a0, sp
@@ -269,67 +273,47 @@ bad_alignment:
END(sys_sysmips)
LEAF(sys_syscall)
- lw t0, PT_R29(sp) # user sp
-
- sltu v0, a0, __NR_O32_Linux + __NR_O32_Linux_syscalls + 1
+#if defined(CONFIG_BINFMT_IRIX)
+ sltiu v0, a0, MAX_SYSCALL_NO + 1 # check syscall number
+#else
+ subu v0, a0, __NR_O32_Linux # check syscall number
+ sltiu v0, v0, __NR_O32_Linux_syscalls + 1
+#endif
beqz v0, enosys
- sll v0, a0, 2
- la v1, sys_syscall
- lw t2, sys_call_table(v0) # function pointer
- lbu t4, sys_narg_table(a0) # number of arguments
-
- li v0, -EINVAL
- beq t2, v1, out # do not recurse
+ sll t0, v0, 3
+ lw t2, sys_call_table(t0) # syscall routine
+ li v1, 4000 # nr of sys_syscall
beqz t2, enosys # null function pointer?
- andi v0, t0, 0x3 # unaligned stack pointer?
- bnez v0, sigsegv
+ li v0, -EINVAL
+ beq a0, v1, out # do not recurse
- addu v0, t0, 16 # v0 = usp + 16
- addu t1, v0, 12 # 3 32-bit arguments
- lw v1, TI_ADDR_LIMIT($28)
- or v0, v0, t1
- and v1, v1, v0
- bltz v1, efault
+ /* Some syscalls like execve get their arguments from struct pt_regs
+ and claim zero arguments in the syscall table. Thus we have to
+ assume the worst case and shuffle around all potential arguments.
+ If you want performance, don't use indirect syscalls. */
move a0, a1 # shift argument registers
move a1, a2
move a2, a3
-
-1: lw a3, 16(t0)
-2: lw t3, 20(t0)
-3: lw t4, 24(t0)
-
- .section __ex_table, "a"
- .word 1b, efault
- .word 2b, efault
- .word 3b, efault
- .previous
-
- sw t3, 16(sp) # put into new stackframe
- sw t4, 20(sp)
-
- bnez t4, 1f # zero arguments?
- addu a0, sp, 32 # then pass sp in a0
-1:
-
- sw t3, 16(sp)
- sw v1, 20(sp)
+ lw a3, 16(sp)
+ lw t4, 20(sp)
+ lw t5, 24(sp)
+ lw t6, 28(sp)
+ sw t4, 16(sp)
+ sw t5, 20(sp)
+ sw t6, 24(sp)
+ sw a0, PT_R4(sp) # .. and push back a0 - a3, some
+ sw a1, PT_R5(sp) # syscalls expect them there
+ sw a2, PT_R6(sp)
+ sw a3, PT_R7(sp)
+ sw a3, PT_R26(sp) # update a3 for syscall restarting
jr t2
/* Unreached */
enosys: li v0, -ENOSYS
- b out
-
-sigsegv:
- li a0, _SIGSEGV
- move a1, $28
- jal force_sig
- /* Fall through */
-
-efault: li v0, -EFAULT
out: jr ra
END(sys_syscall)
@@ -349,12 +333,14 @@ out: jr ra
.endm
.macro syscalltable
+#if defined(CONFIG_BINFMT_IRIX)
mille sys_ni_syscall 0 /* 0 - 999 SVR4 flavour */
- #include "irix5sys.h" /* 1000 - 1999 32-bit IRIX */
+# include "irix5sys.h" /* 1000 - 1999 32-bit IRIX */
mille sys_ni_syscall 0 /* 2000 - 2999 BSD43 flavour */
mille sys_ni_syscall 0 /* 3000 - 3999 POSIX flavour */
+#endif
- sys sys_syscall 0 /* 4000 */
+ sys sys_syscall 8 /* 4000 */
sys sys_exit 1
sys sys_fork 0
sys sys_read 3
@@ -640,19 +626,16 @@ out: jr ra
.endm
+ /* We pre-compute the number of _instruction_ bytes needed to
+ load or store the arguments 6-8. Negative values are ignored. */
+
.macro sys function, nargs
PTR \function
+ LONG (\nargs << 2) - (5 << 2)
.endm
.align 3
+ .type sys_call_table,@object
sys_call_table:
syscalltable
.size sys_call_table, . - sys_call_table
-
- .macro sys function, nargs
- .byte \nargs
- .endm
-
-sys_narg_table:
- syscalltable
- .size sys_narg_table, . - sys_narg_table
Index: arch/mips/kernel/scall64-o32.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/kernel/scall64-o32.S,v
retrieving revision 1.24
diff -u -p -r1.24 scall64-o32.S
--- arch/mips/kernel/scall64-o32.S 25 Nov 2004 13:40:10 -0000 1.24
+++ arch/mips/kernel/scall64-o32.S 26 Nov 2004 23:15:47 -0000
@@ -6,6 +6,7 @@
* Copyright (C) 1995 - 2000, 2001 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2004 Thiemo Seufer
*
* Hairy, the userspace application uses a different argument passing
* convention than the kernel, so we have to translate things from o32
@@ -43,6 +44,8 @@ NESTED(handle_sys, PT_SIZE, sp)
RESTORE_ALL
#endif
+ /* We don't want to stumble over broken sign extensions from
+ userland. O32 does never use the upper half. */
sll a0, a0, 0
sll a1, a1, 0
sll a2, a2, 0
@@ -68,11 +71,13 @@ NESTED(handle_sys, PT_SIZE, sp)
1: lw a4, 16(t0) # argument #5 from usp
2: lw a5, 20(t0) # argument #6 from usp
3: lw a6, 24(t0) # argument #7 from usp
+4: lw a7, 28(t0) # argument #8 from usp (for indirect syscalls)
.section __ex_table,"a"
PTR 1b, bad_stack
PTR 2b, bad_stack
PTR 3b, bad_stack
+ PTR 4b, bad_stack
.previous
li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
@@ -91,7 +96,7 @@ NESTED(handle_sys, PT_SIZE, sp)
sd v0, PT_R0(sp) # flag for syscall restarting
1: sd v0, PT_R2(sp) # result
-FEXPORT(o32_syscall_exit)
+o32_syscall_exit:
local_irq_disable # make need_resched and
# signals dont change between
# sampling and return
@@ -109,12 +114,12 @@ o32_syscall_exit_work:
trace_a_syscall:
SAVE_STATIC
- sd a4, PT_R8(sp)
+ sd a4, PT_R8(sp) # Save argument registers
sd a5, PT_R9(sp)
sd a6, PT_R10(sp)
- sd a7, PT_R11(sp)
+ sd a7, PT_R11(sp) # For indirect syscalls
- move s0, t2
+ move s0, t2 # Save syscall pointer
move a0, sp
li a1, 0
jal do_syscall_trace
@@ -125,7 +130,8 @@ trace_a_syscall:
ld a3, PT_R7(sp)
ld a4, PT_R8(sp)
ld a5, PT_R9(sp)
- ld a6, PT_R10(sp) # For indirect syscalls
+ ld a6, PT_R10(sp)
+ ld a7, PT_R11(sp) # For indirect syscalls
jalr s0
li t0, -EMAXERRNO - 1 # error?
@@ -173,55 +179,40 @@ illegal_syscall:
END(handle_sys)
LEAF(sys32_syscall)
- ld t0, PT_R29(sp) # user sp
-
sltu v0, a0, __NR_O32_Linux + __NR_O32_Linux_syscalls + 1
beqz v0, enosys
dsll v0, a0, 3
- dla v1, sys32_syscall
ld t2, (sys_call_table - (__NR_O32_Linux * 8))(v0)
+ li v1, 4000 # indirect syscall number
li v0, -EINVAL
- beq t2, v1, out # do not recurse
+ beq a0, v1, out # do not recurse
beqz t2, enosys # null function pointer?
- andi v0, t0, 0x3 # unaligned stack pointer?
- bnez v0, sigsegv
-
- daddiu v0, t0, 16 # v0 = usp + 16
- daddu t1, v0, 12 # 3 32-bit arguments
- ld v1, TI_ADDR_LIMIT($28)
- or v0, v0, t1
- and v1, v1, v0
- bnez v1, efault
-
move a0, a1 # shift argument registers
move a1, a2
move a2, a3
move a3, a4
move a4, a5
move a5, a6
+ move a6, a7
+ sd a0, PT_R4(sp) # ... and push back a0 - a3, some
+ sd a1, PT_R5(sp) # syscalls expect them there
+ sd a2, PT_R6(sp)
+ sd a3, PT_R7(sp)
+ sd a3, PT_R26(sp) # update a3 for syscall restarting
jr t2
/* Unreached */
enosys: li v0, -ENOSYS
- b out
-
-sigsegv:
- li a0, _SIGSEGV
- move a1, $28
- jal force_sig
- /* Fall through */
-
-efault: li v0, -EFAULT
out: jr ra
END(sys32_syscall)
.align 3
- .type sys_call_table,@object;
+ .type sys_call_table,@object
sys_call_table:
PTR sys32_syscall /* 4000 */
PTR sys_exit
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Improve o32 syscall handling
2004-11-21 16:45 [PATCH] Improve o32 syscall handling Thiemo Seufer
2004-11-22 6:18 ` Ralf Baechle
@ 2004-11-23 12:47 ` Gleb O. Raiko
1 sibling, 0 replies; 7+ messages in thread
From: Gleb O. Raiko @ 2004-11-23 12:47 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: linux-mips, ralf
Hello,
Thiemo Seufer wrote:
> this is a major cleanup for the o32 syscall handling.
While we're here, there is a ptrace exploit in the syscall handling.
The kernel parses arguments, gets the address of the syscall handling
routine in t2, and goes to the process which ptraces. On return from
this process, the kernel restores t2 from the user stack and jumps
there. I've got an example that gets root from this.
Regards,
Gleb.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2004-11-27 0:29 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-11-21 16:45 [PATCH] Improve o32 syscall handling Thiemo Seufer
2004-11-22 6:18 ` Ralf Baechle
2004-11-22 7:00 ` Thiemo Seufer
2004-11-22 7:13 ` Ralf Baechle
2004-11-22 9:37 ` Thiemo Seufer
2004-11-27 0:29 ` Thiemo Seufer
2004-11-23 12:47 ` Gleb O. Raiko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox