* [PATCH] Fix copy_user on x86_64
@ 2008-06-25 12:31 Vitaly Mayatskikh
2008-06-26 9:13 ` Anton Arapov
0 siblings, 1 reply; 5+ messages in thread
From: Vitaly Mayatskikh @ 2008-06-25 12:31 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 452 bytes --]
A bug in copy_user can be triggered from userspace on RHEL-4 and other
distributions with a similar kernel base (CVE-2008-0598). A patch with a fixed
copy_user is attached: on a fault it falls into the byte copy loop and returns
the correct count of uncopied bytes. The patch also fixes the incorrect passing
of the zero flag in copy_to_user (%eax was used instead of %ecx).
Also attached is a SystemTap script that tests corner cases in both
copy_user implementations (unrolled and string).
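For context, the contract these routines must honour can be sketched roughly in
C as follows (an illustrative model only, not code from the patch;
copy_bytes_faulting() is a hypothetical stand-in for the low-level copy that may
fault partway through):

#include <string.h>

/* Model of the copy_from_user() contract (illustrative, not kernel code). */
unsigned long model_copy_from_user(void *dst, const void *src, unsigned long len)
{
	unsigned long uncopied = copy_bytes_faulting(dst, src, len);

	/*
	 * On a fault the caller must get the exact number of bytes that
	 * were NOT copied, and the uncopied tail of the kernel buffer must
	 * be zeroed so that no stale kernel data is exposed (leaking such
	 * data is what CVE-2008-0598 is about).
	 */
	if (uncopied)
		memset((char *)dst + (len - uncopied), 0, uncopied);
	return uncopied;
}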
[-- Attachment #2: fix copy_user --]
[-- Type: text/plain, Size: 9097 bytes --]
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index ee1c3f6..6402891 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -42,7 +42,7 @@ ENTRY(copy_to_user)
jc bad_to_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_to_user
- xorl %eax,%eax /* clear zero flag */
+ xorl %ecx,%ecx /* clear zero flag */
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
@@ -170,8 +170,8 @@ ENTRY(copy_user_generic_unrolled)
jnz .Lloop_8
.Lhandle_7:
+ andl $7,%edx
movl %edx,%ecx
- andl $7,%ecx
jz .Lende
.p2align 4
.Lloop_1:
@@ -218,41 +218,74 @@ ENTRY(copy_user_generic_unrolled)
.section __ex_table,"a"
.align 8
.quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */
- .quad .Ls2,.Ls1e
- .quad .Ls3,.Ls1e
- .quad .Ls4,.Ls1e
- .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
+ .quad .Ls2,.Ls2e
+ .quad .Ls3,.Ls3e
+ .quad .Ls4,.Ls4e
+ .quad .Ld1,.Ld1e /* Ld1-Ld4 have copied 0-24 bytes */
+ .quad .Ld2,.Ld2e
+ .quad .Ld3,.Ld3e
+ .quad .Ld4,.Ld4e
.quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */
- .quad .Ls6,.Ls5e
- .quad .Ls7,.Ls5e
- .quad .Ls8,.Ls5e
- .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
+ .quad .Ls6,.Ls6e
+ .quad .Ls7,.Ls7e
+ .quad .Ls8,.Ls8e
+ .quad .Ld5,.Ld5e /* Ld5-Ld8 have copied 32-56 bytes */
+ .quad .Ld6,.Ld6e
+ .quad .Ld7,.Ld7e
+ .quad .Ld8,.Ld8e
+ .quad .Ls9,.Ls9e
+ .quad .Ld9,.Ld9e
.quad .Ls10,.Le_byte
.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
.quad .Ls11,.Lzero_rest
.quad .Ld11,.Lzero_rest
#endif
+ .quad .Lt1,.Lt1e
.quad .Le5,.Le_zero
.previous
+ /* Exception on read in unrolled loop
+ Don't forget to store registers, which were loaded before fault.
+ Otherwise we will have up to 24 bytes of garbage and possible
+ security leak */
+.Ls8e: addl $8,%eax
+ movq %r9,6*8(%rdi)
+.Ls7e: addl $8,%eax
+ movq %r8,5*8(%rdi)
+.Ls6e: addl $8,%eax
+ movq %r11,4*8(%rdi)
+.Ls5e: addl $32,%eax
+ jmp .Ls1e
+
+.Ls4e: addl $8,%eax
+ movq %r9,2*8(%rdi)
+.Ls3e: addl $8,%eax
+ movq %r8,1*8(%rdi)
+.Ls2e: addl $8,%eax
+ movq %r11,(%rdi)
+.Ls1e: addq %rax,%rdi
+ addq %rax,%rsi
+ shlq $6,%rdx
+ addq %rbx,%rdx
+ subq %rax,%rdx
+ andl $63,%ecx
+ addq %rdx,%rcx
+.Lt1: rep /* copy last bytes */
+ movsb
+.Lt1e: movq %rcx,%rdx
+ jmp .Lzero_rest
+
+ /* Exception on write in unrolled loop */
/* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
+.Ld1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
+.Ld2e: addl $8,%eax
+.Ld3e: addl $8,%eax
+.Ld4e: addl $8,%eax
+.Ld5e: addl $8,%eax
+.Ld6e: addl $8,%eax
+.Ld7e: addl $8,%eax
+.Ld8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */
@@ -260,19 +293,27 @@ ENTRY(copy_user_generic_unrolled)
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
+ jmp .Le_zero /* fault in dst, just return */
- /* exception on quad word loop in tail handling */
+ /* Exception on read in quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
+.Ls9e: shll $3,%ecx /* fault in src of quad loop */
+ andl $7,%edx
+ addl %edx,%ecx
+ jmp .Lt1 /* try to copy last bytes */
+
+ /* Exception on write in quad word loop in tail handling */
+.Ld9e: shll $3,%ecx /* fault in dst of quad loop */
andl $7,%edx
addl %ecx,%edx
+ jmp .Le_zero /* fault in dst, just return */
+
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
cmpl $0,(%rsp)
jz .Le_zero
movq %rdx,%rcx
+ /* Exception on read or write in byte loop in tail handling */
.Le_byte:
xorl %eax,%eax
.Le5: rep
@@ -308,44 +349,39 @@ ENDPROC(copy_user_generic)
ENTRY(copy_user_generic_string)
CFI_STARTPROC
movl %ecx,%r8d /* save zero flag */
+ xorq %rax,%rax
movl %edx,%ecx
shrl $3,%ecx
- andl $7,%edx
- jz 10f
-1: rep
- movsq
+ andl $7,%edx
+ andl %r8d,%r8d /* store zero flag in eflags */
+.Lc1: rep
+ movsq
movl %edx,%ecx
-2: rep
+.Lc2: rep
movsb
-9: movl %ecx,%eax
ret
- /* multiple of 8 byte */
-10: rep
- movsq
- xor %eax,%eax
+.Lc1e: leaq (%rdx,%rcx,8),%r8
+ movl %r8d,%ecx
+.Lc3: rep /* try to use byte copy */
+ movsb
+.Lc3e: movl %ecx,%r8d
+ jz .Lc4e /* rep movs* does not affect eflags */
+.Lc4: rep
+ stosb
+.Lc4e: movl %r8d,%eax
ret
- /* exception handling */
-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
- jmp 6f
-5: movl %ecx,%eax /* exception on byte loop */
- /* eax: left over bytes */
-6: testl %r8d,%r8d /* zero flag set? */
- jz 7f
- movl %eax,%ecx /* initialize x86 loop counter */
- push %rax
- xorl %eax,%eax
-8: rep
- stosb /* zero the rest */
-11: pop %rax
-7: ret
+.Lc2e: movl %ecx,%r8d
+ jz .Lc4e
+ jmp .Lc4
CFI_ENDPROC
-END(copy_user_generic_c)
+ENDPROC(copy_user_generic_string)
.section __ex_table,"a"
- .quad 1b,3b
- .quad 2b,5b
- .quad 8b,11b
- .quad 10b,3b
+ .align 8
+ .quad .Lc1,.Lc1e
+ .quad .Lc2,.Lc2e
+ .quad .Lc3,.Lc3e
+ .quad .Lc4,.Lc4e
.previous
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 9d3d1ab..b34b6bd 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -146,41 +146,74 @@ ENTRY(__copy_user_nocache)
.section __ex_table,"a"
.align 8
.quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */
- .quad .Ls2,.Ls1e
- .quad .Ls3,.Ls1e
- .quad .Ls4,.Ls1e
- .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
+ .quad .Ls2,.Ls2e
+ .quad .Ls3,.Ls3e
+ .quad .Ls4,.Ls4e
+ .quad .Ld1,.Ld1e /* .Ld[1-4] - 0..24 bytes coped */
+ .quad .Ld2,.Ld2e
+ .quad .Ld3,.Ld3e
+ .quad .Ld4,.Ld4e
.quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */
- .quad .Ls6,.Ls5e
- .quad .Ls7,.Ls5e
- .quad .Ls8,.Ls5e
- .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
+ .quad .Ls6,.Ls6e
+ .quad .Ls7,.Ls7e
+ .quad .Ls8,.Ls8e
+ .quad .Ld5,.Ld5e /* .Ld[5-8] - 32..56 bytes copied */
+ .quad .Ld6,.Ld6e
+ .quad .Ld7,.Ld7e
+ .quad .Ld8,.Ld8e
+ .quad .Ls9,.Ls9e
+ .quad .Ld9,.Ld9e
.quad .Ls10,.Le_byte
.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
.quad .Ls11,.Lzero_rest
.quad .Ld11,.Lzero_rest
#endif
+ .quad .Lt1,.Lt1e
.quad .Le5,.Le_zero
.previous
+ /* Exception on read in unrolled loop
+ Don't forget to store registers, which were loaded before fault.
+ Otherwise we will have up to 24 bytes of garbage and possible
+ security leak */
+.Ls8e: addl $8,%eax
+ movq %r9,6*8(%rdi)
+.Ls7e: addl $8,%eax
+ movq %r8,5*8(%rdi)
+.Ls6e: addl $8,%eax
+ movq %r11,4*8(%rdi)
+.Ls5e: addl $32,%eax
+ jmp .Ls1e
+
+.Ls4e: addl $8,%eax
+ movq %r9,2*8(%rdi)
+.Ls3e: addl $8,%eax
+ movq %r8,1*8(%rdi)
+.Ls2e: addl $8,%eax
+ movq %r11,(%rdi)
+.Ls1e: addq %rax,%rdi
+ addq %rax,%rsi
+ shlq $6,%rdx
+ addq %rbx,%rdx
+ subq %rax,%rdx
+ andl $63,%ecx
+ addq %rdx,%rcx
+.Lt1: rep /* copy last bytes */
+ movsb
+.Lt1e: movq %rcx,%rdx
+ jmp .Lzero_rest
+
+ /* Exception on write in unrolled loop */
/* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
+.Ld1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
+.Ld2e: addl $8,%eax
+.Ld3e: addl $8,%eax
+.Ld4e: addl $8,%eax
+.Ld5e: addl $8,%eax
+.Ld6e: addl $8,%eax
+.Ld7e: addl $8,%eax
+.Ld8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */
@@ -188,19 +221,27 @@ ENTRY(__copy_user_nocache)
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
+ jmp .Le_zero /* fault in dst, just return */
- /* exception on quad word loop in tail handling */
+ /* Exception on read in quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
+.Ls9e: shll $3,%ecx /* fault in src of quad loop */
+ andl $7,%edx
+ addl %edx,%ecx
+ jmp .Lt1 /* try to copy last bytes */
+
+ /* Exception on write in quad word loop in tail handling */
+.Ld9e: shll $3,%ecx /* fault in dst of quad loop */
andl $7,%edx
addl %ecx,%edx
+ jmp .Le_zero /* fault in dst, just return */
+
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
cmpl $0,(%rsp) /* zero flag set? */
jz .Le_zero
movq %rdx,%rcx
+ /* Exception on read or write in byte loop in tail handling */
.Le_byte:
xorl %eax,%eax
.Le5: rep
[-- Attachment #3: test cases --]
[-- Type: text/plain, Size: 7260 bytes --]
function test_src:long(type:long, off:long, len:long, zero:long)
%{
unsigned char *ptr;
unsigned long addr;
int ret = 0, cu_ret = 0, i;
printk("testing src with '%s', off = %lld, len = %lld, zero tail = %lld\n", THIS->type ? "string" : "unrolled", THIS->off, THIS->len, THIS->zero);
// vmalloc makes a hole after new vma
ptr = vmalloc(PAGE_SIZE);
printk("ptr = %p\n", ptr);
// poison memory
memset(ptr, 0xaa, PAGE_SIZE >> 1);
// fill memory with pattern
for (i = 0; i < PAGE_SIZE >> 1; ++i)
ptr[i + (PAGE_SIZE >> 1)] = i & 0xff;
// raise GPF
addr = THIS->type ? _STRING_ : _UNROLLED_;
asm ("call %%rax;"
: "=a"(cu_ret)
: "S"(ptr + PAGE_SIZE - THIS->off), "D" (ptr), "d"(THIS->len), "c"(THIS->zero),
"a"(addr)
);
if (cu_ret)
printk("copy_user GPF (%d bytes uncopied)\n", cu_ret);
// dump
for (i = 0; i < 256; ++i) {
if (!(i % 8))
printk("\n%04x: ", i);
printk("%02x ", ptr[i]);
}
printk("\n");
// check for bug in copy_user_generic_unrolled
for (i = 0; i < (THIS->off > THIS->len ? THIS->len : THIS->off) && !(ret & 1); ++i)
if (ptr[i] != ptr[i + PAGE_SIZE - THIS->off])
ret |= 1;
if (cu_ret) {
if (THIS->len - THIS->off != cu_ret)
ret |= 4;
if (THIS->zero)
// check for bug in copy_user_generic_c/string
for (i = 0; i < cu_ret && !(ret & 2); ++i)
if (ptr[THIS->off + i])
ret |= 2;
}
vfree(ptr);
THIS->__retvalue = ret;
%}
function test_dst:long(type:long, off:long, len:long, zero:long)
%{
unsigned char *ptr;
unsigned long addr;
int ret = 0, cu_ret = 0, i;
printk("testing dst with '%s', off = %lld, len = %lld, zero tail = %lld\n", THIS->type ? "string" : "unrolled", THIS->off, THIS->len, THIS->zero);
// vmalloc makes a hole after new vma
ptr = vmalloc(PAGE_SIZE);
printk("ptr = %p\n", ptr);
// poison memory
memset(ptr + (PAGE_SIZE >> 1), 0xaa, PAGE_SIZE >> 1);
// fill memory with pattern
for (i = 0; i < PAGE_SIZE >> 1; ++i)
ptr[i] = i & 0xff;
// raise GPF
addr = THIS->type ? _STRING_ : _UNROLLED_;
asm ("call %%rax;"
: "=a"(cu_ret)
: "S"(ptr), "D" (ptr + PAGE_SIZE - THIS->off), "d"(THIS->len), "c"(THIS->zero),
"a"(addr)
);
if (cu_ret)
printk("copy_user GPF (%d bytes uncopied)\n", cu_ret);
// dump
for (i = PAGE_SIZE - 256; i < PAGE_SIZE; ++i) {
if (!(i % 8))
printk("\n%04x: ", i);
printk("%02x ", ptr[i]);
}
printk("\n");
for (i = 0; i < (THIS->off > THIS->len ? THIS->len : THIS->off) && !(ret & 1); ++i)
if (ptr[i] != ptr[PAGE_SIZE - THIS->off + i])
ret |= 1;
vfree(ptr);
THIS->__retvalue = ret;
%}
function run_test:long(type:long, off:long, len:long, zero:long)
{
m = test_src(type, off, len, zero);
printf("testing src '%s', off = %d, len = %d, zero tail = %d, buggy: %s\n", type ? "string" : "unrolled", off, len, zero, m ? "yes" : "no");
if (m) {
if (m & 1)
printf("copy_user BUG #0: valid data was not stored\n");
if (m & 2)
printf("copy_user BUG #1: tail not zeroed\n");
if (m & 4)
printf("copy_user BUG #2: uncopied bytes miscalculation\n");
}
n = test_dst(type, off, len, zero);
printf("testing dst '%s', off = %d, len = %d, zero tail = %d, buggy: %s\n", type ? "string" : "unrolled", off, len, zero, n ? "yes" : "no");
if (n) {
if (n & 1)
printf("copy_user BUG #3: copied data unequal\n");
}
return m + n;
}
%( kernel_v <= "2.6.18" %?
function get_feature:long()
%{
THIS->__retvalue = boot_cpu_has(3*32+ 4); // X86_FEATURE_K8_C in RHEL-4 or X86_FEATURE_REP_GOOD in RHEL-5
%}
%:
function get_feature:long()
%{
THIS->__retvalue = boot_cpu_has(3*32+ 16); // X86_FEATURE_REP_GOOD in upstream
%}
%)
probe begin
{
printf("begin\n");
printf("kernel uses '%s' version of copy_user\n", get_feature() ? "string" : "unrolled");
i = 0;
i += run_test(0, 56, 3, 1); // zero tail
i += run_test(1, 56, 3, 1); // zero tail
i += run_test(0, 56, 3, 0);
i += run_test(1, 56, 3, 0);
i += run_test(0, 56, 128, 1); // unrolled loop, zero tail
i += run_test(1, 56, 128, 1); // movsq, zero tail
i += run_test(0, 56, 128, 0); // unrolled loop
i += run_test(1, 56, 128, 0); // movsq
i += run_test(0, 9, 128, 1); // unrolled loop, zero tail
i += run_test(1, 9, 128, 1); // movsq, zero tail
i += run_test(0, 9, 128, 0); // unrolled loop
i += run_test(1, 9, 128, 0); // movsq
i += run_test(0, 9, 9, 1); // quad word loop + byte loop, zero tail
i += run_test(1, 9, 9, 1); // movsq + movsb, zero tail
i += run_test(0, 9, 9, 0); // quad word loop + byte loop
i += run_test(1, 9, 9, 0); // movsq + movsb
i += run_test(0, 8, 9, 1); // quad word loop + byte loop, zero tail
i += run_test(1, 8, 9, 1); // movsq + movsb, zero tail
i += run_test(0, 8, 9, 0); // quad word loop + byte loop
i += run_test(1, 8, 9, 0); // movsq + movsb
i += run_test(0, 1, 8, 1); // quad word loop, zero tail
i += run_test(1, 1, 8, 1); // movsq, zero tail
i += run_test(0, 1, 8, 0); // quad word loop
i += run_test(1, 1, 8, 0); // movsq
i += run_test(0, 0, 1, 1); // byte loop, zero tail
i += run_test(1, 0, 1, 1); // movsb, zero tail
i += run_test(0, 0, 1, 0); // byte loop
i += run_test(1, 0, 1, 0); // movsb
i += run_test(0, 1, 1, 1); // byte loop, zero tail
i += run_test(1, 1, 1, 1); // movsb, zero tail
i += run_test(0, 1, 1, 0); // byte loop
i += run_test(1, 1, 1, 0); // movsb
i += run_test(0, 1, 0, 1); // sanity check
i += run_test(1, 1, 0, 1); // sanity check
i += run_test(0, 1, 0, 0); // sanity check
i += run_test(1, 1, 0, 0); // sanity check
i += run_test(0, 0, 0, 1); // sanity check
i += run_test(1, 0, 0, 1); // sanity check
i += run_test(0, 0, 0, 0); // sanity check
i += run_test(1, 0, 0, 0); // sanity check
if (i) {
printf("copy_user is buggy\n");
} else {
printf("copy user is sane\n");
}
printf("end\n");
exit();
}
[-- Attachment #4: Makefile --]
[-- Type: text/plain, Size: 560 bytes --]
SYMVERS=/boot/System.map-`uname -r`
# Resolve the addresses of the copy_user implementations from the running
# kernel's System.map (symbol names differ between RHEL-4 and later kernels).
GENERIC=$(firstword $(shell grep copy_user_generic $(SYMVERS)))
UNROLLED=$(firstword $(shell grep copy_user_generic_unrolled $(SYMVERS)))
STRING=$(firstword $(shell grep copy_user_generic_string $(SYMVERS)))
C=$(firstword $(shell grep copy_user_generic_c $(SYMVERS)))
ifeq ($(UNROLLED),)
UNROLLED=$(GENERIC)# This is RHEL-4
endif
ifeq ($(STRING),)
STRING=$(C)# This is RHEL-4
endif
all: prep test
# Substitute the resolved addresses into the SystemTap script
prep:
	cat copy.gen | sed s#_UNROLLED_#0x$(UNROLLED)# | sed s#_STRING_#0x$(STRING)# > copy.stp
# Run the generated script in guru mode (-g) so it can call into the kernel
test:
	stap -vg copy.stp
[-- Attachment #5: Type: text/plain, Size: 17 bytes --]
--
wbr, Vitaly
* Re: [PATCH] Fix copy_user on x86_64
2008-06-25 12:31 [PATCH] Fix copy_user on x86_64 Vitaly Mayatskikh
@ 2008-06-26 9:13 ` Anton Arapov
2008-06-26 15:37 ` Linus Torvalds
0 siblings, 1 reply; 5+ messages in thread
From: Anton Arapov @ 2008-06-26 9:13 UTC (permalink / raw)
To: Vitaly Mayatskikh, Linus Torvalds, Andi Kleen; +Cc: linux-kernel, Andrew Morton
[-- Attachment #1: Type: text/plain, Size: 920 bytes --]
Just to bring this to your attention!
This is the patch for the copy_user routine that you discussed recently.
I have attached an updated patch so that it passes the checkpatch.pl tool cleanly.
A couple of stray whitespace characters have been removed, and Signed-off-by and
Acked-by lines have been added.
The patch applies cleanly to the current git tree.
-Anton
Vitaly Mayatskikh wrote:
> A bug in copy_user can be triggered from userspace on RHEL-4 and other
> distributions with a similar kernel base (CVE-2008-0598). A patch with a fixed
> copy_user is attached: on a fault it falls into the byte copy loop and returns
> the correct count of uncopied bytes. The patch also fixes the incorrect passing
> of the zero flag in copy_to_user (%eax was used instead of %ecx).
>
> Also attached is a SystemTap script that tests corner cases in both
> copy_user implementations (unrolled and string).
>
>
>
> ------------------------------------------------------------------------
>
>
[-- Attachment #2: copy_user.patch --]
[-- Type: text/x-patch, Size: 9651 bytes --]
A bug in copy_user can be triggered from userspace on RHEL-4 and other
distributions with a similar kernel base (CVE-2008-0598). A patch with a fixed
copy_user is attached: on a fault it falls into the byte copy loop and returns
the correct count of uncopied bytes. The patch also fixes the incorrect passing
of the zero flag in copy_to_user (%eax was used instead of %ecx).
Also attached is a SystemTap script that tests corner cases in both
copy_user implementations (unrolled and string).
Signed-off-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Acked-by: Anton Arapov <aarapov@redhat.com>
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index ee1c3f6..6402891 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -42,7 +42,7 @@ ENTRY(copy_to_user)
jc bad_to_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_to_user
- xorl %eax,%eax /* clear zero flag */
+ xorl %ecx,%ecx /* clear zero flag */
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
@@ -170,8 +170,8 @@ ENTRY(copy_user_generic_unrolled)
jnz .Lloop_8
.Lhandle_7:
+ andl $7,%edx
movl %edx,%ecx
- andl $7,%ecx
jz .Lende
.p2align 4
.Lloop_1:
@@ -218,41 +218,74 @@ ENTRY(copy_user_generic_unrolled)
.section __ex_table,"a"
.align 8
.quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */
- .quad .Ls2,.Ls1e
- .quad .Ls3,.Ls1e
- .quad .Ls4,.Ls1e
- .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
+ .quad .Ls2,.Ls2e
+ .quad .Ls3,.Ls3e
+ .quad .Ls4,.Ls4e
+ .quad .Ld1,.Ld1e /* Ld1-Ld4 have copied 0-24 bytes */
+ .quad .Ld2,.Ld2e
+ .quad .Ld3,.Ld3e
+ .quad .Ld4,.Ld4e
.quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */
- .quad .Ls6,.Ls5e
- .quad .Ls7,.Ls5e
- .quad .Ls8,.Ls5e
- .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
+ .quad .Ls6,.Ls6e
+ .quad .Ls7,.Ls7e
+ .quad .Ls8,.Ls8e
+ .quad .Ld5,.Ld5e /* Ld5-Ld8 have copied 32-56 bytes */
+ .quad .Ld6,.Ld6e
+ .quad .Ld7,.Ld7e
+ .quad .Ld8,.Ld8e
+ .quad .Ls9,.Ls9e
+ .quad .Ld9,.Ld9e
.quad .Ls10,.Le_byte
.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
.quad .Ls11,.Lzero_rest
.quad .Ld11,.Lzero_rest
#endif
+ .quad .Lt1,.Lt1e
.quad .Le5,.Le_zero
.previous
+ /* Exception on read in unrolled loop
+ Don't forget to store registers, which were loaded before fault.
+ Otherwise we will have up to 24 bytes of garbage and possible
+ security leak */
+.Ls8e: addl $8,%eax
+ movq %r9,6*8(%rdi)
+.Ls7e: addl $8,%eax
+ movq %r8,5*8(%rdi)
+.Ls6e: addl $8,%eax
+ movq %r11,4*8(%rdi)
+.Ls5e: addl $32,%eax
+ jmp .Ls1e
+
+.Ls4e: addl $8,%eax
+ movq %r9,2*8(%rdi)
+.Ls3e: addl $8,%eax
+ movq %r8,1*8(%rdi)
+.Ls2e: addl $8,%eax
+ movq %r11,(%rdi)
+.Ls1e: addq %rax,%rdi
+ addq %rax,%rsi
+ shlq $6,%rdx
+ addq %rbx,%rdx
+ subq %rax,%rdx
+ andl $63,%ecx
+ addq %rdx,%rcx
+.Lt1: rep /* copy last bytes */
+ movsb
+.Lt1e: movq %rcx,%rdx
+ jmp .Lzero_rest
+
+ /* Exception on write in unrolled loop */
/* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
+.Ld1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
+.Ld2e: addl $8,%eax
+.Ld3e: addl $8,%eax
+.Ld4e: addl $8,%eax
+.Ld5e: addl $8,%eax
+.Ld6e: addl $8,%eax
+.Ld7e: addl $8,%eax
+.Ld8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */
@@ -260,19 +293,27 @@ ENTRY(copy_user_generic_unrolled)
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
+ jmp .Le_zero /* fault in dst, just return */
- /* exception on quad word loop in tail handling */
+ /* Exception on read in quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
+.Ls9e: shll $3,%ecx /* fault in src of quad loop */
+ andl $7,%edx
+ addl %edx,%ecx
+ jmp .Lt1 /* try to copy last bytes */
+
+ /* Exception on write in quad word loop in tail handling */
+.Ld9e: shll $3,%ecx /* fault in dst of quad loop */
andl $7,%edx
addl %ecx,%edx
+ jmp .Le_zero /* fault in dst, just return */
+
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
cmpl $0,(%rsp)
jz .Le_zero
movq %rdx,%rcx
+ /* Exception on read or write in byte loop in tail handling */
.Le_byte:
xorl %eax,%eax
.Le5: rep
@@ -308,44 +349,39 @@ ENDPROC(copy_user_generic)
ENTRY(copy_user_generic_string)
CFI_STARTPROC
movl %ecx,%r8d /* save zero flag */
+ xorq %rax,%rax
movl %edx,%ecx
shrl $3,%ecx
- andl $7,%edx
- jz 10f
-1: rep
- movsq
+ andl $7,%edx
+ andl %r8d,%r8d /* store zero flag in eflags */
+.Lc1: rep
+ movsq
movl %edx,%ecx
-2: rep
+.Lc2: rep
movsb
-9: movl %ecx,%eax
ret
- /* multiple of 8 byte */
-10: rep
- movsq
- xor %eax,%eax
+.Lc1e: leaq (%rdx,%rcx,8),%r8
+ movl %r8d,%ecx
+.Lc3: rep /* try to use byte copy */
+ movsb
+.Lc3e: movl %ecx,%r8d
+ jz .Lc4e /* rep movs* does not affect eflags */
+.Lc4: rep
+ stosb
+.Lc4e: movl %r8d,%eax
ret
- /* exception handling */
-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
- jmp 6f
-5: movl %ecx,%eax /* exception on byte loop */
- /* eax: left over bytes */
-6: testl %r8d,%r8d /* zero flag set? */
- jz 7f
- movl %eax,%ecx /* initialize x86 loop counter */
- push %rax
- xorl %eax,%eax
-8: rep
- stosb /* zero the rest */
-11: pop %rax
-7: ret
+.Lc2e: movl %ecx,%r8d
+ jz .Lc4e
+ jmp .Lc4
CFI_ENDPROC
-END(copy_user_generic_c)
+ENDPROC(copy_user_generic_string)
.section __ex_table,"a"
- .quad 1b,3b
- .quad 2b,5b
- .quad 8b,11b
- .quad 10b,3b
+ .align 8
+ .quad .Lc1,.Lc1e
+ .quad .Lc2,.Lc2e
+ .quad .Lc3,.Lc3e
+ .quad .Lc4,.Lc4e
.previous
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 9d3d1ab..b34b6bd 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -146,41 +146,74 @@ ENTRY(__copy_user_nocache)
.section __ex_table,"a"
.align 8
.quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */
- .quad .Ls2,.Ls1e
- .quad .Ls3,.Ls1e
- .quad .Ls4,.Ls1e
- .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
+ .quad .Ls2,.Ls2e
+ .quad .Ls3,.Ls3e
+ .quad .Ls4,.Ls4e
+ .quad .Ld1,.Ld1e /* .Ld[1-4] - 0..24 bytes coped */
+ .quad .Ld2,.Ld2e
+ .quad .Ld3,.Ld3e
+ .quad .Ld4,.Ld4e
.quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */
- .quad .Ls6,.Ls5e
- .quad .Ls7,.Ls5e
- .quad .Ls8,.Ls5e
- .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
+ .quad .Ls6,.Ls6e
+ .quad .Ls7,.Ls7e
+ .quad .Ls8,.Ls8e
+ .quad .Ld5,.Ld5e /* .Ld[5-8] - 32..56 bytes copied */
+ .quad .Ld6,.Ld6e
+ .quad .Ld7,.Ld7e
+ .quad .Ld8,.Ld8e
+ .quad .Ls9,.Ls9e
+ .quad .Ld9,.Ld9e
.quad .Ls10,.Le_byte
.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
.quad .Ls11,.Lzero_rest
.quad .Ld11,.Lzero_rest
#endif
+ .quad .Lt1,.Lt1e
.quad .Le5,.Le_zero
.previous
+ /* Exception on read in unrolled loop
+ Don't forget to store registers, which were loaded before fault.
+ Otherwise we will have up to 24 bytes of garbage and possible
+ security leak */
+.Ls8e: addl $8,%eax
+ movq %r9,6*8(%rdi)
+.Ls7e: addl $8,%eax
+ movq %r8,5*8(%rdi)
+.Ls6e: addl $8,%eax
+ movq %r11,4*8(%rdi)
+.Ls5e: addl $32,%eax
+ jmp .Ls1e
+
+.Ls4e: addl $8,%eax
+ movq %r9,2*8(%rdi)
+.Ls3e: addl $8,%eax
+ movq %r8,1*8(%rdi)
+.Ls2e: addl $8,%eax
+ movq %r11,(%rdi)
+.Ls1e: addq %rax,%rdi
+ addq %rax,%rsi
+ shlq $6,%rdx
+ addq %rbx,%rdx
+ subq %rax,%rdx
+ andl $63,%ecx
+ addq %rdx,%rcx
+.Lt1: rep /* copy last bytes */
+ movsb
+.Lt1e: movq %rcx,%rdx
+ jmp .Lzero_rest
+
+ /* Exception on write in unrolled loop */
/* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
+.Ld1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
+.Ld2e: addl $8,%eax
+.Ld3e: addl $8,%eax
+.Ld4e: addl $8,%eax
+.Ld5e: addl $8,%eax
+.Ld6e: addl $8,%eax
+.Ld7e: addl $8,%eax
+.Ld8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */
@@ -188,19 +221,27 @@ ENTRY(__copy_user_nocache)
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
+ jmp .Le_zero /* fault in dst, just return */
- /* exception on quad word loop in tail handling */
+ /* Exception on read in quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
+.Ls9e: shll $3,%ecx /* fault in src of quad loop */
+ andl $7,%edx
+ addl %edx,%ecx
+ jmp .Lt1 /* try to copy last bytes */
+
+ /* Exception on write in quad word loop in tail handling */
+.Ld9e: shll $3,%ecx /* fault in dst of quad loop */
andl $7,%edx
addl %ecx,%edx
+ jmp .Le_zero /* fault in dst, just return */
+
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
cmpl $0,(%rsp) /* zero flag set? */
jz .Le_zero
movq %rdx,%rcx
+ /* Exception on read or write in byte loop in tail handling */
.Le_byte:
xorl %eax,%eax
.Le5: rep
* Re: [PATCH] Fix copy_user on x86_64
2008-06-26 9:13 ` Anton Arapov
@ 2008-06-26 15:37 ` Linus Torvalds
2008-06-26 15:58 ` Vitaly Mayatskikh
0 siblings, 1 reply; 5+ messages in thread
From: Linus Torvalds @ 2008-06-26 15:37 UTC (permalink / raw)
To: Anton Arapov; +Cc: Vitaly Mayatskikh, Andi Kleen, linux-kernel, Andrew Morton
On Thu, 26 Jun 2008, Anton Arapov wrote:
>
> This is the patch for the copy_user routine that you discussed recently.
I don't think it works right.
Isn't this same routine also used for copy_in_user()? For that case both
source _and_ destination can fault, but your fixup routines assume that
only one of them does (ie the fixup for a load-fault does a store for the
previously loaded values, and assumes that it doesn't trap)
Also, I'd really rather do this all by handling the "tail" case in C. We
already effectively have _half_ that support: the "clear end" flag ends up
calling our specialized memset() routine, but it would be much nicer if
we:
- extended the "clear end" flag to be not just "clear end", but also
which direction things are going.
- always call a (fixed) fixup-routine that is written in C (because
performance on a cycle basis no longer matters) that gets the remaining
length and the source and destination as arguments, along with the
"clear and direction flag".
- make that fixup routine do the byte-exact tests and any necessary
clearing (and return the possibly-fixed-up remaining length).
Notice how this way we still have _optimal_ performance for the case where
no fault happens, and we don't need any complex fixups in assembly code at
all - the only thing the asm routines need to do is to get the right
length (we already have this) and fix up the source/dest pointers (we
don't generally have this, although the zero-at-end fixes up the
destination pointer in order to zero it, of course).
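A minimal C sketch of the fixup routine being proposed might look like the
following (illustrative only: the name, the flag, and the use of
__get_user()/__put_user() on both pointers are assumptions for the sketch, not
the code that was eventually merged):

#include <linux/uaccess.h>	/* __get_user(), __put_user() */
#include <linux/string.h>	/* memset() */

#define FIXUP_ZERO_TAIL	0x1	/* hypothetical: copy_from_user wants the tail cleared */

/*
 * Hypothetical C fixup, entered from the asm exception path with
 * already-corrected dst/src pointers and the remaining length.
 * Copying byte by byte makes the uncopied count exact no matter
 * which side faults.
 */
static unsigned long copy_user_fixup(char *dst, const char *src,
				     unsigned long remaining,
				     unsigned int flags)
{
	unsigned long left = remaining;
	char c;

	while (left) {
		if (__get_user(c, src))		/* source page faulted */
			break;
		if (__put_user(c, dst))		/* destination page faulted */
			break;
		src++;
		dst++;
		left--;
	}

	if (left && (flags & FIXUP_ZERO_TAIL))
		memset(dst, 0, left);	/* dst is a kernel buffer in this case */

	return left;			/* exact number of bytes not copied */
}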
Hmm?
Linus
* Re: [PATCH] Fix copy_user on x86_64
2008-06-26 15:37 ` Linus Torvalds
@ 2008-06-26 15:58 ` Vitaly Mayatskikh
2008-06-26 16:30 ` Linus Torvalds
0 siblings, 1 reply; 5+ messages in thread
From: Vitaly Mayatskikh @ 2008-06-26 15:58 UTC (permalink / raw)
To: Linus Torvalds
Cc: Anton Arapov, Vitaly Mayatskikh, Andi Kleen, linux-kernel,
Andrew Morton
Linus Torvalds <torvalds@linux-foundation.org> writes:
>> This is the patch for the copy_user routine that you discussed recently.
>
> I don't think it works right.
>
> Isn't this same routine also used for copy_in_user()? For that case both
> source _and_ destination can fault, but your fixup routines assume that
> only one of them does (ie the fixup for a load-fault does a store for the
> previously loaded values, and assumes that it doesn't trap)
Right. I've missed it... :(
> Also, I'd really rather do this all by handling the "tail" case in C. We
> already effectively have _half_ that support: the "clear end" flag ends up
> calling our specialized memset() routine, but it would be much nicer if
> we:
>
> - extended the "clear end" flag to be not just "clear end", but also
> which direction things are going.
> - always call a (fixed) fixup-routine that is written in C (because
> performance on a cycle basis no longer matters) that gets the remaining
> length and the source and destination as arguments, along with the
> "clear and direction flag".
> - make that fixup routine do the byte-exact tests and any necessary
> clearing (and return the possibly-fixed-up remaining length).
>
> Notice how this way we still have _optimal_ performance for the case where
> no fault happens, and we don't need any complex fixups in assembly code at
> all - the only thing the asm routines need to do is to get the right
> length (we already have this) and fix up the source/dest pointers (we
> don't generally have this, although the zero-at-end fixes up the
> destination pointer in order to zero it, of course).
>
> Hmm?
Seems reasonable. However, we still need a specialized memset() routine,
because, again, the destination can fault. Thanks for the review, Linus!
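What such a fault-tolerant clearing routine would look like, if it were needed
(an illustrative sketch only; as the follow-up notes, the zero-at-end case only
applies to copy_from_user, where a plain memset() on the kernel buffer is
enough, and the kernel already has clear_user()/__clear_user() for faulting
destinations):

#include <linux/uaccess.h>	/* __put_user() */

/*
 * Illustrative model: zero 'len' bytes of a destination that may fault,
 * returning how many bytes could not be cleared.
 */
static unsigned long zero_faulting_dest(char *dst, unsigned long len)
{
	while (len) {
		if (__put_user(0, dst))
			break;		/* destination page is gone */
		dst++;
		len--;
	}
	return len;
}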
--
wbr, Vitaly
* Re: [PATCH] Fix copy_user on x86_64
2008-06-26 15:58 ` Vitaly Mayatskikh
@ 2008-06-26 16:30 ` Linus Torvalds
0 siblings, 0 replies; 5+ messages in thread
From: Linus Torvalds @ 2008-06-26 16:30 UTC (permalink / raw)
To: Vitaly Mayatskikh; +Cc: Anton Arapov, Andi Kleen, linux-kernel, Andrew Morton
On Thu, 26 Jun 2008, Vitaly Mayatskikh wrote:
>
> Seems reasonable. However, we still need a specialized memset() routine,
> because, again, the destination can fault. Thanks for the review, Linus!
Actually, the "zero at the end" case is only for copy_from_user() (at
least it _should_ be), so for the clearing-at-end you should be able to
use a regular memset().
But it's not a big deal either way. As long as we only get into the fixup
routine at exception time, and handle all the common cases fast (ie do the
32-byte unrolled thing etc optimally), the fixup routine can do everything
a byte at a time with "get_user()" and "put_user()" etc. The "fault at
copy_*_user()" case really isn't all that performance-sensitive, because
it really happens essentially _never_.
(That's obviously why nobody even noticed how broken they were for
essentially what must have been _years_. It's not just not a performance
sensitive area, it's one that is entered so seldom that it's hard to ever
hit any correctness issues either)
Linus
Thread overview: 5+ messages:
2008-06-25 12:31 [PATCH] Fix copy_user on x86_64 Vitaly Mayatskikh
2008-06-26 9:13 ` Anton Arapov
2008-06-26 15:37 ` Linus Torvalds
2008-06-26 15:58 ` Vitaly Mayatskikh
2008-06-26 16:30 ` Linus Torvalds