* [PATCH 001 of 2] Prepare for __copy_from_user_inatomic to not zero missed bytes.
2006-05-22 4:46 [PATCH - RESEND - 000 of 2] Avoid subtle cache consistency problem NeilBrown
@ 2006-05-22 4:46 ` NeilBrown
2006-05-25 11:59 ` David Howells
2006-05-22 4:46 ` [PATCH 002 of 2] Make copy_from_user_inatomic NOT zero the tail on i386 NeilBrown
1 sibling, 1 reply; 6+ messages in thread
From: NeilBrown @ 2006-05-22 4:46 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
There is a problem with __copy_from_user_inatomic zeroing the tail of
the buffer when it hits an error.  As it is called in atomic context,
the error may be transient (e.g. the source page simply isn't
resident), so zeros can end up being written over data that should not
have been touched.
In the filemap usage, this opens a window in which a well-timed read
can see data (zeros) that is not consistent with any ordering of the
reads and writes.
In most places where __copy_from_user_inatomic is called, a failure
results in __copy_from_user being called immediately afterwards.  As
long as the latter zeros the tail, the former does not need to.
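For reference, this is roughly the pattern those callers use (a
simplified sketch of filemap_copy_from_user() from mm/filemap.h, which
also appears as context in the diff below):

        static inline size_t
        filemap_copy_from_user(struct page *page, unsigned long offset,
                                const char __user *buf, unsigned bytes)
        {
                char *kaddr;
                int left;

                /* Fast path: atomic mapping, so the copy must not sleep. */
                kaddr = kmap_atomic(page, KM_USER0);
                left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
                kunmap_atomic(kaddr, KM_USER0);

                if (left != 0) {
                        /*
                         * The atomic copy faulted, possibly only because the
                         * source page was not resident.  Retry with the
                         * sleeping copy, which zeros the tail itself if it
                         * also fails.  If the atomic copy had already zeroed
                         * the tail, a read landing between the two copies
                         * could observe those zeros.
                         */
                        kaddr = kmap(page);
                        left = __copy_from_user(kaddr + offset, buf, bytes);
                        kunmap(page);
                }
                return bytes - left;
        }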
However, the *copy_from_user_iovec implementations (in both
mm/filemap.c and fs/ntfs/file.c) assume that __copy_from_user_inatomic
will zero the tail.
This patch removes that assumption, so that afterwards it is safe for
__copy_from_user_inatomic not to zero the tail.
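Concretely, the iovec helpers stop zeroing and the non-atomic fallback
caller pads the tail itself; the slow path ends up with roughly this
shape (a sketch only; the real hunks for mm/filemap.h and
fs/ntfs/file.c are below):

        kaddr = kmap(page);
        copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset,
                                                         iov, base, bytes);
        if (copied != bytes)
                /* pad the tail, as __copy_from_user() would have done */
                memset(kaddr + offset + copied, 0, bytes - copied);
        kunmap(page);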
This patch also adds some commentary to mm/filemap.h and
asm-i386/uaccess.h.
After this patch, all architectures that might disable preempt when
kmap_atomic is called need to have their __copy_from_user_inatomic* "fixed".
This includes
- powerpc
- i386
- mips
- sparc
Interestingly 'frv' disables preempt in kmap_atomic, but its
copy_from_user doesn't expect faults and never zeros the tail...
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./fs/ntfs/file.c | 26 ++++++++++++++------------
./include/asm-i386/uaccess.h | 6 ++++++
./mm/filemap.c | 8 ++------
./mm/filemap.h | 26 ++++++++++++++++++--------
4 files changed, 40 insertions(+), 26 deletions(-)
diff ./fs/ntfs/file.c~current~ ./fs/ntfs/file.c
--- ./fs/ntfs/file.c~current~ 2006-05-22 11:20:26.000000000 +1000
+++ ./fs/ntfs/file.c 2006-05-22 11:20:26.000000000 +1000
@@ -1358,7 +1358,7 @@ err_out:
goto out;
}
-static size_t __ntfs_copy_from_user_iovec(char *vaddr,
+static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
const struct iovec *iov, size_t iov_ofs, size_t bytes)
{
size_t total = 0;
@@ -1376,10 +1376,6 @@ static size_t __ntfs_copy_from_user_iove
bytes -= len;
vaddr += len;
if (unlikely(left)) {
- /*
- * Zero the rest of the target like __copy_from_user().
- */
- memset(vaddr, 0, bytes);
total -= left;
break;
}
@@ -1420,11 +1416,13 @@ static inline void ntfs_set_next_iovec(c
* pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
* single-segment behaviour.
*
- * We call the same helper (__ntfs_copy_from_user_iovec()) both when atomic and
- * when not atomic. This is ok because __ntfs_copy_from_user_iovec() calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep(). And on many
+ * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both
+ * when atomic and when not atomic. This is ok because
+ * __ntfs_copy_from_user_iovec_inatomic() calls __copy_from_user_inatomic()
+ * and it is ok to call this when non-atomic.
+ * In fact, the only difference between __copy_from_user_inatomic() and
+ * __copy_from_user() is that the latter calls might_sleep() and the former
+ * should not zero the tail of the buffer on error. And on many
* architectures __copy_from_user_inatomic() is just defined to
* __copy_from_user() so it makes no difference at all on those architectures.
*/
@@ -1441,14 +1439,18 @@ static inline size_t ntfs_copy_from_user
if (len > bytes)
len = bytes;
kaddr = kmap_atomic(*pages, KM_USER0);
- copied = __ntfs_copy_from_user_iovec(kaddr + ofs,
+ copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs,
*iov, *iov_ofs, len);
kunmap_atomic(kaddr, KM_USER0);
if (unlikely(copied != len)) {
/* Do it the slow way. */
kaddr = kmap(*pages);
- copied = __ntfs_copy_from_user_iovec(kaddr + ofs,
+ copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs,
*iov, *iov_ofs, len);
+ /*
+ * Zero the rest of the target like __copy_from_user().
+ */
+ memset(kaddr + ofs + copied, 0, len - copied);
kunmap(*pages);
if (unlikely(copied != len))
goto err_out;
diff ./include/asm-i386/uaccess.h~current~ ./include/asm-i386/uaccess.h
--- ./include/asm-i386/uaccess.h~current~ 2006-05-22 11:20:26.000000000 +1000
+++ ./include/asm-i386/uaccess.h 2006-05-22 11:20:26.000000000 +1000
@@ -458,6 +458,12 @@ __copy_to_user(void __user *to, const vo
*
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
+ *
+ * An alternate version - __copy_from_user_inatomic() - may be called from
+ * atomic context and will fail rather than sleep. In this case the
+ * uncopied bytes will *NOT* be padded with zeros. See mm/filemap.h
+ * for an explanation of why this is needed.
+ * FIXME this isn't implemented yet EMXIF
*/
static __always_inline unsigned long
__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
diff ./mm/filemap.c~current~ ./mm/filemap.c
--- ./mm/filemap.c~current~ 2006-05-22 11:20:26.000000000 +1000
+++ ./mm/filemap.c 2006-05-22 11:20:26.000000000 +1000
@@ -1804,7 +1804,7 @@ int remove_suid(struct dentry *dentry)
EXPORT_SYMBOL(remove_suid);
size_t
-__filemap_copy_from_user_iovec(char *vaddr,
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
{
size_t copied = 0, left = 0;
@@ -1820,12 +1820,8 @@ __filemap_copy_from_user_iovec(char *vad
vaddr += copy;
iov++;
- if (unlikely(left)) {
- /* zero the rest of the target like __copy_from_user */
- if (bytes)
- memset(vaddr, 0, bytes);
+ if (unlikely(left))
break;
- }
}
return copied - left;
}
diff ./mm/filemap.h~current~ ./mm/filemap.h
--- ./mm/filemap.h~current~ 2006-05-22 11:20:26.000000000 +1000
+++ ./mm/filemap.h 2006-05-22 11:20:26.000000000 +1000
@@ -16,15 +16,23 @@
#include <linux/uaccess.h>
size_t
-__filemap_copy_from_user_iovec(char *vaddr,
- const struct iovec *iov,
- size_t base,
- size_t bytes);
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+ const struct iovec *iov,
+ size_t base,
+ size_t bytes);
/*
* Copy as much as we can into the page and return the number of bytes which
* were successfully copied. If a fault is encountered then clear the page
* out to (offset+bytes) and return the number of bytes which were copied.
+ *
+ * NOTE: For this to work reliably we really want copy_from_user_inatomic_nocache
+ * to *NOT* zero any tail of the buffer that it failed to copy. If it does,
+ * and if the following non-atomic copy succeeds, then there is a small window
+ * where the target page contains neither the data before the write, nor the
+ * data after the write (it contains zero). A read at this time will see
+ * data that is inconsistent with any ordering of the read and the write.
+ * (This has been detected in practice).
*/
static inline size_t
filemap_copy_from_user(struct page *page, unsigned long offset,
@@ -60,13 +68,15 @@ filemap_copy_from_user_iovec(struct page
size_t copied;
kaddr = kmap_atomic(page, KM_USER0);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
+ copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+ base, bytes);
kunmap_atomic(kaddr, KM_USER0);
if (copied != bytes) {
kaddr = kmap(page);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
+ copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+ base, bytes);
+ if (bytes - copied)
+ memset(kaddr + offset + copied, 0, bytes - copied);
kunmap(page);
}
return copied;
^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 002 of 2] Make copy_from_user_inatomic NOT zero the tail on i386
2006-05-22 4:46 [PATCH - RESEND - 000 of 2] Avoid subtle cache consistency problem NeilBrown
2006-05-22 4:46 ` [PATCH 001 of 2] Prepare for __copy_from_user_inatomic to not zero missed bytes NeilBrown
@ 2006-05-22 4:46 ` NeilBrown
1 sibling, 0 replies; 6+ messages in thread
From: NeilBrown @ 2006-05-22 4:46 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
As described in a previous patch and documented in mm/filemap.h,
copy_from_user_inatomic* shouldn't zero out the tail of the buffer
after an incomplete copy.
This patch implements that change for i386.
For the _nocache version, a new __copy_user_intel_nocache is defined,
similar to __copy_user_zeroing_intel_nocache, and this is ultimately
used for the copy.
For the regular version, __copy_from_user_ll_nozero is defined, which
uses __copy_user and __copy_user_intel - the latter needs casts to
reposition the __user annotations.
If __copy_from_user_inatomic is given a constant length of 1, 2, or 4,
then we do still zero the destination on failure.  This did not seem
worth the effort of fixing, as the places where it is used really
don't care.
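For illustration (a hedged example, not part of the patch; 'uptr',
'buf' and 'len' are hypothetical):

        u32 val;

        /* Constant size 4: handled by the inline __get_user_size() case,
         * so on a fault 'val' is still zeroed. */
        if (__copy_from_user_inatomic(&val, uptr, sizeof(val)))
                return -EFAULT;

        /* Non-constant size: goes through __copy_from_user_ll_nozero(),
         * so any bytes that could not be copied are left untouched. */
        if (__copy_from_user_inatomic(buf, uptr, len))
                return -EFAULT;         /* tail of 'buf' is NOT zero-padded */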
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./arch/i386/lib/usercopy.c | 119 +++++++++++++++++++++++++++++++++++++++++++
./include/asm-i386/uaccess.h | 46 ++++++++++++----
2 files changed, 153 insertions(+), 12 deletions(-)
diff ./arch/i386/lib/usercopy.c~current~ ./arch/i386/lib/usercopy.c
--- ./arch/i386/lib/usercopy.c~current~ 2006-05-22 11:48:30.000000000 +1000
+++ ./arch/i386/lib/usercopy.c 2006-05-22 14:34:09.000000000 +1000
@@ -528,6 +528,97 @@ static unsigned long __copy_user_zeroing
return size;
}
+static unsigned long __copy_user_intel_nocache(void *to,
+ const void __user *from, unsigned long size)
+{
+ int d0, d1;
+
+ __asm__ __volatile__(
+ " .align 2,0x90\n"
+ "0: movl 32(%4), %%eax\n"
+ " cmpl $67, %0\n"
+ " jbe 2f\n"
+ "1: movl 64(%4), %%eax\n"
+ " .align 2,0x90\n"
+ "2: movl 0(%4), %%eax\n"
+ "21: movl 4(%4), %%edx\n"
+ " movnti %%eax, 0(%3)\n"
+ " movnti %%edx, 4(%3)\n"
+ "3: movl 8(%4), %%eax\n"
+ "31: movl 12(%4),%%edx\n"
+ " movnti %%eax, 8(%3)\n"
+ " movnti %%edx, 12(%3)\n"
+ "4: movl 16(%4), %%eax\n"
+ "41: movl 20(%4), %%edx\n"
+ " movnti %%eax, 16(%3)\n"
+ " movnti %%edx, 20(%3)\n"
+ "10: movl 24(%4), %%eax\n"
+ "51: movl 28(%4), %%edx\n"
+ " movnti %%eax, 24(%3)\n"
+ " movnti %%edx, 28(%3)\n"
+ "11: movl 32(%4), %%eax\n"
+ "61: movl 36(%4), %%edx\n"
+ " movnti %%eax, 32(%3)\n"
+ " movnti %%edx, 36(%3)\n"
+ "12: movl 40(%4), %%eax\n"
+ "71: movl 44(%4), %%edx\n"
+ " movnti %%eax, 40(%3)\n"
+ " movnti %%edx, 44(%3)\n"
+ "13: movl 48(%4), %%eax\n"
+ "81: movl 52(%4), %%edx\n"
+ " movnti %%eax, 48(%3)\n"
+ " movnti %%edx, 52(%3)\n"
+ "14: movl 56(%4), %%eax\n"
+ "91: movl 60(%4), %%edx\n"
+ " movnti %%eax, 56(%3)\n"
+ " movnti %%edx, 60(%3)\n"
+ " addl $-64, %0\n"
+ " addl $64, %4\n"
+ " addl $64, %3\n"
+ " cmpl $63, %0\n"
+ " ja 0b\n"
+ " sfence \n"
+ "5: movl %0, %%eax\n"
+ " shrl $2, %0\n"
+ " andl $3, %%eax\n"
+ " cld\n"
+ "6: rep; movsl\n"
+ " movl %%eax,%0\n"
+ "7: rep; movsb\n"
+ "8:\n"
+ ".section .fixup,\"ax\"\n"
+ "9: lea 0(%%eax,%0,4),%0\n"
+ "16: jmp 8b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,16b\n"
+ " .long 1b,16b\n"
+ " .long 2b,16b\n"
+ " .long 21b,16b\n"
+ " .long 3b,16b\n"
+ " .long 31b,16b\n"
+ " .long 4b,16b\n"
+ " .long 41b,16b\n"
+ " .long 10b,16b\n"
+ " .long 51b,16b\n"
+ " .long 11b,16b\n"
+ " .long 61b,16b\n"
+ " .long 12b,16b\n"
+ " .long 71b,16b\n"
+ " .long 13b,16b\n"
+ " .long 81b,16b\n"
+ " .long 14b,16b\n"
+ " .long 91b,16b\n"
+ " .long 6b,9b\n"
+ " .long 7b,16b\n"
+ ".previous"
+ : "=&c"(size), "=&D" (d0), "=&S" (d1)
+ : "1"(to), "2"(from), "0"(size)
+ : "eax", "edx", "memory");
+ return size;
+}
+
#else
/*
@@ -694,6 +785,19 @@ unsigned long __copy_from_user_ll(void *
}
EXPORT_SYMBOL(__copy_from_user_ll);
+unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+ if (movsl_is_ok(to, from, n))
+ __copy_user(to, from, n);
+ else
+ n = __copy_user_intel((void __user *)to,
+ (const void *)from, n);
+ return n;
+}
+EXPORT_SYMBOL(__copy_from_user_ll_nozero);
+
unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
@@ -709,6 +813,21 @@ unsigned long __copy_from_user_ll_nocach
return n;
}
+unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+ if ( n > 64 && cpu_has_xmm2)
+ n = __copy_user_intel_nocache(to, from, n);
+ else
+ __copy_user(to, from, n);
+#else
+ __copy_user(to, from, n);
+#endif
+ return n;
+}
+
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
diff ./include/asm-i386/uaccess.h~current~ ./include/asm-i386/uaccess.h
--- ./include/asm-i386/uaccess.h~current~ 2006-05-22 11:20:26.000000000 +1000
+++ ./include/asm-i386/uaccess.h 2006-05-22 14:25:28.000000000 +1000
@@ -390,8 +390,12 @@ unsigned long __must_check __copy_to_use
const void *from, unsigned long n);
unsigned long __must_check __copy_from_user_ll(void *to,
const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_nozero(void *to,
+ const void __user *from, unsigned long n);
unsigned long __must_check __copy_from_user_ll_nocache(void *to,
const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to,
+ const void __user *from, unsigned long n);
/*
* Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
@@ -463,11 +467,36 @@ __copy_to_user(void __user *to, const vo
* atomic context and will fail rather than sleep. In this case the
* uncopied bytes will *NOT* be padded with zeros. See mm/filemap.h
* for an explanation of why this is needed.
- * FIXME this isn't implemented yet EMXIF
*/
static __always_inline unsigned long
__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
{
+ /* Avoid zeroing the tail if the copy fails..
+ * If 'n' is constant and 1, 2, or 4, we do still zero on a failure,
+ * but as the zeroing behaviour is only significant when n is not
+ * constant, that shouldn't be a problem.
+ */
+ if (__builtin_constant_p(n)) {
+ unsigned long ret;
+
+ switch (n) {
+ case 1:
+ __get_user_size(*(u8 *)to, from, 1, ret, 1);
+ return ret;
+ case 2:
+ __get_user_size(*(u16 *)to, from, 2, ret, 2);
+ return ret;
+ case 4:
+ __get_user_size(*(u32 *)to, from, 4, ret, 4);
+ return ret;
+ }
+ }
+ return __copy_from_user_ll_nozero(to, from, n);
+}
+static __always_inline unsigned long
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ might_sleep();
if (__builtin_constant_p(n)) {
unsigned long ret;
@@ -488,9 +517,10 @@ __copy_from_user_inatomic(void *to, cons
#define ARCH_HAS_NOCACHE_UACCESS
-static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to,
+static __always_inline unsigned long __copy_from_user_nocache(void *to,
const void __user *from, unsigned long n)
{
+ might_sleep();
if (__builtin_constant_p(n)) {
unsigned long ret;
@@ -510,17 +540,9 @@ static __always_inline unsigned long __c
}
static __always_inline unsigned long
-__copy_from_user(void *to, const void __user *from, unsigned long n)
+__copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n)
{
- might_sleep();
- return __copy_from_user_inatomic(to, from, n);
-}
-
-static __always_inline unsigned long
-__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
-{
- might_sleep();
- return __copy_from_user_inatomic_nocache(to, from, n);
+ return __copy_from_user_ll_nocache_nozero(to, from, n);
}
unsigned long __must_check copy_to_user(void __user *to,
^ permalink raw reply [flat|nested] 6+ messages in thread