From: Hiro Yoshioka <hyoshiok@miraclelinux.com>
To: lkml.hyoshiok@gmail.com
Cc: ak@suse.de, arjan@infradead.org, taka@valinux.co.jp,
linux-kernel@vger.kernel.org, hyoshiok@miraclelinux.com
Subject: Re: [RFC] [PATCH] cache pollution aware __copy_from_user_ll()
Date: Thu, 18 Aug 2005 20:11:38 +0900 (JST) [thread overview]
Message-ID: <20050818.201138.607962419.hyoshiok@miraclelinux.com> (raw)
In-Reply-To: <98df96d305081804061ea70686@mail.gmail.com>
> So I make two APIs.
> __copy_user_zeroing_nocache()
> __copy_user_zeroing_inatomic_nocache()
>
> The former is a low latency version and the other is a throughput version.
1) using stack to save/restore MMX registers
2) low latency version of cache aware copy
3) __copy_user*_nocache APIs, so you can use them if you want to.
diff -ur linux-2.6.12.4.orig/Makefile linux-2.6.12.4.preempt/Makefile
--- linux-2.6.12.4.orig/Makefile 2005-08-12 14:37:59.000000000 +0900
+++ linux-2.6.12.4.preempt/Makefile 2005-08-18 18:47:07.000000000 +0900
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 12
-EXTRAVERSION = .4.orig
+EXTRAVERSION = .4.preempt
NAME=Woozy Numbat
# *DOCUMENTATION*
diff -ur linux-2.6.12.4.orig/arch/i386/lib/usercopy.c linux-2.6.12.4.preempt/arch/i386/lib/usercopy.c
--- linux-2.6.12.4.orig/arch/i386/lib/usercopy.c 2005-08-05 16:04:37.000000000 +0900
+++ linux-2.6.12.4.preempt/arch/i386/lib/usercopy.c 2005-08-18 19:07:49.000000000 +0900
@@ -10,6 +10,7 @@
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
+#include <asm/i387.h>
#include <asm/uaccess.h>
#include <asm/mmx.h>
@@ -511,6 +512,254 @@
: "memory"); \
} while (0)
+/*
+ * MMX_SAVE - prepare to use %mm0-%mm3 for a copy.
+ *
+ * Must be expanded in a scope that provides two locals: an integer 'cr0'
+ * and a buffer 'mmx_save' of at least 32 bytes.  It disables preemption,
+ * copies %cr0 into 'cr0', executes clts, and stores %mm0-%mm3 at offsets
+ * 0, 8, 16 and 24 of 'mmx_save'.  Pair every use with MMX_RESTORE.
+ *
+ * NOTE(review): only the four register values are saved.  MMX instructions
+ * presumably also change other x87 state (tag word, top-of-stack) that is
+ * not captured here -- confirm against the Intel SDM before relying on
+ * this instead of kernel_fpu_begin().
+ */
+#define MMX_SAVE do { \
+ preempt_disable(); \
+ __asm__ __volatile__ ( \
+ "movl %%cr0,%0 ;\n\t" \
+ "clts ;\n\t" \
+ "movq %%mm0,(%1) ;\n\t" \
+ "movq %%mm1,8(%1) ;\n\t" \
+ "movq %%mm2,16(%1) ;\n\t" \
+ "movq %%mm3,24(%1) ;\n\t" \
+ : "=&r" (cr0) \
+ : "r" (mmx_save) \
+ : "memory"); \
+} while(0)
+
+/*
+ * MMX_RESTORE - counterpart of MMX_SAVE.
+ *
+ * Uses the same 'cr0' and 'mmx_save' locals as MMX_SAVE.  It issues
+ * sfence, reloads %mm0-%mm3 from offsets 0, 8, 16 and 24 of 'mmx_save',
+ * writes the value held in 'cr0' back to %cr0 and re-enables preemption.
+ *
+ * NOTE(review): the sfence is presumably there to order the movntq stores
+ * issued by the copy loops before the function returns -- confirm.
+ */
+#define MMX_RESTORE do { \
+ __asm__ __volatile__ ( \
+ "sfence ;\n\t" \
+ "movq (%1),%%mm0 ;\n\t" \
+ "movq 8(%1),%%mm1 ;\n\t" \
+ "movq 16(%1),%%mm2 ;\n\t" \
+ "movq 24(%1),%%mm3 ;\n\t" \
+ "movl %0,%%cr0 ;\n\t" \
+ : \
+ : "r" (cr0), "r" (mmx_save) \
+ : "memory"); \
+ preempt_enable(); \
+} while(0)
+
+/* Requests 8-byte alignment; applied to the on-stack mmx_save buffers. */
+#define ALIGN8 __attribute__((aligned(8)))
+
+/*
+ * __copy_user_zeroing_nocache - cache-aware copy from user space
+ * @to:   destination buffer
+ * @from: source buffer in user space
+ * @len:  number of bytes to copy
+ *
+ * Non-temporal-hint version of mmx_memcpy (hyoshiok@miraclelinux.com).
+ *
+ * Whole 64-byte blocks are moved through %mm0-%mm3 with movq loads and
+ * movntq stores while prefetchnta runs ahead of the loads; the remaining
+ * len % 64 bytes are handed to __copy_user_zeroing().  When in_interrupt()
+ * is true the MMX path is skipped and the entire copy is done by
+ * __copy_user_zeroing().
+ *
+ * With CONFIG_PREEMPT the MMX state is restored and saved again each time
+ * the remaining block count is a multiple of 64, so preemption is briefly
+ * re-enabled during long copies.
+ *
+ * Returns the byte count left behind by __copy_user_zeroing() (for the
+ * whole copy in interrupt context, otherwise for the tail only), or 0 when
+ * len is a multiple of 64.  Presumably that is the number of bytes that
+ * could not be copied; the macro is defined outside this hunk -- confirm.
+ *
+ * NOTE(review): only the prefetchnta at label 1 has an __ex_table entry.
+ * The movq loads from @from have none, so a fault on the user buffer in
+ * either block loop is presumably not recoverable and is never reflected
+ * in the return value -- confirm and add fixups before merging.
+ */
+static unsigned long
+__copy_user_zeroing_nocache(void *to, const void __user *from, size_t len)
+{
+	/* mmx_save and cr0 are referenced by name from MMX_SAVE/MMX_RESTORE. */
+	int i;
+	char mmx_save[8*4] ALIGN8;
+	int cr0;
+
+	if (unlikely(in_interrupt())) {
+		__copy_user_zeroing(to, from, len);
+		return len;
+	}
+
+	i = len >> 6; /* number of whole 64-byte blocks */
+
+	/* Used in place of kernel_fpu_begin(). */
+	MMX_SAVE;
+
+	/*
+	 * Warm-up prefetches.  If the prefetch at label 1 faults, the fixup
+	 * overwrites its first two bytes with a short jmp so the prefetch
+	 * sequence is skipped, then resumes at label 2.
+	 */
+	__asm__ __volatile__ (
+		"1: prefetchnta (%0)\n" /* This set is 28 bytes */
+		" prefetchnta 64(%0)\n"
+		" prefetchnta 128(%0)\n"
+		" prefetchnta 192(%0)\n"
+		" prefetchnta 256(%0)\n"
+		"2: \n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+		" jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		" .align 4\n"
+		" .long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+
+	/* Main loop: copy one block while prefetching 320 bytes ahead. */
+	for (; i > 5; i--) {
+		__asm__ __volatile__ (
+			"1: prefetchnta 320(%0)\n"
+			"2: movq (%0), %%mm0\n"
+			" movq 8(%0), %%mm1\n"
+			" movq 16(%0), %%mm2\n"
+			" movq 24(%0), %%mm3\n"
+			" movntq %%mm0, (%1)\n"
+			" movntq %%mm1, 8(%1)\n"
+			" movntq %%mm2, 16(%1)\n"
+			" movntq %%mm3, 24(%1)\n"
+			" movq 32(%0), %%mm0\n"
+			" movq 40(%0), %%mm1\n"
+			" movq 48(%0), %%mm2\n"
+			" movq 56(%0), %%mm3\n"
+			" movntq %%mm0, 32(%1)\n"
+			" movntq %%mm1, 40(%1)\n"
+			" movntq %%mm2, 48(%1)\n"
+			" movntq %%mm3, 56(%1)\n"
+			".section .fixup, \"ax\"\n"
+			"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+			" jmp 2b\n"
+			".previous\n"
+			".section __ex_table,\"a\"\n"
+			" .align 4\n"
+			" .long 1b, 3b\n"
+			".previous"
+			: : "r" (from), "r" (to) : "memory");
+		from += 64;
+		to += 64;
+#ifdef CONFIG_PREEMPT
+		/* Drop and re-take the MMX state to open a preemption point. */
+		if ((i % 64) == 0) {
+			MMX_RESTORE;
+			MMX_SAVE;
+		}
+#endif
+	}
+
+	/* Last (up to five) blocks: same copy, no further prefetching. */
+	for (; i > 0; i--) {
+		__asm__ __volatile__ (
+			" movq (%0), %%mm0\n"
+			" movq 8(%0), %%mm1\n"
+			" movq 16(%0), %%mm2\n"
+			" movq 24(%0), %%mm3\n"
+			" movntq %%mm0, (%1)\n"
+			" movntq %%mm1, 8(%1)\n"
+			" movntq %%mm2, 16(%1)\n"
+			" movntq %%mm3, 24(%1)\n"
+			" movq 32(%0), %%mm0\n"
+			" movq 40(%0), %%mm1\n"
+			" movq 48(%0), %%mm2\n"
+			" movq 56(%0), %%mm3\n"
+			" movntq %%mm0, 32(%1)\n"
+			" movntq %%mm1, 40(%1)\n"
+			" movntq %%mm2, 48(%1)\n"
+			" movntq %%mm3, 56(%1)\n"
+			: : "r" (from), "r" (to) : "memory");
+		from += 64;
+		to += 64;
+	}
+
+	/* Used in place of kernel_fpu_end(). */
+	MMX_RESTORE;
+
+	/* Now do the tail of the block. */
+	i = len & 63;
+	if (i)
+		__copy_user_zeroing(to, from, i);
+	return i;
+}
+
+/*
+ * __copy_user_zeroing_inatomic_nocache - cache-aware copy from user space
+ * @to:   destination buffer
+ * @from: source buffer in user space
+ * @len:  number of bytes to copy
+ *
+ * Whole 64-byte blocks are moved through %mm0-%mm3 with movq loads and
+ * movntq stores while prefetchnta runs ahead of the loads; the remaining
+ * len % 64 bytes are handed to __copy_user_zeroing().  When in_interrupt()
+ * is true the MMX path is skipped and the entire copy is done by
+ * __copy_user_zeroing().
+ *
+ * Preemption stays disabled from MMX_SAVE to MMX_RESTORE for the whole
+ * block copy; there is no intermediate preemption point.
+ *
+ * Returns the byte count left behind by __copy_user_zeroing() (for the
+ * whole copy in interrupt context, otherwise for the tail only), or 0 when
+ * len is a multiple of 64.  Presumably that is the number of bytes that
+ * could not be copied; the macro is defined outside this hunk -- confirm.
+ *
+ * NOTE(review): only the prefetchnta at label 1 has an __ex_table entry.
+ * The movq loads from @from have none, so a fault on the user buffer in
+ * either block loop is presumably not recoverable and is never reflected
+ * in the return value -- confirm and add fixups before merging.
+ */
+static unsigned long
+__copy_user_zeroing_inatomic_nocache(void *to, const void __user *from, size_t len)
+{
+	/* mmx_save and cr0 are referenced by name from MMX_SAVE/MMX_RESTORE. */
+	int i;
+	char mmx_save[8*4] ALIGN8;
+	int cr0;
+
+	if (unlikely(in_interrupt())) {
+		__copy_user_zeroing(to, from, len);
+		return len;
+	}
+
+	i = len >> 6; /* number of whole 64-byte blocks */
+
+	/* Used in place of kernel_fpu_begin(). */
+	MMX_SAVE;
+
+	/*
+	 * Warm-up prefetches.  If the prefetch at label 1 faults, the fixup
+	 * overwrites its first two bytes with a short jmp so the prefetch
+	 * sequence is skipped, then resumes at label 2.
+	 */
+	__asm__ __volatile__ (
+		"1: prefetchnta (%0)\n" /* This set is 28 bytes */
+		" prefetchnta 64(%0)\n"
+		" prefetchnta 128(%0)\n"
+		" prefetchnta 192(%0)\n"
+		" prefetchnta 256(%0)\n"
+		"2: \n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+		" jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		" .align 4\n"
+		" .long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+
+	/* Main loop: copy one block while prefetching 320 bytes ahead. */
+	for (; i > 5; i--) {
+		__asm__ __volatile__ (
+			"1: prefetchnta 320(%0)\n"
+			"2: movq (%0), %%mm0\n"
+			" movq 8(%0), %%mm1\n"
+			" movq 16(%0), %%mm2\n"
+			" movq 24(%0), %%mm3\n"
+			" movntq %%mm0, (%1)\n"
+			" movntq %%mm1, 8(%1)\n"
+			" movntq %%mm2, 16(%1)\n"
+			" movntq %%mm3, 24(%1)\n"
+			" movq 32(%0), %%mm0\n"
+			" movq 40(%0), %%mm1\n"
+			" movq 48(%0), %%mm2\n"
+			" movq 56(%0), %%mm3\n"
+			" movntq %%mm0, 32(%1)\n"
+			" movntq %%mm1, 40(%1)\n"
+			" movntq %%mm2, 48(%1)\n"
+			" movntq %%mm3, 56(%1)\n"
+			".section .fixup, \"ax\"\n"
+			"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+			" jmp 2b\n"
+			".previous\n"
+			".section __ex_table,\"a\"\n"
+			" .align 4\n"
+			" .long 1b, 3b\n"
+			".previous"
+			: : "r" (from), "r" (to) : "memory");
+		from += 64;
+		to += 64;
+	}
+
+	/* Last (up to five) blocks: same copy, no further prefetching. */
+	for (; i > 0; i--) {
+		__asm__ __volatile__ (
+			" movq (%0), %%mm0\n"
+			" movq 8(%0), %%mm1\n"
+			" movq 16(%0), %%mm2\n"
+			" movq 24(%0), %%mm3\n"
+			" movntq %%mm0, (%1)\n"
+			" movntq %%mm1, 8(%1)\n"
+			" movntq %%mm2, 16(%1)\n"
+			" movntq %%mm3, 24(%1)\n"
+			" movq 32(%0), %%mm0\n"
+			" movq 40(%0), %%mm1\n"
+			" movq 48(%0), %%mm2\n"
+			" movq 56(%0), %%mm3\n"
+			" movntq %%mm0, 32(%1)\n"
+			" movntq %%mm1, 40(%1)\n"
+			" movntq %%mm2, 48(%1)\n"
+			" movntq %%mm3, 56(%1)\n"
+			: : "r" (from), "r" (to) : "memory");
+		from += 64;
+		to += 64;
+	}
+
+	/* Used in place of kernel_fpu_end(). */
+	MMX_RESTORE;
+
+	/* Now do the tail of the block. */
+	i = len & 63;
+	if (i)
+		__copy_user_zeroing(to, from, i);
+	return i;
+}
unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
{
@@ -582,6 +831,36 @@
return n;
}
+/*
+ * __copy_from_user_ll_nocache - size-based dispatch for the nocache copy
+ * @to:   destination buffer
+ * @from: source buffer in user space
+ * @n:    number of bytes to copy
+ *
+ * A count with the top bit set trips BUG_ON().  Copies of 512 bytes or
+ * more go through __copy_user_zeroing_nocache(); smaller ones use
+ * __copy_user_zeroing() when movsl_is_ok() accepts the operands and
+ * __copy_user_zeroing_intel() otherwise.
+ *
+ * Returns the count reported by the routine that did the copy; the
+ * callers in mm/filemap.c treat it as the number of bytes left uncopied.
+ */
+unsigned long
+__copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n)
+{
+	BUG_ON((long)n < 0);
+
+	if (n >= 512)
+		return __copy_user_zeroing_nocache(to, from, n);
+
+	if (!movsl_is_ok(to, from, n))
+		return __copy_user_zeroing_intel(to, from, n);
+
+	/*
+	 * Used as a statement: presumably the macro updates n in place
+	 * (it is defined outside this hunk -- confirm).
+	 */
+	__copy_user_zeroing(to, from, n);
+	return n;
+}
+
+/*
+ * __copy_from_user_ll_inatomic_nocache - size-based dispatch, atomic variant
+ * @to:   destination buffer
+ * @from: source buffer in user space
+ * @n:    number of bytes to copy
+ *
+ * A count with the top bit set trips BUG_ON().  Copies of 512 bytes or
+ * more go through __copy_user_zeroing_inatomic_nocache(); smaller ones use
+ * __copy_user_zeroing() when movsl_is_ok() accepts the operands and
+ * __copy_user_zeroing_intel() otherwise.
+ *
+ * Returns the count reported by the routine that did the copy; the
+ * callers in mm/filemap.c treat it as the number of bytes left uncopied.
+ */
+unsigned long
+__copy_from_user_ll_inatomic_nocache(void *to, const void __user *from, unsigned long n)
+{
+	BUG_ON((long)n < 0);
+
+	if (n >= 512)
+		return __copy_user_zeroing_inatomic_nocache(to, from, n);
+
+	if (!movsl_is_ok(to, from, n))
+		return __copy_user_zeroing_intel(to, from, n);
+
+	/*
+	 * Used as a statement: presumably the macro updates n in place
+	 * (it is defined outside this hunk -- confirm).
+	 */
+	__copy_user_zeroing(to, from, n);
+	return n;
+}
+
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
diff -ur linux-2.6.12.4.orig/include/asm-i386/uaccess.h linux-2.6.12.4.preempt/include/asm-i386/uaccess.h
--- linux-2.6.12.4.orig/include/asm-i386/uaccess.h 2005-08-05 16:04:37.000000000 +0900
+++ linux-2.6.12.4.preempt/include/asm-i386/uaccess.h 2005-08-18 19:16:55.000000000 +0900
@@ -413,6 +413,10 @@
const void *from, unsigned long n);
unsigned long __must_check __copy_from_user_ll(void *to,
const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_nocache(void *to,
+ const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_inatomic_nocache(void *to,
+ const void __user *from, unsigned long n);
/*
* Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
@@ -502,11 +506,55 @@
}
+/*
+ * __copy_from_user_inatomic_nocache - nocache copy from user space
+ * @to:   destination buffer
+ * @from: source buffer in user space
+ * @n:    number of bytes to copy
+ *
+ * Compile-time constant sizes of 1, 2 and 4 bytes are served by
+ * __get_user_size() and return its 'ret' value; every other request is
+ * forwarded to __copy_from_user_ll_inatomic_nocache().
+ */
static inline unsigned long
+__copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long ret;
+
+	if (!__builtin_constant_p(n))
+		return __copy_from_user_ll_inatomic_nocache(to, from, n);
+
+	switch (n) {
+	case 1:
+		__get_user_size(*(u8 *)to, from, 1, ret, 1);
+		return ret;
+	case 2:
+		__get_user_size(*(u16 *)to, from, 2, ret, 2);
+		return ret;
+	case 4:
+		__get_user_size(*(u32 *)to, from, 4, ret, 4);
+		return ret;
+	default:
+		return __copy_from_user_ll_inatomic_nocache(to, from, n);
+	}
+}
+
+static inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_sleep();
return __copy_from_user_inatomic(to, from, n);
}
+
+/*
+ * __copy_from_user_nocache - nocache copy from user space, may sleep
+ * @to:   destination buffer
+ * @from: source buffer in user space
+ * @n:    number of bytes to copy
+ *
+ * Calls might_sleep() first.  Compile-time constant sizes of 1, 2 and 4
+ * bytes are served by __get_user_size() and return its 'ret' value; every
+ * other request is forwarded to __copy_from_user_ll_nocache().
+ */
+static inline unsigned long
+__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long ret;
+
+	might_sleep();
+
+	if (!__builtin_constant_p(n))
+		return __copy_from_user_ll_nocache(to, from, n);
+
+	switch (n) {
+	case 1:
+		__get_user_size(*(u8 *)to, from, 1, ret, 1);
+		return ret;
+	case 2:
+		__get_user_size(*(u16 *)to, from, 2, ret, 2);
+		return ret;
+	case 4:
+		__get_user_size(*(u32 *)to, from, 4, ret, 4);
+		return ret;
+	default:
+		return __copy_from_user_ll_nocache(to, from, n);
+	}
+}
+
unsigned long __must_check copy_to_user(void __user *to,
const void *from, unsigned long n);
unsigned long __must_check copy_from_user(void *to,
diff -ur linux-2.6.12.4.orig/mm/filemap.c linux-2.6.12.4.preempt/mm/filemap.c
--- linux-2.6.12.4.orig/mm/filemap.c 2005-08-05 16:04:37.000000000 +0900
+++ linux-2.6.12.4.preempt/mm/filemap.c 2005-08-16 10:16:06.000000000 +0900
@@ -1727,13 +1727,13 @@
int left;
kaddr = kmap_atomic(page, KM_USER0);
- left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
+ left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
kunmap_atomic(kaddr, KM_USER0);
if (left != 0) {
/* Do it the slow way */
kaddr = kmap(page);
- left = __copy_from_user(kaddr + offset, buf, bytes);
+ left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
kunmap(page);
}
return bytes - left;
@@ -1750,7 +1750,7 @@
int copy = min(bytes, iov->iov_len - base);
base = 0;
- left = __copy_from_user_inatomic(vaddr, buf, copy);
+ left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
copied += copy;
bytes -= copy;
vaddr += copy;
Regards,
Hiro
--
Hiro Yoshioka
CTO/Miracle Linux Corporation
next prev parent reply other threads:[~2005-08-18 11:16 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20050816.131729.15816429.taka@valinux.co.jp.suse.lists.linux.kernel>
[not found] ` <20050816.135425.719901536.hyoshiok@miraclelinux.com.suse.lists.linux.kernel>
[not found] ` <1124171015.3215.0.camel@laptopd505.fenrus.org.suse.lists.linux.kernel>
[not found] ` <20050816.191617.1025215458.hyoshiok@miraclelinux.com.suse.lists.linux.kernel>
[not found] ` <1124187950.3215.31.camel@laptopd505.fenrus.org.suse.lists.linux.kernel>
2005-08-16 13:15 ` [RFC] [PATCH] cache pollution aware __copy_from_user_ll() Andi Kleen
2005-08-18 11:06 ` Hiro Yoshioka
2005-08-18 11:11 ` Hiro Yoshioka [this message]
2005-08-18 23:29 ` Hiro Yoshioka
2005-08-22 1:24 ` Hiro Yoshioka
2005-08-22 13:07 ` Andi Kleen
2005-08-22 2:43 ` Hiro Yoshioka
2005-08-22 23:12 ` Hiro Yoshioka
2005-08-24 14:11 ` Hiro Yoshioka
2005-08-24 14:21 ` Arjan van de Ven
2005-08-24 16:22 ` Hirokazu Takahashi
2005-08-25 4:53 ` Hiro Yoshioka
[not found] <20050818.201138.607962419.hyoshiok@miraclelinux.com.suse.lists.linux.kernel>
[not found] ` <98df96d30508181629d85edb5@mail.gmail.com.suse.lists.linux.kernel>
[not found] ` <20050823.081246.846946371.hyoshiok@miraclelinux.com.suse.lists.linux.kernel>
[not found] ` <20050824.231156.278740508.hyoshiok@miraclelinux.com.suse.lists.linux.kernel>
2005-08-24 16:18 ` Andi Kleen
2005-08-25 4:54 ` Hiro Yoshioka
2005-09-01 9:07 ` Hiro Yoshioka
2005-09-01 9:36 ` Andi Kleen
2005-09-02 1:43 ` Hiro Yoshioka
2005-09-02 2:06 ` Andi Kleen
2005-09-02 2:08 ` Andrew Morton
2005-09-02 2:17 ` Andi Kleen
2005-09-02 2:28 ` Andrew Morton
2005-09-02 3:41 ` Hiro Yoshioka
2005-09-02 4:29 ` Andrew Morton
2005-09-02 4:37 ` Hiro Yoshioka
2005-09-03 11:59 ` Hiro Yoshioka
2005-08-17 15:19 Chuck Ebbert
2005-08-18 9:45 ` Hiro Yoshioka
-- strict thread matches above, loose matches on Subject: below --
2005-08-16 18:09 Chuck Ebbert
2005-08-16 23:21 ` Hiro Yoshioka
2005-08-17 4:50 ` Hiro Yoshioka
[not found] <20050815121555.29159.qmail@science.horizon.com.suse.lists.linux.kernel>
[not found] ` <1124108702.3228.33.camel@laptopd505.fenrus.org.suse.lists.linux.kernel>
2005-08-15 15:02 ` Andi Kleen
2005-08-15 15:09 ` Arjan van de Ven
2005-08-15 15:13 ` Andi Kleen
2005-08-15 12:15 linux
2005-08-15 12:25 ` Arjan van de Ven
2005-08-14 21:24 Ian Kumlien
2005-08-15 7:21 ` Arjan van de Ven
2005-08-15 14:49 ` Ian Kumlien
2005-08-14 9:16 Hiro Yoshioka
2005-08-14 9:41 ` Arjan van de Ven
2005-08-14 10:22 ` Hiro Yoshioka
2005-08-14 10:35 ` Arjan van de Ven
2005-08-14 10:45 ` Christoph Hellwig
2005-08-15 6:43 ` Hiro Yoshioka
2005-08-15 7:16 ` Arjan van de Ven
2005-08-15 8:44 ` Hiro Yoshioka
2005-08-15 8:53 ` Arjan van de Ven
2005-08-15 23:33 ` Hiro Yoshioka
2005-08-16 3:30 ` Hiro Yoshioka
2005-08-16 4:17 ` Hirokazu Takahashi
2005-08-16 4:54 ` Hiro Yoshioka
2005-08-16 5:43 ` Arjan van de Ven
2005-08-16 10:16 ` Hiro Yoshioka
2005-08-16 10:19 ` Hirokazu Takahashi
2005-08-16 10:25 ` Arjan van de Ven
2005-08-16 10:24 ` Hirokazu Takahashi
2005-08-16 5:44 ` Arjan van de Ven
2005-08-16 5:49 ` Arjan van de Ven
[not found] ` <20050817.110503.97359275.taka@valinux.co.jp>
2005-08-17 5:10 ` Hiro Yoshioka
2005-08-17 14:30 ` Akira Tsukamoto
2005-08-17 15:27 ` Akira Tsukamoto
2005-08-18 17:53 ` Lee Revell
2005-08-18 2:37 ` Akira Tsukamoto
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050818.201138.607962419.hyoshiok@miraclelinux.com \
--to=hyoshiok@miraclelinux.com \
--cc=ak@suse.de \
--cc=arjan@infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lkml.hyoshiok@gmail.com \
--cc=taka@valinux.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.