linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Linus Torvalds <torvalds@linux-foundation.org>
To: David Miller <davem@davemloft.net>
Cc: linux-arch@vger.kernel.org
Subject: Re: Mostly portable strnlen_user()
Date: Fri, 25 May 2012 20:56:37 -0700	[thread overview]
Message-ID: <CA+55aFxDyPCEFbOFaPbXF556NbtakuC0RcoUKhCa5k2EsqJ8CQ@mail.gmail.com> (raw)
In-Reply-To: <20120525.224318.1418525735588086513.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 1486 bytes --]

On Fri, May 25, 2012 at 7:43 PM, David Miller <davem@davemloft.net> wrote:
>
> It CSE's them into the loop, but like I said a few days
> ago it doesn't CSE them into the find_zero() code block.

Ok, I think I have a solution.

This is *totally* untested, but it compiles on x86. And I think it's
"close to the right thing".

It makes those constants explicit, so that sharing them is easy when
we have two different users, and actually does that in fs/namei.c.

The interface is a bit odd, but the rules are:

 - has_zero -> prep_zero_mask -> create_zero_mask -> { zero_bytemask ,
find_zero }

where two masks that have been created by prep_zero_mask can be or'ed
together to create one "one or the other" mask.

So the has_zero -> prep_zero_mask boundary is due to your BE
efficiency issue (ie "has_zero()" goes inside the loop, and
"prep_zero_mask()" goes outside).

The prep_zero_mask -> create_zero_mask boundary is due to that "we can
combine multiple masks at this level" issue.

And the create_zero_mask -> { zero_bytemask , find_zero } boundary is
because we actually want to create a bytemask for some things, and
just find the zero for others.

Does this *work*? I don't know. But it generates code that looks *roughly* sane.

Btw, this patch replaces the one you sent - the interdiff didn't look
sane (but it goes on top of the one I sent out earlier). And I just
put your BE-specific header into asm-generic after all.

Does this look sane to you?

                   Linus

[-- Attachment #2: patch.diff --]
[-- Type: application/octet-stream, Size: 11569 bytes --]

 arch/openrisc/include/asm/Kbuild      |  1 +
 arch/sparc/include/asm/Kbuild         |  1 +
 arch/x86/Kconfig                      |  1 +
 arch/x86/include/asm/word-at-a-time.h | 33 ++++++++++--
 arch/x86/lib/usercopy.c               | 97 -----------------------------------
 fs/namei.c                            | 22 ++++----
 include/asm-generic/word-at-a-time.h  | 44 ++++++++++++++++
 lib/strncpy_from_user.c               | 47 +++--------------
 lib/strnlen_user.c                    | 13 +++--
 9 files changed, 102 insertions(+), 157 deletions(-)

diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index c936483bc8e2..3f35c38d7b64 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -66,3 +66,4 @@ generic-y += topology.h
 generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
+generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 2c2e38821f60..67f83e0a0d68 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -21,3 +21,4 @@ generic-y += div64.h
 generic-y += local64.h
 generic-y += irq_regs.h
 generic-y += local.h
+generic-y += word-at-a-time.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 54bbfac5b91a..d700811785ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
 	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
+	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 
 config INSTRUCTION_DECODER
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index ae03facfadd6..0a993cae80a8 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -13,6 +13,12 @@
 
 #ifdef CONFIG_64BIT
 
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
 /*
  * Jan Achrenius on G+: microoptimized version of
  * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
@@ -37,10 +43,31 @@ static inline long count_masked_bytes(long mask)
 
 #endif
 
-/* Return the high bit set in the first byte that is a zero */
-static inline unsigned long has_zero(unsigned long a)
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
 {
-	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
+	return count_masked_bytes(mask);
 }
 
 /*
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4239f063f6ac..f61ee67ec00f 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -43,100 +43,3 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return len;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
-
-/*
- * Do a strncpy, return length of string without final '\0'.
- * 'count' is the user-supplied count (return 'count' if we
- * hit it), 'max' is the address space maximum (and we return
- * -EFAULT if we hit it).
- */
-static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
-{
-	long res = 0;
-
-	/*
-	 * Truncate 'max' to the user-specified limit, so that
-	 * we only have one limit we need to check in the loop
-	 */
-	if (max > count)
-		max = count;
-
-	while (max >= sizeof(unsigned long)) {
-		unsigned long c, mask;
-
-		/* Fall back to byte-at-a-time if we get a page fault */
-		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
-			break;
-		mask = has_zero(c);
-		if (mask) {
-			mask = (mask - 1) & ~mask;
-			mask >>= 7;
-			*(unsigned long *)(dst+res) = c & mask;
-			return res + count_masked_bytes(mask);
-		}
-		*(unsigned long *)(dst+res) = c;
-		res += sizeof(unsigned long);
-		max -= sizeof(unsigned long);
-	}
-
-	while (max) {
-		char c;
-
-		if (unlikely(__get_user(c,src+res)))
-			return -EFAULT;
-		dst[res] = c;
-		if (!c)
-			return res;
-		res++;
-		max--;
-	}
-
-	/*
-	 * Uhhuh. We hit 'max'. But was that the user-specified maximum
-	 * too? If so, that's ok - we got as much as the user asked for.
-	 */
-	if (res >= count)
-		return res;
-
-	/*
-	 * Nope: we hit the address space limit, and we still had more
-	 * characters the caller would have wanted. That's an EFAULT.
-	 */
-	return -EFAULT;
-}
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-long
-strncpy_from_user(char *dst, const char __user *src, long count)
-{
-	unsigned long max_addr, src_addr;
-
-	if (unlikely(count <= 0))
-		return 0;
-
-	max_addr = user_addr_max();
-	src_addr = (unsigned long)src;
-	if (likely(src_addr < max_addr)) {
-		unsigned long max = max_addr - src_addr;
-		return do_strncpy_from_user(dst, src, count, max);
-	}
-	return -EFAULT;
-}
-EXPORT_SYMBOL(strncpy_from_user);
diff --git a/fs/namei.c b/fs/namei.c
index 93ff12b1a1de..c651f02c9fec 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1452,7 +1452,8 @@ EXPORT_SYMBOL(full_name_hash);
  */
 static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 {
-	unsigned long a, mask, hash, len;
+	unsigned long a, b, adata, bdata, mask, hash, len;
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 
 	hash = a = 0;
 	len = -sizeof(unsigned long);
@@ -1460,17 +1461,18 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 		hash = (hash + a) * 9;
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
-		/* Do we have any NUL or '/' bytes in this word? */
-		mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));
-	} while (!mask);
-
-	/* The mask *below* the first high bit set */
-	mask = (mask - 1) & ~mask;
-	mask >>= 7;
-	hash += a & mask;
+		b = a ^ REPEAT_BYTE('/');
+	} while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
+
+	adata = prep_zero_mask(a, adata, &constants);
+	bdata = prep_zero_mask(b, bdata, &constants);
+
+	mask = create_zero_mask(adata | bdata);
+
+	hash += a & zero_bytemask(mask);
 	*hashp = fold_hash(hash);
 
-	return len + count_masked_bytes(mask);
+	return len + find_zero(mask);
 }
 
 #else
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
new file mode 100644
index 000000000000..90c3d41188da
--- /dev/null
+++ b/include/asm-generic/word-at-a-time.h
@@ -0,0 +1,44 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long high_bits, low_bits;
+};
+
+#define DECLARE_CONST \
+	const struct word_at_a_time constants = { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) };
+#define USE_CONST , &constants
+
+/* Bit set in the bytes that have a zero */
+static inline long create_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+	unsigned long mask = (val & c->low_bits) + c->low_bits;
+	return ~(mask | rhs);
+}
+
+static inline long find_zero(unsigned long mask)
+{
+	long byte = 0;
+#ifdef CONFIG_64BIT
+	if (mask >> 32)
+		mask >>= 32;
+	else
+		byte = 4;
+#endif
+	if (mask >> 16)
+		mask >>= 16;
+	else
+		byte += 2;
+	return (mask >> 8) ? byte : byte + 1;
+}
+
+static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+	unsigned long rhs = val | c->low_bits;
+	*data = rhs;
+	return (val + c->high_bits) & ~rhs;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index c4c09b0e96ba..bb2b201d6ad0 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -4,37 +4,7 @@
 #include <linux/errno.h>
 
 #include <asm/byteorder.h>
-
-static inline long find_zero(unsigned long mask)
-{
-	long byte = 0;
-
-#ifdef __BIG_ENDIAN
-#ifdef CONFIG_64BIT
-	if (mask >> 32)
-		mask >>= 32;
-	else
-		byte = 4;
-#endif
-	if (mask >> 16)
-		mask >>= 16;
-	else
-		byte += 2;
-	return (mask >> 8) ? byte : byte + 1;
-#else
-#ifdef CONFIG_64BIT
-	if (!((unsigned int) mask)) {
-		mask >>= 32;
-		byte = 4;
-	}
-#endif
-	if (!(mask & 0xffff)) {
-		mask >>= 16;
-		byte += 2;
-	}
-	return (mask & 0xff) ? byte : byte + 1;
-#endif
-}
+#include <asm/word-at-a-time.h>
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #define IS_UNALIGNED(src, dst)	0
@@ -51,8 +21,7 @@ static inline long find_zero(unsigned long mask)
  */
 static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
 {
-	const unsigned long high_bits = REPEAT_BYTE(0xfe) + 1;
-	const unsigned long low_bits = REPEAT_BYTE(0x7f);
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 	long res = 0;
 
 	/*
@@ -66,18 +35,16 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
 		goto byte_at_a_time;
 
 	while (max >= sizeof(unsigned long)) {
-		unsigned long c, v, rhs;
+		unsigned long c, data;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
 		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
 			break;
-		rhs = c | low_bits;
-		v = (c + high_bits) & ~rhs;
 		*(unsigned long *)(dst+res) = c;
-		if (v) {
-			v = (c & low_bits) + low_bits;
-			v = ~(v | rhs);
-			return res + find_zero(v);
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data);
 		}
 		res += sizeof(unsigned long);
 		max -= sizeof(unsigned long);
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 3dc78a846b1a..735f91ee9b94 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -30,6 +30,7 @@
  */
 static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
 {
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 	long align, res = 0;
 	unsigned long c;
 
@@ -53,13 +54,11 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 	c |= aligned_byte_mask(align);
 
 	for (;;) {
-		unsigned long mask;
-
-		mask = has_zero(c);
-		if (mask) {
-			mask = (mask - 1) & ~mask;
-			mask >>= 7;
-			return res + count_masked_bytes(mask) + 1 - align;
+		unsigned long data;
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data) + 1 - align;
 		}
 		res += sizeof(unsigned long);
 		if (unlikely(max < sizeof(unsigned long)))

  reply	other threads:[~2012-05-26  3:57 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-25 22:35 Mostly portable strnlen_user() Linus Torvalds
2012-05-25 23:06 ` David Miller
2012-05-25 23:11   ` Linus Torvalds
2012-05-25 23:14     ` David Miller
2012-05-25 23:37       ` Linus Torvalds
2012-05-25 23:41         ` David Miller
2012-05-26  0:41           ` David Miller
2012-05-26  1:09             ` Linus Torvalds
2012-05-26  1:21               ` David Miller
2012-05-26  2:13               ` Linus Torvalds
2012-05-26  2:43                 ` David Miller
2012-05-26  3:56                   ` Linus Torvalds [this message]
2012-05-26  4:15                     ` David Miller
2012-05-26  4:19                       ` Linus Torvalds
2012-05-26  4:34                         ` David Miller
2012-05-26  4:44                         ` David Miller
2012-05-26  5:06                           ` Linus Torvalds
2012-05-26  5:59                             ` David Miller
2012-05-26  8:32                         ` Jonas Bonn
2012-05-26 18:39                           ` Linus Torvalds
2012-05-26 23:46                             ` David Miller
2012-05-27  8:28                             ` Jonas Bonn
2012-05-28  3:07                             ` Paul Mackerras
2012-05-28  3:47                               ` Linus Torvalds
2012-05-28  3:53                                 ` Linus Torvalds
2012-05-26  4:16                     ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CA+55aFxDyPCEFbOFaPbXF556NbtakuC0RcoUKhCa5k2EsqJ8CQ@mail.gmail.com \
    --to=torvalds@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=linux-arch@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).