linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Matteo Croce <mcroce@linux.microsoft.com>
To: linux-kernel@vger.kernel.org, Nick Kossifidis <mick@ics.forth.gr>,
	Guo Ren <guoren@kernel.org>,
	Christoph Hellwig <hch@infradead.org>,
	David Laight <David.Laight@aculab.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Emil Renner Berthing <kernel@esmil.dk>,
	Drew Fustini <drew@beagleboard.org>
Cc: linux-arch@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	linux-riscv@lists.infradead.org
Subject: [PATCH 1/3] lib/string: optimized memcpy
Date: Fri, 25 Jun 2021 03:01:58 +0200	[thread overview]
Message-ID: <20210625010200.362755-2-mcroce@linux.microsoft.com> (raw)
In-Reply-To: <20210625010200.362755-1-mcroce@linux.microsoft.com>

From: Matteo Croce <mcroce@microsoft.com>

Rewrite the generic memcpy() to copy a word at time, without generating
unaligned accesses.

The procedure is made of three steps:
First copy data one byte at time until the destination buffer is aligned
to a long boundary.
Then copy the data one long at time shifting the current and the next long
to compose a long at every cycle.
Finally, copy the remainder one byte at time.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 lib/string.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 3 deletions(-)

diff --git a/lib/string.c b/lib/string.c
index 546d59711a12..15e906f97d9e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -33,6 +33,24 @@
 #include <asm/word-at-a-time.h>
 #include <asm/page.h>
 
+#define MIN_THRESHOLD (sizeof(long) * 2)
+
+/* convenience union to avoid cast between different pointer types */
+union types {
+	u8 *as_u8;
+	unsigned long *as_ulong;
+	uintptr_t as_uptr;
+};
+
+union const_types {
+	const u8 *as_u8;
+	const unsigned long *as_ulong;
+	uintptr_t as_uptr;
+};
+
+static const unsigned int bytes_long = sizeof(long);
+static const unsigned int word_mask = bytes_long - 1;
+
 #ifndef __HAVE_ARCH_STRNCASECMP
 /**
  * strncasecmp - Case insensitive, length-limited string comparison
@@ -878,16 +896,73 @@ EXPORT_SYMBOL(memset64);
  * You should not use this function to access IO space, use memcpy_toio()
  * or memcpy_fromio() instead.
  */
+
+#ifdef __BIG_ENDIAN
+#define MERGE_UL(h, l, d) ((h) << ((d) * 8) | (l) >> ((bytes_long - (d)) * 8))
+#else
+#define MERGE_UL(h, l, d) ((h) >> ((d) * 8) | (l) << ((bytes_long - (d)) * 8))
+#endif
+
 void *memcpy(void *dest, const void *src, size_t count)
 {
-	char *tmp = dest;
-	const char *s = src;
+	union const_types s = { .as_u8 = src };
+	union types d = { .as_u8 = dest };
+	int distance = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		if (count < MIN_THRESHOLD)
+			goto copy_remainder;
+
+		/* Copy a byte at time until destination is aligned. */
+		for (; d.as_uptr & word_mask; count--)
+			*d.as_u8++ = *s.as_u8++;
+
+		distance = s.as_uptr & word_mask;
+	}
 
+	if (distance) {
+		unsigned long last, next;
+
+		/*
+		 * s is distance bytes ahead of d, and d just reached
+		 * the alignment boundary. Move s backward to word align it
+		 * and shift data to compensate for distance, in order to do
+		 * word-by-word copy.
+		 */
+		s.as_u8 -= distance;
+
+		next = s.as_ulong[0];
+		for (; count >= bytes_long + word_mask; count -= bytes_long) {
+			last = next;
+			next = s.as_ulong[1];
+
+			d.as_ulong[0] = MERGE_UL(last, next, distance);
+
+			d.as_ulong++;
+			s.as_ulong++;
+		}
+
+		/* Restore s with the original offset. */
+		s.as_u8 += distance;
+	} else {
+		/*
+		 * If the source and dest lower bits are the same, do a simple
+		 * 32/64 bit wide copy.
+		 */
+		for (; count >= bytes_long; count -= bytes_long)
+			*d.as_ulong++ = *s.as_ulong++;
+	}
+
+copy_remainder:
 	while (count--)
-		*tmp++ = *s++;
+		*d.as_u8++ = *s.as_u8++;
+
 	return dest;
 }
 EXPORT_SYMBOL(memcpy);
+
+#undef MERGE_UL
+
 #endif
 
 #ifndef __HAVE_ARCH_MEMMOVE
-- 
2.31.1


  reply	other threads:[~2021-06-25  1:02 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-25  1:01 [PATCH 0/3] lib/string: optimized mem* functions Matteo Croce
2021-06-25  1:01 ` Matteo Croce [this message]
2021-06-25  5:01   ` [PATCH 1/3] lib/string: optimized memcpy kernel test robot
2021-06-25 14:41   ` kernel test robot
2021-06-25  1:01 ` [PATCH 2/3] lib/string: optimized memmove Matteo Croce
2021-06-25  1:02 ` [PATCH 3/3] lib/string: optimized memset Matteo Croce
2021-06-25 17:45 ` [PATCH 0/3] lib/string: optimized mem* functions Nick Desaulniers
2021-06-27  0:19   ` Matteo Croce
2021-07-01  0:29     ` Matteo Croce

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210625010200.362755-2-mcroce@linux.microsoft.com \
    --to=mcroce@linux.microsoft.com \
    --cc=David.Laight@aculab.com \
    --cc=akpm@linux-foundation.org \
    --cc=drew@beagleboard.org \
    --cc=guoren@kernel.org \
    --cc=hch@infradead.org \
    --cc=kernel@esmil.dk \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=mick@ics.forth.gr \
    --cc=ndesaulniers@google.com \
    --cc=palmer@dabbelt.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).