From: Matteo Croce <mcroce@linux.microsoft.com>
To: linux-kernel@vger.kernel.org, Nick Kossifidis <mick@ics.forth.gr>,
Guo Ren <guoren@kernel.org>,
Christoph Hellwig <hch@infradead.org>,
David Laight <David.Laight@aculab.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Emil Renner Berthing <kernel@esmil.dk>,
Drew Fustini <drew@beagleboard.org>
Cc: linux-arch@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
Nick Desaulniers <ndesaulniers@google.com>,
linux-riscv@lists.infradead.org
Subject: [PATCH v2 1/3] lib/string: optimized memcpy
Date: Fri, 2 Jul 2021 14:31:51 +0200 [thread overview]
Message-ID: <20210702123153.14093-2-mcroce@linux.microsoft.com> (raw)
In-Reply-To: <20210702123153.14093-1-mcroce@linux.microsoft.com>
From: Matteo Croce <mcroce@microsoft.com>
Rewrite the generic memcpy() to copy a word at a time, without generating
unaligned accesses.
The procedure is made of three steps:
First, copy data one byte at a time until the destination buffer is aligned
to a long boundary.
Then, copy the data one long at a time, shifting the current and the next long
to compose a full long on every iteration.
Finally, copy the remainder one byte at a time.
This is the improvement on RISC-V:
original aligned: 75 Mb/s
original unaligned: 75 Mb/s
new aligned: 114 Mb/s
new unaligned: 107 Mb/s
and this is the binary size increase according to bloat-o-meter:
Function old new delta
memcpy 36 324 +288
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
lib/string.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 77 insertions(+), 3 deletions(-)
diff --git a/lib/string.c b/lib/string.c
index 546d59711a12..caeef4264c43 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -33,6 +33,23 @@
#include <asm/word-at-a-time.h>
#include <asm/page.h>
+#define BYTES_LONG sizeof(long)
+#define WORD_MASK (BYTES_LONG - 1)
+#define MIN_THRESHOLD (BYTES_LONG * 2)
+
+/* convenience union to avoid cast between different pointer types */
+union types {
+ u8 *as_u8;
+ unsigned long *as_ulong;
+ uintptr_t as_uptr;
+};
+
+union const_types {
+ const u8 *as_u8;
+ const unsigned long *as_ulong;
+ uintptr_t as_uptr;
+};
+
#ifndef __HAVE_ARCH_STRNCASECMP
/**
* strncasecmp - Case insensitive, length-limited string comparison
@@ -869,6 +886,13 @@ EXPORT_SYMBOL(memset64);
#endif
#ifndef __HAVE_ARCH_MEMCPY
+
+#ifdef __BIG_ENDIAN
+#define MERGE_UL(h, l, d) ((h) << ((d) * 8) | (l) >> ((BYTES_LONG - (d)) * 8))
+#else
+#define MERGE_UL(h, l, d) ((h) >> ((d) * 8) | (l) << ((BYTES_LONG - (d)) * 8))
+#endif
+
/**
* memcpy - Copy one area of memory to another
* @dest: Where to copy to
@@ -880,14 +904,64 @@ EXPORT_SYMBOL(memset64);
*/
void *memcpy(void *dest, const void *src, size_t count)
{
- char *tmp = dest;
- const char *s = src;
+ union const_types s = { .as_u8 = src };
+ union types d = { .as_u8 = dest };
+ int distance = 0;
+
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ if (count < MIN_THRESHOLD)
+ goto copy_remainder;
+
+ /* Copy a byte at time until destination is aligned. */
+ for (; d.as_uptr & WORD_MASK; count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ distance = s.as_uptr & WORD_MASK;
+ }
+
+ if (distance) {
+ unsigned long last, next;
+ /*
+ * s is distance bytes ahead of d, and d just reached
+ * the alignment boundary. Move s backward to word align it
+ * and shift data to compensate for distance, in order to do
+ * word-by-word copy.
+ */
+ s.as_u8 -= distance;
+
+ next = s.as_ulong[0];
+ for (; count >= BYTES_LONG; count -= BYTES_LONG) {
+ last = next;
+ next = s.as_ulong[1];
+
+ d.as_ulong[0] = MERGE_UL(last, next, distance);
+
+ d.as_ulong++;
+ s.as_ulong++;
+ }
+
+ /* Restore s with the original offset. */
+ s.as_u8 += distance;
+ } else {
+ /*
+ * If the source and dest lower bits are the same, do a simple
+ * 32/64 bit wide copy.
+ */
+ for (; count >= BYTES_LONG; count -= BYTES_LONG)
+ *d.as_ulong++ = *s.as_ulong++;
+ }
+
+copy_remainder:
while (count--)
- *tmp++ = *s++;
+ *d.as_u8++ = *s.as_u8++;
+
return dest;
}
EXPORT_SYMBOL(memcpy);
+
+#undef MERGE_UL
+
#endif
#ifndef __HAVE_ARCH_MEMMOVE
--
2.31.1
next prev parent reply other threads:[~2021-07-02 12:32 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-02 12:31 [PATCH v2 0/3] lib/string: optimized mem* functions Matteo Croce
2021-07-02 12:31 ` Matteo Croce [this message]
2021-07-02 14:37 ` [PATCH v2 1/3] lib/string: optimized memcpy Ben Dooks
2021-07-02 14:44 ` Matteo Croce
2021-07-02 12:31 ` [PATCH v2 2/3] lib/string: optimized memmove Matteo Croce
2021-07-02 12:31 ` [PATCH v2 3/3] lib/string: optimized memset Matteo Croce
2021-07-10 21:31 ` [PATCH v2 0/3] lib/string: optimized mem* functions Andrew Morton
2021-07-10 23:07 ` Matteo Croce
2021-07-12 8:15 ` David Laight
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210702123153.14093-2-mcroce@linux.microsoft.com \
--to=mcroce@linux.microsoft.com \
--cc=David.Laight@aculab.com \
--cc=akpm@linux-foundation.org \
--cc=drew@beagleboard.org \
--cc=guoren@kernel.org \
--cc=hch@infradead.org \
--cc=kernel@esmil.dk \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=mick@ics.forth.gr \
--cc=ndesaulniers@google.com \
--cc=palmer@dabbelt.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).