All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luc Pelletier <lucp.at.work@gmail.com>
To: bruce.richardson@intel.com, konstantin.ananyev@intel.com
Cc: dev@dpdk.org, Luc Pelletier <lucp.at.work@gmail.com>,
	Xiaoyun Li <xiaoyun.li@intel.com>,
	stable@dpdk.org
Subject: [PATCH v5] eal: fix unaligned loads/stores in rte_memcpy_generic
Date: Mon, 17 Jan 2022 10:37:12 -0500	[thread overview]
Message-ID: <20220117153711.32829-1-lucp.at.work@gmail.com> (raw)
In-Reply-To: <20220115194102.444140-1-lucp.at.work@gmail.com>

Calls to rte_memcpy_generic could result in unaligned loads/stores for
1 < n < 16. This is undefined behavior according to the C standard,
and it gets flagged by the clang undefined behavior sanitizer.

rte_memcpy_generic is called with unaligned src and dst addresses.
When 1 < n < 16, the code would cast both src and dst to a qword,
dword or word pointer, without verifying the alignment of src/dst. The
code was changed to use a packed structure to perform the unaligned
load/store operations. This results in unaligned load/store operations
to be C standards-compliant.

Fixes: d35cc1fe6a7a ("eal/x86: revert select optimized memcpy at run-time")
Cc: Xiaoyun Li <xiaoyun.li@intel.com>
Cc: stable@dpdk.org

Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
---

Thanks to Stephen's pointer to look at the linux kernel, I was able to
find a way to perform the unaligned load/store using pure C code. The
new functions added to perform the load/store could likely be moved to a
different file and the code duplication could likely be eliminated by
using a macro. However, I will hold off on making these changes until I
get confirmation from maintainers that this technique is acceptable and
this is what we want to move forward with.

 lib/eal/x86/include/rte_memcpy.h | 142 +++++++++++++++++--------------
 1 file changed, 80 insertions(+), 62 deletions(-)

diff --git a/lib/eal/x86/include/rte_memcpy.h b/lib/eal/x86/include/rte_memcpy.h
index 1b6c6e585f..4e876d39eb 100644
--- a/lib/eal/x86/include/rte_memcpy.h
+++ b/lib/eal/x86/include/rte_memcpy.h
@@ -45,6 +45,83 @@ extern "C" {
 static __rte_always_inline void *
 rte_memcpy(void *dst, const void *src, size_t n);
 
+static __rte_always_inline uint64_t
+rte_load_unaligned_uint64(const void *ptr)
+{
+	struct unaligned_uint64 { uint64_t val; } __rte_packed;
+	return ((const struct unaligned_uint64 *)ptr)->val;
+}
+
+static __rte_always_inline uint32_t
+rte_load_unaligned_uint32(const void *ptr)
+{
+	struct unaligned_uint32 { uint32_t val; } __rte_packed;
+	return ((const struct unaligned_uint32 *)ptr)->val;
+}
+
+static __rte_always_inline uint16_t
+rte_load_unaligned_uint16(const void *ptr)
+{
+	struct unaligned_uint16 { uint16_t val; } __rte_packed;
+	return ((const struct unaligned_uint16 *)ptr)->val;
+}
+
+static __rte_always_inline void
+rte_store_unaligned_uint64(void *ptr, uint64_t val)
+{
+	struct unaligned_uint64 { uint64_t val; } __rte_packed;
+	((struct unaligned_uint64 *)ptr)->val = val;
+}
+
+static __rte_always_inline void
+rte_store_unaligned_uint32(void *ptr, uint32_t val)
+{
+	struct unaligned_uint32 { uint32_t val; } __rte_packed;
+	((struct unaligned_uint32 *)ptr)->val = val;
+}
+
+static __rte_always_inline void
+rte_store_unaligned_uint16(void *ptr, uint16_t val)
+{
+	struct unaligned_uint16 { uint16_t val; } __rte_packed;
+	((struct unaligned_uint16 *)ptr)->val = val;
+}
+
+/**
+ * Copy bytes from one location to another,
+ * locations should not overlap.
+ * Use with unaligned src/dst, and n <= 15.
+ */
+static __rte_always_inline void *
+rte_mov15_or_less_unaligned(void *dst, const void *src, size_t n)
+{
+	void *ret = dst;
+	if (n & 8) {
+		rte_store_unaligned_uint64(
+			dst,
+			rte_load_unaligned_uint64(src));
+		src = ((const uint64_t *)src + 1);
+		dst = ((uint64_t *)dst + 1);
+	}
+	if (n & 4) {
+		rte_store_unaligned_uint32(
+			dst,
+			rte_load_unaligned_uint32(src));
+		src = ((const uint32_t *)src + 1);
+		dst = ((uint32_t *)dst + 1);
+	}
+	if (n & 2) {
+		rte_store_unaligned_uint16(
+			dst,
+			rte_load_unaligned_uint16(src));
+		src = ((const uint16_t *)src + 1);
+		dst = ((uint16_t *)dst + 1);
+	}
+	if (n & 1)
+		*(uint8_t *)dst = *(const uint8_t *)src;
+	return ret;
+}
+
 #if defined __AVX512F__ && defined RTE_MEMCPY_AVX512
 
 #define ALIGNMENT_MASK 0x3F
@@ -171,8 +248,6 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n)
 static __rte_always_inline void *
 rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
-	uintptr_t dstu = (uintptr_t)dst;
-	uintptr_t srcu = (uintptr_t)src;
 	void *ret = dst;
 	size_t dstofss;
 	size_t bits;
@@ -181,24 +256,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
 	 * Copy less than 16 bytes
 	 */
 	if (n < 16) {
-		if (n & 0x01) {
-			*(uint8_t *)dstu = *(const uint8_t *)srcu;
-			srcu = (uintptr_t)((const uint8_t *)srcu + 1);
-			dstu = (uintptr_t)((uint8_t *)dstu + 1);
-		}
-		if (n & 0x02) {
-			*(uint16_t *)dstu = *(const uint16_t *)srcu;
-			srcu = (uintptr_t)((const uint16_t *)srcu + 1);
-			dstu = (uintptr_t)((uint16_t *)dstu + 1);
-		}
-		if (n & 0x04) {
-			*(uint32_t *)dstu = *(const uint32_t *)srcu;
-			srcu = (uintptr_t)((const uint32_t *)srcu + 1);
-			dstu = (uintptr_t)((uint32_t *)dstu + 1);
-		}
-		if (n & 0x08)
-			*(uint64_t *)dstu = *(const uint64_t *)srcu;
-		return ret;
+		return rte_mov15_or_less_unaligned(dst, src, n);
 	}
 
 	/**
@@ -379,8 +437,6 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 static __rte_always_inline void *
 rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
-	uintptr_t dstu = (uintptr_t)dst;
-	uintptr_t srcu = (uintptr_t)src;
 	void *ret = dst;
 	size_t dstofss;
 	size_t bits;
@@ -389,25 +445,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
 	 * Copy less than 16 bytes
 	 */
 	if (n < 16) {
-		if (n & 0x01) {
-			*(uint8_t *)dstu = *(const uint8_t *)srcu;
-			srcu = (uintptr_t)((const uint8_t *)srcu + 1);
-			dstu = (uintptr_t)((uint8_t *)dstu + 1);
-		}
-		if (n & 0x02) {
-			*(uint16_t *)dstu = *(const uint16_t *)srcu;
-			srcu = (uintptr_t)((const uint16_t *)srcu + 1);
-			dstu = (uintptr_t)((uint16_t *)dstu + 1);
-		}
-		if (n & 0x04) {
-			*(uint32_t *)dstu = *(const uint32_t *)srcu;
-			srcu = (uintptr_t)((const uint32_t *)srcu + 1);
-			dstu = (uintptr_t)((uint32_t *)dstu + 1);
-		}
-		if (n & 0x08) {
-			*(uint64_t *)dstu = *(const uint64_t *)srcu;
-		}
-		return ret;
+		return rte_mov15_or_less_unaligned(dst, src, n);
 	}
 
 	/**
@@ -672,8 +710,6 @@ static __rte_always_inline void *
 rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
 	__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
-	uintptr_t dstu = (uintptr_t)dst;
-	uintptr_t srcu = (uintptr_t)src;
 	void *ret = dst;
 	size_t dstofss;
 	size_t srcofs;
@@ -682,25 +718,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
 	 * Copy less than 16 bytes
 	 */
 	if (n < 16) {
-		if (n & 0x01) {
-			*(uint8_t *)dstu = *(const uint8_t *)srcu;
-			srcu = (uintptr_t)((const uint8_t *)srcu + 1);
-			dstu = (uintptr_t)((uint8_t *)dstu + 1);
-		}
-		if (n & 0x02) {
-			*(uint16_t *)dstu = *(const uint16_t *)srcu;
-			srcu = (uintptr_t)((const uint16_t *)srcu + 1);
-			dstu = (uintptr_t)((uint16_t *)dstu + 1);
-		}
-		if (n & 0x04) {
-			*(uint32_t *)dstu = *(const uint32_t *)srcu;
-			srcu = (uintptr_t)((const uint32_t *)srcu + 1);
-			dstu = (uintptr_t)((uint32_t *)dstu + 1);
-		}
-		if (n & 0x08) {
-			*(uint64_t *)dstu = *(const uint64_t *)srcu;
-		}
-		return ret;
+		return rte_mov15_or_less_unaligned(dst, src, n);
 	}
 
 	/**
-- 
2.25.1


  parent reply	other threads:[~2022-01-17 15:41 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-15 19:41 [PATCH] eal: fix unaligned loads/stores in rte_memcpy_generic Luc Pelletier
2022-01-15 21:39 ` [PATCH v2] " Luc Pelletier
2022-01-15 22:13   ` Stephen Hemminger
2022-01-16 14:09     ` Luc Pelletier
2022-01-16 16:32       ` Stephen Hemminger
2022-01-24 23:21         ` Georg Sauthoff
2022-01-25  7:59           ` Morten Brørup
2022-01-25 19:57           ` Luc Pelletier
2022-01-16 14:13 ` [PATCH v3] " Luc Pelletier
2022-01-16 14:33   ` Luc Pelletier
2022-01-16 16:34     ` Stephen Hemminger
2022-01-16 17:59       ` Morten Brørup
2022-01-16 20:33 ` [PATCH v4] " Luc Pelletier
2022-01-17 15:37 ` Luc Pelletier [this message]
2022-02-04 16:42   ` [PATCH v5] " Luc Pelletier
2022-02-04 17:16   ` Ananyev, Konstantin
2022-02-08 16:53     ` Thomas Monjalon
2022-02-09 15:05     ` Luc Pelletier
2022-02-10 14:04       ` Ananyev, Konstantin
2022-02-10 16:56         ` Luc Pelletier
2022-02-11 15:51           ` Ananyev, Konstantin
2022-02-13 22:31             ` Luc Pelletier
2022-02-14 13:41               ` Ananyev, Konstantin
2022-02-25 15:51 ` [PATCH v6] eal: fix rte_memcpy strict aliasing/alignment bugs Luc Pelletier
2022-02-25 16:38 ` [PATCH v7] " Luc Pelletier
2022-03-10 14:55   ` Ananyev, Konstantin
2022-04-07 15:24     ` David Marchand
2022-04-07 15:32       ` David Marchand
2022-04-07 15:40         ` David Marchand
2022-05-13 19:15           ` Luc Pelletier
2022-05-19 16:41             ` David Marchand
2022-04-08 13:47       ` Luc Pelletier
2022-05-19 16:47       ` David Marchand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220117153711.32829-1-lucp.at.work@gmail.com \
    --to=lucp.at.work@gmail.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=konstantin.ananyev@intel.com \
    --cc=stable@dpdk.org \
    --cc=xiaoyun.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.