public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kunit/memcpy: Adding dynamic size and window tests
@ 2022-09-29  3:08 Kees Cook
  2022-09-29 21:02 ` Nick Desaulniers
  0 siblings, 1 reply; 3+ messages in thread
From: Kees Cook @ 2022-09-29  3:08 UTC (permalink / raw)
  To: Nick Desaulniers; +Cc: Kees Cook, linux-kernel, linux-hardening

The "side effects" memmove() test accidentally found a corner case in
the recent refactoring of the i386 assembly memmove(), but missed
another corner case. Instead of hoping to get lucky next time, implement
much more complete tests of memcpy() and memmove() -- especially the
moving window overlap for memmove() -- which catches all the issues
encountered and should catch anything new.

Cc: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/lkml/CAKwvOdkaKTa2aiA90VzFrChNQM6O_ro+b7VWs=op70jx-DKaXA@mail.gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 lib/memcpy_kunit.c | 187 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)

diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 2b5cc70ac53f..f15daa66c6a6 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -270,6 +270,190 @@ static void memset_test(struct kunit *test)
 #undef TEST_OP
 }
 
+static u8 large_src[1024];
+static u8 large_dst[2048];
+static const u8 large_zero[2048];
+
+static void init_large(struct kunit *test)
+{
+	int failed_rng = 0;
+
+	/* Get many bit patterns. */
+	get_random_bytes(large_src, sizeof(large_src));
+
+	/* Make sure we have non-zero edges. */
+	while (large_src[0] == 0) {
+		get_random_bytes(large_src, 1);
+		KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
+				    "Is the RNG broken?");
+	}
+	while (large_src[sizeof(large_src) - 1] == 0) {
+		get_random_bytes(&large_src[sizeof(large_src) - 1], 1);
+		KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
+				    "Is the RNG broken?");
+	}
+
+	/* Explicitly zero the entire destination. */
+	memset(large_dst, 0, sizeof(large_dst));
+}
+
+/*
+ * Instead of an indirect function call for "copy" or a giant macro,
+ * use a bool to pick memcpy or memmove.
+ */
+static void copy_large_test(struct kunit *test, bool use_memmove)
+{
+	init_large(test);
+
+	/* Copy a growing number of non-overlapping bytes ... */
+	for (int bytes = 1; bytes <= sizeof(large_src); bytes++) {
+		/* Over a shifting destination window ... */
+		for (int offset = 0; offset < sizeof(large_src); offset++) {
+			int right_zero_pos = offset + bytes;
+			int right_zero_size = sizeof(large_dst) - right_zero_pos;
+
+			/* Copy! */
+			if (use_memmove)
+				memmove(large_dst + offset, large_src, bytes);
+			else
+				memcpy(large_dst + offset, large_src, bytes);
+
+			/* Did we touch anything before the copy area? */
+			KUNIT_ASSERT_EQ_MSG(test, memcmp(large_dst, large_zero, offset), 0,
+					    "with size %d at offset %d", bytes, offset);
+			/* Did we touch anything after the copy area? */
+			KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
+					    "with size %d at offset %d", bytes, offset);
+
+			/* Are we byte-for-byte exact across the copy? */
+			KUNIT_ASSERT_EQ_MSG(test, memcmp(large_dst + offset, large_src, bytes), 0,
+					    "with size %d at offset %d", bytes, offset);
+
+			/* Zero out what we copied for the next cycle. */
+			memset(large_dst + offset, 0, bytes);
+		}
+		/* Avoid stall warnings. */
+		cond_resched();
+	}
+}
+
+static void memcpy_large_test(struct kunit *test)
+{
+	copy_large_test(test, false);
+}
+
+static void memmove_large_test(struct kunit *test)
+{
+	copy_large_test(test, true);
+}
+
+/*
+ * Take a single step if within "inc" of the start or end,
+ * otherwise, take a full "inc" steps.
+ */
+static inline int next_step(int idx, int start, int end, int inc)
+{
+	start += inc;
+	end -= inc;
+
+	if (idx < start || idx + inc > end)
+		inc = 1;
+	return idx + inc;
+}
+
+static void memmove_overlap_test(struct kunit *test)
+{
+	/*
+	 * Running all possible offset and overlap combinations takes a
+	 * very long time. Instead, only check up to 128 bytes offset
+	 * into the destintation buffer (which should result in crossing
+	 * cachelines), with a step size of 1 through 7 to try to skip some
+	 * redundancy.
+	 */
+	static const int offset_max = 128; /* sizeof(large_src); */
+	static const int bytes_step = 7;
+	static const int window_step = 7;
+
+	static const int bytes_start = 1;
+	static const int bytes_end = sizeof(large_src) + 1;
+
+	init_large(test);
+
+	/* Copy a growing number of overlapping bytes ... */
+	for (int bytes = bytes_start; bytes < bytes_end;
+	     bytes = next_step(bytes, bytes_start, bytes_end, bytes_step)) {
+
+		/* Over a shifting destination window ... */
+		for (int d_off = 0; d_off < offset_max; d_off++) {
+			int s_start = max(d_off - bytes, 0);
+			int s_end = min_t(int, d_off + bytes, sizeof(large_src));
+
+			/* Over a shifting source window ... */
+			for (int s_off = s_start; s_off < s_end;
+			     s_off = next_step(s_off, s_start, s_end, window_step)) {
+				int left_zero_pos, left_zero_size;
+				int right_zero_pos, right_zero_size;
+				int src_pos, src_orig_pos, src_size;
+				int pos;
+
+				/* Place the source in the destination buffer. */
+				memcpy(&large_dst[s_off], large_src, bytes);
+
+				/* Copy to destination offset. */
+				memmove(&large_dst[d_off], &large_dst[s_off], bytes);
+
+				/* Make sure destination entirely matches. */
+				KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[d_off], large_src, bytes), 0,
+					"with size %d at src offset %d and dest offset %d",
+					bytes, s_off, d_off);
+
+				/* Calculate the expected zero spans. */
+				if (s_off < d_off) {
+					left_zero_pos = 0;
+					left_zero_size = s_off;
+
+					right_zero_pos = d_off + bytes;
+					right_zero_size = sizeof(large_dst) - right_zero_pos;
+
+					src_pos = s_off;
+					src_orig_pos = 0;
+					src_size = d_off - s_off;
+				} else {
+					left_zero_pos = 0;
+					left_zero_size = d_off;
+
+					right_zero_pos = s_off + bytes;
+					right_zero_size = sizeof(large_dst) - right_zero_pos;
+
+					src_pos = d_off + bytes;
+					src_orig_pos = src_pos - s_off;
+					src_size = right_zero_pos - src_pos;
+				}
+
+				/* Check non-overlapping source is unchanged.*/
+				KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[src_pos], &large_src[src_orig_pos], src_size), 0,
+					"with size %d at src offset %d and dest offset %d",
+					bytes, s_off, d_off);
+
+				/* Check leading buffer contents are zero. */
+				KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[left_zero_pos], large_zero, left_zero_size), 0,
+					"with size %d at src offset %d and dest offset %d",
+					bytes, s_off, d_off);
+				/* Check trailing buffer contents are zero. */
+				KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
+					"with size %d at src offset %d and dest offset %d",
+					bytes, s_off, d_off);
+
+				/* Zero out everything not already zeroed.*/
+				pos = left_zero_pos + left_zero_size;
+				memset(&large_dst[pos], 0, right_zero_pos - pos);
+			}
+			/* Avoid stall warnings. */
+			cond_resched();
+		}
+	}
+}
+
 static void strtomem_test(struct kunit *test)
 {
 	static const char input[sizeof(unsigned long)] = "hi";
@@ -325,7 +509,10 @@ static void strtomem_test(struct kunit *test)
 static struct kunit_case memcpy_test_cases[] = {
 	KUNIT_CASE(memset_test),
 	KUNIT_CASE(memcpy_test),
+	KUNIT_CASE(memcpy_large_test),
 	KUNIT_CASE(memmove_test),
+	KUNIT_CASE(memmove_large_test),
+	KUNIT_CASE(memmove_overlap_test),
 	KUNIT_CASE(strtomem_test),
 	{}
 };
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] kunit/memcpy: Adding dynamic size and window tests
  2022-09-29  3:08 [PATCH] kunit/memcpy: Adding dynamic size and window tests Kees Cook
@ 2022-09-29 21:02 ` Nick Desaulniers
  2022-10-18  8:07   ` Kees Cook
  0 siblings, 1 reply; 3+ messages in thread
From: Nick Desaulniers @ 2022-09-29 21:02 UTC (permalink / raw)
  To: Kees Cook; +Cc: linux-kernel, linux-hardening

On Wed, Sep 28, 2022 at 8:08 PM Kees Cook <keescook@chromium.org> wrote:
>
> The "side effects" memmove() test accidentally found a corner case in
> the recent refactoring of the i386 assembly memmove(), but missed
> another corner case. Instead of hoping to get lucky next time, implement
> much more complete tests of memcpy() and memmove() -- especially the
> moving window overlap for memmove() -- which catches all the issues
> encountered and should catch anything new.
>
> Cc: Nick Desaulniers <ndesaulniers@google.com>
> Link: https://lore.kernel.org/lkml/CAKwvOdkaKTa2aiA90VzFrChNQM6O_ro+b7VWs=op70jx-DKaXA@mail.gmail.com
> Signed-off-by: Kees Cook <keescook@chromium.org>

Regardless of my comments, I ran this through:

$ ./tools/testing/kunit/kunit.py run --arch=i386 memcpy --make_options LLVM=1
$ ./tools/testing/kunit/kunit.py run --arch=arm64 memcpy --make_options LLVM=1
$ ./tools/testing/kunit/kunit.py run --arch=arm memcpy --make_options LLVM=1
$ ./tools/testing/kunit/kunit.py run --arch=x86_64 memcpy --make_options LLVM=1
All were green for me.

Tested-by: Nick Desaulniers <ndesaulniers@google.com>

Do you have any thoughts on the test in my v4 wrt. potential for
conflicts in -next?
https://lore.kernel.org/lkml/20220928210512.642594-1-ndesaulniers@google.com/
It looks like even without this patch of yours,
$ ./tools/testing/kunit/kunit.py run --arch=i386 memcpy --make_options LLVM=1
demonstrates the bug in my v3.

I also tested my v4 on top of this change with the above command line;
it passes. :)

> ---
>  lib/memcpy_kunit.c | 187 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 187 insertions(+)
>
> diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
> index 2b5cc70ac53f..f15daa66c6a6 100644
> --- a/lib/memcpy_kunit.c
> +++ b/lib/memcpy_kunit.c
> @@ -270,6 +270,190 @@ static void memset_test(struct kunit *test)
>  #undef TEST_OP
>  }
>
> +static u8 large_src[1024];
> +static u8 large_dst[2048];
> +static const u8 large_zero[2048];
> +
> +static void init_large(struct kunit *test)
> +{
> +       int failed_rng = 0;
> +
> +       /* Get many bit patterns. */
> +       get_random_bytes(large_src, sizeof(large_src));

I know sizeof == ARRAY_SIZE when we have an array of u8, but please
consider using ARRAY_SIZE.

> +
> +       /* Make sure we have non-zero edges. */
> +       while (large_src[0] == 0) {
> +               get_random_bytes(large_src, 1);
> +               KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
> +                                   "Is the RNG broken?");
> +       }
> +       while (large_src[sizeof(large_src) - 1] == 0) {
> +               get_random_bytes(&large_src[sizeof(large_src) - 1], 1);
> +               KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
> +                                   "Is the RNG broken?");
> +       }

The above duplication could probably be separated out into another
static function where you pass in the address of the array element to
set to non-zero.

> +
> +       /* Explicitly zero the entire destination. */
> +       memset(large_dst, 0, sizeof(large_dst));
> +}
> +
> +/*
> + * Instead of an indirect function call for "copy" or a giant macro,
> + * use a bool to pick memcpy or memmove.
> + */
> +static void copy_large_test(struct kunit *test, bool use_memmove)
> +{
> +       init_large(test);
> +
> +       /* Copy a growing number of non-overlapping bytes ... */
> +       for (int bytes = 1; bytes <= sizeof(large_src); bytes++) {
> +               /* Over a shifting destination window ... */
> +               for (int offset = 0; offset < sizeof(large_src); offset++) {
> +                       int right_zero_pos = offset + bytes;
> +                       int right_zero_size = sizeof(large_dst) - right_zero_pos;
> +
> +                       /* Copy! */
> +                       if (use_memmove)
> +                               memmove(large_dst + offset, large_src, bytes);
> +                       else
> +                               memcpy(large_dst + offset, large_src, bytes);
> +
> +                       /* Did we touch anything before the copy area? */
> +                       KUNIT_ASSERT_EQ_MSG(test, memcmp(large_dst, large_zero, offset), 0,
> +                                           "with size %d at offset %d", bytes, offset);
> +                       /* Did we touch anything after the copy area? */
> +                       KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
> +                                           "with size %d at offset %d", bytes, offset);
> +
> +                       /* Are we byte-for-byte exact across the copy? */
> +                       KUNIT_ASSERT_EQ_MSG(test, memcmp(large_dst + offset, large_src, bytes), 0,
> +                                           "with size %d at offset %d", bytes, offset);
> +
> +                       /* Zero out what we copied for the next cycle. */
> +                       memset(large_dst + offset, 0, bytes);
> +               }
> +               /* Avoid stall warnings. */
> +               cond_resched();

I'm just curious what that is? ^
Should it go in the inner loop?

> +       }
> +}
> +
> +static void memcpy_large_test(struct kunit *test)
> +{
> +       copy_large_test(test, false);
> +}
> +
> +static void memmove_large_test(struct kunit *test)
> +{
> +       copy_large_test(test, true);
> +}
> +
> +/*
> + * Take a single step if within "inc" of the start or end,
> + * otherwise, take a full "inc" steps.

I still have a hard time following what this logic is doing,
particularly the clamping to 1. Can you elaborate more in this
comment?

> + */
> +static inline int next_step(int idx, int start, int end, int inc)

Please drop the inline keyword here.

> +{
> +       start += inc;
> +       end -= inc;
> +
> +       if (idx < start || idx + inc > end)
> +               inc = 1;
> +       return idx + inc;
> +}
> +
> +static void memmove_overlap_test(struct kunit *test)
> +{
> +       /*
> +        * Running all possible offset and overlap combinations takes a
> +        * very long time. Instead, only check up to 128 bytes offset
> +        * into the destintation buffer (which should result in crossing

typo: s/destintation/destination/

> +        * cachelines), with a step size of 1 through 7 to try to skip some
> +        * redundancy.
> +        */
> +       static const int offset_max = 128; /* sizeof(large_src); */

I thought large_src was 1024? Perhaps this comment is stale or
contradictory to the comment in the block above the variable
definition?

> +       static const int bytes_step = 7;
> +       static const int window_step = 7;
> +
> +       static const int bytes_start = 1;
> +       static const int bytes_end = sizeof(large_src) + 1;
> +
> +       init_large(test);
> +
> +       /* Copy a growing number of overlapping bytes ... */
> +       for (int bytes = bytes_start; bytes < bytes_end;
> +            bytes = next_step(bytes, bytes_start, bytes_end, bytes_step)) {
> +
> +               /* Over a shifting destination window ... */
> +               for (int d_off = 0; d_off < offset_max; d_off++) {
> +                       int s_start = max(d_off - bytes, 0);
> +                       int s_end = min_t(int, d_off + bytes, sizeof(large_src));
> +
> +                       /* Over a shifting source window ... */
> +                       for (int s_off = s_start; s_off < s_end;
> +                            s_off = next_step(s_off, s_start, s_end, window_step)) {

Might a while loop with a distinct update statement look cleaner than
a multiline for predicate?

> +                               int left_zero_pos, left_zero_size;
> +                               int right_zero_pos, right_zero_size;
> +                               int src_pos, src_orig_pos, src_size;
> +                               int pos;
> +
> +                               /* Place the source in the destination buffer. */
> +                               memcpy(&large_dst[s_off], large_src, bytes);
> +
> +                               /* Copy to destination offset. */
> +                               memmove(&large_dst[d_off], &large_dst[s_off], bytes);
> +
> +                               /* Make sure destination entirely matches. */
> +                               KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[d_off], large_src, bytes), 0,
> +                                       "with size %d at src offset %d and dest offset %d",
> +                                       bytes, s_off, d_off);
> +
> +                               /* Calculate the expected zero spans. */
> +                               if (s_off < d_off) {
> +                                       left_zero_pos = 0;
> +                                       left_zero_size = s_off;
> +
> +                                       right_zero_pos = d_off + bytes;
> +                                       right_zero_size = sizeof(large_dst) - right_zero_pos;
> +
> +                                       src_pos = s_off;
> +                                       src_orig_pos = 0;
> +                                       src_size = d_off - s_off;
> +                               } else {
> +                                       left_zero_pos = 0;
> +                                       left_zero_size = d_off;
> +
> +                                       right_zero_pos = s_off + bytes;
> +                                       right_zero_size = sizeof(large_dst) - right_zero_pos;
> +
> +                                       src_pos = d_off + bytes;
> +                                       src_orig_pos = src_pos - s_off;
> +                                       src_size = right_zero_pos - src_pos;
> +                               }

Looking at the arms of these branches, I see a fair amount of
duplication. Mind deduplicating some of the statements here? The
assignments of left_zero_pos and right_zero_size look invariant of the
predicate.

> +
> +                               /* Check non-overlapping source is unchanged.*/
> +                               KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[src_pos], &large_src[src_orig_pos], src_size), 0,
> +                                       "with size %d at src offset %d and dest offset %d",
> +                                       bytes, s_off, d_off);
> +
> +                               /* Check leading buffer contents are zero. */
> +                               KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[left_zero_pos], large_zero, left_zero_size), 0,
> +                                       "with size %d at src offset %d and dest offset %d",
> +                                       bytes, s_off, d_off);
> +                               /* Check trailing buffer contents are zero. */
> +                               KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
> +                                       "with size %d at src offset %d and dest offset %d",
> +                                       bytes, s_off, d_off);
> +
> +                               /* Zero out everything not already zeroed.*/
> +                               pos = left_zero_pos + left_zero_size;
> +                               memset(&large_dst[pos], 0, right_zero_pos - pos);
> +                       }
> +                       /* Avoid stall warnings. */
> +                       cond_resched();
> +               }
> +       }
> +}
> +
>  static void strtomem_test(struct kunit *test)
>  {
>         static const char input[sizeof(unsigned long)] = "hi";
> @@ -325,7 +509,10 @@ static void strtomem_test(struct kunit *test)
>  static struct kunit_case memcpy_test_cases[] = {
>         KUNIT_CASE(memset_test),
>         KUNIT_CASE(memcpy_test),
> +       KUNIT_CASE(memcpy_large_test),
>         KUNIT_CASE(memmove_test),
> +       KUNIT_CASE(memmove_large_test),
> +       KUNIT_CASE(memmove_overlap_test),
>         KUNIT_CASE(strtomem_test),
>         {}
>  };
> --
> 2.34.1
>


-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] kunit/memcpy: Adding dynamic size and window tests
  2022-09-29 21:02 ` Nick Desaulniers
@ 2022-10-18  8:07   ` Kees Cook
  0 siblings, 0 replies; 3+ messages in thread
From: Kees Cook @ 2022-10-18  8:07 UTC (permalink / raw)
  To: Nick Desaulniers; +Cc: linux-kernel, linux-hardening

On Thu, Sep 29, 2022 at 02:02:05PM -0700, Nick Desaulniers wrote:
> On Wed, Sep 28, 2022 at 8:08 PM Kees Cook <keescook@chromium.org> wrote:
> >
> > The "side effects" memmove() test accidentally found a corner case in
> > the recent refactoring of the i386 assembly memmove(), but missed
> > another corner case. Instead of hoping to get lucky next time, implement
> > much more complete tests of memcpy() and memmove() -- especially the
> > moving window overlap for memmove() -- which catches all the issues
> > encountered and should catch anything new.
> >
> > Cc: Nick Desaulniers <ndesaulniers@google.com>
> > Link: https://lore.kernel.org/lkml/CAKwvOdkaKTa2aiA90VzFrChNQM6O_ro+b7VWs=op70jx-DKaXA@mail.gmail.com
> > Signed-off-by: Kees Cook <keescook@chromium.org>
> 
> Regardless of my comments, I ran this through:
> 
> $ ./tools/testing/kunit/kunit.py run --arch=i386 memcpy --make_options LLVM=1
> $ ./tools/testing/kunit/kunit.py run --arch=arm64 memcpy --make_options LLVM=1
> $ ./tools/testing/kunit/kunit.py run --arch=arm memcpy --make_options LLVM=1
> $ ./tools/testing/kunit/kunit.py run --arch=x86_64 memcpy --make_options LLVM=1
> All were green for me.
> 
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>

Thanks!

> Do you have any thoughts on the test in my v4 wrt. potential for
> conflicts in -next?
> https://lore.kernel.org/lkml/20220928210512.642594-1-ndesaulniers@google.com/

I designed this patch to avoid conflicts with your changes. (Has anyone
picked up the memmove refactor for -next yet?)

> It looks like even without this patch of yours,
> $ ./tools/testing/kunit/kunit.py run --arch=i386 memcpy --make_options LLVM=1
> demonstrates the bug in my v3.

Yeah, it got lucky, mainly. :)

> I also tested my v4 on top of this change with the above command line;
> it passes. :)

Perfecto! :)

> 
> > ---
> >  lib/memcpy_kunit.c | 187 +++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 187 insertions(+)
> >
> > diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
> > index 2b5cc70ac53f..f15daa66c6a6 100644
> > --- a/lib/memcpy_kunit.c
> > +++ b/lib/memcpy_kunit.c
> > @@ -270,6 +270,190 @@ static void memset_test(struct kunit *test)
> >  #undef TEST_OP
> >  }
> >
> > +static u8 large_src[1024];
> > +static u8 large_dst[2048];
> > +static const u8 large_zero[2048];
> > +
> > +static void init_large(struct kunit *test)
> > +{
> > +       int failed_rng = 0;
> > +
> > +       /* Get many bit patterns. */
> > +       get_random_bytes(large_src, sizeof(large_src));
> 
> I know sizeof == ARRAY_SIZE when we have an array of u8, but please
> consider using ARRAY_SIZE.

Yeah, I will break myself of this habit yet. Fixed.

> 
> > +
> > +       /* Make sure we have non-zero edges. */
> > +       while (large_src[0] == 0) {
> > +               get_random_bytes(large_src, 1);
> > +               KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
> > +                                   "Is the RNG broken?");
> > +       }
> > +       while (large_src[sizeof(large_src) - 1] == 0) {
> > +               get_random_bytes(&large_src[sizeof(large_src) - 1], 1);
> > +               KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
> > +                                   "Is the RNG broken?");
> > +       }
> 
> The above duplication could probably be separated out into another
> static function where you pass in the address of the array element to
> set to non-zero.

Done in v2.

> 
> > +
> > +       /* Explicitly zero the entire destination. */
> > +       memset(large_dst, 0, sizeof(large_dst));
> > +}
> > +
> > +/*
> > + * Instead of an indirect function call for "copy" or a giant macro,
> > + * use a bool to pick memcpy or memmove.
> > + */
> > +static void copy_large_test(struct kunit *test, bool use_memmove)
> > +{
> > +       init_large(test);
> > +
> > +       /* Copy a growing number of non-overlapping bytes ... */
> > +       for (int bytes = 1; bytes <= sizeof(large_src); bytes++) {
> > +               /* Over a shifting destination window ... */
> > +               for (int offset = 0; offset < sizeof(large_src); offset++) {
> > +                       int right_zero_pos = offset + bytes;
> > +                       int right_zero_size = sizeof(large_dst) - right_zero_pos;
> > +
> > +                       /* Copy! */
> > +                       if (use_memmove)
> > +                               memmove(large_dst + offset, large_src, bytes);
> > +                       else
> > +                               memcpy(large_dst + offset, large_src, bytes);
> > +
> > +                       /* Did we touch anything before the copy area? */
> > +                       KUNIT_ASSERT_EQ_MSG(test, memcmp(large_dst, large_zero, offset), 0,
> > +                                           "with size %d at offset %d", bytes, offset);
> > +                       /* Did we touch anything after the copy area? */
> > +                       KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
> > +                                           "with size %d at offset %d", bytes, offset);
> > +
> > +                       /* Are we byte-for-byte exact across the copy? */
> > +                       KUNIT_ASSERT_EQ_MSG(test, memcmp(large_dst + offset, large_src, bytes), 0,
> > +                                           "with size %d at offset %d", bytes, offset);
> > +
> > +                       /* Zero out what we copied for the next cycle. */
> > +                       memset(large_dst + offset, 0, bytes);
> > +               }
> > +               /* Avoid stall warnings. */
> > +               cond_resched();
> 
> I'm just curious what that is? ^
> Should it go in the inner loop?

This is to keep the soft-lockup detector from yelling about this
extremely slow test. :P I put it here because it seems the right
balance between doing it too much (inner loop) and not at all.

> 
> > +       }
> > +}
> > +
> > +static void memcpy_large_test(struct kunit *test)
> > +{
> > +       copy_large_test(test, false);
> > +}
> > +
> > +static void memmove_large_test(struct kunit *test)
> > +{
> > +       copy_large_test(test, true);
> > +}
> > +
> > +/*
> > + * Take a single step if within "inc" of the start or end,
> > + * otherwise, take a full "inc" steps.
> 
> I still have a hard time following what this logic is doing,
> particularly the clamping to 1. Can you elaborate more in this
> comment?

Sure! I've tried to flesh this out in v2.

> 
> > + */
> > +static inline int next_step(int idx, int start, int end, int inc)
> 
> Please drop the inline keyword here.

Done.

> 
> > +{
> > +       start += inc;
> > +       end -= inc;
> > +
> > +       if (idx < start || idx + inc > end)
> > +               inc = 1;
> > +       return idx + inc;
> > +}
> > +
> > +static void memmove_overlap_test(struct kunit *test)
> > +{
> > +       /*
> > +        * Running all possible offset and overlap combinations takes a
> > +        * very long time. Instead, only check up to 128 bytes offset
> > +        * into the destintation buffer (which should result in crossing
> 
> typo: s/destintation/destination/

Fixed :)

> 
> > +        * cachelines), with a step size of 1 through 7 to try to skip some
> > +        * redundancy.
> > +        */
> > +       static const int offset_max = 128; /* sizeof(large_src); */
> 
> I thought large_src was 1024? Perhaps this comment is stale or
> contradictory to the comment in the block above the variable
> definition?

This was a left-over note about how big it actually was while I was
tuning it for sane run-times. I've updated the comment.

> 
> > +       static const int bytes_step = 7;
> > +       static const int window_step = 7;
> > +
> > +       static const int bytes_start = 1;
> > +       static const int bytes_end = sizeof(large_src) + 1;
> > +
> > +       init_large(test);
> > +
> > +       /* Copy a growing number of overlapping bytes ... */
> > +       for (int bytes = bytes_start; bytes < bytes_end;
> > +            bytes = next_step(bytes, bytes_start, bytes_end, bytes_step)) {
> > +
> > +               /* Over a shifting destination window ... */
> > +               for (int d_off = 0; d_off < offset_max; d_off++) {
> > +                       int s_start = max(d_off - bytes, 0);
> > +                       int s_end = min_t(int, d_off + bytes, sizeof(large_src));
> > +
> > +                       /* Over a shifting source window ... */
> > +                       for (int s_off = s_start; s_off < s_end;
> > +                            s_off = next_step(s_off, s_start, s_end, window_step)) {
> 
> Might a while loop with a distinct update statement look cleaner than
> a multiline for predicate?

I originally tried it either way, and I found the "while" even uglier. :)

> 
> > +                               int left_zero_pos, left_zero_size;
> > +                               int right_zero_pos, right_zero_size;
> > +                               int src_pos, src_orig_pos, src_size;
> > +                               int pos;
> > +
> > +                               /* Place the source in the destination buffer. */
> > +                               memcpy(&large_dst[s_off], large_src, bytes);
> > +
> > +                               /* Copy to destination offset. */
> > +                               memmove(&large_dst[d_off], &large_dst[s_off], bytes);
> > +
> > +                               /* Make sure destination entirely matches. */
> > +                               KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[d_off], large_src, bytes), 0,
> > +                                       "with size %d at src offset %d and dest offset %d",
> > +                                       bytes, s_off, d_off);
> > +
> > +                               /* Calculate the expected zero spans. */
> > +                               if (s_off < d_off) {
> > +                                       left_zero_pos = 0;
> > +                                       left_zero_size = s_off;
> > +
> > +                                       right_zero_pos = d_off + bytes;
> > +                                       right_zero_size = sizeof(large_dst) - right_zero_pos;
> > +
> > +                                       src_pos = s_off;
> > +                                       src_orig_pos = 0;
> > +                                       src_size = d_off - s_off;
> > +                               } else {
> > +                                       left_zero_pos = 0;
> > +                                       left_zero_size = d_off;
> > +
> > +                                       right_zero_pos = s_off + bytes;
> > +                                       right_zero_size = sizeof(large_dst) - right_zero_pos;
> > +
> > +                                       src_pos = d_off + bytes;
> > +                                       src_orig_pos = src_pos - s_off;
> > +                                       src_size = right_zero_pos - src_pos;
> > +                               }
> 
> Looking at the arms of these branches, I see a fair amount of
> duplication. Mind deduplicating some of the statements here? The
> assignments of left_zero_pos and right_zero_size look invariant of the
> predicate.

I did this originally too, but I found it easier to reason about by
keeping all of the bounds calculations separated like this for readability. Any
duplication will get optimized away. :)

Thanks for the review!

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-10-18  8:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-29  3:08 [PATCH] kunit/memcpy: Adding dynamic size and window tests Kees Cook
2022-09-29 21:02 ` Nick Desaulniers
2022-10-18  8:07   ` Kees Cook

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox