git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Phillip Wood <phillip.wood123@gmail.com>
To: Ghanshyam Thakkar <shyamthakkar001@gmail.com>, git@vger.kernel.org
Cc: "Josh Steadmon" <steadmon@google.com>,
	"René Scharfe" <l.s.r@web.de>,
	"Christian Couder" <chriscool@tuxfamily.org>,
	"Kaartic Sivaraam" <kaartic.sivaraam@gmail.com>
Subject: Re: [GSoC][PATCH] t: port helper/test-oid-array.c to unit-tests/t-oid-array.c
Date: Thu, 4 Jul 2024 17:33:48 +0100	[thread overview]
Message-ID: <db2f97b6-a06e-470f-b1f9-60a78a0a2a7f@gmail.com> (raw)
In-Reply-To: <20240703034638.8019-2-shyamthakkar001@gmail.com>

Hi Ghanshyam

Overall this looks like a faithful conversion, I've left a few comments 
below.

On 03/07/2024 04:46, Ghanshyam Thakkar wrote:
> helper/test-oid-array.c along with t0064-oid-array.sh test the
> oid-array.h library, which provides storage and processing
> efficiency over large lists of object identifiers.
> 
> Migrate them to the unit testing framework for better runtime
> performance and efficiency. Also 'the_hash_algo' is used internally in
> oid_array_lookup(), but we do not initialize a repository directory,
> therefore initialize the_hash_algo manually. And
> init_hash_algo():lib-oid.c can aid in this process, so make it public.
> 
> Mentored-by: Christian Couder <chriscool@tuxfamily.org>
> Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
> Signed-off-by: Ghanshyam Thakkar <shyamthakkar001@gmail.com>
> ---
> Note: Once 'rs/unit-tests-test-run' is merged to atleast next, I plan to
> replace these internal function used in TEST_LOOKUP() with TEST_RUN().

Nice idea

> diff --git a/t/unit-tests/lib-oid.c b/t/unit-tests/lib-oid.c
> index 37105f0a8f..8f0ccac532 100644
> --- a/t/unit-tests/lib-oid.c
> +++ b/t/unit-tests/lib-oid.c
> @@ -3,7 +3,7 @@
>   #include "strbuf.h"
>   #include "hex.h"
>   
> -static int init_hash_algo(void)
> +int init_hash_algo(void)
>   {
>   	static int algo = -1;
>   
> diff --git a/t/unit-tests/lib-oid.h b/t/unit-tests/lib-oid.h
> index 8d2acca768..011a2d88de 100644
> --- a/t/unit-tests/lib-oid.h
> +++ b/t/unit-tests/lib-oid.h
> @@ -13,5 +13,11 @@
>    * environment variable.
>    */
>   int get_oid_arbitrary_hex(const char *s, struct object_id *oid);
> +/*
> + * Returns one of GIT_HASH_{SHA1, SHA256, UNKNOWN} based on the value of
> + * GIT_TEST_DEFAULT_HASH. The fallback value in case of absence of
> + * GIT_TEST_DEFAULT_HASH is GIT_HASH_SHA1.
> + */
> +int init_hash_algo(void);
>   
>   #endif /* LIB_OID_H */
> diff --git a/t/unit-tests/t-oid-array.c b/t/unit-tests/t-oid-array.c
> new file mode 100644
> index 0000000000..0a506fab07
> --- /dev/null
> +++ b/t/unit-tests/t-oid-array.c
> @@ -0,0 +1,138 @@
> +#define USE_THE_REPOSITORY_VARIABLE
> +
> +#include "test-lib.h"
> +#include "lib-oid.h"
> +#include "oid-array.h"
> +#include "hex.h"
> +
> +static inline int test_min(int a, int b)
> +{
> +	return a <= b ? a : b;
> +}
> +
> +static int fill_array(struct oid_array *array, const char *hexes[], size_t n)
> +{
> +	for (size_t i = 0; i < n; i++) {
> +		struct object_id oid;
> +
> +		if (get_oid_arbitrary_hex(hexes[i], &oid))
> +			return -1;
> +		oid_array_append(array, &oid);
> +	}
> +	if (!check_int(array->nr, ==, n))

This should probably use check_uint() as the arguments are unsigned 
integers.

> +		return -1;
> +	return 0;
> +}
> +
> +static int add_to_oid_array(const struct object_id *oid, void *data)
> +{
> +	struct oid_array *array = data;

style: we leave a blank line after variable declarations at the start of 
a block.

> +	oid_array_append(array, oid);
> +	return 0;
> +}
> +
> +static void t_enumeration(const char *input_args[], size_t input_sz,
> +			  const char *result[], size_t result_sz)

style: we use "const char **arg" rather than "const char *arg[]"

> +{
> +	struct oid_array input = OID_ARRAY_INIT, expect = OID_ARRAY_INIT,
> +			 actual = OID_ARRAY_INIT;
> +	size_t i;
> +
> +	if (fill_array(&input, input_args, input_sz))
> +		return;
> +	if (fill_array(&expect, result, result_sz))
> +		return;
> +
> +	oid_array_for_each_unique(&input, add_to_oid_array, &actual);
> +	check_int(actual.nr, ==, expect.nr);
> +
> +	for (i = 0; i < test_min(actual.nr, expect.nr); i++) {
> +		if (!check(oideq(&actual.oid[i], &expect.oid[i])))
> +			test_msg("expected: %s\n       got: %s\n     index: %" PRIuMAX,
> +				 oid_to_hex(&expect.oid[i]), oid_to_hex(&actual.oid[i]),
> +				 (uintmax_t)i);
> +	}
> +	check_int(i, ==, result_sz);
> +
> +	oid_array_clear(&actual);
> +	oid_array_clear(&input);
> +	oid_array_clear(&expect);
> +}
> +
> +#define TEST_ENUMERATION(input, result, desc)                                     \
> +	TEST(t_enumeration(input, ARRAY_SIZE(input), result, ARRAY_SIZE(result)), \
> +			   desc " works")

This macro and its helper function look good.

> +static int t_lookup(struct object_id *oid_query, const char *query,
> +		    const char *hexes[], size_t n)
> +{
> +	struct oid_array array = OID_ARRAY_INIT;
> +	int ret;
> +
> +	if (get_oid_arbitrary_hex(query, oid_query))
> +		return INT_MIN;
> +	if (fill_array(&array, hexes, n))
> +		return INT_MIN;
> +	ret = oid_array_lookup(&array, oid_query);
> +
> +	oid_array_clear(&array);
> +	return ret;
> +}
> +
> +#define TEST_LOOKUP(input_args, query, condition, desc)                   \

Passing in the condition is a bit unfortunate as it means that the 
caller has to know which variable name to use. It might be nicer to have 
a function instead that takes the upper and lower bounds of the expected 
result and then does

	check_int(res, >=, expected_lower);
	check_int(res, <=, expected_upper);

It might be worth checking that array[res] matches the expected entry as 
well.

> +	do {                                                              \
> +		int skip = test__run_begin();                             \
> +		if (!skip) {                                              \
> +			struct object_id oid_query;                       \
> +			int ret = t_lookup(&oid_query, query, input_args, \
> +					   ARRAY_SIZE(input_args));       \
> +                                                                          \
> +			if (ret != INT_MIN && !check(condition))          \
> +				test_msg("oid query for lookup: %s",      \
> +					 oid_to_hex(&oid_query));         \
> +		}                                                         \
> +		test__run_end(!skip, TEST_LOCATION(), desc " works");     \
> +	} while (0)
> +
> +static void setup(void)
> +{
> +	int algo = init_hash_algo();
> +	/* because the_hash_algo is used by oid_array_lookup() internally */
> +	if (check_int(algo, !=, GIT_HASH_UNKNOWN))
> +		repo_set_hash_algo(the_repository, algo);
> +}
> +
> +int cmd_main(int argc UNUSED, const char **argv UNUSED)
> +{
> +	const char *arr_input[] = { "88", "44", "aa", "55" };
> +	const char *arr_input_dup[] = { "88", "44", "aa", "55",
> +					"88", "44", "aa", "55",
> +					"88", "44", "aa", "55" };
> +	const char *res_sorted[] = { "44", "55", "88", "aa" };
> +
> +	if (!TEST(setup(), "setup"))
> +		test_skip_all("hash algo initialization failed");
> +
> +	TEST_ENUMERATION(arr_input, res_sorted, "ordered enumeration");
> +	TEST_ENUMERATION(arr_input_dup, res_sorted,
> +			 "ordered enumeration with duplicate suppression");
> +
> +	/* ret is the return value of oid_array_lookup() */
> +	TEST_LOOKUP(arr_input, "55", ret == 1, "lookup");
> +	TEST_LOOKUP(arr_input, "33", ret < 0, "lookup non-existent entry");
> +	TEST_LOOKUP(arr_input_dup, "55", ret >= 3 && ret <= 5,
> +		    "lookup with duplicates");
> +	TEST_LOOKUP(arr_input_dup, "66", ret < 0,
> +		    "lookup non-existent entry with duplicates");
> +
> +	TEST_LOOKUP(((const char *[]){
> +		    "55",
> +		    init_hash_algo() == GIT_HASH_SHA1 ?
> +				"5500000000000000000000000000000000000001" :
> +				"5500000000000000000000000000000000000000000000000000000000000001" }),

> +		    "55", ret == 0, "lookup with almost duplicate values");

This might be slightly more readable if we stored the oids in a separate 
variable at the beginning of this function and then it would look 
something like

	TEST_LOOKUP(((const char *[]) {"55", nearly_55}), "55", 0, 0,
		    "lookup with almost duplicate values");

Having said that it is kind of unfortunate that we have all the variable 
definitions at the start as it makes it harder to see what's going on in 
each test. We could avoid that by using TEST_RUN() and declaring the 
variables in the test block. For example the first test would look like

	TEST_RUN("ordered enumeration") {
		const char *input[] = { "88", "44", "aa", "55" };
		const char *expected[] = { "44", "55", "88", "aa" };

		TEST_ENUMERATION(input, expected)
	}

where TEST_ENUMERATION is adjusted so it does not call TEST(). It's more 
verbose but it is clearer what the input and expected values actually are.

Best Wishes

Phillip

> +	TEST_LOOKUP(((const char *[]){ "55", "55" }), "55",
> +		    ret >= 0 && ret <= 1, "lookup with single duplicate value");
> +
> +	return test_done();
> +}

  reply	other threads:[~2024-07-04 16:33 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-03  3:46 [GSoC][PATCH] t: port helper/test-oid-array.c to unit-tests/t-oid-array.c Ghanshyam Thakkar
2024-07-04 16:33 ` Phillip Wood [this message]
2024-08-03 13:31   ` Ghanshyam Thakkar
2024-08-03 13:21 ` [GSoC][PATCH v2] " Ghanshyam Thakkar
2024-08-19 16:55   ` Christian Couder
2024-08-24 17:00     ` Ghanshyam Thakkar
2024-08-24 17:02   ` [GSoC][PATCH v3] " Ghanshyam Thakkar
2024-08-25  6:38     ` Christian Couder
2024-08-25 10:45       ` Ghanshyam Thakkar
2024-09-01 21:26     ` [PATCH v4] " Ghanshyam Thakkar
2024-09-04  7:42       ` Christian Couder
2024-09-04 15:01         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=db2f97b6-a06e-470f-b1f9-60a78a0a2a7f@gmail.com \
    --to=phillip.wood123@gmail.com \
    --cc=chriscool@tuxfamily.org \
    --cc=git@vger.kernel.org \
    --cc=kaartic.sivaraam@gmail.com \
    --cc=l.s.r@web.de \
    --cc=phillip.wood@dunelm.org.uk \
    --cc=shyamthakkar001@gmail.com \
    --cc=steadmon@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).