All of lore.kernel.org
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Patrick Steinhardt <ps@pks.im>
Cc: git@vger.kernel.org,  Han-Wen Nienhuys <hanwenn@gmail.com>
Subject: Re: [PATCH v2 5/8] reftable/record: store "val1" hashes as static arrays
Date: Thu, 28 Dec 2023 09:03:10 -0800	[thread overview]
Message-ID: <xmqq1qb6i7jl.fsf@gitster.g> (raw)
In-Reply-To: <46ca3a37f805cd36faa26927220c2793d4cdd561.1703743174.git.ps@pks.im> (Patrick Steinhardt's message of "Thu, 28 Dec 2023 07:27:55 +0100")

Patrick Steinhardt <ps@pks.im> writes:

> When reading ref records of type "val1" we store its object ID in an

I'd find it easier to follow if we had a comma before "we store",
but perhaps I am old fashioned.

> allocated array. This results in an additional allocation for every
> single ref record we read, which is rather inefficient especially when
> iterating over refs.
>
> Refactor the code to instead use a static array of `GIT_MAX_RAWSZ`

"a static" -> "an embedded", perhaps?  The struct as the whole may
or may not be static but the point of this patch is that the array
is embedded in it.

> bytes. While this means that `struct ref_record` is bigger now, we
> typically do not store all refs in an array anyway and instead only
> handle a limited number of records at the same point in time.

Nicely explained.

> Using `git show-ref --quiet` in a repository with ~350k refs this leads
> to a significant drop in allocations. Before:
>
>     HEAP SUMMARY:
>         in use at exit: 21,098 bytes in 192 blocks
>       total heap usage: 2,116,683 allocs, 2,116,491 frees, 76,098,060 bytes allocated
>
> After:
>
>     HEAP SUMMARY:
>         in use at exit: 21,098 bytes in 192 blocks
>       total heap usage: 1,419,031 allocs, 1,418,839 frees, 62,145,036 bytes allocated
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  reftable/block_test.c      |  4 +---
>  reftable/merged_test.c     | 16 ++++++----------
>  reftable/readwrite_test.c  | 14 ++++----------
>  reftable/record.c          |  3 ---
>  reftable/record_test.c     |  1 -
>  reftable/reftable-record.h |  3 ++-
>  reftable/stack_test.c      |  2 --
>  7 files changed, 13 insertions(+), 30 deletions(-)
>
> diff --git a/reftable/block_test.c b/reftable/block_test.c
> index c00bbc8aed..dedb05c7d8 100644
> --- a/reftable/block_test.c
> +++ b/reftable/block_test.c
> @@ -49,13 +49,11 @@ static void test_block_read_write(void)
>  
>  	for (i = 0; i < N; i++) {
>  		char name[100];
> -		uint8_t hash[GIT_SHA1_RAWSZ];
>  		snprintf(name, sizeof(name), "branch%02d", i);
> -		memset(hash, i, sizeof(hash));
>  
>  		rec.u.ref.refname = name;
>  		rec.u.ref.value_type = REFTABLE_REF_VAL1;
> -		rec.u.ref.value.val1 = hash;
> +		memset(rec.u.ref.value.val1, i, GIT_SHA1_RAWSZ);
>  
>  		names[i] = xstrdup(name);
>  		n = block_writer_add(&bw, &rec);
> diff --git a/reftable/merged_test.c b/reftable/merged_test.c
> index d08c16abef..b3927a5d73 100644
> --- a/reftable/merged_test.c
> +++ b/reftable/merged_test.c
> @@ -123,13 +123,11 @@ static void readers_destroy(struct reftable_reader **readers, size_t n)
>  
>  static void test_merged_between(void)
>  {
> -	uint8_t hash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 0 };
> -
>  	struct reftable_ref_record r1[] = { {
>  		.refname = "b",
>  		.update_index = 1,
>  		.value_type = REFTABLE_REF_VAL1,
> -		.value.val1 = hash1,
> +		.value.val1 = { 1, 2, 3, 0 },
>  	} };
>  	struct reftable_ref_record r2[] = { {
>  		.refname = "a",
> @@ -165,26 +163,24 @@ static void test_merged_between(void)
>  
>  static void test_merged(void)
>  {
> -	uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
> -	uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
>  	struct reftable_ref_record r1[] = {
>  		{
>  			.refname = "a",
>  			.update_index = 1,
>  			.value_type = REFTABLE_REF_VAL1,
> -			.value.val1 = hash1,
> +			.value.val1 = { 1 },
>  		},
>  		{
>  			.refname = "b",
>  			.update_index = 1,
>  			.value_type = REFTABLE_REF_VAL1,
> -			.value.val1 = hash1,
> +			.value.val1 = { 1 },
>  		},
>  		{
>  			.refname = "c",
>  			.update_index = 1,
>  			.value_type = REFTABLE_REF_VAL1,
> -			.value.val1 = hash1,
> +			.value.val1 = { 1 },
>  		}
>  	};
>  	struct reftable_ref_record r2[] = { {
> @@ -197,13 +193,13 @@ static void test_merged(void)
>  			.refname = "c",
>  			.update_index = 3,
>  			.value_type = REFTABLE_REF_VAL1,
> -			.value.val1 = hash2,
> +			.value.val1 = { 2 },
>  		},
>  		{
>  			.refname = "d",
>  			.update_index = 3,
>  			.value_type = REFTABLE_REF_VAL1,
> -			.value.val1 = hash1,
> +			.value.val1 = { 1 },
>  		},
>  	};
>  
> diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
> index 9c16e0504e..87b238105c 100644
> --- a/reftable/readwrite_test.c
> +++ b/reftable/readwrite_test.c
> @@ -60,18 +60,15 @@ static void write_table(char ***names, struct strbuf *buf, int N,
>  	*names = reftable_calloc(sizeof(char *) * (N + 1));
>  	reftable_writer_set_limits(w, update_index, update_index);
>  	for (i = 0; i < N; i++) {
> -		uint8_t hash[GIT_SHA256_RAWSZ] = { 0 };
>  		char name[100];
>  		int n;
>  
> -		set_test_hash(hash, i);
> -
>  		snprintf(name, sizeof(name), "refs/heads/branch%02d", i);
>  
>  		ref.refname = name;
>  		ref.update_index = update_index;
>  		ref.value_type = REFTABLE_REF_VAL1;
> -		ref.value.val1 = hash;
> +		set_test_hash(ref.value.val1, i);
>  		(*names)[i] = xstrdup(name);
>  
>  		n = reftable_writer_add_ref(w, &ref);
> @@ -675,11 +672,10 @@ static void test_write_object_id_min_length(void)
>  	struct strbuf buf = STRBUF_INIT;
>  	struct reftable_writer *w =
>  		reftable_new_writer(&strbuf_add_void, &buf, &opts);
> -	uint8_t hash[GIT_SHA1_RAWSZ] = {42};
>  	struct reftable_ref_record ref = {
>  		.update_index = 1,
>  		.value_type = REFTABLE_REF_VAL1,
> -		.value.val1 = hash,
> +		.value.val1 = {42},
>  	};
>  	int err;
>  	int i;
> @@ -711,11 +707,10 @@ static void test_write_object_id_length(void)
>  	struct strbuf buf = STRBUF_INIT;
>  	struct reftable_writer *w =
>  		reftable_new_writer(&strbuf_add_void, &buf, &opts);
> -	uint8_t hash[GIT_SHA1_RAWSZ] = {42};
>  	struct reftable_ref_record ref = {
>  		.update_index = 1,
>  		.value_type = REFTABLE_REF_VAL1,
> -		.value.val1 = hash,
> +		.value.val1 = {42},
>  	};
>  	int err;
>  	int i;
> @@ -814,11 +809,10 @@ static void test_write_multiple_indices(void)
>  	writer = reftable_new_writer(&strbuf_add_void, &writer_buf, &opts);
>  	reftable_writer_set_limits(writer, 1, 1);
>  	for (i = 0; i < 100; i++) {
> -		unsigned char hash[GIT_SHA1_RAWSZ] = {i};
>  		struct reftable_ref_record ref = {
>  			.update_index = 1,
>  			.value_type = REFTABLE_REF_VAL1,
> -			.value.val1 = hash,
> +			.value.val1 = {i},
>  		};
>  
>  		strbuf_reset(&buf);
> diff --git a/reftable/record.c b/reftable/record.c
> index 5e258c734b..a67a6b4d8a 100644
> --- a/reftable/record.c
> +++ b/reftable/record.c
> @@ -219,7 +219,6 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec,
>  	case REFTABLE_REF_DELETION:
>  		break;
>  	case REFTABLE_REF_VAL1:
> -		ref->value.val1 = reftable_malloc(hash_size);
>  		memcpy(ref->value.val1, src->value.val1, hash_size);
>  		break;
>  	case REFTABLE_REF_VAL2:
> @@ -303,7 +302,6 @@ void reftable_ref_record_release(struct reftable_ref_record *ref)
>  		reftable_free(ref->value.val2.value);
>  		break;
>  	case REFTABLE_REF_VAL1:
> -		reftable_free(ref->value.val1);
>  		break;
>  	case REFTABLE_REF_DELETION:
>  		break;
> @@ -394,7 +392,6 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
>  			return -1;
>  		}
>  
> -		r->value.val1 = reftable_malloc(hash_size);
>  		memcpy(r->value.val1, in.buf, hash_size);
>  		string_view_consume(&in, hash_size);
>  		break;
> diff --git a/reftable/record_test.c b/reftable/record_test.c
> index 70ae78feca..5c94d26e35 100644
> --- a/reftable/record_test.c
> +++ b/reftable/record_test.c
> @@ -119,7 +119,6 @@ static void test_reftable_ref_record_roundtrip(void)
>  		case REFTABLE_REF_DELETION:
>  			break;
>  		case REFTABLE_REF_VAL1:
> -			in.u.ref.value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
>  			set_hash(in.u.ref.value.val1, 1);
>  			break;
>  		case REFTABLE_REF_VAL2:
> diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h
> index f7eb2d6015..7f3a0df635 100644
> --- a/reftable/reftable-record.h
> +++ b/reftable/reftable-record.h
> @@ -9,6 +9,7 @@ license that can be found in the LICENSE file or at
>  #ifndef REFTABLE_RECORD_H
>  #define REFTABLE_RECORD_H
>  
> +#include "hash-ll.h"
>  #include <stdint.h>
>  
>  /*
> @@ -38,7 +39,7 @@ struct reftable_ref_record {
>  #define REFTABLE_NR_REF_VALUETYPES 4
>  	} value_type;
>  	union {
> -		uint8_t *val1; /* malloced hash. */
> +		unsigned char val1[GIT_MAX_RAWSZ];
>  		struct {
>  			uint8_t *value; /* first value, malloced hash  */
>  			uint8_t *target_value; /* second value, malloced hash */
> diff --git a/reftable/stack_test.c b/reftable/stack_test.c
> index 14a3fc11ee..feab49d7f7 100644
> --- a/reftable/stack_test.c
> +++ b/reftable/stack_test.c
> @@ -463,7 +463,6 @@ static void test_reftable_stack_add(void)
>  		refs[i].refname = xstrdup(buf);
>  		refs[i].update_index = i + 1;
>  		refs[i].value_type = REFTABLE_REF_VAL1;
> -		refs[i].value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
>  		set_test_hash(refs[i].value.val1, i);
>  
>  		logs[i].refname = xstrdup(buf);
> @@ -600,7 +599,6 @@ static void test_reftable_stack_tombstone(void)
>  		refs[i].update_index = i + 1;
>  		if (i % 2 == 0) {
>  			refs[i].value_type = REFTABLE_REF_VAL1;
> -			refs[i].value.val1 = reftable_malloc(GIT_SHA1_RAWSZ);
>  			set_test_hash(refs[i].value.val1, i);
>  		}

  reply	other threads:[~2023-12-28 17:03 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-20  9:17 [PATCH 0/7] reftable: fixes and optimizations (pt.2) Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 1/7] reftable/stack: do not overwrite errors when compacting Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 2/7] reftable/writer: fix index corruption when writing multiple indices Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 3/7] reftable/record: constify some parts of the interface Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 4/7] reftable/record: store "val1" hashes as static arrays Patrick Steinhardt
2023-12-20  9:25   ` Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 5/7] reftable/record: store "val2" " Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 6/7] reftable/merged: really reuse buffers to compute record keys Patrick Steinhardt
2023-12-20  9:17 ` [PATCH 7/7] reftable/merged: transfer ownership of records when iterating Patrick Steinhardt
2023-12-20 19:10 ` [PATCH 0/7] reftable: fixes and optimizations (pt.2) Junio C Hamano
2023-12-28  6:27 ` [PATCH v2 0/8] " Patrick Steinhardt
2023-12-28  6:27   ` [PATCH v2 1/8] reftable/stack: do not overwrite errors when compacting Patrick Steinhardt
2023-12-28  6:27   ` [PATCH v2 2/8] reftable/stack: do not auto-compact twice in `reftable_stack_add()` Patrick Steinhardt
2023-12-28  6:27   ` [PATCH v2 3/8] reftable/writer: fix index corruption when writing multiple indices Patrick Steinhardt
2023-12-28  6:27   ` [PATCH v2 4/8] reftable/record: constify some parts of the interface Patrick Steinhardt
2023-12-28  6:27   ` [PATCH v2 5/8] reftable/record: store "val1" hashes as static arrays Patrick Steinhardt
2023-12-28 17:03     ` Junio C Hamano [this message]
2023-12-28  6:28   ` [PATCH v2 6/8] reftable/record: store "val2" " Patrick Steinhardt
2023-12-28  6:28   ` [PATCH v2 7/8] reftable/merged: really reuse buffers to compute record keys Patrick Steinhardt
2023-12-28 17:03     ` Junio C Hamano
2023-12-28  6:28   ` [PATCH v2 8/8] reftable/merged: transfer ownership of records when iterating Patrick Steinhardt
2023-12-28 17:04     ` Junio C Hamano
2024-01-03  6:22 ` [PATCH v3 0/8] reftable: fixes and optimizations (pt.2) Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 1/8] reftable/stack: do not overwrite errors when compacting Patrick Steinhardt
2024-02-14 15:12     ` Han-Wen Nienhuys
2024-02-15  7:43       ` Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 2/8] reftable/stack: do not auto-compact twice in `reftable_stack_add()` Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 3/8] reftable/writer: fix index corruption when writing multiple indices Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 4/8] reftable/record: constify some parts of the interface Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 5/8] reftable/record: store "val1" hashes as static arrays Patrick Steinhardt
2024-02-05 11:39     ` Karthik Nayak
2024-02-06  6:03       ` Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 6/8] reftable/record: store "val2" " Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 7/8] reftable/merged: really reuse buffers to compute record keys Patrick Steinhardt
2024-01-03  6:22   ` [PATCH v3 8/8] reftable/merged: transfer ownership of records when iterating Patrick Steinhardt
2024-02-05 13:08   ` [PATCH v3 0/8] reftable: fixes and optimizations (pt.2) Karthik Nayak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqq1qb6i7jl.fsf@gitster.g \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=hanwenn@gmail.com \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.