Git development

Git development
 help / color / mirror / Atom feed

* [PATCH v5] tests: move t0009-prio-queue.sh to the new unit testing framework
From: Chandra Pratap via GitGitGadget @ 2024-01-25 20:02 UTC (permalink / raw)
  To: git; +Cc: Chandra Pratap, Jeff King, Chandra Pratap, Chandra Pratap
In-Reply-To: <pull.1642.v4.git.1705865326185.gitgitgadget@gmail.com>

From: Chandra Pratap <chandrapratap3519@gmail.com>

t/t0009-prio-queue.sh along with t/helper/test-prio-queue.c unit
tests Git's implementation of a priority queue. Migrate the
test over to the new unit testing framework to simplify debugging
and reduce test run-time. Refactor the required logic and add
a new test case in addition to porting over the original ones in
shell.

Signed-off-by: Chandra Pratap <chandrapratap3519@gmail.com>
---
    tests: move t0009-prio-queue.sh to the new unit testing framework
    
    Thanks for the feedbacks! I have updated the patch so that the resulting
    tests show something like:
    
    > check "result[j++] == show(get)" failed at
    > unit-tests/t-prio-queue.c:60
    
    > left: 20 right: 2
    
    > input: push 1, push 2, get, get, get, push 1, push 2, get, get, get,
    
    > failed at input index 8
    
    in the case of a failing test.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1642%2FChand-ra%2Fprio-queue-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1642/Chand-ra/prio-queue-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/1642

Range-diff vs v4:

 1:  e9a0dba6f47 ! 1:  88e70127ad3 tests: move t0009-prio-queue.sh to the new unit testing framework
     @@ t/unit-tests/t-prio-queue.c (new)
      +	return v ? *v : MISSING;
      +}
      +
     ++static void print_input(int *input, size_t input_size)
     ++{
     ++	char buf[128];
     ++	int len = 0;
     ++	for(size_t i = 0; i < input_size; i++) {
     ++		switch (input[i]) {
     ++		case GET:
     ++			len += xsnprintf(buf+len, sizeof(buf), "get, ");
     ++			break;
     ++		case DUMP:
     ++			len += xsnprintf(buf+len, sizeof(buf), "dump");
     ++			break;
     ++		case STACK:
     ++			len += xsnprintf(buf+len, sizeof(buf), "stack, ");
     ++			break;
     ++		case REVERSE:
     ++			len += xsnprintf(buf+len, sizeof(buf), "reverse, ");
     ++			break;
     ++		default:
     ++			len += xsnprintf(buf+len, sizeof(buf), "push %d, ", input[i]);
     ++			break;
     ++		}
     ++	}
     ++	test_msg("input: %s", buf);
     ++}
     ++
      +static void test_prio_queue(int *input, int *result, size_t input_size)
      +{
      +	struct prio_queue pq = { intcmp };
      +
      +	for (int i = 0, j = 0; i < input_size; i++) {
      +		void *peek, *get;
     -+		switch(input[i]) {
     ++		switch (input[i]) {
      +		case GET:
      +			peek = prio_queue_peek(&pq);
      +			get = prio_queue_get(&pq);
      +			if (!check(peek == get))
      +				return;
     -+			if(!check_int(result[j++], ==, show(get)))
     -+				test_msg("failed at result[] index %d", j-1);
     ++			if (!check_int(result[j++], ==, show(get))) {
     ++				print_input(input, input_size);
     ++				test_msg("failed at input index %d", i);
     ++			}
      +			break;
      +		case DUMP:
      +			while ((peek = prio_queue_peek(&pq))) {
      +				get = prio_queue_get(&pq);
      +				if (!check(peek == get))
      +					return;
     -+				if(!check_int(result[j++], ==, show(get)))
     -+					test_msg("failed at result[] index %d", j-1);
     ++				if (!check_int(result[j++], ==, show(get))) {
     ++					print_input(input, input_size);
     ++					test_msg("failed at input index %d", i);
     ++				}
      +			}
      +			break;
      +		case STACK:
     @@ t/unit-tests/t-prio-queue.c (new)
      +	clear_prio_queue(&pq);
      +}
      +
     ++
      +#define BASIC_INPUT 2, 6, 3, 10, 9, 5, 7, 4, 5, 8, 1, DUMP
      +#define BASIC_RESULT 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10
      +


 Makefile                    |   2 +-
 t/helper/test-prio-queue.c  |  51 --------------
 t/helper/test-tool.c        |   1 -
 t/helper/test-tool.h        |   1 -
 t/t0009-prio-queue.sh       |  66 ------------------
 t/unit-tests/t-prio-queue.c | 129 ++++++++++++++++++++++++++++++++++++
 6 files changed, 130 insertions(+), 120 deletions(-)
 delete mode 100644 t/helper/test-prio-queue.c
 delete mode 100755 t/t0009-prio-queue.sh
 create mode 100644 t/unit-tests/t-prio-queue.c

diff --git a/Makefile b/Makefile
index 312d95084c1..d5e48e656b3 100644
--- a/Makefile
+++ b/Makefile
@@ -828,7 +828,6 @@ TEST_BUILTINS_OBJS += test-partial-clone.o
 TEST_BUILTINS_OBJS += test-path-utils.o
 TEST_BUILTINS_OBJS += test-pcre2-config.o
 TEST_BUILTINS_OBJS += test-pkt-line.o
-TEST_BUILTINS_OBJS += test-prio-queue.o
 TEST_BUILTINS_OBJS += test-proc-receive.o
 TEST_BUILTINS_OBJS += test-progress.o
 TEST_BUILTINS_OBJS += test-reach.o
@@ -1340,6 +1339,7 @@ THIRD_PARTY_SOURCES += sha1dc/%
 
 UNIT_TEST_PROGRAMS += t-basic
 UNIT_TEST_PROGRAMS += t-strbuf
+UNIT_TEST_PROGRAMS += t-prio-queue
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS))
 UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o
diff --git a/t/helper/test-prio-queue.c b/t/helper/test-prio-queue.c
deleted file mode 100644
index f0bf255f5f0..00000000000
--- a/t/helper/test-prio-queue.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "test-tool.h"
-#include "prio-queue.h"
-
-static int intcmp(const void *va, const void *vb, void *data UNUSED)
-{
-	const int *a = va, *b = vb;
-	return *a - *b;
-}
-
-static void show(int *v)
-{
-	if (!v)
-		printf("NULL\n");
-	else
-		printf("%d\n", *v);
-	free(v);
-}
-
-int cmd__prio_queue(int argc UNUSED, const char **argv)
-{
-	struct prio_queue pq = { intcmp };
-
-	while (*++argv) {
-		if (!strcmp(*argv, "get")) {
-			void *peek = prio_queue_peek(&pq);
-			void *get = prio_queue_get(&pq);
-			if (peek != get)
-				BUG("peek and get results do not match");
-			show(get);
-		} else if (!strcmp(*argv, "dump")) {
-			void *peek;
-			void *get;
-			while ((peek = prio_queue_peek(&pq))) {
-				get = prio_queue_get(&pq);
-				if (peek != get)
-					BUG("peek and get results do not match");
-				show(get);
-			}
-		} else if (!strcmp(*argv, "stack")) {
-			pq.compare = NULL;
-		} else {
-			int *v = xmalloc(sizeof(*v));
-			*v = atoi(*argv);
-			prio_queue_put(&pq, v);
-		}
-	}
-
-	clear_prio_queue(&pq);
-
-	return 0;
-}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 876cd2dc313..5f591b9718f 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -57,7 +57,6 @@ static struct test_cmd cmds[] = {
 	{ "path-utils", cmd__path_utils },
 	{ "pcre2-config", cmd__pcre2_config },
 	{ "pkt-line", cmd__pkt_line },
-	{ "prio-queue", cmd__prio_queue },
 	{ "proc-receive", cmd__proc_receive },
 	{ "progress", cmd__progress },
 	{ "reach", cmd__reach },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 70dd4eba119..b921138d8ec 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -50,7 +50,6 @@ int cmd__partial_clone(int argc, const char **argv);
 int cmd__path_utils(int argc, const char **argv);
 int cmd__pcre2_config(int argc, const char **argv);
 int cmd__pkt_line(int argc, const char **argv);
-int cmd__prio_queue(int argc, const char **argv);
 int cmd__proc_receive(int argc, const char **argv);
 int cmd__progress(int argc, const char **argv);
 int cmd__reach(int argc, const char **argv);
diff --git a/t/t0009-prio-queue.sh b/t/t0009-prio-queue.sh
deleted file mode 100755
index eea99107a48..00000000000
--- a/t/t0009-prio-queue.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/sh
-
-test_description='basic tests for priority queue implementation'
-
-TEST_PASSES_SANITIZE_LEAK=true
-. ./test-lib.sh
-
-cat >expect <<'EOF'
-1
-2
-3
-4
-5
-5
-6
-7
-8
-9
-10
-EOF
-test_expect_success 'basic ordering' '
-	test-tool prio-queue 2 6 3 10 9 5 7 4 5 8 1 dump >actual &&
-	test_cmp expect actual
-'
-
-cat >expect <<'EOF'
-2
-3
-4
-1
-5
-6
-EOF
-test_expect_success 'mixed put and get' '
-	test-tool prio-queue 6 2 4 get 5 3 get get 1 dump >actual &&
-	test_cmp expect actual
-'
-
-cat >expect <<'EOF'
-1
-2
-NULL
-1
-2
-NULL
-EOF
-test_expect_success 'notice empty queue' '
-	test-tool prio-queue 1 2 get get get 1 2 get get get >actual &&
-	test_cmp expect actual
-'
-
-cat >expect <<'EOF'
-3
-2
-6
-4
-5
-1
-8
-EOF
-test_expect_success 'stack order' '
-	test-tool prio-queue stack 8 1 5 4 6 2 3 dump >actual &&
-	test_cmp expect actual
-'
-
-test_done
diff --git a/t/unit-tests/t-prio-queue.c b/t/unit-tests/t-prio-queue.c
new file mode 100644
index 00000000000..8456b23255c
--- /dev/null
+++ b/t/unit-tests/t-prio-queue.c
@@ -0,0 +1,129 @@
+#include "test-lib.h"
+#include "prio-queue.h"
+
+static int intcmp(const void *va, const void *vb, void *data UNUSED)
+{
+	const int *a = va, *b = vb;
+	return *a - *b;
+}
+
+
+#define MISSING  -1
+#define DUMP	 -2
+#define STACK	 -3
+#define GET	 -4
+#define REVERSE  -5
+
+static int show(int *v)
+{
+	return v ? *v : MISSING;
+}
+
+static void print_input(int *input, size_t input_size)
+{
+	char buf[128];
+	int len = 0;
+	for(size_t i = 0; i < input_size; i++) {
+		switch (input[i]) {
+		case GET:
+			len += xsnprintf(buf+len, sizeof(buf), "get, ");
+			break;
+		case DUMP:
+			len += xsnprintf(buf+len, sizeof(buf), "dump");
+			break;
+		case STACK:
+			len += xsnprintf(buf+len, sizeof(buf), "stack, ");
+			break;
+		case REVERSE:
+			len += xsnprintf(buf+len, sizeof(buf), "reverse, ");
+			break;
+		default:
+			len += xsnprintf(buf+len, sizeof(buf), "push %d, ", input[i]);
+			break;
+		}
+	}
+	test_msg("input: %s", buf);
+}
+
+static void test_prio_queue(int *input, int *result, size_t input_size)
+{
+	struct prio_queue pq = { intcmp };
+
+	for (int i = 0, j = 0; i < input_size; i++) {
+		void *peek, *get;
+		switch (input[i]) {
+		case GET:
+			peek = prio_queue_peek(&pq);
+			get = prio_queue_get(&pq);
+			if (!check(peek == get))
+				return;
+			if (!check_int(result[j++], ==, show(get))) {
+				print_input(input, input_size);
+				test_msg("failed at input index %d", i);
+			}
+			break;
+		case DUMP:
+			while ((peek = prio_queue_peek(&pq))) {
+				get = prio_queue_get(&pq);
+				if (!check(peek == get))
+					return;
+				if (!check_int(result[j++], ==, show(get))) {
+					print_input(input, input_size);
+					test_msg("failed at input index %d", i);
+				}
+			}
+			break;
+		case STACK:
+			pq.compare = NULL;
+			break;
+		case REVERSE:
+			prio_queue_reverse(&pq);
+			break;
+		default:
+			prio_queue_put(&pq, &input[i]);
+			break;
+		}
+	}
+	clear_prio_queue(&pq);
+}
+
+
+#define BASIC_INPUT 2, 6, 3, 10, 9, 5, 7, 4, 5, 8, 1, DUMP
+#define BASIC_RESULT 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10
+
+#define MIXED_PUT_GET_INPUT 6, 2, 4, GET, 5, 3, GET, GET, 1, DUMP
+#define MIXED_PUT_GET_RESULT 2, 3, 4, 1, 5, 6
+
+#define EMPTY_QUEUE_INPUT 1, 2, GET, GET, GET, 1, 2, GET, GET, GET
+#define EMPTY_QUEUE_RESULT 1, 2, MISSING, 1, 2, MISSING
+
+#define STACK_INPUT STACK, 8, 1, 5, 4, 6, 2, 3, DUMP
+#define STACK_RESULT 3, 2, 6, 4, 5, 1, 8
+
+#define REVERSE_STACK_INPUT STACK, 1, 2, 3, 4, 5, 6, REVERSE, DUMP
+#define REVERSE_STACK_RESULT 1, 2, 3, 4, 5, 6
+
+#define TEST_INPUT(INPUT, RESULT, name)			\
+  static void test_##name(void)				\
+{								\
+	int input[] = {INPUT};					\
+	int result[] = {RESULT};				\
+	test_prio_queue(input, result, ARRAY_SIZE(input));	\
+}
+
+TEST_INPUT(BASIC_INPUT, BASIC_RESULT, basic)
+TEST_INPUT(MIXED_PUT_GET_INPUT, MIXED_PUT_GET_RESULT, mixed)
+TEST_INPUT(EMPTY_QUEUE_INPUT, EMPTY_QUEUE_RESULT, empty)
+TEST_INPUT(STACK_INPUT, STACK_RESULT, stack)
+TEST_INPUT(REVERSE_STACK_INPUT, REVERSE_STACK_RESULT, reverse)
+
+int cmd_main(int argc, const char **argv)
+{
+	TEST(test_basic(), "prio-queue works for basic input");
+	TEST(test_mixed(), "prio-queue works for mixed put & get commands");
+	TEST(test_empty(), "prio-queue works when queue is empty");
+	TEST(test_stack(), "prio-queue works when used as a LIFO stack");
+	TEST(test_reverse(), "prio-queue works when LIFO stack is reversed");
+
+	return test_done();
+}

base-commit: 1a87c842ece327d03d08096395969aca5e0a6996
-- 
gitgitgadget

^ permalink raw reply related

* Re: what should "git clean -n -f [-d] [-x] <pattern>" do?
From: Sergey Organov @ 2024-01-25 20:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Elijah Newren, git
In-Reply-To: <xmqq1qa5xq4n.fsf@gitster.g>

Junio C Hamano <gitster@pobox.com> writes:

> Sergey Organov <sorganov@gmail.com> writes:
>
>> Now we are going to introduce "dry run" option "-n". Most simple and
>> obvious way to do it is to set internal flag "dry_run" and then at every
>> invocation of "remove(file_name)" put an if(dry_run) that will just
>> print(file_name) instead or removing it. Let's suppose we did just that.
>> We get this behavior:
>>
>>   $ git clean -n
>>   fatal: clean.requireForce defaults to true and neither -i nor -f given; refusing to clean
>>   $ git clean -f -n
>>   would remove "a"
>>   would remove "b"
>>   $ git clean -f -f -n
>>   would remove "a"
>>   would remove "b"
>>   would remove "sub/a"
>>   $
>>
>> I see this as logical, clean, and straightforward behavior, meeting user
>> expectations for "dry run" option, so I suggest to do just that.
>
> I think we are saying the same thing.  If the original semantics
> were "you must force with -f to do anything useful", instead of "you
> must choose either forcing with -f or not doing with -n", then it
> would have led to the above behaviour.
>
> The thing is, it is way too late to change it that way without
> breaking too many folks, and that is the problem.

If we agree on the behavior above for sane "dry run", yet you worry
about backward compatibility so much to deny changing the behavior of
"-n", then a way to go could be to introduce, say, "-d" for sane "dry
run", and obsolete "-n" while keeping it alone.

Thanks,
-- Sergey Organov



^ permalink raw reply

* Re: what should "git clean -n -f [-d] [-x] <pattern>" do?
From: Sergey Organov @ 2024-01-25 20:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Elijah Newren, git
In-Reply-To: <87ede56tva.fsf@osv.gnss.ru>

Sergey Organov <sorganov@gmail.com> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>> Sergey Organov <sorganov@gmail.com> writes:
>>
>>> Now we are going to introduce "dry run" option "-n". Most simple and
>>> obvious way to do it is to set internal flag "dry_run" and then at every
>>> invocation of "remove(file_name)" put an if(dry_run) that will just
>>> print(file_name) instead or removing it. Let's suppose we did just that.
>>> We get this behavior:
>>>
>>>   $ git clean -n
>>>   fatal: clean.requireForce defaults to true and neither -i nor -f given; refusing to clean
>>>   $ git clean -f -n
>>>   would remove "a"
>>>   would remove "b"
>>>   $ git clean -f -f -n
>>>   would remove "a"
>>>   would remove "b"
>>>   would remove "sub/a"
>>>   $
>>>
>>> I see this as logical, clean, and straightforward behavior, meeting user
>>> expectations for "dry run" option, so I suggest to do just that.
>>
>> I think we are saying the same thing.  If the original semantics
>> were "you must force with -f to do anything useful", instead of "you
>> must choose either forcing with -f or not doing with -n", then it
>> would have led to the above behaviour.
>>
>> The thing is, it is way too late to change it that way without
>> breaking too many folks, and that is the problem.
>
> If we agree on the behavior above for sane "dry run", yet you worry
> about backward compatibility so much to deny changing the behavior of
> "-n", then a way to go could be to introduce, say, "-d" for sane "dry
> run", and obsolete "-n" while keeping it alone.

Except exactly "-d" is already taken, but you get the idea.

-- Sergey Organov

^ permalink raw reply

* Re: [PATCH 06/10] trailer: make trailer_info struct private
From: Junio C Hamano @ 2024-01-25 20:32 UTC (permalink / raw)
  To: Josh Steadmon
  Cc: Linus Arver via GitGitGadget, git, Emily Shaffer,
	Christian Couder, Linus Arver
In-Reply-To: <ZbK4BcESMfljw7Ym@google.com>

Josh Steadmon <steadmon@google.com> writes:

> The pimpl pattern also seems like it will force us to think harder about
> providing a useful interface, so I am in favor of using it here as
> practice for future libification.

Sounds good.


^ permalink raw reply

* [PATCH 0/2] index-pack: fsck honor checks
From: John Cai via GitGitGadget @ 2024-01-25 20:51 UTC (permalink / raw)
  To: git; +Cc: John Cai

git-index-pack has a --strict mode that can take an optional argument to
provide a list of fsck issues to change their severity. --fsck-objects does
not have such a utility, which would be useful if one would like to be more
lenient or strict on data integrity in a repository.

Like --strict, Allow --fsck-objects to also take a list of fsck msgs to
change the severity.

John Cai (2):
  index-pack: test and document --strict=<msg>
  index-pack: --fsck-objects to take an optional argument for fsck msgs

 Documentation/git-index-pack.txt | 19 +++++++++++----
 builtin/index-pack.c             |  5 ++--
 t/t5300-pack-object.sh           | 41 ++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 6 deletions(-)


base-commit: 186b115d3062e6230ee296d1ddaa0c4b72a464b5
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1658%2Fjohn-cai%2Fjc%2Findex-pack-fsck-honor-checks-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1658/john-cai/jc/index-pack-fsck-honor-checks-v1
Pull-Request: https://github.com/git/git/pull/1658
-- 
gitgitgadget

^ permalink raw reply

* [PATCH 1/2] index-pack: test and document --strict=<msg>
From: John Cai via GitGitGadget @ 2024-01-25 20:51 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai
In-Reply-To: <pull.1658.git.git.1706215884.gitgitgadget@gmail.com>

From: John Cai <johncai86@gmail.com>

5d477a334a (fsck (receive-pack): allow demoting errors to warnings,
2015-06-22) allowed a list of fsck msg to downgrade to be passed to
--strict. However this is a hidden argument that was not documented nor
tested. Though true that most users would not call this option
direction, (nor use index-pack for that matter) it is still useful to
document and test this feature.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 Documentation/git-index-pack.txt |  9 +++++++--
 builtin/index-pack.c             |  2 +-
 t/t5300-pack-object.sh           | 22 ++++++++++++++++++++++
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt
index 6486620c3d8..14f806d07d1 100644
--- a/Documentation/git-index-pack.txt
+++ b/Documentation/git-index-pack.txt
@@ -79,8 +79,13 @@ OPTIONS
 	to force the version for the generated pack index, and to force
 	64-bit index entries on objects located above the given offset.
 
---strict::
-	Die, if the pack contains broken objects or links.
+--strict[=<msg-ids>]::
+	Die, if the pack contains broken objects or links. If `<msg-ids>` is passed,
+	it should be a comma-separated list of `<msg-id>=<severity>` elements where
+	`<msg-id>` and `<severity>` are used to change the severity of some possible
+	issues, eg: `--strict="missingEmail=ignore,badTagName=error"`. See the entry
+	for the `fsck.<msg-id>` configuration options in `linkgit:git-fsck[1] for
+	more information on the possible values of `<msg-id>` and `<severity>`.
 
 --progress-title::
 	For internal use only.
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 1ea87e01f29..1e53ca23775 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -24,7 +24,7 @@
 #include "setup.h"
 
 static const char index_pack_usage[] =
-"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
+"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-ids>]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
 
 struct object_entry {
 	struct pack_idx_entry idx;
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index d402ec18b79..9563372ae27 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -441,6 +441,28 @@ test_expect_success 'index-pack with --strict' '
 	)
 '
 
+test_expect_success 'index-pack with --strict downgrading fsck msgs' '
+	test_when_finished rm -rf strict &&
+	git init strict &&
+	(
+		cd strict &&
+		test_commit first hello &&
+		cat >commit <<-EOF &&
+		tree $(git rev-parse HEAD^{tree})
+		parent $(git rev-parse HEAD)
+		author A U Thor
+		committer A U Thor
+
+		commit: this is a commit wit bad emails
+
+		EOF
+		git hash-object --literally -t commit -w --stdin <commit >commit_list &&
+		PACK=$(git pack-objects test <commit_list) &&
+		test_must_fail git index-pack --strict "test-$PACK.pack" &&
+		git index-pack --strict="missingEmail=ignore" "test-$PACK.pack"
+	)
+'
+
 test_expect_success 'honor pack.packSizeLimit' '
 	git config pack.packSizeLimit 3m &&
 	packname_10=$(git pack-objects test-10 <obj-list) &&
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH 2/2] index-pack: --fsck-objects to take an optional argument for fsck msgs
From: John Cai via GitGitGadget @ 2024-01-25 20:51 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai
In-Reply-To: <pull.1658.git.git.1706215884.gitgitgadget@gmail.com>

From: John Cai <johncai86@gmail.com>

git-index-pack has a --strict mode that can take an optional argument to
provide a list of fsck issues to change their severity. --fsck-objects
does not have such a utility, which would be useful if one would like to
be more lenient or strict on data integrity in a repository.

Like --strict, Allow --fsck-objects to also take a list of fsck msgs to
change the severity.

This commit also removes the "For internal use only" note for
--fsck-objects, and documents the option. This won't often be used by
the normal end user, but it turns out it is useful for Git forges like
GitLab.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 Documentation/git-index-pack.txt | 10 ++++++++--
 builtin/index-pack.c             |  5 +++--
 t/t5300-pack-object.sh           | 29 ++++++++++++++++++++++++-----
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt
index 14f806d07d1..37709b13c88 100644
--- a/Documentation/git-index-pack.txt
+++ b/Documentation/git-index-pack.txt
@@ -96,8 +96,14 @@ default and "Indexing objects" when `--stdin` is specified.
 --check-self-contained-and-connected::
 	Die if the pack contains broken links. For internal use only.
 
---fsck-objects::
-	For internal use only.
+--fsck-objects[=<msg-ids>]::
+	Instructs index-pack to check for broken objects instead of broken
+	links. If `<msg-ids>` is passed, it should be  a comma-separated list of
+	`<msg-id>=<severity>` where `<msg-id>` and `<severity>` are used to
+	change the severity of `fsck` errors, eg: `--strict="missingEmail=ignore,badTagName=ignore"`.
+	See the entry for the `fsck.<msg-id>` configuration options in
+	`linkgit:git-fsck[1] for more information on the possible values of
+	`<msg-id>` and `<severity>`.
 +
 Die if the pack contains broken objects. If the pack contains a tree
 pointing to a .gitmodules blob that does not exist, prints the hash of
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 1e53ca23775..519162f5b91 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -24,7 +24,7 @@
 #include "setup.h"
 
 static const char index_pack_usage[] =
-"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-ids>]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
+"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-ids>]] [--fsck-objects[=<msg-ids>]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
 
 struct object_entry {
 	struct pack_idx_entry idx;
@@ -1785,8 +1785,9 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 			} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
 				strict = 1;
 				check_self_contained_and_connected = 1;
-			} else if (!strcmp(arg, "--fsck-objects")) {
+			} else if (skip_to_optional_arg(arg, "--fsck-objects", &arg)) {
 				do_fsck_object = 1;
+				fsck_set_msg_types(&fsck_options, arg);
 			} else if (!strcmp(arg, "--verify")) {
 				verify = 1;
 			} else if (!strcmp(arg, "--verify-stat")) {
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index 9563372ae27..916cf939beb 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -441,8 +441,7 @@ test_expect_success 'index-pack with --strict' '
 	)
 '
 
-test_expect_success 'index-pack with --strict downgrading fsck msgs' '
-	test_when_finished rm -rf strict &&
+test_expect_success 'setup for --strict and --fsck-objects downgrading fsck msgs' '
 	git init strict &&
 	(
 		cd strict &&
@@ -457,12 +456,32 @@ test_expect_success 'index-pack with --strict downgrading fsck msgs' '
 
 		EOF
 		git hash-object --literally -t commit -w --stdin <commit >commit_list &&
-		PACK=$(git pack-objects test <commit_list) &&
-		test_must_fail git index-pack --strict "test-$PACK.pack" &&
-		git index-pack --strict="missingEmail=ignore" "test-$PACK.pack"
+		git pack-objects test <commit_list >pack-name
 	)
 '
 
+test_with_bad_commit () {
+	must_fail_arg="$1" &&
+	must_pass_arg="$2" &&
+	(
+		cd strict &&
+		test_expect_fail git index-pack "$must_fail_arg" "test-$(cat pack-name).pack"
+		git index-pack "$must_pass_arg" "test-$(cat pack-name).pack"
+	)
+}
+
+test_expect_success 'index-pack with --strict downgrading fsck msgs' '
+	test_with_bad_commit --strict --strict="missingEmail=ignore"
+'
+
+test_expect_success 'index-pack with --fsck-objects downgrading fsck msgs' '
+	test_with_bad_commit --fsck-objects --fsck-objects="missingEmail=ignore"
+'
+
+test_expect_success 'cleanup for --strict and --fsck-objects downgrading fsck msgs' '
+	rm -rf strict
+'
+
 test_expect_success 'honor pack.packSizeLimit' '
 	git config pack.packSizeLimit 3m &&
 	packname_10=$(git pack-objects test-10 <obj-list) &&
-- 
gitgitgadget

^ permalink raw reply related

* Re: [PATCH v2 1/4] refs: introduce `is_pseudoref()` and `is_headref()`
From: Karthik Nayak @ 2024-01-25 21:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, ps, phillip.wood123
In-Reply-To: <xmqqa5otxtqy.fsf@gitster.g>

On Thu, Jan 25, 2024 at 5:28 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Karthik Nayak <karthik.188@gmail.com> writes:
>
> >>> +int is_headref(struct ref_store *refs, const char *refname)
> >>> +{
> >>> +   if (!strcmp(refname, "HEAD"))
> >>> +           return refs_ref_exists(refs, refname);
> >>
> >> Given that "git for-each-ref refs/remotes" does not show
> >> "refs/remotes/origin/HEAD" in the output when we do not have the
> >> remote-tracking branch that symref points at, we probably do want
> >> to omit "HEAD" from the output when the HEAD symref points at an
> >> unborn branch.  If refs_ref_exists() says "no, it does not exist"
> >> in such a case, we are perfectly fine with this code.
> >>
> >> We do not have to worry about the unborn state for pseudorefs
> >> because they would never be symbolic.  But that in turn makes me
> >> suspect that the check done with refs_ref_exists() in the
> >> is_pseudoref() helper is a bit too lenient by allowing it to be a
> >> symbolic ref.  Shouldn't we be using a check based on
> >> read_ref_full(), like we did in another topic recently [*]?
> >>
> >>
> >> [Reference]
> >>
> >>  * https://lore.kernel.org/git/xmqqzfxa9usx.fsf@gitster.g/
> >>
> >
> > Thanks, this makes sense and the link is helpful. I'll do something
> > similar, but since HEAD can be a symref, I'll drop the
> > `RESOLVE_REF_NO_RECURSE` flag and only use `RESOLVE_REF_READING`.
>
> Just to make sure there is no misunderstanding, I think how
> is_headref() does what it does in the patch is perfectly fine,
> including its use of refs_ref_exists().  The side I was referring to
> with "in turn makes me suspect" is the other helper function that
> will never have to deal with a symref.  Use of refs_ref_exists() in
> that function is too loose.
>

AH! Totally misunderstood, thanks for reiterating.

^ permalink raw reply

* Re: [PATCH 1/2] index-pack: test and document --strict=<msg>
From: Junio C Hamano @ 2024-01-25 22:46 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai
In-Reply-To: <9b353aff73d6351b86cc7b55982f1565e76d08e9.1706215884.git.gitgitgadget@gmail.com>

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> 5d477a334a (fsck (receive-pack): allow demoting errors to warnings,
> 2015-06-22) allowed a list of fsck msg to downgrade to be passed to
> --strict. However this is a hidden argument that was not documented nor
> tested. Though true that most users would not call this option
> direction, (nor use index-pack for that matter) it is still useful to

Though it is true that ... call this option directly (nor use
index-pack, for that matter), it is still ...

or something like that, probably.

> document and test this feature.

And I agree with that.  Thanks for adding the necessary doc.

> +--strict[=<msg-ids>]::

<msg-id> in the context of "git fsck --help" seems to refer to the
left hand side of <msg-id>=<severity>.  <msg-ids> sounds as if it is
just the list of <msg-id> without saying anything about their severity,
which is not what we want to imply.

Either use a made-up word that is clearly different and can not be
mistaken as a list of <msg-id>, or spell it out a bit more
explicitly, may make it easier to follow?

	--strict[=<fsck-config>]
	--strict[=<msg-id>=<severity>...]

I dunno.

Use of <msg-id> and <severity> below looks good in the body of the
paragraph here.

> +	Die, if the pack contains broken objects or links. If `<msg-ids>` is passed,
> +	it should be a comma-separated list of `<msg-id>=<severity>` elements where
> +	`<msg-id>` and `<severity>` are used to change the severity of some possible
> +	issues, eg: `--strict="missingEmail=ignore,badTagName=error"`. See the entry
> +	for the `fsck.<msg-id>` configuration options in `linkgit:git-fsck[1] for
> +	more information on the possible values of `<msg-id>` and `<severity>`.

"eg:" -> "e.g.," probably.

> diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
> index d402ec18b79..9563372ae27 100755
> --- a/t/t5300-pack-object.sh
> +++ b/t/t5300-pack-object.sh
> @@ -441,6 +441,28 @@ test_expect_success 'index-pack with --strict' '
>  	)
>  '
>  
> +test_expect_success 'index-pack with --strict downgrading fsck msgs' '
> +	test_when_finished rm -rf strict &&
> +	git init strict &&
> +	(
> +		cd strict &&
> +		test_commit first hello &&
> +		cat >commit <<-EOF &&
> +		tree $(git rev-parse HEAD^{tree})
> +		parent $(git rev-parse HEAD)
> +		author A U Thor
> +		committer A U Thor
> +
> +		commit: this is a commit wit bad emails

"wit" -> "with"; as this typo does not contribute anything to
the badness we expect index-pack to notice, it would pay to
make sure we do not have it, to avoid distracting readers.

> +		EOF
> +		git hash-object --literally -t commit -w --stdin <commit >commit_list &&
> +		PACK=$(git pack-objects test <commit_list) &&
> +		test_must_fail git index-pack --strict "test-$PACK.pack" &&
> +		git index-pack --strict="missingEmail=ignore" "test-$PACK.pack"
> +	)
> +'
> +
>  test_expect_success 'honor pack.packSizeLimit' '
>  	git config pack.packSizeLimit 3m &&
>  	packname_10=$(git pack-objects test-10 <obj-list) &&

^ permalink raw reply

* Re: [PATCH 2/2] index-pack: --fsck-objects to take an optional argument for fsck msgs
From: Junio C Hamano @ 2024-01-25 23:13 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai
In-Reply-To: <074e0c7ab923777c66516ced18b4fd1dadf7677f.1706215884.git.gitgitgadget@gmail.com>

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> git-index-pack has a --strict mode that can take an optional argument to

"mode" -> "option", probably.

> provide a list of fsck issues to change their severity. --fsck-objects
> does not have such a utility, which would be useful if one would like to
> be more lenient or strict on data integrity in a repository.
>
> Like --strict, Allow --fsck-objects to also take a list of fsck msgs to
> change the severity.

"Allow" -> "allow".

> This commit also removes the "For internal use only" note for
> --fsck-objects, and documents the option. This won't often be used by
> the normal end user, but it turns out it is useful for Git forges like
> GitLab.

"This commit also removes", "documents" -> "Remove", "document".

> ---fsck-objects::
> -	For internal use only.
> +--fsck-objects[=<msg-ids>]::
> +	Instructs index-pack to check for broken objects instead of broken
> +	links. If `<msg-ids>` is passed, it should be  a comma-separated list of

Very much pleased to see an additional description that is written
to clarify the difference between this option and the other --strict
option.  The original was totally unclear on this point, and it is
very much appreciated.

The other option notices and dies upon seeing either a broken object
or a dangling link.  This one only diagnoses broken objects and does
not care if the objects are connected.  However, saying "instead of"
here tempts readers to mistakenly think that the other one only
checks links and this one only checks contents, which is not what we
want to say.  Perhaps "to check for broken objects, but unlike
`--strict`, do not choke on broken links" or something?

Same comment on <msg-ids> as the previous step.

> +	`<msg-id>=<severity>` where `<msg-id>` and `<severity>` are used to
> +	change the severity of `fsck` errors, eg: `--strict="missingEmail=ignore,badTagName=ignore"`.

Same comment for "eg:" as before.

> @@ -1785,8 +1785,9 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
>  			} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
>  				strict = 1;
>  				check_self_contained_and_connected = 1;
> -			} else if (!strcmp(arg, "--fsck-objects")) {
> +			} else if (skip_to_optional_arg(arg, "--fsck-objects", &arg)) {
>  				do_fsck_object = 1;
> +				fsck_set_msg_types(&fsck_options, arg);
>  			} else if (!strcmp(arg, "--verify")) {
>  				verify = 1;
>  			} else if (!strcmp(arg, "--verify-stat")) {

The implementation of this part looks quite obvious, once you see
how "--strict[=<msgid>=<level>]" is implemented.

Looking good.

Thanks.

^ permalink raw reply

* Re: [PATCH 08/10] trailer: prepare to move parse_trailers_from_command_line_args() to builtin
From: Josh Steadmon @ 2024-01-25 23:39 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Emily Shaffer, Junio C Hamano, Christian Couder, Linus Arver
In-Reply-To: <406725df46a81f485d7a74c11488e625d3026dc5.1704869487.git.gitgitgadget@gmail.com>

On 2024.01.10 06:51, Linus Arver via GitGitGadget wrote:
> From: Linus Arver <linusa@google.com>
> 
> Expose more functions in the trailer.h API, in preparation for moving
> out
> 
>     parse_trailers_from_command_line_args()
> 
> to interpret-trailer.c, because the trailer API should not be concerned
> with command line arguments (as it has nothing to do with trailers
> themselves). The interpret-trailers builtin is the only user of the
> above function.
> 
> Signed-off-by: Linus Arver <linusa@google.com>
> ---
>  trailer.c | 66 +++++++++++++++++++++++++++----------------------------
>  trailer.h | 10 +++++++++
>  2 files changed, 42 insertions(+), 34 deletions(-)
> 
> diff --git a/trailer.c b/trailer.c
> index 360e76376b8..e2d541372a3 100644
> --- a/trailer.c
> +++ b/trailer.c
> @@ -33,7 +33,7 @@ struct trailer_block {
>  	size_t trailer_nr;
>  };
>  
> -struct conf_info {
> +struct trailer_conf {

Can you also add a note about this conf_info -> trailer_conf rename? I
agree that it's an improvement but I think it should be mentioned in the
commit message.

^ permalink raw reply

* Re: [PATCH 00/10] Enrich Trailer API
From: Josh Steadmon @ 2024-01-25 23:54 UTC (permalink / raw)
  To: Linus Arver via GitGitGadget
  Cc: git, Emily Shaffer, Junio C Hamano, Christian Couder, Linus Arver
In-Reply-To: <pull.1632.git.1704869487.gitgitgadget@gmail.com>

On 2024.01.10 06:51, Linus Arver via GitGitGadget wrote:
> This patch series is the first 10 patches of a much larger series I've been
> working. The main goal of this series is to enrich the API in trailer.h. The
> larger series brings a number of additional code simplifications and
> cleanups (exposing and fixing some bugs along the way), and builds on top of
> this series. The goal of the larger series is to make the trailer interface
> ready for unit testing. By "trailer API" I mean those functions exposed in
> trailer.h.

I agree with Junio's points, and I added a small nitpick about patch 8's
commit message. But apart from that, this all looks good to me and I'm
interested in seeing the followup series as well. Thanks!

Reviewed-by: Josh Steadmon <steadmon@google.com>

^ permalink raw reply

* Re: [PATCH 08/10] trailer: prepare to move parse_trailers_from_command_line_args() to builtin
From: Linus Arver @ 2024-01-26  0:14 UTC (permalink / raw)
  To: Josh Steadmon, Linus Arver via GitGitGadget
  Cc: git, Emily Shaffer, Junio C Hamano, Christian Couder
In-Reply-To: <ZbLxG8zVZAnrCQea@google.com>

Josh Steadmon <steadmon@google.com> writes:

> On 2024.01.10 06:51, Linus Arver via GitGitGadget wrote:
>> diff --git a/trailer.c b/trailer.c
>> index 360e76376b8..e2d541372a3 100644
>> --- a/trailer.c
>> +++ b/trailer.c
>> @@ -33,7 +33,7 @@ struct trailer_block {
>>  	size_t trailer_nr;
>>  };
>>  
>> -struct conf_info {
>> +struct trailer_conf {
>
> Can you also add a note about this conf_info -> trailer_conf rename? I
> agree that it's an improvement but I think it should be mentioned in the
> commit message.

Will do.

^ permalink raw reply

* Re: [PATCH v3 0/5] completion: improvements for git-bisect
From: Britton Kerin @ 2024-01-26  0:23 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, Junio C Hamano
In-Reply-To: <ZaofJhHsFjRxx7a3@tanuki>

On Thu, Jan 18, 2024 at 10:05 PM Patrick Steinhardt <ps@pks.im> wrote:
>
> On Thu, Jan 18, 2024 at 11:43:18AM -0900, Britton Leo Kerin wrote:
> > CC to add: CC: Junio C Hamano <gitster@pobox.com>, Patrick Steinhardt <ps@pks.im>
> >
> > Relative to v2 this only changes a wrong misleading commit message.
> >
> > Bring completion for bisect up to date with current features.
>
> Thanks for your patches! I've got a few comments to bring them more in
> line with how we're doing things in the Git project, but I agree with
> the overall direction that they are going into.
>
> It might be a good idea to also add a few tests in t9902-completion.sh
> to ensure that the newly introduced completions work as expected.

Thanks for the review, other than the one philosophical issue about how
functions should be introduced/moved I agree with everything you said and
will rework the series accordingly.

Britton

^ permalink raw reply

* Re: What's cooking in git.git (Jan 2024, #07; Tue, 23)
From: Junio C Hamano @ 2024-01-26  0:26 UTC (permalink / raw)
  To: git
In-Reply-To: <xmqqfryntw4x.fsf@gitster.g>

The next issue (#08) will appear tomorrow, but here is the changes
of the draft for it, relative to #07, after today's integration.

-What's cooking in git.git (Jan 2024, #07; Tue, 23)
+What's cooking in git.git (Jan 2024, #08; Thu, 25)

--------------------------------------------------
Born topics

[New Topics]

 * jc/coc-whitespace-fix (2024-01-23) 1 commit
  - CoC: whitespace fix
 
  Docfix.
 
  Will merge to 'next'.
  source: <xmqqmssvnb8d.fsf_-_@gitster.g>

 * jc/ls-files-doc-update (2024-01-25) 1 commit
  - ls-files: avoid the verb "deprecate" for individual options
 
  The documentation for the --exclude-per-directory option marked it
  as deprecated, which confused readers into thinking there may be a
  plan to remove it in the future, which was not our intention.
 
  Will merge to 'next'.
  source: <xmqqjznybfp4.fsf@gitster.g>

 * jk/fetch-auto-tag-following-fix (2024-01-24) 1 commit
  - transport-helper: re-examine object dir after fetching
 
  Fetching via protocol v0 over Smart HTTP transport sometimes failed
  to correctly auto-follow tags.
 
  Will merge to 'next'.
  source: <20240124010056.GA2603087@coredump.intra.peff.net>

 * ps/reftable-compacted-tables-permission-fix (2024-01-24) 1 commit
  - reftable/stack: adjust permissions of compacted tables
 
  Reftable bugfix.
 
  Will merge to 'next'.
  source: <b2fb6f5ad0c558527341bd8040544d6b0ae5d8a2.1706100744.git.ps@pks.im>

 * jc/index-pack-fsck-levels (2024-01-25) 2 commits
  - index-pack: --fsck-objects to take an optional argument for fsck msgs
  - index-pack: test and document --strict=<msg>
 
  The "--fsck-objects" option of "git index-pack" now can take the
  optional parameter to tweak severity of different fsck errors.
 
  Expecting a reroll.
  cf. <xmqq8r4dt4k5.fsf@gitster.g>
  source: <pull.1658.git.git.1706215884.gitgitgadget@gmail.com>

 * zf/subtree-split-fix (2024-01-25) 1 commit
  - subtree: fix split processing with multiple subtrees present
 
  "git subtree" (in contrib/) update.
 
  Will merge to 'next'.
  source: <pull.1587.v6.git.1701442494319.gitgitgadget@gmail.com>

--------------------------------------------------
Moved from [New Topics] to [Cooking]

 * al/t2400-depipe (2024-01-20) 1 commit

 * kl/allow-working-in-dot-git-in-non-bare-repository (2024-01-20) 1 commit
- - setup: allow cwd=.git w/ bareRepository=explicit
+  (merged to 'next' on 2024-01-24 at e77b796e11)
+ + setup: allow cwd=.git w/ bareRepository=explicit
 
  Loosen "disable repository discovery of a bare repository" check,
  triggered by setting safe.bareRepository configuration variable to
  'explicit', to exclude the ".git/" directory inside a non-bare
  repository from the check.
 
- Will merge to 'next'.
+ Will merge to 'master'.
  source: <pull.1645.git.1705709303098.gitgitgadget@gmail.com>

 * rs/parse-options-with-keep-unknown-abbrev-fix (2024-01-22) 2 commits

 * pb/ci-github-skip-logs-for-broken-tests (2024-01-22) 1 commit

 * pb/complete-log-more (2024-01-22) 4 commits
- - completion: complete missing 'git log' options
- - completion: complete --encoding
- - completion: complete --patch-with-raw
- - completion: complete missing rev-list options
+  (merged to 'next' on 2024-01-24 at 081d2a92fa)
+ + completion: complete missing 'git log' options
+ + completion: complete --encoding
+ + completion: complete --patch-with-raw
+ + completion: complete missing rev-list options
 
  The completion script (in contrib/) learned more options that can
  be used with "git log".
 
- Will merge to 'next'.
+ Will merge to 'master'.
  source: <pull.1650.git.git.1705810071.gitgitgadget@gmail.com>

 * jc/reftable-core-fsync (2024-01-23) 1 commit
- - reftable: honor core.fsync
+  (merged to 'next' on 2024-01-24 at cea12beddb)
+ + reftable: honor core.fsync
 
  The write codepath for the reftable data learned to honor
  core.fsync configuration.
 
- Will merge to 'next'.
+ Will merge to 'master'.
  source: <pull.1654.git.git.1706035870956.gitgitgadget@gmail.com>

--------------------------------------------------
Other topics

[Cooking]

-* ad/custom-merge-placeholder-for-symbolic-pathnames (2024-01-18) 1 commit
+* ad/custom-merge-placeholder-for-symbolic-pathnames (2024-01-24) 1 commit
- - merge-ll: expose revision names to custom drivers
+  (merged to 'next' on 2024-01-24 at d9cf4e227d)
+ + merge-ll: expose revision names to custom drivers
 
  The labels on conflict markers for the common ancestor, our version,
  and the other version are available to custom 3-way merge driver
  via %S, %X, and %Y placeholders.
 
- Expecting a reroll.
- cf. <xmqqcytvei3c.fsf@gitster.g>
- source: <pull.1648.v3.git.git.1705615794307.gitgitgadget@gmail.com>
+ Will merge to 'master'.
+ source: <pull.1648.v4.git.git.1706126951879.gitgitgadget@gmail.com>

 * jc/reffiles-tests (2024-01-22) 12 commits
- - t5312: move reffiles specific tests to t0601
- - t4202: move reffiles specific tests to t0600
- - t3903: make drop stash test ref backend agnostic
- - t1503: move reffiles specific tests to t0600
- - t1415: move reffiles specific tests to t0601
- - t1410: move reffiles specific tests to t0600
- - t1406: move reffiles specific tests to t0600
- - t1405: move reffiles specific tests to t0601
- - t1404: move reffiles specific tests to t0600
- - t1414: convert test to use Git commands instead of writing refs manually
- - remove REFFILES prerequisite for some tests in t1405 and t2017
- - t3210: move to t0601
+  (merged to 'next' on 2024-01-24 at 0d1aaa6807)
+ + t5312: move reffiles specific tests to t0601
+ + t4202: move reffiles specific tests to t0600
+ + t3903: make drop stash test ref backend agnostic
+ + t1503: move reffiles specific tests to t0600
+ + t1415: move reffiles specific tests to t0601
+ + t1410: move reffiles specific tests to t0600
+ + t1406: move reffiles specific tests to t0600
+ + t1405: move reffiles specific tests to t0601
+ + t1404: move reffiles specific tests to t0600
+ + t1414: convert test to use Git commands instead of writing refs manually
+ + remove REFFILES prerequisite for some tests in t1405 and t2017
+ + t3210: move to t0601
 
  Tests on ref API are moved around to prepare for reftable.
 
- Will merge to 'next'.
+ Will merge to 'master'.
  cf. <Za5TW-q4cKS8pNNc@tanuki>
  source: <pull.1647.v2.git.git.1705695540.gitgitgadget@gmail.com>

-* kn/for-all-refs (2024-01-19) 5 commits
+* kn/for-all-refs (2024-01-24) 4 commits
  - for-each-ref: avoid filtering on empty pattern
  - refs: introduce `refs_for_each_all_refs()`
  - refs: extract out `loose_fill_ref_dir_regular_file()`
- - refs: make `is_pseudoref_syntax()` stricter
- - refs: expose `is_pseudoref_syntax()`
+ - refs: introduce `is_pseudoref()` and `is_headref()`
 
  "git for-each-ref" filters its output with prefixes given from the
  command line, but it did not honor an empty string to mean "pass
  everything", which has been corrected.
 
- Expecting a reroll.
- cf. <CAOLa=ZQOcqGBJOSehok4BYGUE8RKtnE9eiJYogeT8E6NWZ25xw@mail.gmail.com>
- cf. <Za-gF_Hp_lXViGWw@tanuki>
- source: <20240119142705.139374-1-karthik.188@gmail.com>
+ Expecting a reroll?
+ cf. <xmqqfrymeega.fsf@gitster.g>
+ source: <20240124152726.124873-1-karthik.188@gmail.com>

 * la/trailer-api (2024-01-12) 10 commits
  - trailer: delete obsolete argument handling code from API
  - trailer: move arg handling to interpret-trailers.c
  - trailer: prepare to move parse_trailers_from_command_line_args() to builtin
  - trailer: spread usage of "trailer_block" language
  - trailer: make trailer_info struct private
  - sequencer: use the trailer iterator
  - trailer: delete obsolete formatting functions
  - trailer: unify trailer formatting machinery
  - trailer: include "trailer" term in API functions
  - trailer: move process_trailers() to interpret-trailers.c
 
  Code clean-up.
 
- Needs review.
+ Expecting a (hopefully final and small) reroll.
+ cf. <owlysf2l2bnj.fsf@fine.c.googlers.com>
  source: <pull.1632.git.1704869487.gitgitgadget@gmail.com>

-* ps/tests-with-ref-files-backend (2024-01-12) 6 commits
+* ps/tests-with-ref-files-backend (2024-01-24) 6 commits
  - t: mark tests regarding git-pack-refs(1) to be backend specific
  - t5526: break test submodule differently
  - t1419: mark test suite as files-backend specific
  - t1302: make tests more robust with new extensions
  - t1301: mark test for `core.sharedRepository` as reffiles specific
  - t1300: make tests more robust with non-default ref backends
 
  Prepare existing tests on refs to work better with non-default
  backends.
 
  Needs review.
- source: <cover.1704877233.git.ps@pks.im>
+ source: <cover.1706085756.git.ps@pks.im>

 * sd/negotiate-trace-fix (2024-01-03) 1 commit
- - push: region_leave trace for negotiate_using_fetch
+  (merged to 'next' on 2024-01-24 at 6305853ab2)
+ + push: region_leave trace for negotiate_using_fetch
 
  Tracing fix.
 
- Will merge to 'next'?
+ Will merge to 'master'.
  source: <20240103224054.1940209-1-delmerico@google.com>

 * tb/path-filter-fix (2024-01-16) 17 commits
  - bloom: introduce `deinit_bloom_filters()`
  - commit-graph: reuse existing Bloom filters where possible
  - object.h: fix mis-aligned flag bits table
  - commit-graph: drop unnecessary `graph_read_bloom_data_context`
  - commit-graph.c: unconditionally load Bloom filters
  - bloom: prepare to discard incompatible Bloom filters
  - bloom: annotate filters with hash version
  - commit-graph: new Bloom filter version that fixes murmur3
  - repo-settings: introduce commitgraph.changedPathsVersion
  - t4216: test changed path filters with high bit paths
  - t/helper/test-read-graph: implement `bloom-filters` mode
  - bloom.h: make `load_bloom_filter_from_graph()` public
  - t/helper/test-read-graph.c: extract `dump_graph_info()`
  - gitformat-commit-graph: describe version 2 of BDAT
  - commit-graph: ensure Bloom filters are read with consistent settings
  - revision.c: consult Bloom filters for root commits
  - t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
 
  The Bloom filter used for path limited history traversal was broken
  on systems whose "char" is unsigned; update the implementation and
  bump the format version to 2.
 
- Will merge to 'next'?
+ Will merge to 'next'.
  source: <cover.1705442923.git.me@ttaylorr.com>

^ permalink raw reply

* Re: what should "git clean -n -f [-d] [-x] <pattern>" do?
From: Junio C Hamano @ 2024-01-26  7:44 UTC (permalink / raw)
  To: Sergey Organov; +Cc: Elijah Newren, git
In-Reply-To: <87a5ot6tos.fsf@osv.gnss.ru>

Sergey Organov <sorganov@gmail.com> writes:

> Sergey Organov <sorganov@gmail.com> writes:
>
>> Junio C Hamano <gitster@pobox.com> writes:
>> ..
>>> ...  If the original semantics
>>> were "you must force with -f to do anything useful", instead of "you
>>> must choose either forcing with -f or not doing with -n", then it
>>> would have led to the above behaviour.
>> ...
>> If we agree on the behavior above for sane "dry run"...

Not so fast.  I said "if the original semantics were ... then it
would have led to the above behaviour".  As the original semantics
were not, that conclusion does not stand.

The "-n" option here were not added primarily as a dry-run option,
and haven't been treated as such forever.  As can be seen by the
"you must give either -f or -n option, and it is an error to give
neither" rule, from the end-user's point of view, it is a way to say
"between do-it (-f) and do-not-do-it (-n), I choose the latter for
this invocation".  And in that context, an attempt to make "-f -f"
mean a stronger form of forcing than "-f" was a mistake, because it
makes your "I want to see what happens if I tried that opration that
requires the stronger force" request impossible.

And there are two equally valid ways to deal with this misfeature.

One is to admit that "-f -f" was a mistake (which I already said),
and a natural consequence of that admission is to introduce a more
specific "in addition to what you do usually, this riskier operation
is allowed" option (e.g., --nested-repo).  This leads to a design
that matches real world usage better, even if we did not have the
"how to ask dry-run?" issue, because in the real world, when there
are multiple "risky" things you may have to explicitly ask to
enable, these things do not necessarily form a nice linear
"riskiness levels" that you can express your risk tolerance with the
number of "-f" options.  When you need to add special protection for
a new case other than "nested repo", for example, the "riskiness
levels" design may need to place it above the "nested repo" level of
riskiness and may require the user to give three "-f" options, but
that would make it impossible to protect against nuking of nested
repos while allowing only that newly added case.  By having more
specific "this particular risky operation is allowed", "-f" can
still be "between do-it and do-not-do-it, I choose the former", and
the "--nested-repo" (and other options to allow specific risky
operations we add in the future) would not have to have funny
interactions with "-n".

The other valid way is to treat the use of the "riskiness levels" to
specify what is forced still as a good idea.  If one comes from that
position, the resulting UI would be consistent with what you have
been advocating for.  One or more "-f" will specify what kind of
risky stuff are allowed, and "-n" will say whether the operation
gets carried out or merely shown what would happen if "-n" weren't
there.

It is just that I think "riskiness levels" I did in a0f4afbe (clean:
require double -f options to nuke nested git repository and work
tree, 2009-06-30) was an utter mistake, and that is why I feel very
hesitant to agree with the design that still promotes it.

^ permalink raw reply

* Re: [PATCH v2] reftable/stack: adjust permissions of compacted tables
From: Patrick Steinhardt @ 2024-01-26 10:05 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git
In-Reply-To: <CAGAWz+7hQGMbnc8c9iCzyWQgS=wkaEXXbC7-Biqw2i7oc6rneQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2943 bytes --]

On Thu, Jan 25, 2024 at 10:02:15AM -0600, Justin Tobler wrote:
> On Wed, Jan 24, 2024 at 7:21 AM Patrick Steinhardt <ps@pks.im> wrote:
> >
> > diff --git a/reftable/stack_test.c b/reftable/stack_test.c
> > index 289e902146..2e7d1768b7 100644
> > --- a/reftable/stack_test.c
> > +++ b/reftable/stack_test.c
> > @@ -443,15 +443,16 @@ static void test_reftable_stack_add(void)
> >         int err = 0;
> >         struct reftable_write_options cfg = {
> >                 .exact_log_message = 1,
> > +               .default_permissions = 0660,
> >         };
> >         struct reftable_stack *st = NULL;
> >         char *dir = get_tmp_dir(__LINE__);
> > -
> >         struct reftable_ref_record refs[2] = { { NULL } };
> >         struct reftable_log_record logs[2] = { { NULL } };
> > +       struct strbuf scratch = STRBUF_INIT;
> 
> The variable name `scratch` seems rather vague to me as opposed to something
> like `path`. After a quick search though, `scratch` appears to be a fairly
> common name used in similar scenarios. So probably not a big deal, but
> something
> I thought I'd mention.

Yeah. I basically copied the below checks from another test where we
already had the permission checks, and also adopted the name of the
`scratch` variable. I agree though that `path` would be a better name,
so let me change it.

> > +       struct stat stat_result;
> >         int N = ARRAY_SIZE(refs);
> >
> > -
> >         err = reftable_new_stack(&st, dir, cfg);
> >         EXPECT_ERR(err);
> >         st->disable_auto_compact = 1;
> > @@ -509,12 +510,32 @@ static void test_reftable_stack_add(void)
> >                 reftable_log_record_release(&dest);
> >         }
> >
> > +#ifndef GIT_WINDOWS_NATIVE
> > +       strbuf_addstr(&scratch, dir);
> > +       strbuf_addstr(&scratch, "/tables.list");
> > +       err = stat(scratch.buf, &stat_result);
> > +       EXPECT(!err);
> > +       EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions);
> > +
> > +       strbuf_reset(&scratch);
> > +       strbuf_addstr(&scratch, dir);
> > +       strbuf_addstr(&scratch, "/");
> > +       /* do not try at home; not an external API for reftable. */
> > +       strbuf_addstr(&scratch, st->readers[0]->name);
> > +       err = stat(scratch.buf, &stat_result);
> > +       EXPECT(!err);
> > +       EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions);
> > +#else
> > +       (void) stat_result;
> > +#endif
> 
> Why do we ignore Windows here? And would it warrant explaining in the commit
> message?

Because Windows has a different acccess control model for files and
doesn't natively use POSIX permissions. I'm not a 100% sure whether we
do emulate the permission bits or not, but I cannot test on Windows and
the other test where this was ripped out of also makes the code
conditional.

Will explain in the commit message.

Thanks!

Patrick

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* [PATCH v3] reftable/stack: adjust permissions of compacted tables
From: Patrick Steinhardt @ 2024-01-26 10:09 UTC (permalink / raw)
  To: git; +Cc: Karthik Nayak, Justin Tobler
In-Reply-To: <cover.1706099090.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 3129 bytes --]

When creating a new compacted table from a range of preexisting ones we
don't set the default permissions on the resulting table when specified
by the user. This has the effect that the "core.sharedRepository" config
will not be honored correctly.

Fix this bug and add a test to catch this issue. Note that we only test
on non-Windows platforms because Windows does not use POSIX permissions
natively.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---

Changes compared to v2:

  - Extended the commit message to say why we don't test on Windows
    systems.

  - Renamed the `scratch` variable to `path`.

Thanks!

Patrick

 reftable/stack.c      |  6 ++++++
 reftable/stack_test.c | 25 +++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/reftable/stack.c b/reftable/stack.c
index 7ffeb3ee10..38e784a8ab 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -731,6 +731,12 @@ static int stack_compact_locked(struct reftable_stack *st, int first, int last,
 	strbuf_addstr(temp_tab, ".temp.XXXXXX");
 
 	tab_fd = mkstemp(temp_tab->buf);
+	if (st->config.default_permissions &&
+	    chmod(temp_tab->buf, st->config.default_permissions) < 0) {
+		err = REFTABLE_IO_ERROR;
+		goto done;
+	}
+
 	wr = reftable_new_writer(reftable_fd_write, &tab_fd, &st->config);
 
 	err = stack_write_compact(st, wr, first, last, config);
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 289e902146..5089392f7b 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -443,15 +443,16 @@ static void test_reftable_stack_add(void)
 	int err = 0;
 	struct reftable_write_options cfg = {
 		.exact_log_message = 1,
+		.default_permissions = 0660,
 	};
 	struct reftable_stack *st = NULL;
 	char *dir = get_tmp_dir(__LINE__);
-
 	struct reftable_ref_record refs[2] = { { NULL } };
 	struct reftable_log_record logs[2] = { { NULL } };
+	struct strbuf path = STRBUF_INIT;
+	struct stat stat_result;
 	int N = ARRAY_SIZE(refs);
 
-
 	err = reftable_new_stack(&st, dir, cfg);
 	EXPECT_ERR(err);
 	st->disable_auto_compact = 1;
@@ -509,12 +510,32 @@ static void test_reftable_stack_add(void)
 		reftable_log_record_release(&dest);
 	}
 
+#ifndef GIT_WINDOWS_NATIVE
+	strbuf_addstr(&path, dir);
+	strbuf_addstr(&path, "/tables.list");
+	err = stat(path.buf, &stat_result);
+	EXPECT(!err);
+	EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions);
+
+	strbuf_reset(&path);
+	strbuf_addstr(&path, dir);
+	strbuf_addstr(&path, "/");
+	/* do not try at home; not an external API for reftable. */
+	strbuf_addstr(&path, st->readers[0]->name);
+	err = stat(path.buf, &stat_result);
+	EXPECT(!err);
+	EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions);
+#else
+	(void) stat_result;
+#endif
+
 	/* cleanup */
 	reftable_stack_destroy(st);
 	for (i = 0; i < N; i++) {
 		reftable_ref_record_release(&refs[i]);
 		reftable_log_record_release(&logs[i]);
 	}
+	strbuf_release(&path);
 	clear_dir(dir);
 }
 

base-commit: e02ecfcc534e2021aae29077a958dd11c3897e4c
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH 0/5] reftable: fix writing multi-level indices
From: Patrick Steinhardt @ 2024-01-26 10:31 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 1285 bytes --]

Hi,

yesterday I noticed that writing reftables with more than 250,000 refs
led to the last block of refs in the table not being seekable anymore.
As it turns out, 250k refs was the boundary at which we start to write
multi-level indices for refs.

The root cause is a bug with how we write multi-level indices: we did
not flush out the last block of the previous index level, and thus we
didn't index it in the higher-level index. This patch series fixes this
issue and also polishes surrounding code a bit.

The topic depends on Junio's jc/reftable-core-fsync at 1df18a1c9a
(reftable: honor core.fsync, 2024-01-23) due to a semantic merge
conflict in the newly added test.

Patrick

Patrick Steinhardt (5):
  reftable/reader: be more careful about errors in indexed seeks
  reftable/writer: use correct type to iterate through index entries
  reftable/writer: simplify writing index records
  reftable/writer: fix writing multi-level indices
  reftable: document reading and writing indices

 reftable/reader.c         | 30 ++++++++++++++++++
 reftable/readwrite_test.c | 56 ++++++++++++++++++++++++++++++++++
 reftable/writer.c         | 64 ++++++++++++++++++++++-----------------
 3 files changed, 123 insertions(+), 27 deletions(-)

-- 
2.43.GIT

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* [PATCH 1/5] reftable/reader: be more careful about errors in indexed seeks
From: Patrick Steinhardt @ 2024-01-26 10:31 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1706263918.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 1003 bytes --]

When doing an indexed seek we first need to do a linear seek in order to
find the index block for our wanted key. We do not check the returned
error of the linear seek though. This is likely not an issue because the
next call to `table_iter_next()` would return error, too. But it very
much is a code smell when an error variable is being assigned to without
actually checking it.

Safeguard the code by checking for errors.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reader.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/reftable/reader.c b/reftable/reader.c
index 64dc366fb1..278f727a3d 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -509,6 +509,9 @@ static int reader_seek_indexed(struct reftable_reader *r,
 		goto done;

 	err = reader_seek_linear(&index_iter, &want_index);
+	if (err < 0)
+		goto done;
+
 	while (1) {
 		err = table_iter_next(&index_iter, &index_result);
 		table_iter_block_done(&index_iter);
-- 
2.43.GIT

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH 2/5] reftable/writer: use correct type to iterate through index entries
From: Patrick Steinhardt @ 2024-01-26 10:31 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1706263918.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 1718 bytes --]

The reftable writer is tracking the number of blocks it has to index via
the `index_len` variable. But while this variable is of type `size_t`,
some sites use an `int` to loop through the index entries.

Convert the code to consistently use `size_t`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/writer.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/reftable/writer.c b/reftable/writer.c
index 92935baa70..5a0b87b406 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -379,20 +379,21 @@ int reftable_writer_add_logs(struct reftable_writer *w,
 
 static int writer_finish_section(struct reftable_writer *w)
 {
+	struct reftable_block_stats *bstats = NULL;
 	uint8_t typ = block_writer_type(w->block_writer);
 	uint64_t index_start = 0;
 	int max_level = 0;
-	int threshold = w->opts.unpadded ? 1 : 3;
+	size_t threshold = w->opts.unpadded ? 1 : 3;
 	int before_blocks = w->stats.idx_stats.blocks;
-	int err = writer_flush_block(w);
-	int i = 0;
-	struct reftable_block_stats *bstats = NULL;
+	int err;
+
+	err = writer_flush_block(w);
 	if (err < 0)
 		return err;
 
 	while (w->index_len > threshold) {
 		struct reftable_index_record *idx = NULL;
-		int idx_len = 0;
+		size_t i, idx_len;
 
 		max_level++;
 		index_start = w->next;
@@ -630,11 +631,8 @@ int reftable_writer_close(struct reftable_writer *w)
 
 static void writer_clear_index(struct reftable_writer *w)
 {
-	int i = 0;
-	for (i = 0; i < w->index_len; i++) {
+	for (size_t i = 0; i < w->index_len; i++)
 		strbuf_release(&w->index[i].last_key);
-	}
-
 	FREE_AND_NULL(w->index);
 	w->index_len = 0;
 	w->index_cap = 0;
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH 3/5] reftable/writer: simplify writing index records
From: Patrick Steinhardt @ 2024-01-26 10:31 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1706263918.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 1556 bytes --]

When finishing the current section we may end up writing index records
for the section to the table. The logic to do so essentially copies what
we already have in `writer_add_record()`, making this more complicated
than it really has to be.

Simplify the code by using `writer_add_record()` instead.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/writer.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/reftable/writer.c b/reftable/writer.c
index 5a0b87b406..2525f236b9 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -405,6 +405,7 @@ static int writer_finish_section(struct reftable_writer *w)
 		w->index = NULL;
 		w->index_len = 0;
 		w->index_cap = 0;
+
 		for (i = 0; i < idx_len; i++) {
 			struct reftable_record rec = {
 				.type = BLOCK_TYPE_INDEX,
@@ -412,26 +413,14 @@ static int writer_finish_section(struct reftable_writer *w)
 					.idx = idx[i],
 				},
 			};
-			if (block_writer_add(w->block_writer, &rec) == 0) {
-				continue;
-			}
 
-			err = writer_flush_block(w);
+			err = writer_add_record(w, &rec);
 			if (err < 0)
 				return err;
-
-			writer_reinit_block_writer(w, BLOCK_TYPE_INDEX);
-
-			err = block_writer_add(w->block_writer, &rec);
-			if (err != 0) {
-				/* write into fresh block should always succeed
-				 */
-				abort();
-			}
 		}
-		for (i = 0; i < idx_len; i++) {
+
+		for (i = 0; i < idx_len; i++)
 			strbuf_release(&idx[i].last_key);
-		}
 		reftable_free(idx);
 	}
 
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH 4/5] reftable/writer: fix writing multi-level indices
From: Patrick Steinhardt @ 2024-01-26 10:31 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1706263918.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 4607 bytes --]

When finishing a section we will potentially write an index that makes
it more efficient to look up relevant blocks. The index records written
will encode, for each block of the indexed section, what the offset of
that block is as well as the last key of that block. Thus, the reader
would iterate through the index records to find the first key larger or
equal to the wanted key and then use the encoded offset to look up the
desired block.

When there are a lot of blocks to index though we may end up writing
multiple index blocks, too. To not require a linear search across all
index blocks we instead end up writing a multi-level index. Instead of
referring to the block we are after, an index record may point to
another index block. The reader will then access the highest-level index
and follow down the chain of index blocks until it hits the sought-after
block.

It has been observed though that it is impossible to seek ref records of
the last ref block when using a multi-level index. While the multi-level
index exists and looks fine for most of the part, the highest-level
index was missing an index record pointing to the last block of the next
index. Thus, every additional level made more refs become unseekable at
the end of the ref section.

The root cause is that we are not flushing the last block of the current
level once done writing the level. Consequently, it wasn't recorded in
the blocks that need to be indexed by the next-higher level and thus we
forgot about it.

Fix this bug by flushing blocks after we have written all index records.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/readwrite_test.c | 56 +++++++++++++++++++++++++++++++++++++++
 reftable/writer.c         |  8 +++---
 2 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 6b99daeaf2..853923397e 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -866,6 +866,61 @@ static void test_write_multiple_indices(void)
 	strbuf_release(&buf);
 }

+static void test_write_multi_level_index(void)
+{
+	struct reftable_write_options opts = {
+		.block_size = 100,
+	};
+	struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT;
+	struct reftable_block_source source = { 0 };
+	struct reftable_iterator it = { 0 };
+	const struct reftable_stats *stats;
+	struct reftable_writer *writer;
+	struct reftable_reader *reader;
+	int err;
+
+	writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts);
+	reftable_writer_set_limits(writer, 1, 1);
+	for (size_t i = 0; i < 200; i++) {
+		struct reftable_ref_record ref = {
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = {i},
+		};
+
+		strbuf_reset(&buf);
+		strbuf_addf(&buf, "refs/heads/%03" PRIuMAX, (uintmax_t)i);
+		ref.refname = buf.buf,
+
+		err = reftable_writer_add_ref(writer, &ref);
+		EXPECT_ERR(err);
+	}
+	reftable_writer_close(writer);
+
+	/*
+	 * The written refs should be sufficiently large to result in a
+	 * multi-level index.
+	 */
+	stats = reftable_writer_stats(writer);
+	EXPECT(stats->ref_stats.max_index_level == 2);
+
+	block_source_from_strbuf(&source, &writer_buf);
+	err = reftable_new_reader(&reader, &source, "filename");
+	EXPECT_ERR(err);
+
+	/*
+	 * Seeking the last ref should work as expected.
+	 */
+	err = reftable_reader_seek_ref(reader, &it, "refs/heads/199");
+	EXPECT_ERR(err);
+
+	reftable_iterator_destroy(&it);
+	reftable_writer_free(writer);
+	reftable_reader_free(reader);
+	strbuf_release(&writer_buf);
+	strbuf_release(&buf);
+}
+
 static void test_corrupt_table_empty(void)
 {
 	struct strbuf buf = STRBUF_INIT;
@@ -916,5 +971,6 @@ int readwrite_test_main(int argc, const char *argv[])
 	RUN_TEST(test_write_object_id_length);
 	RUN_TEST(test_write_object_id_min_length);
 	RUN_TEST(test_write_multiple_indices);
+	RUN_TEST(test_write_multi_level_index);
 	return 0;
 }
diff --git a/reftable/writer.c b/reftable/writer.c
index 2525f236b9..24fce5630a 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -419,15 +419,15 @@ static int writer_finish_section(struct reftable_writer *w)
 				return err;
 		}

+		err = writer_flush_block(w);
+		if (err < 0)
+			return err;
+
 		for (i = 0; i < idx_len; i++)
 			strbuf_release(&idx[i].last_key);
 		reftable_free(idx);
 	}

-	err = writer_flush_block(w);
-	if (err < 0)
-		return err;
-
 	writer_clear_index(w);

 	bstats = writer_reftable_block_stats(w, typ);
-- 
2.43.GIT

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH 5/5] reftable: document reading and writing indices
From: Patrick Steinhardt @ 2024-01-26 10:31 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1706263918.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 3863 bytes --]

The way the index gets written and read is not trivial at all and
requires the reader to piece together a bunch of parts to figure out how
it works. Add some documentation to hopefully make this easier to
understand for the next reader.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reader.c | 27 +++++++++++++++++++++++++++
 reftable/writer.c | 23 +++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/reftable/reader.c b/reftable/reader.c
index 278f727a3d..6011d0aa04 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -508,11 +508,38 @@ static int reader_seek_indexed(struct reftable_reader *r,
 	if (err < 0)
 		goto done;
 
+	/*
+	 * The index may consist of multiple levels, where each level may have
+	 * multiple index blocks. We start by doing a linear search in the
+	 * highest layer that identifies the relevant index block as well as
+	 * the record inside that block that corresponds to our wanted key.
+	 */
 	err = reader_seek_linear(&index_iter, &want_index);
 	if (err < 0)
 		goto done;
 
+	/*
+	 * Traverse down the levels until we find a non-index entry.
+	 */
 	while (1) {
+		/*
+		 * In case we seek a record that does not exist the index iter
+		 * will tell us that the iterator is over. This works because
+		 * the last index entry of the current level will contain the
+		 * last key it knows about. So in case our seeked key is larger
+		 * than the last indexed key we know that it won't exist.
+		 *
+		 * There is one subtlety in the layout of the index section
+		 * that makes this work as expected: the highest-level index is
+		 * at end of the section and will point backwards and thus we
+		 * start reading from the end of the index section, not the
+		 * beginning.
+		 *
+		 * If that wasn't the case and the order was reversed then the
+		 * linear seek would seek into the lower levels and traverse
+		 * all levels of the index only to find out that the key does
+		 * not exist.
+		 */
 		err = table_iter_next(&index_iter, &index_result);
 		table_iter_block_done(&index_iter);
 		if (err != 0)
diff --git a/reftable/writer.c b/reftable/writer.c
index 24fce5630a..6028789dee 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -391,6 +391,24 @@ static int writer_finish_section(struct reftable_writer *w)
 	if (err < 0)
 		return err;
 
+	/*
+	 * When the section we are about to index has a lot of blocks then the
+	 * index itself may span across multiple blocks, as well. This would
+	 * require a linear scan over index blocks only to find the desired
+	 * indexed block, which is inefficient. Instead, we write a multi-level
+	 * index where index records of level N+1 will refer to index blocks of
+	 * level N. This isn't constant time, either, but at least logarithmic.
+	 *
+	 * This loop handles writing this multi-level index. Note that we write
+	 * the lowest-level index pointing to the indexed blocks first. We then
+	 * continue writing additional index levels until the current level has
+	 * less blocks than the threshold so that the highest level will be at
+	 * the end of the index section.
+	 *
+	 * Readers are thus required to start reading the index section from
+	 * its end, which is why we set `index_start` to the beginning of the
+	 * last index section.
+	 */
 	while (w->index_len > threshold) {
 		struct reftable_index_record *idx = NULL;
 		size_t i, idx_len;
@@ -428,6 +446,11 @@ static int writer_finish_section(struct reftable_writer *w)
 		reftable_free(idx);
 	}
 
+	/*
+	 * The index may still contain a number of index blocks lower than the
+	 * threshold. Clear it so that these entries don't leak into the next
+	 * index section.
+	 */
 	writer_clear_index(w);
 
 	bstats = writer_reftable_block_stats(w, typ);
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* Re: what should "git clean -n -f [-d] [-x] <pattern>" do?
From: Sergey Organov @ 2024-01-26 12:09 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Elijah Newren, git
In-Reply-To: <xmqqzfwspmh0.fsf@gitster.g>

Junio C Hamano <gitster@pobox.com> writes:

> Sergey Organov <sorganov@gmail.com> writes:
>
>> Sergey Organov <sorganov@gmail.com> writes:
>>
>>> Junio C Hamano <gitster@pobox.com> writes:
>>> ..
>>>> ...  If the original semantics
>>>> were "you must force with -f to do anything useful", instead of "you
>>>> must choose either forcing with -f or not doing with -n", then it
>>>> would have led to the above behaviour.
>>> ...
>>> If we agree on the behavior above for sane "dry run"...
>
> Not so fast.  I said "if the original semantics were ... then it
> would have led to the above behaviour".  As the original semantics
> were not, that conclusion does not stand.

OK, fine, then my point is that the original semantics if flawed.

>
> The "-n" option here were not added primarily as a dry-run option,
> and haven't been treated as such forever.  As can be seen by the
> "you must give either -f or -n option, and it is an error to give
> neither" rule, from the end-user's point of view, it is a way to say
> "between do-it (-f) and do-not-do-it (-n), I choose the latter for
> this invocation".

Yep, and in my opinion this is even more a mistake than "-f -f".

> And in that context, an attempt to make "-f -f"
> mean a stronger form of forcing than "-f" was a mistake, because it
> makes your "I want to see what happens if I tried that opration that
> requires the stronger force" request impossible.

I believe this just emphasizes the original mistake of "-n" design
meaning something else than simple "dry run".

>
> And there are two equally valid ways to deal with this misfeature.

I rather see two almost independent misfeatures here, so I believe both
are to be addressed.

>
> One is to admit that "-f -f" was a mistake (which I already said),
> and a natural consequence of that admission is to introduce a more
> specific "in addition to what you do usually, this riskier operation
> is allowed" option (e.g., --nested-repo).

This addresses one of the two deficiencies I see, yes.

> This leads to a design that matches real world usage better, even if
> we did not have the "how to ask dry-run?" issue, because in the real
> world, when there are multiple "risky" things you may have to
> explicitly ask to enable, these things do not necessarily form a nice
> linear "riskiness levels" that you can express your risk tolerance
> with the number of "-f" options. When you need to add special
> protection for a new case other than "nested repo", for example, the
> "riskiness levels" design may need to place it above the "nested repo"
> level of riskiness and may require the user to give three "-f"
> options, but that would make it impossible to protect against nuking
> of nested repos while allowing only that newly added case. By having
> more specific "this particular risky operation is allowed", "-f" can
> still be "between do-it and do-not-do-it, I choose the former",

Yep, makes sense.

> and  the "--nested-repo" (and other options to allow specific risky
> operations we add in the future) would not have to have funny
> interactions with "-n".

Yep, but it still leaves "-n" being defective, as it for whatever reason
surprisingly clashes with "-f". I believe it shouldn't.

> The other valid way is to treat the use of the "riskiness levels" to
> specify what is forced still as a good idea.  If one comes from that
> position, the resulting UI would be consistent with what you have
> been advocating for.  One or more "-f" will specify what kind of
> risky stuff are allowed, and "-n" will say whether the operation
> gets carried out or merely shown what would happen if "-n" weren't
> there.

I'm not arguing in favor of "-f -f". My point is that even if you fix
"-f -f", "-n" deficiency will still cry for fixing.

>
> It is just that I think "riskiness levels" I did in a0f4afbe (clean:
> require double -f options to nuke nested git repository and work
> tree, 2009-06-30) was an utter mistake, and that is why I feel very
> hesitant to agree with the design that still promotes it.

Again, I'm not arguing in favor of "-f -f", I'm rather neutral about it.

I'm still arguing in favor of fixing "-n", and I believe a fix is needed
independently from decision about "-f -f".

Thanks,
-- Sergey Organov

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox