git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Eric Sunshine <sunshine@sunshineco.com>,
	Patrick Steinhardt <ps@pks.im>, Victoria Dye <vdye@github.com>,
	Victoria Dye <vdye@github.com>
Subject: [PATCH v2 11/17] mktree: overwrite duplicate entries
Date: Wed, 19 Jun 2024 21:57:59 +0000	[thread overview]
Message-ID: <fb555658057f834d94f232f1d8b380a6304a3671.1718834285.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1746.v2.git.1718834285.gitgitgadget@gmail.com>

From: Victoria Dye <vdye@github.com>

If multiple tree entries with the same name are provided as input to
'mktree', only write the last one to the tree. Entries are considered
duplicates if they have identical names (*not* considering mode); if a blob
and a tree with the same name are provided, only the last one will be
written to the tree. A tree with duplicate entries is invalid (per 'git
fsck'), so that condition should be avoided wherever possible.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 Documentation/git-mktree.txt |  3 ++-
 builtin/mktree.c             | 45 ++++++++++++++++++++++++++++++++----
 t/t1010-mktree.sh            | 36 +++++++++++++++++++++++++++--
 3 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-mktree.txt b/Documentation/git-mktree.txt
index 5f3a6dfe38e..cf1fd82f754 100644
--- a/Documentation/git-mktree.txt
+++ b/Documentation/git-mktree.txt
@@ -54,7 +54,8 @@ cannot be represented in a tree object. The command will fail without
 writing the tree if a higher order stage is specified for any entry.
 
 The order of the tree entries is normalized by `mktree` so pre-sorting the
-input by path is not required.
+input by path is not required. Multiple entries provided with the same path
+are deduplicated, with only the last one specified added to the tree.
 
 GIT
 ---
diff --git a/builtin/mktree.c b/builtin/mktree.c
index 8f0af24b6b1..a91d3a7b028 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -15,6 +15,9 @@
 #include "object-store-ll.h"
 
 struct tree_entry {
+	/* Internal */
+	size_t order;
+
 	unsigned mode;
 	struct object_id oid;
 	int len;
@@ -74,15 +77,49 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
 	ent->len = len;
 	oidcpy(&ent->oid, oid);
 
+	ent->order = arr->nr;
 	tree_entry_array_push(arr, ent);
 }
 
-static int ent_compare(const void *a_, const void *b_)
+static int ent_compare(const void *a_, const void *b_, void *ctx)
 {
+	int cmp;
 	struct tree_entry *a = *(struct tree_entry **)a_;
 	struct tree_entry *b = *(struct tree_entry **)b_;
-	return base_name_compare(a->name, a->len, a->mode,
-				 b->name, b->len, b->mode);
+	int ignore_mode = *((int *)ctx);
+
+	if (ignore_mode)
+		cmp = name_compare(a->name, a->len, b->name, b->len);
+	else
+		cmp = base_name_compare(a->name, a->len, a->mode,
+					b->name, b->len, b->mode);
+	return cmp ? cmp : b->order - a->order;
+}
+
+static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
+{
+	size_t count = arr->nr;
+	struct tree_entry *prev = NULL;
+
+	int ignore_mode = 1;
+	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
+
+	arr->nr = 0;
+	for (size_t i = 0; i < count; i++) {
+		struct tree_entry *curr = arr->entries[i];
+		if (prev &&
+		    !name_compare(prev->name, prev->len,
+				  curr->name, curr->len)) {
+			FREE_AND_NULL(curr);
+		} else {
+			arr->entries[arr->nr++] = curr;
+			prev = curr;
+		}
+	}
+
+	/* Sort again to order the entries for tree insertion */
+	ignore_mode = 0;
+	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
 }
 
 static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
@@ -90,7 +127,7 @@ static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
 	struct strbuf buf;
 	size_t size = 0;
 
-	QSORT(arr->entries, arr->nr, ent_compare);
+	sort_and_dedup_tree_entry_array(arr);
 	for (size_t i = 0; i < arr->nr; i++)
 		size += 32 + arr->entries[i]->len;
 
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index 7e750530455..08760141d6f 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -6,11 +6,16 @@ TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success setup '
-	for d in a a- a0
+	for d in folder folder- folder0
 	do
 		mkdir "$d" && echo "$d/one" >"$d/one" &&
 		git add "$d" || return 1
 	done &&
+	for f in before folder.txt later
+	do
+		echo "$f" >"$f" &&
+		git add "$f" || return 1
+	done &&
 	echo zero >one &&
 	git update-index --add --info-only one &&
 	git write-tree --missing-ok >tree.missing &&
@@ -171,7 +176,7 @@ test_expect_success '--literally can create invalid trees' '
 
 test_expect_success 'mktree validates path' '
 	tree_oid="$(cat tree)" &&
-	blob_oid="$(git rev-parse $tree_oid:a/one)" &&
+	blob_oid="$(git rev-parse $tree_oid:folder.txt)" &&
 	head_oid="$(git rev-parse HEAD)" &&
 
 	# Valid: tree with or without trailing slash, blob without trailing slash
@@ -202,4 +207,31 @@ test_expect_success 'mktree validates path' '
 	test_grep "invalid path ${SQ}.git/${SQ}" err
 '
 
+test_expect_success 'mktree with duplicate entries' '
+	tree_oid=$(cat tree) &&
+	folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+	before_oid=$(git rev-parse ${tree_oid}:before) &&
+	head_oid=$(git rev-parse HEAD) &&
+
+	{
+		printf "100755 blob $before_oid\ttest\n" &&
+		printf "040000 tree $folder_oid\ttest-\n" &&
+		printf "160000 commit $head_oid\ttest.txt\n" &&
+		printf "040000 tree $folder_oid\ttest\n" &&
+		printf "100644 blob $before_oid\ttest0\n" &&
+		printf "160000 commit $head_oid\ttest-\n"
+	} >top.dup &&
+	git mktree <top.dup >tree.actual &&
+
+	{
+		printf "160000 commit $head_oid\ttest-\n" &&
+		printf "160000 commit $head_oid\ttest.txt\n" &&
+		printf "040000 tree $folder_oid\ttest\n" &&
+		printf "100644 blob $before_oid\ttest0\n"
+	} >expect &&
+	git ls-tree $(cat tree.actual) >actual &&
+
+	test_cmp expect actual
+'
+
 test_done
-- 
gitgitgadget


  parent reply	other threads:[~2024-06-19 21:58 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-11 18:24 [PATCH 00/16] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 01/16] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 02/16] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 03/16] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-11 18:45   ` Eric Sunshine
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 04/16] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-11 22:45   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 05/16] index-info.c: identify empty input lines in read_index_info Victoria Dye via GitGitGadget
2024-06-11 22:52   ` Junio C Hamano
2024-06-18 17:33     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 06/16] index-info.c: parse object type in provided " Victoria Dye via GitGitGadget
2024-06-12  1:54   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 07/16] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-12  2:11   ` Junio C Hamano
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 18:35     ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 08/16] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-12  2:18   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 09/16] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-12  2:26   ` Junio C Hamano
2024-06-12 19:01     ` Victoria Dye
2024-06-12 19:45       ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 10/16] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 18:48     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 11/16] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 12/16] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-13 18:38     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 13/16] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 14/16] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 19:50     ` Junio C Hamano
2024-06-17 19:23     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 15/16] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 16/16] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 00/17] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 01/17] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 02/17] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 03/17] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 04/17] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 05/17] index-info.c: return unrecognized lines to caller Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 06/17] index-info.c: parse object type in provided in read_index_info Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 07/17] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-20 20:18     ` Junio C Hamano
2024-06-19 21:57   ` [PATCH v2 08/17] mktree.c: do not fail on mismatched submodule type Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 09/17] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 10/17] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-19 21:57   ` Victoria Dye via GitGitGadget [this message]
2024-06-20 22:05     ` [PATCH v2 11/17] mktree: overwrite duplicate entries Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 12/17] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-20 22:26     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 13/17] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-26 21:10     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 14/17] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-19 21:58   ` [PATCH v2 15/17] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-26 21:23     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 16/17] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-27 19:29     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 17/17] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-25 23:26   ` [PATCH v2 00/17] mktree: support more flexible usage Junio C Hamano
2024-07-10 21:40     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fb555658057f834d94f232f1d8b380a6304a3671.1718834285.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=ps@pks.im \
    --cc=sunshine@sunshineco.com \
    --cc=vdye@github.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).