From: "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Eric Sunshine <sunshine@sunshineco.com>,
Patrick Steinhardt <ps@pks.im>, Victoria Dye <vdye@github.com>,
Victoria Dye <vdye@github.com>
Subject: [PATCH v2 11/17] mktree: overwrite duplicate entries
Date: Wed, 19 Jun 2024 21:57:59 +0000 [thread overview]
Message-ID: <fb555658057f834d94f232f1d8b380a6304a3671.1718834285.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1746.v2.git.1718834285.gitgitgadget@gmail.com>
From: Victoria Dye <vdye@github.com>
If multiple tree entries with the same name are provided as input to
'mktree', only write the last one to the tree. Entries are considered
duplicates if they have identical names (*not* considering mode); if a blob
and a tree with the same name are provided, only the last one will be
written to the tree. A tree with duplicate entries is invalid (per 'git
fsck'), so that condition should be avoided wherever possible.
Signed-off-by: Victoria Dye <vdye@github.com>
---
Documentation/git-mktree.txt | 3 ++-
builtin/mktree.c | 45 ++++++++++++++++++++++++++++++++----
t/t1010-mktree.sh | 36 +++++++++++++++++++++++++++--
3 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/Documentation/git-mktree.txt b/Documentation/git-mktree.txt
index 5f3a6dfe38e..cf1fd82f754 100644
--- a/Documentation/git-mktree.txt
+++ b/Documentation/git-mktree.txt
@@ -54,7 +54,8 @@ cannot be represented in a tree object. The command will fail without
writing the tree if a higher order stage is specified for any entry.
The order of the tree entries is normalized by `mktree` so pre-sorting the
-input by path is not required.
+input by path is not required. If multiple entries are given for the same
+path (regardless of mode), only the last one specified is added to the tree.
GIT
---
diff --git a/builtin/mktree.c b/builtin/mktree.c
index 8f0af24b6b1..a91d3a7b028 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -15,6 +15,9 @@
#include "object-store-ll.h"
struct tree_entry {
+ /* Internal */
+ size_t order;
+
unsigned mode;
struct object_id oid;
int len;
@@ -74,15 +77,49 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
ent->len = len;
oidcpy(&ent->oid, oid);
+ ent->order = arr->nr;
tree_entry_array_push(arr, ent);
}
-static int ent_compare(const void *a_, const void *b_)
+/*
+ * QSORT_S() comparator for an array of 'struct tree_entry *'.
+ *
+ * 'ctx' points to an int flag: when non-zero, entries are compared by name
+ * only (name_compare()); when zero, the mode is taken into account as well
+ * (base_name_compare()). Entries that compare equal are ordered by
+ * descending 'order', i.e. the one appended last sorts first.
+ */
+static int ent_compare(const void *a_, const void *b_, void *ctx)
{
+ int cmp;
struct tree_entry *a = *(struct tree_entry **)a_;
struct tree_entry *b = *(struct tree_entry **)b_;
- return base_name_compare(a->name, a->len, a->mode,
- b->name, b->len, b->mode);
+ int ignore_mode = *((int *)ctx);
+
+ if (ignore_mode)
+ cmp = name_compare(a->name, a->len, b->name, b->len);
+ else
+ cmp = base_name_compare(a->name, a->len, a->mode,
+ b->name, b->len, b->mode);
+ if (cmp)
+ return cmp;
+
+ /*
+ * 'order' is a size_t: returning the raw difference would narrow an
+ * unsigned value to int, so derive the sign from explicit comparisons.
+ */
+ return (b->order > a->order) - (b->order < a->order);
+}
+
+/*
+ * Sort 'arr' and drop entries with duplicate names, keeping for each name
+ * only the entry that was appended last. Dropped entries are freed.
+ */
+static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
+{
+ size_t count = arr->nr;
+ struct tree_entry *prev = NULL;
+
+ /*
+ * Sort by name only; ent_compare() places the most recently appended
+ * entry first among entries with equal names.
+ */
+ int ignore_mode = 1;
+ QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
+
+ /* Compact in place, keeping the first entry of each run of equal names */
+ arr->nr = 0;
+ for (size_t i = 0; i < count; i++) {
+ struct tree_entry *curr = arr->entries[i];
+ if (prev &&
+ !name_compare(prev->name, prev->len,
+ curr->name, curr->len)) {
+ /* Clear the slot itself so no dangling pointer is left behind */
+ FREE_AND_NULL(arr->entries[i]);
+ } else {
+ arr->entries[arr->nr++] = curr;
+ prev = curr;
+ }
+ }
+
+ /* Sort again to order the entries for tree insertion */
+ ignore_mode = 0;
+ QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
}
static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
@@ -90,7 +127,7 @@ static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
struct strbuf buf;
size_t size = 0;
- QSORT(arr->entries, arr->nr, ent_compare);
+ sort_and_dedup_tree_entry_array(arr);
for (size_t i = 0; i < arr->nr; i++)
size += 32 + arr->entries[i]->len;
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index 7e750530455..08760141d6f 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -6,11 +6,16 @@ TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh
test_expect_success setup '
- for d in a a- a0
+ for d in folder folder- folder0
do
mkdir "$d" && echo "$d/one" >"$d/one" &&
git add "$d" || return 1
done &&
+ for f in before folder.txt later
+ do
+ echo "$f" >"$f" &&
+ git add "$f" || return 1
+ done &&
echo zero >one &&
git update-index --add --info-only one &&
git write-tree --missing-ok >tree.missing &&
@@ -171,7 +176,7 @@ test_expect_success '--literally can create invalid trees' '
test_expect_success 'mktree validates path' '
tree_oid="$(cat tree)" &&
- blob_oid="$(git rev-parse $tree_oid:a/one)" &&
+ blob_oid="$(git rev-parse $tree_oid:folder.txt)" &&
head_oid="$(git rev-parse HEAD)" &&
# Valid: tree with or without trailing slash, blob without trailing slash
@@ -202,4 +207,31 @@ test_expect_success 'mktree validates path' '
test_grep "invalid path ${SQ}.git/${SQ}" err
'
+# "test" and "test-" each appear twice in the input with different modes;
+# only the last entry given for each name should end up in the tree.
+test_expect_success 'mktree with duplicate entries' '
+ tree_oid=$(cat tree) &&
+ folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+ before_oid=$(git rev-parse ${tree_oid}:before) &&
+ head_oid=$(git rev-parse HEAD) &&
+
+ {
+ printf "100755 blob $before_oid\ttest\n" &&
+ printf "040000 tree $folder_oid\ttest-\n" &&
+ printf "160000 commit $head_oid\ttest.txt\n" &&
+ printf "040000 tree $folder_oid\ttest\n" &&
+ printf "100644 blob $before_oid\ttest0\n" &&
+ printf "160000 commit $head_oid\ttest-\n"
+ } >top.dup &&
+ git mktree <top.dup >tree.actual &&
+
+ {
+ printf "160000 commit $head_oid\ttest-\n" &&
+ printf "160000 commit $head_oid\ttest.txt\n" &&
+ printf "040000 tree $folder_oid\ttest\n" &&
+ printf "100644 blob $before_oid\ttest0\n"
+ } >expect &&
+ git ls-tree $(cat tree.actual) >actual &&
+
+ test_cmp expect actual
+'
+
test_done
--
gitgitgadget
next prev parent reply other threads:[~2024-06-19 21:58 UTC|newest]
Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-11 18:24 [PATCH 00/16] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 01/16] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 02/16] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 03/16] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-11 18:45 ` Eric Sunshine
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 04/16] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-11 22:45 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 05/16] index-info.c: identify empty input lines in read_index_info Victoria Dye via GitGitGadget
2024-06-11 22:52 ` Junio C Hamano
2024-06-18 17:33 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 06/16] index-info.c: parse object type in provided " Victoria Dye via GitGitGadget
2024-06-12 1:54 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 07/16] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-12 2:11 ` Junio C Hamano
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-12 18:35 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 08/16] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-12 2:18 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 09/16] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-12 2:26 ` Junio C Hamano
2024-06-12 19:01 ` Victoria Dye
2024-06-12 19:45 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 10/16] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-12 18:48 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 11/16] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 12/16] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-13 18:38 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 13/16] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 14/16] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-12 19:50 ` Junio C Hamano
2024-06-17 19:23 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 15/16] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 16/16] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 00/17] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 01/17] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 02/17] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 03/17] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 04/17] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 05/17] index-info.c: return unrecognized lines to caller Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 06/17] index-info.c: parse object type in provided in read_index_info Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 07/17] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-20 20:18 ` Junio C Hamano
2024-06-19 21:57 ` [PATCH v2 08/17] mktree.c: do not fail on mismatched submodule type Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 09/17] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 10/17] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-19 21:57 ` Victoria Dye via GitGitGadget [this message]
2024-06-20 22:05 ` [PATCH v2 11/17] mktree: overwrite duplicate entries Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 12/17] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-20 22:26 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 13/17] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-26 21:10 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 14/17] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-19 21:58 ` [PATCH v2 15/17] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-26 21:23 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 16/17] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-27 19:29 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 17/17] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-25 23:26 ` [PATCH v2 00/17] mktree: support more flexible usage Junio C Hamano
2024-07-10 21:40 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fb555658057f834d94f232f1d8b380a6304a3671.1718834285.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=ps@pks.im \
--cc=sunshine@sunshineco.com \
--cc=vdye@github.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).