From: "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Victoria Dye <vdye@github.com>, Victoria Dye <vdye@github.com>
Subject: [PATCH 15/16] mktree: allow deeper paths in input
Date: Tue, 11 Jun 2024 18:24:47 +0000 [thread overview]
Message-ID: <058354f45f7b837ebeb08337a8dfd6e0ec1e9d1b.1718130288.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1746.git.1718130288.gitgitgadget@gmail.com>
From: Victoria Dye <vdye@github.com>
Update 'git mktree' to handle entries nested inside of directories (e.g.
'path/to/a/file.txt'). This functionality requires a series of changes:
* In 'sort_and_dedup_tree_entry_array()', remove entries nested inside a
directory when that directory is specified later in the input.
* Also in 'sort_and_dedup_tree_entry_array()', mark directories that contain
entries that come after them in input order (e.g., 'folder/' followed by
'folder/file.txt') as "need to expand".
* In 'add_tree_entry_to_index()', if a tree entry is marked as "need to
expand", recurse into it with 'read_tree_at()' & 'build_index_from_tree()'.
* In 'build_index_from_tree()', if a user-specified tree entry is contained
within the current iterated entry, return 'READ_TREE_RECURSIVE' to recurse
into the iterated tree.
Signed-off-by: Victoria Dye <vdye@github.com>
---
Documentation/git-mktree.txt | 5 ++
builtin/mktree.c | 101 ++++++++++++++++++++++++++++++---
t/t1010-mktree.sh | 107 +++++++++++++++++++++++++++++++++--
3 files changed, 200 insertions(+), 13 deletions(-)
diff --git a/Documentation/git-mktree.txt b/Documentation/git-mktree.txt
index 99abd3c31a6..db90fdcdc8f 100644
--- a/Documentation/git-mktree.txt
+++ b/Documentation/git-mktree.txt
@@ -50,6 +50,11 @@ INPUT FORMAT
Tree entries may be specified in any of the formats compatible with the
`--index-info` option to linkgit:git-update-index[1].
+Entries may use full pathnames containing directory separators to specify
+entries nested within one or more directories. These entries are inserted into
+the appropriate tree in the base tree-ish if one exists. Otherwise, empty parent
+trees are created to contain the entries.
+
The order of the tree entries is normalized by `mktree` so pre-sorting the input
by path is not required. Multiple entries provided with the same path are
deduplicated, with only the last one specified added to the tree.
diff --git a/builtin/mktree.c b/builtin/mktree.c
index 9e9d2554cad..00b77869a56 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -22,6 +22,7 @@ struct tree_entry {
/* Internal */
size_t order;
+ int expand_dir;
unsigned mode;
struct object_id oid;
@@ -39,6 +40,7 @@ struct tree_entry_array {
struct tree_entry **entries;
struct hashmap df_name_hash;
+ int has_nested_entries;
};
static int df_name_hash_cmp(const void *cmp_data UNUSED,
@@ -70,6 +72,13 @@ static void tree_entry_array_push(struct tree_entry_array *arr, struct tree_entr
arr->entries[arr->nr++] = ent;
}
+static struct tree_entry *tree_entry_array_pop(struct tree_entry_array *arr)
+{
+ if (!arr->nr)
+ return NULL;
+ return arr->entries[--arr->nr];
+}
+
static void clear_tree_entry_array(struct tree_entry_array *arr)
{
hashmap_clear(&arr->df_name_hash);
@@ -107,8 +116,10 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
if (!verify_path(ent->name, mode))
die(_("invalid path '%s'"), path);
- if (strchr(ent->name, '/'))
- die("path %s contains slash", path);
+
+ /* mark has_nested_entries if needed */
+ if (!arr->has_nested_entries && strchr(ent->name, '/'))
+ arr->has_nested_entries = 1;
/* Add trailing slash to dir */
if (S_ISDIR(mode))
@@ -167,6 +178,46 @@ static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
ignore_mode = 0;
QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
+ if (arr->has_nested_entries) {
+ struct tree_entry_array parent_dir_ents = { 0 };
+
+ count = arr->nr;
+ arr->nr = 0;
+
+ /* Remove any entries where one of its parent dirs has a higher 'order' */
+ for (size_t i = 0; i < count; i++) {
+ const char *skipped_prefix;
+ struct tree_entry *parent;
+ struct tree_entry *curr = arr->entries[i];
+ int skip_entry = 0;
+
+ while ((parent = tree_entry_array_pop(&parent_dir_ents))) {
+ if (!skip_prefix(curr->name, parent->name, &skipped_prefix))
+ continue;
+
+ /* entry in dir, so we push the parent back onto the stack */
+ tree_entry_array_push(&parent_dir_ents, parent);
+
+ if (parent->order > curr->order)
+ skip_entry = 1;
+ else
+ parent->expand_dir = 1;
+
+ break;
+ }
+
+ if (!skip_entry) {
+ arr->entries[arr->nr++] = curr;
+ if (S_ISDIR(curr->mode))
+ tree_entry_array_push(&parent_dir_ents, curr);
+ } else {
+ FREE_AND_NULL(curr);
+ }
+ }
+
+ release_tree_entry_array(&parent_dir_ents);
+ }
+
/* Finally, initialize the directory-file conflict hash map */
for (size_t i = 0; i < count; i++) {
struct tree_entry *curr = arr->entries[i];
@@ -214,15 +265,40 @@ struct build_index_data {
struct index_state istate;
};
+static int build_index_from_tree(const struct object_id *oid,
+ struct strbuf *base, const char *filename,
+ unsigned mode, void *context);
+
static int add_tree_entry_to_index(struct build_index_data *data,
struct tree_entry *ent)
{
- struct cache_entry *ce;
- ce = make_cache_entry(&data->istate, ent->mode, &ent->oid, ent->name, 0, 0);
- if (!ce)
- return error(_("make_cache_entry failed for path '%s'"), ent->name);
+ if (ent->expand_dir) {
+ int ret = 0;
+ struct pathspec ps = { 0 };
+ struct tree *subtree = parse_tree_indirect(&ent->oid);
+ struct strbuf base_path = STRBUF_INIT;
+ strbuf_add(&base_path, ent->name, ent->len);
+
+ if (!subtree)
+ ret = error(_("not a tree object: %s"), oid_to_hex(&ent->oid));
+ else if (read_tree_at(the_repository, subtree, &base_path, 0, &ps,
+ build_index_from_tree, data) < 0)
+ ret = -1;
+
+ strbuf_release(&base_path);
+ if (ret)
+ return ret;
+
+ } else {
+ struct cache_entry *ce = make_cache_entry(&data->istate,
+ ent->mode, &ent->oid,
+ ent->name, 0, 0);
+ if (!ce)
+ return error(_("make_cache_entry failed for path '%s'"), ent->name);
+
+ add_index_entry(&data->istate, ce, ADD_CACHE_JUST_APPEND);
+ }
- add_index_entry(&data->istate, ce, ADD_CACHE_JUST_APPEND);
return 0;
}
@@ -249,10 +325,12 @@ static int build_index_from_tree(const struct object_id *oid,
base_tree_ent->name[base_tree_ent->len - 1] = '/';
while (cbdata->iter.current) {
+ const char *skipped_prefix;
struct tree_entry *ent = cbdata->iter.current;
+ int cmp;
- int cmp = name_compare(ent->name, ent->len,
- base_tree_ent->name, base_tree_ent->len);
+ cmp = name_compare(ent->name, ent->len,
+ base_tree_ent->name, base_tree_ent->len);
if (!cmp || cmp < 0) {
advance_tree_entry_iterator(&cbdata->iter);
@@ -266,6 +344,11 @@ static int build_index_from_tree(const struct object_id *oid,
goto cleanup_and_return;
} else
continue;
+ } else if (skip_prefix(ent->name, base_tree_ent->name, &skipped_prefix) &&
+ S_ISDIR(base_tree_ent->mode)) {
+ /* The entry is in the current traversed tree entry, so we recurse */
+ result = READ_TREE_RECURSIVE;
+ goto cleanup_and_return;
}
break;
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index ea5a011405e..1d6365141fc 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -89,12 +89,21 @@ test_expect_success 'mktree with invalid submodule OIDs' '
grep "object $tree_oid is a tree but specified type was (commit)" err
'
-test_expect_success 'mktree refuses to read ls-tree -r output (1)' '
- test_must_fail git mktree <all
+test_expect_success 'mktree reads ls-tree -r output (1)' '
+ git mktree <all >actual &&
+ test_cmp tree actual
'
-test_expect_success 'mktree refuses to read ls-tree -r output (2)' '
- test_must_fail git mktree <all.withsub
+test_expect_success 'mktree reads ls-tree -r output (2)' '
+ git mktree <all.withsub >actual &&
+ test_cmp tree.withsub actual
+'
+
+test_expect_success 'mktree de-duplicates files inside directories' '
+ git ls-tree $(cat tree) >everything &&
+ cat <all >top_and_all &&
+ git mktree <top_and_all >actual &&
+ test_cmp tree actual
'
test_expect_success 'mktree fails on malformed input' '
@@ -238,6 +247,50 @@ test_expect_success 'mktree with duplicate entries' '
test_cmp expect actual
'
+test_expect_success 'mktree adds entry after nested entry' '
+ tree_oid=$(cat tree) &&
+ folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+ one_oid=$(git rev-parse ${tree_oid}:folder/one) &&
+
+ {
+ printf "040000 tree $folder_oid\tearly\n" &&
+ printf "100644 blob $one_oid\tearly/one\n" &&
+ printf "100644 blob $one_oid\tlater\n" &&
+ printf "040000 tree $EMPTY_TREE\tnew-tree\n" &&
+ printf "100644 blob $one_oid\tnew-tree/one\n" &&
+ printf "100644 blob $one_oid\tzzz\n"
+ } >top.rec &&
+ git mktree <top.rec >tree.actual &&
+
+ {
+ printf "040000 tree $folder_oid\tearly\n" &&
+ printf "100644 blob $one_oid\tlater\n" &&
+ printf "040000 tree $folder_oid\tnew-tree\n" &&
+ printf "100644 blob $one_oid\tzzz\n"
+ } >expect &&
+ git ls-tree $(cat tree.actual) >actual &&
+
+ test_cmp expect actual
+'
+
+test_expect_success 'mktree inserts entries into directories' '
+ folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+ one_oid=$(git rev-parse ${tree_oid}:folder/one) &&
+ blob_oid=$(git rev-parse ${tree_oid}:before) &&
+ {
+ printf "040000 tree $folder_oid\tfolder\n" &&
+ printf "100644 blob $blob_oid\tfolder/two\n"
+ } | git mktree >actual &&
+
+ {
+ printf "100644 blob $one_oid\tfolder/one\n" &&
+ printf "100644 blob $blob_oid\tfolder/two\n"
+ } >expect &&
+ git ls-tree -r $(cat actual) >actual &&
+
+ test_cmp expect actual
+'
+
test_expect_success 'mktree with base tree' '
tree_oid=$(cat tree) &&
folder_oid=$(git rev-parse ${tree_oid}:folder) &&
@@ -274,4 +327,50 @@ test_expect_success 'mktree with base tree' '
test_cmp expect actual
'
+test_expect_success 'mktree with base tree (deep)' '
+ tree_oid=$(cat tree) &&
+ folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+ before_oid=$(git rev-parse ${tree_oid}:before) &&
+ folder_one_oid=$(git rev-parse ${tree_oid}:folder/one) &&
+ head_oid=$(git rev-parse HEAD) &&
+
+ {
+ printf "100755 blob $before_oid\tfolder/before\n" &&
+ printf "100644 blob $before_oid\tfolder/one.txt\n" &&
+ printf "160000 commit $head_oid\tfolder/sub\n" &&
+ printf "040000 tree $folder_oid\tfolder/one\n" &&
+ printf "040000 tree $folder_oid\tfolder/one/deeper\n"
+ } >top.append &&
+ git mktree <top.append $(cat tree) >tree.actual &&
+
+ {
+ printf "100755 blob $before_oid\tfolder/before\n" &&
+ printf "100644 blob $before_oid\tfolder/one.txt\n" &&
+ printf "100644 blob $folder_one_oid\tfolder/one/deeper/one\n" &&
+ printf "100644 blob $folder_one_oid\tfolder/one/one\n" &&
+ printf "160000 commit $head_oid\tfolder/sub\n"
+ } >expect &&
+ git ls-tree -r $(cat tree.actual) -- folder/ >actual &&
+
+ test_cmp expect actual
+'
+
+test_expect_success 'mktree fails on directory-file conflict' '
+ tree_oid="$(cat tree)" &&
+ blob_oid="$(git rev-parse $tree_oid:folder.txt)" &&
+
+ {
+ printf "100644 blob $blob_oid\ttest\n" &&
+ printf "100644 blob $blob_oid\ttest/deeper\n"
+ } |
+ test_must_fail git mktree 2>err &&
+ grep "You have both test and test/deeper" err &&
+
+ {
+ printf "100644 blob $blob_oid\tfolder/one/deeper/deep\n"
+ } |
+ test_must_fail git mktree $tree_oid 2>err &&
+ grep "You have both folder/one and folder/one/deeper/deep" err
+'
+
test_done
--
gitgitgadget
next prev parent reply other threads:[~2024-06-11 18:25 UTC|newest]
Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-11 18:24 [PATCH 00/16] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 01/16] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 02/16] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 03/16] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-11 18:45 ` Eric Sunshine
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 04/16] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-11 22:45 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 05/16] index-info.c: identify empty input lines in read_index_info Victoria Dye via GitGitGadget
2024-06-11 22:52 ` Junio C Hamano
2024-06-18 17:33 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 06/16] index-info.c: parse object type in provided " Victoria Dye via GitGitGadget
2024-06-12 1:54 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 07/16] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-12 2:11 ` Junio C Hamano
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-12 18:35 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 08/16] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-12 2:18 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 09/16] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-12 2:26 ` Junio C Hamano
2024-06-12 19:01 ` Victoria Dye
2024-06-12 19:45 ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 10/16] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-12 18:48 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 11/16] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 12/16] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-13 18:38 ` Victoria Dye
2024-06-11 18:24 ` [PATCH 13/16] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 14/16] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-12 9:40 ` Patrick Steinhardt
2024-06-12 19:50 ` Junio C Hamano
2024-06-17 19:23 ` Victoria Dye
2024-06-11 18:24 ` Victoria Dye via GitGitGadget [this message]
2024-06-11 18:24 ` [PATCH 16/16] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 00/17] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 01/17] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 02/17] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 03/17] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 04/17] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 05/17] index-info.c: return unrecognized lines to caller Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 06/17] index-info.c: parse object type in provided in read_index_info Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 07/17] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-20 20:18 ` Junio C Hamano
2024-06-19 21:57 ` [PATCH v2 08/17] mktree.c: do not fail on mismatched submodule type Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 09/17] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 10/17] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 11/17] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-20 22:05 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 12/17] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-20 22:26 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 13/17] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-26 21:10 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 14/17] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-19 21:58 ` [PATCH v2 15/17] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-26 21:23 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 16/17] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-27 19:29 ` Junio C Hamano
2024-06-19 21:58 ` [PATCH v2 17/17] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-25 23:26 ` [PATCH v2 00/17] mktree: support more flexible usage Junio C Hamano
2024-07-10 21:40 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=058354f45f7b837ebeb08337a8dfd6e0ec1e9d1b.1718130288.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=vdye@github.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).