git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Victoria Dye <vdye@github.com>, Victoria Dye <vdye@github.com>
Subject: [PATCH 15/16] mktree: allow deeper paths in input
Date: Tue, 11 Jun 2024 18:24:47 +0000	[thread overview]
Message-ID: <058354f45f7b837ebeb08337a8dfd6e0ec1e9d1b.1718130288.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1746.git.1718130288.gitgitgadget@gmail.com>

From: Victoria Dye <vdye@github.com>

Update 'git mktree' to handle entries nested inside of directories (e.g.
'path/to/a/file.txt'). This functionality requires a series of changes:

* In 'sort_and_dedup_tree_entry_array()', remove entries inside of
  directories that come after them in input order.
* Also in 'sort_and_dedup_tree_entry_array()', mark directories that contain
  entries that come after them in input order (e.g., 'folder/' followed by
  'folder/file.txt') as "need to expand".
* In 'add_tree_entry_to_index()', if a tree entry is marked as "need to
  expand", recurse into it with 'read_tree_at()' & 'build_index_from_tree'.
* In 'build_index_from_tree()', if a user-specified tree entry is contained
  within the current iterated entry, return 'READ_TREE_RECURSIVE' to recurse
  into the iterated tree.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 Documentation/git-mktree.txt |   5 ++
 builtin/mktree.c             | 101 ++++++++++++++++++++++++++++++---
 t/t1010-mktree.sh            | 107 +++++++++++++++++++++++++++++++++--
 3 files changed, 200 insertions(+), 13 deletions(-)

diff --git a/Documentation/git-mktree.txt b/Documentation/git-mktree.txt
index 99abd3c31a6..db90fdcdc8f 100644
--- a/Documentation/git-mktree.txt
+++ b/Documentation/git-mktree.txt
@@ -50,6 +50,11 @@ INPUT FORMAT
 Tree entries may be specified in any of the formats compatible with the
 `--index-info` option to linkgit:git-update-index[1].
 
+Entries may use full pathnames containing directory separators to specify
+entries nested within one or more directories. These entries are inserted into
+the appropriate tree in the base tree-ish if one exists. Otherwise, empty parent
+trees are created to contain the entries.
+
 The order of the tree entries is normalized by `mktree` so pre-sorting the input
 by path is not required. Multiple entries provided with the same path are
 deduplicated, with only the last one specified added to the tree.
diff --git a/builtin/mktree.c b/builtin/mktree.c
index 9e9d2554cad..00b77869a56 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -22,6 +22,7 @@ struct tree_entry {
 
 	/* Internal */
 	size_t order;
+	int expand_dir;
 
 	unsigned mode;
 	struct object_id oid;
@@ -39,6 +40,7 @@ struct tree_entry_array {
 	struct tree_entry **entries;
 
 	struct hashmap df_name_hash;
+	int has_nested_entries;
 };
 
 static int df_name_hash_cmp(const void *cmp_data UNUSED,
@@ -70,6 +72,13 @@ static void tree_entry_array_push(struct tree_entry_array *arr, struct tree_entr
 	arr->entries[arr->nr++] = ent;
 }
 
+static struct tree_entry *tree_entry_array_pop(struct tree_entry_array *arr)
+{
+	if (!arr->nr)
+		return NULL;
+	return arr->entries[--arr->nr];
+}
+
 static void clear_tree_entry_array(struct tree_entry_array *arr)
 {
 	hashmap_clear(&arr->df_name_hash);
@@ -107,8 +116,10 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
 
 		if (!verify_path(ent->name, mode))
 			die(_("invalid path '%s'"), path);
-		if (strchr(ent->name, '/'))
-			die("path %s contains slash", path);
+
+		/* mark has_nested_entries if needed */
+		if (!arr->has_nested_entries && strchr(ent->name, '/'))
+			arr->has_nested_entries = 1;
 
 		/* Add trailing slash to dir */
 		if (S_ISDIR(mode))
@@ -167,6 +178,46 @@ static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
 	ignore_mode = 0;
 	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
 
+	if (arr->has_nested_entries) {
+		struct tree_entry_array parent_dir_ents = { 0 };
+
+		count = arr->nr;
+		arr->nr = 0;
+
+		/* Remove any entries where one of its parent dirs has a higher 'order' */
+		for (size_t i = 0; i < count; i++) {
+			const char *skipped_prefix;
+			struct tree_entry *parent;
+			struct tree_entry *curr = arr->entries[i];
+			int skip_entry = 0;
+
+			while ((parent = tree_entry_array_pop(&parent_dir_ents))) {
+				if (!skip_prefix(curr->name, parent->name, &skipped_prefix))
+					continue;
+
+				/* entry in dir, so we push the parent back onto the stack */
+				tree_entry_array_push(&parent_dir_ents, parent);
+
+				if (parent->order > curr->order)
+					skip_entry = 1;
+				else
+					parent->expand_dir = 1;
+
+				break;
+			}
+
+			if (!skip_entry) {
+				arr->entries[arr->nr++] = curr;
+				if (S_ISDIR(curr->mode))
+					tree_entry_array_push(&parent_dir_ents, curr);
+			} else {
+				FREE_AND_NULL(curr);
+			}
+		}
+
+		release_tree_entry_array(&parent_dir_ents);
+	}
+
 	/* Finally, initialize the directory-file conflict hash map */
 	for (size_t i = 0; i < count; i++) {
 		struct tree_entry *curr = arr->entries[i];
@@ -214,15 +265,40 @@ struct build_index_data {
 	struct index_state istate;
 };
 
+static int build_index_from_tree(const struct object_id *oid,
+				 struct strbuf *base, const char *filename,
+				 unsigned mode, void *context);
+
 static int add_tree_entry_to_index(struct build_index_data *data,
 				   struct tree_entry *ent)
 {
-	struct cache_entry *ce;
-	ce = make_cache_entry(&data->istate, ent->mode, &ent->oid, ent->name, 0, 0);
-	if (!ce)
-		return error(_("make_cache_entry failed for path '%s'"), ent->name);
+	if (ent->expand_dir) {
+		int ret = 0;
+		struct pathspec ps = { 0 };
+		struct tree *subtree = parse_tree_indirect(&ent->oid);
+		struct strbuf base_path = STRBUF_INIT;
+		strbuf_add(&base_path, ent->name, ent->len);
+
+		if (!subtree)
+			ret = error(_("not a tree object: %s"), oid_to_hex(&ent->oid));
+		else if (read_tree_at(the_repository, subtree, &base_path, 0, &ps,
+				 build_index_from_tree, data) < 0)
+			ret = -1;
+
+		strbuf_release(&base_path);
+		if (ret)
+			return ret;
+
+	} else {
+		struct cache_entry *ce = make_cache_entry(&data->istate,
+							  ent->mode, &ent->oid,
+							  ent->name, 0, 0);
+		if (!ce)
+			return error(_("make_cache_entry failed for path '%s'"), ent->name);
+
+		add_index_entry(&data->istate, ce, ADD_CACHE_JUST_APPEND);
+	}
 
-	add_index_entry(&data->istate, ce, ADD_CACHE_JUST_APPEND);
 	return 0;
 }
 
@@ -249,10 +325,12 @@ static int build_index_from_tree(const struct object_id *oid,
 		base_tree_ent->name[base_tree_ent->len - 1] = '/';
 
 	while (cbdata->iter.current) {
+		const char *skipped_prefix;
 		struct tree_entry *ent = cbdata->iter.current;
+		int cmp;
 
-		int cmp = name_compare(ent->name, ent->len,
-				       base_tree_ent->name, base_tree_ent->len);
+		cmp = name_compare(ent->name, ent->len,
+				   base_tree_ent->name, base_tree_ent->len);
 		if (!cmp || cmp < 0) {
 			advance_tree_entry_iterator(&cbdata->iter);
 
@@ -266,6 +344,11 @@ static int build_index_from_tree(const struct object_id *oid,
 				goto cleanup_and_return;
 			} else
 				continue;
+		} else if (skip_prefix(ent->name, base_tree_ent->name, &skipped_prefix) &&
+			   S_ISDIR(base_tree_ent->mode)) {
+			/* The entry is in the current traversed tree entry, so we recurse */
+			result = READ_TREE_RECURSIVE;
+			goto cleanup_and_return;
 		}
 
 		break;
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index ea5a011405e..1d6365141fc 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -89,12 +89,21 @@ test_expect_success 'mktree with invalid submodule OIDs' '
 	grep "object $tree_oid is a tree but specified type was (commit)" err
 '
 
-test_expect_success 'mktree refuses to read ls-tree -r output (1)' '
-	test_must_fail git mktree <all
+test_expect_success 'mktree reads ls-tree -r output (1)' '
+	git mktree <all >actual &&
+	test_cmp tree actual
 '
 
-test_expect_success 'mktree refuses to read ls-tree -r output (2)' '
-	test_must_fail git mktree <all.withsub
+test_expect_success 'mktree reads ls-tree -r output (2)' '
+	git mktree <all.withsub >actual &&
+	test_cmp tree.withsub actual
+'
+
+test_expect_success 'mktree de-duplicates files inside directories' '
+	git ls-tree $(cat tree) >everything &&
+	cat <all >top_and_all &&
+	git mktree <top_and_all >actual &&
+	test_cmp tree actual
 '
 
 test_expect_success 'mktree fails on malformed input' '
@@ -238,6 +247,50 @@ test_expect_success 'mktree with duplicate entries' '
 	test_cmp expect actual
 '
 
+test_expect_success 'mktree adds entry after nested entry' '
+	tree_oid=$(cat tree) &&
+	folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+	one_oid=$(git rev-parse ${tree_oid}:folder/one) &&
+
+	{
+		printf "040000 tree $folder_oid\tearly\n" &&
+		printf "100644 blob $one_oid\tearly/one\n" &&
+		printf "100644 blob $one_oid\tlater\n" &&
+		printf "040000 tree $EMPTY_TREE\tnew-tree\n" &&
+		printf "100644 blob $one_oid\tnew-tree/one\n" &&
+		printf "100644 blob $one_oid\tzzz\n"
+	} >top.rec &&
+	git mktree <top.rec >tree.actual &&
+
+	{
+		printf "040000 tree $folder_oid\tearly\n" &&
+		printf "100644 blob $one_oid\tlater\n" &&
+		printf "040000 tree $folder_oid\tnew-tree\n" &&
+		printf "100644 blob $one_oid\tzzz\n"
+	} >expect &&
+	git ls-tree $(cat tree.actual) >actual &&
+
+	test_cmp expect actual
+'
+
+test_expect_success 'mktree inserts entries into directories' '
+	folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+	one_oid=$(git rev-parse ${tree_oid}:folder/one) &&
+	blob_oid=$(git rev-parse ${tree_oid}:before) &&
+	{
+		printf "040000 tree $folder_oid\tfolder\n" &&
+		printf "100644 blob $blob_oid\tfolder/two\n"
+	} | git mktree >actual &&
+
+	{
+		printf "100644 blob $one_oid\tfolder/one\n" &&
+		printf "100644 blob $blob_oid\tfolder/two\n"
+	} >expect &&
+	git ls-tree -r $(cat actual) >actual &&
+
+	test_cmp expect actual
+'
+
 test_expect_success 'mktree with base tree' '
 	tree_oid=$(cat tree) &&
 	folder_oid=$(git rev-parse ${tree_oid}:folder) &&
@@ -274,4 +327,50 @@ test_expect_success 'mktree with base tree' '
 	test_cmp expect actual
 '
 
+test_expect_success 'mktree with base tree (deep)' '
+	tree_oid=$(cat tree) &&
+	folder_oid=$(git rev-parse ${tree_oid}:folder) &&
+	before_oid=$(git rev-parse ${tree_oid}:before) &&
+	folder_one_oid=$(git rev-parse ${tree_oid}:folder/one) &&
+	head_oid=$(git rev-parse HEAD) &&
+
+	{
+		printf "100755 blob $before_oid\tfolder/before\n" &&
+		printf "100644 blob $before_oid\tfolder/one.txt\n" &&
+		printf "160000 commit $head_oid\tfolder/sub\n" &&
+		printf "040000 tree $folder_oid\tfolder/one\n" &&
+		printf "040000 tree $folder_oid\tfolder/one/deeper\n"
+	} >top.append &&
+	git mktree <top.append $(cat tree) >tree.actual &&
+
+	{
+		printf "100755 blob $before_oid\tfolder/before\n" &&
+		printf "100644 blob $before_oid\tfolder/one.txt\n" &&
+		printf "100644 blob $folder_one_oid\tfolder/one/deeper/one\n" &&
+		printf "100644 blob $folder_one_oid\tfolder/one/one\n" &&
+		printf "160000 commit $head_oid\tfolder/sub\n"
+	} >expect &&
+	git ls-tree -r $(cat tree.actual) -- folder/ >actual &&
+
+	test_cmp expect actual
+'
+
+test_expect_success 'mktree fails on directory-file conflict' '
+	tree_oid="$(cat tree)" &&
+	blob_oid="$(git rev-parse $tree_oid:folder.txt)" &&
+
+	{
+		printf "100644 blob $blob_oid\ttest\n" &&
+		printf "100644 blob $blob_oid\ttest/deeper\n"
+	} |
+	test_must_fail git mktree 2>err &&
+	grep "You have both test and test/deeper" err &&
+
+	{
+		printf "100644 blob $blob_oid\tfolder/one/deeper/deep\n"
+	} |
+	test_must_fail git mktree $tree_oid 2>err &&
+	grep "You have both folder/one and folder/one/deeper/deep" err
+'
+
 test_done
-- 
gitgitgadget


  parent reply	other threads:[~2024-06-11 18:25 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-11 18:24 [PATCH 00/16] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 01/16] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 02/16] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 03/16] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-11 18:45   ` Eric Sunshine
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 04/16] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-11 22:45   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 05/16] index-info.c: identify empty input lines in read_index_info Victoria Dye via GitGitGadget
2024-06-11 22:52   ` Junio C Hamano
2024-06-18 17:33     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 06/16] index-info.c: parse object type in provided " Victoria Dye via GitGitGadget
2024-06-12  1:54   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 07/16] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-12  2:11   ` Junio C Hamano
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 18:35     ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 08/16] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-12  2:18   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 09/16] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-12  2:26   ` Junio C Hamano
2024-06-12 19:01     ` Victoria Dye
2024-06-12 19:45       ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 10/16] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 18:48     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 11/16] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 12/16] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-13 18:38     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 13/16] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 14/16] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 19:50     ` Junio C Hamano
2024-06-17 19:23     ` Victoria Dye
2024-06-11 18:24 ` Victoria Dye via GitGitGadget [this message]
2024-06-11 18:24 ` [PATCH 16/16] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 00/17] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 01/17] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 02/17] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 03/17] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 04/17] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 05/17] index-info.c: return unrecognized lines to caller Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 06/17] index-info.c: parse object type in provided in read_index_info Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 07/17] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-20 20:18     ` Junio C Hamano
2024-06-19 21:57   ` [PATCH v2 08/17] mktree.c: do not fail on mismatched submodule type Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 09/17] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 10/17] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 11/17] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-20 22:05     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 12/17] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-20 22:26     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 13/17] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-26 21:10     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 14/17] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-19 21:58   ` [PATCH v2 15/17] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-26 21:23     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 16/17] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-27 19:29     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 17/17] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-25 23:26   ` [PATCH v2 00/17] mktree: support more flexible usage Junio C Hamano
2024-07-10 21:40     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=058354f45f7b837ebeb08337a8dfd6e0ec1e9d1b.1718130288.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=vdye@github.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).