git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Victoria Dye <vdye@github.com>, Victoria Dye <vdye@github.com>
Subject: [PATCH 07/16] mktree: use read_index_info to read stdin lines
Date: Tue, 11 Jun 2024 18:24:39 +0000	[thread overview]
Message-ID: <8d1e1eaa70b96779416f2f48a862d31a730c4521.1718130288.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1746.git.1718130288.gitgitgadget@gmail.com>

From: Victoria Dye <vdye@github.com>

Replace the custom input parsing of 'mktree' with 'read_index_info()', which
handles not only the 'ls-tree' output format it already handles but also the
other formats compatible with 'update-index'. This lends some consistency
across the commands (avoiding the need for two similar implementations for
input parsing) and adds flexibility to mktree.

Update 'Documentation/git-mktree.txt' to reflect the more permissive input
format.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 Documentation/git-mktree.txt |  17 +++--
 builtin/mktree.c             | 139 ++++++++++++-----------------------
 t/t1010-mktree.sh            |  66 +++++++++++++++++
 3 files changed, 125 insertions(+), 97 deletions(-)

diff --git a/Documentation/git-mktree.txt b/Documentation/git-mktree.txt
index 383f09dd333..507682ed23e 100644
--- a/Documentation/git-mktree.txt
+++ b/Documentation/git-mktree.txt
@@ -3,7 +3,7 @@ git-mktree(1)
 
 NAME
 ----
-git-mktree - Build a tree-object from ls-tree formatted text
+git-mktree - Build a tree-object from formatted tree entries
 
 
 SYNOPSIS
@@ -13,15 +13,13 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-Reads standard input in non-recursive `ls-tree` output format, and creates
-a tree object.  The order of the tree entries is normalized by mktree so
-pre-sorting the input is not required.  The object name of the tree object
-built is written to the standard output.
+Reads entry information from stdin and creates a tree object from those entries.
+The object name of the tree object built is written to the standard output.
 
 OPTIONS
 -------
 -z::
-	Read the NUL-terminated `ls-tree -z` output instead.
+	Input lines are separated with NUL rather than LF.
 
 --missing::
 	Allow missing objects.  The default behaviour (without this option)
@@ -35,6 +33,13 @@ OPTIONS
 	optional.  Note - if the `-z` option is used, lines are terminated
 	with NUL.
 
+INPUT FORMAT
+------------
+Tree entries may be specified in any of the formats compatible with the
+`--index-info` option to linkgit:git-update-index[1]. The order of the tree
+entries is normalized by `mktree` so pre-sorting the input by path is not
+required.
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/mktree.c b/builtin/mktree.c
index 15bd908702a..5530257252d 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -6,6 +6,7 @@
 #include "builtin.h"
 #include "gettext.h"
 #include "hex.h"
+#include "index-info.h"
 #include "quote.h"
 #include "strbuf.h"
 #include "tree.h"
@@ -93,123 +94,80 @@ static const char *mktree_usage[] = {
 	NULL
 };
 
-static void mktree_line(char *buf, int nul_term_line, int allow_missing,
-			struct tree_entry_array *arr)
+struct mktree_line_data {
+	struct tree_entry_array *arr;
+	int allow_missing;
+};
+
+static int mktree_line(unsigned int mode, struct object_id *oid,
+		       enum object_type obj_type, int stage UNUSED,
+		       const char *path, void *cbdata)
 {
-	char *ptr, *ntr;
-	const char *p;
-	unsigned mode;
-	enum object_type mode_type; /* object type derived from mode */
-	enum object_type obj_type; /* object type derived from sha */
+	struct mktree_line_data *data = cbdata;
+	enum object_type mode_type = object_type(mode);
 	struct object_info oi = OBJECT_INFO_INIT;
-	char *path, *to_free = NULL;
-	struct object_id oid;
+	enum object_type parsed_obj_type;
 
-	ptr = buf;
-	/*
-	 * Read non-recursive ls-tree output format:
-	 *     mode SP type SP sha1 TAB name
-	 */
-	mode = strtoul(ptr, &ntr, 8);
-	if (ptr == ntr || !ntr || *ntr != ' ')
-		die("input format error: %s", buf);
-	ptr = ntr + 1; /* type */
-	ntr = strchr(ptr, ' ');
-	if (!ntr || parse_oid_hex(ntr + 1, &oid, &p) ||
-	    *p != '\t')
-		die("input format error: %s", buf);
-
-	/* It is perfectly normal if we do not have a commit from a submodule */
-	if (S_ISGITLINK(mode))
-		allow_missing = 1;
-
-
-	*ntr++ = 0; /* now at the beginning of SHA1 */
-
-	path = (char *)p + 1;  /* at the beginning of name */
-	if (!nul_term_line && path[0] == '"') {
-		struct strbuf p_uq = STRBUF_INIT;
-		if (unquote_c_style(&p_uq, path, NULL))
-			die("invalid quoting");
-		path = to_free = strbuf_detach(&p_uq, NULL);
-	}
+	if (obj_type && mode_type != obj_type)
+		die("object type (%s) doesn't match mode type (%s)",
+		    type_name(obj_type), type_name(mode_type));
 
-	/*
-	 * Object type is redundantly derivable three ways.
-	 * These should all agree.
-	 */
-	mode_type = object_type(mode);
-	if (mode_type != type_from_string(ptr)) {
-		die("entry '%s' object type (%s) doesn't match mode type (%s)",
-			path, ptr, type_name(mode_type));
-	}
+	oi.typep = &parsed_obj_type;
 
-	/* Check the type of object identified by oid without fetching objects */
-	oi.typep = &obj_type;
-	if (oid_object_info_extended(the_repository, &oid, &oi,
+	if (oid_object_info_extended(the_repository, oid, &oi,
 				     OBJECT_INFO_LOOKUP_REPLACE |
 				     OBJECT_INFO_QUICK |
 				     OBJECT_INFO_SKIP_FETCH_OBJECT) < 0)
-		obj_type = -1;
+		parsed_obj_type = -1;
 
-	if (obj_type < 0) {
-		if (allow_missing) {
-			; /* no problem - missing objects are presumed to be of the right type */
+	if (parsed_obj_type < 0) {
+		if (data->allow_missing || S_ISGITLINK(mode)) {
+			; /* no problem - missing objects & submodules are presumed to be of the right type */
 		} else {
-			die("entry '%s' object %s is unavailable", path, oid_to_hex(&oid));
-		}
-	} else {
-		if (obj_type != mode_type) {
-			/*
-			 * The object exists but is of the wrong type.
-			 * This is a problem regardless of allow_missing
-			 * because the new tree entry will never be correct.
-			 */
-			die("entry '%s' object %s is a %s but specified type was (%s)",
-				path, oid_to_hex(&oid), type_name(obj_type), type_name(mode_type));
+			die("entry '%s' object %s is unavailable", path, oid_to_hex(oid));
 		}
+	} else if (parsed_obj_type != mode_type) {
+		/*
+		 * The object exists but is of the wrong type.
+		 * This is a problem regardless of allow_missing
+		 * because the new tree entry will never be correct.
+		 */
+		die("entry '%s' object %s is a %s but specified type was (%s)",
+		    path, oid_to_hex(oid), type_name(parsed_obj_type), type_name(mode_type));
 	}
 
-	append_to_tree(mode, &oid, path, arr);
-	free(to_free);
+	append_to_tree(mode, oid, path, data->arr);
+	return 0;
 }
 
 int cmd_mktree(int ac, const char **av, const char *prefix)
 {
-	struct strbuf sb = STRBUF_INIT;
 	struct object_id oid;
 	int nul_term_line = 0;
-	int allow_missing = 0;
 	int is_batch_mode = 0;
-	int got_eof = 0;
 	struct tree_entry_array arr = { 0 };
-	strbuf_getline_fn getline_fn;
+	struct mktree_line_data mktree_line_data = { .arr = &arr };
+	int ret;
 
 	const struct option option[] = {
 		OPT_BOOL('z', NULL, &nul_term_line, N_("input is NUL terminated")),
-		OPT_BOOL(0, "missing", &allow_missing, N_("allow missing objects")),
+		OPT_BOOL(0, "missing", &mktree_line_data.allow_missing, N_("allow missing objects")),
 		OPT_BOOL(0, "batch", &is_batch_mode, N_("allow creation of more than one tree")),
 		OPT_END()
 	};
 
 	ac = parse_options(ac, av, prefix, option, mktree_usage, 0);
-	getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf;
-
-	while (!got_eof) {
-		while (1) {
-			if (getline_fn(&sb, stdin) == EOF) {
-				got_eof = 1;
-				break;
-			}
-			if (sb.buf[0] == '\0') {
-				/* empty lines denote tree boundaries in batch mode */
-				if (is_batch_mode)
-					break;
-				die("input format error: (blank line only valid in batch mode)");
-			}
-			mktree_line(sb.buf, nul_term_line, allow_missing, &arr);
-		}
-		if (is_batch_mode && got_eof && arr.nr < 1) {
+
+	do {
+		ret = read_index_info(nul_term_line, mktree_line, &mktree_line_data);
+		if (ret < 0)
+			break;
+
+		/* empty lines denote tree boundaries in batch mode */
+		if (ret > 0 && !is_batch_mode)
+			die("input format error: (blank line only valid in batch mode)");
+
+		if (is_batch_mode && !ret && arr.nr < 1) {
 			/*
 			 * Execution gets here if the last tree entry is terminated with a
 			 * new-line.  The final new-line has been made optional to be
@@ -222,9 +180,8 @@ int cmd_mktree(int ac, const char **av, const char *prefix)
 			fflush(stdout);
 		}
 		clear_tree_entry_array(&arr); /* reset tree entry buffer for re-use in batch mode */
-	}
+	} while (ret > 0);
 
 	release_tree_entry_array(&arr);
-	strbuf_release(&sb);
-	return 0;
+	return !!ret;
 }
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index 22875ba598c..9b2ab0c97ad 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -54,11 +54,36 @@ test_expect_success 'ls-tree output in wrong order given to mktree (2)' '
 	test_cmp tree.withsub actual
 '
 
+test_expect_success '--batch creates multiple trees' '
+	cat top >multi-tree &&
+	echo "" >>multi-tree &&
+	cat top.withsub >>multi-tree &&
+
+	cat tree >expect &&
+	cat tree.withsub >>expect &&
+	git mktree --batch <multi-tree >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success 'allow missing object with --missing' '
 	git mktree --missing <top.missing >actual &&
 	test_cmp tree.missing actual
 '
 
+test_expect_success 'mktree with invalid submodule OIDs' '
+	# non-existent OID - ok
+	printf "160000 commit $(test_oid numeric)\tA\n" >in &&
+	git mktree <in >tree.actual &&
+	git ls-tree $(cat tree.actual) >actual &&
+	test_cmp in actual &&
+
+	# existing OID, wrong type - error
+	tree_oid="$(cat tree)" &&
+	printf "160000 commit $tree_oid\tA" |
+	test_must_fail git mktree 2>err &&
+	grep "object $tree_oid is a tree but specified type was (commit)" err
+'
+
 test_expect_success 'mktree refuses to read ls-tree -r output (1)' '
 	test_must_fail git mktree <all
 '
@@ -67,4 +92,45 @@ test_expect_success 'mktree refuses to read ls-tree -r output (2)' '
 	test_must_fail git mktree <all.withsub
 '
 
+test_expect_success 'mktree fails on malformed input' '
+	# empty line without --batch
+	echo "" |
+	test_must_fail git mktree 2>err &&
+	grep "blank line only valid in batch mode" err &&
+
+	# bad whitespace
+	printf "100644 blob $EMPTY_BLOB A" |
+	test_must_fail git mktree 2>err &&
+	grep "malformed input line" err &&
+
+	# invalid type
+	printf "100644 bad $EMPTY_BLOB\tA" |
+	test_must_fail git mktree 2>err &&
+	grep "invalid object type" err &&
+
+	# invalid OID length
+	printf "100755 blob abc123\tA" |
+	test_must_fail git mktree 2>err &&
+	grep "malformed input line" err &&
+
+	# bad quoting
+	printf "100644 blob $EMPTY_BLOB\t\"A" |
+	test_must_fail git mktree 2>err &&
+	grep "bad quoting of path name" err
+'
+
+test_expect_success 'mktree fails on mode mismatch' '
+	tree_oid="$(cat tree)" &&
+
+	# mode-type mismatch
+	printf "100644 tree $tree_oid\tA" |
+	test_must_fail git mktree 2>err &&
+	grep "object type (tree) doesn${SQ}t match mode type (blob)" err &&
+
+	# mode-object mismatch (no --missing)
+	printf "100644 $tree_oid\tA" |
+	test_must_fail git mktree 2>err &&
+	grep "object $tree_oid is a tree but specified type was (blob)" err
+'
+
 test_done
-- 
gitgitgadget


  parent reply	other threads:[~2024-06-11 18:24 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-11 18:24 [PATCH 00/16] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 01/16] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 02/16] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 03/16] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-11 18:45   ` Eric Sunshine
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 04/16] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-11 22:45   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 05/16] index-info.c: identify empty input lines in read_index_info Victoria Dye via GitGitGadget
2024-06-11 22:52   ` Junio C Hamano
2024-06-18 17:33     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 06/16] index-info.c: parse object type in provided " Victoria Dye via GitGitGadget
2024-06-12  1:54   ` Junio C Hamano
2024-06-11 18:24 ` Victoria Dye via GitGitGadget [this message]
2024-06-12  2:11   ` [PATCH 07/16] mktree: use read_index_info to read stdin lines Junio C Hamano
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 18:35     ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 08/16] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-12  2:18   ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 09/16] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-12  2:26   ` Junio C Hamano
2024-06-12 19:01     ` Victoria Dye
2024-06-12 19:45       ` Junio C Hamano
2024-06-11 18:24 ` [PATCH 10/16] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 18:48     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 11/16] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-11 18:24 ` [PATCH 12/16] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-13 18:38     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 13/16] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 14/16] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-12  9:40   ` Patrick Steinhardt
2024-06-12 19:50     ` Junio C Hamano
2024-06-17 19:23     ` Victoria Dye
2024-06-11 18:24 ` [PATCH 15/16] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-11 18:24 ` [PATCH 16/16] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-19 21:57 ` [PATCH v2 00/17] mktree: support more flexible usage Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 01/17] mktree: use OPT_BOOL Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 02/17] mktree: rename treeent to tree_entry Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 03/17] mktree: use non-static tree_entry array Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 04/17] update-index: generalize 'read_index_info' Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 05/17] index-info.c: return unrecognized lines to caller Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 06/17] index-info.c: parse object type in provided in read_index_info Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 07/17] mktree: use read_index_info to read stdin lines Victoria Dye via GitGitGadget
2024-06-20 20:18     ` Junio C Hamano
2024-06-19 21:57   ` [PATCH v2 08/17] mktree.c: do not fail on mismatched submodule type Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 09/17] mktree: add a --literally option Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 10/17] mktree: validate paths more carefully Victoria Dye via GitGitGadget
2024-06-19 21:57   ` [PATCH v2 11/17] mktree: overwrite duplicate entries Victoria Dye via GitGitGadget
2024-06-20 22:05     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 12/17] mktree: create tree using an in-core index Victoria Dye via GitGitGadget
2024-06-20 22:26     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 13/17] mktree: use iterator struct to add tree entries to index Victoria Dye via GitGitGadget
2024-06-26 21:10     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 14/17] mktree: add directory-file conflict hashmap Victoria Dye via GitGitGadget
2024-06-19 21:58   ` [PATCH v2 15/17] mktree: optionally add to an existing tree Victoria Dye via GitGitGadget
2024-06-26 21:23     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 16/17] mktree: allow deeper paths in input Victoria Dye via GitGitGadget
2024-06-27 19:29     ` Junio C Hamano
2024-06-19 21:58   ` [PATCH v2 17/17] mktree: remove entries when mode is 0 Victoria Dye via GitGitGadget
2024-06-25 23:26   ` [PATCH v2 00/17] mktree: support more flexible usage Junio C Hamano
2024-07-10 21:40     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8d1e1eaa70b96779416f2f48a862d31a730c4521.1718130288.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=vdye@github.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).