From: John Cai <johncai86@gmail.com>
To: git@vger.kernel.org
Cc: avarab@gmail.com, me@ttaylorr.com, John Cai <johncai86@gmail.com>
Subject: [RFC] cat-file: add a --stdin-cmd mode
Date: Fri, 21 Jan 2022 12:54:05 -0500 [thread overview]
Message-ID: <20220121175405.45753-1-johncai86@gmail.com> (raw)
This RFC patch proposes a new flag --stdin-cmd that works with
git-cat-file --batch. Similar to git-update-ref --stdin, it will accept
commands and arguments from stdin.
The start of this idea was discussed in [1], where the original
motivation was to be able to control when the buffer was flushed to
stdout in --buffer mode.
However, this can actually be much more useful in situations when
git-cat-file --batch is being used as a long lived backend query
process. At GitLab, we use a pair of cat-file processes. One for
iterating over object metadata with --batch-check, and the other to grab
object contents with --batch. However, if we had --stdin-cmd, we could
get rid of the second --batch-check process, and just have one progress
where we can flip between getting object info, and getting object contents.
This can lead to huge savings.
git cat-file --batch --stdin-cmd
$ <command> [arg1] [arg2] NL
We can also add a -z mode to allow for NUL-terminated lines
$ <command> [arg1] [arg2] NUL
This patch adds three commands: object, info, fflush
$ object <sha1> NL
$ info <sha1> NL
$ fflush NL
These three would be immediately useful in GitLab's context, but one can
imagine this mode to be further extended for other things.
For instance, a non-trivial part of "cat-file --batch" time is spent
on parsing its argument and seeing if it's a revision, ref etc. So we
could add a command that only accepts a full-length 40
character SHA-1.
This would be the first step in adding such an interface to
git-cat-file.
[1] https://lore.kernel.org/git/pull.1124.git.git.1636149400.gitgitgadget@gmail.com/
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
---
builtin/cat-file.c | 137 +++++++++++++++++++++++++++++++++++++++++++-
strvec.c | 23 ++++++++
strvec.h | 8 +++
t/t1006-cat-file.sh | 72 +++++++++++++++++++++++
4 files changed, 239 insertions(+), 1 deletion(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 7b3f42950e..2679b34b43 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
#include "packfile.h"
#include "object-store.h"
#include "promisor-remote.h"
+#include "strvec.h"
struct batch_options {
int enabled;
@@ -26,7 +27,10 @@ struct batch_options {
int unordered;
int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
const char *format;
+ int stdin_cmd;
+ int end_null;
};
+static char line_termination = '\n';
static const char *force_path;
@@ -508,6 +512,126 @@ static int batch_unordered_packed(const struct object_id *oid,
data);
}
+enum batch_state {
+ /* Non-transactional state open for commands. */
+ BATCH_STATE_OPEN,
+};
+
+static void parse_cmd_object(struct batch_options *opt,
+ const int argc, const char **argv,
+ struct strbuf *output,
+ struct expand_data *data)
+{
+ batch_one_object(argv[0], output, opt, data);
+}
+
+static void parse_cmd_info(struct batch_options *opt,
+ const int argc, const char **argv,
+ struct strbuf *output,
+ struct expand_data *data)
+{
+ opt->print_contents = 0;
+ batch_one_object(argv[0], output, opt, data);
+}
+
+static void parse_cmd_fflush(struct batch_options *opt,
+ const int argc, const char **argv,
+ struct strbuf *output,
+ struct expand_data *data)
+{
+ fflush(stdout);
+}
+
+typedef void (*parse_cmd_fn_t)(struct batch_options *, const int,
+ const char **, struct strbuf *,
+ struct expand_data *);
+
+static const struct parse_cmd {
+ const char *prefix;
+ parse_cmd_fn_t fn;
+ unsigned args;
+ enum batch_state state;
+} command[] = {
+ { "object", parse_cmd_object, 1, BATCH_STATE_OPEN },
+ { "info", parse_cmd_info, 1, BATCH_STATE_OPEN },
+ { "fflush", parse_cmd_fflush, 0, BATCH_STATE_OPEN },
+};
+
+static void batch_objects_stdin_cmd(struct batch_options *opt,
+ struct strbuf *output,
+ struct expand_data *data)
+{
+ struct strbuf input = STRBUF_INIT;
+ enum batch_state state = BATCH_STATE_OPEN;
+
+ /* Read each line dispatch its command */
+ while (!strbuf_getwholeline(&input, stdin, line_termination)) {
+ int i;
+ const struct parse_cmd *cmd = NULL;
+ struct strvec args = STRVEC_INIT;
+ size_t n;
+ const char *p;
+
+ if (*input.buf == line_termination)
+ die("empty command in input");
+ else if (isspace(*input.buf))
+ die("whitespace before command: %s", input.buf);
+
+ for (i = 0; i < ARRAY_SIZE(command); i++) {
+ const char *prefix = command[i].prefix;
+ char c;
+
+ if (!starts_with(input.buf, prefix))
+ continue;
+
+ /*
+ * If the command has arguments, verify that it's
+ * followed by a space. Otherwise, it shall be followed
+ * by a line terminator.
+ */
+ c = command[i].args ? ' ' : line_termination;
+ if (input.buf[strlen(prefix)] != c)
+ continue;
+
+ cmd = &command[i];
+ break;
+ }
+ if (!cmd)
+ die("unknown command: %s", input.buf);
+
+ /*
+ * Read additional arguments. Do not raise an error in
+ * case there is an early EOF to let the command
+ * handle missing arguments with a proper error message
+ */
+ p = input.buf + strlen(cmd->prefix) + 1;
+ if (*p == line_termination || !*p) {
+ n = 0;
+ } else {
+ const char *pos = strstr(p, "\n");
+ char *str = xstrndup(p, pos - p);
+
+ n = strvec_split_delim(&args, str, ' ', - 1);
+ free(str);
+ }
+
+ if (n < cmd->args)
+ die("%s: too few arguments", cmd->prefix);
+ if (n > cmd->args)
+ die("%s: too many arguments", cmd->prefix);
+
+ switch (state) {
+ case BATCH_STATE_OPEN:
+ /* TODO: command state management */
+ break;
+ }
+ cmd->fn(opt, args.nr, args.v, output, data);
+ strvec_clear(&args);
+ }
+
+ strbuf_release(&input);
+}
+
static int batch_objects(struct batch_options *opt)
{
struct strbuf input = STRBUF_INIT;
@@ -515,6 +639,7 @@ static int batch_objects(struct batch_options *opt)
struct expand_data data;
int save_warning;
int retval = 0;
+ const int stdin_cmd = opt->stdin_cmd;
if (!opt->format)
opt->format = "%(objectname) %(objecttype) %(objectsize)";
@@ -590,7 +715,8 @@ static int batch_objects(struct batch_options *opt)
save_warning = warn_on_object_refname_ambiguity;
warn_on_object_refname_ambiguity = 0;
- while (strbuf_getline(&input, stdin) != EOF) {
+ while (!stdin_cmd &&
+ strbuf_getline(&input, stdin) != EOF) {
if (data.split_on_whitespace) {
/*
* Split at first whitespace, tying off the beginning
@@ -608,6 +734,9 @@ static int batch_objects(struct batch_options *opt)
batch_one_object(input.buf, &output, opt, &data);
}
+ if (stdin_cmd)
+ batch_objects_stdin_cmd(opt, &output, &data);
+
strbuf_release(&input);
strbuf_release(&output);
warn_on_object_refname_ambiguity = save_warning;
@@ -685,6 +814,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
batch_option_callback),
OPT_CMDMODE(0, "batch-all-objects", &opt,
N_("with --batch[-check]: ignores stdin, batches all known objects"), 'b'),
+ OPT_BOOL(0, "stdin-cmd", &batch.stdin_cmd,
+ N_("with --batch[-check]: enters stdin 'command mode")),
+ OPT_BOOL('z', NULL, &batch.end_null, N_("with --stdin-cmd, use NUL termination")),
+
/* Batch-specific options */
OPT_GROUP(N_("Change or optimize batch output")),
OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")),
@@ -738,6 +871,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
/* Batch defaults */
if (batch.buffer_output < 0)
batch.buffer_output = batch.all_objects;
+ if (batch.end_null)
+ line_termination = '\0';
/* Return early if we're in batch mode? */
if (batch.enabled) {
diff --git a/strvec.c b/strvec.c
index 61a76ce6cb..7dca04bf7a 100644
--- a/strvec.c
+++ b/strvec.c
@@ -85,6 +85,29 @@ void strvec_split(struct strvec *array, const char *to_split)
}
}
+size_t strvec_split_delim(struct strvec *array, const char *string,
+ int delim, int maxsplit)
+{
+ size_t count = 0;
+ const char *p = string, *end;
+
+ for (;;) {
+ count++;
+ if (maxsplit >= 0 && count > maxsplit) {
+ strvec_push(array, p);
+ return count;
+ }
+ end = strchr(p, delim);
+ if (end) {
+ strvec_push_nodup(array, xmemdupz(p, end - p));
+ p = end + 1;
+ } else {
+ strvec_push(array, p);
+ return count;
+ }
+ }
+}
+
void strvec_clear(struct strvec *array)
{
if (array->v != empty_strvec) {
diff --git a/strvec.h b/strvec.h
index 9f55c8766b..c474918b91 100644
--- a/strvec.h
+++ b/strvec.h
@@ -73,6 +73,14 @@ void strvec_pop(struct strvec *);
/* Splits by whitespace; does not handle quoted arguments! */
void strvec_split(struct strvec *, const char *);
+/**
+ * strvec_split_delim() is a split function that behaves more like its
+ * string_list_split() cousin than the whitespace-splitting
+ * strvec_split().
+ */
+size_t strvec_split_delim(struct strvec *array, const char *string,
+ int delim, int maxsplit);
+
/**
* Free all memory associated with the array and return it to the
* initial, empty state.
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 145eee11df..8f339746ec 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -964,4 +964,76 @@ test_expect_success 'cat-file --batch-all-objects --batch-check ignores replace'
test_cmp expect actual
'
+F='%s\0'
+
+test_expect_success 'stdin-cmd not enough arguments' '
+ echo "object " >cmd &&
+ test_expect_code 128 git cat-file --batch --stdin-cmd < cmd 2>err &&
+ grep -E "^fatal:.*too few arguments" err
+'
+
+test_expect_success 'stdin-cmd too many arguments' '
+ echo "object foo bar" >cmd &&
+ test_expect_code 128 git cat-file --batch --stdin-cmd < cmd 2>err &&
+ grep -E "^fatal:.*too many arguments" err
+'
+
+test_expect_success 'stdin-cmd unknown command' '
+ echo unknown_command >cmd &&
+ test_expect_code 128 git cat-file --batch --stdin-cmd < cmd 2>err &&
+ grep -E "^fatal:.*unknown command.*" err &&
+ test_expect_code 128 git cat-file --batch --stdin-cmd -z < cmd 2>err &&
+ grep -E "^fatal:.*unknown command.*" err
+'
+
+test_expect_success 'setup object data' '
+ content="Object Data" &&
+ size=$(strlen "$content") &&
+ sha1=$(echo_without_newline "$content" | git hash-object -w --stdin)
+'
+
+test_expect_success 'stdin-cmd calling object works' '
+ echo "object $sha1" | git cat-file --batch --stdin-cmd >actual &&
+ echo "$sha1 blob $size" >expect &&
+ echo `git cat-file -p "$sha1"` >>expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'stdin-cmd -z calling object works' '
+ printf $F "object $sha1" | git cat-file --batch --stdin-cmd -z >actual &&
+ echo "$sha1 blob $size" >expect &&
+ echo `git cat-file -p "$sha1"` >>expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'setup object data' '
+ content="Object Data" &&
+ size=$(strlen "$content") &&
+ sha1=$(echo_without_newline "$content" | git hash-object -w --stdin)
+'
+
+test_expect_success 'stdin-cmd calling object works' '
+ echo "object $sha1" | git cat-file --batch --stdin-cmd >actual &&
+ echo "$sha1 blob $size" >expect &&
+ echo `git cat-file -p "$sha1"` >>expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'stdin-cmd -z calling object works' '
+ printf $F "object $sha1" | git cat-file --batch --stdin-cmd -z >actual &&
+ echo "$sha1 blob $size" >expect &&
+ echo `git cat-file -p "$sha1"` >>expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'stdin-cmd fflush works' '
+ printf "fflush\n" > cmd &&
+ test_expect_code 0 git cat-file --batch --stdin-cmd < cmd 2>err
+'
+
+test_expect_success 'stdin-cmd -z fflush works' '
+ printf $F 'fflush' > cmd &&
+ test_expect_code 0 git cat-file --batch --stdin-cmd -z < cmd 2>err
+'
+
test_done
--
2.34.1
next reply other threads:[~2022-01-21 17:54 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-21 17:54 John Cai [this message]
2022-01-23 15:11 ` [RFC] cat-file: add a --stdin-cmd mode Phillip Wood
2022-01-24 14:29 ` John Cai
2022-01-25 13:57 ` Phillip Wood
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220121175405.45753-1-johncai86@gmail.com \
--to=johncai86@gmail.com \
--cc=avarab@gmail.com \
--cc=git@vger.kernel.org \
--cc=me@ttaylorr.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.