git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Lars Schneider <larsxschneider@gmail.com>
To: Jonathan Tan <jonathantanmy@google.com>
Cc: git@vger.kernel.org, bmwill@google.com, gitster@pobox.com
Subject: Re: [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
Date: Sun, 6 Aug 2017 21:58:24 +0200	[thread overview]
Message-ID: <323E470B-994B-4AD8-9F30-588C2B97A845@gmail.com> (raw)
In-Reply-To: <6327579311fdb941a11b6d452318777a3c42ee65.1501092795.git.jonathantanmy@google.com>


> On 26 Jul 2017, at 20:17, Jonathan Tan <jonathantanmy@google.com> wrote:
> 
> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
> 
> As you can see in the change to t0021, this commit changes the error
> message reported when the long-running process does not introduce itself
> with the expected "server"-terminated line. Originally, the error
> message reports that the filter "does not support filter protocol
> version 2", differentiating between the old single-file filter protocol
> and the new multi-file filter protocol - I have updated it to something
> more generic and useful.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> convert.c             |  75 ++++--------------------------------
> pkt-line.c            |  19 ----------
> pkt-line.h            |   2 -
> sub-process.c         | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
> sub-process.h         |  26 +++++++++++++
> t/t0021-conversion.sh |   2 +-
> 6 files changed, 137 insertions(+), 90 deletions(-)
> 
> diff --git a/convert.c b/convert.c
> index dbdbb24e4..1012462e3 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -513,78 +513,17 @@ static struct hashmap subprocess_map;
> 
> static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
> {
> -	int err, i;
> -	struct cmd2process *entry = (struct cmd2process *)subprocess;
> -	struct string_list cap_list = STRING_LIST_INIT_NODUP;
> -	char *cap_buf;
> -	const char *cap_name;
> -	struct child_process *process = &subprocess->process;
> -	const char *cmd = subprocess->cmd;
> -
> -	static const struct {
> -		const char *name;
> -		unsigned int cap;
> -	} known_caps[] = {
> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> 		{ "clean",  CAP_CLEAN  },
> 		{ "smudge", CAP_SMUDGE },
> 		{ "delay",  CAP_DELAY  },
> +		{ NULL, 0 }
> 	};
> -
> -	sigchain_push(SIGPIPE, SIG_IGN);
> -
> -	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
> -	if (err)
> -		goto done;
> -
> -	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
> -	if (err) {
> -		error("external filter '%s' does not support filter protocol version 2", cmd);
> -		goto done;
> -	}
> -	err = strcmp(packet_read_line(process->out, NULL), "version=2");
> -	if (err)
> -		goto done;
> -	err = packet_read_line(process->out, NULL) != NULL;
> -	if (err)
> -		goto done;
> -
> -	for (i = 0; i < ARRAY_SIZE(known_caps); ++i) {
> -		err = packet_write_fmt_gently(
> -			process->in, "capability=%s\n", known_caps[i].name);
> -		if (err)
> -			goto done;
> -	}
> -	err = packet_flush_gently(process->in);
> -	if (err)
> -		goto done;
> -
> -	for (;;) {
> -		cap_buf = packet_read_line(process->out, NULL);
> -		if (!cap_buf)
> -			break;
> -		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
> -
> -		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
> -			continue;
> -
> -		cap_name = cap_list.items[1].string;
> -		i = ARRAY_SIZE(known_caps) - 1;
> -		while (i >= 0 && strcmp(cap_name, known_caps[i].name))
> -			i--;
> -
> -		if (i >= 0)
> -			entry->supported_capabilities |= known_caps[i].cap;
> -		else
> -			warning("external filter '%s' requested unsupported filter capability '%s'",
> -			cmd, cap_name);
> -
> -		string_list_clear(&cap_list, 0);
> -	}
> -
> -done:
> -	sigchain_pop(SIGPIPE);
> -
> -	return err;
> +	struct cmd2process *entry = (struct cmd2process *)subprocess;
> +	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);

Wouldn't it make sense to add `supported_capabilities` to `struct subprocess_entry` ?


> }
> 
> static void handle_filter_error(const struct strbuf *filter_status,
> diff --git a/pkt-line.c b/pkt-line.c
> index 9d845ecc3..7db911957 100644
> --- a/pkt-line.c
> +++ b/pkt-line.c
> @@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
> 	return status;
> }
> 
> -int packet_writel(int fd, const char *line, ...)
> -{
> -	va_list args;
> -	int err;
> -	va_start(args, line);
> -	for (;;) {
> -		if (!line)
> -			break;
> -		if (strlen(line) > LARGE_PACKET_DATA_MAX)
> -			return -1;
> -		err = packet_write_fmt_gently(fd, "%s\n", line);
> -		if (err)
> -			return err;
> -		line = va_arg(args, const char*);
> -	}
> -	va_end(args);
> -	return packet_flush_gently(fd);
> -}
> -
> static int packet_write_gently(const int fd_out, const char *buf, size_t size)
> {
> 	static char packet_write_buffer[LARGE_PACKET_MAX];
> diff --git a/pkt-line.h b/pkt-line.h
> index 450183b64..66ef610fc 100644
> --- a/pkt-line.h
> +++ b/pkt-line.h
> @@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
> void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> int packet_flush_gently(int fd);
> int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> -LAST_ARG_MUST_BE_NULL
> -int packet_writel(int fd, const char *line, ...);
> int write_packetized_from_fd(int fd_in, int fd_out);
> int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
> 
> diff --git a/sub-process.c b/sub-process.c
> index 6cbffa440..37b4bd0ad 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -108,3 +108,106 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
> 	hashmap_add(hashmap, entry);
> 	return 0;
> }
> +
> +static int handshake_version(struct child_process *process,
> +			     const char *welcome_prefix, int *versions,

Maybe it would be less ambiguous if we call it `supported_versions` ? 


> +			     int *chosen_version)
> +{
> +	int version_scratch;
> +	int i;
> +	char *line;
> +	const char *p;
> +
> +	if (!chosen_version)
> +		chosen_version = &version_scratch;

I am not an C expert but wouldn't 'version_scratch' go out of scope as soon
as the function returns? Why don't you error here right away?


> +	if (packet_write_fmt_gently(process->in, "%s-client\n",
> +				    welcome_prefix))
> +		return error("Could not write client identification");

Nit: Would it make sense to rename `welcome_prefix` to `client_id`?
Alternatively, could we rename the error messages to "welcome prefix"?


> +	for (i = 0; versions[i]; i++) {
> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> +					    versions[i]))
> +			return error("Could not write requested version");

Maybe: "Could not write supported versions"?


> +	}
> +	if (packet_flush_gently(process->in))
> +		return error("Could not write flush packet");

I feel this error is too generic.
Maybe: "Could not finish writing supported versions"?


> +
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, welcome_prefix, &p) ||
> +	    strcmp(p, "-server"))
> +		return error("Unexpected line '%s', expected %s-server",
> +			     line ? line : "<flush packet>", welcome_prefix);
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, "version=", &p) ||
> +	    strtol_i(p, 10, chosen_version))

Maybe `strlen("version=")` would be more clear than 10?


> +		return error("Unexpected line '%s', expected version",

Maybe "... expected version number" ?


> +			     line ? line : "<flush packet>");
> +	if ((line = packet_read_line(process->out, NULL)))
> +		return error("Unexpected line '%s', expected flush", line);
> +
> +	/* Check to make sure that the version received is supported */
> +	for (i = 0; versions[i]; i++) {
> +		if (versions[i] == *chosen_version)
> +			break;
> +	}
> +	if (!versions[i])
> +		return error("Version %d not supported", *chosen_version);
> +
> +	return 0;
> +}
> +
> +static int handshake_capabilities(struct child_process *process,
> +				  struct subprocess_capability *capabilities,
> +				  unsigned int *supported_capabilities)

I feel the naming could be misleading. I think ...
`capabilities` is really `supported_capabilities` 
and 
`supported_capabilities` is really `negiotated_capabilties` or `agreed_capabilites`


> +{
> +	int i;
> +	char *line;
> +
> +	for (i = 0; capabilities[i].name; i++) {
> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> +					    capabilities[i].name))
> +			return error("Could not write requested capability");

I think this should be "Could not write supported capability", no?


> +	}
> +	if (packet_flush_gently(process->in))
> +		return error("Could not write flush packet");

Maybe " "Could not finish writing supported capability" ?


> +	while ((line = packet_read_line(process->out, NULL))) {
> +		const char *p;
> +		if (!skip_prefix(line, "capability=", &p))
> +			continue;

Shouldn't we write an error in this case?


> +		for (i = 0;
> +		     capabilities[i].name && strcmp(p, capabilities[i].name);
> +		     i++)
> +			;
> +		if (capabilities[i].name) {
> +			if (supported_capabilities)
> +				*supported_capabilities |= capabilities[i].flag;
> +		} else {
> +			warning("external filter requested unsupported filter capability '%s'",
> +				p);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities) {
> +	int retval;
> +	struct child_process *process = &entry->process;
> +
> +	sigchain_push(SIGPIPE, SIG_IGN);
> +
> +	retval = handshake_version(process, welcome_prefix, versions,
> +				   chosen_version) ||
> +		 handshake_capabilities(process, capabilities,
> +					supported_capabilities);
> +
> +	sigchain_pop(SIGPIPE);
> +	return retval;
> +}
> diff --git a/sub-process.h b/sub-process.h
> index d37c1499a..6857eb1b5 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -29,6 +29,16 @@ struct subprocess_entry {
> 	struct child_process process;
> };
> 
> +struct subprocess_capability {
> +	const char *name;
> +
> +	/*
> +	 * subprocess_handshake will "|=" this value to supported_capabilities
> +	 * if the server reports that it supports this capability.
> +	 */
> +	unsigned int flag;
> +};
> +
> /* subprocess functions */
> 
> /* Function to test two subprocess hashmap entries for equality. */
> @@ -62,6 +72,22 @@ static inline struct child_process *subprocess_get_child_process(
> 	return &entry->process;
> }
> 
> +/*
> + * Perform the version and capability negotiation as described in the "Long
> + * Running Filter Process" section of the gitattributes documentation using the
> + * given requested versions and capabilities. The "versions" and "capabilities"
> + * parameters are arrays terminated by a 0 or blank struct.

Should we reference the "gitattributes docs" if we want to make this generic?
I thought "technical/api-sub-process.txt" would explain this kind of stuff
and I was surprised that you deleted it in an earlier patch.


> + *
> + * This function is typically called when a subprocess is started (as part of
> + * the "startfn" passed to subprocess_start).
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities);
> +
> /*
>  * Helper function that will read packets looking for "status=<foo>"
>  * key/value pairs and return the value from the last "status" packet
> diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
> index eb3d83744..46f8e583c 100755
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
> 
> 		cp "$TEST_ROOT/test.o" test.r &&
> 		test_must_fail git add . 2>git-stderr.log &&
> -		grep "does not support filter protocol version" git-stderr.log
> +		grep "expected git-filter-server" git-stderr.log
> 	)
> '
> 

The generalization of this protocol is nice to see.

Thanks for working on it,
Lars




  reply	other threads:[~2017-08-06 19:58 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
2017-07-24 22:21 ` Jonathan Nieder
2017-07-25 14:38 ` Ben Peart
2017-07-25 17:53   ` Jonathan Tan
2017-07-25 18:29 ` [PATCH v2 0/2] " Jonathan Tan
2017-07-25 18:29 ` [PATCH v2 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
2017-07-25 20:18   ` Brandon Williams
2017-07-25 18:29 ` [PATCH v2 2/2] sub-process: refactor handshake to common function Jonathan Tan
2017-07-25 20:28   ` Brandon Williams
2017-07-25 22:25   ` Junio C Hamano
2017-07-26 16:52 ` [PATCH] " Lars Schneider
2017-07-26 18:14   ` Junio C Hamano
2017-07-26 18:17 ` [PATCH for NEXT v3 0/2] " Jonathan Tan
2017-07-26 19:48   ` Junio C Hamano
2017-07-29 16:26   ` Junio C Hamano
2017-07-26 18:17 ` [PATCH for NEXT v3 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
2017-07-26 18:17 ` [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function Jonathan Tan
2017-08-06 19:58   ` Lars Schneider [this message]
2017-08-07 17:21     ` Jonathan Tan
2017-08-07 17:51       ` Lars Schneider
2017-08-07 18:17         ` Jonathan Tan
2017-08-07 18:29           ` Ben Peart

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=323E470B-994B-4AD8-9F30-588C2B97A845@gmail.com \
    --to=larsxschneider@gmail.com \
    --cc=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).