git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] Add git-archive
@ 2006-09-05 12:16 Franck Bui-Huu
  2006-09-05 19:23 ` Junio C Hamano
  2006-09-06 20:17 ` [PATCH 1/2] Add git-archive Rene Scharfe
  0 siblings, 2 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-05 12:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Rene Scharfe, git

git-archive is a command to make TAR and ZIP archives of a git tree.
It helps prevent a proliferation of git-{format}-tree commands.

Instead of directly calling git-{tar,zip}-tree command, it defines
a very simple API, that archiver should implement and register in
"git-archive.c". This API is made up by 2 functions whose prototype
is defined in "archive.h" file.

 - The first one is used to parse 'extra' parameters which have
   signification only for the specific archiver. That would allow
   different archive backends to have different kind of options.

 - The second one is used to ask to an archive backend to build
   the archive given some already resolved parameters.

The main reason for making this API is to avoid using
git-{tar,zip}-tree commands, hence making them useless. Maybe it's
time for them to die ?

It also implements remote operations by defining a very simple
protocol: it first sends the name of the specific uploader followed
the repository name (git-upload-tar git://example.org/repo.git).
Then it sends "arguments" key word followed by all options given
when invoking 'git-archive'.

The remote protocol is implemented in "git-archive.c" for client
side and is triggered by "--remote=<repo>" option. For example,
to fetch a TAR archive in a remote repo, you can issue:

$ git archive --format=tar --remote=git://xxx/yyy/zzz.git HEAD

We choose to not make a new command "git-fetch-archive" for example,
avoind one more GIT command which should be nice for users (less
commands to remember, keeps existing --remote option).

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
---
 .gitignore          |    1
 Makefile            |    3 -
 archive.h           |   43 ++++++++
 builtin-archive.c   |  262 +++++++++++++++++++++++++++++++++++++++++++++++++++
 builtin-tar-tree.c  |   66 +++++++++++++
 builtin.h           |    1
 generate-cmdlist.sh |    1
 git.c               |    1
 8 files changed, 377 insertions(+), 1 deletions(-)

diff --git a/.gitignore b/.gitignore
index 78cb671..a3e7ca1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ git-apply
 git-applymbox
 git-applypatch
 git-archimport
+git-archive
 git-bisect
 git-branch
 git-cat-file
diff --git a/Makefile b/Makefile
index 389daf7..51ed4dd 100644
--- a/Makefile
+++ b/Makefile
@@ -242,7 +242,7 @@ LIB_FILE=libgit.a
 XDIFF_LIB=xdiff/lib.a

 LIB_H = \
-	blob.h cache.h commit.h csum-file.h delta.h \
+	archive.h blob.h cache.h commit.h csum-file.h delta.h \
 	diff.h object.h pack.h para-walk.h pkt-line.h quote.h refs.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h
@@ -267,6 +267,7 @@ LIB_OBJS = \
 BUILTIN_OBJS = \
 	builtin-add.o \
 	builtin-apply.o \
+	builtin-archive.o \
 	builtin-cat-file.o \
 	builtin-checkout-index.o \
 	builtin-check-ref-format.o \
diff --git a/archive.h b/archive.h
new file mode 100644
index 0000000..6c69953
--- /dev/null
+++ b/archive.h
@@ -0,0 +1,43 @@
+#ifndef ARCHIVE_H
+#define ARCHIVE_H
+
+typedef int (*write_archive_fn_t)(struct tree *tree,
+				  const unsigned char *commit_sha1,
+				  const char *prefix,
+				  time_t time,
+				  const char **pathspec);
+
+typedef int (*parse_extra_args_fn_t)(int argc,
+				     const char **argv,
+				     const char **reason);
+
+struct archiver_struct {
+	const char *name;
+	write_archive_fn_t write_archive;
+	parse_extra_args_fn_t parse_extra;
+	const char *remote;
+	const char *prefix;
+};
+
+extern struct archiver_struct archivers[];
+
+extern int parse_archive_args(int argc, const char **argv,
+			      struct archiver_struct **ar,
+			      const char **reason);
+
+extern int parse_treeish_arg(const char **argv,
+			     struct tree **tree,
+			     const unsigned char **commit_sha1,
+			     time_t *archive_time,
+			     const char *prefix,
+			     const char **reason);
+/*
+ *
+ */
+extern int write_tar_archive(struct tree *tree,
+			     const unsigned char *commit_sha1,
+			     const char *prefix,
+			     time_t time,
+			     const char **pathspec);
+
+#endif	/* ARCHIVE_H */
diff --git a/builtin-archive.c b/builtin-archive.c
new file mode 100644
index 0000000..9341813
--- /dev/null
+++ b/builtin-archive.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2006 Franck Bui-Huu
+ * Copyright (c) 2006 Rene Scharfe
+ */
+#include <time.h>
+#include "cache.h"
+#include "builtin.h"
+#include "archive.h"
+#include "commit.h"
+#include "tree-walk.h"
+#include "exec_cmd.h"
+#include "pkt-line.h"
+
+static const char archive_usage[] = \
+"git-archive --format=<fmt> [--prefix=<prefix>/] [-0|...|-9]
<tree-ish> [path...]";
+
+struct archiver_struct archivers[] = {
+	{ "tar", write_tar_archive },
+};
+
+static void concat_argv(int argc, const char **argv, char line[], int size)
+{
+	char *p;
+	int len, i;
+
+	p = line;
+	for (i = 1; i < argc; i++) {
+		/* server needn't these options */
+		if (!strncmp(argv[i], "--format=", 9) ||
+		    !strncmp(argv[i], "--remote=", 9))
+			continue;
+
+		len = strlen(argv[i]);
+		if (p + len + 1> line + size)
+			die("too many options");
+
+		strcpy(p, argv[i]);
+		p += len;
+		*p++ = ' ';
+	}
+	if (p > line)
+		p--;
+	*p = '\0';
+}
+
+static int run_remote_archiver(struct archiver_struct *ar, int argc,
+			       const char **argv)
+{
+	char *url, buf[1024];
+	pid_t pid;
+	int fd[2];
+	int len, rv;
+
+	sprintf(buf, "git-upload-%s", ar->name);
+
+	url = strdup(ar->remote);
+	pid = git_connect(fd, url, buf);
+	if (pid < 0)
+		return pid;
+
+	concat_argv(argc, argv, buf, sizeof(buf));
+
+	packet_write(fd[1], "arguments %s\n", buf);
+	packet_flush(fd[1]);
+
+	len = packet_read_line(fd[0], buf, sizeof(buf));
+	if (!len)
+		die("git-archive: expected ACK/NAK, got EOF");
+	if (buf[len-1] == '\n')
+		buf[--len] = 0;
+	if (strcmp(buf, "ACK")) {
+		if (len > 5 && !strncmp(buf, "NACK ", 5))
+			die("git-archive: NACK %s", buf + 5);
+		die("git-archive: protocol error");
+	}
+
+	len = packet_read_line(fd[0], buf, sizeof(buf));
+	if (len)
+		die("git-archive: expected a flush");
+
+	/* Now, start reading from fd[0] and spit it out to stdout */
+	rv = copy_fd(fd[0], 1);
+
+	close(fd[0]);
+	rv |= finish_connect(pid);
+
+	return !!rv;
+}
+
+static struct archiver_struct *get_archiver(const char *name)
+{
+	struct archiver_struct *ar = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(archivers); i++) {
+		if (!strcmp(name, archivers[i].name)) {
+			ar = &archivers[i];
+			break;
+		}
+	}
+	return ar;
+}
+
+int parse_treeish_arg(const char **argv, struct tree **tree,
+		      const unsigned char **commit_sha1,
+		      time_t *archive_time, const char *prefix,
+		      const char **reason)
+{
+	const char *name = argv[0];
+	struct commit *commit;
+	unsigned char sha1[20];
+	int rv = -1;
+
+	if (get_sha1(name, sha1)) {
+		*reason = "Not a valid object name";
+		goto out;
+	}
+
+	commit = lookup_commit_reference_gently(sha1, 1);
+	if (commit) {
+		*commit_sha1 = commit->object.sha1;
+		*archive_time = commit->date;
+	} else {
+		*archive_time = time(NULL);
+	}
+
+	*tree = parse_tree_indirect(sha1);
+	if (*tree == NULL) {
+		*reason = "not a tree object";
+		goto out;
+	}
+
+	if (prefix) {
+		unsigned char tree_sha1[20];
+		unsigned int mode;
+		int err;
+
+		err = get_tree_entry((*tree)->object.sha1, prefix,
+				     tree_sha1, &mode);
+		if (err || !S_ISDIR(mode)) {
+			*reason = "current working directory is untracked";
+			goto out;
+		}
+		free(*tree);
+		*tree = parse_tree_indirect(tree_sha1);
+	}
+	//free(*tree);
+	rv = 0;
+out:
+	return rv;
+}
+
+int parse_archive_args(int argc, const char **argv,
+		       struct archiver_struct **ar,
+		       const char **reason)
+{
+	const char *format = NULL;
+	const char *remote = NULL;
+	const char *prefix = NULL;
+	int list = 0;
+	int rv = -1, i;
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+
+		if (!strcmp(arg, "--list") || !strcmp(arg, "-l")) {
+			list = 1;
+			continue;
+		}
+		if (!strncmp(arg, "--format=", 9)) {
+			format = arg + 9;
+			continue;
+		}
+		if (!strncmp(arg, "--prefix=", 9)) {
+			prefix = arg + 9;
+			continue;
+		}
+		if (!strncmp(arg, "--remote=", 9)) {
+			remote = arg + 9;
+			continue;
+		}
+		if (arg[0] == '-' && isdigit(arg[1]) && arg[2] == '\0') {
+			zlib_compression_level = arg[1] - '0';
+			continue;
+		}
+		if (!strcmp(arg, "--")) {
+			i++;
+			break;
+		}
+		if (arg[0] == '-') {
+			*reason = archive_usage;
+			goto out;
+		}
+		break;
+	}
+	if (list) {
+		if (!remote) {
+			int i;
+
+			for (i = 0; i < ARRAY_SIZE(archivers); i++)
+				printf("%s\n", archivers[i].name);
+			exit(0);
+		}
+		*reason = "--list and --remote not supported together";
+		goto out;
+	}
+	if (argc - i < 1) {
+		*reason = archive_usage;
+		goto out;
+	}
+	if (!format){
+		*reason = "You must specify an archive format";
+		goto out;
+	}
+	*ar = get_archiver(format);
+	if (*ar == NULL) {
+		*reason = "Unknown archive format";
+		goto out;
+	}
+	if ((*ar)->parse_extra) {
+		if ((*ar)->parse_extra(argc, argv, reason) < 0)
+			goto out;
+	}
+	(*ar)->remote = remote;
+	(*ar)->prefix = prefix ? : "";
+	rv = i;
+out:
+	return rv;
+}
+
+int cmd_archive(int argc, const char **argv, const char *prefix)
+{
+	struct archiver_struct *ar;
+	const char *reason;
+	const char **pathspec;
+	struct tree *tree;
+	const unsigned char *commit_sha1;
+	time_t archive_time;
+	int rv;
+
+	rv = parse_archive_args(argc, argv, &ar, &reason);
+	if (rv < 0)
+		goto err;
+
+	if (ar->remote)
+		return run_remote_archiver(ar, argc, argv);
+
+	if (prefix == NULL)
+		prefix = setup_git_directory();
+
+	argv += rv;
+	if (parse_treeish_arg(argv, &tree, &commit_sha1,
+			      &archive_time, prefix, &reason) < 0)
+		goto err;
+
+	pathspec = get_pathspec(ar->prefix, argv + 1);
+
+	return ar->write_archive(tree, commit_sha1, ar->prefix,
+				 archive_time, pathspec);
+err:
+	return error("%s", reason);
+}
diff --git a/builtin-tar-tree.c b/builtin-tar-tree.c
index 61a4135..e0da01e 100644
--- a/builtin-tar-tree.c
+++ b/builtin-tar-tree.c
@@ -9,6 +9,7 @@ #include "strbuf.h"
 #include "tar.h"
 #include "builtin.h"
 #include "pkt-line.h"
+#include "archive.h"

 #define RECORDSIZE	(512)
 #define BLOCKSIZE	(RECORDSIZE * 20)
@@ -338,6 +339,71 @@ static int generate_tar(int argc, const
 	return 0;
 }

+static int write_tar_entry(const unsigned char *sha1,
+                           const char *base, int baselen,
+                           const char *filename, unsigned mode, int stage)
+{
+	static struct strbuf path;
+	int filenamelen = strlen(filename);
+	void *buffer;
+	char type[20];
+	unsigned long size;
+
+	if (!path.alloc) {
+		path.buf = xmalloc(PATH_MAX);
+		path.alloc = PATH_MAX;
+		path.len = path.eof = 0;
+	}
+	if (path.alloc < baselen + filenamelen) {
+		free(path.buf);
+		path.buf = xmalloc(baselen + filenamelen);
+		path.alloc = baselen + filenamelen;
+	}
+	memcpy(path.buf, base, baselen);
+	memcpy(path.buf + baselen, filename, filenamelen);
+	path.len = baselen + filenamelen;
+	if (S_ISDIR(mode)) {
+		strbuf_append_string(&path, "/");
+		buffer = NULL;
+		size = 0;
+	} else {
+		buffer = read_sha1_file(sha1, type, &size);
+		if (!buffer)
+			die("cannot read %s", sha1_to_hex(sha1));
+	}
+
+	write_entry(sha1, &path, mode, buffer, size);
+
+	return READ_TREE_RECURSIVE;
+}
+
+int write_tar_archive(struct tree *tree, const unsigned char *commit_sha1,
+                      const char *prefix, time_t time, const char **pathspec)
+{
+	int plen = strlen(prefix);
+
+	git_config(git_tar_config);
+
+	archive_time = time;
+
+	if (commit_sha1)
+		write_global_extended_header(commit_sha1);
+
+	if (prefix && plen > 0 && prefix[plen - 1] == '/') {
+		char *base = strdup(prefix);
+		int baselen = strlen(base);
+
+		while (baselen > 0 && base[baselen - 1] == '/')
+			base[--baselen] = '\0';
+		write_tar_entry(tree->object.sha1, "", 0, base, 040777, 0);
+		free(base);
+	}
+	read_tree_recursive(tree, prefix, plen, 0, pathspec, write_tar_entry);
+	write_trailer();
+
+	return 0;
+}
+
 static const char *exec = "git-upload-tar";

 static int remote_tar(int argc, const char **argv)
diff --git a/builtin.h b/builtin.h
index 8472c79..2391afb 100644
--- a/builtin.h
+++ b/builtin.h
@@ -15,6 +15,7 @@ extern int write_tree(unsigned char *sha

 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_apply(int argc, const char **argv, const char *prefix);
+extern int cmd_archive(int argc, const char **argv, const char *prefix);
 extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
 extern int cmd_checkout_index(int argc, const char **argv, const char *prefix);
 extern int cmd_check_ref_format(int argc, const char **argv, const
char *prefix);
diff --git a/generate-cmdlist.sh b/generate-cmdlist.sh
index ec1eda2..5450918 100755
--- a/generate-cmdlist.sh
+++ b/generate-cmdlist.sh
@@ -12,6 +12,7 @@ struct cmdname_help common_cmds[] = {"
 sort <<\EOF |
 add
 apply
+archive
 bisect
 branch
 checkout
diff --git a/git.c b/git.c
index 82c8fee..c62c5cf 100644
--- a/git.c
+++ b/git.c
@@ -218,6 +218,7 @@ static void handle_internal_command(int
 	} commands[] = {
 		{ "add", cmd_add, RUN_SETUP },
 		{ "apply", cmd_apply },
+		{ "archive", cmd_archive },
 		{ "cat-file", cmd_cat_file, RUN_SETUP },
 		{ "checkout-index", cmd_checkout_index, RUN_SETUP },
 		{ "check-ref-format", cmd_check_ref_format },
-- 
1.4.2.gbba4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-05 12:16 [PATCH 1/2] Add git-archive Franck Bui-Huu
@ 2006-09-05 19:23 ` Junio C Hamano
  2006-09-06 13:46   ` Franck Bui-Huu
  2006-09-06 20:17 ` [PATCH 1/2] Add git-archive Rene Scharfe
  1 sibling, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-05 19:23 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: git

"Franck Bui-Huu" <vagabon.xyz@gmail.com> writes:

> git-archive is a command to make TAR and ZIP archives of a git tree.
> It helps prevent a proliferation of git-{format}-tree commands.

Thanks.  I like the overall structure, at least mostly.
Also dropping -tree suffix from the command name is nice, short
and sweet.

Obviously I cannot apply this patch because it is totally
whitespace damaged, but here are some comments.

> diff --git a/archive.h b/archive.h
> new file mode 100644
> index 0000000..6c69953
> --- /dev/null
> +++ b/archive.h
> @@ -0,0 +1,43 @@
> +#ifndef ARCHIVE_H
> +#define ARCHIVE_H
> +
> +typedef int (*write_archive_fn_t)(struct tree *tree,
> +				  const unsigned char *commit_sha1,
> +				  const char *prefix,
> +				  time_t time,
> +				  const char **pathspec);

The type of the first argument might have to be different,
depending on performance analysis by Rene on struct tree vs
struct tree_desc.

> +typedef int (*parse_extra_args_fn_t)(int argc,
> +				     const char **argv,
> +				     const char **reason);
> +

I do not see a way for parse_extra to record the parameter it
successfully parsed, other than in a source-file-global, static
variable.  Not a very nice design for a library, if we are
building one from scratch.

Also, you are passing "reason" around from everywhere, but that
is used by the caller to pass it to error(), so it might be
simpler to just call error() when you want to assign to *reason,
and make an error return.  The caller does not have to do
anything if you do that.  Your way might interact with the
remote protocol better, though -- I haven't thought this part
through yet so do not take this as a serious objection, but just
a comment.

> +struct archiver_struct {
> +	const char *name;
> +	write_archive_fn_t write_archive;
> +	parse_extra_args_fn_t parse_extra;
> +	const char *remote;
> +	const char *prefix;
> +};

Somehow "struct foo_struct" makes me feel uneasy, when I do not
see the reason to call it "struct foo".

Also, the first three fields are permanent property of the
archiver while two are to wrap runtime arguments of one
particular invocation.  I would have liked...

> +extern struct archiver_struct archivers[];

... this array to have only the former, and a separate structure
"struct archive_args" to be defined.

	struct archive_args {
        	const char *remote;
                const char *prefix;
	};

After parse_archive_args finds the archiver specified with
--format=*, it can call its parse_extra to retrieve a suitable
struct that has struct archive_args embedded at the beginning,
and then set remote and prefix on the returned structure.

Then a specific parse_extra implementation can be written like this:

	static struct tar_archive_args {
        	struct archive_args a;
                int z_compress;
                ...
	};

	struct archive_args *
        tar_archive_parse_extra(int ac, const char **av)
	{
        	struct tar_archive_args *args = xcalloc(1, sizeof(*args));

		while (ac--) {
			const char *arg = *++av;
                	if (arg[0] == '-' &&
                            '0' <= arg[1] && arg[1] <= '9')
				args->z_compress = arg[1] - '0';
			...
		}
		return (struct archive_args *)args;
	}

and this can be passed to tar_archive_write_archive as an
argument.

> +extern int parse_treeish_arg(const char **argv,
> +			     struct tree **tree,
> +			     const unsigned char **commit_sha1,
> +			     time_t *archive_time,
> +			     const char *prefix,
> +			     const char **reason);
> +extern int write_tar_archive(struct tree *tree,
> +			     const unsigned char *commit_sha1,
> +			     const char *prefix,
> +			     time_t time,
> +			     const char **pathspec);

I suspect we would want "struct tree_desc" based interface,
instead of "struct tree".

> +static const char archive_usage[] = \
> +"git-archive --format=<fmt> [--prefix=<prefix>/] [-0|...|-9]
> <tree-ish> [path...]";

I do not think "-[0-9]" belongs to generic "git-archive".  It
does not make much sense to run compress on zip output.  More
like:

git-archive --format=<fmt> [--prefix=<prefix>] [format specific options] <tree-ish> [path...]

It has one potential advantage, though -- git-daemon _could_
look at it and notice that the client asks for too expensive
compression level.  But I do not think it is the only way to
achive that to make "-[0-9]" a generic option.

> +static int run_remote_archiver(struct archiver_struct *ar, int argc,
> +			       const char **argv)
> +{
> +	char *url, buf[1024];
> +	pid_t pid;
> +	int fd[2];
> +	int len, rv;
> +
> +	sprintf(buf, "git-upload-%s", ar->name);

Are you calling git-upload-{tar,zip,rar,...} here?

> +	url = strdup(ar->remote);
> +	pid = git_connect(fd, url, buf);
> +	if (pid < 0)
> +		return pid;
> +
> +	concat_argv(argc, argv, buf, sizeof(buf));
> +	packet_write(fd[1], "arguments %s\n", buf);
> +	packet_flush(fd[1]);

Parameter concatenation with SP is a bad idea for two reasons.
You cannot have SP in argument.  Also packet_write() may not
like the length of the arguments.

A sequence of one argument per packet, with prefix "argument "
for future extension so that we can send other stuff if/when
needed, followed by a flush would be preferred.

> +	/* Now, start reading from fd[0] and spit it out to stdout */
> +	rv = copy_fd(fd[0], 1);
> +	close(fd[0]);
> +	rv |= finish_connect(pid);

It was painful to bolt progress indicator support onto original
upload-pack protocol, while making sure that older and newer
clients and servers interoperate with each other.  Since this is
a new protocol, we should start with the side-band support from
the beginning (see upload-pack and look for use_sideband).

Instead of sending the payload straight out, upload-archive side
would read from the underlying archiver, and send it with
one-byte prefix to say if it is a normal payload (band 1),
message to stderr used to show progress indicator and error
messages (band 2), or error exit situation (band 3).  The client
side here would receive the packetized data and do the reverse.

> +int parse_treeish_arg(const char **argv, struct tree **tree,
> +		      const unsigned char **commit_sha1,
> +		      time_t *archive_time, const char *prefix,
> +		      const char **reason)
> +{
>...
> +	if (prefix) {
> +		unsigned char tree_sha1[20];
> +		unsigned int mode;
> +		int err;
> +
> +		err = get_tree_entry((*tree)->object.sha1, prefix,
> +				     tree_sha1, &mode);
> +		if (err || !S_ISDIR(mode)) {
> +			*reason = "current working directory is untracked";
> +			goto out;
> +		}
> +		free(*tree);
> +		*tree = parse_tree_indirect(tree_sha1);
> +	}

I like the simplicity of just optionally sending one subtree (or
the whole thing), but I think this part would be made more
efficient if we go with "struct tree_desc" based interface.

Also I wonder how this interacts with the pathspec you take from
the command line.  Personally I think this single subtree
support is good enough and limiting with pathspec is not needed.

> +int parse_archive_args(int argc, const char **argv,
> +		       struct archiver_struct **ar,
> +		       const char **reason)
> +{
>...
> +		if (arg[0] == '-' && isdigit(arg[1]) && arg[2] == '\0') {
> +			zlib_compression_level = arg[1] - '0';
> +			continue;
> +		}

Commented on this part already.

> +	if (list) {
> +		if (!remote) {
> +			int i;
> +
> +			for (i = 0; i < ARRAY_SIZE(archivers); i++)
> +				printf("%s\n", archivers[i].name);
> +			exit(0);
> +		}

You do not need a different "i" that shadows the outer one here.

> +	(*ar)->remote = remote;
> +	(*ar)->prefix = prefix ? : "";

Please be nicer to other people by staying away from GNU
extension "A ? : B", especially when A is so simple.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-05 19:23 ` Junio C Hamano
@ 2006-09-06 13:46   ` Franck Bui-Huu
  2006-09-06 20:14     ` Rene Scharfe
  2006-09-06 21:42     ` Junio C Hamano
  0 siblings, 2 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-06 13:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck Bui-Huu, git, rene.scharfe

Junio C Hamano wrote:
> "Franck Bui-Huu" <vagabon.xyz@gmail.com> writes:
> 
>> git-archive is a command to make TAR and ZIP archives of a git tree.
>> It helps prevent a proliferation of git-{format}-tree commands.
> 
> Thanks.  I like the overall structure, at least mostly.
> Also dropping -tree suffix from the command name is nice, short
> and sweet.
> 

great !

> Obviously I cannot apply this patch because it is totally
> whitespace damaged, but here are some comments.

(sigh), sorry for that.

> 
>> diff --git a/archive.h b/archive.h
>> new file mode 100644
>> index 0000000..6c69953
>> --- /dev/null
>> +++ b/archive.h
>> @@ -0,0 +1,43 @@
>> +#ifndef ARCHIVE_H
>> +#define ARCHIVE_H
>> +
>> +typedef int (*write_archive_fn_t)(struct tree *tree,
>> +				  const unsigned char *commit_sha1,
>> +				  const char *prefix,
>> +				  time_t time,
>> +				  const char **pathspec);
> 
> The type of the first argument might have to be different,
> depending on performance analysis by Rene on struct tree vs
> struct tree_desc.
> 

OK. We'll wait for Rene.

>> +typedef int (*parse_extra_args_fn_t)(int argc,
>> +				     const char **argv,
>> +				     const char **reason);
>> +
> 
> I do not see a way for parse_extra to record the parameter it
> successfully parsed, other than in a source-file-global, static
> variable.  Not a very nice design for a library, if we are
> building one from scratch.
> 

Interesting, could you explain why static variables are not nice ?

> Also, you are passing "reason" around from everywhere, but that
> is used by the caller to pass it to error(), so it might be
> simpler to just call error() when you want to assign to *reason,
> and make an error return.  The caller does not have to do
> anything if you do that.  Your way might interact with the
> remote protocol better, though -- I haven't thought this part
> through yet so do not take this as a serious objection, but just
> a comment.
> 

You might have missed my second patch:

		"[PATCH 2/2] Add git-upload-archive"

Basically the server can also use 'reason' to report a failure
description during NACK. I find it more useful than the simple
"server sent EOF" error message.

>> +struct archiver_struct {
>> +	const char *name;
>> +	write_archive_fn_t write_archive;
>> +	parse_extra_args_fn_t parse_extra;
>> +	const char *remote;
>> +	const char *prefix;
>> +};
> 
> Somehow "struct foo_struct" makes me feel uneasy, when I do not
> see the reason to call it "struct foo".
> 

no strong feeling here. I'll call it "struct archiver". BTW there
are a couple of "struct foo_struct" in git source...

> Also, the first three fields are permanent property of the
> archiver while two are to wrap runtime arguments of one
> particular invocation.  I would have liked...
> 

'remote' case is not a generic argument that can be passed to
archiver backends. Remember, the archiver backends only do local
operation. They do not know about remote protocol which is part
of git-archive command. That's the reason why I think we shouldn't
make this field part of arguments structure. It completely change
the behaviour of git-archive when it is used.

>> +extern struct archiver_struct archivers[];
> 
> ... this array to have only the former, and a separate structure
> "struct archive_args" to be defined.
> 
> 	struct archive_args {
>         	const char *remote;
>                 const char *prefix;
> 	};
> 
> After parse_archive_args finds the archiver specified with
> --format=*, it can call its parse_extra to retrieve a suitable
> struct that has struct archive_args embedded at the beginning,
> and then set remote and prefix on the returned structure.
> 

One bad side is that we need to malloc this embedded structure.
Therefore we have to free this embedded structure somewhere. 

We could have the following structures in archive.h, but we need
to export all these archiver backend definitions.

	struct tar_archive_args {
		int z_compress;
	};

	struct tar_archive_args {
		[...]
	};

	struct archive_args {
		const char		*prefix;
		struct tree		*tree;
		const unsigned char	*commit_sha1;
		const char		*prefix;
		time_t			time;
		const char		**pathspec;
		union {
			struct tar_archive_args tar_args;
			struct zip_archive_args zip_args;
		} u;
	};

	struct archiver {
		const char *name;
		write_archive_fn_t write_archive;
		parse_extra_args_fn_t parse_extra;
		const char *remote;
	};

	typedef int (*write_archive_fn_t)(struct archive_args *archive_args);

> Then a specific parse_extra implementation can be written like this:
> 
> 	static struct tar_archive_args {
>         	struct archive_args a;
>                 int z_compress;
>                 ...
> 	};
> 
> 	struct archive_args *
>         tar_archive_parse_extra(int ac, const char **av)
> 	{
>         	struct tar_archive_args *args = xcalloc(1, sizeof(*args));
> 
> 		while (ac--) {
> 			const char *arg = *++av;
>                 	if (arg[0] == '-' &&
>                             '0' <= arg[1] && arg[1] <= '9')
> 				args->z_compress = arg[1] - '0';
> 			...
> 		}
> 		return (struct archive_args *)args;
> 	}
> 
> and this can be passed to tar_archive_write_archive as an
> argument.
> 
>> +extern int parse_treeish_arg(const char **argv,
>> +			     struct tree **tree,
>> +			     const unsigned char **commit_sha1,
>> +			     time_t *archive_time,
>> +			     const char *prefix,
>> +			     const char **reason);
>> +extern int write_tar_archive(struct tree *tree,
>> +			     const unsigned char *commit_sha1,
>> +			     const char *prefix,
>> +			     time_t time,
>> +			     const char **pathspec);
> 
> I suspect we would want "struct tree_desc" based interface,
> instead of "struct tree".
> 
>> +static const char archive_usage[] = \
>> +"git-archive --format=<fmt> [--prefix=<prefix>/] [-0|...|-9]
>> <tree-ish> [path...]";
> 
> I do not think "-[0-9]" belongs to generic "git-archive".  It
> does not make much sense to run compress on zip output.  More
> like:
> 

I forgot to remove that.

> git-archive --format=<fmt> [--prefix=<prefix>] [format specific options] <tree-ish> [path...]
> 

I forgot to change that.

> 
>> +static int run_remote_archiver(struct archiver_struct *ar, int argc,
>> +			       const char **argv)
>> +{
>> +	char *url, buf[1024];
>> +	pid_t pid;
>> +	int fd[2];
>> +	int len, rv;
>> +
>> +	sprintf(buf, "git-upload-%s", ar->name);
> 
> Are you calling git-upload-{tar,zip,rar,...} here?
> 

yes. Actually git-upload-{tar,zip,...} commands are going to be
removed, but git-daemon know them as a daemon service. It will
map these services to the generic "git-upload-archive" command.
One benefit is that we could still disable TAR format and enable
TGZ one. Please take a look to the second patch that adds
git-upload-archive command.

>> +	url = strdup(ar->remote);
>> +	pid = git_connect(fd, url, buf);
>> +	if (pid < 0)
>> +		return pid;
>> +
>> +	concat_argv(argc, argv, buf, sizeof(buf));
>> +	packet_write(fd[1], "arguments %s\n", buf);
>> +	packet_flush(fd[1]);
> 
> Parameter concatenation with SP is a bad idea for two reasons.
> You cannot have SP in argument.  Also packet_write() may not
> like the length of the arguments.
> 
> A sequence of one argument per packet, with prefix "argument "
> for future extension so that we can send other stuff if/when
> needed, followed by a flush would be preferred.

Absolutely.

> 
>> +	/* Now, start reading from fd[0] and spit it out to stdout */
>> +	rv = copy_fd(fd[0], 1);
>> +	close(fd[0]);
>> +	rv |= finish_connect(pid);
> 
> It was painful to bolt progress indicator support onto original
> upload-pack protocol, while making sure that older and newer
> clients and servers interoperate with each other.  Since this is
> a new protocol, we should start with the side-band support from
> the beginning (see upload-pack and look for use_sideband).
> 
> Instead of sending the payload straight out, upload-archive side
> would read from the underlying archiver, and send it with
> one-byte prefix to say if it is a normal payload (band 1),
> message to stderr used to show progress indicator and error
> messages (band 2), or error exit situation (band 3).  The client
> side here would receive the packetized data and do the reverse.
> 

OK, I'll take a look

>> +int parse_treeish_arg(const char **argv, struct tree **tree,
>> +		      const unsigned char **commit_sha1,
>> +		      time_t *archive_time, const char *prefix,
>> +		      const char **reason)
>> +{
>> ...
>> +	if (prefix) {
>> +		unsigned char tree_sha1[20];
>> +		unsigned int mode;
>> +		int err;
>> +
>> +		err = get_tree_entry((*tree)->object.sha1, prefix,
>> +				     tree_sha1, &mode);
>> +		if (err || !S_ISDIR(mode)) {
>> +			*reason = "current working directory is untracked";
>> +			goto out;
>> +		}
>> +		free(*tree);
>> +		*tree = parse_tree_indirect(tree_sha1);
>> +	}
> 
> I like the simplicity of just optionally sending one subtree (or
> the whole thing), but I think this part would be made more
> efficient if we go with "struct tree_desc" based interface.
> 
> Also I wonder how this interacts with the pathspec you take from
> the command line.  Personally I think this single subtree
> support is good enough and limiting with pathspec is not needed.
> 

As I said in a previous email, I'm new with git internals. I prefer
let that part to others who better have a better knowledge on the
subject. I'll dig into that later though...

>> +int parse_archive_args(int argc, const char **argv,
>> +		       struct archiver_struct **ar,
>> +		       const char **reason)
>> +{
>> ...
>> +		if (arg[0] == '-' && isdigit(arg[1]) && arg[2] == '\0') {
>> +			zlib_compression_level = arg[1] - '0';
>> +			continue;
>> +		}
> 
> Commented on this part already.
> 

OK

>> +	if (list) {
>> +		if (!remote) {
>> +			int i;
>> +
>> +			for (i = 0; i < ARRAY_SIZE(archivers); i++)
>> +				printf("%s\n", archivers[i].name);
>> +			exit(0);
>> +		}
> 
> You do not need a different "i" that shadows the outer one here.

Yes.

> 
>> +	(*ar)->remote = remote;
>> +	(*ar)->prefix = prefix ? : "";
> 
> Please be nicer to other people by staying away from GNU
> extension "A ? : B", especially when A is so simple.
> 
sure.

Thanks for your comments,

		Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-06 13:46   ` Franck Bui-Huu
@ 2006-09-06 20:14     ` Rene Scharfe
  2006-09-06 20:29       ` Jakub Narebski
  2006-09-06 21:42     ` Junio C Hamano
  1 sibling, 1 reply; 40+ messages in thread
From: Rene Scharfe @ 2006-09-06 20:14 UTC (permalink / raw)
  To: Franck; +Cc: Junio C Hamano, git

Franck Bui-Huu schrieb:
> Junio C Hamano wrote:
>> "Franck Bui-Huu" <vagabon.xyz@gmail.com> writes:
>>
>>> git-archive is a command to make TAR and ZIP archives of a git tree.
>>> It helps prevent a proliferation of git-{format}-tree commands.
>> Thanks.  I like the overall structure, at least mostly.
>> Also dropping -tree suffix from the command name is nice, short
>> and sweet.
>>
> 
> great !
> 
>> Obviously I cannot apply this patch because it is totally
>> whitespace damaged, but here are some comments.
> 
> (sigh), sorry for that.
> 
>>> diff --git a/archive.h b/archive.h
>>> new file mode 100644
>>> index 0000000..6c69953
>>> --- /dev/null
>>> +++ b/archive.h
>>> @@ -0,0 +1,43 @@
>>> +#ifndef ARCHIVE_H
>>> +#define ARCHIVE_H
>>> +
>>> +typedef int (*write_archive_fn_t)(struct tree *tree,
>>> +				  const unsigned char *commit_sha1,
>>> +				  const char *prefix,
>>> +				  time_t time,
>>> +				  const char **pathspec);
>> The type of the first argument might have to be different,
>> depending on performance analysis by Rene on struct tree vs
>> struct tree_desc.
>>
> 
> OK. We'll wait for Rene.

The performance difference I noticed was caused by a memleak; the speed
advantage of a struct tree_desc based traverser is significant if you
look only at the traversers' performance, but it is lost in the noise
of the "real" work that the payload function is doing (see my other
mail).

>>> +static int run_remote_archiver(struct archiver_struct *ar, int argc,
>>> +			       const char **argv)
>>> +{
>>> +	char *url, buf[1024];
>>> +	pid_t pid;
>>> +	int fd[2];
>>> +	int len, rv;
>>> +
>>> +	sprintf(buf, "git-upload-%s", ar->name);
>> Are you calling git-upload-{tar,zip,rar,...} here?
>>
> 
> yes. Actually git-upload-{tar,zip,...} commands are going to be
> removed, but git-daemon know them as a daemon service. It will
> map these services to the generic "git-upload-archive" command.
> One benefit is that we could still disable TAR format and enable
> TGZ one. Please take a look to the second patch that adds
> git-upload-archive command.

I don't think git-daemon should need to care about specific
archivers.  Policy decisions, like disallowing certain archive types
or compression levels, should be made in git-upload-archive.  This
way all code regarding archive uploading is found in one place:
git-upload-archive.  We can keep git-upload-tar as a legacy
interface, but please use only git-upload-archive for the new stuff
(and not git-upload-zip etc.).

>>> +int parse_treeish_arg(const char **argv, struct tree **tree,
>>> +		      const unsigned char **commit_sha1,
>>> +		      time_t *archive_time, const char *prefix,
>>> +		      const char **reason)
>>> +{
>>> ...
>>> +	if (prefix) {
>>> +		unsigned char tree_sha1[20];
>>> +		unsigned int mode;
>>> +		int err;
>>> +
>>> +		err = get_tree_entry((*tree)->object.sha1, prefix,
>>> +				     tree_sha1, &mode);
>>> +		if (err || !S_ISDIR(mode)) {
>>> +			*reason = "current working directory is untracked";
>>> +			goto out;
>>> +		}
>>> +		free(*tree);
>>> +		*tree = parse_tree_indirect(tree_sha1);
>>> +	}
>> I like the simplicity of just optionally sending one subtree (or
>> the whole thing), but I think this part would be made more
>> efficient if we go with "struct tree_desc" based interface.
>>
>> Also I wonder how this interacts with the pathspec you take from
>> the command line.  Personally I think this single subtree
>> support is good enough and limiting with pathspec is not needed.

[Note: There's potential for confusion here because we have two
types of prefixes.  One is the present working directory inside the
git archive, the other is the one specified with --prefix=.  Here
we have the working directory kind of prefix.]

IMHO should work like in the following example, and the code above
cuts off the Documentation part:

   $ cd Documentation
   $ git-archive --format=tar --prefix=v1.0/ HEAD howto | tar tf -
   v1.0/howto/
   v1.0/howto/isolate-bugs-with-bisect.txt
   ...

I agree that simple subtree matching would be enough, at least for
now.

René

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-05 12:16 [PATCH 1/2] Add git-archive Franck Bui-Huu
  2006-09-05 19:23 ` Junio C Hamano
@ 2006-09-06 20:17 ` Rene Scharfe
  1 sibling, 0 replies; 40+ messages in thread
From: Rene Scharfe @ 2006-09-06 20:17 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Junio C Hamano, git

Franck Bui-Huu schrieb:
> diff --git a/builtin-tar-tree.c b/builtin-tar-tree.c
> index 61a4135..e0da01e 100644
> --- a/builtin-tar-tree.c
> +++ b/builtin-tar-tree.c
> @@ -9,6 +9,7 @@ #include "strbuf.h"
> #include "tar.h"
> #include "builtin.h"
> #include "pkt-line.h"
> +#include "archive.h"
> 
> #define RECORDSIZE    (512)
> #define BLOCKSIZE    (RECORDSIZE * 20)
> @@ -338,6 +339,71 @@ static int generate_tar(int argc, const
>     return 0;
> }
> 
> +static int write_tar_entry(const unsigned char *sha1,
> +                           const char *base, int baselen,
> +                           const char *filename, unsigned mode, int stage)
> +{
> +    static struct strbuf path;
> +    int filenamelen = strlen(filename);
> +    void *buffer;
> +    char type[20];
> +    unsigned long size;
> +
> +    if (!path.alloc) {
> +        path.buf = xmalloc(PATH_MAX);
> +        path.alloc = PATH_MAX;
> +        path.len = path.eof = 0;
> +    }
> +    if (path.alloc < baselen + filenamelen) {
> +        free(path.buf);
> +        path.buf = xmalloc(baselen + filenamelen);
> +        path.alloc = baselen + filenamelen;
> +    }
> +    memcpy(path.buf, base, baselen);
> +    memcpy(path.buf + baselen, filename, filenamelen);
> +    path.len = baselen + filenamelen;
> +    if (S_ISDIR(mode)) {
> +        strbuf_append_string(&path, "/");
> +        buffer = NULL;
> +        size = 0;
> +    } else {
> +        buffer = read_sha1_file(sha1, type, &size);
> +        if (!buffer)
> +            die("cannot read %s", sha1_to_hex(sha1));
> +    }
> +
> +    write_entry(sha1, &path, mode, buffer, size);

Here occurs the memory leak that I've been talking about.  buffer needs
to be free'd.

> +
> +    return READ_TREE_RECURSIVE;
> +}

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-06 20:14     ` Rene Scharfe
@ 2006-09-06 20:29       ` Jakub Narebski
  2006-09-08 20:21         ` Rene Scharfe
  0 siblings, 1 reply; 40+ messages in thread
From: Jakub Narebski @ 2006-09-06 20:29 UTC (permalink / raw)
  To: git

Rene Scharfe wrote:

> IMHO should work like in the following example, and the code above
> cuts off the Documentation part:
> 
>    $ cd Documentation
>    $ git-archive --format=tar --prefix=v1.0/ HEAD howto | tar tf -
>    v1.0/howto/
>    v1.0/howto/isolate-bugs-with-bisect.txt
>    ...
> 
> I agree that simple subtree matching would be enough, at least for
> now.

What about

     $ git-archive --format=tar --prefix=v1.0/ HEAD:Documentation/howto

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-06 13:46   ` Franck Bui-Huu
  2006-09-06 20:14     ` Rene Scharfe
@ 2006-09-06 21:42     ` Junio C Hamano
  2006-09-07  6:32       ` Franck Bui-Huu
  1 sibling, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-06 21:42 UTC (permalink / raw)
  To: Franck; +Cc: git

Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

>>> +typedef int (*parse_extra_args_fn_t)(int argc,
>>> +				     const char **argv,
>>> +				     const char **reason);
>>> +
>> 
>> I do not see a way for parse_extra to record the parameter it
>> successfully parsed, other than in a source-file-global, static
>> variable.  Not a very nice design for a library, if we are
>> building one from scratch.
>
> Interesting, could you explain why static variables are not nice ?

Mostly taste and a little bit of re-entrancy worries.

> You might have missed my second patch:
>
> 		"[PATCH 2/2] Add git-upload-archive"
>
> Basically the server can also use 'reason' to report a failure
> description during NACK. I find it more useful than the simple
> "server sent EOF" error message.

That's a good intention, but we would also need to convey the
"server side found problem and died with these error() output"
anyway, so it would be covered either way (see how error()/die()
messages from git-upload-pack are given to git-fetch-pack over
the wire).

> 'remote' case is not a generic argument that can be passed to
> archiver backends. Remember, the archiver backends only do local
> operation. They do not know about remote protocol which is part
> of git-archive command. That's the reason why I think we shouldn't
> make this field part of arguments structure.

Ok.  Passing that as a separate paramter would make sense.

>> After parse_archive_args finds the archiver specified with
>> --format=*, it can call its parse_extra to retrieve a suitable
>> struct that has struct archive_args embedded at the beginning,
>> and then set remote and prefix on the returned structure.
>
> One bad side is that we need to malloc this embedded structure.

Not at all, if you read the example I did you would notice that
I changed parse_extra for each backend to return this structure
allocated for that particular backend.

>>> +static int run_remote_archiver(struct archiver_struct *ar, int argc,
>>> +			       const char **argv)
>>> +{
>>> +	char *url, buf[1024];
>>> +	pid_t pid;
>>> +	int fd[2];
>>> +	int len, rv;
>>> +
>>> +	sprintf(buf, "git-upload-%s", ar->name);
>> 
>> Are you calling git-upload-{tar,zip,rar,...} here?
>
> yes. Actually git-upload-{tar,zip,...} commands are going to be
> removed, but git-daemon know them as a daemon service.

That would break "git-archive --remove=ssh://site/repo treeish"
wouldn't it?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-06 21:42     ` Junio C Hamano
@ 2006-09-07  6:32       ` Franck Bui-Huu
  2006-09-07  7:19         ` Junio C Hamano
  0 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07  6:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

2006/9/6, Junio C Hamano <junkio@cox.net>:
> Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
>
> > Interesting, could you explain why static variables are not nice ?
>
> Mostly taste and a little bit of re-entrancy worries.
>

OK.

> > You might have missed my second patch:
> >
> >               "[PATCH 2/2] Add git-upload-archive"
> >
> > Basically the server can also use 'reason' to report a failure
> > description during NACK. I find it more useful than the simple
> > "server sent EOF" error message.
>
> That's a good intention, but we would also need to convey the
> "server side found problem and died with these error() output"
> anyway,

OK.

>
> > One bad side is that we need to malloc this embedded structure.
>
> Not at all, if you read the example I did you would notice that
> I changed parse_extra for each backend to return this structure
> allocated for that particular backend.
>

sorry I wasn't clear. My point was that the structure need to be
'mallocated'. Which funtion allocate it doesn't matter, we will need
to free it later. That's what I tried to avoid with the alternative I
sent you in my previous email. Do you think we could use it ?

> >>> +static int run_remote_archiver(struct archiver_struct *ar, int argc,
> >>> +                          const char **argv)
> >>> +{
> >>> +   char *url, buf[1024];
> >>> +   pid_t pid;
> >>> +   int fd[2];
> >>> +   int len, rv;
> >>> +
> >>> +   sprintf(buf, "git-upload-%s", ar->name);
> >>
> >> Are you calling git-upload-{tar,zip,rar,...} here?
> >
> > yes. Actually git-upload-{tar,zip,...} commands are going to be
> > removed, but git-daemon know them as a daemon service.
>
> That would break "git-archive --remove=ssh://site/repo treeish"
> wouldn't it?
>

Yes. But couldn't we make some alias like:

alias git-upload-tar='git-upload-archive --format=tar'
alias git-upload-zip='git-upload-zip --format=zip'

and the same could be done if we plan to remote git-tar-tree command:

alias git-tar-tree='git-archive --format=tar'

These alias would be internal to git (always defined)

-- 
               Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-07  6:32       ` Franck Bui-Huu
@ 2006-09-07  7:19         ` Junio C Hamano
  2006-09-07  7:53           ` Franck Bui-Huu
  0 siblings, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-07  7:19 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Junio C Hamano, git

"Franck Bui-Huu" <vagabon.xyz@gmail.com> writes:

> sorry I wasn't clear. My point was that the structure need to be
> 'mallocated'. Which funtion allocate it doesn't matter, we will need
> to free it later. That's what I tried to avoid with the alternative I
> sent you in my previous email. Do you think we could use it ?

I do not think allocation and free matter much, but if you want
to do it that way, enumerating all the possible struct in one
place is fine by me for this application.  After all we are not
defining a plug-in architecture that lets others to write their
archive backends and load them without recompiling git-archive
binary.

>> >>> +static int run_remote_archiver(struct archiver_struct *ar, int argc,
>> >>> +                          const char **argv)
>> >>> +{
>> >>> +   char *url, buf[1024];
>> >>> +   pid_t pid;
>> >>> +   int fd[2];
>> >>> +   int len, rv;
>> >>> +
>> >>> +   sprintf(buf, "git-upload-%s", ar->name);
>> >>
>> >> Are you calling git-upload-{tar,zip,rar,...} here?
>> >
>> > yes. Actually git-upload-{tar,zip,...} commands are going to be
>> > removed, but git-daemon know them as a daemon service.
>>
>> That would break "git-archive --remove=ssh://site/repo treeish"
>> wouldn't it?
>
> Yes. But couldn't we make some alias like:
>...
> These alias would be internal to git (always defined)

You _could_ work things around by building special cases into
the system, but I would rather avoid doing that unless
necessary.

Is there a reason that "git-upload-archive --format=tar" is not
desirable at this point of the code?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-07  7:19         ` Junio C Hamano
@ 2006-09-07  7:53           ` Franck Bui-Huu
  2006-09-07  8:16             ` Junio C Hamano
  0 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07  7:53 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck Bui-Huu, git, Rene Scharfe

Junio C Hamano wrote:
> "Franck Bui-Huu" <vagabon.xyz@gmail.com> writes:
>> These alias would be internal to git (always defined)
> 
> You _could_ work things around by building special cases into
> the system, but I would rather avoid doing that unless
> necessary.
> 

yes that was not a great idea...

> Is there a reason that "git-upload-archive --format=tar" is not
> desirable at this point of the code?
> 

My first intention was to enable/disable specific archive format
through daemon service. But we could in an other way: send a 
"git-upload-archive --format=tar" request then in upload-archive
check that git-upload-tar service is enabled. This service would
exist even if git-upload-tar is not a valid command.

But Rene thinks that part should be in git-upload-archive. I dunno
what is the best direction. I have used git-daemon service because
the service infrastucture already allow us to achieve that.

I'm sorry to not make things faster. I'm new to git internals and
unfortunately I'm busy to do some other (crap) things. I'll send
a new patchset this morning and I'll sum up what's done and what
we still need to do.

Thanks
		Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-07  7:53           ` Franck Bui-Huu
@ 2006-09-07  8:16             ` Junio C Hamano
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
  0 siblings, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-07  8:16 UTC (permalink / raw)
  To: Franck; +Cc: git

Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

> My first intention was to enable/disable specific archive format
> through daemon service. But we could in an other way: send a 
> "git-upload-archive --format=tar" request then in upload-archive
> check that git-upload-tar service is enabled. This service would
> exist even if git-upload-tar is not a valid command.
>
> But Rene thinks that part should be in git-upload-archive. I dunno
> what is the best direction. I have used git-daemon service because
> the service infrastucture already allow us to achieve that.

Hmph.  daemon_service was a nice idea but the current
implementation falls short by not giving finer control such as
"this service is Ok with such and such option but not this
option".  Here we ideally would want to say something like
"git-archive is fine with upload-tar but not upload-zip" or "Ok
as long as upload-tar's numeric arg is less than 6".  Needs some
thought _if_ we plan to add tons of services to the daemon.

> I'm sorry to not make things faster. I'm new to git internals and
> unfortunately I'm busy to do some other (crap) things. I'll send
> a new patchset this morning and I'll sum up what's done and what
> we still need to do.

Taking things slowly is just fine.  Otherwise _I_ would burn out
;-).

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Add git-archive [take #2]
  2006-09-07  8:16             ` Junio C Hamano
@ 2006-09-07 13:08               ` Franck Bui-Huu
  2006-09-07 13:12                 ` [PATCH 1/4] Add git-archive Franck Bui-Huu
                                   ` (5 more replies)
  0 siblings, 6 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07 13:08 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck, git, Rene Scharfe, Jakub Narebski

I'm sending a new version of the patchset which allows 'git-archive'
and 'git-upload-archive' command. I tried to take into account all
feedbacks made by Junio and Rene, but there are still some open points.

  1/ Allow 'git-upload-archive' command to enable/disable some
     formats. This should be done by 'git-upload-archive'.

  2/ Can I remove 'git-upload-tar' command ? If so, the current
     implementation of 'git-upload-archive' won't work with the old
     'git-tar-tree --remote' command because the protocol needs a
     "argument --format" to be passed to the server. Therefore we can
     either modify 'git-tar-tree' or simply remove it.

  3/ Should I kill 'git-zip-tree' command ? If so should I rename
     builtin-zip-tree.c file into zip-tree.c or something else ?

  4/ Progress indicator support. Junio wants to mimic upload-pack for
     that. But it will lead in a lot of duplicated code if we don't
     try to share code. Can we copy that code anyways and clean up
     later ?

  5/ Should we use "struct tree_desc" based interface for tree parsing
     ? According to Rene it doesn't worth it as soon as you actually
     start to do something to the trees

  6/ Simple subtree matching would be enough, at least for now.

Did I forgot something else ?

Point 1 seems to be important. As soons as we plug git-upload-archive,
we need to control what kind of formats the server deal with.

Point 2 and 3 are easy to achieve, just need a "go wild" authorization.

Point 4, seems to be high in priority since we don't want to deal with
clients and servers interoperate issues.

Points 5 and 6 can be done later. These improvements won't break if
done after releasing.

		Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/4] Add git-archive
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
@ 2006-09-07 13:12                 ` Franck Bui-Huu
  2006-09-08  2:35                   ` Junio C Hamano
  2006-09-07 13:12                 ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
                                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07 13:12 UTC (permalink / raw)
  To: junkio; +Cc: rene.scharfe, git

git-archive is a command to make TAR and ZIP archives of a git tree.
It helps prevent a proliferation of git-{format}-tree commands.

Instead of directly calling git-{tar,zip}-tree command, it defines
a very simple API, that archiver should implement and register in
"git-archive.c". This API is made up by 2 functions whose prototype
is defined in "archive.h" file.

 - The first one is used to parse 'extra' parameters which have
   signification only for the specific archiver. That would allow
   different archive backends to have different kind of options.

 - The second one is used to ask to an archive backend to build
   the archive given some already resolved parameters.

The main reason for making this API is to avoid using
git-{tar,zip}-tree commands, hence making them useless. Maybe it's
time for them to die ?

It also implements remote operations by defining a very simple
protocol: it first sends the name of the specific uploader followed
the repository name (git-upload-tar git://example.org/repo.git).
Then it sends options. It's done by sending a sequence of one
argument per packet, with prefix "argument ", followed by a flush.

The remote protocol is implemented in "git-archive.c" for client
side and is triggered by "--remote=<repo>" option. For example,
to fetch a TAR archive in a remote repo, you can issue:

$ git archive --format=tar --remote=git://xxx/yyy/zzz.git HEAD

We choose to not make a new command "git-fetch-archive" for example,
avoind one more GIT command which should be nice for users (less
commands to remember, keeps existing --remote option).

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
---
 .gitignore                    |    1 
 Documentation/git-archive.txt |  100 ++++++++++++++++++
 Makefile                      |    3 -
 archive.h                     |   41 +++++++
 builtin-archive.c             |  227 +++++++++++++++++++++++++++++++++++++++++
 builtin.h                     |    1 
 generate-cmdlist.sh           |    1 
 git.c                         |    1 
 8 files changed, 374 insertions(+), 1 deletions(-)

diff --git a/.gitignore b/.gitignore
index 78cb671..a3e7ca1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ git-apply
 git-applymbox
 git-applypatch
 git-archimport
+git-archive
 git-bisect
 git-branch
 git-cat-file
diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
new file mode 100644
index 0000000..913528d
--- /dev/null
+++ b/Documentation/git-archive.txt
@@ -0,0 +1,100 @@
+git-archive(1)
+==============
+
+NAME
+----
+git-archive - Creates a archive of the files in the named tree
+
+
+SYNOPSIS
+--------
+'git-archive' --format=<fmt> [--list] [--prefix=<prefix>/] [<extra>]
+	      [--remote=<repo>] <tree-ish> [path...]
+
+DESCRIPTION
+-----------
+Creates an archive of the specified format containing the tree
+structure for the named tree.  If <prefix> is specified it is
+prepended to the filenames in the archive.
+
+'git-archive' behaves differently when given a tree ID versus when
+given a commit ID or tag ID.  In the first case the current time is
+used as modification time of each file in the archive.  In the latter
+case the commit time as recorded in the referenced commit object is
+used instead.  Additionally the commit ID is stored in a global
+extended pax header if the tar format is used; it can be extracted
+using 'git-get-tar-commit-id'. In ZIP files it is stored as a file
+comment.
+
+OPTIONS
+-------
+
+--format=<fmt>::
+	Format of the resulting archive: 'tar', 'zip'...
+
+--list::
+	Show all available formats.
+
+--prefix=<prefix>/::
+	Prepend <prefix>/ to each filename in the archive.
+
+<extra>::
+	This can be any options that the archiver backend understand.
+
+--remote=<repo>::
+	Instead of making a tar archive from local repository,
+	retrieve a tar archive from a remote repository.
+
+<tree-ish>::
+	The tree or commit to produce an archive for.
+
+path::
+	If one or more paths are specified, include only these in the
+	archive, otherwise include all files and subdirectories.
+
+CONFIGURATION
+-------------
+By default, file and directories modes are set to 0666 or 0777 in tar
+archives.  It is possible to change this by setting the "umask" variable
+in the repository configuration as follows :
+
+[tar]
+        umask = 002	;# group friendly
+
+The special umask value "user" indicates that the user's current umask
+will be used instead. The default value remains 0, which means world
+readable/writable files and directories.
+
+EXAMPLES
+--------
+git archive --format=tar --prefix=junk/ HEAD | (cd /var/tmp/ && tar xf -)::
+
+	Create a tar archive that contains the contents of the
+	latest commit on the current branch, and extracts it in
+	`/var/tmp/junk` directory.
+
+git archive --format=tar --prefix=git-1.4.0/ v1.4.0 | gzip >git-1.4.0.tar.gz::
+
+	Create a compressed tarball for v1.4.0 release.
+
+git archive --format=tar --prefix=git-1.4.0/ v1.4.0{caret}\{tree\} | gzip >git-1.4.0.tar.gz::
+
+	Create a compressed tarball for v1.4.0 release, but without a
+	global extended pax header.
+
+git archive --format=zip --prefix=git-docs/ HEAD:Documentation/ > git-1.4.0-docs.zip::
+
+	Put everything in the current head's Documentation/ directory
+	into 'git-1.4.0-docs.zip', with the prefix 'git-docs/'.
+
+Author
+------
+Written by Franck Bui-Huu and Rene Scharfe.
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the gitlink:git[7] suite
diff --git a/Makefile b/Makefile
index 389daf7..51ed4dd 100644
--- a/Makefile
+++ b/Makefile
@@ -242,7 +242,7 @@ LIB_FILE=libgit.a
 XDIFF_LIB=xdiff/lib.a
 
 LIB_H = \
-	blob.h cache.h commit.h csum-file.h delta.h \
+	archive.h blob.h cache.h commit.h csum-file.h delta.h \
 	diff.h object.h pack.h para-walk.h pkt-line.h quote.h refs.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h
@@ -267,6 +267,7 @@ LIB_OBJS = \
 BUILTIN_OBJS = \
 	builtin-add.o \
 	builtin-apply.o \
+	builtin-archive.o \
 	builtin-cat-file.o \
 	builtin-checkout-index.o \
 	builtin-check-ref-format.o \
diff --git a/archive.h b/archive.h
new file mode 100644
index 0000000..f33398e
--- /dev/null
+++ b/archive.h
@@ -0,0 +1,41 @@
+#ifndef ARCHIVE_H
+#define ARCHIVE_H
+
+#define MAX_EXTRA_ARGS	32
+#define MAX_ARGS	(MAX_EXTRA_ARGS + 32)
+
+struct archiver_args {
+	const char *base;
+	struct tree *tree;
+	const unsigned char *commit_sha1;
+	time_t time;
+	const char **pathspec;
+	void *extra;
+};
+
+typedef int (*write_archive_fn_t)(struct archiver_args *);
+
+typedef void *(*parse_extra_args_fn_t)(int argc, const char **argv);
+
+struct archiver {
+	const char *name;
+	const char *remote;
+	struct archiver_args args;
+	write_archive_fn_t write_archive;
+	parse_extra_args_fn_t parse_extra;
+};
+
+extern struct archiver archivers[];
+
+extern int parse_archive_args(int argc,
+			      const char **argv,
+			      struct archiver **ar);
+
+extern void parse_treeish_arg(const char **treeish,
+			      struct archiver_args *ar_args,
+			      const char *prefix);
+
+extern void parse_pathspec_arg(const char **pathspec,
+			       struct archiver_args *args);
+
+#endif	/* ARCHIVE_H */
diff --git a/builtin-archive.c b/builtin-archive.c
new file mode 100644
index 0000000..6064358
--- /dev/null
+++ b/builtin-archive.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2006 Franck Bui-Huu
+ * Copyright (c) 2006 Rene Scharfe
+ */
+#include <time.h>
+#include "cache.h"
+#include "builtin.h"
+#include "archive.h"
+#include "commit.h"
+#include "tree-walk.h"
+#include "exec_cmd.h"
+#include "pkt-line.h"
+
+static const char archive_usage[] = \
+"git-archive --format=<fmt> [--prefix=<prefix>/] [<extra>] <tree-ish> [path...]";
+
+
+struct archiver archivers[] = { };
+
+
+static int run_remote_archiver(struct archiver *ar, int argc,
+			       const char **argv)
+{
+	char *url, buf[1024];
+	int fd[2], i, len, rv;
+	pid_t pid;
+
+	sprintf(buf, "git-upload-archive");
+
+	url = strdup(ar->remote);
+	pid = git_connect(fd, url, buf);
+	if (pid < 0)
+		return pid;
+
+	for (i = 1; i < argc; i++) {
+		if (!strncmp(argv[i], "--remote=", 9))
+			continue;
+		packet_write(fd[1], "argument %s\n", argv[i]);
+	}
+	packet_flush(fd[1]);
+
+	len = packet_read_line(fd[0], buf, sizeof(buf));
+	if (!len)
+		die("git-archive: expected ACK/NAK, got EOF");
+	if (buf[len-1] == '\n')
+		buf[--len] = 0;
+	if (strcmp(buf, "ACK")) {
+		if (len > 5 && !strncmp(buf, "NACK ", 5))
+			die("git-archive: NACK %s", buf + 5);
+		die("git-archive: protocol error");
+	}
+
+	len = packet_read_line(fd[0], buf, sizeof(buf));
+	if (len)
+		die("git-archive: expected a flush");
+
+	/* Now, start reading from fd[0] and spit it out to stdout */
+	rv = copy_fd(fd[0], 1);
+
+	close(fd[0]);
+	rv |= finish_connect(pid);
+
+	return !!rv;
+}
+
+static struct archiver *get_archiver(const char *name)
+{
+	struct archiver *ar = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(archivers); i++) {
+		if (!strcmp(name, archivers[i].name)) {
+			ar = &archivers[i];
+			break;
+		}
+	}
+	return ar;
+}
+
+void parse_pathspec_arg(const char **pathspec, struct archiver_args *ar_args)
+{
+	ar_args->pathspec = get_pathspec(ar_args->base, pathspec);
+}
+
+void parse_treeish_arg(const char **argv, struct archiver_args *ar_args,
+		       const char *prefix)
+{
+	const char *name = argv[0];
+	const unsigned char *commit_sha1;
+	time_t archive_time;
+	struct tree *tree;
+	struct commit *commit;
+	unsigned char sha1[20];
+
+	if (get_sha1(name, sha1))
+		die("Not a valid object name");
+
+	commit = lookup_commit_reference_gently(sha1, 1);
+	if (commit) {
+		commit_sha1 = commit->object.sha1;
+		archive_time = commit->date;
+	} else {
+		archive_time = time(NULL);
+	}
+
+	tree = parse_tree_indirect(sha1);
+	if (tree == NULL)
+		die("not a tree object");
+
+	if (prefix) {
+		unsigned char tree_sha1[20];
+		unsigned int mode;
+		int err;
+
+		err = get_tree_entry(tree->object.sha1, prefix,
+				     tree_sha1, &mode);
+		if (err || !S_ISDIR(mode))
+			die("current working directory is untracked");
+
+		free(tree);
+		tree = parse_tree_indirect(tree_sha1);
+	}
+	//free(tree);
+	ar_args->tree = tree;
+	ar_args->commit_sha1 = commit_sha1;
+	ar_args->time = archive_time;
+}
+
+static const char *default_parse_extra(struct archiver *ar,
+				       const char **argv)
+{
+	static char msg[64];
+
+	snprintf(msg, sizeof(msg) - 4, "'%s' format does not handle %s",
+		 ar->name, *argv);
+
+	return strcat(msg, "...");
+}
+
+int parse_archive_args(int argc, const char **argv, struct archiver **ar)
+{
+	const char *extra_argv[MAX_EXTRA_ARGS];
+	int extra_argc = 0;
+	const char *format = NULL; /* some default values */
+	const char *remote = NULL;
+	const char *base = "";
+	int list = 0;
+	int i;
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+
+		if (!strcmp(arg, "--list") || !strcmp(arg, "-l")) {
+			list = 1;
+			continue;
+		}
+		if (!strncmp(arg, "--format=", 9)) {
+			format = arg + 9;
+			continue;
+		}
+		if (!strncmp(arg, "--prefix=", 9)) {
+			base = arg + 9;
+			continue;
+		}
+		if (!strncmp(arg, "--remote=", 9)) {
+			remote = arg + 9;
+			continue;
+		}
+		if (!strcmp(arg, "--")) {
+			i++;
+			break;
+		}
+		if (arg[0] == '-') {
+			extra_argv[extra_argc++] = arg;
+			continue;
+		}
+		break;
+	}
+	if (list) {
+		if (!remote) {
+			for (i = 0; i < ARRAY_SIZE(archivers); i++)
+				printf("%s\n", archivers[i].name);
+			exit(0);
+		}
+		die("--list and --remote are mutually exclusive");
+	}
+	if (argc - i < 1) {
+		die("%s", archive_usage);
+	}
+	if (!format){
+		die("You must specify an archive format");
+	}
+	*ar = get_archiver(format);
+	if (*ar == NULL) {
+		die("Unknown archive format '%s'", format);
+	}
+	if (extra_argc) {
+		if (!(*ar)->parse_extra) {
+			die("%s", default_parse_extra(*ar, extra_argv));
+		}
+		(*ar)->args.extra = (*ar)->parse_extra(extra_argc, extra_argv);
+	}
+	(*ar)->remote = remote;
+	(*ar)->args.base = base;
+
+	return i;
+}
+
+int cmd_archive(int argc, const char **argv, const char *prefix)
+{
+	struct archiver *ar;
+	int tree_idx;
+
+	tree_idx = parse_archive_args(argc, argv, &ar);
+
+	if (ar->remote)
+		return run_remote_archiver(ar, argc, argv);
+
+	if (prefix == NULL)
+		prefix = setup_git_directory();
+
+	argv += tree_idx;
+	parse_treeish_arg(argv, &ar->args, prefix);
+	parse_pathspec_arg(argv + 1, &ar->args);
+
+	return ar->write_archive(&ar->args);
+}
diff --git a/builtin.h b/builtin.h
index 8472c79..2391afb 100644
--- a/builtin.h
+++ b/builtin.h
@@ -15,6 +15,7 @@ extern int write_tree(unsigned char *sha
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_apply(int argc, const char **argv, const char *prefix);
+extern int cmd_archive(int argc, const char **argv, const char *prefix);
 extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
 extern int cmd_checkout_index(int argc, const char **argv, const char *prefix);
 extern int cmd_check_ref_format(int argc, const char **argv, const char *prefix);
diff --git a/generate-cmdlist.sh b/generate-cmdlist.sh
index ec1eda2..5450918 100755
--- a/generate-cmdlist.sh
+++ b/generate-cmdlist.sh
@@ -12,6 +12,7 @@ struct cmdname_help common_cmds[] = {"
 sort <<\EOF |
 add
 apply
+archive
 bisect
 branch
 checkout
diff --git a/git.c b/git.c
index 82c8fee..c62c5cf 100644
--- a/git.c
+++ b/git.c
@@ -218,6 +218,7 @@ static void handle_internal_command(int 
 	} commands[] = {
 		{ "add", cmd_add, RUN_SETUP },
 		{ "apply", cmd_apply },
+		{ "archive", cmd_archive },
 		{ "cat-file", cmd_cat_file, RUN_SETUP },
 		{ "checkout-index", cmd_checkout_index, RUN_SETUP },
 		{ "check-ref-format", cmd_check_ref_format },
-- 
1.4.2

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
  2006-09-07 13:12                 ` [PATCH 1/4] Add git-archive Franck Bui-Huu
@ 2006-09-07 13:12                 ` Franck Bui-Huu
  2006-09-08 20:21                   ` Rene Scharfe
  2006-09-07 13:12                 ` [PATCH 3/4] git-archive: wire up ZIP format Franck Bui-Huu
                                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07 13:12 UTC (permalink / raw)
  To: junkio; +Cc: rene.scharfe, git

From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
---
 archive.h          |    4 +++
 builtin-archive.c  |    4 ++-
 builtin-tar-tree.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 1 deletions(-)

diff --git a/archive.h b/archive.h
index f33398e..3690c53 100644
--- a/archive.h
+++ b/archive.h
@@ -37,5 +37,9 @@ extern void parse_treeish_arg(const char
 
 extern void parse_pathspec_arg(const char **pathspec,
 			       struct archiver_args *args);
+/*
+ *
+ */
+extern int write_tar_archive(struct archiver_args *);
 
 #endif	/* ARCHIVE_H */
diff --git a/builtin-archive.c b/builtin-archive.c
index 6064358..214ec5d 100644
--- a/builtin-archive.c
+++ b/builtin-archive.c
@@ -15,7 +15,9 @@ static const char archive_usage[] = \
 "git-archive --format=<fmt> [--prefix=<prefix>/] [<extra>] <tree-ish> [path...]";
 
 
-struct archiver archivers[] = { };
+struct archiver archivers[] = {
+	{ .name = "tar", .write_archive = write_tar_archive },
+};
 
 
 static int run_remote_archiver(struct archiver *ar, int argc,
diff --git a/builtin-tar-tree.c b/builtin-tar-tree.c
index 61a4135..1134730 100644
--- a/builtin-tar-tree.c
+++ b/builtin-tar-tree.c
@@ -9,6 +9,7 @@ #include "strbuf.h"
 #include "tar.h"
 #include "builtin.h"
 #include "pkt-line.h"
+#include "archive.h"
 
 #define RECORDSIZE	(512)
 #define BLOCKSIZE	(RECORDSIZE * 20)
@@ -338,6 +339,72 @@ static int generate_tar(int argc, const 
 	return 0;
 }
 
+static int write_tar_entry(const unsigned char *sha1,
+                           const char *base, int baselen,
+                           const char *filename, unsigned mode, int stage)
+{
+	static struct strbuf path;
+	int filenamelen = strlen(filename);
+	void *buffer;
+	char type[20];
+	unsigned long size;
+
+	if (!path.alloc) {
+		path.buf = xmalloc(PATH_MAX);
+		path.alloc = PATH_MAX;
+		path.len = path.eof = 0;
+	}
+	if (path.alloc < baselen + filenamelen) {
+		free(path.buf);
+		path.buf = xmalloc(baselen + filenamelen);
+		path.alloc = baselen + filenamelen;
+	}
+	memcpy(path.buf, base, baselen);
+	memcpy(path.buf + baselen, filename, filenamelen);
+	path.len = baselen + filenamelen;
+	if (S_ISDIR(mode)) {
+		strbuf_append_string(&path, "/");
+		buffer = NULL;
+		size = 0;
+	} else {
+		buffer = read_sha1_file(sha1, type, &size);
+		if (!buffer)
+			die("cannot read %s", sha1_to_hex(sha1));
+	}
+
+	write_entry(sha1, &path, mode, buffer, size);
+	free(buffer);
+
+	return READ_TREE_RECURSIVE;
+}
+
+int write_tar_archive(struct archiver_args *args)
+{
+	int plen = strlen(args->base);
+
+	git_config(git_tar_config);
+
+	archive_time = args->time;
+
+	if (args->commit_sha1)
+		write_global_extended_header(args->commit_sha1);
+
+	if (args->base && plen > 0 && args->base[plen - 1] == '/') {
+		char *base = strdup(args->base);
+		int baselen = strlen(base);
+
+		while (baselen > 0 && base[baselen - 1] == '/')
+			base[--baselen] = '\0';
+		write_tar_entry(args->tree->object.sha1, "", 0, base, 040777, 0);
+		free(base);
+	}
+	read_tree_recursive(args->tree, args->base, plen, 0,
+			    args->pathspec, write_tar_entry);
+	write_trailer();
+
+	return 0;
+}
+
 static const char *exec = "git-upload-tar";
 
 static int remote_tar(int argc, const char **argv)
-- 
1.4.2

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/4] git-archive: wire up ZIP format.
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
  2006-09-07 13:12                 ` [PATCH 1/4] Add git-archive Franck Bui-Huu
  2006-09-07 13:12                 ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
@ 2006-09-07 13:12                 ` Franck Bui-Huu
  2006-09-07 13:12                 ` [PATCH 4/4] Add git-upload-archive Franck Bui-Huu
                                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07 13:12 UTC (permalink / raw)
  To: junkio; +Cc: rene.scharfe, git

From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
---
 archive.h          |    1 +
 builtin-archive.c  |    1 +
 builtin-zip-tree.c |   28 ++++++++++++++++++++++++++++
 3 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/archive.h b/archive.h
index 3690c53..760776d 100644
--- a/archive.h
+++ b/archive.h
@@ -41,5 +41,6 @@ extern void parse_pathspec_arg(const cha
  *
  */
 extern int write_tar_archive(struct archiver_args *);
+extern int write_zip_archive(struct archiver_args *);
 
 #endif	/* ARCHIVE_H */
diff --git a/builtin-archive.c b/builtin-archive.c
index 214ec5d..fd53e9a 100644
--- a/builtin-archive.c
+++ b/builtin-archive.c
@@ -17,6 +17,7 @@ static const char archive_usage[] = \
 
 struct archiver archivers[] = {
 	{ .name = "tar", .write_archive = write_tar_archive },
+	{ .name = "zip", .write_archive = write_zip_archive },
 };
 
 
diff --git a/builtin-zip-tree.c b/builtin-zip-tree.c
index a5b834d..788317c 100644
--- a/builtin-zip-tree.c
+++ b/builtin-zip-tree.c
@@ -8,6 +8,7 @@ #include "blob.h"
 #include "tree.h"
 #include "quote.h"
 #include "builtin.h"
+#include "archive.h"
 
 static const char zip_tree_usage[] =
 "git-zip-tree [-0|...|-9] <tree-ish> [ <base> ]";
@@ -351,3 +352,30 @@ int cmd_zip_tree(int argc, const char **
 
 	return 0;
 }
+
+int write_zip_archive(struct archiver_args *args)
+{
+	int plen = strlen(args->base);
+
+	dos_time(&args->time, &zip_date, &zip_time);
+
+	zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE);
+	zip_dir_size = ZIP_DIRECTORY_MIN_SIZE;
+
+	if (args->base && plen > 0 && args->base[plen - 1] == '/') {
+		char *base = strdup(args->base);
+		int baselen = strlen(base);
+
+		while (baselen > 0 && base[baselen - 1] == '/')
+			base[--baselen] = '\0';
+		write_zip_entry(args->tree->object.sha1, "", 0, base, 040777, 0);
+		free(base);
+	}
+	read_tree_recursive(args->tree, args->base, plen, 0,
+			    args->pathspec, write_zip_entry);
+	write_zip_trailer(args->commit_sha1);
+
+	free(zip_dir);
+
+	return 0;
+}
-- 
1.4.2

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 4/4] Add git-upload-archive
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
                                   ` (2 preceding siblings ...)
  2006-09-07 13:12                 ` [PATCH 3/4] git-archive: wire up ZIP format Franck Bui-Huu
@ 2006-09-07 13:12                 ` Franck Bui-Huu
  2006-09-07 17:26                 ` Add git-archive [take #2] Franck Bui-Huu
  2006-09-08  0:37                 ` Junio C Hamano
  5 siblings, 0 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07 13:12 UTC (permalink / raw)
  To: junkio; +Cc: rene.scharfe, git

This command implements the git archive protocol on the server
side. This command is not intended to be used by the end user.
Underlying git-archive command line options are sent over the
protocol from "git-archive --remote=...", just like upload-tar
currently does with "git-tar-tree=...".

As for "git-archive" command implementation, this new command
does not execute any existing "git-{tar,zip}-tree" but rely
on the archive API defined by "git-archive" patch. Hence we
get 2 good points:

 - "git-archive" and "git-upload-archive" share all option
   parsing code.

 - All kind of git-upload-{tar,zip} could be removed.

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
---
 .gitignore                           |    1 
 Documentation/git-upload-archive.txt |   37 +++++++++++++++++
 Makefile                             |    1 
 builtin-upload-archive.c             |   72 ++++++++++++++++++++++++++++++++++
 builtin.h                            |    1 
 daemon.c                             |    7 +++
 git.c                                |    1 
 7 files changed, 120 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
index a3e7ca1..fda9bf2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -119,6 +119,7 @@ git-unpack-objects
 git-update-index
 git-update-ref
 git-update-server-info
+git-upload-archive
 git-upload-pack
 git-upload-tar
 git-var
diff --git a/Documentation/git-upload-archive.txt b/Documentation/git-upload-archive.txt
new file mode 100644
index 0000000..388bb53
--- /dev/null
+++ b/Documentation/git-upload-archive.txt
@@ -0,0 +1,37 @@
+git-upload-archive(1)
+====================
+
+NAME
+----
+git-upload-archive - Send archive
+
+
+SYNOPSIS
+--------
+'git-upload-archive' <directory>
+
+DESCRIPTION
+-----------
+Invoked by 'git-archive --remote' and sends a generated archive to the
+other end over the git protocol.
+
+This command is usually not invoked directly by the end user.  The UI
+for the protocol is on the 'git-archive' side, and the program pair
+is meant to be used to get an archive from a remote repository.
+
+OPTIONS
+-------
+<directory>::
+	The repository to get a tar archive from.
+
+Author
+------
+Written by Franck Bui-Huu.
+
+Documentation
+--------------
+Documentation by Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the gitlink:git[7] suite
diff --git a/Makefile b/Makefile
index 51ed4dd..461e1d6 100644
--- a/Makefile
+++ b/Makefile
@@ -305,6 +305,7 @@ BUILTIN_OBJS = \
 	builtin-unpack-objects.o \
 	builtin-update-index.o \
 	builtin-update-ref.o \
+	builtin-upload-archive.o \
 	builtin-upload-tar.o \
 	builtin-verify-pack.o \
 	builtin-write-tree.o \
diff --git a/builtin-upload-archive.c b/builtin-upload-archive.c
new file mode 100644
index 0000000..4fad377
--- /dev/null
+++ b/builtin-upload-archive.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2006 Franck Bui-Huu
+ */
+#include <time.h>
+#include "cache.h"
+#include "builtin.h"
+#include "archive.h"
+#include "pkt-line.h"
+
+static const char upload_archive_usage[] =
+	"git-upload-archive <repo>";
+
+
+int cmd_upload_archive(int argc, const char **argv, const char *prefix)
+{
+	struct archiver *ar;
+	const char *sent_argv[MAX_ARGS];
+	const char *arg_cmd = "argument ";
+	char *p, buf[4096];
+	int treeish_idx;
+	int sent_argc;
+	int len;
+
+	if (argc != 2)
+		usage(upload_archive_usage);
+
+	if (strlen(argv[1]) > sizeof(buf))
+		die("insanely long repository name");
+
+	strcpy(buf, argv[1]); /* enter-repo smudges its argument */
+
+	if (!enter_repo(buf, 0))
+		die("not a git archive");
+
+	/* put received options in sent_argv[] */
+	sent_argc = 1;
+	sent_argv[0] = "git-upload-archive";
+	for (p = buf;;) {
+		/* This will die if not enough free space in buf */
+		len = packet_read_line(0, p, (buf + sizeof buf) - p);
+		if (len == 0)
+			break;	/* got a flush */
+		if (sent_argc > MAX_ARGS - 2)
+			die("Too many options (>29)");
+
+		if (p[len-1] == '\n') {
+			p[--len] = 0;
+		}
+		if (len < strlen(arg_cmd) ||
+		    strncmp(arg_cmd, p, strlen(arg_cmd)))
+			die("'argument' token or flush expected");
+
+		len -= strlen(arg_cmd);
+		memmove(p, p + strlen(arg_cmd), len);
+		sent_argv[sent_argc++] = p;
+		p += len;
+		*p++ = 0;
+	}
+	sent_argv[sent_argc] = NULL;
+
+	/* parse all options sent by the client */
+	treeish_idx = parse_archive_args(sent_argc, sent_argv, &ar);
+
+	parse_treeish_arg(sent_argv + treeish_idx, &ar->args, prefix);
+	parse_pathspec_arg(sent_argv + treeish_idx + 1, &ar->args);
+
+	packet_write(1, "ACK\n");
+	packet_flush(1);
+
+	return ar->write_archive(&ar->args);
+}
+
diff --git a/builtin.h b/builtin.h
index 2391afb..f3efb58 100644
--- a/builtin.h
+++ b/builtin.h
@@ -58,6 +58,7 @@ extern int cmd_zip_tree(int argc, const 
 extern int cmd_unpack_objects(int argc, const char **argv, const char *prefix);
 extern int cmd_update_index(int argc, const char **argv, const char *prefix);
 extern int cmd_update_ref(int argc, const char **argv, const char *prefix);
+extern int cmd_upload_archive(int argc, const char **argv, const char *prefix);
 extern int cmd_upload_tar(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_whatchanged(int argc, const char **argv, const char *prefix);
diff --git a/daemon.c b/daemon.c
index a4a08f3..5570878 100644
--- a/daemon.c
+++ b/daemon.c
@@ -324,6 +324,12 @@ static int upload_pack(void)
 	return -1;
 }
 
+static int upload_archive(void)
+{
+	execl_git_cmd("upload-archive", ".", NULL);
+	return -1;
+}
+
 static int upload_tar(void)
 {
 	execl_git_cmd("upload-tar", ".", NULL);
@@ -331,6 +337,7 @@ static int upload_tar(void)
 }
 
 static struct daemon_service daemon_service[] = {
+	{ "upload-archive", "uploadarch", upload_archive, 0, 1 },
 	{ "upload-pack", "uploadpack", upload_pack, 1, 1 },
 	{ "upload-tar", "uploadtar", upload_tar, 0, 1 },
 };
diff --git a/git.c b/git.c
index c62c5cf..315dc0b 100644
--- a/git.c
+++ b/git.c
@@ -261,6 +261,7 @@ static void handle_internal_command(int 
 		{ "unpack-objects", cmd_unpack_objects, RUN_SETUP },
 		{ "update-index", cmd_update_index, RUN_SETUP },
 		{ "update-ref", cmd_update_ref, RUN_SETUP },
+		{ "upload-archive", cmd_upload_archive },
 		{ "upload-tar", cmd_upload_tar },
 		{ "version", cmd_version },
 		{ "whatchanged", cmd_whatchanged, RUN_SETUP | USE_PAGER },
-- 
1.4.2

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
                                   ` (3 preceding siblings ...)
  2006-09-07 13:12                 ` [PATCH 4/4] Add git-upload-archive Franck Bui-Huu
@ 2006-09-07 17:26                 ` Franck Bui-Huu
  2006-09-08  0:37                 ` Junio C Hamano
  5 siblings, 0 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-07 17:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck, git, Rene Scharfe, Jakub Narebski

2006/9/7, Franck Bui-Huu <vagabon.xyz@gmail.com>:
>
>   4/ Progress indicator support. Junio wants to mimic upload-pack for
>      that. But it will lead in a lot of duplicated code if we don't
>      try to share code. Can we copy that code anyways and clean up
>      later ?
>

BTW, could we move the side-band code into git_connect() function and
adding to it a new parameter like:

int git_connect(int fd[2], char *url, const char *prog, int side_band)
or
int git_connect(int fd[2], char *url, const char *prog, flags)

Hence it automatically spawns a side-band process in the client side
and it also sends an extended option for git-daemon to ask for using
side band for server services.

-- 
               Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
                                   ` (4 preceding siblings ...)
  2006-09-07 17:26                 ` Add git-archive [take #2] Franck Bui-Huu
@ 2006-09-08  0:37                 ` Junio C Hamano
  2006-09-08  8:18                   ` Franck Bui-Huu
  2006-09-08 20:21                   ` Rene Scharfe
  5 siblings, 2 replies; 40+ messages in thread
From: Junio C Hamano @ 2006-09-08  0:37 UTC (permalink / raw)
  To: Franck; +Cc: git

Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

> I'm sending a new version of the patchset which allows 'git-archive'
> and 'git-upload-archive' command. I tried to take into account all
> feedbacks made by Junio and Rene, but there are still some open points.
>
>   1/ Allow 'git-upload-archive' command to enable/disable some
>      formats. This should be done by 'git-upload-archive'.

Perhaps.  I was thinking about the way how a site administrator
can configure such when upload-archive is spawned via git-daemon
(for users coming from ssh and spawn an upload-archive on their
own, it's their own process and upload-archive has no business
deciding what is allowed and what is forbidden).  Not very many
clean ways I can think of unfortunately.

>   2/ Can I remove 'git-upload-tar' command ?
>   3/ Should I kill 'git-zip-tree' command ?

We do not deprecate commands that easily.  Notice we have kept
git-resolve for a long time (we should remove it and by now it
should be safe)?

Especially tar-tree --remote and upload-archive talks different
protocols, so it is not like not removing it is making your life
more difficult.  Perhaps after next release (1.4.3 or 1.5?  I
dunno) in the new development cycle, we would start saying
"don't use tar-tree --remote, use archive --fmt=tar --remote",
and then the release after that (1.4.4 or 1.5.1?  Again I dunno)
we might remove it.  The same thing for zip-tree, although that
one has lived shorter so it might not be missed if we remove it
earlier.

In any case, don't make removal of them as part of the series
please.  Let's make sure this new toy works well first, and then
start talking about removing things that have become obsolete.

>   4/ Progress indicator support. Junio wants to mimic upload-pack for
>      that. But it will lead in a lot of duplicated code if we don't
>      try to share code. Can we copy that code anyways and clean up
>      later ?

Refactoring first is always preferred, since "later" tends to
come very late (or worse, never) for clean-up tasks than feature
enhancements.

>   5/ Should we use "struct tree_desc" based interface for tree parsing
>      ? According to Rene it doesn't worth it as soon as you actually
>      start to do something to the trees

That became a non-issue I agree.  Whichever is easier.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/4] Add git-archive
  2006-09-07 13:12                 ` [PATCH 1/4] Add git-archive Franck Bui-Huu
@ 2006-09-08  2:35                   ` Junio C Hamano
  2006-09-08  9:00                     ` Franck Bui-Huu
  0 siblings, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-08  2:35 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: git

Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

> The main reason for making this API is to avoid using
> git-{tar,zip}-tree commands, hence making them useless. Maybe it's
> time for them to die ?

The answer is "not yet" and the above paragraph, at least the
last sentence and half, do not belong to the commit log message.

> It also implements remote operations by defining a very simple
> protocol: it first sends the name of the specific uploader followed
> the repository name (git-upload-tar git://example.org/repo.git).
> Then it sends options. It's done by sending a sequence of one
> argument per packet, with prefix "argument ", followed by a flush.

I haven't looked the existing code closely recently, but my
impression was that if you want to make the protocol operable
with both git-daemon, ssh target, and local pipe, it is easier
to make the request message exactly like you would invoke the
remote command over the ssh connection in the target repository
(see connect.c).  I am not sure how well the above
"git-upload-tar reponame" would work.  I would have expected it
to be "git-upload-archive reponame" with the first on-protocol
parameter being "--fmt=tar".

Does your code work well when you run the remote archive
fetching locally, i.e. "git-archive --remote=../other.git",
I wonder?

... goes on and reads the patch, notices that the protocol
command is git-upload-archive with the archive location.  Which
is GOOD.  It is just the above description is a tad stale.

> diff --git a/archive.h b/archive.h
> new file mode 100644
> index 0000000..f33398e
> --- /dev/null
> +++ b/archive.h
> @@ -0,0 +1,41 @@
> +#ifndef ARCHIVE_H
> +#define ARCHIVE_H
> +
> +#define MAX_EXTRA_ARGS	32
> +#define MAX_ARGS	(MAX_EXTRA_ARGS + 32)
> +
> +struct archiver_args {
> +	const char *base;
> +	struct tree *tree;
> +	const unsigned char *commit_sha1;
> +	time_t time;
> +	const char **pathspec;
> +	void *extra;
> +};
> +
> +typedef int (*write_archive_fn_t)(struct archiver_args *);
> +
> +typedef void *(*parse_extra_args_fn_t)(int argc, const char **argv);
> +
> +struct archiver {
> +	const char *name;
> +	const char *remote;
> +	struct archiver_args args;
> +	write_archive_fn_t write_archive;
> +	parse_extra_args_fn_t parse_extra;
> +};
> +
> +extern struct archiver archivers[];

I thought the reason for archiver_args (and archiver_args.extra)
was because we wanted to avoid storing per invocation parameters
in static variables, which would hamper reentrancy.  If one
process is creating two archives, both format=tar, it might be
reasonable for the code (future archiver enhancement, not your
current implementation of git-archive driver) to parse two sets
of parameters first (to get separate archiver instances) and
call their write_archive, but if archivers[] list has the
per-invocation parameter args then we are back to square one,
aren't we?

Reentrancy may not matter, but in any case the above archiver_args
is not helping enough to improve the situation, I think.

Actually you may be able to get away by returning a copy of
archivers[] element from get_archiver() when we need reentrancy
in the future.  Of course the caller needs to free() it when it
is done with it, since it is a per-invocation handle.

> +void parse_treeish_arg(const char **argv, struct archiver_args *ar_args,
> +		       const char *prefix)
> +{
> +...
> +	//free(tree);
> +	ar_args->tree = tree;
> +	ar_args->commit_sha1 = commit_sha1;
> +	ar_args->time = archive_time;
> +}

Stray comment...

Overall looks good, except you already know your issue #4 ;-).

I haven't had a chance to look at connect.c code but I have a
mild suspicion that full reuse of upload-pack code by moving
everything into connect.c is not possible; at least the initial
handshake to determine if sideband is to be used is specific to
upload-pack protocol which needed to bolt-it-on to an existing
protocol to support older clients and servers, and I do not
think we would want to carry that baggage for this new protocol.

The pipe setup code in upload-pack.c::create_pack_file() is
quite specific to upload-pack.  I am not sure how much of it can
be reused by refactoring, but it may be worth a try.

The part that reads from fd 1 and 2 and to multiplex into the
stream on the uploader side (the main while() loop in the same
create_pack_file() function, and send_client_data() function),
and the code to setup and demultiplex the bands on the
downloader side (setup_sideband() in fetch-clone.c) should be
reusable as-is, I think.  They are defined as static so you
would need to move the code around to make them available from
elsewhere.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08  0:37                 ` Junio C Hamano
@ 2006-09-08  8:18                   ` Franck Bui-Huu
  2006-09-08  8:47                     ` Jakub Narebski
  2006-09-08  8:58                     ` Junio C Hamano
  2006-09-08 20:21                   ` Rene Scharfe
  1 sibling, 2 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-08  8:18 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck, git

Junio C Hamano wrote:
> Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
>>
>>   2/ Can I remove 'git-upload-tar' command ?
>>   3/ Should I kill 'git-zip-tree' command ?
> 
> We do not deprecate commands that easily.  Notice we have kept
> git-resolve for a long time (we should remove it and by now it
> should be safe)?
> 

heh ? I've just noticed that you removed 'git-upload-tar' from
master branch (commit d9edcbd6061a392c1315ab6f3aedb9992a3c01b1).

Futhermore I was thinking about 'git-zip-tree' removal because
it's a very recent command. It shouldn't hurt to remove it now
and make our life easier, not sure though...

> Especially tar-tree --remote and upload-archive talks different
> protocols, so it is not like not removing it is making your life
> more difficult.  Perhaps after next release (1.4.3 or 1.5?  I

since you removed 'git-upload-tar', it would be good to remove
'--remote' option from 'git-tar-tree' command as well. 

> In any case, don't make removal of them as part of the series
> please.  Let's make sure this new toy works well first, and then
> start talking about removing things that have become obsolete.
> 

OK, I'll let you do that.

		Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08  8:18                   ` Franck Bui-Huu
@ 2006-09-08  8:47                     ` Jakub Narebski
  2006-09-08  8:58                     ` Junio C Hamano
  1 sibling, 0 replies; 40+ messages in thread
From: Jakub Narebski @ 2006-09-08  8:47 UTC (permalink / raw)
  To: git

Franck Bui-Huu wrote:

>> Especially tar-tree --remote and upload-archive talks different
>> protocols, so it is not like not removing it is making your life
>> more difficult.  Perhaps after next release (1.4.3 or 1.5?  I
> 
> since you removed 'git-upload-tar', it would be good to remove
> '--remote' option from 'git-tar-tree' command as well. 

git-tar-tree --remote was talked about, so I'd rather have the removal of
this option was postponed.

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08  8:18                   ` Franck Bui-Huu
  2006-09-08  8:47                     ` Jakub Narebski
@ 2006-09-08  8:58                     ` Junio C Hamano
  2006-09-08  9:43                       ` Franck Bui-Huu
  1 sibling, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-08  8:58 UTC (permalink / raw)
  To: Franck; +Cc: git

Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

> Junio C Hamano wrote:
>> Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
>>>
>>>   2/ Can I remove 'git-upload-tar' command ?
>>>   3/ Should I kill 'git-zip-tree' command ?
>> 
>> We do not deprecate commands that easily.  Notice we have kept
>> git-resolve for a long time (we should remove it and by now it
>> should be safe)?
>
> heh ? I've just noticed that you removed 'git-upload-tar' from
> master branch (commit d9edcbd6061a392c1315ab6f3aedb9992a3c01b1).
>
> Futhermore I was thinking about 'git-zip-tree' removal because
> it's a very recent command. It shouldn't hurt to remove it now
> and make our life easier, not sure though...

I do not think I removed upload-tar.  I removed it from daemon
service list and the documentation for daemon, because that part
is a new code.

	git tar-tree --remote=../linux-2.6/.git HEAD
	git tar-tree --remote=kernel.org:git next

should still work; the former is "from a directory next door"
and connect.c invokes local /bin/sh as the transport, and the
latter is "ssh login to kernel.org and use ./git directory".

I recall from earlier review of your code, "git archive" should
work well over these two transports in addition to git://
protocol that talks with git-daemon.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/4] Add git-archive
  2006-09-08  2:35                   ` Junio C Hamano
@ 2006-09-08  9:00                     ` Franck Bui-Huu
  2006-09-08 20:21                       ` Rene Scharfe
  0 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-08  9:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck Bui-Huu, git

Junio C Hamano wrote:
> Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
> 
[snip]
> 
> Does your code work well when you run the remote archive
> fetching locally, i.e. "git-archive --remote=../other.git",
> I wonder?
> 
> ... goes on and reads the patch, notices that the protocol
> command is git-upload-archive with the archive location.  Which
> is GOOD.  It is just the above description is a tad stale.
> 

yeah, sorry for that. I forgot to update the patch description.


>> diff --git a/archive.h b/archive.h
>> new file mode 100644
>> index 0000000..f33398e
>> --- /dev/null
>> +++ b/archive.h
>> @@ -0,0 +1,41 @@
>> +#ifndef ARCHIVE_H
>> +#define ARCHIVE_H
>> +
>> +#define MAX_EXTRA_ARGS	32
>> +#define MAX_ARGS	(MAX_EXTRA_ARGS + 32)
>> +

[snip]

> 
> I thought the reason for archiver_args (and archiver_args.extra)
> was because we wanted to avoid storing per invocation parameters
> in static variables, which would hamper reentrancy.  If one
> process is creating two archives, both format=tar, it might be
> reasonable for the code (future archiver enhancement, not your
> current implementation of git-archive driver) to parse two sets
> of parameters first (to get separate archiver instances) and
> call their write_archive, but if archivers[] list has the
> per-invocation parameter args then we are back to square one,
> aren't we?
> 
> Reentrancy may not matter, but in any case the above archiver_args
> is not helping enough to improve the situation, I think.
> 
> Actually you may be able to get away by returning a copy of
> archivers[] element from get_archiver() when we need reentrancy
> in the future.  Of course the caller needs to free() it when it
> is done with it, since it is a per-invocation handle.
> 

yup I did half work here. Does the following is ok now ?

-- >8 --

Subject: Add git-archive

git-archive is a command to make TAR and ZIP archives of a git tree.
It helps prevent a proliferation of git-{format}-tree commands.

Instead of directly calling git-{tar,zip}-tree command, it defines
a very simple API, that archiver should implement and register in
"git-archive.c". This API is made up by 2 functions whose prototype
is defined in "archive.h" file.

 - The first one is used to parse 'extra' parameters which have
   signification only for the specific archiver. That would allow
   different archive backends to have different kind of options.

 - The second one is used to ask to an archive backend to build
   the archive given some already resolved parameters.

It also implements remote operations by defining a very simple
protocol: it first sends 'git-upload-archive' followed the
repository name. Then it sends options. It's done by sending a
sequence of one argument per packet, with prefix "argument ",
followed by a flush.

The remote protocol is implemented in "git-archive.c" for client
side and is triggered by "--remote=<repo>" option. For example,
to fetch a TAR archive in a remote repo, you can issue:

$ git archive --format=tar --remote=git://xxx/yyy/zzz.git HEAD

We choose to not make a new command "git-fetch-archive" for example,
avoind one more GIT command which should be nice for users (less
commands to remember, keeps existing --remote option).

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
---
 .gitignore                    |    1 
 Documentation/git-archive.txt |  100 ++++++++++++++++++
 Makefile                      |    3 -
 archive.h                     |   41 +++++++
 builtin-archive.c             |  225 +++++++++++++++++++++++++++++++++++++++++
 builtin.h                     |    1 
 generate-cmdlist.sh           |    1 
 git.c                         |    1 
 8 files changed, 372 insertions(+), 1 deletions(-)

diff --git a/.gitignore b/.gitignore
index 78cb671..a3e7ca1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ git-apply
 git-applymbox
 git-applypatch
 git-archimport
+git-archive
 git-bisect
 git-branch
 git-cat-file
diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
new file mode 100644
index 0000000..913528d
--- /dev/null
+++ b/Documentation/git-archive.txt
@@ -0,0 +1,100 @@
+git-archive(1)
+==============
+
+NAME
+----
+git-archive - Creates a archive of the files in the named tree
+
+
+SYNOPSIS
+--------
+'git-archive' --format=<fmt> [--list] [--prefix=<prefix>/] [<extra>]
+	      [--remote=<repo>] <tree-ish> [path...]
+
+DESCRIPTION
+-----------
+Creates an archive of the specified format containing the tree
+structure for the named tree.  If <prefix> is specified it is
+prepended to the filenames in the archive.
+
+'git-archive' behaves differently when given a tree ID versus when
+given a commit ID or tag ID.  In the first case the current time is
+used as modification time of each file in the archive.  In the latter
+case the commit time as recorded in the referenced commit object is
+used instead.  Additionally the commit ID is stored in a global
+extended pax header if the tar format is used; it can be extracted
+using 'git-get-tar-commit-id'. In ZIP files it is stored as a file
+comment.
+
+OPTIONS
+-------
+
+--format=<fmt>::
+	Format of the resulting archive: 'tar', 'zip'...
+
+--list::
+	Show all available formats.
+
+--prefix=<prefix>/::
+	Prepend <prefix>/ to each filename in the archive.
+
+<extra>::
+	This can be any options that the archiver backend understand.
+
+--remote=<repo>::
+	Instead of making a tar archive from local repository,
+	retrieve a tar archive from a remote repository.
+
+<tree-ish>::
+	The tree or commit to produce an archive for.
+
+path::
+	If one or more paths are specified, include only these in the
+	archive, otherwise include all files and subdirectories.
+
+CONFIGURATION
+-------------
+By default, file and directories modes are set to 0666 or 0777 in tar
+archives.  It is possible to change this by setting the "umask" variable
+in the repository configuration as follows :
+
+[tar]
+        umask = 002	;# group friendly
+
+The special umask value "user" indicates that the user's current umask
+will be used instead. The default value remains 0, which means world
+readable/writable files and directories.
+
+EXAMPLES
+--------
+git archive --format=tar --prefix=junk/ HEAD | (cd /var/tmp/ && tar xf -)::
+
+	Create a tar archive that contains the contents of the
+	latest commit on the current branch, and extracts it in
+	`/var/tmp/junk` directory.
+
+git archive --format=tar --prefix=git-1.4.0/ v1.4.0 | gzip >git-1.4.0.tar.gz::
+
+	Create a compressed tarball for v1.4.0 release.
+
+git archive --format=tar --prefix=git-1.4.0/ v1.4.0{caret}\{tree\} | gzip >git-1.4.0.tar.gz::
+
+	Create a compressed tarball for v1.4.0 release, but without a
+	global extended pax header.
+
+git archive --format=zip --prefix=git-docs/ HEAD:Documentation/ > git-1.4.0-docs.zip::
+
+	Put everything in the current head's Documentation/ directory
+	into 'git-1.4.0-docs.zip', with the prefix 'git-docs/'.
+
+Author
+------
+Written by Franck Bui-Huu and Rene Scharfe.
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the gitlink:git[7] suite
diff --git a/Makefile b/Makefile
index 389daf7..51ed4dd 100644
--- a/Makefile
+++ b/Makefile
@@ -242,7 +242,7 @@ LIB_FILE=libgit.a
 XDIFF_LIB=xdiff/lib.a
 
 LIB_H = \
-	blob.h cache.h commit.h csum-file.h delta.h \
+	archive.h blob.h cache.h commit.h csum-file.h delta.h \
 	diff.h object.h pack.h para-walk.h pkt-line.h quote.h refs.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h
@@ -267,6 +267,7 @@ LIB_OBJS = \
 BUILTIN_OBJS = \
 	builtin-add.o \
 	builtin-apply.o \
+	builtin-archive.o \
 	builtin-cat-file.o \
 	builtin-checkout-index.o \
 	builtin-check-ref-format.o \
diff --git a/archive.h b/archive.h
new file mode 100644
index 0000000..24b016f
--- /dev/null
+++ b/archive.h
@@ -0,0 +1,41 @@
+#ifndef ARCHIVE_H
+#define ARCHIVE_H
+
+#define MAX_EXTRA_ARGS	32
+#define MAX_ARGS	(MAX_EXTRA_ARGS + 32)
+
+struct archiver_args {
+	const char *base;
+	struct tree *tree;
+	const unsigned char *commit_sha1;
+	time_t time;
+	const char **pathspec;
+	void *extra;
+};
+
+typedef int (*write_archive_fn_t)(struct archiver_args *);
+
+typedef void *(*parse_extra_args_fn_t)(int argc, const char **argv);
+
+struct archiver {
+	const char *name;
+	const char *remote;
+	struct archiver_args args;
+	write_archive_fn_t write_archive;
+	parse_extra_args_fn_t parse_extra;
+};
+
+extern struct archiver archivers[];
+
+extern int parse_archive_args(int argc,
+			      const char **argv,
+			      struct archiver *ar);
+
+extern void parse_treeish_arg(const char **treeish,
+			      struct archiver_args *ar_args,
+			      const char *prefix);
+
+extern void parse_pathspec_arg(const char **pathspec,
+			       struct archiver_args *args);
+
+#endif	/* ARCHIVE_H */
diff --git a/builtin-archive.c b/builtin-archive.c
new file mode 100644
index 0000000..5671cbd
--- /dev/null
+++ b/builtin-archive.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2006 Franck Bui-Huu
+ * Copyright (c) 2006 Rene Scharfe
+ */
+#include <time.h>
+#include "cache.h"
+#include "builtin.h"
+#include "archive.h"
+#include "commit.h"
+#include "tree-walk.h"
+#include "exec_cmd.h"
+#include "pkt-line.h"
+
+static const char archive_usage[] = \
+"git-archive --format=<fmt> [--prefix=<prefix>/] [<extra>] <tree-ish> [path...]";
+
+
+struct archiver archivers[] = { };
+
+
+static int run_remote_archiver(struct archiver *ar, int argc,
+			       const char **argv)
+{
+	char *url, buf[1024];
+	int fd[2], i, len, rv;
+	pid_t pid;
+
+	sprintf(buf, "git-upload-archive");
+
+	url = strdup(ar->remote);
+	pid = git_connect(fd, url, buf);
+	if (pid < 0)
+		return pid;
+
+	for (i = 1; i < argc; i++) {
+		if (!strncmp(argv[i], "--remote=", 9))
+			continue;
+		packet_write(fd[1], "argument %s\n", argv[i]);
+	}
+	packet_flush(fd[1]);
+
+	len = packet_read_line(fd[0], buf, sizeof(buf));
+	if (!len)
+		die("git-archive: expected ACK/NAK, got EOF");
+	if (buf[len-1] == '\n')
+		buf[--len] = 0;
+	if (strcmp(buf, "ACK")) {
+		if (len > 5 && !strncmp(buf, "NACK ", 5))
+			die("git-archive: NACK %s", buf + 5);
+		die("git-archive: protocol error");
+	}
+
+	len = packet_read_line(fd[0], buf, sizeof(buf));
+	if (len)
+		die("git-archive: expected a flush");
+
+	/* Now, start reading from fd[0] and spit it out to stdout */
+	rv = copy_fd(fd[0], 1);
+
+	close(fd[0]);
+	rv |= finish_connect(pid);
+
+	return !!rv;
+}
+
+static int init_archiver(const char *name, struct archiver *ar)
+{
+	int rv = -1, i;
+
+	for (i = 0; i < ARRAY_SIZE(archivers); i++) {
+		if (!strcmp(name, archivers[i].name)) {
+			memcpy(ar, &archivers[i], sizeof(struct archiver));
+			rv = 0;
+			break;
+		}
+	}
+	return rv;
+}
+
+void parse_pathspec_arg(const char **pathspec, struct archiver_args *ar_args)
+{
+	ar_args->pathspec = get_pathspec(ar_args->base, pathspec);
+}
+
+void parse_treeish_arg(const char **argv, struct archiver_args *ar_args,
+		       const char *prefix)
+{
+	const char *name = argv[0];
+	const unsigned char *commit_sha1;
+	time_t archive_time;
+	struct tree *tree;
+	struct commit *commit;
+	unsigned char sha1[20];
+
+	if (get_sha1(name, sha1))
+		die("Not a valid object name");
+
+	commit = lookup_commit_reference_gently(sha1, 1);
+	if (commit) {
+		commit_sha1 = commit->object.sha1;
+		archive_time = commit->date;
+	} else {
+		archive_time = time(NULL);
+	}
+
+	tree = parse_tree_indirect(sha1);
+	if (tree == NULL)
+		die("not a tree object");
+
+	if (prefix) {
+		unsigned char tree_sha1[20];
+		unsigned int mode;
+		int err;
+
+		err = get_tree_entry(tree->object.sha1, prefix,
+				     tree_sha1, &mode);
+		if (err || !S_ISDIR(mode))
+			die("current working directory is untracked");
+
+		free(tree);
+		tree = parse_tree_indirect(tree_sha1);
+	}
+	ar_args->tree = tree;
+	ar_args->commit_sha1 = commit_sha1;
+	ar_args->time = archive_time;
+}
+
+static const char *default_parse_extra(struct archiver *ar,
+				       const char **argv)
+{
+	static char msg[64];
+
+	snprintf(msg, sizeof(msg) - 4, "'%s' format does not handle %s",
+		 ar->name, *argv);
+
+	return strcat(msg, "...");
+}
+
+int parse_archive_args(int argc, const char **argv, struct archiver *ar)
+{
+	const char *extra_argv[MAX_EXTRA_ARGS];
+	int extra_argc = 0;
+	const char *format = NULL; /* some default values */
+	const char *remote = NULL;
+	const char *base = "";
+	int list = 0;
+	int i;
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+
+		if (!strcmp(arg, "--list") || !strcmp(arg, "-l")) {
+			list = 1;
+			continue;
+		}
+		if (!strncmp(arg, "--format=", 9)) {
+			format = arg + 9;
+			continue;
+		}
+		if (!strncmp(arg, "--prefix=", 9)) {
+			base = arg + 9;
+			continue;
+		}
+		if (!strncmp(arg, "--remote=", 9)) {
+			remote = arg + 9;
+			continue;
+		}
+		if (!strcmp(arg, "--")) {
+			i++;
+			break;
+		}
+		if (arg[0] == '-') {
+			extra_argv[extra_argc++] = arg;
+			continue;
+		}
+		break;
+	}
+	if (list) {
+		if (!remote) {
+			for (i = 0; i < ARRAY_SIZE(archivers); i++)
+				printf("%s\n", archivers[i].name);
+			exit(0);
+		}
+		die("--list and --remote are mutually exclusive");
+	}
+	if (argc - i < 1) {
+		die("%s", archive_usage);
+	}
+	if (!format){
+		die("You must specify an archive format");
+	}
+	if (init_archiver(format, ar) < 0) {
+		die("Unknown archive format '%s'", format);
+	}
+	if (extra_argc && !remote) {
+		if (!ar->parse_extra) {
+			die("%s", default_parse_extra(ar, extra_argv));
+		}
+		ar->args.extra = ar->parse_extra(extra_argc, extra_argv);
+	}
+	ar->remote = remote;
+	ar->args.base = base;
+
+	return i;
+}
+
+int cmd_archive(int argc, const char **argv, const char *prefix)
+{
+	struct archiver ar;
+	int tree_idx;
+
+	tree_idx = parse_archive_args(argc, argv, &ar);
+
+	if (ar.remote)
+		return run_remote_archiver(&ar, argc, argv);
+
+	if (prefix == NULL)
+		prefix = setup_git_directory();
+
+	argv += tree_idx;
+	parse_treeish_arg(argv, &ar.args, prefix);
+	parse_pathspec_arg(argv + 1, &ar.args);
+
+	return ar.write_archive(&ar.args);
+}
diff --git a/builtin.h b/builtin.h
index 8472c79..2391afb 100644
--- a/builtin.h
+++ b/builtin.h
@@ -15,6 +15,7 @@ extern int write_tree(unsigned char *sha
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_apply(int argc, const char **argv, const char *prefix);
+extern int cmd_archive(int argc, const char **argv, const char *prefix);
 extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
 extern int cmd_checkout_index(int argc, const char **argv, const char *prefix);
 extern int cmd_check_ref_format(int argc, const char **argv, const char *prefix);
diff --git a/generate-cmdlist.sh b/generate-cmdlist.sh
index ec1eda2..5450918 100755
--- a/generate-cmdlist.sh
+++ b/generate-cmdlist.sh
@@ -12,6 +12,7 @@ struct cmdname_help common_cmds[] = {"
 sort <<\EOF |
 add
 apply
+archive
 bisect
 branch
 checkout
diff --git a/git.c b/git.c
index 82c8fee..c62c5cf 100644
--- a/git.c
+++ b/git.c
@@ -218,6 +218,7 @@ static void handle_internal_command(int 
 	} commands[] = {
 		{ "add", cmd_add, RUN_SETUP },
 		{ "apply", cmd_apply },
+		{ "archive", cmd_archive },
 		{ "cat-file", cmd_cat_file, RUN_SETUP },
 		{ "checkout-index", cmd_checkout_index, RUN_SETUP },
 		{ "check-ref-format", cmd_check_ref_format },
-- 
1.4.2

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08  8:58                     ` Junio C Hamano
@ 2006-09-08  9:43                       ` Franck Bui-Huu
  2006-09-08 21:42                         ` Junio C Hamano
  0 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-08  9:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck, git

Junio C Hamano wrote:
> 
> I do not think I removed upload-tar.  I removed it from daemon
> service list and the documentation for daemon, because that part
> is a new code.
> 
> 	git tar-tree --remote=../linux-2.6/.git HEAD
> 	git tar-tree --remote=kernel.org:git next
> 
> should still work; the former is "from a directory next door"
> and connect.c invokes local /bin/sh as the transport, and the
> latter is "ssh login to kernel.org and use ./git directory".
> 

Sorry I was speaking about the git protocol. It has been included
in master branch. But

	git tar-tree --remote=git://anything/repo.git

does not work anymore, does it ? Do you plan to make it work again
with git-upload-archive (that would need some modifications in
git-tar-tree --remote code) or just let the --remote option work
for local and ssh transport (that would be one good reason for using
for git-archive instead of git-tar-tree) ?

> I recall from earlier review of your code, "git archive" should
> work well over these two transports in addition to git://
> protocol that talks with git-daemon.
> 
yes
		Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08  0:37                 ` Junio C Hamano
  2006-09-08  8:18                   ` Franck Bui-Huu
@ 2006-09-08 20:21                   ` Rene Scharfe
  2006-09-08 21:42                     ` Junio C Hamano
  1 sibling, 1 reply; 40+ messages in thread
From: Rene Scharfe @ 2006-09-08 20:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck, git

Junio C Hamano schrieb:
> Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
> 
>> I'm sending a new version of the patchset which allows 
>> 'git-archive' and 'git-upload-archive' command. I tried to take 
>> into account all feedbacks made by Junio and Rene, but there are 
>> still some open points.
>> 
>> 1/ Allow 'git-upload-archive' command to enable/disable some 
>> formats. This should be done by 'git-upload-archive'.
> 
> Perhaps.  I was thinking about the way how a site administrator can 
> configure such when upload-archive is spawned via git-daemon (for 
> users coming from ssh and spawn an upload-archive on their own, it's 
> their own process and upload-archive has no business deciding what is
>  allowed and what is forbidden).  Not very many clean ways I can
> think of unfortunately.

Mmpf, ssh is (one of the things) in my blind spot.  Do you mean a
ssh+git-shell connection?  One could argue that since this is a
restricted connection anyway upload-archive _has_ a right to restrict
archive format etc., too.  On a full, unrestricted ssh connection one
can start git-archive directly.  I'd do that anyway because I'm used to
do this with tar. ;-)

Anyway, I think having config options for git-upload-archive for
restricting formats and compression levels is as clean as we can get in
the absence of a way for upload-archive to detect which protocol is
used for the current connection.  Mmh, maybe an environment variable
which is set by the daemon can be used?  This is no dirtier than what
webservers do..

René

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/4] Add git-archive
  2006-09-08  9:00                     ` Franck Bui-Huu
@ 2006-09-08 20:21                       ` Rene Scharfe
  2006-09-09 14:31                         ` Franck Bui-Huu
  0 siblings, 1 reply; 40+ messages in thread
From: Rene Scharfe @ 2006-09-08 20:21 UTC (permalink / raw)
  To: Franck; +Cc: Junio C Hamano, git

Only a few trivial comments, as I managed to catch a cold somehow and
can't think straight for longer than three seconds.

>  .gitignore                    |    1 
>  Documentation/git-archive.txt |  100 ++++++++++++++++++
>  Makefile                      |    3 -
>  archive.h                     |   41 +++++++
>  builtin-archive.c             |  225 +++++++++++++++++++++++++++++++++++++++++
>  builtin.h                     |    1 
>  generate-cmdlist.sh           |    1 
>  git.c                         |    1 
>  8 files changed, 372 insertions(+), 1 deletions(-)
> 
> diff --git a/.gitignore b/.gitignore
> index 78cb671..a3e7ca1 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -8,6 +8,7 @@ git-apply
>  git-applymbox
>  git-applypatch
>  git-archimport
> +git-archive
>  git-bisect
>  git-branch
>  git-cat-file
> diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
> new file mode 100644
> index 0000000..913528d
> --- /dev/null
> +++ b/Documentation/git-archive.txt
> @@ -0,0 +1,100 @@
> +git-archive(1)
> +==============
> +
> +NAME
> +----
> +git-archive - Creates a archive of the files in the named tree
> +
> +
> +SYNOPSIS
> +--------
> +'git-archive' --format=<fmt> [--list] [--prefix=<prefix>/] [<extra>]
> +	      [--remote=<repo>] <tree-ish> [path...]
> +
> +DESCRIPTION
> +-----------
> +Creates an archive of the specified format containing the tree
> +structure for the named tree.  If <prefix> is specified it is
> +prepended to the filenames in the archive.
> +
> +'git-archive' behaves differently when given a tree ID versus when
> +given a commit ID or tag ID.  In the first case the current time is
> +used as modification time of each file in the archive.  In the latter
> +case the commit time as recorded in the referenced commit object is
> +used instead.  Additionally the commit ID is stored in a global
> +extended pax header if the tar format is used; it can be extracted
> +using 'git-get-tar-commit-id'. In ZIP files it is stored as a file
> +comment.
> +
> +OPTIONS
> +-------
> +
> +--format=<fmt>::
> +	Format of the resulting archive: 'tar', 'zip'...
> +
> +--list::
> +	Show all available formats.
> +
> +--prefix=<prefix>/::
> +	Prepend <prefix>/ to each filename in the archive.
> +
> +<extra>::
> +	This can be any options that the archiver backend understand.
> +
> +--remote=<repo>::
> +	Instead of making a tar archive from local repository,
> +	retrieve a tar archive from a remote repository.
> +
> +<tree-ish>::
> +	The tree or commit to produce an archive for.
> +
> +path::
> +	If one or more paths are specified, include only these in the
> +	archive, otherwise include all files and subdirectories.
> +
> +CONFIGURATION
> +-------------
> +By default, file and directories modes are set to 0666 or 0777 in tar
> +archives.  It is possible to change this by setting the "umask" variable
> +in the repository configuration as follows :
> +
> +[tar]
> +        umask = 002	;# group friendly
> +
> +The special umask value "user" indicates that the user's current umask
> +will be used instead. The default value remains 0, which means world
> +readable/writable files and directories.
> +
> +EXAMPLES
> +--------
> +git archive --format=tar --prefix=junk/ HEAD | (cd /var/tmp/ && tar xf -)::
> +
> +	Create a tar archive that contains the contents of the
> +	latest commit on the current branch, and extracts it in
> +	`/var/tmp/junk` directory.
> +
> +git archive --format=tar --prefix=git-1.4.0/ v1.4.0 | gzip >git-1.4.0.tar.gz::
> +
> +	Create a compressed tarball for v1.4.0 release.
> +
> +git archive --format=tar --prefix=git-1.4.0/ v1.4.0{caret}\{tree\} | gzip >git-1.4.0.tar.gz::
> +
> +	Create a compressed tarball for v1.4.0 release, but without a
> +	global extended pax header.
> +
> +git archive --format=zip --prefix=git-docs/ HEAD:Documentation/ > git-1.4.0-docs.zip::
> +
> +	Put everything in the current head's Documentation/ directory
> +	into 'git-1.4.0-docs.zip', with the prefix 'git-docs/'.
> +
> +Author
> +------
> +Written by Franck Bui-Huu and Rene Scharfe.
> +
> +Documentation
> +--------------
> +Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
> +
> +GIT
> +---
> +Part of the gitlink:git[7] suite
> diff --git a/Makefile b/Makefile
> index 389daf7..51ed4dd 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -242,7 +242,7 @@ LIB_FILE=libgit.a
>  XDIFF_LIB=xdiff/lib.a
>  
>  LIB_H = \
> -	blob.h cache.h commit.h csum-file.h delta.h \
> +	archive.h blob.h cache.h commit.h csum-file.h delta.h \
>  	diff.h object.h pack.h para-walk.h pkt-line.h quote.h refs.h \
>  	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
>  	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h
> @@ -267,6 +267,7 @@ LIB_OBJS = \
>  BUILTIN_OBJS = \
>  	builtin-add.o \
>  	builtin-apply.o \
> +	builtin-archive.o \
>  	builtin-cat-file.o \
>  	builtin-checkout-index.o \
>  	builtin-check-ref-format.o \
> diff --git a/archive.h b/archive.h
> new file mode 100644
> index 0000000..24b016f
> --- /dev/null
> +++ b/archive.h
> @@ -0,0 +1,41 @@
> +#ifndef ARCHIVE_H
> +#define ARCHIVE_H
> +
> +#define MAX_EXTRA_ARGS	32
> +#define MAX_ARGS	(MAX_EXTRA_ARGS + 32)
> +
> +struct archiver_args {
> +	const char *base;
> +	struct tree *tree;
> +	const unsigned char *commit_sha1;
> +	time_t time;
> +	const char **pathspec;
> +	void *extra;
> +};
> +
> +typedef int (*write_archive_fn_t)(struct archiver_args *);
> +
> +typedef void *(*parse_extra_args_fn_t)(int argc, const char **argv);
> +
> +struct archiver {
> +	const char *name;
> +	const char *remote;
> +	struct archiver_args args;
> +	write_archive_fn_t write_archive;
> +	parse_extra_args_fn_t parse_extra;
> +};
> +
> +extern struct archiver archivers[];
> +
> +extern int parse_archive_args(int argc,
> +			      const char **argv,
> +			      struct archiver *ar);
> +
> +extern void parse_treeish_arg(const char **treeish,
> +			      struct archiver_args *ar_args,
> +			      const char *prefix);
> +
> +extern void parse_pathspec_arg(const char **pathspec,
> +			       struct archiver_args *args);
> +
> +#endif	/* ARCHIVE_H */
> diff --git a/builtin-archive.c b/builtin-archive.c
> new file mode 100644
> index 0000000..5671cbd
> --- /dev/null
> +++ b/builtin-archive.c
> @@ -0,0 +1,225 @@
> +/*
> + * Copyright (c) 2006 Franck Bui-Huu
> + * Copyright (c) 2006 Rene Scharfe
> + */
> +#include <time.h>
> +#include "cache.h"
> +#include "builtin.h"
> +#include "archive.h"
> +#include "commit.h"
> +#include "tree-walk.h"
> +#include "exec_cmd.h"
> +#include "pkt-line.h"
> +
> +static const char archive_usage[] = \
> +"git-archive --format=<fmt> [--prefix=<prefix>/] [<extra>] <tree-ish> [path...]";
> +
> +
> +struct archiver archivers[] = { };
> +
> +
> +static int run_remote_archiver(struct archiver *ar, int argc,
> +			       const char **argv)
> +{
> +	char *url, buf[1024];
> +	int fd[2], i, len, rv;
> +	pid_t pid;
> +
> +	sprintf(buf, "git-upload-archive");
> +
> +	url = strdup(ar->remote);

xstrdup()

> +	pid = git_connect(fd, url, buf);
> +	if (pid < 0)
> +		return pid;
> +
> +	for (i = 1; i < argc; i++) {
> +		if (!strncmp(argv[i], "--remote=", 9))
> +			continue;
> +		packet_write(fd[1], "argument %s\n", argv[i]);
> +	}
> +	packet_flush(fd[1]);
> +
> +	len = packet_read_line(fd[0], buf, sizeof(buf));
> +	if (!len)
> +		die("git-archive: expected ACK/NAK, got EOF");
> +	if (buf[len-1] == '\n')
> +		buf[--len] = 0;
> +	if (strcmp(buf, "ACK")) {
> +		if (len > 5 && !strncmp(buf, "NACK ", 5))
> +			die("git-archive: NACK %s", buf + 5);
> +		die("git-archive: protocol error");
> +	}
> +
> +	len = packet_read_line(fd[0], buf, sizeof(buf));
> +	if (len)
> +		die("git-archive: expected a flush");
> +
> +	/* Now, start reading from fd[0] and spit it out to stdout */
> +	rv = copy_fd(fd[0], 1);
> +
> +	close(fd[0]);
> +	rv |= finish_connect(pid);
> +
> +	return !!rv;
> +}
> +
> +static int init_archiver(const char *name, struct archiver *ar)
> +{
> +	int rv = -1, i;
> +
> +	for (i = 0; i < ARRAY_SIZE(archivers); i++) {
> +		if (!strcmp(name, archivers[i].name)) {
> +			memcpy(ar, &archivers[i], sizeof(struct archiver));
> +			rv = 0;
> +			break;
> +		}
> +	}
> +	return rv;
> +}
> +
> +void parse_pathspec_arg(const char **pathspec, struct archiver_args *ar_args)
> +{
> +	ar_args->pathspec = get_pathspec(ar_args->base, pathspec);
> +}
> +
> +void parse_treeish_arg(const char **argv, struct archiver_args *ar_args,
> +		       const char *prefix)
> +{
> +	const char *name = argv[0];
> +	const unsigned char *commit_sha1;
> +	time_t archive_time;
> +	struct tree *tree;
> +	struct commit *commit;
> +	unsigned char sha1[20];
> +
> +	if (get_sha1(name, sha1))
> +		die("Not a valid object name");
> +
> +	commit = lookup_commit_reference_gently(sha1, 1);
> +	if (commit) {
> +		commit_sha1 = commit->object.sha1;
> +		archive_time = commit->date;
> +	} else {
> +		archive_time = time(NULL);
> +	}
> +
> +	tree = parse_tree_indirect(sha1);
> +	if (tree == NULL)
> +		die("not a tree object");
> +
> +	if (prefix) {
> +		unsigned char tree_sha1[20];
> +		unsigned int mode;
> +		int err;
> +
> +		err = get_tree_entry(tree->object.sha1, prefix,
> +				     tree_sha1, &mode);
> +		if (err || !S_ISDIR(mode))
> +			die("current working directory is untracked");
> +
> +		free(tree);
> +		tree = parse_tree_indirect(tree_sha1);
> +	}
> +	ar_args->tree = tree;
> +	ar_args->commit_sha1 = commit_sha1;
> +	ar_args->time = archive_time;
> +}
> +
> +static const char *default_parse_extra(struct archiver *ar,
> +				       const char **argv)
> +{
> +	static char msg[64];
> +
> +	snprintf(msg, sizeof(msg) - 4, "'%s' format does not handle %s",
> +		 ar->name, *argv);
> +
> +	return strcat(msg, "...");
> +}
> +
> +int parse_archive_args(int argc, const char **argv, struct archiver *ar)
> +{
> +	const char *extra_argv[MAX_EXTRA_ARGS];
> +	int extra_argc = 0;
> +	const char *format = NULL; /* some default values */

This comment does not convey any information.

> +	const char *remote = NULL;
> +	const char *base = "";
> +	int list = 0;
> +	int i;
> +
> +	for (i = 1; i < argc; i++) {
> +		const char *arg = argv[i];
> +
> +		if (!strcmp(arg, "--list") || !strcmp(arg, "-l")) {
> +			list = 1;
> +			continue;
> +		}
> +		if (!strncmp(arg, "--format=", 9)) {
> +			format = arg + 9;
> +			continue;
> +		}
> +		if (!strncmp(arg, "--prefix=", 9)) {
> +			base = arg + 9;
> +			continue;
> +		}
> +		if (!strncmp(arg, "--remote=", 9)) {
> +			remote = arg + 9;
> +			continue;
> +		}
> +		if (!strcmp(arg, "--")) {
> +			i++;
> +			break;
> +		}
> +		if (arg[0] == '-') {
> +			extra_argv[extra_argc++] = arg;

Overrun is not checked.

> +			continue;
> +		}
> +		break;
> +	}
> +	if (list) {
> +		if (!remote) {
> +			for (i = 0; i < ARRAY_SIZE(archivers); i++)
> +				printf("%s\n", archivers[i].name);
> +			exit(0);
> +		}
> +		die("--list and --remote are mutually exclusive");
> +	}

Not sure if we really need a list option.  I guess it only really
makes sense if we have more than five formats.  I have no _strong_
feelings against it, though. *shrug*

> +	if (argc - i < 1) {
> +		die("%s", archive_usage);

usage()

> +	}
> +	if (!format){
> +		die("You must specify an archive format");
> +	}
> +	if (init_archiver(format, ar) < 0) {
> +		die("Unknown archive format '%s'", format);
> +	}
> +	if (extra_argc && !remote) {
> +		if (!ar->parse_extra) {
> +			die("%s", default_parse_extra(ar, extra_argv));
> +		}
> +		ar->args.extra = ar->parse_extra(extra_argc, extra_argv);
> +	}
> +	ar->remote = remote;
> +	ar->args.base = base;
> +
> +	return i;
> +}
> +
> +int cmd_archive(int argc, const char **argv, const char *prefix)
> +{
> +	struct archiver ar;
> +	int tree_idx;
> +
> +	tree_idx = parse_archive_args(argc, argv, &ar);
> +
> +	if (ar.remote)
> +		return run_remote_archiver(&ar, argc, argv);
> +
> +	if (prefix == NULL)
> +		prefix = setup_git_directory();
> +
> +	argv += tree_idx;
> +	parse_treeish_arg(argv, &ar.args, prefix);
> +	parse_pathspec_arg(argv + 1, &ar.args);
> +
> +	return ar.write_archive(&ar.args);
> +}
> diff --git a/builtin.h b/builtin.h
> index 8472c79..2391afb 100644
> --- a/builtin.h
> +++ b/builtin.h
> @@ -15,6 +15,7 @@ extern int write_tree(unsigned char *sha
>  
>  extern int cmd_add(int argc, const char **argv, const char *prefix);
>  extern int cmd_apply(int argc, const char **argv, const char *prefix);
> +extern int cmd_archive(int argc, const char **argv, const char *prefix);
>  extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
>  extern int cmd_checkout_index(int argc, const char **argv, const char *prefix);
>  extern int cmd_check_ref_format(int argc, const char **argv, const char *prefix);
> diff --git a/generate-cmdlist.sh b/generate-cmdlist.sh
> index ec1eda2..5450918 100755
> --- a/generate-cmdlist.sh
> +++ b/generate-cmdlist.sh
> @@ -12,6 +12,7 @@ struct cmdname_help common_cmds[] = {"
>  sort <<\EOF |
>  add
>  apply
> +archive
>  bisect
>  branch
>  checkout
> diff --git a/git.c b/git.c
> index 82c8fee..c62c5cf 100644
> --- a/git.c
> +++ b/git.c
> @@ -218,6 +218,7 @@ static void handle_internal_command(int 
>  	} commands[] = {
>  		{ "add", cmd_add, RUN_SETUP },
>  		{ "apply", cmd_apply },
> +		{ "archive", cmd_archive },
>  		{ "cat-file", cmd_cat_file, RUN_SETUP },
>  		{ "checkout-index", cmd_checkout_index, RUN_SETUP },
>  		{ "check-ref-format", cmd_check_ref_format },

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] Add git-archive
  2006-09-06 20:29       ` Jakub Narebski
@ 2006-09-08 20:21         ` Rene Scharfe
  0 siblings, 0 replies; 40+ messages in thread
From: Rene Scharfe @ 2006-09-08 20:21 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: git

Jakub Narebski schrieb:
> Rene Scharfe wrote:
> 
>> IMHO should work like in the following example, and the code above 
>> cuts off the Documentation part:
>> 
>> $ cd Documentation $ git-archive --format=tar --prefix=v1.0/ HEAD howto | tar tf - 
>> v1.0/howto/ 
>> v1.0/howto/isolate-bugs-with-bisect.txt ...
>> 
>> I agree that simple subtree matching would be enough, at least for 
>> now.
> 
> What about
> 
> $ git-archive --format=tar --prefix=v1.0/ HEAD:Documentation/howto

That is fine, too (cutting off Documentation/howto).

My comment above was about the piece of code that handles cd'ing around in
the repository.  git-tar-tree ignores the current working directory -- you
always get the full tree put into your tar file, and you have to do the
"trick" you mentioned if you want to archive only a subtree.  This is a bit
strange, so I think we should do it right from the start in git-archive.

René

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-07 13:12                 ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
@ 2006-09-08 20:21                   ` Rene Scharfe
  2006-09-08 21:42                     ` Junio C Hamano
  2006-09-09 14:38                     ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
  0 siblings, 2 replies; 40+ messages in thread
From: Rene Scharfe @ 2006-09-08 20:21 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: junkio, git

Franck Bui-Huu schrieb:
> From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
> 
> Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
> Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>

I did not sign off this exact patch.  I wrote and submitted the
builtin-tar-tree.c part, with memory leak and all, then sent a note
on where the leak needs to be plugged.  You put it together and
converted it to struct archiver_args.  I'd very much have liked to
see a comment stating this.  Or simply just say "based on code by
Rene" or something.  The same is true for patch 3/4.

> ---
>  archive.h          |    4 +++
>  builtin-archive.c  |    4 ++-
>  builtin-tar-tree.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 74 insertions(+), 1 deletions(-)
> 
> diff --git a/archive.h b/archive.h
> index f33398e..3690c53 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -37,5 +37,9 @@ extern void parse_treeish_arg(const char
>  
>  extern void parse_pathspec_arg(const char **pathspec,
>  			       struct archiver_args *args);
> +/*
> + *
> + */

Especially I would not have signed off this invisible comment. ;)

René

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08  9:43                       ` Franck Bui-Huu
@ 2006-09-08 21:42                         ` Junio C Hamano
  0 siblings, 0 replies; 40+ messages in thread
From: Junio C Hamano @ 2006-09-08 21:42 UTC (permalink / raw)
  To: Franck; +Cc: git

Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

> Sorry I was speaking about the git protocol. It has been included
> in master branch.

I do not think so.  The recent "master" history is like this
(look at "gitk --full-history v1.4.2..master -- daemon.c"):

d819e4e daemon: prepare for multiple services

This introduced the daemon_service[] table and run_service
facility.  The service table contained upload-pack only.

This was merged into "master" and pushed out with 

1efca00 Merge early part of branch 'jc/daemon'

At this point, 'jc/daemon' topic branch contained the d819e4e
above and this one, which added upload-tar to the service table:

74c0cc2 daemon: add upload-tar service.

this was part of "next" but was not in "master".  This made
"tar-tree --remote" against git-daemon usable in "next".  But
before I pushed this out to "master", upload-archive design you
and Rene were working on became a lot more promising and
attractive.  Especially, discussion with Rene made me realize
that redoing upload-xxx protocol for each archiver was not a
good design.  So I did:

d9edcbd Revert "daemon: add upload-tar service."

on 'jc/daemon' branch.  And then that topic was merged into
"master" and pushed out, because that will allow you and Rene to
work your patch against "master" which would be nicer to not
just you but to everybody -- other people can test your patches
before they hit my tree on any branch.

In other words,

> 	git tar-tree --remote=git://anything/repo.git

never worked in "master".  At least that is how I wanted the
commit ancestry graph to look like and why the above merges and
reverts were done in the order described above.

Once git archive hits "master", we should announce that "git
tar-tree" and "git zip-tree" are deprecated, and we will remove
them sometime later.  As part of the same announcement we could
say that "git tar-tree --remote" works with transports other
than git-daemon, but we do not plan to add git native transport
support to it before its removal, because "git archive" is
preferred method both on local and remote archiving from then
on.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: Add git-archive [take #2]
  2006-09-08 20:21                   ` Rene Scharfe
@ 2006-09-08 21:42                     ` Junio C Hamano
  0 siblings, 0 replies; 40+ messages in thread
From: Junio C Hamano @ 2006-09-08 21:42 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: git

Rene Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> Junio C Hamano schrieb:
>> Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
>> 
>>> 1/ Allow 'git-upload-archive' command to enable/disable some 
>>> formats. This should be done by 'git-upload-archive'.
>> 
>> Perhaps.  I was thinking about the way how a site administrator can 
>> configure such when upload-archive is spawned via git-daemon (for 
>> users coming from ssh and spawn an upload-archive on their own, it's 
>> their own process and upload-archive has no business deciding what is
>>  allowed and what is forbidden).  Not very many clean ways I can
>> think of unfortunately.
>
> Mmpf, ssh is (one of the things) in my blind spot.  Do you mean a
> ssh+git-shell connection?  One could argue that since this is a
> restricted connection anyway upload-archive _has_ a right to restrict
> archive format etc., too.  On a full, unrestricted ssh connection one
> can start git-archive directly.  I'd do that anyway because I'm used to
> do this with tar. ;-)

Ah, I was not thinking about git-shell and it might want to be
restrictive.

> ... Mmh, maybe an environment variable
> which is set by the daemon can be used?  This is no dirtier than what
> webservers do..

Exactly my thought, except "no dirtier" part I did not think
through but now you said it I tend to agree.

So site administrator can ask git-daemon to export some
environment variable that git-upload-archive notices and
restrict service.  If we choose to we can add a similar facility
to set the same environment variable to git-shell, so services
to retricted ssh users can be limited the same way by the
administrator.  That sounds like a good plan; we do not have
to do that for git-shell until somebody asks.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-08 20:21                   ` Rene Scharfe
@ 2006-09-08 21:42                     ` Junio C Hamano
  2006-09-09  1:53                       ` Junio C Hamano
  2006-09-09 14:38                     ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
  1 sibling, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-08 21:42 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: git, Franck Bui-Huu

Rene Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> I did not sign off this exact patch.  I wrote and submitted the
> builtin-tar-tree.c part, with memory leak and all, then sent a note
> on where the leak needs to be plugged.  You put it together and
> converted it to struct archiver_args.  I'd very much have liked to
> see a comment stating this.  Or simply just say "based on code by
> Rene" or something.  The same is true for patch 3/4.
>...

Thanks for clarification -- I also was wondering if you two were
working as a team exchanging drafts and the message I saw was
the fruit of such collaboration (like the way Johannes/Alex team
worked on C rewrite of merge-recursive).  Otherwise the sign-off
was indeed inappropriate.

>> +/*
>> + *
>> + */
>
> Especially I would not have signed off this invisible comment. ;)

I take your response is a mild NAK.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-08 21:42                     ` Junio C Hamano
@ 2006-09-09  1:53                       ` Junio C Hamano
  2006-09-09 15:02                         ` Rene Scharfe
  2006-09-09 15:10                         ` Franck Bui-Huu
  0 siblings, 2 replies; 40+ messages in thread
From: Junio C Hamano @ 2006-09-09  1:53 UTC (permalink / raw)
  To: Franck Bui-Huu, Rene Scharfe; +Cc: git

Junio C Hamano <junkio@cox.net> writes:

> Rene Scharfe <rene.scharfe@lsrfire.ath.cx> writes:
>
>> I did not sign off this exact patch.  I wrote and submitted the
>> builtin-tar-tree.c part, with memory leak and all, then sent a note
>> on where the leak needs to be plugged.  You put it together and
>> converted it to struct archiver_args.  I'd very much have liked to
>> see a comment stating this.  Or simply just say "based on code by
>> Rene" or something.  The same is true for patch 3/4.
>>...
>> Especially I would not have signed off this invisible comment. ;)
>
> I take your response is a mild NAK.

Just to reduce everybody's pain, why don't I fix them up and
push out the 4 series in "pu" with attribution clarification and
review comments from Rene in mind, so that you two can Ack them?
After that they will be placed on "next".

I needed to apply small tweaks on 1/4 (ANSI-C pedantic did not
like empty struct initializers) and 4/4 (the updated 1/1 needed
the way struct archiver is initialized and used be different
from the original one) as well.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/4] Add git-archive
  2006-09-08 20:21                       ` Rene Scharfe
@ 2006-09-09 14:31                         ` Franck Bui-Huu
  2006-09-09 15:02                           ` Rene Scharfe
  0 siblings, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-09 14:31 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: Junio C Hamano, git

2006/9/8, Rene Scharfe <rene.scharfe@lsrfire.ath.cx>:
> Only a few trivial comments, as I managed to catch a cold somehow and
> can't think straight for longer than three seconds.
>
> >  .gitignore                    |    1
> >  Documentation/git-archive.txt |  100 ++++++++++++++++++
> >  Makefile                      |    3 -

[snip]

> > +
> > +     url = strdup(ar->remote);
>
> xstrdup()
>

ok, but need to rebase...

> > +     pid = git_connect(fd, url, buf);
> > +     if (pid < 0)
> > +             return pid;
> > +

[snip]

> > +     int extra_argc = 0;
> > +     const char *format = NULL; /* some default values */
>
> This comment does not convey any information.
>

OK, I'll remove it

> > +     const char *remote = NULL;
> > +     const char *base = "";

[snip]

> > +             }
> > +             if (arg[0] == '-') {
> > +                     extra_argv[extra_argc++] = arg;
>
> Overrun is not checked.
>

Indeed, I'll fix it.

> > +                     continue;
> > +             }
> > +             break;
> > +     }
> > +     if (list) {
> > +             if (!remote) {
> > +                     for (i = 0; i < ARRAY_SIZE(archivers); i++)
> > +                             printf("%s\n", archivers[i].name);
> > +                     exit(0);
> > +             }
> > +             die("--list and --remote are mutually exclusive");
> > +     }
>
> Not sure if we really need a list option.  I guess it only really
> makes sense if we have more than five formats.  I have no _strong_
> feelings against it, though. *shrug*
>

well it's almost free to add it, and no need any maintenance if we add
a new archiver backend, so I would say let it.

> > +     if (argc - i < 1) {
> > +             die("%s", archive_usage);
>
> usage()
>

ok

-- 
               Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-08 20:21                   ` Rene Scharfe
  2006-09-08 21:42                     ` Junio C Hamano
@ 2006-09-09 14:38                     ` Franck Bui-Huu
  1 sibling, 0 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-09 14:38 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: junkio, git

2006/9/8, Rene Scharfe <rene.scharfe@lsrfire.ath.cx>:
> Franck Bui-Huu schrieb:
> > From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
> >
> > Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
> > Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
>
> I did not sign off this exact patch.  I wrote and submitted the
> builtin-tar-tree.c part, with memory leak and all, then sent a note
> on where the leak needs to be plugged.  You put it together and
> converted it to struct archiver_args.  I'd very much have liked to
> see a comment stating this.  Or simply just say "based on code by
> Rene" or something.  The same is true for patch 3/4.
>

OK I'll change that....

> > ---
> >  extern void parse_pathspec_arg(const char **pathspec,
> >                              struct archiver_args *args);
> > +/*
> > + *
> > + */
>
> Especially I would not have signed off this invisible comment. ;)
>
> René
>

and put something usefull here.

Thanks
-- 
               Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-09  1:53                       ` Junio C Hamano
@ 2006-09-09 15:02                         ` Rene Scharfe
  2006-09-09 15:10                         ` Franck Bui-Huu
  1 sibling, 0 replies; 40+ messages in thread
From: Rene Scharfe @ 2006-09-09 15:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck Bui-Huu, git

Junio C Hamano schrieb:
> Just to reduce everybody's pain, why don't I fix them up and
> push out the 4 series in "pu" with attribution clarification and
> review comments from Rene in mind, so that you two can Ack them?
> After that they will be placed on "next".

This is an excellent idea.  What you have in pu is a good base to
add the (few and small) missing pieces.  Consider patches 1-3 ACKed;
I haven't looked at git-upload-archive, yet.

Thanks,
René

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/4] Add git-archive
  2006-09-09 14:31                         ` Franck Bui-Huu
@ 2006-09-09 15:02                           ` Rene Scharfe
  2006-09-09 15:25                             ` Franck Bui-Huu
  0 siblings, 1 reply; 40+ messages in thread
From: Rene Scharfe @ 2006-09-09 15:02 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Junio C Hamano, git

Franck Bui-Huu schrieb:
> 2006/9/8, Rene Scharfe <rene.scharfe@lsrfire.ath.cx>:
>> > +     url = strdup(ar->remote);
>>
>> xstrdup()
>>
> 
> ok, but need to rebase...

Why?  On a similar note , can we use Junio's pu branch (and soon his
next branch) as our base for further work?

>> > +     if (list) {
>> > +             if (!remote) {
>> > +                     for (i = 0; i < ARRAY_SIZE(archivers); i++)
>> > +                             printf("%s\n", archivers[i].name);
>> > +                     exit(0);
>> > +             }
>> > +             die("--list and --remote are mutually exclusive");
>> > +     }
>>
>> Not sure if we really need a list option.  I guess it only really
>> makes sense if we have more than five formats.  I have no _strong_
>> feelings against it, though. *shrug*
>>
> 
> well it's almost free to add it, and no need any maintenance if we add
> a new archiver backend, so I would say let it.

I thought a bit about it, and I can now see a good use case for --list:
checking the capabilities of a remote site.  Unfortunately this is
currently forbidden.  Why?  git-archive --list writes to stdout, so the
result can be transported the same way an archive would.

René

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-09  1:53                       ` Junio C Hamano
  2006-09-09 15:02                         ` Rene Scharfe
@ 2006-09-09 15:10                         ` Franck Bui-Huu
  2006-09-09 19:42                           ` Junio C Hamano
  1 sibling, 1 reply; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-09 15:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Rene Scharfe, git

2006/9/9, Junio C Hamano <junkio@cox.net>:
> Junio C Hamano <junkio@cox.net> writes:
>
> > Rene Scharfe <rene.scharfe@lsrfire.ath.cx> writes:
> >
> >> I did not sign off this exact patch.  I wrote and submitted the
> >> builtin-tar-tree.c part, with memory leak and all, then sent a note
> >> on where the leak needs to be plugged.  You put it together and
> >> converted it to struct archiver_args.  I'd very much have liked to
> >> see a comment stating this.  Or simply just say "based on code by
> >> Rene" or something.  The same is true for patch 3/4.
> >>...
> >> Especially I would not have signed off this invisible comment. ;)
> >
> > I take your response is a mild NAK.
>
> Just to reduce everybody's pain, why don't I fix them up and
> push out the 4 series in "pu" with attribution clarification and
> review comments from Rene in mind, so that you two can Ack them?
> After that they will be placed on "next".
>

Almost Acked by me, except you've missed some Rene's comments. And
more important I fixed an "uninitialized variable" bug. See patch
below.

> I needed to apply small tweaks on 1/4 (ANSI-C pedantic did not
> like empty struct initializers) and 4/4 (the updated 1/1 needed
> the way struct archiver is initialized and used be different
> from the original one) as well.
>

thanks for that.

-- >8 --

diff --git a/builtin-archive.c b/builtin-archive.c
index 0a02519..9b90d87 100644
--- a/builtin-archive.c
+++ b/builtin-archive.c
@@ -28,7 +28,7 @@ static int run_remote_archiver(struct ar

 	sprintf(buf, "git-upload-archive");

-	url = strdup(ar->remote);
+	url = xstrdup(ar->remote);
 	pid = git_connect(fd, url, buf);
 	if (pid < 0)
 		return pid;
@@ -101,6 +101,7 @@ void parse_treeish_arg(const char **argv
 		commit_sha1 = commit->object.sha1;
 		archive_time = commit->date;
 	} else {
+		commit_sha1 = NULL;
 		archive_time = time(NULL);
 	}

@@ -141,7 +142,7 @@ int parse_archive_args(int argc, const c
 {
 	const char *extra_argv[MAX_EXTRA_ARGS];
 	int extra_argc = 0;
-	const char *format = NULL; /* some default values */
+	const char *format = NULL;
 	const char *remote = NULL;
 	const char *base = "";
 	int list = 0;
@@ -171,6 +172,8 @@ int parse_archive_args(int argc, const c
 			break;
 		}
 		if (arg[0] == '-') {
+			if (extra_argc > MAX_EXTRA_ARGS - 1)
+				die("Too many extra options");
 			extra_argv[extra_argc++] = arg;
 			continue;
 		}
@@ -185,7 +188,7 @@ int parse_archive_args(int argc, const c
 		die("--list and --remote are mutually exclusive");
 	}
 	if (argc - i < 1) {
-		die("%s", archive_usage);
+		usage(archive_usage);
 	}
 	if (!format){
 		die("You must specify an archive format");

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/4] Add git-archive
  2006-09-09 15:02                           ` Rene Scharfe
@ 2006-09-09 15:25                             ` Franck Bui-Huu
  0 siblings, 0 replies; 40+ messages in thread
From: Franck Bui-Huu @ 2006-09-09 15:25 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: Junio C Hamano, git

2006/9/9, Rene Scharfe <rene.scharfe@lsrfire.ath.cx>:
> Franck Bui-Huu schrieb:
> > 2006/9/8, Rene Scharfe <rene.scharfe@lsrfire.ath.cx>:
> >> > +     url = strdup(ar->remote);
> >>
> >> xstrdup()
> >>
> >
> > ok, but need to rebase...
>
> Why?  On a similar note , can we use Junio's pu branch (and soon his
> next branch) as our base for further work?
>

Yes. I just noticed Junio's post right after my reply.

> >> > +     if (list) {
> >> > +             if (!remote) {
> >> > +                     for (i = 0; i < ARRAY_SIZE(archivers); i++)
> >> > +                             printf("%s\n", archivers[i].name);
> >> > +                     exit(0);
> >> > +             }
> >> > +             die("--list and --remote are mutually exclusive");
> >> > +     }
> >>
> >> Not sure if we really need a list option.  I guess it only really
> >> makes sense if we have more than five formats.  I have no _strong_
> >> feelings against it, though. *shrug*
> >>
> >
> > well it's almost free to add it, and no need any maintenance if we add
> > a new archiver backend, so I would say let it.
>
> I thought a bit about it, and I can now see a good use case for --list:
> checking the capabilities of a remote site.  Unfortunately this is
> currently forbidden.  Why?  git-archive --list writes to stdout, so the
> result can be transported the same way an archive would.
>

Yes that was the main goal for this option. But then we talked about
enable/disable formats on the server side, and adding side band
support...all that points are still dark for me and I don't know how
'--list --remote' will interact with them. So I prefered to make this
option simple for now and not make the user to believe that doing

git archive --list --remote=...

list the capabilites of the remote side.

-- 
               Franck

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] git-archive: wire up TAR format.
  2006-09-09 15:10                         ` Franck Bui-Huu
@ 2006-09-09 19:42                           ` Junio C Hamano
  2006-09-10 16:10                             ` [PATCH] Use xstrdup instead of strdup in builtin-{tar,zip}-tree.c Rene Scharfe
  0 siblings, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2006-09-09 19:42 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Junio C Hamano, Rene Scharfe, git

"Franck Bui-Huu" <vagabon.xyz@gmail.com> writes:

> Almost Acked by me, except you've missed some Rene's comments. And
> more important I fixed an "uninitialized variable" bug. See patch
> below.

Gaah, I swear I fixed all of these at one time in my tree but
somehow forgot to apply the fix-up patch while cleaning it up.

Big thanks for eyeballing.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH] Use xstrdup instead of strdup in builtin-{tar,zip}-tree.c
  2006-09-09 19:42                           ` Junio C Hamano
@ 2006-09-10 16:10                             ` Rene Scharfe
  0 siblings, 0 replies; 40+ messages in thread
From: Rene Scharfe @ 2006-09-10 16:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Franck Bui-Huu, git

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
This patch applies to the current 'next' branch.

diff --git a/builtin-tar-tree.c b/builtin-tar-tree.c
index c20eb0e..e8e492f 100644
--- a/builtin-tar-tree.c
+++ b/builtin-tar-tree.c
@@ -390,7 +390,7 @@ int write_tar_archive(struct archiver_ar
 		write_global_extended_header(args->commit_sha1);
 
 	if (args->base && plen > 0 && args->base[plen - 1] == '/') {
-		char *base = strdup(args->base);
+		char *base = xstrdup(args->base);
 		int baselen = strlen(base);
 
 		while (baselen > 0 && base[baselen - 1] == '/')
diff --git a/builtin-zip-tree.c b/builtin-zip-tree.c
index 4e79633..fdac2bd 100644
--- a/builtin-zip-tree.c
+++ b/builtin-zip-tree.c
@@ -363,7 +363,7 @@ int write_zip_archive(struct archiver_ar
 	zip_dir_size = ZIP_DIRECTORY_MIN_SIZE;
 
 	if (args->base && plen > 0 && args->base[plen - 1] == '/') {
-		char *base = strdup(args->base);
+		char *base = xstrdup(args->base);
 		int baselen = strlen(base);
 
 		while (baselen > 0 && base[baselen - 1] == '/')

^ permalink raw reply related	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2006-09-10 16:11 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-05 12:16 [PATCH 1/2] Add git-archive Franck Bui-Huu
2006-09-05 19:23 ` Junio C Hamano
2006-09-06 13:46   ` Franck Bui-Huu
2006-09-06 20:14     ` Rene Scharfe
2006-09-06 20:29       ` Jakub Narebski
2006-09-08 20:21         ` Rene Scharfe
2006-09-06 21:42     ` Junio C Hamano
2006-09-07  6:32       ` Franck Bui-Huu
2006-09-07  7:19         ` Junio C Hamano
2006-09-07  7:53           ` Franck Bui-Huu
2006-09-07  8:16             ` Junio C Hamano
2006-09-07 13:08               ` Add git-archive [take #2] Franck Bui-Huu
2006-09-07 13:12                 ` [PATCH 1/4] Add git-archive Franck Bui-Huu
2006-09-08  2:35                   ` Junio C Hamano
2006-09-08  9:00                     ` Franck Bui-Huu
2006-09-08 20:21                       ` Rene Scharfe
2006-09-09 14:31                         ` Franck Bui-Huu
2006-09-09 15:02                           ` Rene Scharfe
2006-09-09 15:25                             ` Franck Bui-Huu
2006-09-07 13:12                 ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
2006-09-08 20:21                   ` Rene Scharfe
2006-09-08 21:42                     ` Junio C Hamano
2006-09-09  1:53                       ` Junio C Hamano
2006-09-09 15:02                         ` Rene Scharfe
2006-09-09 15:10                         ` Franck Bui-Huu
2006-09-09 19:42                           ` Junio C Hamano
2006-09-10 16:10                             ` [PATCH] Use xstrdup instead of strdup in builtin-{tar,zip}-tree.c Rene Scharfe
2006-09-09 14:38                     ` [PATCH 2/4] git-archive: wire up TAR format Franck Bui-Huu
2006-09-07 13:12                 ` [PATCH 3/4] git-archive: wire up ZIP format Franck Bui-Huu
2006-09-07 13:12                 ` [PATCH 4/4] Add git-upload-archive Franck Bui-Huu
2006-09-07 17:26                 ` Add git-archive [take #2] Franck Bui-Huu
2006-09-08  0:37                 ` Junio C Hamano
2006-09-08  8:18                   ` Franck Bui-Huu
2006-09-08  8:47                     ` Jakub Narebski
2006-09-08  8:58                     ` Junio C Hamano
2006-09-08  9:43                       ` Franck Bui-Huu
2006-09-08 21:42                         ` Junio C Hamano
2006-09-08 20:21                   ` Rene Scharfe
2006-09-08 21:42                     ` Junio C Hamano
2006-09-06 20:17 ` [PATCH 1/2] Add git-archive Rene Scharfe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).