git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] GIT: Create tar archives of tree on the fly
@ 2005-04-26 14:42 Rene Scharfe
  2005-04-29 22:26 ` Linus Torvalds
  0 siblings, 1 reply; 4+ messages in thread
From: Rene Scharfe @ 2005-04-26 14:42 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

This patch introduces tar-tree, a tool to generate tar archives out of
git repositories.  Basically I took ls-tree and cat-file and melted them
together.  That means tar-tree doesn't create any temporary files, it
just streams out the archive as it goes.

This could be useful for the web interface(s) to provide a downloadable
tarball for any commit or tree object.  For bigger repositories like the
Linux kernel, caching the resulting files might be a good idea,
though. :-P

On my machine it's also a bit faster than directly tarring up the
checked out files.  I only ran a few basic checks to make sure the
performance is in the same ballpark, YMMV.

Example usage:

  $ tar-tree a2755a80f40e5794ddc20e00f781af9d6320fafb linux-2.6.12-rc3 |
        bzip2 -9 > linux-2.6.12-rc3.tar.bz2

tar-tree accepts tree IDs and commit IDs.  In the former case all files
within the archive get the current time set as mtime.  Given a commit ID
tar-tree tries to figure out the commit date and sets mtime of all files
to that instead.

Currently the size of a file within the created archive is limited to
2^33-1 bytes.  This could be fixed easily within the archive format (with a
Pax extended header), but size is unsigned long throughout GIT, so this
would need to be fixed first.  OTOH I think putting 4GB+ files into a
GIT archive is insane anyway. :]

Path names are limited to 500 characters at the moment.  This can be
stretched if the need should arise.

Patch is against d1df5743809614241883ecad51876607cf432034.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>

diff -Nur a/Makefile b/Makefile
--- a/Makefile	2005-04-26 03:26:45.000000000 +0200
+++ b/Makefile	2005-04-26 08:09:03.000000000 +0200
@@ -18,7 +18,7 @@
 	cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
 	check-files ls-tree merge-base merge-cache unpack-file git-export \
 	diff-cache convert-cache http-pull rpush rpull rev-list git-mktag \
-	diff-tree-helper
+	diff-tree-helper tar-tree
 
 all: $(PROG)
 
diff -Nur a/tar-tree.c b/tar-tree.c
--- a/tar-tree.c	1970-01-01 01:00:00.000000000 +0100
+++ b/tar-tree.c	2005-04-26 08:23:05.000000000 +0200
@@ -0,0 +1,328 @@
+#include <time.h>
+#include "cache.h"
+
+static const char *tar_tree_usage = "tar-tree <key> [basedir]";
+static const char *basedir;
+static time_t archive_time;
+
+struct path_prefix {	/* one directory level on the way down to the current tree entry */
+	struct path_prefix *prev;	/* level of the enclosing directory; NULL at the top of the tree */
+	const char *name;		/* this directory's own name; '/' is added when the path is assembled */
+};
+
+static unsigned long write_out(void *buf, unsigned long size)
+{
+	char *pos = buf;	/* char *: arithmetic on void * is only a GNU extension */
+	while (size > 0) {
+		long ret = write(1, pos, size);
+		if (ret < 0) {
+			if (errno == EAGAIN || errno == EINTR)
+				continue;	/* transient failure: retry the same range */
+			/* Ignore epipe */
+			if (errno == EPIPE)
+				break;
+			die("tar-tree: %s", strerror(errno));
+		} else if (!ret) {
+			die("tar-tree: disk full?");
+		}
+		size -= ret;
+		pos += ret;
+	}
+	return size;	/* bytes NOT written to stdout: 0 on success, >0 only after EPIPE */
+}
+
+static unsigned long write_block(void *buf, unsigned long size)
+{
+	/* Write the payload, then zero-fill up to the next 512-byte boundary. */
+	/* Returns 0 when everything went out, else write_out()'s leftover count. */
+	char zeroes[511];
+	unsigned long tail = size % 512;
+	unsigned long left = write_out(buf, size);
+
+	if (left || !tail)
+		return left;	/* short write, or the data already ends on a boundary */
+	memset(zeroes, 0, 512 - tail);
+	return write_out(zeroes, 512 - tail);
+}
+
+static void append_string(char **p, const char *s)
+{
+	/* Copy s minus its terminating NUL to the cursor and step past the copy. */
+	unsigned int n = strlen(s);
+	*p = (char *)memcpy(*p, s, n) + n;
+}
+
+static void append_char(char **p, char c)
+{
+	/* Store c at the cursor, then move the cursor one byte forward. */
+	*(*p)++ = c;
+}
+
+static void append_long(char **p, long n)
+{
+	/* Print n in decimal at the cursor; sprintf's count advances the cursor. */
+	*p += sprintf(*p, "%ld", n);
+}
+
+static void append_path_prefix(char **buffer, struct path_prefix *prefix)
+{
+	if (prefix) {	/* enclosing levels first, so the path reads top-down */
+		append_path_prefix(buffer, prefix->prev);
+		append_string(buffer, prefix->name);
+		append_char(buffer, '/');
+	}
+}
+
+static unsigned int path_prefix_len(struct path_prefix *prefix)
+{
+	/* Sum over the chain: every level adds its name plus one '/'. */
+	return prefix ? path_prefix_len(prefix->prev) + strlen(prefix->name) + 1
+	              : 0;
+}
+
+static void append_path(char **p, int is_dir, const char *basepath,	/* writes the full entry name at *p and advances *p */
+			struct path_prefix *prefix, const char *path)	/* no terminating NUL is written */
+{
+	if (basepath) {			/* optional leading directory, followed by '/' */
+		append_string(p, basepath);
+		append_char(p, '/');
+	}
+	append_path_prefix(p, prefix);	/* enclosing directories, each followed by '/' */
+	append_string(p, path);		/* the entry's own name */
+	if (is_dir)
+		append_char(p, '/');	/* directory names get a trailing '/' */
+}
+
+static unsigned int path_len(int is_dir, const char *basepath,
+			     struct path_prefix *prefix, const char *path)
+{
+	/* Counts exactly the bytes append_path() emits for the same arguments. */
+	unsigned int total = path_prefix_len(prefix) + strlen(path);
+	if (basepath)
+		total += strlen(basepath) + 1;	/* base directory and its '/' */
+	if (is_dir)
+		total++;			/* trailing '/' of a directory */
+	return total;
+}
+
+static void append_extended_header_prefix(char **p, const char *keyword,	/* writes "<reclen> <keyword>=" at *p and advances *p */
+					  int valuelen)	/* length of the value that will follow the '=' */
+{
+	int reclen = 1 + 1 + strlen(keyword) + 1 + valuelen + 1;	/* one length digit, ' ', keyword, '=', value, one trailing byte */
+	if (reclen > 9)
+		reclen++;	/* the length field itself needs a second digit */
+	if (reclen > 99)
+		reclen++;	/* ... or a third one */
+	if (reclen > 512)
+		die("tar-tree: extended header too big, wtf?");
+	append_long(p, reclen);
+	append_char(p, ' ');
+	append_string(p, keyword);
+	append_char(p, '=');
+}
+
+static long write_header(const char *, const char *, struct path_prefix *,
+			 const char *, unsigned int, unsigned long);
+
+static long write_extended_header(const char *headerfilename, int is_dir,	/* emits a header block plus one "path" record block */
+				  const char *basepath,
+				  struct path_prefix *prefix,
+				  const char *path, unsigned int namelen)	/* namelen: length of the assembled path */
+{
+	char records[512], *p;
+	unsigned long ret;
+
+	memset(records, 0, sizeof(records));
+	p = records;
+	append_extended_header_prefix(&p, "path", namelen);
+	append_path(&p, is_dir, basepath, prefix, path);
+	append_char(&p, '\n');
+	ret = write_header(NULL, NULL, NULL, headerfilename, 0100600,	/* NULL sha1 makes write_header use typeflag 'x' */
+	                   p - records);				/* size field = bytes of record text */
+	if (!ret)
+		ret = write_out(records, sizeof(records));	/* the whole zero-padded 512-byte block */
+	return ret;	/* non-zero if either write came up short */
+}
+
+static long write_header(const char *sha1, const char *basepath,
+			 struct path_prefix *prefix, const char *path,
+			 unsigned int mode, unsigned long size)
+{
+	unsigned int namelen;
+	char *p, header[512];
+	unsigned int checksum = 0;
+	unsigned int i;
+	/* Fill one 512-byte ustar header for the entry and write it to stdout. */
+	memset(header, 0, sizeof(header));
+	/* Returns write_out()'s result (0 = all written); over-long names return size. */
+	namelen = path_len(S_ISDIR(mode), basepath, prefix, path);
+	if (namelen > 500) {
+		fprintf(stderr, "tar-tree: name too long of object %s\n",
+		        sha1_to_hex(sha1));
+		return size;
+	} else if (namelen > 100) {
+		unsigned long ret;	/* name exceeds the 100-byte field: emit a pax header first */
+		char *sha1_hex = sha1_to_hex(sha1);
+		char headerfilename[51];
+
+		sprintf(header, "%s.data", sha1_hex);
+		sprintf(headerfilename, "%s.paxheader", sha1_hex);
+		ret = write_extended_header(headerfilename, S_ISDIR(mode),
+		                            basepath, prefix, path, namelen);
+		if (ret)
+			return ret;
+	} else {
+		p = header;
+		append_path(&p, S_ISDIR(mode), basepath, prefix, path);
+	}
+
+	if (S_ISDIR(mode))
+		mode |= 0755;	/* GIT doesn't store permissions of dirs */
+	sprintf(&header[100], "%07o", mode & 07777);
+
+	/* XXX: should we provide more meaningful info here? */
+	sprintf(&header[108], "%07o", 0);	/* uid */
+	sprintf(&header[116], "%07o", 0);	/* gid */
+	strncpy(&header[265], "git", 31);	/* uname */
+	strncpy(&header[297], "git", 31);	/* gname */
+
+	sprintf(&header[124], "%011lo", S_ISDIR(mode) ? 0 : size);
+	sprintf(&header[136], "%011lo", (unsigned long)archive_time);
+
+	/* typeflag */
+	if (!sha1)
+		header[156] = 'x';	/* extended header */
+	else
+		header[156] = S_ISDIR(mode) ? '5' : '0';
+
+	strcpy(&header[257], "ustar");
+	memcpy(&header[263], "00", 2);	/* exactly 2 bytes: strcpy's NUL would wipe uname[0] at 265 */
+
+	sprintf(&header[329], "%07o", 0);	/* devmajor */
+	sprintf(&header[337], "%07o", 0);	/* devminor */
+
+	memset(&header[148], ' ', 8);	/* the checksum field is summed as if it held spaces */
+	for (i = 0; i < sizeof(header); i++)
+		checksum += (unsigned char)header[i];	/* sum bytes as unsigned values */
+	sprintf(&header[148], "%07o", checksum & 0x1fffff);
+
+	return write_out(header, sizeof(header));
+}
+
+static unsigned long write_trailer(void)
+{
+	/* Finish the archive with 1024 zero bytes, i.e. two empty 512-byte blocks. */
+	static char zeroes[1024];
+	return write_out(zeroes, sizeof(zeroes));
+}
+
+static void traverse_tree(void *buffer, unsigned long size,
+			  struct path_prefix *prefix)
+{
+	struct path_prefix this_prefix;
+	this_prefix.prev = prefix;	/* subtree entries get this level as their parent */
+	/* Entry layout: "<octal mode> <name>" NUL, then a 20-byte binary SHA1. */
+	while (size) {
+		unsigned long namelen = strlen(buffer) + 1;	/* mode, ' ', name, NUL */
+		void *eltbuf;
+		char elttype[20];
+		unsigned long eltsize;
+		unsigned char *sha1 = (unsigned char *)buffer + namelen;
+		char *space = strchr(buffer, ' ');
+		unsigned int mode;
+		char *path = space ? space + 1 : NULL;	/* no ' ' at all means a corrupt entry */
+		if (!path || size < namelen + 20 || sscanf(buffer, "%o", &mode) != 1)
+			die("corrupt 'tree' file");
+		buffer = sha1 + 20;
+		size -= namelen + 20;
+
+		eltbuf = read_sha1_file(sha1, elttype, &eltsize);
+		if (!eltbuf) {
+			error("cannot read %s", sha1_to_hex(sha1));
+			continue;	/* skip the unreadable object, keep archiving the rest */
+		}
+		if (write_header(sha1, basedir, prefix, path, mode, eltsize))
+			exit(0);	/* header not (fully) written: stop quietly */
+		if (!strcmp(elttype, "tree")) {
+			this_prefix.name = path;
+			traverse_tree(eltbuf, eltsize, &this_prefix);
+		} else if (!strcmp(elttype, "blob")) {
+			if (write_block(eltbuf, eltsize))
+				exit(0);
+		}
+		free(eltbuf);
+	}
+}
+
+time_t commit_time(const unsigned char *sha1)
+{
+	char type[20];
+	void *buffer;
+	unsigned long size;
+	time_t result = 0;
+	/* Returns the committer timestamp of object sha1, or 0 if none can be parsed. */
+	buffer = read_sha1_file(sha1, type, &size);
+	if (buffer) {
+		char *p = buffer;
+		while (size > 0) {	/* walk the object one line at a time */
+			char *endp = memchr(p, '\n', size);
+			if (!endp)
+				break;
+			*endp = '\0';
+			if (endp - p > 10 && !memcmp(p, "committer ", 10)) {
+				char *nump = strrchr(p, ' ');	/* last field of the line */
+				if (!nump)
+					break;
+				*nump = '\0';			/* cut it off ... */
+				nump = strrchr(p, ' ');		/* ... so the timestamp is now last */
+				if (!nump)
+					break;
+				result = strtoul(nump, &endp, 10);
+				if (*endp != '\0')
+					result = 0;	/* trailing junk: not a plain number */
+				break;
+			}
+			size -= endp - p + 1;	/* consume the line and its '\n' */
+			p = endp + 1;
+		}
+	}
+	free(buffer);
+	return result;
+}
+
+int main(int argc, char **argv)	/* tar-tree <key> [basedir]: stream a tar archive of the tree to stdout */
+{
+	unsigned char sha1[20];
+	void *buffer;
+	unsigned long size;
+	unsigned char tree_sha1[20];
+
+	switch (argc) {
+	case 3:
+		basedir = argv[2];	/* optional directory every archive entry is placed under */
+		/* FALLTHROUGH */
+	case 2:
+		if (get_sha1_hex(argv[1], sha1) < 0)
+			usage(tar_tree_usage);
+		break;
+	default:
+		usage(tar_tree_usage);
+	}
+
+	sha1_file_directory = getenv(DB_ENVIRONMENT);
+	if (!sha1_file_directory)
+		sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
+
+	buffer = read_tree_with_tree_or_commit_sha1(sha1, &size, tree_sha1);
+	if (!buffer)
+		die("unable to read sha1 file");
+	if (memcmp(sha1, tree_sha1, 20))	/* is sha1 a commit object? */
+		archive_time = commit_time(sha1);
+	if (!archive_time)
+		archive_time = time(NULL);	/* tree ID given, or no commit date found: use the current time */
+	if (basedir)
+		write_header("0", NULL, NULL, basedir, 040755, 0);	/* "0" only serves as a non-NULL sha1 placeholder */
+	traverse_tree(buffer, size, NULL);
+	free(buffer);
+	write_trailer();
+	return 0;
+}

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] GIT: Create tar archives of tree on the fly
  2005-04-26 14:42 [PATCH] GIT: Create tar archives of tree on the fly Rene Scharfe
@ 2005-04-29 22:26 ` Linus Torvalds
  2005-04-30  0:13   ` Rene Scharfe
  2005-04-30  1:22   ` Rene Scharfe
  0 siblings, 2 replies; 4+ messages in thread
From: Linus Torvalds @ 2005-04-29 22:26 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: git



Having just done the git-0.7.tar.gz file with git-tar-tree, I started 
wondering if there is some nice way to encode the commit version that got 
tarred up into the tar archive itself.

There are various obvious ways, like creating a fake zero-sized file 
called <base>/.git-version-<commit-id>, and maybe that's the right thing 
to do. But maybe the tar archive format (and no, I don't even want to know 
details) has some nice way to hide off a keyname even _without_ having to 
create a file.

Would people like to have such a file for later? Obviously there would be 
a need to suppress it with a command line flag if you don't want it (or 
have a cmd line flag to enable it in the first place), what do people 
think? Rene?

		Linus

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] GIT: Create tar archives of tree on the fly
  2005-04-29 22:26 ` Linus Torvalds
@ 2005-04-30  0:13   ` Rene Scharfe
  2005-04-30  1:22   ` Rene Scharfe
  1 sibling, 0 replies; 4+ messages in thread
From: Rene Scharfe @ 2005-04-30  0:13 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

Linus Torvalds schrieb:
> 
> Having just done the git-0.7.tar.gz file with git-tar-tree, I started
>  wondering if there is some nice way to encode the commit version
> that got tarred up into the tar archive itself.

The pax archive format allows for comments; you can store the commit ID
in a (archive-)global comment.  Archivers are supposed to ignore it and
GNU tar at least does so.  You can extract the ID with

   $ dd bs=1 skip=523 count=41 2>/dev/null < TARFILE

because it would always end up at that position at the start of the archive.

Rene

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] GIT: Create tar archives of tree on the fly
  2005-04-29 22:26 ` Linus Torvalds
  2005-04-30  0:13   ` Rene Scharfe
@ 2005-04-30  1:22   ` Rene Scharfe
  1 sibling, 0 replies; 4+ messages in thread
From: Rene Scharfe @ 2005-04-30  1:22 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

On Fri, Apr 29, 2005 at 03:26:14PM -0700, Linus Torvalds wrote:
> 
> 
> Having just done the git-0.7.tar.gz file with git-tar-tree, I started 
> wondering if there is some nice way to encode the commit version that got 
> tarred up into the tar archive itself.

... and here is the patch that changes git-tar-tree to add the commit ID
as a comment in a global pax header to the tar file.  Archivers ignore
this field.  A little sample program is included to read the ID from a
previously prepared archive.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>

Write commit ID to global extended pax header at the beginning of the tar
file, if possible.  get-tar-commit-id.c is an example program to get the
ID back out of such a tar archive.

---
commit 716d21c45ba1c329fb88febf4704a4ab629a3933
tree 72f4d42eac2cd9099a663c16cb8201f90a8ff9c9
parent 0fc65a4572625405ff6dd9d8c16d835f2b1ebd49
author Rene Scharfe <rene.scharfe@lsrfire.ath.cx> 1114812895 +0200
committer Rene Scharfe <rene.scharfe@lsrfire.ath.cx> 1114812895 +0200

Index: get-tar-commit-id.c
===================================================================
--- /dev/null  (tree:c1546808797f6a3c4e6ae82069cee3dc316fbf24)
+++ 72f4d42eac2cd9099a663c16cb8201f90a8ff9c9/get-tar-commit-id.c  (mode:100644 sha1:a1a17e53d29136df431d2a128292d7aefefaea41)
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define HEADERSIZE	1024
+
+int main(int argc, char **argv)
+{
+	char buffer[HEADERSIZE];
+	ssize_t n = read(0, buffer, HEADERSIZE);
+
+	/* Both the header block and the record block after it are needed. */
+	if (n < HEADERSIZE) {
+		fprintf(stderr, "read error\n");
+		return 3;
+	}
+	/* Exit 1 unless typeflag is 'g' and the record starts "52 comment=". */
+	if (buffer[156] != 'g' || memcmp(&buffer[512], "52 comment=", 11))
+		return 1;
+	/* Print the 41 bytes that follow the "52 comment=" prefix. */
+	n = write(1, &buffer[523], 41);
+	if (n < 41) {
+		fprintf(stderr, "write error\n");
+		return 2;
+	}
+	return 0;
+}
Index: tar-tree.c
===================================================================
--- c1546808797f6a3c4e6ae82069cee3dc316fbf24/tar-tree.c  (mode:100644 sha1:5cc7cfef6db1269d81589b82255537fb64ba02fa)
+++ 72f4d42eac2cd9099a663c16cb8201f90a8ff9c9/tar-tree.c  (mode:100644 sha1:ea7ea91add54532c38dde9e343efe34d56805341)
@@ -160,7 +160,7 @@
 	return len;
 }
 
-static void write_header(const char *, const char *, struct path_prefix *,
+static void write_header(const char *, char, const char *, struct path_prefix *,
                          const char *, unsigned int, unsigned long);
 
 /* stores a pax extended header directly in the block buffer */
@@ -169,7 +169,7 @@
                                   struct path_prefix *prefix,
                                   const char *path, unsigned int namelen)
 {
-	char *records, *p;
+	char *p;
 	unsigned int size = 1 + 6 + namelen + 1;
 	if (size > 9)
 		size++;
@@ -177,12 +177,10 @@
 		size++;
 	if (size > RECORDSIZE)
 		die("tar-tree: extended header too big, wtf?");
-	write_header(NULL, NULL, NULL, headerfilename, 0100600, size);
-
-	records = block + offset;
-	memset(records, 0, RECORDSIZE);
+	write_header(NULL, 'x', NULL, NULL, headerfilename, 0100600, size);
+	p = block + offset;
+	memset(p, 0, RECORDSIZE);
 	offset += RECORDSIZE;
-	p = records;
 	append_long(&p, size);
 	append_string(&p, " path=");
 	append_path(&p, is_dir, basepath, prefix, path);
@@ -190,8 +188,22 @@
 	write_if_needed();
 }
 
+static void write_global_extended_header(const char *sha1)	/* stores sha1 (in hex) as a "comment" record under a typeflag-'g' header */
+{
+	char *p;
+	write_header(NULL, 'g', NULL, NULL, "pax_global_header", 0, 52);	/* typeflag 'g', size 52 = length of the record below */
+	p = block + offset;	/* NOTE(review): block/offset are declared outside this hunk; presumably the output buffer and its fill level */
+	memset(p, 0, RECORDSIZE);
+	offset += RECORDSIZE;
+	append_long(&p, 52);	/* 2 + 9 + 40 + 1 */
+	append_string(&p, " comment=");
+	append_string(&p, sha1_to_hex(sha1));
+	append_char(&p, '\n');
+	write_if_needed();	/* NOTE(review): defined elsewhere; presumably flushes the buffer once enough has accumulated */
+}
+
 /* stores a ustar header directly in the block buffer */
-static void write_header(const char *sha1, const char *basepath,
+static void write_header(const char *sha1, char typeflag, const char *basepath,
                          struct path_prefix *prefix, const char *path,
                          unsigned int mode, unsigned long size)
 {
@@ -236,11 +248,7 @@
 	sprintf(&header[124], "%011lo", S_ISDIR(mode) ? 0 : size);
 	sprintf(&header[136], "%011lo", archive_time);
 
-	/* typeflag */
-	if (!sha1)
-		header[156] = 'x';	/* extended header */
-	else
-		header[156] = S_ISDIR(mode) ? '5' : '0';
+	header[156] = typeflag;
 
 	memcpy(&header[257], "ustar", 6);
 	memcpy(&header[263], "00", 2);
@@ -279,7 +287,8 @@
 		eltbuf = read_sha1_file(sha1, elttype, &eltsize);
 		if (!eltbuf)
 			die("cannot read %s", sha1_to_hex(sha1));
-		write_header(sha1, basedir, prefix, path, mode, eltsize);
+		write_header(sha1, S_ISDIR(mode) ? '5' : '0', basedir,
+		             prefix, path, mode, eltsize);
 		if (!strcmp(elttype, "tree")) {
 			this_prefix.name = path;
 			traverse_tree(eltbuf, eltsize, &this_prefix);
@@ -320,6 +329,7 @@
 int main(int argc, char **argv)
 {
 	unsigned char sha1[20];
+	unsigned char commit_sha1[20];
 	void *buffer;
 	unsigned long size;
 
@@ -339,8 +349,9 @@
 	if (!sha1_file_directory)
 		sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
 
-	buffer = read_object_with_reference(sha1, "commit", &size, NULL);
+	buffer = read_object_with_reference(sha1, "commit", &size, commit_sha1);
 	if (buffer) {
+		write_global_extended_header(commit_sha1);
 		archive_time = commit_time(buffer, size);
 		free(buffer);
 	}
@@ -351,7 +362,7 @@
 	if (!archive_time)
 		archive_time = time(NULL);
 	if (basedir)
-		write_header("0", NULL, NULL, basedir, 040755, 0);
+		write_header("0", '5', NULL, NULL, basedir, 040755, 0);
 	traverse_tree(buffer, size, NULL);
 	free(buffer);
 	write_trailer();

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-04-30  2:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-26 14:42 [PATCH] GIT: Create tar archives of tree on the fly Rene Scharfe
2005-04-29 22:26 ` Linus Torvalds
2005-04-30  0:13   ` Rene Scharfe
2005-04-30  1:22   ` Rene Scharfe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).