git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org
Subject: [PATCH] GIT: Create tar archives of tree on the fly
Date: Tue, 26 Apr 2005 16:42:23 +0200	[thread overview]
Message-ID: <20050426144222.GA12035@lsrfire.ath.cx> (raw)

This patch introduces tar-tree, a tool to generate tar archives out of
git repositories.  Basically I took ls-tree and cat-file and melted them
together.  That means tar-tree doesn't create any temporary files, it
just streams out the archive as it goes.

This could be useful for the web interface(s) to provide a downloadable
tarball for any commit or tree object.  For bigger repositories like the
Linux kernel caching the resulting files might be a good idea,
though. :-P

On my machine it's also a bit faster than directly tarring up the
checked out files.  I only ran a few basic checks to make sure the
performance is in the same ballpark, YMMV.

Example usage:

  $ tar-tree a2755a80f40e5794ddc20e00f781af9d6320fafb linux-2.6.12-rc3 |
        bzip2 -9 > linux-2.6.12-rc3.tar.bz2

tar-tree accepts tree IDs and commit IDs.  In the former case all files
within the archive get the current time set as mtime.  Given a commit ID
tar-tree tries to figure out the commit date and sets mtime of all files
to that instead.

Currently the size of a file within the created archive is limited to
2^33-1.  This could be fixed easily within the archive format (with a
Pax extended header), but size is unsigned long throughout GIT, so this
would need to be fixed first.  OTOH I think putting 4GB+ files into a
GIT archive is insane anyway. :]

Path names are limited to 500 characters at the moment.  This can be
stretched if the need should arise.

Patch is against d1df5743809614241883ecad51876607cf432034.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>

diff -Nur a/Makefile b/Makefile
--- a/Makefile	2005-04-26 03:26:45.000000000 +0200
+++ b/Makefile	2005-04-26 08:09:03.000000000 +0200
@@ -18,7 +18,7 @@
 	cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
 	check-files ls-tree merge-base merge-cache unpack-file git-export \
 	diff-cache convert-cache http-pull rpush rpull rev-list git-mktag \
-	diff-tree-helper
+	diff-tree-helper tar-tree
 
 all: $(PROG)
 
diff -Nur a/tar-tree.c b/tar-tree.c
--- a/tar-tree.c	1970-01-01 01:00:00.000000000 +0100
+++ b/tar-tree.c	2005-04-26 08:23:05.000000000 +0200
@@ -0,0 +1,328 @@
+#include <time.h>
+#include "cache.h"
+
+static const char *tar_tree_usage = "tar-tree <key> [basedir]";
+static const char *basedir;
+static time_t archive_time;
+
+struct path_prefix {
+	struct path_prefix *prev;
+	const char *name;
+};
+
+static unsigned long write_out(void *buf, unsigned long size)
+{
+	while (size > 0) {
+		long ret = write(1, buf, size);
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				continue;
+			/* Ignore epipe */
+			if (errno == EPIPE)
+				break;
+			die("tar-tree: %s", strerror(errno));
+		} else if (!ret) {
+			die("tar-tree: disk full?");
+		}
+		size -= ret;
+		buf += ret;
+	}
+	return size;
+}
+
+static unsigned long write_block(void *buf, unsigned long size)
+{
+	unsigned long ret = write_out(buf, size);
+	if (!ret) {
+		unsigned long slack = 512 - size % 512;
+		if (slack % 512) {
+			char padding[511];
+			memset(padding, 0, slack);
+			ret = write_out(padding, slack);
+		}
+	}
+	return ret;
+}
+
+static void append_string(char **p, const char *s)
+{
+	unsigned int len = strlen(s);
+	memcpy(*p, s, len);
+	*p += len;
+}
+
+static void append_char(char **p, char c)
+{
+	**p = c;
+	*p += 1;
+}
+
+static void append_long(char **p, long n)
+{
+	int len = sprintf(*p, "%ld", n);
+	*p += len;
+}
+
+static void append_path_prefix(char **buffer, struct path_prefix *prefix)
+{
+	if (!prefix)
+		return;
+	append_path_prefix(buffer, prefix->prev);
+	append_string(buffer, prefix->name);
+	append_char(buffer, '/');
+}
+
+static unsigned int path_prefix_len(struct path_prefix *prefix)
+{
+	if (!prefix)
+		return 0;
+	return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1;
+}
+
+static void append_path(char **p, int is_dir, const char *basepath,
+			struct path_prefix *prefix, const char *path)
+{
+	if (basepath) {
+		append_string(p, basepath);
+		append_char(p, '/');
+	}
+	append_path_prefix(p, prefix);
+	append_string(p, path);
+	if (is_dir)
+		append_char(p, '/');
+}
+
+static unsigned int path_len(int is_dir, const char *basepath,
+			     struct path_prefix *prefix, const char *path)
+{
+	unsigned int len = 0;
+	if (basepath)
+		len += strlen(basepath) + 1;
+	len += path_prefix_len(prefix) + strlen(path);
+	if (is_dir)
+		len++;
+	return len;
+}
+
+static void append_extended_header_prefix(char **p, const char *keyword,
+					  int valuelen)
+{
+	int reclen = 1 + 1 + strlen(keyword) + 1 + valuelen + 1;
+	if (reclen > 9)
+		reclen++;
+	if (reclen > 99)
+		reclen++;
+	if (reclen > 512)
+		die("tar-tree: extended header too big, wtf?");
+	append_long(p, reclen);
+	append_char(p, ' ');
+	append_string(p, keyword);
+	append_char(p, '=');
+}
+
+static long write_header(const char *, const char *, struct path_prefix *,
+			 const char *, unsigned int, unsigned long);
+
+static long write_extended_header(const char *headerfilename, int is_dir,
+				  const char *basepath,
+				  struct path_prefix *prefix,
+				  const char *path, unsigned int namelen)
+{
+	char records[512], *p;
+	unsigned long ret;
+
+	memset(records, 0, sizeof(records));
+	p = records;
+	append_extended_header_prefix(&p, "path", namelen);
+	append_path(&p, is_dir, basepath, prefix, path);
+	append_char(&p, '\n');
+	ret = write_header(NULL, NULL, NULL, headerfilename, 0100600,
+	                   p - records);
+	if (!ret)
+		ret = write_out(records, sizeof(records));
+	return ret;
+}
+
+static long write_header(const char *sha1, const char *basepath,
+			 struct path_prefix *prefix, const char *path,
+			 unsigned int mode, unsigned long size)
+{
+	unsigned int namelen; 
+	char *p, header[512];
+	unsigned int checksum = 0;
+	int i;
+
+	memset(header, 0, sizeof(header));
+
+	namelen = path_len(S_ISDIR(mode), basepath, prefix, path);
+	if (namelen > 500) {
+		fprintf(stderr, "tar-tree: name too log of object %s\n",
+		        sha1_to_hex(sha1));
+		return size;
+	} else if (namelen > 100) {
+		unsigned long ret;
+		char *sha1_hex = sha1_to_hex(sha1);
+		char headerfilename[51];
+
+		sprintf(header, "%s.data", sha1_hex);
+		sprintf(headerfilename, "%s.paxheader", sha1_hex);
+		ret = write_extended_header(headerfilename, S_ISDIR(mode),
+		                            basepath, prefix, path, namelen);
+		if (ret)
+			return ret;
+	} else {
+		p = header;
+		append_path(&p, S_ISDIR(mode), basepath, prefix, path);
+	}
+
+	if (S_ISDIR(mode))
+		mode |= 0755;	/* GIT doesn't store permissions of dirs */
+	sprintf(&header[100], "%07o", mode & 07777);
+
+	/* XXX: should we provide more meaningful info here? */
+	sprintf(&header[108], "%07o", 0);	/* uid */
+	sprintf(&header[116], "%07o", 0);	/* gid */
+	strncpy(&header[265], "git", 31);	/* uname */
+	strncpy(&header[297], "git", 31);	/* gname */
+
+	sprintf(&header[124], "%011lo", S_ISDIR(mode) ? 0 : size);
+	sprintf(&header[136], "%011lo", archive_time);
+
+	/* typeflag */
+	if (!sha1)
+		header[156] = 'x';	/* extended header */
+	else
+		header[156] = S_ISDIR(mode) ? '5' : '0';
+
+	strcpy(&header[257], "ustar");
+	strcpy(&header[263], "00");
+
+	printf(&header[329], "%07o", 0);	/* devmajor */
+	printf(&header[337], "%07o", 0);	/* devminor */
+
+	memset(&header[148], ' ', 8);
+	for (i = 0; i < sizeof(header); i++)
+		checksum += header[i];
+	sprintf(&header[148], "%07o", checksum & 0x1fffff);
+
+	return write_out(header, sizeof(header));
+}
+
+static unsigned long write_trailer(void)
+{
+	char block[1024];
+	memset(block, 0, sizeof(block));
+	return write_out(block, sizeof(block));
+}
+
+static void traverse_tree(void *buffer, unsigned long size,
+			  struct path_prefix *prefix)
+{
+	struct path_prefix this_prefix;
+	this_prefix.prev = prefix;
+
+	while (size) {
+		int namelen = strlen(buffer)+1;
+		void *eltbuf;
+		char elttype[20];
+		unsigned long eltsize;
+		unsigned char *sha1 = buffer + namelen;
+		char *path = strchr(buffer, ' ') + 1;
+		unsigned int mode;
+
+		if (size < namelen + 20 || sscanf(buffer, "%o", &mode) != 1)
+			die("corrupt 'tree' file");
+		buffer = sha1 + 20;
+		size -= namelen + 20;
+
+		eltbuf = read_sha1_file(sha1, elttype, &eltsize);
+		if (!eltbuf) {
+			error("cannot read %s", sha1_to_hex(sha1));
+			continue;
+		}
+		if (write_header(sha1, basedir, prefix, path, mode, eltsize))
+			exit(0);
+		if (!strcmp(elttype, "tree")) {
+			this_prefix.name = path;
+			traverse_tree(eltbuf, eltsize, &this_prefix);
+		} else if (!strcmp(elttype, "blob")) {
+			if (write_block(eltbuf, eltsize))
+				exit(0);
+		}
+		free(eltbuf);
+	}
+}
+
+time_t commit_time(const unsigned char *sha1)
+{
+	char type[20];
+	void *buffer;
+	unsigned long size;
+	time_t result = 0;
+
+	buffer = read_sha1_file(sha1, type, &size);
+	if (buffer) {
+		char *p = buffer;
+		while (size > 0) {
+			char *endp = memchr(p, '\n', size);
+			if (!endp)
+				break;
+			*endp = '\0';
+			if (endp - p > 10 && !memcmp(p, "committer ", 10)) {
+				char *nump = strrchr(p, ' ');
+				if (!nump)
+					break;
+				*nump = '\0';
+				nump = strrchr(p, ' ');
+				if (!nump)
+					break;
+				result = strtoul(nump, &endp, 10);
+				if (*endp != '\0')
+					result = 0;
+				break;
+			}
+			size -= endp - p - 1;
+			p = endp + 1;
+		}
+	}
+	free(buffer);
+	return result;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned char sha1[20];
+	void *buffer;
+	unsigned long size;
+	unsigned char tree_sha1[20];
+
+	switch (argc) {
+	case 3:
+		basedir = argv[2];
+		/* FALLTHROUGH */
+	case 2:
+		if (get_sha1_hex(argv[1], sha1) < 0)
+			usage(tar_tree_usage);
+		break;
+	default:
+		usage(tar_tree_usage);
+	}
+
+	sha1_file_directory = getenv(DB_ENVIRONMENT);
+	if (!sha1_file_directory)
+		sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
+
+	buffer = read_tree_with_tree_or_commit_sha1(sha1, &size, tree_sha1);
+	if (!buffer)
+		die("unable to read sha1 file");
+	if (memcmp(sha1, tree_sha1, 20))	/* is sha1 a commit object? */
+		archive_time = commit_time(sha1);
+	if (!archive_time)
+		archive_time = time(NULL);
+	if (basedir)
+		write_header("0", NULL, NULL, basedir, 040755, 0);
+	traverse_tree(buffer, size, NULL);
+	free(buffer);
+	write_trailer();
+	return 0;
+}

             reply	other threads:[~2005-04-26 14:38 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-26 14:42 Rene Scharfe [this message]
2005-04-29 22:26 ` [PATCH] GIT: Create tar archives of tree on the fly Linus Torvalds
2005-04-30  0:13   ` Rene Scharfe
2005-04-30  1:22   ` Rene Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050426144222.GA12035@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=git@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).