All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"René Scharfe" <rene.scharfe@lsrfire.ath.cx>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v3 5/9] archive-tar: stream large blobs to tar file
Date: Thu,  3 May 2012 08:51:04 +0700	[thread overview]
Message-ID: <1336009868-7411-6-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1336009868-7411-1-git-send-email-pclouds@gmail.com>

t5000 verifies output while t1050 makes sure the command always
respects core.bigfilethreshold

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 archive-tar.c       |   56 ++++++++++++++++++++++++++++++++++++++++++++++----
 t/t1050-large.sh    |    4 +++
 t/t5000-tar-tree.sh |    6 +++++
 3 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 3be0cdf..93387ea 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -4,6 +4,7 @@
 #include "cache.h"
 #include "tar.h"
 #include "archive.h"
+#include "streaming.h"
 #include "run-command.h"
 
 #define RECORDSIZE	(512)
@@ -30,10 +31,9 @@ static void write_if_needed(void)
  * queues up writes, so that all our write(2) calls write exactly one
  * full block; pads writes to RECORDSIZE
  */
-static void write_blocked(const void *data, unsigned long size)
+static void do_write_blocked(const void *data, unsigned long size)
 {
 	const char *buf = data;
-	unsigned long tail;
 
 	if (offset) {
 		unsigned long chunk = BLOCKSIZE - offset;
@@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size)
 		memcpy(block + offset, buf, size);
 		offset += size;
 	}
+}
+
+static void finish_record(void)
+{
+	unsigned long tail;
 	tail = offset % RECORDSIZE;
 	if (tail)  {
 		memset(block + offset, 0, RECORDSIZE - tail);
@@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size)
 	write_if_needed();
 }
 
+static void write_blocked(const void *data, unsigned long size)
+{
+	do_write_blocked(data, size);
+	finish_record();
+}
+
 /*
  * The end of tar archives is marked by 2*512 nul bytes and after that
  * follows the rest of the block (if any).
@@ -78,6 +89,33 @@ static void write_trailer(void)
 }
 
 /*
+ * queues up writes, so that all our write(2) calls write exactly one
+ * full block; pads writes to RECORDSIZE
+ */
+static int stream_blocked(const unsigned char *sha1)
+{
+	struct git_istream *st;
+	enum object_type type;
+	unsigned long sz;
+	char buf[BLOCKSIZE];
+	ssize_t readlen;
+
+	st = open_istream(sha1, &type, &sz, NULL);
+	if (!st)
+		return error("cannot stream blob %s", sha1_to_hex(sha1));
+	for (;;) {
+		readlen = read_istream(st, buf, sizeof(buf));
+		if (readlen <= 0)
+			break;
+		do_write_blocked(buf, readlen);
+	}
+	close_istream(st);
+	if (!readlen)
+		finish_record();
+	return readlen;
+}
+
+/*
  * pax extended header records have the format "%u %s=%s\n".  %u contains
  * the size of the whole string (including the %u), the first %s is the
  * keyword, the second one is the value.  This function constructs such a
@@ -203,7 +241,11 @@ static int write_tar_entry(struct archiver_args *args,
 	} else
 		memcpy(header.name, path, pathlen);
 
-	if (S_ISLNK(mode) || S_ISREG(mode)) {
+	if (S_ISREG(mode) && !args->convert &&
+	    sha1_object_info(sha1, &size) == OBJ_BLOB &&
+	    size > big_file_threshold)
+		buffer = NULL;
+	else if (S_ISLNK(mode) || S_ISREG(mode)) {
 		enum object_type type;
 		buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
 		if (!buffer)
@@ -235,8 +277,12 @@ static int write_tar_entry(struct archiver_args *args,
 	}
 	strbuf_release(&ext_header);
 	write_blocked(&header, sizeof(header));
-	if (S_ISREG(mode) && buffer && size > 0)
-		write_blocked(buffer, size);
+	if (S_ISREG(mode) && size > 0) {
+		if (buffer)
+			write_blocked(buffer, size);
+		else
+			err = stream_blocked(sha1);
+	}
 	free(buffer);
 	return err;
 }
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 4d127f1..fe47554 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -134,4 +134,8 @@ test_expect_success 'repack' '
 	git repack -ad
 '
 
+test_expect_success 'tar achiving' '
+	git archive --format=tar HEAD >/dev/null
+'
+
 test_done
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 527c9e7..d9b997f 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -84,6 +84,12 @@ test_expect_success \
     'git archive vs. git tar-tree' \
     'test_cmp b.tar b2.tar'
 
+test_expect_success 'git archive on large files' '
+    test_config core.bigfilethreshold 1 &&
+    git archive HEAD >b3.tar &&
+    test_cmp b.tar b3.tar
+'
+
 test_expect_success \
     'git archive in a bare repo' \
     '(cd bare.git && git archive HEAD) >b3.tar'
-- 
1.7.3.1.256.g2539c.dirty

  parent reply	other threads:[~2012-05-03  1:53 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-03  1:50 [PATCH v3 0/9] Large file support for git-archive Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 1/9] streaming: void pointer instead of char pointer Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 2/9] archive-tar: turn write_tar_entry into blob-writing only Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 3/9] archive-tar: unindent write_tar_entry by one level Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 4/9] archive: delegate blob reading to backend Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` Nguyễn Thái Ngọc Duy [this message]
2012-05-03  1:51 ` [PATCH v3 6/9] archive-zip: remove uncompressed_size Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 7/9] archive-zip: factor out helpers for writing sizes and CRC Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 8/9] archive-zip: streaming for stored files Nguyễn Thái Ngọc Duy
2012-05-03  1:51 ` [PATCH v3 9/9] archive-zip: streaming for deflated files Nguyễn Thái Ngọc Duy
2012-05-03  8:52 ` [PATCH 10/9] t5000: rationalize unzip tests René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1336009868-7411-6-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=rene.scharfe@lsrfire.ath.cx \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.