git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"René Scharfe" <rene.scharfe@lsrfire.ath.cx>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 06/10] archive-tar: stream large blobs to tar file
Date: Wed,  2 May 2012 20:25:18 +0700	[thread overview]
Message-ID: <1335965122-17458-7-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1335965122-17458-1-git-send-email-pclouds@gmail.com>

t5000 makes sure it produces correct output, while t1050 is about not
going over the memory limit (i.e. respecting core.bigfilethreshold from
the beginning to the end).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 archive-tar.c       |   44 +++++++++++++++++++++++++++++++++++++++++---
 t/t1050-large.sh    |    4 ++++
 t/t5000-tar-tree.sh |    7 +++++++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 9060f9a..759e2bf 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -4,6 +4,7 @@
 #include "cache.h"
 #include "tar.h"
 #include "archive.h"
+#include "streaming.h"
 #include "run-command.h"
 
 #define RECORDSIZE	(512)
@@ -80,6 +81,35 @@ static void write_trailer(void)
 }
 
 /*
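+ * Streams the blob named by sha1 to the archive in BLOCKSIZE chunks via
+ * write_blocked(), then pads the output to a full RECORDSIZE block.
+ * Returns 0 at end of stream, non-zero if the blob can't be opened or read.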
+static int stream_blocked(const unsigned char *sha1)
+{
+	struct git_istream *st;
+	enum object_type type;
+	unsigned long sz;
+	char buf[BLOCKSIZE];
+	ssize_t readlen;
+
+	st = open_istream(sha1, &type, &sz, NULL);
+	if (!st)
+		return error("cannot stream blob %s", sha1_to_hex(sha1));
+	for (;;) {
+		readlen = read_istream(st, buf, sizeof(buf));
+		if (readlen <= 0)
+			break;
+		write_blocked(buf, readlen, 1);
+	}
+	close_istream(st);
+
+	/* pad the remaining (if any) to full 512-byte blocks */
+	if (!readlen)
+		write_blocked(NULL, 0, 0);
+	return readlen;
+}
+
+/*
  * pax extended header records have the format "%u %s=%s\n".  %u contains
  * the size of the whole string (including the %u), the first %s is the
  * keyword, the second one is the value.  This function constructs such a
@@ -205,7 +235,11 @@ static int write_tar_entry(struct archiver_args *args,
 	} else
 		memcpy(header.name, path, pathlen);
 
-	if (S_ISLNK(mode) || S_ISREG(mode)) {
+	if (S_ISREG(mode) && !args->convert &&
+	    sha1_object_info(sha1, &size) == OBJ_BLOB &&
+	    size > big_file_threshold)
+		buffer = NULL;
+	else if (S_ISLNK(mode) || S_ISREG(mode)) {
 		enum object_type type;
 		buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
 		if (!buffer)
@@ -237,8 +271,12 @@ static int write_tar_entry(struct archiver_args *args,
 	}
 	strbuf_release(&ext_header);
 	write_blocked(&header, sizeof(header), 0);
-	if (S_ISREG(mode) && buffer && size > 0)
-		write_blocked(buffer, size, 0);
+	if (S_ISREG(mode) && size > 0) {
+		if (buffer)
+			write_blocked(buffer, size, 0);
+		else
+			err = stream_blocked(sha1);
+	}
 	free(buffer);
 	return err;
 }
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 4d127f1..fe47554 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -134,4 +134,8 @@ test_expect_success 'repack' '
 	git repack -ad
 '
 
+test_expect_success 'tar achiving' '
+	git archive --format=tar HEAD >/dev/null
+'
+
 test_done
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 527c9e7..421c356 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -84,6 +84,13 @@ test_expect_success \
     'git archive vs. git tar-tree' \
     'test_cmp b.tar b2.tar'
 
+test_expect_success 'git archive on large files' '
+    git config core.bigfilethreshold 1 &&
+    git archive HEAD >b3.tar &&
+    git config --unset core.bigfilethreshold &&
+    test_cmp b.tar b3.tar
+'
+
 test_expect_success \
     'git archive in a bare repo' \
     '(cd bare.git && git archive HEAD) >b3.tar'
-- 
1.7.8.36.g69ee2

  parent reply	other threads:[~2012-05-02 13:30 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-02 13:25 [PATCH v2 00/10] Large file support for git-archive Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 01/10] streaming: void pointer instead of char pointer Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 02/10] archive-tar: turn write_tar_entry into blob-writing only Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 03/10] archive-tar: unindent write_tar_entry by one level Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 04/10] archive: delegate blob reading to backend Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 05/10] archive-tar: allow to accumulate writes before writing 512-byte blocks Nguyễn Thái Ngọc Duy
2012-05-02 14:28   ` René Scharfe
2012-05-02 14:43     ` Nguyen Thai Ngoc Duy
2012-05-02 13:25 ` Nguyễn Thái Ngọc Duy [this message]
2012-05-02 14:34   ` [PATCH v2 06/10] archive-tar: stream large blobs to tar file René Scharfe
2012-05-02 13:25 ` [PATCH v2 07/10] archive-zip: remove uncompressed_size Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 08/10] archive-zip: factor out helpers for writing sizes and CRC Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 09/10] archive-zip: streaming for stored files Nguyễn Thái Ngọc Duy
2012-05-02 13:25 ` [PATCH v2 10/10] archive-zip: streaming for deflated files Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1335965122-17458-7-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=rene.scharfe@lsrfire.ath.cx \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).