From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 09/10] archive: support streaming large files to a tar archive
Date: Sun, 4 Mar 2012 19:59:55 +0700 [thread overview]
Message-ID: <1330865996-2069-10-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1330865996-2069-1-git-send-email-pclouds@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
archive-tar.c | 35 ++++++++++++++++++++++++++++-------
archive-zip.c | 9 +++++----
archive.c | 51 ++++++++++++++++++++++++++++++++++-----------------
archive.h | 11 +++++++++--
t/t1050-large.sh | 2 +-
5 files changed, 77 insertions(+), 31 deletions(-)
diff --git a/archive-tar.c b/archive-tar.c
index 20af005..5bffe49 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -5,6 +5,7 @@
#include "tar.h"
#include "archive.h"
#include "run-command.h"
+#include "streaming.h"
#define RECORDSIZE (512)
#define BLOCKSIZE (RECORDSIZE * 20)
@@ -123,9 +124,29 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen)
return i;
}
+static void write_file(struct git_istream *stream, const void *buffer,
+ unsigned long size)
+{
+ if (!stream) {
+ write_blocked(buffer, size);
+ return;
+ }
+ for (;;) {
+ char buf[1024 * 16];
+ ssize_t readlen;
+
+ readlen = read_istream(stream, buf, sizeof(buf));
+
+ if (!readlen)
+ break;
+ write_blocked(buf, readlen);
+ }
+}
+
static int write_tar_entry(struct archiver_args *args,
- const unsigned char *sha1, const char *path, size_t pathlen,
- unsigned int mode, void *buffer, unsigned long size)
+ const unsigned char *sha1, const char *path,
+ size_t pathlen, unsigned int mode, void *buffer,
+ struct git_istream *stream, unsigned long size)
{
struct ustar_header header;
struct strbuf ext_header = STRBUF_INIT;
@@ -200,14 +221,14 @@ static int write_tar_entry(struct archiver_args *args,
if (ext_header.len > 0) {
err = write_tar_entry(args, sha1, NULL, 0, 0, ext_header.buf,
- ext_header.len);
+ NULL, ext_header.len);
if (err)
return err;
}
strbuf_release(&ext_header);
write_blocked(&header, sizeof(header));
- if (S_ISREG(mode) && buffer && size > 0)
- write_blocked(buffer, size);
+ if (S_ISREG(mode) && size > 0)
+ write_file(stream, buffer, size);
return err;
}
@@ -219,7 +240,7 @@ static int write_global_extended_header(struct archiver_args *args)
strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
err = write_tar_entry(args, NULL, NULL, 0, 0, ext_header.buf,
- ext_header.len);
+ NULL, ext_header.len);
strbuf_release(&ext_header);
return err;
}
@@ -308,7 +329,7 @@ static int write_tar_archive(const struct archiver *ar,
if (args->commit_sha1)
err = write_global_extended_header(args);
if (!err)
- err = write_archive_entries(args, write_tar_entry);
+ err = write_archive_entries(args, write_tar_entry, 1);
if (!err)
write_trailer();
return err;
diff --git a/archive-zip.c b/archive-zip.c
index 02d1f37..4a1e917 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -120,9 +120,10 @@ static void *zlib_deflate(void *data, unsigned long size,
return buffer;
}
-static int write_zip_entry(struct archiver_args *args,
- const unsigned char *sha1, const char *path, size_t pathlen,
- unsigned int mode, void *buffer, unsigned long size)
+int write_zip_entry(struct archiver_args *args,
+ const unsigned char *sha1, const char *path,
+ size_t pathlen, unsigned int mode, void *buffer,
+ struct git_istream *stream, unsigned long size)
{
struct zip_local_header header;
struct zip_dir_header dirent;
@@ -271,7 +272,7 @@ static int write_zip_archive(const struct archiver *ar,
zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE);
zip_dir_size = ZIP_DIRECTORY_MIN_SIZE;
- err = write_archive_entries(args, write_zip_entry);
+ err = write_archive_entries(args, write_zip_entry, 0);
if (!err)
write_zip_trailer(args->commit_sha1);
diff --git a/archive.c b/archive.c
index 1ee837d..257eadf 100644
--- a/archive.c
+++ b/archive.c
@@ -5,6 +5,7 @@
#include "archive.h"
#include "parse-options.h"
#include "unpack-trees.h"
+#include "streaming.h"
static char const * const archive_usage[] = {
"git archive [options] <tree-ish> [<path>...]",
@@ -59,26 +60,35 @@ static void format_subst(const struct commit *commit,
free(to_free);
}
-static void *sha1_file_to_archive(const char *path, const unsigned char *sha1,
- unsigned int mode, enum object_type *type,
- unsigned long *sizep, const struct commit *commit)
+void sha1_file_to_archive(void **buffer, struct git_istream **stream,
+ const char *path, const unsigned char *sha1,
+ unsigned int mode, enum object_type *type,
+ unsigned long *sizep,
+ const struct commit *commit)
{
- void *buffer;
+ if (stream) {
+ struct stream_filter *filter;
+ filter = get_stream_filter(path, sha1);
+ if (!commit && S_ISREG(mode) && is_null_stream_filter(filter)) {
+ *buffer = NULL;
+ *stream = open_istream(sha1, type, sizep, NULL);
+ return;
+ }
+ *stream = NULL;
+ }
- buffer = read_sha1_file(sha1, type, sizep);
- if (buffer && S_ISREG(mode)) {
+ *buffer = read_sha1_file(sha1, type, sizep);
+ if (*buffer && S_ISREG(mode)) {
struct strbuf buf = STRBUF_INIT;
size_t size = 0;
- strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
+ strbuf_attach(&buf, *buffer, *sizep, *sizep + 1);
convert_to_working_tree(path, buf.buf, buf.len, &buf);
if (commit)
format_subst(commit, buf.buf, buf.len, &buf);
- buffer = strbuf_detach(&buf, &size);
+ *buffer = strbuf_detach(&buf, &size);
*sizep = size;
}
-
- return buffer;
}
static void setup_archive_check(struct git_attr_check *check)
@@ -97,6 +107,7 @@ static void setup_archive_check(struct git_attr_check *check)
struct archiver_context {
struct archiver_args *args;
write_archive_entry_fn_t write_entry;
+ int stream_ok;
};
static int write_archive_entry(const unsigned char *sha1, const char *base,
@@ -109,6 +120,7 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
write_archive_entry_fn_t write_entry = c->write_entry;
struct git_attr_check check[2];
const char *path_without_prefix;
+ struct git_istream *stream = NULL;
int convert = 0;
int err;
enum object_type type;
@@ -133,25 +145,29 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
strbuf_addch(&path, '/');
if (args->verbose)
fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
- err = write_entry(args, sha1, path.buf, path.len, mode, NULL, 0);
+ err = write_entry(args, sha1, path.buf, path.len, mode, NULL, NULL, 0);
if (err)
return err;
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
}
- buffer = sha1_file_to_archive(path_without_prefix, sha1, mode,
- &type, &size, convert ? args->commit : NULL);
- if (!buffer)
+ sha1_file_to_archive(&buffer, c->stream_ok ? &stream : NULL,
+ path_without_prefix, sha1, mode,
+ &type, &size, convert ? args->commit : NULL);
+ if (!buffer && !stream)
return error("cannot read %s", sha1_to_hex(sha1));
if (args->verbose)
fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
- err = write_entry(args, sha1, path.buf, path.len, mode, buffer, size);
+ err = write_entry(args, sha1, path.buf, path.len, mode, buffer, stream, size);
+ if (stream)
+ close_istream(stream);
free(buffer);
return err;
}
int write_archive_entries(struct archiver_args *args,
- write_archive_entry_fn_t write_entry)
+ write_archive_entry_fn_t write_entry,
+ int stream_ok)
{
struct archiver_context context;
struct unpack_trees_options opts;
@@ -167,13 +183,14 @@ int write_archive_entries(struct archiver_args *args,
if (args->verbose)
fprintf(stderr, "%.*s\n", (int)len, args->base);
err = write_entry(args, args->tree->object.sha1, args->base,
- len, 040777, NULL, 0);
+ len, 040777, NULL, NULL, 0);
if (err)
return err;
}
context.args = args;
context.write_entry = write_entry;
+ context.stream_ok = stream_ok;
/*
* Setup index and instruct attr to read index only
diff --git a/archive.h b/archive.h
index 2b0884f..370cca9 100644
--- a/archive.h
+++ b/archive.h
@@ -27,9 +27,16 @@ extern void register_archiver(struct archiver *);
extern void init_tar_archiver(void);
extern void init_zip_archiver(void);
-typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, void *buffer, unsigned long size);
+struct git_istream;
+typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
+ const unsigned char *sha1,
+ const char *path, size_t pathlen,
+ unsigned int mode,
+ void *buffer,
+ struct git_istream *stream,
+ unsigned long size);
-extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
+extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry, int stream_ok);
extern int write_archive(int argc, const char **argv, const char *prefix, int setup_prefix, const char *name_hint, int remote);
const char *archive_format_from_filename(const char *filename);
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index c749ecb..1e64692 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -149,7 +149,7 @@ test_expect_success 'repack' '
git repack -ad
'
-test_expect_failure 'tar achiving' '
+test_expect_success 'tar achiving' '
git archive --format=tar HEAD >/dev/null
'
--
1.7.8.36.g69ee2
next prev parent reply other threads:[~2012-03-04 13:03 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-27 7:55 [PATCH 00/11] Large blob fixes Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 01/11] Add more large blob test cases Nguyễn Thái Ngọc Duy
2012-02-27 20:18 ` Peter Baumann
2012-02-27 7:55 ` [PATCH 02/11] Factor out and export large blob writing code to arbitrary file handle Nguyễn Thái Ngọc Duy
2012-02-27 17:29 ` Junio C Hamano
2012-02-27 21:50 ` Junio C Hamano
2012-02-27 7:55 ` [PATCH 03/11] cat-file: use streaming interface to print blobs Nguyễn Thái Ngọc Duy
2012-02-27 17:44 ` Junio C Hamano
2012-02-28 1:08 ` Nguyen Thai Ngoc Duy
2012-02-27 7:55 ` [PATCH 04/11] parse_object: special code path for blobs to avoid putting whole object in memory Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 05/11] show: use streaming interface for showing blobs Nguyễn Thái Ngọc Duy
2012-02-27 18:00 ` Junio C Hamano
2012-02-27 7:55 ` [PATCH 06/11] index-pack --verify: skip sha-1 collision test Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 07/11] index-pack: split second pass obj handling into own function Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 08/11] index-pack: reduce memory usage when the pack has large blobs Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 09/11] pack-check: do not unpack blobs Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 10/11] archive: support streaming large files to a tar archive Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 11/11] fsck: use streaming interface for writing lost-found blobs Nguyễn Thái Ngọc Duy
2012-02-27 18:43 ` [PATCH 00/11] Large blob fixes Junio C Hamano
2012-02-28 1:23 ` Nguyen Thai Ngoc Duy
2012-03-04 12:59 ` [PATCH v2 00/10] " Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 01/10] Add more large blob test cases Nguyễn Thái Ngọc Duy
2012-03-06 0:59 ` Junio C Hamano
2012-03-04 12:59 ` [PATCH v2 02/10] streaming: make streaming-write-entry to be more reusable Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 03/10] cat-file: use streaming interface to print blobs Nguyễn Thái Ngọc Duy
2012-03-04 23:12 ` Junio C Hamano
2012-03-05 2:42 ` Nguyen Thai Ngoc Duy
2012-03-04 12:59 ` [PATCH v2 04/10] parse_object: special code path for blobs to avoid putting whole object in memory Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 05/10] show: use streaming interface for showing blobs Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 06/10] index-pack: split second pass obj handling into own function Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 07/10] index-pack: reduce memory usage when the pack has large blobs Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 08/10] pack-check: do not unpack blobs Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` Nguyễn Thái Ngọc Duy [this message]
2012-03-04 12:59 ` [PATCH v2 10/10] fsck: use streaming interface for writing lost-found blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 00/11] Large blob fixes Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 01/11] Add more large blob test cases Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 02/11] streaming: make streaming-write-entry to be more reusable Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 03/11] cat-file: use streaming interface to print blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 04/11] parse_object: special code path for blobs to avoid putting whole object in memory Nguyễn Thái Ngọc Duy
2012-03-06 0:57 ` Junio C Hamano
2012-03-05 3:43 ` [PATCH v3 05/11] show: use streaming interface for showing blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 06/11] index-pack: split second pass obj handling into own function Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 07/11] index-pack: reduce memory usage when the pack has large blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 08/11] pack-check: do not unpack blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 09/11] archive: support streaming large files to a tar archive Nguyễn Thái Ngọc Duy
2012-03-06 0:57 ` Junio C Hamano
2012-03-05 3:43 ` [PATCH v3 10/11] fsck: use streaming interface for writing lost-found blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 11/11] update-server-info: respect core.bigfilethreshold Nguyễn Thái Ngọc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1330865996-2069-10-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).