git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: git@vger.kernel.org
Subject: [PATCH 07/11] streaming_write_entry(): use streaming API in write_entry()
Date: Sun, 15 May 2011 17:30:27 -0700	[thread overview]
Message-ID: <1305505831-31587-8-git-send-email-gitster@pobox.com> (raw)
In-Reply-To: <1305505831-31587-1-git-send-email-gitster@pobox.com>

When the output to a path does not have to be converted, we can read from
the object database via the streaming API and write to the file in the
working tree, without having to hold everything in memory.

The ident, auto- and safe- crlf conversions inherently require you to read
the whole thing before deciding what to do, so while it is technically
possible to support them by using a buffer of unbounded size or rewinding
and reading the stream twice, it is less practical than the traditional
"read the whole thing in core and convert" approach.

Adding streaming filters for the other conversions on top of this should
be doable by tweaking the can_bypass_conversion() function (it should be
renamed to can_filter_stream() when that happens). Then the streaming API
can be extended to wrap the git_istream that streaming_write_entry() opens
on the underlying object in another git_istream that reads from it, filters
what is read, and lets streaming_write_entry() read the filtered
result. But that is outside the scope of this series.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h   |    1 +
 convert.c |   23 +++++++++++++++++++++++
 entry.c   |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 0 deletions(-)

diff --git a/cache.h b/cache.h
index 39c09b2..39e53c8 100644
--- a/cache.h
+++ b/cache.h
@@ -1157,6 +1157,7 @@ extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int can_bypass_conversion(const char *path);
 
 /* add */
 /*
diff --git a/convert.c b/convert.c
index efc7e07..d3c0041 100644
--- a/convert.c
+++ b/convert.c
@@ -813,3 +813,26 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 	}
 	return ret | convert_to_git(path, src, len, dst, 0);
 }
+
+/*
+ * Return 1 if "path" needs no ident, smudge/clean or CRLF conversion,
+ * 0 otherwise.  You would be crazy to set any of these on a large
+ * binary blob you would want us not to slurp into the memory!
+ */
+int can_bypass_conversion(const char *path)
+{
+	struct conv_attrs attrs;
+	enum crlf_action action;
+
+	convert_attrs(&attrs, path);
+
+	if (attrs.ident)
+		return 0;
+	if (attrs.drv && (attrs.drv->smudge || attrs.drv->clean))
+		return 0;
+
+	action = input_crlf_action(attrs.crlf_action, attrs.eol_attr);
+	if (action == CRLF_BINARY)
+		return 1;
+	return action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE;
+}
diff --git a/entry.c b/entry.c
index cc6502a..7733a6b 100644
--- a/entry.c
+++ b/entry.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "blob.h"
 #include "dir.h"
+#include "streaming.h"
 
 static void create_directories(const char *path, int path_len,
 			       const struct checkout *state)
@@ -114,6 +115,50 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st)
 	return 0;
 }
 
+/*
+ * Stream ce's blob to path: 0 on success, -1 (output unlinked) on failure.
+ */
+static int streaming_write_entry(struct cache_entry *ce, char *path,
+				 const struct checkout *state, int to_tempfile,
+				 int *fstat_done, struct stat *statbuf)
+{
+	struct git_istream *st;
+	enum object_type type;
+	unsigned long sz;
+	int result = -1;
+	int fd = -1;
+
+	st = open_istream(ce->sha1, &type, &sz);
+	if (!st)
+		return -1;
+	if (type != OBJ_BLOB)
+		goto close_and_exit;
+
+	fd = open_output_fd(path, ce, to_tempfile);
+	if (fd < 0)
+		goto close_and_exit;
+
+	for (;;) {
+		char buf[10240];
+		ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+		if (!readlen)
+			break; /* nothing more to read */
+		if (readlen < 0 || write_in_full(fd, buf, readlen) != readlen)
+			goto close_and_exit; /* read error or short write */
+	}
+	*fstat_done = fstat_output(fd, state, statbuf);
+	result = 0; /* all data written; only close() below can still fail */
+
+close_and_exit:
+	close_istream(st);
+	if (0 <= fd)
+		result |= close(fd);
+	if (result && 0 <= fd)
+		unlink(path);
+	return result;
+}
+
 static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
 {
 	unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
@@ -124,6 +169,12 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 	size_t wrote, newsize = 0;
 	struct stat st;
 
+	if ((ce_mode_s_ifmt == S_IFREG) &&
+	    can_bypass_conversion(path) &&
+	    !streaming_write_entry(ce, path, state, to_tempfile,
+				   &fstat_done, &st))
+		goto finish;
+
 	switch (ce_mode_s_ifmt) {
 	case S_IFREG:
 	case S_IFLNK:
@@ -176,6 +227,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 		return error("unknown file mode for %s in index", path);
 	}
 
+finish:
 	if (state->refresh_cache) {
 		if (!fstat_done)
 			lstat(ce->name, &st);
-- 
1.7.5.1.365.g32b65

  parent reply	other threads:[~2011-05-16  0:31 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-16  0:30 [PATCH 00/11] writing out a huge blob to working tree Junio C Hamano
2011-05-16  0:30 ` [PATCH 01/11] packed_object_info_detail(): do not return a string Junio C Hamano
2011-05-17  0:45   ` Thiago Farina
2011-05-17  2:36     ` Junio C Hamano
2011-05-16  0:30 ` [PATCH 02/11] sha1_object_info_extended(): expose a bit more info Junio C Hamano
2011-05-16  0:30 ` [PATCH 03/11] sha1_object_info_extended(): hint about objects in delta-base cache Junio C Hamano
2011-05-16  0:40   ` Shawn Pearce
2011-05-16  0:30 ` [PATCH 04/11] unpack_object_header(): make it public Junio C Hamano
2011-05-16  0:30 ` [PATCH 05/11] write_entry(): separate two helper functions out Junio C Hamano
2011-05-16  0:30 ` [PATCH 06/11] streaming: a new API to read from the object store Junio C Hamano
2011-05-18  8:09   ` Jeff King
2011-05-19  1:52     ` Junio C Hamano
2011-05-16  0:30 ` Junio C Hamano [this message]
2011-05-16  0:30 ` [PATCH 08/11] streaming_write_entry(): support files with holes Junio C Hamano
2011-05-16 10:53   ` Nguyen Thai Ngoc Duy
2011-05-16 14:39     ` Junio C Hamano
2011-05-17  1:18       ` Nguyen Thai Ngoc Duy
2011-05-17  5:23         ` Junio C Hamano
2011-05-16 13:03   ` Thiago Farina
2011-05-16  0:30 ` [PATCH 09/11] streaming: read non-delta incrementally from a pack Junio C Hamano
2011-05-16  0:58   ` Shawn Pearce
2011-05-16  5:00     ` Junio C Hamano
2011-05-16  0:30 ` [PATCH 10/11] sha1_file.c: expose helpers to read loose objects Junio C Hamano
2011-05-16  0:30 ` [PATCH 11/11] streaming: read loose objects incrementally Junio C Hamano
2011-05-16  0:47 ` [PATCH 00/11] writing out a huge blob to working tree Shawn Pearce
2011-05-18  6:41 ` Jeff King
2011-05-18  7:08   ` Jeff King
2011-05-18  7:50     ` Jeff King
2011-05-18 15:12       ` Junio C Hamano
2011-05-18  8:17 ` Jeff King
2011-05-19 21:33 ` [PATCH v2 " Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 01/11] packed_object_info_detail(): do not return a string Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 02/11] sha1_object_info_extended(): expose a bit more info Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 03/11] sha1_object_info_extended(): hint about objects in delta-base cache Junio C Hamano
2011-05-20 23:05     ` René Scharfe
2011-05-21  1:49       ` Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 04/11] unpack_object_header(): make it public Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 05/11] write_entry(): separate two helper functions out Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 06/11] streaming: a new API to read from the object store Junio C Hamano
2011-05-20 23:05     ` René Scharfe
2011-05-21  1:49       ` Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 07/11] streaming_write_entry(): use streaming API in write_entry() Junio C Hamano
2011-05-20 22:52     ` Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 08/11] streaming_write_entry(): support files with holes Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 09/11] streaming: read non-delta incrementally from a pack Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 10/11] sha1_file.c: expose helpers to read loose objects Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 11/11] streaming: read loose objects incrementally Junio C Hamano
2011-05-19 21:44   ` [Not A PATCH v2 02/11] interdiff Junio C Hamano
2011-05-19 22:21   ` [PATCH v2 00/11] writing out a huge blob to working tree Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1305505831-31587-8-git-send-email-gitster@pobox.com \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).