All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH WIP 3/4] write_entry: use streaming interface for checkout large files
Date: Thu, 28 May 2009 15:29:09 +1000	[thread overview]
Message-ID: <1243488550-15357-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1243488550-15357-3-git-send-email-pclouds@gmail.com>

With this patch, Git's memory consumption should stay pretty flat no
matter how large the input files are. So:

 - less memory will be used
 - much less memory on systems that do not have proper mmap() support
 - unmappable files can now be checked out

TODO: decide on the buffer size and on the file size limit that triggers this routine

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 entry.c |   68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 68 insertions(+), 0 deletions(-)

diff --git a/entry.c b/entry.c
index cc841ed..2a49d7b 100644
--- a/entry.c
+++ b/entry.c
@@ -91,6 +91,65 @@ static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
 	return NULL;
 }
 
+/*
+ * Try to write the entry by streaming its loose object to path, sizeof(buf) bytes at a time.
+ * Return 0 once written, 1 if the normal interface should be used, else the result of error().
+ */
+static int write_large_entry(struct cache_entry *ce, char *path,
+			     const struct checkout *state, int to_tempfile) /* state is not referenced in the body */
+{
+	unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
+	struct loose_object_handle *oh = open_loose_object(ce->sha1);
+	char buf[8192]; /* holds one chunk per read/write round */
+	long len;
+	int fd;
+	size_t wrote;
+
+	if (!oh)
+		return 1; /* no handle from open_loose_object(): caller uses the normal interface */
+
+	if (loose_object_type(oh) != OBJ_BLOB) {
+		close_loose_object(oh);
+		return error("git checkout-index: unable to read sha1 file of %s (%s)",
+			     path, sha1_to_hex(ce->sha1));
+	}
+
+	if (convert_to_working_tree_needed(ce->name,  xsize_t(loose_object_size(oh)))) {
+		close_loose_object(oh);
+		return 1; /* conversion is needed: caller uses the normal interface */
+	}
+
+	if (to_tempfile) {
+		if (ce_mode_s_ifmt == S_IFREG)
+			strcpy(path, ".merge_file_XXXXXX"); /* path is overwritten with the mkstemp() template */
+		else
+			strcpy(path, ".merge_link_XXXXXX");
+		fd = mkstemp(path);
+	} else if (ce_mode_s_ifmt == S_IFREG) {
+		fd = create_file(path, ce->ce_mode);
+	} else {
+		fd = create_file(path, 0666);
+	}
+	if (fd < 0) {
+		close_loose_object(oh);
+		return error("git checkout-index: unable to create file %s (%s)",
+			     path, strerror(errno));
+	}
+
+	while ((len = read_loose_object(oh, buf, sizeof(buf))) > 0) { /* stops on any len <= 0 */
+		wrote = write_in_full(fd, buf, len);
+		if (wrote != len) { /* anything other than a full write of len bytes is an error */
+			close(fd);
+			close_loose_object(oh);
+			return error("git checkout-index: unable to write file %s", path);
+		}
+	}
+
+	close(fd);
+	close_loose_object(oh);
+	return 0; /* NOTE(review): also reached when the last read returned < 0 - confirm that cannot signal an error */
+}
+
 static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
 {
 	unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
@@ -104,6 +163,15 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 	switch (ce_mode_s_ifmt) {
 	case S_IFREG:
 	case S_IFLNK:
+		if (ce_mode_s_ifmt == S_IFREG) {
+			ret = write_large_entry(ce, path, state, to_tempfile);
+			if (ret < 0) /* failed */
+				return ret;
+			if (ret == 0) /* successful */
+				break;
+			/* else, go through */
+		}
+
 		new = read_blob_entry(ce, &size);
 		if (!new)
 			return error("git checkout-index: unable to read sha1 file of %s (%s)",
-- 
1.6.3.1.257.gbd13

  reply	other threads:[~2009-05-28  5:29 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-28  5:29 [PATCH WIP 0/4] Special code path for large blobs Nguyễn Thái Ngọc Duy
2009-05-28  5:29 ` [PATCH WIP 1/4] convert.c: refactor in order to skip conversion early without looking into file content Nguyễn Thái Ngọc Duy
2009-05-28  5:29   ` [PATCH WIP 2/4] sha1_file.c: add streaming interface for reading blobs Nguyễn Thái Ngọc Duy
2009-05-28  5:29     ` Nguyễn Thái Ngọc Duy [this message]
2009-05-28  5:29       ` [PATCH WIP 4/4] index_fd: support indexing large files Nguyễn Thái Ngọc Duy
2009-05-28 18:03 ` [PATCH WIP 0/4] Special code path for large blobs Nicolas Pitre
2009-06-02  4:46   ` Nguyen Thai Ngoc Duy
2009-06-02 14:45     ` Shawn O. Pearce
2009-06-02 17:22       ` Nicolas Pitre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1243488550-15357-4-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.