From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH WIP 2/4] sha1_file.c: add streaming interface for reading blobs
Date: Thu, 28 May 2009 15:29:08 +1000 [thread overview]
Message-ID: <1243488550-15357-3-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1243488550-15357-2-git-send-email-pclouds@gmail.com>
The motivation is straightforward: large files should not be read entirely
into memory (and in some cases cannot be).
This patch deals with loose (unpacked) blobs only, for two reasons:
1. large blobs are less likely to be put in packs (*)
2. a streaming interface for blobs in packs is more complicated, and thus
more troublesome
(*) With regard to the first point, this relies on the assumption that large
blobs stay out of packs; otherwise you cannot make use of this
interface. This is not true now, but it was discussed and worked on in
the past. Hopefully a patch series that makes this assumption true
will come soon.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 8 ++++
sha1_file.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 121 insertions(+), 0 deletions(-)
diff --git a/cache.h b/cache.h
index f3fc822..f6f70ce 100644
--- a/cache.h
+++ b/cache.h
@@ -655,6 +655,14 @@ extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsig
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
extern int force_object_loose(const unsigned char *sha1, time_t mtime);
+/*
+ * Streaming access to a single loose object.
+ *
+ * open_loose_object() returns a handle, or NULL on failure.
+ * read_loose_object() stores the next piece of inflated object data in
+ * buf (buflen describes the caller's buffer) and returns the number of
+ * bytes stored, or -1 when no data could be produced.
+ * close_loose_object() releases the handle and returns 0.
+ * The loose_object_*() accessors report the sha1, size and type that
+ * were recorded in the handle when it was opened.
+ */
+struct loose_object_handle;
+struct loose_object_handle *open_loose_object(const unsigned char *sha1);
+int read_loose_object(struct loose_object_handle *oh, void *buf, unsigned long buflen);
+int close_loose_object(struct loose_object_handle *oh);
+const unsigned char *loose_object_sha1(struct loose_object_handle *oh);
+unsigned long loose_object_size(struct loose_object_handle *oh);
+enum object_type loose_object_type(struct loose_object_handle *oh);
+
/* global flag to enable extra checks when accessing packed objects */
extern int do_check_packed_object_crc;
diff --git a/sha1_file.c b/sha1_file.c
index e73cd4f..2ed06a2 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1340,6 +1340,119 @@ static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type
return unpack_sha1_rest(&stream, hdr, *size, sha1);
}
+
+/*
+ * State of one loose object that is being read as a stream.
+ */
+struct loose_object_handle {
+	/* identity and header information, filled in by open_loose_object() */
+	unsigned char sha1[20];
+	enum object_type type;
+	unsigned long size;
+
+	/* descriptor of the loose object file */
+	int fd;
+	/* zlib inflate state shared by all reads on this handle */
+	z_stream stream;
+	/* bufin holds compressed bytes read from fd */
+	char *bufin;
+	/* bufout receives inflated bytes until they are handed to the caller */
+	char *bufout;
+	/* allocated sizes of bufin and bufout */
+	int bufin_length;
+	int bufout_length;
+	/* bufout[start..end) is inflated data not yet returned to the caller */
+	unsigned long start;
+	unsigned long end;
+};
+
+/* Report the object type stored in the handle. */
+enum object_type loose_object_type(struct loose_object_handle *oh)
+{
+	const enum object_type kind = oh->type;
+
+	return kind;
+}
+
+/* Report the object size stored in the handle. */
+unsigned long loose_object_size(struct loose_object_handle *oh)
+{
+	const unsigned long nbytes = oh->size;
+
+	return nbytes;
+}
+
+/*
+ * Give read-only access to the sha1 stored in the handle.  The pointer
+ * refers to storage inside the handle itself.
+ */
+const unsigned char *loose_object_sha1(struct loose_object_handle *oh)
+{
+	const unsigned char *name = &oh->sha1[0];
+
+	return name;
+}
+
+/*
+ * Open the loose object named by sha1 for streaming reads.
+ *
+ * The first chunk of the file is read and the object header is
+ * inflated, so the type and size are known as soon as this returns.
+ * Object data that was inflated together with the header stays in the
+ * output buffer for the first read_loose_object() call.
+ *
+ * Returns a newly allocated handle, to be released with
+ * close_loose_object(), or NULL if the object cannot be opened, read
+ * or parsed.  Nothing is leaked on failure.
+ */
+struct loose_object_handle *open_loose_object(const unsigned char *sha1)
+{
+	struct loose_object_handle *oh;
+	int ret, len;
+	int fd;
+
+	fd = open_sha1_file(sha1);
+	if (fd == -1)
+		return NULL;
+
+	/*
+	 * Allocate the handle up front and set the z_stream up in place.
+	 * zlib's inflate state keeps a pointer back to its z_stream, so a
+	 * stream initialised in a local variable and then copied by value
+	 * is rejected by recent zlib versions.
+	 */
+	oh = xmalloc(sizeof(*oh));
+	memset(oh, 0, sizeof(*oh));
+	oh->fd = fd;
+	memcpy(oh->sha1, sha1, 20);
+
+	oh->bufin_length = 8192;
+	oh->bufin = xmalloc(oh->bufin_length);
+	oh->bufout_length = 8192;
+	oh->bufout = xmalloc(oh->bufout_length);
+
+	len = xread(oh->fd, oh->bufin, oh->bufin_length);
+	if (len == -1)
+		goto fail;
+
+	ret = unpack_sha1_header(&oh->stream, (unsigned char *)oh->bufin, len,
+				 oh->bufout, oh->bufout_length);
+	if (ret < Z_OK || (oh->type = parse_sha1_header(oh->bufout, &oh->size)) < 0)
+		goto fail;
+
+	/* skip the NUL-terminated header; the rest of bufout is object data */
+	oh->start = strlen(oh->bufout) + 1;
+	oh->end = oh->stream.total_out;
+	return oh;
+
+fail:
+	/*
+	 * The handle was zeroed above, so inflateEnd() is harmless when the
+	 * stream was never initialised.
+	 * NOTE(review): assumes unpack_sha1_header() leaves the stream either
+	 * initialised or untouched/zeroed -- confirm against its definition.
+	 */
+	inflateEnd(&oh->stream);
+	close(oh->fd);
+	free(oh->bufin);
+	free(oh->bufout);
+	free(oh);
+	return NULL;
+}
+
+/*
+ * Copy the next piece of inflated object data into buf.
+ *
+ * At most buflen bytes are stored; inflated data that does not fit
+ * stays in the handle and is returned by the following calls.
+ *
+ * Returns the number of bytes stored in buf, or -1 when no data could
+ * be produced (read error, or nothing left to inflate).
+ */
+int read_loose_object(struct loose_object_handle *oh, void *buf, unsigned long buflen)
+{
+	/* refill the output buffer only once it has been fully consumed */
+	if (oh->end == oh->start) {
+		int status;
+
+		oh->start = 0;
+		oh->stream.next_out = (unsigned char *)oh->bufout;
+		oh->stream.avail_out = oh->bufout_length;
+		status = inflate(&oh->stream, Z_NO_FLUSH);
+		oh->end = oh->stream.next_out - (unsigned char *)oh->bufout;
+
+		if (oh->stream.avail_in == 0) {
+			ssize_t nread = xread(oh->fd, oh->bufin, oh->bufin_length);
+
+			/*
+			 * avail_in is unsigned: storing -1 there would make
+			 * inflate() read far beyond bufin.
+			 */
+			if (nread < 0) {
+				error("unable to read loose object '%s': %s",
+				      sha1_to_hex(oh->sha1), strerror(errno));
+				return -1;
+			}
+			oh->stream.avail_in = nread;
+			oh->stream.next_in = (unsigned char *)oh->bufin;
+		}
+
+		/* trying to get Z_STREAM_END */
+		if (oh->stream.total_out == oh->size && status == Z_OK) {
+			status = inflate(&oh->stream, Z_NO_FLUSH);
+
+			if (status < 0)
+				error("corrupt loose object '%s'", sha1_to_hex(oh->sha1));
+			else if (oh->stream.avail_in)
+				error("garbage at end of loose object '%s'",
+				      sha1_to_hex(oh->sha1));
+		}
+	}
+
+	if (oh->end > oh->start) {
+		unsigned long len = oh->end - oh->start;
+
+		/* never write more than the caller's buffer can hold */
+		if (len > buflen)
+			len = buflen;
+		memcpy(buf, (char *)oh->bufout + oh->start, len);
+		oh->start += len;
+		return len;
+	}
+
+	/* nothing was inflated: end of object or a broken stream */
+	return -1;
+}
+
+/*
+ * Dispose of a handle: release the inflate state, both buffers, the
+ * file descriptor and finally the handle itself.  Always returns 0.
+ */
+int close_loose_object(struct loose_object_handle *oh)
+{
+	inflateEnd(&oh->stream);
+	free(oh->bufout);
+	free(oh->bufin);
+	close(oh->fd);
+	free(oh);
+
+	return 0;
+}
+
unsigned long get_size_from_delta(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos)
--
1.6.3.1.257.gbd13
next prev parent reply other threads:[~2009-05-28 5:29 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-05-28 5:29 [PATCH WIP 0/4] Special code path for large blobs Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` [PATCH WIP 1/4] convert.c: refactor in order to skip conversion early without looking into file content Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` Nguyễn Thái Ngọc Duy [this message]
2009-05-28 5:29 ` [PATCH WIP 3/4] write_entry: use streaming interface for checkout large files Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` [PATCH WIP 4/4] index_fd: support indexing " Nguyễn Thái Ngọc Duy
2009-05-28 18:03 ` [PATCH WIP 0/4] Special code path for large blobs Nicolas Pitre
2009-06-02 4:46 ` Nguyen Thai Ngoc Duy
2009-06-02 14:45 ` Shawn O. Pearce
2009-06-02 17:22 ` Nicolas Pitre
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1243488550-15357-3-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).