From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org
Subject: Re: [PATCH v2 06/11] streaming: a new API to read from the object store
Date: Sat, 21 May 2011 01:05:36 +0200 [thread overview]
Message-ID: <4DD6F3C0.4060107@lsrfire.ath.cx> (raw)
In-Reply-To: <1305840826-7783-7-git-send-email-gitster@pobox.com>
Am 19.05.2011 23:33, schrieb Junio C Hamano:
> Given an object name, use open_istream() to get a git_istream handle
> that you can read_istream() from as if you are using read(2) to read
> the contents of the object, and close it with close_istream() when
> you are done.
>
> Currently, we do not do anything fancy--it just calls read_sha1_file()
> and keeps the contents in memory as a whole, and carve it out as you
> request with read_istream().
>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
> Makefile | 2 +
> streaming.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> streaming.h | 15 +++++
> 3 files changed, 216 insertions(+), 0 deletions(-)
> create mode 100644 streaming.c
> create mode 100644 streaming.h
>
> diff --git a/Makefile b/Makefile
> index 320ccc7..83bd539 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -552,6 +552,7 @@ LIB_H += sha1-lookup.h
> LIB_H += sideband.h
> LIB_H += sigchain.h
> LIB_H += strbuf.h
> +LIB_H += streaming.h
> LIB_H += string-list.h
> LIB_H += submodule.h
> LIB_H += tag.h
> @@ -657,6 +658,7 @@ LIB_OBJS += shallow.o
> LIB_OBJS += sideband.o
> LIB_OBJS += sigchain.o
> LIB_OBJS += strbuf.o
> +LIB_OBJS += streaming.o
> LIB_OBJS += string-list.o
> LIB_OBJS += submodule.o
> LIB_OBJS += symlinks.o
> diff --git a/streaming.c b/streaming.c
> new file mode 100644
> index 0000000..84330b4
> --- /dev/null
> +++ b/streaming.c
> @@ -0,0 +1,199 @@
> +/*
> + * Copyright (c) 2011, Google Inc.
> + */
> +#include "cache.h"
> +#include "streaming.h"
> +
> +enum input_source {
> + stream_error = -1,
> + incore = 0,
> + loose = 1,
> + pack_non_delta = 2
> +};
> +
> +typedef int (*open_istream_fn)(struct git_istream *,
> + struct object_info *,
> + const unsigned char *,
> + enum object_type *);
> +typedef int (*close_istream_fn)(struct git_istream *);
> +typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);
> +
> +struct stream_vtbl {
> + close_istream_fn close;
> + read_istream_fn read;
> +};
> +
> +#define open_method_decl(name) \
> + int open_istream_ ##name \
> + (struct git_istream *st, struct object_info *oi, \
> + const unsigned char *sha1, \
> + enum object_type *type)
> +
> +#define close_method_decl(name) \
> + int close_istream_ ##name \
> + (struct git_istream *st)
> +
> +#define read_method_decl(name) \
> + ssize_t read_istream_ ##name \
> + (struct git_istream *st, char *buf, size_t sz)
It would be nice if those macros could be removed once the interface
stabilizes.
> +
> +/* forward declaration */
> +static open_method_decl(incore);
> +static open_method_decl(loose);
> +static open_method_decl(pack_non_delta);
> +
> +static open_istream_fn open_istream_tbl[] = {
> + open_istream_incore,
> + open_istream_loose,
> + open_istream_pack_non_delta,
> +};
These three uses of the macro can be avoided by moving open_istream_tbl
and open_istream() to the end of the file. It would be just as clear
and clean, albeit not as close to literate programming style.
> +
> +struct git_istream {
> + enum input_source source;
source seems to be write-only.
> + const struct stream_vtbl *vtbl;
> + unsigned long size; /* inflated size of full object */
> +
> + union {
> + struct {
> + char *buf; /* from read_object() */
> + unsigned long read_ptr;
> + } incore;
> +
> + struct {
> + int fd; /* open for reading */
> + /* NEEDSWORK: what else? */
> + } loose;
> +
> + struct {
> + int fd; /* open for reading */
> + /* NEEDSWORK: what else? */
> + } in_pack;
> + } u;
> +};
> +
> +int close_istream(struct git_istream *st)
> +{
> + return st->vtbl->close(st);
> +}
> +
> +ssize_t read_istream(struct git_istream *st, char *buf, size_t sz)
> +{
> + return st->vtbl->read(st, buf, sz);
> +}
> +
> +static enum input_source istream_source(const unsigned char *sha1,
> + enum object_type *type,
> + struct object_info *oi)
> +{
> + unsigned long size;
> + int status;
> +
> + oi->sizep = &size;
> + status = sha1_object_info_extended(sha1, oi);
> + if (status < 0)
> + return stream_error;
> + *type = status;
> +
> + switch (oi->whence) {
> + case OI_LOOSE:
> + return loose;
> + case OI_PACKED:
> + if (!oi->u.packed.is_delta && big_file_threshold <= size)
> + return pack_non_delta;
> + /* fallthru */
> + default:
> + return incore;
> + }
> +}
> +
> +struct git_istream *open_istream(const unsigned char *sha1,
> + enum object_type *type,
> + unsigned long *size)
> +{
> + struct git_istream *st;
> + struct object_info oi;
> + const unsigned char *real = lookup_replace_object(sha1);
> + enum input_source src = istream_source(real, type, &oi);
> +
> + if (src < 0)
> + return NULL;
> +
> + st = xmalloc(sizeof(*st));
> + st->source = src;
> + if (open_istream_tbl[src](st, &oi, real, type)) {
> + if (open_istream_incore(st, &oi, real, type)) {
> + free(st);
> + st = NULL;
return NULL;
// Otherwise we get a problem three lines down: "*size = st->size"
// dereferences st, which has just been set to NULL.
> + }
> + }
> + *size = st->size;
> + return st;
> +}
> +
> +/*****************************************************************
> + *
> + * Loose object stream
> + *
> + *****************************************************************/
> +
> +static open_method_decl(loose)
> +{
> + return -1; /* for now */
> +}
> +
> +
> +/*****************************************************************
> + *
> + * Non-delta packed object stream
> + *
> + *****************************************************************/
> +
> +static open_method_decl(pack_non_delta)
> +{
> + return -1; /* for now */
> +}
> +
> +
> +/*****************************************************************
> + *
> + * In-core stream
> + *
> + *****************************************************************/
> +
> +static close_method_decl(incore)
> +{
> + free(st->u.incore.buf);
> + return 0;
> +}
> +
> +static read_method_decl(incore)
> +{
> + size_t read_size = sz;
> + size_t remainder = st->size - st->u.incore.read_ptr;
> +
> + if (remainder <= read_size)
> + read_size = remainder;
> + if (read_size) {
> + memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size);
> + st->u.incore.read_ptr += read_size;
> + }
> + return read_size;
> +}
> +
> +static struct stream_vtbl incore_vtbl = {
> + close_istream_incore,
> + read_istream_incore,
> +};
> +
> +static open_method_decl(incore)
> +{
> + st->u.incore.buf = read_sha1_file_extended(sha1, type, &st->size, 0);
> + st->u.incore.read_ptr = 0;
> + st->vtbl = &incore_vtbl;
> +
> + if (!st->u.incore.buf) {
> + free(st->u.incore.buf);
free(NULL) is a no-op, so this call is unnecessary: buf is known to be
NULL in this branch.
> + return -1;
> + }
> + return 0;
> +}
> diff --git a/streaming.h b/streaming.h
> new file mode 100644
> index 0000000..18cbe68
> --- /dev/null
> +++ b/streaming.h
> @@ -0,0 +1,15 @@
> +/*
> + * Copyright (c) 2011, Google Inc.
> + */
> +#ifndef STREAMING_H
> +#define STREAMING_H 1
> +#include "cache.h"
> +
> +/* opaque */
> +struct git_istream;
> +
> +extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *);
> +extern int close_istream(struct git_istream *);
> +extern ssize_t read_istream(struct git_istream *, char *, size_t);
> +
> +#endif /* STREAMING_H */
next prev parent reply other threads:[~2011-05-20 23:06 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-16 0:30 [PATCH 00/11] writing out a huge blob to working tree Junio C Hamano
2011-05-16 0:30 ` [PATCH 01/11] packed_object_info_detail(): do not return a string Junio C Hamano
2011-05-17 0:45 ` Thiago Farina
2011-05-17 2:36 ` Junio C Hamano
2011-05-16 0:30 ` [PATCH 02/11] sha1_object_info_extended(): expose a bit more info Junio C Hamano
2011-05-16 0:30 ` [PATCH 03/11] sha1_object_info_extended(): hint about objects in delta-base cache Junio C Hamano
2011-05-16 0:40 ` Shawn Pearce
2011-05-16 0:30 ` [PATCH 04/11] unpack_object_header(): make it public Junio C Hamano
2011-05-16 0:30 ` [PATCH 05/11] write_entry(): separate two helper functions out Junio C Hamano
2011-05-16 0:30 ` [PATCH 06/11] streaming: a new API to read from the object store Junio C Hamano
2011-05-18 8:09 ` Jeff King
2011-05-19 1:52 ` Junio C Hamano
2011-05-16 0:30 ` [PATCH 07/11] streaming_write_entry(): use streaming API in write_entry() Junio C Hamano
2011-05-16 0:30 ` [PATCH 08/11] streaming_write_entry(): support files with holes Junio C Hamano
2011-05-16 10:53 ` Nguyen Thai Ngoc Duy
2011-05-16 14:39 ` Junio C Hamano
2011-05-17 1:18 ` Nguyen Thai Ngoc Duy
2011-05-17 5:23 ` Junio C Hamano
2011-05-16 13:03 ` Thiago Farina
2011-05-16 0:30 ` [PATCH 09/11] streaming: read non-delta incrementally from a pack Junio C Hamano
2011-05-16 0:58 ` Shawn Pearce
2011-05-16 5:00 ` Junio C Hamano
2011-05-16 0:30 ` [PATCH 10/11] sha1_file.c: expose helpers to read loose objects Junio C Hamano
2011-05-16 0:30 ` [PATCH 11/11] streaming: read loose objects incrementally Junio C Hamano
2011-05-16 0:47 ` [PATCH 00/11] writing out a huge blob to working tree Shawn Pearce
2011-05-18 6:41 ` Jeff King
2011-05-18 7:08 ` Jeff King
2011-05-18 7:50 ` Jeff King
2011-05-18 15:12 ` Junio C Hamano
2011-05-18 8:17 ` Jeff King
2011-05-19 21:33 ` [PATCH v2 " Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 01/11] packed_object_info_detail(): do not return a string Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 02/11] sha1_object_info_extended(): expose a bit more info Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 03/11] sha1_object_info_extended(): hint about objects in delta-base cache Junio C Hamano
2011-05-20 23:05 ` René Scharfe
2011-05-21 1:49 ` Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 04/11] unpack_object_header(): make it public Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 05/11] write_entry(): separate two helper functions out Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 06/11] streaming: a new API to read from the object store Junio C Hamano
2011-05-20 23:05 ` René Scharfe [this message]
2011-05-21 1:49 ` Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 07/11] streaming_write_entry(): use streaming API in write_entry() Junio C Hamano
2011-05-20 22:52 ` Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 08/11] streaming_write_entry(): support files with holes Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 09/11] streaming: read non-delta incrementally from a pack Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 10/11] sha1_file.c: expose helpers to read loose objects Junio C Hamano
2011-05-19 21:33 ` [PATCH v2 11/11] streaming: read loose objects incrementally Junio C Hamano
2011-05-19 21:44 ` [Not A PATCH v2 02/11] interdiff Junio C Hamano
2011-05-19 22:21 ` [PATCH v2 00/11] writing out a huge blob to working tree Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DD6F3C0.4060107@lsrfire.ath.cx \
--to=rene.scharfe@lsrfire.ath.cx \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.