git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: Elijah Newren <newren@gmail.com>,
	"Eric W. Biederman" <ebiederm@gmail.com>,
	Jeff King <peff@peff.net>, Junio C Hamano <gitster@pobox.com>,
	Patrick Steinhardt <ps@pks.im>
Subject: [PATCH v4 1/7] bulk-checkin: extract abstract `bulk_checkin_source`
Date: Thu, 19 Oct 2023 13:28:42 -0400	[thread overview]
Message-ID: <97bb6e9f59e5092f0519c7d1141d0673313fdc33.1697736516.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1697736516.git.me@ttaylorr.com>

A future commit will want to implement a very similar routine as in
`stream_blob_to_pack()` with two notable changes:

  - Instead of streaming just OBJ_BLOBs, this new function may want to
    stream objects of arbitrary type.

  - Instead of streaming the object's contents from an open
    file-descriptor, this new function may want to "stream" its contents
    from memory.

To avoid duplicating a significant chunk of code between the existing
`stream_blob_to_pack()`, extract an abstract `bulk_checkin_source`. This
concept currently is a thin layer of `lseek()` and `read_in_full()`, but
will grow to understand how to perform analogous operations when writing
out an object's contents from memory.

Suggested-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 bulk-checkin.c | 61 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 8 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 6ce62999e5..c05d06e1e1 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -140,8 +140,41 @@ static int already_written(struct bulk_checkin_packfile *state, struct object_id
 	return 0;
 }
 
+struct bulk_checkin_source {
+	enum { SOURCE_FILE } type;
+
+	/* SOURCE_FILE fields */
+	int fd;
+
+	/* common fields */
+	size_t size;
+	const char *path;
+};
+
+static off_t bulk_checkin_source_seek_to(struct bulk_checkin_source *source,
+					 off_t offset)
+{
+	switch (source->type) {
+	case SOURCE_FILE:
+		return lseek(source->fd, offset, SEEK_SET);
+	default:
+		BUG("unknown bulk-checkin source: %d", source->type);
+	}
+}
+
+static ssize_t bulk_checkin_source_read(struct bulk_checkin_source *source,
+					void *buf, size_t nr)
+{
+	switch (source->type) {
+	case SOURCE_FILE:
+		return read_in_full(source->fd, buf, nr);
+	default:
+		BUG("unknown bulk-checkin source: %d", source->type);
+	}
+}
+
 /*
- * Read the contents from fd for size bytes, streaming it to the
+ * Read the contents from 'source' for 'size' bytes, streaming it to the
  * packfile in state while updating the hash in ctx. Signal a failure
  * by returning a negative value when the resulting pack would exceed
  * the pack size limit and this is not the first object in the pack,
@@ -157,7 +190,7 @@ static int already_written(struct bulk_checkin_packfile *state, struct object_id
  */
 static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 			       git_hash_ctx *ctx, off_t *already_hashed_to,
-			       int fd, size_t size, const char *path,
+			       struct bulk_checkin_source *source,
 			       unsigned flags)
 {
 	git_zstream s;
@@ -167,22 +200,28 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 	int status = Z_OK;
 	int write_object = (flags & HASH_WRITE_OBJECT);
 	off_t offset = 0;
+	size_t size = source->size;
 
 	git_deflate_init(&s, pack_compression_level);
 
-	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
+	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB,
+					      size);
 	s.next_out = obuf + hdrlen;
 	s.avail_out = sizeof(obuf) - hdrlen;
 
 	while (status != Z_STREAM_END) {
 		if (size && !s.avail_in) {
 			ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
-			ssize_t read_result = read_in_full(fd, ibuf, rsize);
+			ssize_t read_result;
+
+			read_result = bulk_checkin_source_read(source, ibuf,
+							       rsize);
 			if (read_result < 0)
-				die_errno("failed to read from '%s'", path);
+				die_errno("failed to read from '%s'",
+					  source->path);
 			if (read_result != rsize)
 				die("failed to read %d bytes from '%s'",
-				    (int)rsize, path);
+				    (int)rsize, source->path);
 			offset += rsize;
 			if (*already_hashed_to < offset) {
 				size_t hsize = offset - *already_hashed_to;
@@ -258,6 +297,12 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	unsigned header_len;
 	struct hashfile_checkpoint checkpoint = {0};
 	struct pack_idx_entry *idx = NULL;
+	struct bulk_checkin_source source = {
+		.type = SOURCE_FILE,
+		.fd = fd,
+		.size = size,
+		.path = path,
+	};
 
 	seekback = lseek(fd, 0, SEEK_CUR);
 	if (seekback == (off_t) -1)
@@ -283,7 +328,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 			crc32_begin(state->f);
 		}
 		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
-					 fd, size, path, flags))
+					 &source, flags))
 			break;
 		/*
 		 * Writing this object to the current pack will make
@@ -295,7 +340,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		flush_bulk_checkin_packfile(state);
-		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
+		if (bulk_checkin_source_seek_to(&source, seekback) == (off_t)-1)
 			return error("cannot seek back");
 	}
 	the_hash_algo->final_oid_fn(result_oid, &ctx);
-- 
2.42.0.405.g86fe3250c2


  reply	other threads:[~2023-10-19 17:28 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-19 17:28 [PATCH v4 0/7] merge-ort: implement support for packing objects together Taylor Blau
2023-10-19 17:28 ` Taylor Blau [this message]
2023-10-20  7:35   ` [PATCH v4 1/7] bulk-checkin: extract abstract `bulk_checkin_source` Jeff King
2023-10-20 16:55     ` Junio C Hamano
2023-10-19 17:28 ` [PATCH v4 2/7] bulk-checkin: generify `stream_blob_to_pack()` for arbitrary types Taylor Blau
2023-10-19 17:28 ` [PATCH v4 3/7] bulk-checkin: refactor deflate routine to accept a `bulk_checkin_source` Taylor Blau
2023-10-19 17:28 ` [PATCH v4 4/7] bulk-checkin: implement `SOURCE_INCORE` mode for `bulk_checkin_source` Taylor Blau
2023-10-23  9:19   ` Patrick Steinhardt
2023-10-23 18:58     ` Jeff King
2023-10-24  6:34       ` Patrick Steinhardt
2023-10-24 17:08         ` Junio C Hamano
2023-10-19 17:28 ` [PATCH v4 5/7] bulk-checkin: introduce `index_blob_bulk_checkin_incore()` Taylor Blau
2023-10-19 17:28 ` [PATCH v4 6/7] bulk-checkin: introduce `index_tree_bulk_checkin_incore()` Taylor Blau
2023-10-19 17:29 ` [PATCH v4 7/7] builtin/merge-tree.c: implement support for `--write-pack` Taylor Blau
2023-10-19 21:47 ` [PATCH v4 0/7] merge-ort: implement support for packing objects together Junio C Hamano
2023-10-20  7:29 ` Jeff King
2023-10-20 16:53   ` Junio C Hamano
2023-10-23  9:19 ` Patrick Steinhardt
2023-10-23 22:44 ` [PATCH v5 0/5] " Taylor Blau
2023-10-23 22:44   ` [PATCH v5 1/5] bulk-checkin: extract abstract `bulk_checkin_source` Taylor Blau
2023-10-25  7:37     ` Jeff King
2023-10-25 15:39       ` Taylor Blau
2023-10-27 23:12       ` Junio C Hamano
2023-10-23 22:44   ` [PATCH v5 2/5] bulk-checkin: generify `stream_blob_to_pack()` for arbitrary types Taylor Blau
2023-10-23 22:45   ` [PATCH v5 3/5] bulk-checkin: introduce `index_blob_bulk_checkin_incore()` Taylor Blau
2023-10-25  7:58     ` Patrick Steinhardt
2023-10-25 15:44       ` Taylor Blau
2023-10-25 17:21         ` Eric Sunshine
2023-10-26  8:16           ` Patrick Steinhardt
2023-11-11  0:17           ` Elijah Newren
2023-10-23 22:45   ` [PATCH v5 4/5] bulk-checkin: introduce `index_tree_bulk_checkin_incore()` Taylor Blau
2023-10-23 22:45   ` [PATCH v5 5/5] builtin/merge-tree.c: implement support for `--write-pack` Taylor Blau
2023-10-25  7:58     ` Patrick Steinhardt
2023-10-25 15:46       ` Taylor Blau
2023-11-10 23:51     ` Elijah Newren
2023-11-11  0:27       ` Junio C Hamano
2023-11-11  1:34         ` Taylor Blau
2023-11-11  1:24       ` Taylor Blau
2023-11-13 22:05         ` Jeff King
2023-11-14  1:40           ` Junio C Hamano
2023-11-14  2:54             ` Elijah Newren
2023-11-14 21:55             ` Jeff King
2023-11-14  3:08           ` Elijah Newren
2023-11-13 22:02       ` Jeff King
2023-11-13 22:34         ` Taylor Blau
2023-11-14  2:50           ` Elijah Newren
2023-11-14 21:53             ` Jeff King
2023-11-14 22:04           ` Jeff King
2023-10-23 23:31   ` [PATCH v5 0/5] merge-ort: implement support for packing objects together Junio C Hamano
2023-11-06 15:46     ` Johannes Schindelin
2023-11-06 23:19       ` Junio C Hamano
2023-11-07  3:42       ` Jeff King
2023-11-07 15:58       ` Taylor Blau
2023-11-07 18:22         ` [RFC PATCH 0/3] replay: implement support for writing new objects to a pack Taylor Blau
2023-11-07 18:22           ` [RFC PATCH 1/3] merge-ort.c: finalize ODB transactions after each step Taylor Blau
2023-11-11  3:45             ` Elijah Newren
2023-11-07 18:22           ` [RFC PATCH 2/3] tmp-objdir: introduce `tmp_objdir_repack()` Taylor Blau
2023-11-08  7:05             ` Patrick Steinhardt
2023-11-09 19:26               ` Taylor Blau
2023-11-07 18:23           ` [RFC PATCH 3/3] builtin/replay.c: introduce `--write-pack` Taylor Blau
2023-11-11  3:42           ` [RFC PATCH 0/3] replay: implement support for writing new objects to a pack Elijah Newren
2023-11-11  4:04           ` Elijah Newren
2023-10-25  7:58   ` [PATCH v5 0/5] merge-ort: implement support for packing objects together Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=97bb6e9f59e5092f0519c7d1141d0673313fdc33.1697736516.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=ebiederm@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).