Git development
 help / color / mirror / Atom feed
From: Justin Tobler <jltobler@gmail.com>
To: git@vger.kernel.org
Cc: ps@pks.im, Justin Tobler <jltobler@gmail.com>
Subject: [PATCH 5/6] object-file: generalize packfile writes to use odb_write_stream
Date: Mon, 30 Mar 2026 22:38:34 -0500	[thread overview]
Message-ID: <20260331033835.2863514-6-jltobler@gmail.com> (raw)
In-Reply-To: <20260331033835.2863514-1-jltobler@gmail.com>

The `index_blob_packfile_transaction()` function streams blob data
directly from an fd. This makes it difficult to reuse as part of a
generic transactional object writing interface.

Refactor the packfile write path to operate on a `struct
odb_write_stream`, allowing callers to supply data from arbitrary
sources.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 object-file.c | 99 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 29 deletions(-)

diff --git a/object-file.c b/object-file.c
index 1de2244ac5..4c797d6498 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1433,18 +1433,18 @@ static int hash_blob_stream(const struct git_hash_algo *hash_algo,
 }
 
 /*
- * Read the contents from fd for size bytes, streaming it to the
+ * Read the contents from the stream provided, streaming it to the
  * packfile in state while updating the hash in ctx.
  */
 static void stream_blob_to_pack(struct transaction_packfile *state,
-				struct git_hash_ctx *ctx, int fd, size_t size,
-				const char *path)
+				struct git_hash_ctx *ctx, size_t size,
+				struct odb_write_stream *stream)
 {
 	git_zstream s;
-	unsigned char ibuf[16384];
 	unsigned char obuf[16384];
 	unsigned hdrlen;
 	int status = Z_OK;
+	size_t total = 0;
 
 	git_deflate_init(&s, pack_compression_level);
 
@@ -1453,24 +1453,19 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
 	s.avail_out = sizeof(obuf) - hdrlen;
 
 	while (status != Z_STREAM_END) {
-		if (size && !s.avail_in) {
-			size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
-			ssize_t read_result = read_in_full(fd, ibuf, rsize);
-			if (read_result < 0)
-				die_errno("failed to read from '%s'", path);
-			if ((size_t)read_result != rsize)
-				die("failed to read %u bytes from '%s'",
-				    (unsigned)rsize, path);
+		if (!stream->is_finished && !s.avail_in) {
+			unsigned long rsize;
+			unsigned const char *buf = stream->read(stream, &rsize);
 
 			if (rsize)
-				git_hash_update(ctx, ibuf, rsize);
+				git_hash_update(ctx, buf, rsize);
 
-			s.next_in = ibuf;
+			s.next_in = (unsigned char *)buf;
 			s.avail_in = rsize;
-			size -= rsize;
+			total += rsize;
 		}
 
-		status = git_deflate(&s, size ? 0 : Z_FINISH);
+		status = git_deflate(&s, stream->is_finished ? Z_FINISH : 0);
 
 		if (!s.avail_out || status == Z_STREAM_END) {
 			size_t written = s.next_out - obuf;
@@ -1490,6 +1485,10 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
 			die("unexpected deflate failure: %d", status);
 		}
 	}
+
+	if (total != size)
+		die("unexpected number of bytes read");
+
 	git_deflate_end(&s);
 }
 
@@ -1543,6 +1542,40 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
 	odb_reprepare(repo->objects);
 }
 
+struct read_object_fd_data {
+	int fd;
+	size_t size;
+	unsigned char buf[16384];
+};
+
+static const void *read_object_fd(struct odb_write_stream *stream,
+				  unsigned long *len)
+{
+	struct read_object_fd_data *data = stream->data;
+	ssize_t read_result;
+	size_t rsize;
+
+	if (stream->is_finished) {
+		*len = 0;
+		return NULL;
+	}
+
+	rsize = data->size < sizeof(data->buf) ? data->size : sizeof(data->buf);
+	read_result = read_in_full(data->fd, data->buf, rsize);
+	if (read_result < 0)
+		die_errno("failed to read blob data");
+	if ((size_t)read_result != rsize)
+		die("failed to read %u bytes of blob data", (unsigned)rsize);
+
+	data->size -= rsize;
+	if (!data->size)
+		stream->is_finished = 1;
+
+	*len = rsize;
+
+	return data->buf;
+}
+
 /*
  * This writes the specified object to a packfile. Objects written here
  * during the same transaction are written to the same packfile. The
@@ -1561,10 +1594,13 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
  * binary blobs, they generally do not want to get any conversion, and
  * callers should avoid this code path when filters are requested.
  */
-static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
-					   struct object_id *result_oid, int fd,
-					   size_t size, const char *path)
+static int index_blob_packfile_transaction(struct odb_transaction *base,
+					   struct odb_write_stream *stream,
+					   size_t size, struct object_id *result_oid)
 {
+	struct odb_transaction_files *transaction = container_of(base,
+								 struct odb_transaction_files,
+								 base);
 	struct transaction_packfile *state = &transaction->packfile;
 	struct git_hash_ctx ctx;
 	unsigned char obuf[16384];
@@ -1593,7 +1629,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 	hashfile_checkpoint(state->f, &checkpoint);
 	idx->offset = state->offset;
 	crc32_begin(state->f);
-	stream_blob_to_pack(state, &ctx, fd, size, path);
+	stream_blob_to_pack(state, &ctx, size, stream);
 	git_hash_final_oid(result_oid, &ctx);
 
 	idx->crc32 = crc32_end(state->f);
@@ -1634,15 +1670,20 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	} else {
 		if (flags & INDEX_WRITE_OBJECT) {
 			struct object_database *odb = the_repository->objects;
-			struct odb_transaction_files *files_transaction;
-			struct odb_transaction *transaction;
-
-			transaction = odb_transaction_begin(odb);
-			files_transaction = container_of(odb->transaction,
-							 struct odb_transaction_files,
-							 base);
-			ret = index_blob_packfile_transaction(files_transaction, oid, fd,
-						      xsize_t(st->st_size), path);
+			struct odb_transaction *transaction = odb_transaction_begin(odb);
+			struct read_object_fd_data data = {
+				.fd = fd,
+				.size = xsize_t(st->st_size),
+			};
+			struct odb_write_stream in_stream = {
+				.read = read_object_fd,
+				.data = &data,
+			};
+
+			ret = index_blob_packfile_transaction(odb->transaction,
+							      &in_stream,
+							      xsize_t(st->st_size),
+							      oid);
 			odb_transaction_commit(transaction);
 		} else {
 			if (hash_blob_stream(the_repository->hash_algo, oid, fd,
-- 
2.53.0.381.g628a66ccf6


  parent reply	other threads:[~2026-03-31  3:39 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31  3:38 [PATCH 0/6] odb: add write operation to ODB transaction interface Justin Tobler
2026-03-31  3:38 ` [PATCH 1/6] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 13:56     ` Justin Tobler
2026-03-31 15:58       ` Junio C Hamano
2026-03-31 16:44         ` Justin Tobler
2026-03-31  3:38 ` [PATCH 2/6] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31  3:38 ` [PATCH 3/6] object-file: remove flags from transaction packfile writes Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:10     ` Justin Tobler
2026-03-31  3:38 ` [PATCH 4/6] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:14     ` Justin Tobler
2026-03-31  3:38 ` Justin Tobler [this message]
2026-03-31  7:48   ` [PATCH 5/6] object-file: generalize packfile writes to use odb_write_stream Patrick Steinhardt
2026-03-31 14:31     ` Justin Tobler
2026-03-31 22:59       ` Patrick Steinhardt
2026-03-31 23:21         ` Justin Tobler
2026-03-31 23:40           ` Patrick Steinhardt
2026-03-31  3:38 ` [PATCH 6/6] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:40     ` Justin Tobler
2026-04-01  3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
2026-04-01  3:03   ` [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-01  3:03   ` [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-01  3:03   ` [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-04-01 11:23     ` Patrick Steinhardt
2026-04-01  3:03   ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-01 11:23     ` Patrick Steinhardt
2026-04-01 14:02       ` Justin Tobler
2026-04-01  3:03   ` [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-01  3:03   ` [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-01  3:03   ` [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-01 11:24   ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-04-02 21:32   ` [PATCH v3 " Justin Tobler
2026-04-02 21:32     ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-02 21:32     ` [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-02 21:32     ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-11 17:58       ` Jeff King
2026-05-12 15:19         ` Justin Tobler
2026-04-02 21:32     ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-06 20:16       ` Jeff King
2026-04-06 20:19         ` Jeff King
2026-04-02 21:32     ` [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-02 21:32     ` [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-02 21:32     ` [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-08  7:25     ` [PATCH v3 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-05-14 18:37     ` [PATCH v4 " Justin Tobler
2026-05-14 18:37       ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-05-14 18:37       ` [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-05-14 18:37       ` [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-14 18:37       ` [PATCH v4 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-05-14 18:37       ` [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-05-14 18:37       ` [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-05-14 18:37       ` [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-05-15  3:56       ` [PATCH v4 0/7] odb: add write operation to ODB transaction interface Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260331033835.2863514-6-jltobler@gmail.com \
    --to=jltobler@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.