Git development
 help / color / mirror / Atom feed
From: Justin Tobler <jltobler@gmail.com>
To: git@vger.kernel.org
Cc: ps@pks.im, gitster@pobox.com, peff@peff.net,
	Justin Tobler <jltobler@gmail.com>
Subject: [PATCH v4 4/7] object-file: remove flags from transaction packfile writes
Date: Thu, 14 May 2026 13:37:37 -0500	[thread overview]
Message-ID: <20260514183740.1505171-5-jltobler@gmail.com> (raw)
In-Reply-To: <20260514183740.1505171-1-jltobler@gmail.com>

The `index_blob_packfile_transaction()` function handles streaming a
blob from an fd to compute its object ID and conditionally writes the
object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A
subsequent commit will make these packfile object writes part of the
transaction interface. Consequently, having the object write be
conditional on this flag is a bit awkward.

In preparation for this change, introduce a dedicated
`hash_blob_stream()` helper that only computes the OID from a `struct
odb_write_stream`. This is invoked by `index_fd()` instead when the
INDEX_WRITE_OBJECT is not set. The object write performed via
`index_blob_packfile_transaction()` is made unconditional accordingly.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 object-file.c   | 132 +++++++++++++++++++++++++++++-------------------
 odb/streaming.c |  46 +++++++++++++++++
 odb/streaming.h |  12 +++++
 3 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/object-file.c b/object-file.c
index a1afca23c5..a59030911f 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1397,11 +1397,10 @@ static int already_written(struct odb_transaction_files *transaction,
 }
 
 /* Lazily create backing packfile for the state */
-static void prepare_packfile_transaction(struct odb_transaction_files *transaction,
-					 unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction_files *transaction)
 {
 	struct transaction_packfile *state = &transaction->packfile;
-	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+	if (state->f)
 		return;
 
 	state->f = create_tmp_packfile(transaction->base.source->odb->repo,
@@ -1414,6 +1413,39 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
 		die_errno("unable to write pack header");
 }
 
+static int hash_blob_stream(struct odb_write_stream *stream,
+			    const struct git_hash_algo *hash_algo,
+			    struct object_id *result_oid, size_t size)
+{
+	unsigned char buf[16384];
+	struct git_hash_ctx ctx;
+	unsigned header_len;
+	size_t bytes_hashed = 0;
+
+	header_len = format_object_header((char *)buf, sizeof(buf),
+					  OBJ_BLOB, size);
+	hash_algo->init_fn(&ctx);
+	git_hash_update(&ctx, buf, header_len);
+
+	while (!stream->is_finished) {
+		ssize_t read_result = odb_write_stream_read(stream, buf,
+							    sizeof(buf));
+
+		if (read_result < 0)
+			return -1;
+
+		git_hash_update(&ctx, buf, read_result);
+		bytes_hashed += read_result;
+	}
+
+	if (bytes_hashed != size)
+		return -1;
+
+	git_hash_final_oid(result_oid, &ctx);
+
+	return 0;
+}
+
 /*
  * Read the contents from fd for size bytes, streaming it to the
  * packfile in state while updating the hash in ctx. Signal a failure
@@ -1431,15 +1463,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
  */
 static int stream_blob_to_pack(struct transaction_packfile *state,
 			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
-			       int fd, size_t size, const char *path,
-			       unsigned flags)
+			       int fd, size_t size, const char *path)
 {
 	git_zstream s;
 	unsigned char ibuf[16384];
 	unsigned char obuf[16384];
 	unsigned hdrlen;
 	int status = Z_OK;
-	int write_object = (flags & INDEX_WRITE_OBJECT);
 	off_t offset = 0;
 
 	git_deflate_init(&s, pack_compression_level);
@@ -1474,20 +1504,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
 		status = git_deflate(&s, size ? 0 : Z_FINISH);
 
 		if (!s.avail_out || status == Z_STREAM_END) {
-			if (write_object) {
-				size_t written = s.next_out - obuf;
-
-				/* would we bust the size limit? */
-				if (state->nr_written &&
-				    pack_size_limit_cfg &&
-				    pack_size_limit_cfg < state->offset + written) {
-					git_deflate_abort(&s);
-					return -1;
-				}
-
-				hashwrite(state->f, obuf, written);
-				state->offset += written;
+			size_t written = s.next_out - obuf;
+
+			/* would we bust the size limit? */
+			if (state->nr_written &&
+			    pack_size_limit_cfg &&
+			    pack_size_limit_cfg < state->offset + written) {
+				git_deflate_abort(&s);
+				return -1;
 			}
+
+			hashwrite(state->f, obuf, written);
+			state->offset += written;
 			s.next_out = obuf;
 			s.avail_out = sizeof(obuf);
 		}
@@ -1575,8 +1603,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
  */
 static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
 					   struct object_id *result_oid, int fd,
-					   size_t size, const char *path,
-					   unsigned flags)
+					   size_t size, const char *path)
 {
 	struct transaction_packfile *state = &transaction->packfile;
 	off_t seekback, already_hashed_to;
@@ -1584,7 +1611,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 	unsigned char obuf[16384];
 	unsigned header_len;
 	struct hashfile_checkpoint checkpoint;
-	struct pack_idx_entry *idx = NULL;
+	struct pack_idx_entry *idx;
 
 	seekback = lseek(fd, 0, SEEK_CUR);
 	if (seekback == (off_t)-1)
@@ -1595,33 +1622,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 	transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
-	/* Note: idx is non-NULL when we are writing */
-	if ((flags & INDEX_WRITE_OBJECT) != 0) {
-		CALLOC_ARRAY(idx, 1);
-
-		prepare_packfile_transaction(transaction, flags);
-		hashfile_checkpoint_init(state->f, &checkpoint);
-	}
+	CALLOC_ARRAY(idx, 1);
+	prepare_packfile_transaction(transaction);
+	hashfile_checkpoint_init(state->f, &checkpoint);
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_packfile_transaction(transaction, flags);
-		if (idx) {
-			hashfile_checkpoint(state->f, &checkpoint);
-			idx->offset = state->offset;
-			crc32_begin(state->f);
-		}
+		prepare_packfile_transaction(transaction);
+		hashfile_checkpoint(state->f, &checkpoint);
+		idx->offset = state->offset;
+		crc32_begin(state->f);
+
 		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
-					 fd, size, path, flags))
+					 fd, size, path))
 			break;
 		/*
 		 * Writing this object to the current pack will make
 		 * it too big; we need to truncate it, start a new
 		 * pack, and write into it.
 		 */
-		if (!idx)
-			BUG("should not happen");
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		flush_packfile_transaction(transaction);
@@ -1629,8 +1649,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 			return error("cannot seek back");
 	}
 	git_hash_final_oid(result_oid, &ctx);
-	if (!idx)
-		return 0;
 
 	idx->crc32 = crc32_end(state->f);
 	if (already_written(transaction, result_oid)) {
@@ -1668,18 +1686,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	} else {
-		struct object_database *odb = the_repository->objects;
-		struct odb_transaction_files *files_transaction;
-		struct odb_transaction *transaction;
-
-		transaction = odb_transaction_begin(odb);
-		files_transaction = container_of(odb->transaction,
-						 struct odb_transaction_files,
-						 base);
-		ret = index_blob_packfile_transaction(files_transaction, oid, fd,
-						      xsize_t(st->st_size),
-						      path, flags);
-		odb_transaction_commit(transaction);
+		struct odb_write_stream stream;
+		odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
+
+		if (flags & INDEX_WRITE_OBJECT) {
+			struct object_database *odb = the_repository->objects;
+			struct odb_transaction_files *files_transaction;
+			struct odb_transaction *transaction;
+
+			transaction = odb_transaction_begin(odb);
+			files_transaction = container_of(odb->transaction,
+							 struct odb_transaction_files,
+							 base);
+			ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+						      xsize_t(st->st_size), path);
+			odb_transaction_commit(transaction);
+		} else {
+			ret = hash_blob_stream(&stream,
+					       the_repository->hash_algo, oid,
+					       xsize_t(st->st_size));
+		}
+
+		odb_write_stream_release(&stream);
 	}
 
 	close(fd);
diff --git a/odb/streaming.c b/odb/streaming.c
index a68dd2cbe3..20531e864c 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -237,6 +237,11 @@ ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
 	return st->read(st, buf, sz);
 }
 
+void odb_write_stream_release(struct odb_write_stream *st)
+{
+	free(st->data);
+}
+
 int odb_stream_blob_to_fd(struct object_database *odb,
 			  int fd,
 			  const struct object_id *oid,
@@ -292,3 +297,44 @@ int odb_stream_blob_to_fd(struct object_database *odb,
 	odb_read_stream_close(st);
 	return result;
 }
+
+struct read_object_fd_data {
+	int fd;
+	size_t remaining;
+};
+
+static ssize_t read_object_fd(struct odb_write_stream *stream,
+			      unsigned char *buf, size_t len)
+{
+	struct read_object_fd_data *data = stream->data;
+	ssize_t read_result;
+	size_t count;
+
+	if (stream->is_finished)
+		return 0;
+
+	count = data->remaining < len ? data->remaining : len;
+	read_result = read_in_full(data->fd, buf, count);
+	if (read_result < 0 || (size_t)read_result != count)
+		return -1;
+
+	data->remaining -= count;
+	if (!data->remaining)
+		stream->is_finished = 1;
+
+	return read_result;
+}
+
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+			      size_t size)
+{
+	struct read_object_fd_data *data;
+
+	CALLOC_ARRAY(data, 1);
+	data->fd = fd;
+	data->remaining = size;
+
+	stream->data = data;
+	stream->read = read_object_fd;
+	stream->is_finished = 0;
+}
diff --git a/odb/streaming.h b/odb/streaming.h
index 65ced911fe..2a8cac19a4 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -5,6 +5,7 @@
 #define STREAMING_H 1
 
 #include "object.h"
+#include "odb.h"
 
 struct object_database;
 struct odb_read_stream;
@@ -65,6 +66,11 @@ struct odb_write_stream {
 ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
 			      size_t len);
 
+/*
+ * Releases memory allocated for underlying stream data.
+ */
+void odb_write_stream_release(struct odb_write_stream *stream);
+
 /*
  * Look up the object by its ID and write the full contents to the file
  * descriptor. The object must be a blob, or the function will fail. When
@@ -82,4 +88,10 @@ int odb_stream_blob_to_fd(struct object_database *odb,
 			  struct stream_filter *filter,
 			  int can_seek);
 
+/*
+ * Sets up an ODB write stream that reads from an fd.
+ */
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+			      size_t size);
+
 #endif /* STREAMING_H */
-- 
2.54.0.105.g59ff4886a5


  parent reply	other threads:[~2026-05-14 18:38 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31  3:38 [PATCH 0/6] odb: add write operation to ODB transaction interface Justin Tobler
2026-03-31  3:38 ` [PATCH 1/6] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 13:56     ` Justin Tobler
2026-03-31 15:58       ` Junio C Hamano
2026-03-31 16:44         ` Justin Tobler
2026-03-31  3:38 ` [PATCH 2/6] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31  3:38 ` [PATCH 3/6] object-file: remove flags from transaction packfile writes Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:10     ` Justin Tobler
2026-03-31  3:38 ` [PATCH 4/6] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:14     ` Justin Tobler
2026-03-31  3:38 ` [PATCH 5/6] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:31     ` Justin Tobler
2026-03-31 22:59       ` Patrick Steinhardt
2026-03-31 23:21         ` Justin Tobler
2026-03-31 23:40           ` Patrick Steinhardt
2026-03-31  3:38 ` [PATCH 6/6] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:40     ` Justin Tobler
2026-04-01  3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
2026-04-01  3:03   ` [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-01  3:03   ` [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-01  3:03   ` [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-04-01 11:23     ` Patrick Steinhardt
2026-04-01  3:03   ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-01 11:23     ` Patrick Steinhardt
2026-04-01 14:02       ` Justin Tobler
2026-04-01  3:03   ` [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-01  3:03   ` [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-01  3:03   ` [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-01 11:24   ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-04-02 21:32   ` [PATCH v3 " Justin Tobler
2026-04-02 21:32     ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-02 21:32     ` [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-02 21:32     ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-11 17:58       ` Jeff King
2026-05-12 15:19         ` Justin Tobler
2026-04-02 21:32     ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-06 20:16       ` Jeff King
2026-04-06 20:19         ` Jeff King
2026-04-02 21:32     ` [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-02 21:32     ` [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-02 21:32     ` [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-08  7:25     ` [PATCH v3 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-05-14 18:37     ` [PATCH v4 " Justin Tobler
2026-05-14 18:37       ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-05-14 18:37       ` [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-05-14 18:37       ` [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-14 18:37       ` Justin Tobler [this message]
2026-05-14 18:37       ` [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-05-14 18:37       ` [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-05-14 18:37       ` [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-05-15  3:56       ` [PATCH v4 0/7] odb: add write operation to ODB transaction interface Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260514183740.1505171-5-jltobler@gmail.com \
    --to=jltobler@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox