Git development
 help / color / mirror / Atom feed
From: Justin Tobler <jltobler@gmail.com>
To: git@vger.kernel.org
Cc: ps@pks.im, Justin Tobler <jltobler@gmail.com>
Subject: [PATCH 3/6] object-file: remove flags from transaction packfile writes
Date: Mon, 30 Mar 2026 22:38:32 -0500	[thread overview]
Message-ID: <20260331033835.2863514-4-jltobler@gmail.com> (raw)
In-Reply-To: <20260331033835.2863514-1-jltobler@gmail.com>

The `index_blob_packfile_transaction()` function handles streaming a
blob from an fd to compute its object ID and conditionally writes the
object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A
subsequent commit will make these packfile object writes part of the
transaction interface. Consequently, having the object write be
conditional on this flag is a bit awkward.

In preparation for this change, introduce a dedicated
`hash_blob_stream()` helper that only computes the OID from the fd. This
is invoked by `index_fd()` instead when the INDEX_WRITE_OBJECT is not
set. The object write performed via `index_blob_packfile_transaction()`
is made unconditional accordingly.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 object-file.c | 124 +++++++++++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 53 deletions(-)

diff --git a/object-file.c b/object-file.c
index bfbb632cf8..493173eaf4 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1388,11 +1388,10 @@ static int already_written(struct odb_transaction_files *transaction,
 }
 
 /* Lazily create backing packfile for the state */
-static void prepare_packfile_transaction(struct odb_transaction_files *transaction,
-					 unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction_files *transaction)
 {
 	struct transaction_packfile *state = &transaction->packfile;
-	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+	if (state->f)
 		return;
 
 	state->f = create_tmp_packfile(transaction->base.source->odb->repo,
@@ -1405,6 +1404,34 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
 		die_errno("unable to write pack header");
 }
 
+static int hash_blob_stream(const struct git_hash_algo *hash_algo,
+			    struct object_id *result_oid, int fd, size_t size)
+{
+	unsigned char buf[16384];
+	struct git_hash_ctx ctx;
+	unsigned header_len;
+
+	header_len = format_object_header((char *)buf, sizeof(buf),
+					  OBJ_BLOB, size);
+	hash_algo->init_fn(&ctx);
+	git_hash_update(&ctx, buf, header_len);
+
+	while (size) {
+		size_t rsize = size < sizeof(buf) ? size : sizeof(buf);
+		ssize_t read_result = read_in_full(fd, buf, rsize);
+
+		if ((size_t)read_result != rsize)
+			return -1;
+
+		git_hash_update(&ctx, buf, rsize);
+		size -= read_result;
+	}
+
+	git_hash_final_oid(result_oid, &ctx);
+
+	return 0;
+}
+
 /*
  * Read the contents from fd for size bytes, streaming it to the
  * packfile in state while updating the hash in ctx. Signal a failure
@@ -1422,15 +1449,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
  */
 static int stream_blob_to_pack(struct transaction_packfile *state,
 			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
-			       int fd, size_t size, const char *path,
-			       unsigned flags)
+			       int fd, size_t size, const char *path)
 {
 	git_zstream s;
 	unsigned char ibuf[16384];
 	unsigned char obuf[16384];
 	unsigned hdrlen;
 	int status = Z_OK;
-	int write_object = (flags & INDEX_WRITE_OBJECT);
 	off_t offset = 0;
 
 	git_deflate_init(&s, pack_compression_level);
@@ -1465,20 +1490,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
 		status = git_deflate(&s, size ? 0 : Z_FINISH);
 
 		if (!s.avail_out || status == Z_STREAM_END) {
-			if (write_object) {
-				size_t written = s.next_out - obuf;
-
-				/* would we bust the size limit? */
-				if (state->nr_written &&
-				    pack_size_limit_cfg &&
-				    pack_size_limit_cfg < state->offset + written) {
-					git_deflate_abort(&s);
-					return -1;
-				}
-
-				hashwrite(state->f, obuf, written);
-				state->offset += written;
+			size_t written = s.next_out - obuf;
+
+			/* would we bust the size limit? */
+			if (state->nr_written &&
+			    pack_size_limit_cfg &&
+			    pack_size_limit_cfg < state->offset + written) {
+				git_deflate_abort(&s);
+				return -1;
 			}
+
+			hashwrite(state->f, obuf, written);
+			state->offset += written;
 			s.next_out = obuf;
 			s.avail_out = sizeof(obuf);
 		}
@@ -1566,8 +1589,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
  */
 static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
 					   struct object_id *result_oid, int fd,
-					   size_t size, const char *path,
-					   unsigned flags)
+					   size_t size, const char *path)
 {
 	struct transaction_packfile *state = &transaction->packfile;
 	off_t seekback, already_hashed_to;
@@ -1575,7 +1597,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 	unsigned char obuf[16384];
 	unsigned header_len;
 	struct hashfile_checkpoint checkpoint;
-	struct pack_idx_entry *idx = NULL;
+	struct pack_idx_entry *idx;
 
 	seekback = lseek(fd, 0, SEEK_CUR);
 	if (seekback == (off_t)-1)
@@ -1586,33 +1608,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 	transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
-	/* Note: idx is non-NULL when we are writing */
-	if ((flags & INDEX_WRITE_OBJECT) != 0) {
-		CALLOC_ARRAY(idx, 1);
-
-		prepare_packfile_transaction(transaction, flags);
-		hashfile_checkpoint_init(state->f, &checkpoint);
-	}
+	CALLOC_ARRAY(idx, 1);
+	prepare_packfile_transaction(transaction);
+	hashfile_checkpoint_init(state->f, &checkpoint);
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_packfile_transaction(transaction, flags);
-		if (idx) {
-			hashfile_checkpoint(state->f, &checkpoint);
-			idx->offset = state->offset;
-			crc32_begin(state->f);
-		}
+		prepare_packfile_transaction(transaction);
+		hashfile_checkpoint(state->f, &checkpoint);
+		idx->offset = state->offset;
+		crc32_begin(state->f);
+
 		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
-					 fd, size, path, flags))
+					 fd, size, path))
 			break;
 		/*
 		 * Writing this object to the current pack will make
 		 * it too big; we need to truncate it, start a new
 		 * pack, and write into it.
 		 */
-		if (!idx)
-			BUG("should not happen");
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		flush_packfile_transaction(transaction);
@@ -1620,8 +1635,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 			return error("cannot seek back");
 	}
 	git_hash_final_oid(result_oid, &ctx);
-	if (!idx)
-		return 0;
 
 	idx->crc32 = crc32_end(state->f);
 	if (already_written(transaction, result_oid)) {
@@ -1642,7 +1655,7 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	     int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
 {
-	int ret;
+	int ret = 0;
 
 	/*
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
@@ -1659,18 +1672,23 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	} else {
-		struct object_database *odb = the_repository->objects;
-		struct odb_transaction_files *files_transaction;
-		struct odb_transaction *transaction;
-
-		transaction = odb_transaction_begin(odb);
-		files_transaction = container_of(odb->transaction,
-						 struct odb_transaction_files,
-						 base);
-		ret = index_blob_packfile_transaction(files_transaction, oid, fd,
-						      xsize_t(st->st_size),
-						      path, flags);
-		odb_transaction_commit(transaction);
+		if (flags & INDEX_WRITE_OBJECT) {
+			struct object_database *odb = the_repository->objects;
+			struct odb_transaction_files *files_transaction;
+			struct odb_transaction *transaction;
+
+			transaction = odb_transaction_begin(odb);
+			files_transaction = container_of(odb->transaction,
+							 struct odb_transaction_files,
+							 base);
+			ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+						      xsize_t(st->st_size), path);
+			odb_transaction_commit(transaction);
+		} else {
+			if (hash_blob_stream(the_repository->hash_algo, oid, fd,
+					     xsize_t(st->st_size)))
+				die("failed to hash blob");
+		}
 	}
 
 	close(fd);
-- 
2.53.0.381.g628a66ccf6


  parent reply	other threads:[~2026-03-31  3:39 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31  3:38 [PATCH 0/6] odb: add write operation to ODB transaction interface Justin Tobler
2026-03-31  3:38 ` [PATCH 1/6] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 13:56     ` Justin Tobler
2026-03-31 15:58       ` Junio C Hamano
2026-03-31 16:44         ` Justin Tobler
2026-03-31  3:38 ` [PATCH 2/6] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31  3:38 ` Justin Tobler [this message]
2026-03-31  7:48   ` [PATCH 3/6] object-file: remove flags from transaction packfile writes Patrick Steinhardt
2026-03-31 14:10     ` Justin Tobler
2026-03-31  3:38 ` [PATCH 4/6] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:14     ` Justin Tobler
2026-03-31  3:38 ` [PATCH 5/6] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:31     ` Justin Tobler
2026-03-31 22:59       ` Patrick Steinhardt
2026-03-31 23:21         ` Justin Tobler
2026-03-31 23:40           ` Patrick Steinhardt
2026-03-31  3:38 ` [PATCH 6/6] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-03-31  7:48   ` Patrick Steinhardt
2026-03-31 14:40     ` Justin Tobler
2026-04-01  3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
2026-04-01  3:03   ` [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-01  3:03   ` [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-01  3:03   ` [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-04-01 11:23     ` Patrick Steinhardt
2026-04-01  3:03   ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-01 11:23     ` Patrick Steinhardt
2026-04-01 14:02       ` Justin Tobler
2026-04-01  3:03   ` [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-01  3:03   ` [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-01  3:03   ` [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-01 11:24   ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-04-02 21:32   ` [PATCH v3 " Justin Tobler
2026-04-02 21:32     ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-02 21:32     ` [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-02 21:32     ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-11 17:58       ` Jeff King
2026-05-12 15:19         ` Justin Tobler
2026-04-02 21:32     ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-06 20:16       ` Jeff King
2026-04-06 20:19         ` Jeff King
2026-04-02 21:32     ` [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-02 21:32     ` [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-02 21:32     ` [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-08  7:25     ` [PATCH v3 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-05-14 18:37     ` [PATCH v4 " Justin Tobler
2026-05-14 18:37       ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-05-14 18:37       ` [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-05-14 18:37       ` [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-14 18:37       ` [PATCH v4 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-05-14 18:37       ` [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-05-14 18:37       ` [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-05-14 18:37       ` [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-05-15  3:56       ` [PATCH v4 0/7] odb: add write operation to ODB transaction interface Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260331033835.2863514-4-jltobler@gmail.com \
    --to=jltobler@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox