From: Justin Tobler <jltobler@gmail.com>
To: git@vger.kernel.org
Cc: ps@pks.im, Justin Tobler <jltobler@gmail.com>
Subject: [PATCH 3/6] object-file: remove flags from transaction packfile writes
Date: Mon, 30 Mar 2026 22:38:32 -0500 [thread overview]
Message-ID: <20260331033835.2863514-4-jltobler@gmail.com> (raw)
In-Reply-To: <20260331033835.2863514-1-jltobler@gmail.com>
The `index_blob_packfile_transaction()` function handles streaming a
blob from an fd to compute its object ID and conditionally writes the
object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A
subsequent commit will make these packfile object writes part of the
transaction interface. Consequently, having the object write be
conditional on this flag is a bit awkward.
In preparation for this change, introduce a dedicated
`hash_blob_stream()` helper that only computes the OID from the fd. This
is invoked by `index_fd()` instead when the INDEX_WRITE_OBJECT flag is not
set. The object write performed via `index_blob_packfile_transaction()`
is made unconditional accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 124 +++++++++++++++++++++++++++++---------------------
1 file changed, 71 insertions(+), 53 deletions(-)
diff --git a/object-file.c b/object-file.c
index bfbb632cf8..493173eaf4 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1388,11 +1388,10 @@ static int already_written(struct odb_transaction_files *transaction,
}
/* Lazily create backing packfile for the state */
-static void prepare_packfile_transaction(struct odb_transaction_files *transaction,
- unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction_files *transaction)
{
struct transaction_packfile *state = &transaction->packfile;
- if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+ if (state->f)
return;
state->f = create_tmp_packfile(transaction->base.source->odb->repo,
@@ -1405,6 +1404,34 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
die_errno("unable to write pack header");
}
+static int hash_blob_stream(const struct git_hash_algo *hash_algo,
+ struct object_id *result_oid, int fd, size_t size)
+{ /* Hash `size` bytes read from fd as a blob into result_oid; returns 0, or -1 on a failed read. */
+ unsigned char buf[16384]; /* holds the object header first, then each chunk of data */
+ struct git_hash_ctx ctx;
+ unsigned header_len;
+
+ header_len = format_object_header((char *)buf, sizeof(buf),
+ OBJ_BLOB, size);
+ hash_algo->init_fn(&ctx);
+ git_hash_update(&ctx, buf, header_len); /* the header is hashed ahead of the content */
+
+ while (size) { /* size counts the bytes still to be read */
+ size_t rsize = size < sizeof(buf) ? size : sizeof(buf);
+ ssize_t read_result = read_in_full(fd, buf, rsize);
+
+ if ((size_t)read_result != rsize) /* a negative result casts to a huge value, so it fails here too */
+ return -1;
+
+ git_hash_update(&ctx, buf, rsize);
+ size -= read_result;
+ }
+
+ git_hash_final_oid(result_oid, &ctx);
+
+ return 0;
+}
+
/*
* Read the contents from fd for size bytes, streaming it to the
* packfile in state while updating the hash in ctx. Signal a failure
@@ -1422,15 +1449,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
*/
static int stream_blob_to_pack(struct transaction_packfile *state,
struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path,
- unsigned flags)
+ int fd, size_t size, const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- int write_object = (flags & INDEX_WRITE_OBJECT);
off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1465,20 +1490,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
status = git_deflate(&s, size ? 0 : Z_FINISH);
if (!s.avail_out || status == Z_STREAM_END) {
- if (write_object) {
- size_t written = s.next_out - obuf;
-
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
- hashwrite(state->f, obuf, written);
- state->offset += written;
+ size_t written = s.next_out - obuf;
+
+ /* would we bust the size limit? */
+ if (state->nr_written &&
+ pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + written) {
+ git_deflate_abort(&s);
+ return -1;
}
+
+ hashwrite(state->f, obuf, written);
+ state->offset += written;
s.next_out = obuf;
s.avail_out = sizeof(obuf);
}
@@ -1566,8 +1589,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
*/
static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
struct object_id *result_oid, int fd,
- size_t size, const char *path,
- unsigned flags)
+ size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
off_t seekback, already_hashed_to;
@@ -1575,7 +1597,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
- struct pack_idx_entry *idx = NULL;
+ struct pack_idx_entry *idx;
seekback = lseek(fd, 0, SEEK_CUR);
if (seekback == (off_t)-1)
@@ -1586,33 +1608,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
- /* Note: idx is non-NULL when we are writing */
- if ((flags & INDEX_WRITE_OBJECT) != 0) {
- CALLOC_ARRAY(idx, 1);
-
- prepare_packfile_transaction(transaction, flags);
- hashfile_checkpoint_init(state->f, &checkpoint);
- }
+ CALLOC_ARRAY(idx, 1);
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint_init(state->f, &checkpoint);
already_hashed_to = 0;
while (1) {
- prepare_packfile_transaction(transaction, flags);
- if (idx) {
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
- }
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+
if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path, flags))
+ fd, size, path))
break;
/*
* Writing this object to the current pack will make
* it too big; we need to truncate it, start a new
* pack, and write into it.
*/
- if (!idx)
- BUG("should not happen");
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
flush_packfile_transaction(transaction);
@@ -1620,8 +1635,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
return error("cannot seek back");
}
git_hash_final_oid(result_oid, &ctx);
- if (!idx)
- return 0;
idx->crc32 = crc32_end(state->f);
if (already_written(transaction, result_oid)) {
@@ -1642,7 +1655,7 @@ int index_fd(struct index_state *istate, struct object_id *oid,
int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
{
- int ret;
+ int ret = 0;
/*
* Call xsize_t() only when needed to avoid potentially unnecessary
@@ -1659,18 +1672,23 @@ int index_fd(struct index_state *istate, struct object_id *oid,
ret = index_core(istate, oid, fd, xsize_t(st->st_size),
type, path, flags);
} else {
- struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size),
- path, flags);
- odb_transaction_commit(transaction);
+ if (flags & INDEX_WRITE_OBJECT) {
+ struct object_database *odb = the_repository->objects;
+ struct odb_transaction_files *files_transaction;
+ struct odb_transaction *transaction;
+
+ transaction = odb_transaction_begin(odb);
+ files_transaction = container_of(odb->transaction,
+ struct odb_transaction_files,
+ base);
+ ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+ xsize_t(st->st_size), path);
+ odb_transaction_commit(transaction);
+ } else {
+ if (hash_blob_stream(the_repository->hash_algo, oid, fd,
+ xsize_t(st->st_size)))
+ die("failed to hash blob");
+ }
}
close(fd);
--
2.53.0.381.g628a66ccf6
next prev parent reply other threads:[~2026-03-31 3:39 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-31 3:38 [PATCH 0/6] odb: add write operation to ODB transaction interface Justin Tobler
2026-03-31 3:38 ` [PATCH 1/6] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-03-31 7:48 ` Patrick Steinhardt
2026-03-31 13:56 ` Justin Tobler
2026-03-31 15:58 ` Junio C Hamano
2026-03-31 16:44 ` Justin Tobler
2026-03-31 3:38 ` [PATCH 2/6] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-03-31 7:48 ` Patrick Steinhardt
2026-03-31 3:38 ` Justin Tobler [this message]
2026-03-31 7:48 ` [PATCH 3/6] object-file: remove flags from transaction packfile writes Patrick Steinhardt
2026-03-31 14:10 ` Justin Tobler
2026-03-31 3:38 ` [PATCH 4/6] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-03-31 7:48 ` Patrick Steinhardt
2026-03-31 14:14 ` Justin Tobler
2026-03-31 3:38 ` [PATCH 5/6] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-03-31 7:48 ` Patrick Steinhardt
2026-03-31 14:31 ` Justin Tobler
2026-03-31 22:59 ` Patrick Steinhardt
2026-03-31 23:21 ` Justin Tobler
2026-03-31 23:40 ` Patrick Steinhardt
2026-03-31 3:38 ` [PATCH 6/6] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-03-31 7:48 ` Patrick Steinhardt
2026-03-31 14:40 ` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
2026-04-01 3:03 ` [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-01 3:03 ` [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-04-01 11:23 ` Patrick Steinhardt
2026-04-01 3:03 ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-01 11:23 ` Patrick Steinhardt
2026-04-01 14:02 ` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-01 3:03 ` [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-01 3:03 ` [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-01 11:24 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
2026-04-02 21:32 ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-02 21:32 ` [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-11 17:58 ` Jeff King
2026-05-12 15:19 ` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-04-06 20:16 ` Jeff King
2026-04-06 20:19 ` Jeff King
2026-04-02 21:32 ` [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-04-02 21:32 ` [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-04-02 21:32 ` [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-04-08 7:25 ` [PATCH v3 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
2026-05-14 18:37 ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-05-14 18:37 ` [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
2026-05-14 18:37 ` [PATCH v4 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
2026-05-14 18:37 ` [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
2026-05-14 18:37 ` [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
2026-05-14 18:37 ` [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-05-15 3:56 ` [PATCH v4 0/7] odb: add write operation to ODB transaction interface Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260331033835.2863514-4-jltobler@gmail.com \
--to=jltobler@gmail.com \
--cc=git@vger.kernel.org \
--cc=ps@pks.im \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.