* [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
` (7 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The current ODB transaction interface is colocated with other ODB
interfaces in "odb.{c,h}". Subsequent commits will expand `struct
odb_transaction` to support write operations on the transaction
directly. To keep things organized and prevent "odb.{c,h}" from becoming
more unwieldy, split out `struct odb_transaction` into a separate
header.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Makefile | 1 +
builtin/add.c | 1 +
builtin/unpack-objects.c | 1 +
builtin/update-index.c | 1 +
cache-tree.c | 1 +
meson.build | 1 +
object-file.c | 1 +
odb.c | 25 -------------------------
odb.h | 31 -------------------------------
odb/transaction.c | 28 ++++++++++++++++++++++++++++
odb/transaction.h | 38 ++++++++++++++++++++++++++++++++++++++
read-cache.c | 1 +
12 files changed, 74 insertions(+), 56 deletions(-)
create mode 100644 odb/transaction.c
create mode 100644 odb/transaction.h
diff --git a/Makefile b/Makefile
index dbf0022054..6342db13e5 100644
--- a/Makefile
+++ b/Makefile
@@ -1219,6 +1219,7 @@ LIB_OBJS += odb.o
LIB_OBJS += odb/source.o
LIB_OBJS += odb/source-files.o
LIB_OBJS += odb/streaming.o
+LIB_OBJS += odb/transaction.o
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
diff --git a/builtin/add.c b/builtin/add.c
index 7737ab878b..c859f66519 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -16,6 +16,7 @@
#include "run-command.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 6fc64e9e4b..bc9b1e047e 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -9,6 +9,7 @@
#include "hex.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 8a5907767b..bcc43852ef 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -19,6 +19,7 @@
#include "tree-walk.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "refs.h"
#include "resolve-undo.h"
#include "parse-options.h"
diff --git a/cache-tree.c b/cache-tree.c
index 60bcc07c3b..f056869cfd 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -10,6 +10,7 @@
#include "cache-tree.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "read-cache-ll.h"
#include "replace-object.h"
#include "repository.h"
diff --git a/meson.build b/meson.build
index 8309942d18..6dc23b3af2 100644
--- a/meson.build
+++ b/meson.build
@@ -405,6 +405,7 @@ libgit_sources = [
'odb/source.c',
'odb/source-files.c',
'odb/streaming.c',
+ 'odb/transaction.c',
'oid-array.c',
'oidmap.c',
'oidset.c',
diff --git a/object-file.c b/object-file.c
index f0b029ff0b..bfbb632cf8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -21,6 +21,7 @@
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
+#include "odb/transaction.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
diff --git a/odb.c b/odb.c
index 350e23f3c0..8c3cbc1b53 100644
--- a/odb.c
+++ b/odb.c
@@ -1069,28 +1069,3 @@ void odb_reprepare(struct object_database *o)
obj_read_unlock();
}
-
-struct odb_transaction *odb_transaction_begin(struct object_database *odb)
-{
- if (odb->transaction)
- return NULL;
-
- odb->transaction = odb_transaction_files_begin(odb->sources);
-
- return odb->transaction;
-}
-
-void odb_transaction_commit(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- /*
- * Ensure the transaction ending matches the pending transaction.
- */
- ASSERT(transaction == transaction->source->odb->transaction);
-
- transaction->commit(transaction);
- transaction->source->odb->transaction = NULL;
- free(transaction);
-}
diff --git a/odb.h b/odb.h
index 9aee260105..ec5367b13e 100644
--- a/odb.h
+++ b/odb.h
@@ -35,24 +35,6 @@ struct packed_git;
struct packfile_store;
struct cached_object_entry;
-/*
- * A transaction may be started for an object database prior to writing new
- * objects via odb_transaction_begin(). These objects are not committed until
- * odb_transaction_commit() is invoked. Only a single transaction may be pending
- * at a time.
- *
- * Each ODB source is expected to implement its own transaction handling.
- */
-struct odb_transaction;
-typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
-struct odb_transaction {
- /* The ODB source the transaction is opened against. */
- struct odb_source *source;
-
- /* The ODB source specific callback invoked to commit a transaction. */
- odb_transaction_commit_fn commit;
-};
-
/*
* The object database encapsulates access to objects in a repository. It
* manages one or more sources that store the actual objects which are
@@ -154,19 +136,6 @@ void odb_close(struct object_database *o);
*/
void odb_reprepare(struct object_database *o);
-/*
- * Starts an ODB transaction. Subsequent objects are written to the transaction
- * and not committed until odb_transaction_commit() is invoked on the
- * transaction. If the ODB already has a pending transaction, NULL is returned.
- */
-struct odb_transaction *odb_transaction_begin(struct object_database *odb);
-
-/*
- * Commits an ODB transaction making the written objects visible. If the
- * specified transaction is NULL, the function is a no-op.
- */
-void odb_transaction_commit(struct odb_transaction *transaction);
-
/*
* Find source by its object directory path. Returns a `NULL` pointer in case
* the source could not be found.
diff --git a/odb/transaction.c b/odb/transaction.c
new file mode 100644
index 0000000000..9bf3f347dc
--- /dev/null
+++ b/odb/transaction.c
@@ -0,0 +1,28 @@
+#include "git-compat-util.h"
+#include "object-file.h"
+#include "odb/transaction.h"
+
+struct odb_transaction *odb_transaction_begin(struct object_database *odb)
+{
+ if (odb->transaction)
+ return NULL;
+
+ odb->transaction = odb_transaction_files_begin(odb->sources);
+
+ return odb->transaction;
+}
+
+void odb_transaction_commit(struct odb_transaction *transaction)
+{
+ if (!transaction)
+ return;
+
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->source->odb->transaction);
+
+ transaction->commit(transaction);
+ transaction->source->odb->transaction = NULL;
+ free(transaction);
+}
diff --git a/odb/transaction.h b/odb/transaction.h
new file mode 100644
index 0000000000..a56e392f21
--- /dev/null
+++ b/odb/transaction.h
@@ -0,0 +1,38 @@
+#ifndef ODB_TRANSACTION_H
+#define ODB_TRANSACTION_H
+
+#include "odb.h"
+#include "odb/source.h"
+
+/*
+ * A transaction may be started for an object database prior to writing new
+ * objects via odb_transaction_begin(). These objects are not committed until
+ * odb_transaction_commit() is invoked. Only a single transaction may be pending
+ * at a time.
+ *
+ * Each ODB source is expected to implement its own transaction handling.
+ */
+struct odb_transaction;
+typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
+struct odb_transaction {
+ /* The ODB source the transaction is opened against. */
+ struct odb_source *source;
+
+ /* The ODB source specific callback invoked to commit a transaction. */
+ odb_transaction_commit_fn commit;
+};
+
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
+ * transaction. If the ODB already has a pending transaction, NULL is returned.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
+/*
+ * Commits an ODB transaction making the written objects visible. If the
+ * specified transaction is NULL, the function is a no-op.
+ */
+void odb_transaction_commit(struct odb_transaction *transaction);
+
+#endif
diff --git a/read-cache.c b/read-cache.c
index 5049f9baca..8147c7e94a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -20,6 +20,7 @@
#include "dir.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "oid-array.h"
#include "tree.h"
#include "commit.h"
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()`
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
2026-04-01 3:03 ` [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
` (6 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
Each ODB source is expected to provide an ODB transaction implementation
that should be used when starting a transaction. With d6fc6fe6f8
(odb/source: make `begin_transaction()` function pluggable, 2026-03-05),
the `struct odb_source` now provides a pluggable callback for beginning
transactions. Use the callback provided by the ODB source accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
odb/transaction.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/odb/transaction.c b/odb/transaction.c
index 9bf3f347dc..592ac84075 100644
--- a/odb/transaction.c
+++ b/odb/transaction.c
@@ -1,5 +1,5 @@
#include "git-compat-util.h"
-#include "object-file.h"
+#include "odb/source.h"
#include "odb/transaction.h"
struct odb_transaction *odb_transaction_begin(struct object_database *odb)
@@ -7,7 +7,7 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb)
if (odb->transaction)
return NULL;
- odb->transaction = odb_transaction_files_begin(odb->sources);
+ odb_source_begin_transaction(odb->sources, &odb->transaction);
return odb->transaction;
}
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
2026-04-01 3:03 ` [PATCH v2 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-01 3:03 ` [PATCH v2 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 11:23 ` Patrick Steinhardt
2026-04-01 3:03 ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
` (5 subsequent siblings)
8 siblings, 1 reply; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The `read()` callback used by `struct odb_write_stream` currently
returns a pointer to an internal buffer along with the number of bytes
read. This makes buffer ownership unclear and provides no way to report
errors.
Update the interface to instead require the caller to provide a buffer,
and have the callback return the number of bytes written to it or a
negative value on error. Call sites are updated accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/unpack-objects.c | 19 +++++++------------
object-file.c | 13 ++++++++++---
odb.h | 2 +-
3 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index bc9b1e047e..420619e2cb 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -360,24 +360,21 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
struct input_zstream_data {
git_zstream *zstream;
- unsigned char buf[8192];
int status;
};
-static const void *feed_input_zstream(struct odb_write_stream *in_stream,
- unsigned long *readlen)
+static ssize_t feed_input_zstream(struct odb_write_stream *in_stream,
+ unsigned char *buf, size_t buf_len)
{
struct input_zstream_data *data = in_stream->data;
git_zstream *zstream = data->zstream;
void *in = fill(1);
- if (in_stream->is_finished) {
- *readlen = 0;
- return NULL;
- }
+ if (in_stream->is_finished)
+ return 0;
- zstream->next_out = data->buf;
- zstream->avail_out = sizeof(data->buf);
+ zstream->next_out = buf;
+ zstream->avail_out = buf_len;
zstream->next_in = in;
zstream->avail_in = len;
@@ -385,9 +382,7 @@ static const void *feed_input_zstream(struct odb_write_stream *in_stream,
in_stream->is_finished = data->status != Z_OK;
use(len - zstream->avail_in);
- *readlen = sizeof(data->buf) - zstream->avail_out;
-
- return data->buf;
+ return buf_len - zstream->avail_out;
}
static void stream_blob(unsigned long size, unsigned nr)
diff --git a/object-file.c b/object-file.c
index bfbb632cf8..f3038756fc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1066,6 +1066,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
struct git_hash_ctx c, compat_c;
struct strbuf tmp_file = STRBUF_INIT;
struct strbuf filename = STRBUF_INIT;
+ unsigned char buf[8192];
int dirlen;
char hdr[MAX_HEADER_LEN];
int hdrlen;
@@ -1098,9 +1099,15 @@ int odb_source_loose_write_stream(struct odb_source *source,
unsigned char *in0 = stream.next_in;
if (!stream.avail_in && !in_stream->is_finished) {
- const void *in = in_stream->read(in_stream, &stream.avail_in);
- stream.next_in = (void *)in;
- in0 = (unsigned char *)in;
+ ssize_t read_len = in_stream->read(in_stream, buf, sizeof(buf));
+ if (read_len < 0) {
+ err = -1;
+ goto cleanup;
+ }
+
+ stream.avail_in = read_len;
+ stream.next_in = buf;
+ in0 = buf;
/* All data has been read. */
if (in_stream->is_finished)
flush = 1;
diff --git a/odb.h b/odb.h
index ec5367b13e..91ec206eed 100644
--- a/odb.h
+++ b/odb.h
@@ -530,7 +530,7 @@ static inline int odb_write_object(struct object_database *odb,
}
struct odb_write_stream {
- const void *(*read)(struct odb_write_stream *, unsigned long *len);
+ ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t len);
void *data;
int is_finished;
};
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v2 4/7] object-file: remove flags from transaction packfile writes
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
` (2 preceding siblings ...)
2026-04-01 3:03 ` [PATCH v2 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 11:23 ` Patrick Steinhardt
2026-04-01 3:03 ` [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
` (4 subsequent siblings)
8 siblings, 1 reply; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The `index_blob_packfile_transaction()` function handles streaming a
blob from an fd to compute its object ID and conditionally writes the
object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A
subsequent commit will make these packfile object writes part of the
transaction interface. Consequently, having the object write be
conditional on this flag is a bit awkward.
In preparation for this change, introduce a dedicated
`hash_blob_stream()` helper that only computes the OID from a `struct
odb_write_stream`. This is invoked by `index_fd()` instead when the
INDEX_WRITE_OBJECT is not set. The object write performed via
`index_blob_packfile_transaction()` is made unconditional accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 131 +++++++++++++++++++++++++++++-------------------
odb/streaming.c | 40 +++++++++++++++
odb/streaming.h | 8 +++
3 files changed, 127 insertions(+), 52 deletions(-)
diff --git a/object-file.c b/object-file.c
index f3038756fc..f317a24ccf 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1395,11 +1395,10 @@ static int already_written(struct odb_transaction_files *transaction,
}
/* Lazily create backing packfile for the state */
-static void prepare_packfile_transaction(struct odb_transaction_files *transaction,
- unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction_files *transaction)
{
struct transaction_packfile *state = &transaction->packfile;
- if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+ if (state->f)
return;
state->f = create_tmp_packfile(transaction->base.source->odb->repo,
@@ -1412,6 +1411,38 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
die_errno("unable to write pack header");
}
+static int hash_blob_stream(struct odb_write_stream *stream,
+ const struct git_hash_algo *hash_algo,
+ struct object_id *result_oid, size_t size)
+{
+ unsigned char buf[16384];
+ struct git_hash_ctx ctx;
+ unsigned header_len;
+ size_t total = 0;
+
+ header_len = format_object_header((char *)buf, sizeof(buf),
+ OBJ_BLOB, size);
+ hash_algo->init_fn(&ctx);
+ git_hash_update(&ctx, buf, header_len);
+
+ while (!stream->is_finished) {
+ ssize_t read_result = stream->read(stream, buf, sizeof(buf));
+
+ if (read_result < 0)
+ return -1;
+
+ git_hash_update(&ctx, buf, read_result);
+ total += read_result;
+ }
+
+ if (total != size)
+ return -1;
+
+ git_hash_final_oid(result_oid, &ctx);
+
+ return 0;
+}
+
/*
* Read the contents from fd for size bytes, streaming it to the
* packfile in state while updating the hash in ctx. Signal a failure
@@ -1429,15 +1460,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
*/
static int stream_blob_to_pack(struct transaction_packfile *state,
struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path,
- unsigned flags)
+ int fd, size_t size, const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- int write_object = (flags & INDEX_WRITE_OBJECT);
off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1472,20 +1501,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
status = git_deflate(&s, size ? 0 : Z_FINISH);
if (!s.avail_out || status == Z_STREAM_END) {
- if (write_object) {
- size_t written = s.next_out - obuf;
-
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
- hashwrite(state->f, obuf, written);
- state->offset += written;
+ size_t written = s.next_out - obuf;
+
+ /* would we bust the size limit? */
+ if (state->nr_written &&
+ pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + written) {
+ git_deflate_abort(&s);
+ return -1;
}
+
+ hashwrite(state->f, obuf, written);
+ state->offset += written;
s.next_out = obuf;
s.avail_out = sizeof(obuf);
}
@@ -1573,8 +1600,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
*/
static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
struct object_id *result_oid, int fd,
- size_t size, const char *path,
- unsigned flags)
+ size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
off_t seekback, already_hashed_to;
@@ -1582,7 +1608,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
- struct pack_idx_entry *idx = NULL;
+ struct pack_idx_entry *idx;
seekback = lseek(fd, 0, SEEK_CUR);
if (seekback == (off_t)-1)
@@ -1593,33 +1619,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
- /* Note: idx is non-NULL when we are writing */
- if ((flags & INDEX_WRITE_OBJECT) != 0) {
- CALLOC_ARRAY(idx, 1);
-
- prepare_packfile_transaction(transaction, flags);
- hashfile_checkpoint_init(state->f, &checkpoint);
- }
+ CALLOC_ARRAY(idx, 1);
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint_init(state->f, &checkpoint);
already_hashed_to = 0;
while (1) {
- prepare_packfile_transaction(transaction, flags);
- if (idx) {
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
- }
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+
if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path, flags))
+ fd, size, path))
break;
/*
* Writing this object to the current pack will make
* it too big; we need to truncate it, start a new
* pack, and write into it.
*/
- if (!idx)
- BUG("should not happen");
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
flush_packfile_transaction(transaction);
@@ -1627,8 +1646,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
return error("cannot seek back");
}
git_hash_final_oid(result_oid, &ctx);
- if (!idx)
- return 0;
idx->crc32 = crc32_end(state->f);
if (already_written(transaction, result_oid)) {
@@ -1666,18 +1683,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
ret = index_core(istate, oid, fd, xsize_t(st->st_size),
type, path, flags);
} else {
- struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size),
- path, flags);
- odb_transaction_commit(transaction);
+ struct odb_write_stream stream = { 0 };
+ odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
+
+ if (flags & INDEX_WRITE_OBJECT) {
+ struct object_database *odb = the_repository->objects;
+ struct odb_transaction_files *files_transaction;
+ struct odb_transaction *transaction;
+
+ transaction = odb_transaction_begin(odb);
+ files_transaction = container_of(odb->transaction,
+ struct odb_transaction_files,
+ base);
+ ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+ xsize_t(st->st_size), path);
+ odb_transaction_commit(transaction);
+ } else {
+ ret = hash_blob_stream(&stream,
+ the_repository->hash_algo, oid,
+ xsize_t(st->st_size));
+ }
+
+ free(stream.data);
}
close(fd);
diff --git a/odb/streaming.c b/odb/streaming.c
index 5927a12954..85187541c5 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -287,3 +287,43 @@ int odb_stream_blob_to_fd(struct object_database *odb,
odb_read_stream_close(st);
return result;
}
+
+struct read_object_fd_data {
+ int fd;
+ size_t remaining;
+};
+
+static ssize_t read_object_fd(struct odb_write_stream *stream,
+ unsigned char *buf, size_t len)
+{
+ struct read_object_fd_data *data = stream->data;
+ ssize_t read_result;
+ size_t count;
+
+ if (stream->is_finished)
+ return 0;
+
+ count = data->remaining < len ? data->remaining : len;
+ read_result = read_in_full(data->fd, buf, count);
+ if (read_result < 0 || (size_t)read_result != count)
+ return -1;
+
+ data->remaining -= count;
+ if (!data->remaining)
+ stream->is_finished = 1;
+
+ return read_result;
+}
+
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size)
+{
+ struct read_object_fd_data *data;
+
+ CALLOC_ARRAY(data, 1);
+ data->fd = fd;
+ data->remaining = size;
+
+ stream->data = data;
+ stream->read = read_object_fd;
+}
diff --git a/odb/streaming.h b/odb/streaming.h
index c7861f7e13..e5232cd4d1 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -5,6 +5,7 @@
#define STREAMING_H 1
#include "object.h"
+#include "odb.h"
struct object_database;
struct odb_read_stream;
@@ -64,4 +65,11 @@ int odb_stream_blob_to_fd(struct object_database *odb,
struct stream_filter *filter,
int can_seek);
+/*
+ * Sets up an ODB write stream that reads from an fd. The caller is expected to
+ * free the underlying stream data.
+ */
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size);
+
#endif /* STREAMING_H */
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* Re: [PATCH v2 4/7] object-file: remove flags from transaction packfile writes
2026-04-01 3:03 ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
@ 2026-04-01 11:23 ` Patrick Steinhardt
2026-04-01 14:02 ` Justin Tobler
0 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2026-04-01 11:23 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, gitster
On Tue, Mar 31, 2026 at 10:03:12PM -0500, Justin Tobler wrote:
> diff --git a/object-file.c b/object-file.c
> index f3038756fc..f317a24ccf 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1412,6 +1411,38 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
> die_errno("unable to write pack header");
> }
>
> +static int hash_blob_stream(struct odb_write_stream *stream,
> + const struct git_hash_algo *hash_algo,
> + struct object_id *result_oid, size_t size)
> +{
> + unsigned char buf[16384];
> + struct git_hash_ctx ctx;
> + unsigned header_len;
> + size_t total = 0;
One nit: I think `total` and `size` don't really give a good sense of
which variable tracks what. If this was instead `bytes_hashed` and
`size` it would become a lot more obvious.
> @@ -1666,18 +1683,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
> ret = index_core(istate, oid, fd, xsize_t(st->st_size),
> type, path, flags);
> } else {
> - struct object_database *odb = the_repository->objects;
> - struct odb_transaction_files *files_transaction;
> - struct odb_transaction *transaction;
> -
> - transaction = odb_transaction_begin(odb);
> - files_transaction = container_of(odb->transaction,
> - struct odb_transaction_files,
> - base);
> - ret = index_blob_packfile_transaction(files_transaction, oid, fd,
> - xsize_t(st->st_size),
> - path, flags);
> - odb_transaction_commit(transaction);
> + struct odb_write_stream stream = { 0 };
> + odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
I would assume that `odb_write_stream_from_fd()` knows to fully
initialize the stream, so zero-initializing shouldn't be necessary,
right?
> diff --git a/odb/streaming.h b/odb/streaming.h
> index c7861f7e13..e5232cd4d1 100644
> --- a/odb/streaming.h
> +++ b/odb/streaming.h
> @@ -5,6 +5,7 @@
> #define STREAMING_H 1
>
> #include "object.h"
> +#include "odb.h"
>
> struct object_database;
> struct odb_read_stream;
> @@ -64,4 +65,11 @@ int odb_stream_blob_to_fd(struct object_database *odb,
> struct stream_filter *filter,
> int can_seek);
>
> +/*
> + * Sets up an ODB write stream that reads from an fd. The caller is expected to
> + * free the underlying stream data.
> + */
Hm. Shouldn't we provide an interface that let's the caller do this
without having to know about the stream's internals, like
`odb_write_stream_release()`?
Patrick
^ permalink raw reply [flat|nested] 57+ messages in thread* Re: [PATCH v2 4/7] object-file: remove flags from transaction packfile writes
2026-04-01 11:23 ` Patrick Steinhardt
@ 2026-04-01 14:02 ` Justin Tobler
0 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 14:02 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, gitster
On 26/04/01 01:23PM, Patrick Steinhardt wrote:
> On Tue, Mar 31, 2026 at 10:03:12PM -0500, Justin Tobler wrote:
> > diff --git a/object-file.c b/object-file.c
> > index f3038756fc..f317a24ccf 100644
> > --- a/object-file.c
> > +++ b/object-file.c
> > @@ -1412,6 +1411,38 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
> > die_errno("unable to write pack header");
> > }
> >
> > +static int hash_blob_stream(struct odb_write_stream *stream,
> > + const struct git_hash_algo *hash_algo,
> > + struct object_id *result_oid, size_t size)
> > +{
> > + unsigned char buf[16384];
> > + struct git_hash_ctx ctx;
> > + unsigned header_len;
> > + size_t total = 0;
>
> One nit: I think `total` and `size` don't really give a good sense of
> which variable tracks what. If this was instead `bytes_hashed` and
> `size` it would become a lot more obvious.
That's fair. I'll update the names in the next version.
> > @@ -1666,18 +1683,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
> > ret = index_core(istate, oid, fd, xsize_t(st->st_size),
> > type, path, flags);
> > } else {
> > - struct object_database *odb = the_repository->objects;
> > - struct odb_transaction_files *files_transaction;
> > - struct odb_transaction *transaction;
> > -
> > - transaction = odb_transaction_begin(odb);
> > - files_transaction = container_of(odb->transaction,
> > - struct odb_transaction_files,
> > - base);
> > - ret = index_blob_packfile_transaction(files_transaction, oid, fd,
> > - xsize_t(st->st_size),
> > - path, flags);
> > - odb_transaction_commit(transaction);
> > + struct odb_write_stream stream = { 0 };
> > + odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
>
> I would assume that `odb_write_stream_from_fd()` knows to fully
> initialize the stream, so zero-initializing shouldn't be necessary,
> right?
Currently the `is_finished` field is not being initialized, but there
isn't really any reason we couldn't do that in
`odb_write_stream_from_fd()` though. Will update accordingly.
> > diff --git a/odb/streaming.h b/odb/streaming.h
> > index c7861f7e13..e5232cd4d1 100644
> > --- a/odb/streaming.h
> > +++ b/odb/streaming.h
> > @@ -5,6 +5,7 @@
> > #define STREAMING_H 1
> >
> > #include "object.h"
> > +#include "odb.h"
> >
> > struct object_database;
> > struct odb_read_stream;
> > @@ -64,4 +65,11 @@ int odb_stream_blob_to_fd(struct object_database *odb,
> > struct stream_filter *filter,
> > int can_seek);
> >
> > +/*
> > + * Sets up an ODB write stream that reads from an fd. The caller is expected to
> > + * free the underlying stream data.
> > + */
>
> Hm. Shouldn't we provide an interface that let's the caller do this
> without having to know about the stream's internals, like
> `odb_write_stream_release()`?
I thought about doint this, but hesitated because the other `struct
odb_write_stream` usage sets up its `data` field differently and
consequently would have no need for a `odb_write_stream_release()`. I'm
probably overthinking this though and it probably makes sense just to
add it.
Thanks,
-Justin
^ permalink raw reply [flat|nested] 57+ messages in thread
* [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
` (3 preceding siblings ...)
2026-04-01 3:03 ` [PATCH v2 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
` (3 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
In certain scenarios, Git handles writing blobs that exceed
"core.bigFileThreshold" differently by streaming the object directly
into a packfile. When there is an active ODB transaction, these blobs
are streamed to the same packfile instead of using a separate packfile
for each. If "pack.packSizeLimit" is configured and streaming another
object causes the packfile to exceed the configured limit, the packfile
is truncated back to the previous object and the object write is
restarted in a new packfile.
This works fine, but requires the fd being read from to save a
checkpoint so it becomes possible to rewind the input source via seeking
back to a known offset at the beginning. In a subsequent commit, blob
streaming is converted to use `struct odb_write_stream` as a more
generic input source instead of an fd which doesn't provide a mechanism
for rewinding.
For this use case though, rewinding the fd is not strictly necessary
because the inflated size of the object is known and can be used to
approximate whether writing the object would cause the packfile to
exceed the configured limit prior to writing anything. These blobs
written to the packfile are never deltified thus the size difference
between what is written versus the inflated size is due to zlib
compression. While this does prevent packfiles from being filled to the
potential maximum is some cases, it should be good enough and still
prevents the packfile from exceeding any configured limit.
Use the inflated blob size to determine whether writing an object to a
packfile will exceed the configured "pack.packSizeLimit".
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 86 +++++++++++++++------------------------------------
1 file changed, 25 insertions(+), 61 deletions(-)
diff --git a/object-file.c b/object-file.c
index f317a24ccf..23229fbd95 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1445,29 +1445,17 @@ static int hash_blob_stream(struct odb_write_stream *stream,
/*
* Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
+ * packfile in state while updating the hash in ctx.
*/
-static int stream_blob_to_pack(struct transaction_packfile *state,
- struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path)
+static void stream_blob_to_pack(struct transaction_packfile *state,
+ struct git_hash_ctx *ctx, int fd, size_t size,
+ const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1484,15 +1472,9 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
if ((size_t)read_result != rsize)
die("failed to read %u bytes from '%s'",
(unsigned)rsize, path);
- offset += rsize;
- if (*already_hashed_to < offset) {
- size_t hsize = offset - *already_hashed_to;
- if (rsize < hsize)
- hsize = rsize;
- if (hsize)
- git_hash_update(ctx, ibuf, hsize);
- *already_hashed_to = offset;
- }
+
+ git_hash_update(ctx, ibuf, rsize);
+
s.next_in = ibuf;
s.avail_in = rsize;
size -= rsize;
@@ -1503,14 +1485,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
if (!s.avail_out || status == Z_STREAM_END) {
size_t written = s.next_out - obuf;
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
hashwrite(state->f, obuf, written);
state->offset += written;
s.next_out = obuf;
@@ -1527,7 +1501,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
}
}
git_deflate_end(&s);
- return 0;
}
static void flush_packfile_transaction(struct odb_transaction_files *transaction)
@@ -1603,48 +1576,39 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
- off_t seekback, already_hashed_to;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
struct pack_idx_entry *idx;
- seekback = lseek(fd, 0, SEEK_CUR);
- if (seekback == (off_t)-1)
- return error("cannot find the current offset");
-
header_len = format_object_header((char *)obuf, sizeof(obuf),
OBJ_BLOB, size);
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
+ /*
+ * If writing another object to the packfile could result in it
+ * exceeding the configured size limit, flush the current packfile
+ * transaction.
+ *
+ * Note that this uses the inflated object size as an approximation.
+ * Blob objects written in this manner are not delta-compressed, so
+ * the difference between the inflated and on-disk size is limited
+ * to zlib compression and is sufficient for this check.
+ */
+ if (state->nr_written && pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + size)
+ flush_packfile_transaction(transaction);
+
CALLOC_ARRAY(idx, 1);
prepare_packfile_transaction(transaction);
hashfile_checkpoint_init(state->f, &checkpoint);
- already_hashed_to = 0;
-
- while (1) {
- prepare_packfile_transaction(transaction);
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
-
- if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path))
- break;
- /*
- * Writing this object to the current pack will make
- * it too big; we need to truncate it, start a new
- * pack, and write into it.
- */
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- flush_packfile_transaction(transaction);
- if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
- return error("cannot seek back");
- }
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+ stream_blob_to_pack(state, &ctx, fd, size, path);
git_hash_final_oid(result_oid, &ctx);
idx->crc32 = crc32_end(state->f);
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
` (4 preceding siblings ...)
2026-04-01 3:03 ` [PATCH v2 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 3:03 ` [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
` (2 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The `index_blob_packfile_transaction()` function streams blob data
directly from an fd. This makes it difficult to reuse as part of a
generic transactional object writing interface.
Refactor the packfile write path to operate on a `struct
odb_write_stream`, allowing callers to supply data from arbitrary
sources.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 55 +++++++++++++++++++++++++++------------------------
1 file changed, 29 insertions(+), 26 deletions(-)
diff --git a/object-file.c b/object-file.c
index 23229fbd95..f7e830c4ec 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1444,18 +1444,19 @@ static int hash_blob_stream(struct odb_write_stream *stream,
}
/*
- * Read the contents from fd for size bytes, streaming it to the
+ * Read the contents from the stream provided, streaming it to the
* packfile in state while updating the hash in ctx.
*/
static void stream_blob_to_pack(struct transaction_packfile *state,
- struct git_hash_ctx *ctx, int fd, size_t size,
- const char *path)
+ struct git_hash_ctx *ctx, size_t size,
+ struct odb_write_stream *stream)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
+ size_t total = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1464,23 +1465,20 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
s.avail_out = sizeof(obuf) - hdrlen;
while (status != Z_STREAM_END) {
- if (size && !s.avail_in) {
- size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
- ssize_t read_result = read_in_full(fd, ibuf, rsize);
- if (read_result < 0)
- die_errno("failed to read from '%s'", path);
- if ((size_t)read_result != rsize)
- die("failed to read %u bytes from '%s'",
- (unsigned)rsize, path);
+ if (!stream->is_finished && !s.avail_in) {
+ ssize_t rsize = stream->read(stream, ibuf, sizeof(ibuf));
+
+ if (rsize < 0)
+ die("failed to read blob data");
git_hash_update(ctx, ibuf, rsize);
s.next_in = ibuf;
s.avail_in = rsize;
- size -= rsize;
+ total += rsize;
}
- status = git_deflate(&s, size ? 0 : Z_FINISH);
+ status = git_deflate(&s, stream->is_finished ? Z_FINISH : 0);
if (!s.avail_out || status == Z_STREAM_END) {
size_t written = s.next_out - obuf;
@@ -1500,6 +1498,11 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
die("unexpected deflate failure: %d", status);
}
}
+
+ if (total != size)
+ die("read %" PRIuMAX " bytes of blob data, but expected %" PRIuMAX " bytes",
+ (uintmax_t)total, (uintmax_t)size);
+
git_deflate_end(&s);
}
@@ -1571,10 +1574,13 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
- struct object_id *result_oid, int fd,
- size_t size, const char *path)
+static int index_blob_packfile_transaction(struct odb_transaction *base,
+ struct odb_write_stream *stream,
+ size_t size, struct object_id *result_oid)
{
+ struct odb_transaction_files *transaction = container_of(base,
+ struct odb_transaction_files,
+ base);
struct transaction_packfile *state = &transaction->packfile;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
@@ -1608,7 +1614,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
hashfile_checkpoint(state->f, &checkpoint);
idx->offset = state->offset;
crc32_begin(state->f);
- stream_blob_to_pack(state, &ctx, fd, size, path);
+ stream_blob_to_pack(state, &ctx, size, stream);
git_hash_final_oid(result_oid, &ctx);
idx->crc32 = crc32_end(state->f);
@@ -1652,15 +1658,12 @@ int index_fd(struct index_state *istate, struct object_id *oid,
if (flags & INDEX_WRITE_OBJECT) {
struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size), path);
+ struct odb_transaction *transaction = odb_transaction_begin(odb);
+
+ ret = index_blob_packfile_transaction(odb->transaction,
+ &stream,
+ xsize_t(st->st_size),
+ oid);
odb_transaction_commit(transaction);
} else {
ret = hash_blob_stream(&stream,
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
` (5 preceding siblings ...)
2026-04-01 3:03 ` [PATCH v2 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
@ 2026-04-01 3:03 ` Justin Tobler
2026-04-01 11:24 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-01 3:03 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
How an ODB transaction handles writing objects is expected to vary
between implementations. Introduce a new `write_object_stream()`
callback in `struct odb_transaction` to make this function pluggable.
Wire up `index_blob_packfile_transaction()` for use with `struct
odb_transaction_files` accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 16 +++++++++-------
odb/transaction.c | 7 +++++++
odb/transaction.h | 25 ++++++++++++++++++++++---
3 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/object-file.c b/object-file.c
index f7e830c4ec..45ed87c4d9 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1574,9 +1574,10 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_packfile_transaction(struct odb_transaction *base,
- struct odb_write_stream *stream,
- size_t size, struct object_id *result_oid)
+static int odb_transaction_files_write_object_stream(struct odb_transaction *base,
+ struct odb_write_stream *stream,
+ size_t size,
+ struct object_id *result_oid)
{
struct odb_transaction_files *transaction = container_of(base,
struct odb_transaction_files,
@@ -1660,10 +1661,10 @@ int index_fd(struct index_state *istate, struct object_id *oid,
struct object_database *odb = the_repository->objects;
struct odb_transaction *transaction = odb_transaction_begin(odb);
- ret = index_blob_packfile_transaction(odb->transaction,
- &stream,
- xsize_t(st->st_size),
- oid);
+ ret = odb_transaction_write_object_stream(odb->transaction,
+ &stream,
+ xsize_t(st->st_size),
+ oid);
odb_transaction_commit(transaction);
} else {
ret = hash_blob_stream(&stream,
@@ -2128,6 +2129,7 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
transaction = xcalloc(1, sizeof(*transaction));
transaction->base.source = source;
transaction->base.commit = odb_transaction_files_commit;
+ transaction->base.write_object_stream = odb_transaction_files_write_object_stream;
return &transaction->base;
}
diff --git a/odb/transaction.c b/odb/transaction.c
index 592ac84075..b16e07aebf 100644
--- a/odb/transaction.c
+++ b/odb/transaction.c
@@ -26,3 +26,10 @@ void odb_transaction_commit(struct odb_transaction *transaction)
transaction->source->odb->transaction = NULL;
free(transaction);
}
+
+int odb_transaction_write_object_stream(struct odb_transaction *transaction,
+ struct odb_write_stream *stream,
+ size_t len, struct object_id *oid)
+{
+ return transaction->write_object_stream(transaction, stream, len, oid);
+}
diff --git a/odb/transaction.h b/odb/transaction.h
index a56e392f21..854fda06f5 100644
--- a/odb/transaction.h
+++ b/odb/transaction.h
@@ -12,14 +12,24 @@
*
* Each ODB source is expected to implement its own transaction handling.
*/
-struct odb_transaction;
-typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
struct odb_transaction {
/* The ODB source the transaction is opened against. */
struct odb_source *source;
/* The ODB source specific callback invoked to commit a transaction. */
- odb_transaction_commit_fn commit;
+ void (*commit)(struct odb_transaction *transaction);
+
+ /*
+ * This callback is expected to write the given object stream into
+ * the ODB transaction. Note that for now, only blobs support streaming.
+ *
+ * The resulting object ID shall be written into the out pointer. The
+ * callback is expected to return 0 on success, a negative error code
+ * otherwise.
+ */
+ int (*write_object_stream)(struct odb_transaction *transaction,
+ struct odb_write_stream *stream, size_t len,
+ struct object_id *oid);
};
/*
@@ -35,4 +45,13 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb);
*/
void odb_transaction_commit(struct odb_transaction *transaction);
+/*
+ * Writes the object in the provided stream into the transaction. The resulting
+ * object ID is written into the out pointer. Returns 0 on success, a negative
+ * error code otherwise.
+ */
+int odb_transaction_write_object_stream(struct odb_transaction *transaction,
+ struct odb_write_stream *stream,
+ size_t len, struct object_id *oid);
+
#endif
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* Re: [PATCH v2 0/7] odb: add write operation to ODB transaction interface
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
` (6 preceding siblings ...)
2026-04-01 3:03 ` [PATCH v2 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
@ 2026-04-01 11:24 ` Patrick Steinhardt
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
8 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2026-04-01 11:24 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, gitster
On Tue, Mar 31, 2026 at 10:03:08PM -0500, Justin Tobler wrote:
> Greetings,
>
> This series lays the groundwork for introducing write operations to the
> ODB transaction interface. The eventual goal is for all object writes
> performed within a transaction to go through this interface explicitly,
> rather than implicitly relying on the transaction to reconfigure ODB
> sources so that writes are redirected to a temporary location.
>
> For now, only `odb_transaction_write_object_stream()` is implemented and
> wires up the existing logic for streaming "large" blobs directly into a
> packfile as part of the transaction.
>
> Most of the patches are structural refactorings to enable this, but
> patch 4 introduces a behavioral change in how packfiles that would
> exceed "pack.packSizeLimit" are handled.
>
> Changes since V1:
> - Fixed some typos
> - Improved error handling
> - Removed unnecessary guard statement
> - Documented in comments why inflated object size is used to approximate
> if object write will exceed "pack.packSizeLimit".
> - Updated `struct odb_write_stream` read() callback to support returning
> errors and using caller provided buffer
> - Updated the `hash_blob_stream()` function signature to operate on a
> `struct odb_write_stream` instead of an fd directly
> - Renamed some variables/functions for better clarity
Thanks. I've had some smaller nits, but overall I'm happy with this
state.
Patrick
^ permalink raw reply [flat|nested] 57+ messages in thread* [PATCH v3 0/7] odb: add write operation to ODB transaction interface
2026-04-01 3:03 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Justin Tobler
` (7 preceding siblings ...)
2026-04-01 11:24 ` [PATCH v2 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
` (8 more replies)
8 siblings, 9 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
Greetings,
This series lays the groundwork for introducing write operations to the
ODB transaction interface. The eventual goal is for all object writes
performed within a transaction to go through this interface explicitly,
rather than implicitly relying on the transaction to reconfigure ODB
sources so that writes are redirected to a temporary location.
For now, only `odb_transaction_write_object_stream()` is implemented and
wires up the existing logic for streaming "large" blobs directly into a
packfile as part of the transaction.
Most of the patches are structural refactorings to enable this, but
patch 4 introduces a behavioral change in how packfiles that would
exceed "pack.packSizeLimit" are handled.
Changes since V2:
- Renamed some variables to improve clarity
- Make `odb_write_stream_from_fd()` fully initialize the underlying
`struct odb_write_stream`
- Move `struct odb_write_stream` to "odb/streaming.h"
- Make the `hash_blob_stream()` helper more generic by operating on a
`struct odb_write_stream` instead of reading from an fd directly.
- Introduce an `odb_write_stream_release()` helper to free the
underlying stream data.
Changes since V1:
- Fixed some typos
- Improved error handling
- Removed unnecessary guard statement
- Documented in comments why inflated object size is used to approximate
if object write will exceed "pack.packSizeLimit".
- Updated `struct odb_write_stream` read() callback to support returning
errors and using caller provided buffer
- Updated the `hash_blob_stream()` function signature to operate on a
`struct odb_write_stream` instead of an fd directly
- Renamed some variables/functions for better clarity
Thanks,
-Justin
Justin Tobler (7):
odb: split `struct odb_transaction` into separate header
odb/transaction: use pluggable `begin_transaction()`
odb: update `struct odb_write_stream` read() callback
object-file: remove flags from transaction packfile writes
object-file: avoid fd seekback by checking object size upfront
object-file: generalize packfile writes to use odb_write_stream
odb/transaction: make `write_object_stream()` pluggable
Makefile | 1 +
builtin/add.c | 1 +
builtin/unpack-objects.c | 21 ++--
builtin/update-index.c | 1 +
cache-tree.c | 1 +
meson.build | 1 +
object-file.c | 237 ++++++++++++++++++++-------------------
odb.c | 25 -----
odb.h | 37 +-----
odb/streaming.c | 51 +++++++++
odb/streaming.h | 30 +++++
odb/transaction.c | 35 ++++++
odb/transaction.h | 57 ++++++++++
read-cache.c | 1 +
14 files changed, 311 insertions(+), 188 deletions(-)
create mode 100644 odb/transaction.c
create mode 100644 odb/transaction.h
Range-diff against v2:
1: eee372b426 = 1: eee372b426 odb: split `struct odb_transaction` into separate header
2: 57ac075560 = 2: 57ac075560 odb/transaction: use pluggable `begin_transaction()`
3: 556f003d0a ! 3: 11321ad607 odb: update `struct odb_write_stream` read() callback
@@ Commit message
Update the interface to instead require the caller to provide a buffer,
and have the callback return the number of bytes written to it or a
- negative value on error. Call sites are updated accordingly.
+ negative value on error. While at it, also move the `struct
+ odb_write_stream` definition to "odb/streaming.h". Call sites are
+ updated accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
## builtin/unpack-objects.c ##
+@@
+ #include "hex.h"
+ #include "object-file.h"
+ #include "odb.h"
++#include "odb/streaming.h"
+ #include "odb/transaction.h"
+ #include "object.h"
+ #include "delta.h"
@@ builtin/unpack-objects.c: static void unpack_non_delta_entry(enum object_type type, unsigned long size,
struct input_zstream_data {
@@ object-file.c: int odb_source_loose_write_stream(struct odb_source *source,
- const void *in = in_stream->read(in_stream, &stream.avail_in);
- stream.next_in = (void *)in;
- in0 = (unsigned char *)in;
-+ ssize_t read_len = in_stream->read(in_stream, buf, sizeof(buf));
++ ssize_t read_len = odb_write_stream_read(in_stream, buf,
++ sizeof(buf));
+ if (read_len < 0) {
+ err = -1;
+ goto cleanup;
@@ object-file.c: int odb_source_loose_write_stream(struct odb_source *source,
## odb.h ##
@@ odb.h: static inline int odb_write_object(struct object_database *odb,
+ return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0);
}
- struct odb_write_stream {
+-struct odb_write_stream {
- const void *(*read)(struct odb_write_stream *, unsigned long *len);
-+ ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t len);
- void *data;
- int is_finished;
- };
+- void *data;
+- int is_finished;
+-};
++struct odb_write_stream;
+
+ int odb_write_object_stream(struct object_database *odb,
+ struct odb_write_stream *stream, size_t len,
+
+ ## odb/streaming.c ##
+@@ odb/streaming.c: struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+ return st;
+ }
+
++ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
++{
++ return st->read(st, buf, sz);
++}
++
+ int odb_stream_blob_to_fd(struct object_database *odb,
+ int fd,
+ const struct object_id *oid,
+
+ ## odb/streaming.h ##
+@@ odb/streaming.h: int odb_read_stream_close(struct odb_read_stream *stream);
+ */
+ ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);
+
++/*
++ * A stream that provides an object to be written to the object database without
++ * loading all of it into memory.
++ */
++struct odb_write_stream {
++ ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t);
++ void *data;
++ int is_finished;
++};
++
++/*
++ * Read data from the stream into the buffer. Returns 0 when finished and the
++ * number of bytes read on success. Returns a negative error code in case
++ * reading from the stream fails.
++ */
++ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
++ size_t len);
++
+ /*
+ * Look up the object by its ID and write the full contents to the file
+ * descriptor. The object must be a blob, or the function will fail. When
4: a9f0e5ad8a ! 4: 72d4656eee object-file: remove flags from transaction packfile writes
@@ object-file.c: static void prepare_packfile_transaction(struct odb_transaction_f
+ unsigned char buf[16384];
+ struct git_hash_ctx ctx;
+ unsigned header_len;
-+ size_t total = 0;
++ size_t bytes_hashed = 0;
+
+ header_len = format_object_header((char *)buf, sizeof(buf),
+ OBJ_BLOB, size);
@@ object-file.c: static void prepare_packfile_transaction(struct odb_transaction_f
+ git_hash_update(&ctx, buf, header_len);
+
+ while (!stream->is_finished) {
-+ ssize_t read_result = stream->read(stream, buf, sizeof(buf));
++ ssize_t read_result = odb_write_stream_read(stream, buf,
++ sizeof(buf));
+
+ if (read_result < 0)
+ return -1;
+
+ git_hash_update(&ctx, buf, read_result);
-+ total += read_result;
++ bytes_hashed += read_result;
+ }
+
-+ if (total != size)
++ if (bytes_hashed != size)
+ return -1;
+
+ git_hash_final_oid(result_oid, &ctx);
@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
- xsize_t(st->st_size),
- path, flags);
- odb_transaction_commit(transaction);
-+ struct odb_write_stream stream = { 0 };
++ struct odb_write_stream stream;
+ odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
+
+ if (flags & INDEX_WRITE_OBJECT) {
@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
+ xsize_t(st->st_size));
+ }
+
-+ free(stream.data);
++ odb_write_stream_release(&stream);
}
close(fd);
## odb/streaming.c ##
+@@ odb/streaming.c: ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
+ return st->read(st, buf, sz);
+ }
+
++void odb_write_stream_release(struct odb_write_stream *st)
++{
++ free(st->data);
++}
++
+ int odb_stream_blob_to_fd(struct object_database *odb,
+ int fd,
+ const struct object_id *oid,
@@ odb/streaming.c: int odb_stream_blob_to_fd(struct object_database *odb,
odb_read_stream_close(st);
return result;
@@ odb/streaming.c: int odb_stream_blob_to_fd(struct object_database *odb,
+
+ stream->data = data;
+ stream->read = read_object_fd;
++ stream->is_finished = 0;
+}
## odb/streaming.h ##
@@ odb/streaming.h
struct object_database;
struct odb_read_stream;
+@@ odb/streaming.h: struct odb_write_stream {
+ ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
+ size_t len);
+
++/*
++ * Releases memory allocated for underlying stream data.
++ */
++void odb_write_stream_release(struct odb_write_stream *stream);
++
+ /*
+ * Look up the object by its ID and write the full contents to the file
+ * descriptor. The object must be a blob, or the function will fail. When
@@ odb/streaming.h: int odb_stream_blob_to_fd(struct object_database *odb,
struct stream_filter *filter,
int can_seek);
+/*
-+ * Sets up an ODB write stream that reads from an fd. The caller is expected to
-+ * free the underlying stream data.
++ * Sets up an ODB write stream that reads from an fd.
+ */
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size);
5: b7ac82ed7e = 5: e4896101ff object-file: avoid fd seekback by checking object size upfront
6: d6c4187a0f ! 6: b3cb0a707c object-file: generalize packfile writes to use odb_write_stream
@@ object-file.c: static int hash_blob_stream(struct odb_write_stream *stream,
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
-+ size_t total = 0;
++ size_t bytes_read = 0;
git_deflate_init(&s, pack_compression_level);
@@ object-file.c: static void stream_blob_to_pack(struct transaction_packfile *stat
- die("failed to read %u bytes from '%s'",
- (unsigned)rsize, path);
+ if (!stream->is_finished && !s.avail_in) {
-+ ssize_t rsize = stream->read(stream, ibuf, sizeof(ibuf));
++ ssize_t rsize = odb_write_stream_read(stream, ibuf,
++ sizeof(ibuf));
+
+ if (rsize < 0)
+ die("failed to read blob data");
@@ object-file.c: static void stream_blob_to_pack(struct transaction_packfile *stat
s.next_in = ibuf;
s.avail_in = rsize;
- size -= rsize;
-+ total += rsize;
++ bytes_read += rsize;
}
- status = git_deflate(&s, size ? 0 : Z_FINISH);
@@ object-file.c: static void stream_blob_to_pack(struct transaction_packfile *stat
}
}
+
-+ if (total != size)
++ if (bytes_read != size)
+ die("read %" PRIuMAX " bytes of blob data, but expected %" PRIuMAX " bytes",
-+ (uintmax_t)total, (uintmax_t)size);
++ (uintmax_t)bytes_read, (uintmax_t)size);
+
git_deflate_end(&s);
}
7: 2b81e94677 ! 7: e1d292a7ed odb/transaction: make `write_object_stream()` pluggable
@@ Commit message
How an ODB transaction handles writing objects is expected to vary
between implementations. Introduce a new `write_object_stream()`
callback in `struct odb_transaction` to make this function pluggable.
- Wire up `index_blob_packfile_transaction()` for use with `struct
- odb_transaction_files` accordingly.
+ Rename `index_blob_packfile_transaction()` to
+ `odb_transaction_files_write_object_stream()` and wire it up for use
+ with `struct odb_transaction_files` accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
base-commit: 5361983c075154725be47b65cca9a2421789e410
--
2.53.0.381.g628a66ccf6
^ permalink raw reply [flat|nested] 57+ messages in thread* [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
` (7 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The current ODB transaction interface is colocated with other ODB
interfaces in "odb.{c,h}". Subsequent commits will expand `struct
odb_transaction` to support write operations on the transaction
directly. To keep things organized and prevent "odb.{c,h}" from becoming
more unwieldy, split out `struct odb_transaction` into a separate
header.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Makefile | 1 +
builtin/add.c | 1 +
builtin/unpack-objects.c | 1 +
builtin/update-index.c | 1 +
cache-tree.c | 1 +
meson.build | 1 +
object-file.c | 1 +
odb.c | 25 -------------------------
odb.h | 31 -------------------------------
odb/transaction.c | 28 ++++++++++++++++++++++++++++
odb/transaction.h | 38 ++++++++++++++++++++++++++++++++++++++
read-cache.c | 1 +
12 files changed, 74 insertions(+), 56 deletions(-)
create mode 100644 odb/transaction.c
create mode 100644 odb/transaction.h
diff --git a/Makefile b/Makefile
index dbf0022054..6342db13e5 100644
--- a/Makefile
+++ b/Makefile
@@ -1219,6 +1219,7 @@ LIB_OBJS += odb.o
LIB_OBJS += odb/source.o
LIB_OBJS += odb/source-files.o
LIB_OBJS += odb/streaming.o
+LIB_OBJS += odb/transaction.o
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
diff --git a/builtin/add.c b/builtin/add.c
index 7737ab878b..c859f66519 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -16,6 +16,7 @@
#include "run-command.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 6fc64e9e4b..bc9b1e047e 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -9,6 +9,7 @@
#include "hex.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 8a5907767b..bcc43852ef 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -19,6 +19,7 @@
#include "tree-walk.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "refs.h"
#include "resolve-undo.h"
#include "parse-options.h"
diff --git a/cache-tree.c b/cache-tree.c
index 60bcc07c3b..f056869cfd 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -10,6 +10,7 @@
#include "cache-tree.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "read-cache-ll.h"
#include "replace-object.h"
#include "repository.h"
diff --git a/meson.build b/meson.build
index 8309942d18..6dc23b3af2 100644
--- a/meson.build
+++ b/meson.build
@@ -405,6 +405,7 @@ libgit_sources = [
'odb/source.c',
'odb/source-files.c',
'odb/streaming.c',
+ 'odb/transaction.c',
'oid-array.c',
'oidmap.c',
'oidset.c',
diff --git a/object-file.c b/object-file.c
index f0b029ff0b..bfbb632cf8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -21,6 +21,7 @@
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
+#include "odb/transaction.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
diff --git a/odb.c b/odb.c
index 350e23f3c0..8c3cbc1b53 100644
--- a/odb.c
+++ b/odb.c
@@ -1069,28 +1069,3 @@ void odb_reprepare(struct object_database *o)
obj_read_unlock();
}
-
-struct odb_transaction *odb_transaction_begin(struct object_database *odb)
-{
- if (odb->transaction)
- return NULL;
-
- odb->transaction = odb_transaction_files_begin(odb->sources);
-
- return odb->transaction;
-}
-
-void odb_transaction_commit(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- /*
- * Ensure the transaction ending matches the pending transaction.
- */
- ASSERT(transaction == transaction->source->odb->transaction);
-
- transaction->commit(transaction);
- transaction->source->odb->transaction = NULL;
- free(transaction);
-}
diff --git a/odb.h b/odb.h
index 9aee260105..ec5367b13e 100644
--- a/odb.h
+++ b/odb.h
@@ -35,24 +35,6 @@ struct packed_git;
struct packfile_store;
struct cached_object_entry;
-/*
- * A transaction may be started for an object database prior to writing new
- * objects via odb_transaction_begin(). These objects are not committed until
- * odb_transaction_commit() is invoked. Only a single transaction may be pending
- * at a time.
- *
- * Each ODB source is expected to implement its own transaction handling.
- */
-struct odb_transaction;
-typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
-struct odb_transaction {
- /* The ODB source the transaction is opened against. */
- struct odb_source *source;
-
- /* The ODB source specific callback invoked to commit a transaction. */
- odb_transaction_commit_fn commit;
-};
-
/*
* The object database encapsulates access to objects in a repository. It
* manages one or more sources that store the actual objects which are
@@ -154,19 +136,6 @@ void odb_close(struct object_database *o);
*/
void odb_reprepare(struct object_database *o);
-/*
- * Starts an ODB transaction. Subsequent objects are written to the transaction
- * and not committed until odb_transaction_commit() is invoked on the
- * transaction. If the ODB already has a pending transaction, NULL is returned.
- */
-struct odb_transaction *odb_transaction_begin(struct object_database *odb);
-
-/*
- * Commits an ODB transaction making the written objects visible. If the
- * specified transaction is NULL, the function is a no-op.
- */
-void odb_transaction_commit(struct odb_transaction *transaction);
-
/*
* Find source by its object directory path. Returns a `NULL` pointer in case
* the source could not be found.
diff --git a/odb/transaction.c b/odb/transaction.c
new file mode 100644
index 0000000000..9bf3f347dc
--- /dev/null
+++ b/odb/transaction.c
@@ -0,0 +1,28 @@
+#include "git-compat-util.h"
+#include "object-file.h"
+#include "odb/transaction.h"
+
+struct odb_transaction *odb_transaction_begin(struct object_database *odb)
+{
+ if (odb->transaction)
+ return NULL;
+
+ odb->transaction = odb_transaction_files_begin(odb->sources);
+
+ return odb->transaction;
+}
+
+void odb_transaction_commit(struct odb_transaction *transaction)
+{
+ if (!transaction)
+ return;
+
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->source->odb->transaction);
+
+ transaction->commit(transaction);
+ transaction->source->odb->transaction = NULL;
+ free(transaction);
+}
diff --git a/odb/transaction.h b/odb/transaction.h
new file mode 100644
index 0000000000..a56e392f21
--- /dev/null
+++ b/odb/transaction.h
@@ -0,0 +1,38 @@
+#ifndef ODB_TRANSACTION_H
+#define ODB_TRANSACTION_H
+
+#include "odb.h"
+#include "odb/source.h"
+
+/*
+ * A transaction may be started for an object database prior to writing new
+ * objects via odb_transaction_begin(). These objects are not committed until
+ * odb_transaction_commit() is invoked. Only a single transaction may be pending
+ * at a time.
+ *
+ * Each ODB source is expected to implement its own transaction handling.
+ */
+struct odb_transaction;
+typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
+struct odb_transaction {
+ /* The ODB source the transaction is opened against. */
+ struct odb_source *source;
+
+ /* The ODB source specific callback invoked to commit a transaction. */
+ odb_transaction_commit_fn commit;
+};
+
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
+ * transaction. If the ODB already has a pending transaction, NULL is returned.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
+/*
+ * Commits an ODB transaction making the written objects visible. If the
+ * specified transaction is NULL, the function is a no-op.
+ */
+void odb_transaction_commit(struct odb_transaction *transaction);
+
+#endif
diff --git a/read-cache.c b/read-cache.c
index 5049f9baca..8147c7e94a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -20,6 +20,7 @@
#include "dir.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "oid-array.h"
#include "tree.h"
#include "commit.h"
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()`
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
2026-04-02 21:32 ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
` (6 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
Each ODB source is expected to provide an ODB transaction implementation
that should be used when starting a transaction. With d6fc6fe6f8
(odb/source: make `begin_transaction()` function pluggable, 2026-03-05),
the `struct odb_source` now provides a pluggable callback for beginning
transactions. Use the callback provided by the ODB source accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
odb/transaction.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/odb/transaction.c b/odb/transaction.c
index 9bf3f347dc..592ac84075 100644
--- a/odb/transaction.c
+++ b/odb/transaction.c
@@ -1,5 +1,5 @@
#include "git-compat-util.h"
-#include "object-file.h"
+#include "odb/source.h"
#include "odb/transaction.h"
struct odb_transaction *odb_transaction_begin(struct object_database *odb)
@@ -7,7 +7,7 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb)
if (odb->transaction)
return NULL;
- odb->transaction = odb_transaction_files_begin(odb->sources);
+ odb_source_begin_transaction(odb->sources, &odb->transaction);
return odb->transaction;
}
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
2026-04-02 21:32 ` [PATCH v3 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-04-02 21:32 ` [PATCH v3 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-05-11 17:58 ` Jeff King
2026-04-02 21:32 ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
` (5 subsequent siblings)
8 siblings, 1 reply; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The `read()` callback used by `struct odb_write_stream` currently
returns a pointer to an internal buffer along with the number of bytes
read. This makes buffer ownership unclear and provides no way to report
errors.
Update the interface to instead require the caller to provide a buffer,
and have the callback return the number of bytes written to it or a
negative value on error. While at it, also move the `struct
odb_write_stream` definition to "odb/streaming.h". Call sites are
updated accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/unpack-objects.c | 20 ++++++++------------
object-file.c | 14 +++++++++++---
odb.h | 6 +-----
odb/streaming.c | 5 +++++
odb/streaming.h | 18 ++++++++++++++++++
5 files changed, 43 insertions(+), 20 deletions(-)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index bc9b1e047e..64e58e79fd 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -9,6 +9,7 @@
#include "hex.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "odb/transaction.h"
#include "object.h"
#include "delta.h"
@@ -360,24 +361,21 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
struct input_zstream_data {
git_zstream *zstream;
- unsigned char buf[8192];
int status;
};
-static const void *feed_input_zstream(struct odb_write_stream *in_stream,
- unsigned long *readlen)
+static ssize_t feed_input_zstream(struct odb_write_stream *in_stream,
+ unsigned char *buf, size_t buf_len)
{
struct input_zstream_data *data = in_stream->data;
git_zstream *zstream = data->zstream;
void *in = fill(1);
- if (in_stream->is_finished) {
- *readlen = 0;
- return NULL;
- }
+ if (in_stream->is_finished)
+ return 0;
- zstream->next_out = data->buf;
- zstream->avail_out = sizeof(data->buf);
+ zstream->next_out = buf;
+ zstream->avail_out = buf_len;
zstream->next_in = in;
zstream->avail_in = len;
@@ -385,9 +383,7 @@ static const void *feed_input_zstream(struct odb_write_stream *in_stream,
in_stream->is_finished = data->status != Z_OK;
use(len - zstream->avail_in);
- *readlen = sizeof(data->buf) - zstream->avail_out;
-
- return data->buf;
+ return buf_len - zstream->avail_out;
}
static void stream_blob(unsigned long size, unsigned nr)
diff --git a/object-file.c b/object-file.c
index bfbb632cf8..0ae36314aa 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1066,6 +1066,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
struct git_hash_ctx c, compat_c;
struct strbuf tmp_file = STRBUF_INIT;
struct strbuf filename = STRBUF_INIT;
+ unsigned char buf[8192];
int dirlen;
char hdr[MAX_HEADER_LEN];
int hdrlen;
@@ -1098,9 +1099,16 @@ int odb_source_loose_write_stream(struct odb_source *source,
unsigned char *in0 = stream.next_in;
if (!stream.avail_in && !in_stream->is_finished) {
- const void *in = in_stream->read(in_stream, &stream.avail_in);
- stream.next_in = (void *)in;
- in0 = (unsigned char *)in;
+ ssize_t read_len = odb_write_stream_read(in_stream, buf,
+ sizeof(buf));
+ if (read_len < 0) {
+ err = -1;
+ goto cleanup;
+ }
+
+ stream.avail_in = read_len;
+ stream.next_in = buf;
+ in0 = buf;
/* All data has been read. */
if (in_stream->is_finished)
flush = 1;
diff --git a/odb.h b/odb.h
index ec5367b13e..6faeaa0589 100644
--- a/odb.h
+++ b/odb.h
@@ -529,11 +529,7 @@ static inline int odb_write_object(struct object_database *odb,
return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0);
}
-struct odb_write_stream {
- const void *(*read)(struct odb_write_stream *, unsigned long *len);
- void *data;
- int is_finished;
-};
+struct odb_write_stream;
int odb_write_object_stream(struct object_database *odb,
struct odb_write_stream *stream, size_t len,
diff --git a/odb/streaming.c b/odb/streaming.c
index 5927a12954..a68dd2cbe3 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -232,6 +232,11 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
return st;
}
+ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
+{
+ return st->read(st, buf, sz);
+}
+
int odb_stream_blob_to_fd(struct object_database *odb,
int fd,
const struct object_id *oid,
diff --git a/odb/streaming.h b/odb/streaming.h
index c7861f7e13..65ced911fe 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -47,6 +47,24 @@ int odb_read_stream_close(struct odb_read_stream *stream);
*/
ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);
+/*
+ * A stream that provides an object to be written to the object database without
+ * loading all of it into memory.
+ */
+struct odb_write_stream {
+ ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t);
+ void *data;
+ int is_finished;
+};
+
+/*
+ * Read data from the stream into the buffer. Returns 0 when finished and the
+ * number of bytes read on success. Returns a negative error code in case
+ * reading from the stream fails.
+ */
+ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
+ size_t len);
+
/*
* Look up the object by its ID and write the full contents to the file
* descriptor. The object must be a blob, or the function will fail. When
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* Re: [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback
2026-04-02 21:32 ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
@ 2026-05-11 17:58 ` Jeff King
2026-05-12 15:19 ` Justin Tobler
0 siblings, 1 reply; 57+ messages in thread
From: Jeff King @ 2026-05-11 17:58 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, ps, gitster
On Thu, Apr 02, 2026 at 04:32:16PM -0500, Justin Tobler wrote:
> @@ -1098,9 +1099,16 @@ int odb_source_loose_write_stream(struct odb_source *source,
> unsigned char *in0 = stream.next_in;
>
> if (!stream.avail_in && !in_stream->is_finished) {
> - const void *in = in_stream->read(in_stream, &stream.avail_in);
> - stream.next_in = (void *)in;
> - in0 = (unsigned char *)in;
> + ssize_t read_len = odb_write_stream_read(in_stream, buf,
> + sizeof(buf));
> + if (read_len < 0) {
> + err = -1;
> + goto cleanup;
> + }
> +
> + stream.avail_in = read_len;
> + stream.next_in = buf;
> + in0 = buf;
If we hit this "goto cleanup", we'll leak the "fd" descriptor opened
earlier. We either need to close(fd) here, or do so in the cleanup
handler (but that means consistently setting fd to a sentinel value
after we close it, which we do not currently do).
Noticed by Coverity (I guess this series just hit "jch", since it's
"new" as of today's run).
-Peff
^ permalink raw reply [flat|nested] 57+ messages in thread* Re: [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback
2026-05-11 17:58 ` Jeff King
@ 2026-05-12 15:19 ` Justin Tobler
0 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-12 15:19 UTC (permalink / raw)
To: Jeff King; +Cc: git, ps, gitster
On 26/05/11 01:58PM, Jeff King wrote:
> On Thu, Apr 02, 2026 at 04:32:16PM -0500, Justin Tobler wrote:
>
> > @@ -1098,9 +1099,16 @@ int odb_source_loose_write_stream(struct odb_source *source,
> > unsigned char *in0 = stream.next_in;
> >
> > if (!stream.avail_in && !in_stream->is_finished) {
> > - const void *in = in_stream->read(in_stream, &stream.avail_in);
> > - stream.next_in = (void *)in;
> > - in0 = (unsigned char *)in;
> > + ssize_t read_len = odb_write_stream_read(in_stream, buf,
> > + sizeof(buf));
> > + if (read_len < 0) {
> > + err = -1;
> > + goto cleanup;
> > + }
> > +
> > + stream.avail_in = read_len;
> > + stream.next_in = buf;
> > + in0 = buf;
>
> If we hit this "goto cleanup", we'll leak the "fd" descriptor opened
> earlier. We either need to close(fd) here, or do so in the cleanup
> handler (but that means consistently setting fd to a sentinel value
> after we close it, which we do not currently do).
>
> Noticed by Coverity (I guess this series just hit "jch", since it's
> "new" as of today's run).
Thanks, I will send another version to fix this leak.
-Justin
^ permalink raw reply [flat|nested] 57+ messages in thread
* [PATCH v3 4/7] object-file: remove flags from transaction packfile writes
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
` (2 preceding siblings ...)
2026-04-02 21:32 ` [PATCH v3 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-06 20:16 ` Jeff King
2026-04-02 21:32 ` [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
` (4 subsequent siblings)
8 siblings, 1 reply; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The `index_blob_packfile_transaction()` function handles streaming a
blob from an fd to compute its object ID and conditionally writes the
object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A
subsequent commit will make these packfile object writes part of the
transaction interface. Consequently, having the object write be
conditional on this flag is a bit awkward.
In preparation for this change, introduce a dedicated
`hash_blob_stream()` helper that only computes the OID from a `struct
odb_write_stream`. This is invoked by `index_fd()` instead when the
INDEX_WRITE_OBJECT is not set. The object write performed via
`index_blob_packfile_transaction()` is made unconditional accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 132 +++++++++++++++++++++++++++++-------------------
odb/streaming.c | 46 +++++++++++++++++
odb/streaming.h | 12 +++++
3 files changed, 138 insertions(+), 52 deletions(-)
diff --git a/object-file.c b/object-file.c
index 0ae36314aa..382d14c8c0 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1396,11 +1396,10 @@ static int already_written(struct odb_transaction_files *transaction,
}
/* Lazily create backing packfile for the state */
-static void prepare_packfile_transaction(struct odb_transaction_files *transaction,
- unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction_files *transaction)
{
struct transaction_packfile *state = &transaction->packfile;
- if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+ if (state->f)
return;
state->f = create_tmp_packfile(transaction->base.source->odb->repo,
@@ -1413,6 +1412,39 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
die_errno("unable to write pack header");
}
+static int hash_blob_stream(struct odb_write_stream *stream,
+ const struct git_hash_algo *hash_algo,
+ struct object_id *result_oid, size_t size)
+{
+ unsigned char buf[16384];
+ struct git_hash_ctx ctx;
+ unsigned header_len;
+ size_t bytes_hashed = 0;
+
+ header_len = format_object_header((char *)buf, sizeof(buf),
+ OBJ_BLOB, size);
+ hash_algo->init_fn(&ctx);
+ git_hash_update(&ctx, buf, header_len);
+
+ while (!stream->is_finished) {
+ ssize_t read_result = odb_write_stream_read(stream, buf,
+ sizeof(buf));
+
+ if (read_result < 0)
+ return -1;
+
+ git_hash_update(&ctx, buf, read_result);
+ bytes_hashed += read_result;
+ }
+
+ if (bytes_hashed != size)
+ return -1;
+
+ git_hash_final_oid(result_oid, &ctx);
+
+ return 0;
+}
+
/*
* Read the contents from fd for size bytes, streaming it to the
* packfile in state while updating the hash in ctx. Signal a failure
@@ -1430,15 +1462,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
*/
static int stream_blob_to_pack(struct transaction_packfile *state,
struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path,
- unsigned flags)
+ int fd, size_t size, const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- int write_object = (flags & INDEX_WRITE_OBJECT);
off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1473,20 +1503,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
status = git_deflate(&s, size ? 0 : Z_FINISH);
if (!s.avail_out || status == Z_STREAM_END) {
- if (write_object) {
- size_t written = s.next_out - obuf;
-
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
- hashwrite(state->f, obuf, written);
- state->offset += written;
+ size_t written = s.next_out - obuf;
+
+ /* would we bust the size limit? */
+ if (state->nr_written &&
+ pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + written) {
+ git_deflate_abort(&s);
+ return -1;
}
+
+ hashwrite(state->f, obuf, written);
+ state->offset += written;
s.next_out = obuf;
s.avail_out = sizeof(obuf);
}
@@ -1574,8 +1602,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
*/
static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
struct object_id *result_oid, int fd,
- size_t size, const char *path,
- unsigned flags)
+ size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
off_t seekback, already_hashed_to;
@@ -1583,7 +1610,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
- struct pack_idx_entry *idx = NULL;
+ struct pack_idx_entry *idx;
seekback = lseek(fd, 0, SEEK_CUR);
if (seekback == (off_t)-1)
@@ -1594,33 +1621,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
- /* Note: idx is non-NULL when we are writing */
- if ((flags & INDEX_WRITE_OBJECT) != 0) {
- CALLOC_ARRAY(idx, 1);
-
- prepare_packfile_transaction(transaction, flags);
- hashfile_checkpoint_init(state->f, &checkpoint);
- }
+ CALLOC_ARRAY(idx, 1);
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint_init(state->f, &checkpoint);
already_hashed_to = 0;
while (1) {
- prepare_packfile_transaction(transaction, flags);
- if (idx) {
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
- }
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+
if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path, flags))
+ fd, size, path))
break;
/*
* Writing this object to the current pack will make
* it too big; we need to truncate it, start a new
* pack, and write into it.
*/
- if (!idx)
- BUG("should not happen");
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
flush_packfile_transaction(transaction);
@@ -1628,8 +1648,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
return error("cannot seek back");
}
git_hash_final_oid(result_oid, &ctx);
- if (!idx)
- return 0;
idx->crc32 = crc32_end(state->f);
if (already_written(transaction, result_oid)) {
@@ -1667,18 +1685,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
ret = index_core(istate, oid, fd, xsize_t(st->st_size),
type, path, flags);
} else {
- struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size),
- path, flags);
- odb_transaction_commit(transaction);
+ struct odb_write_stream stream;
+ odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
+
+ if (flags & INDEX_WRITE_OBJECT) {
+ struct object_database *odb = the_repository->objects;
+ struct odb_transaction_files *files_transaction;
+ struct odb_transaction *transaction;
+
+ transaction = odb_transaction_begin(odb);
+ files_transaction = container_of(odb->transaction,
+ struct odb_transaction_files,
+ base);
+ ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+ xsize_t(st->st_size), path);
+ odb_transaction_commit(transaction);
+ } else {
+ ret = hash_blob_stream(&stream,
+ the_repository->hash_algo, oid,
+ xsize_t(st->st_size));
+ }
+
+ odb_write_stream_release(&stream);
}
close(fd);
diff --git a/odb/streaming.c b/odb/streaming.c
index a68dd2cbe3..20531e864c 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -237,6 +237,11 @@ ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
return st->read(st, buf, sz);
}
+void odb_write_stream_release(struct odb_write_stream *st)
+{
+ free(st->data);
+}
+
int odb_stream_blob_to_fd(struct object_database *odb,
int fd,
const struct object_id *oid,
@@ -292,3 +297,44 @@ int odb_stream_blob_to_fd(struct object_database *odb,
odb_read_stream_close(st);
return result;
}
+
+struct read_object_fd_data {
+ int fd;
+ size_t remaining;
+};
+
+static ssize_t read_object_fd(struct odb_write_stream *stream,
+ unsigned char *buf, size_t len)
+{
+ struct read_object_fd_data *data = stream->data;
+ ssize_t read_result;
+ size_t count;
+
+ if (stream->is_finished)
+ return 0;
+
+ count = data->remaining < len ? data->remaining : len;
+ read_result = read_in_full(data->fd, buf, count);
+ if (read_result < 0 || (size_t)read_result != count)
+ return -1;
+
+ data->remaining -= count;
+ if (!data->remaining)
+ stream->is_finished = 1;
+
+ return read_result;
+}
+
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size)
+{
+ struct read_object_fd_data *data;
+
+ CALLOC_ARRAY(data, 1);
+ data->fd = fd;
+ data->remaining = size;
+
+ stream->data = data;
+ stream->read = read_object_fd;
+ stream->is_finished = 0;
+}
diff --git a/odb/streaming.h b/odb/streaming.h
index 65ced911fe..2a8cac19a4 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -5,6 +5,7 @@
#define STREAMING_H 1
#include "object.h"
+#include "odb.h"
struct object_database;
struct odb_read_stream;
@@ -65,6 +66,11 @@ struct odb_write_stream {
ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
size_t len);
+/*
+ * Releases memory allocated for underlying stream data.
+ */
+void odb_write_stream_release(struct odb_write_stream *stream);
+
/*
* Look up the object by its ID and write the full contents to the file
* descriptor. The object must be a blob, or the function will fail. When
@@ -82,4 +88,10 @@ int odb_stream_blob_to_fd(struct object_database *odb,
struct stream_filter *filter,
int can_seek);
+/*
+ * Sets up an ODB write stream that reads from an fd.
+ */
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size);
+
#endif /* STREAMING_H */
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* Re: [PATCH v3 4/7] object-file: remove flags from transaction packfile writes
2026-04-02 21:32 ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
@ 2026-04-06 20:16 ` Jeff King
2026-04-06 20:19 ` Jeff King
0 siblings, 1 reply; 57+ messages in thread
From: Jeff King @ 2026-04-06 20:16 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, ps, gitster
On Thu, Apr 02, 2026 at 04:32:17PM -0500, Justin Tobler wrote:
> @@ -1667,18 +1685,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
> [...]
> + struct odb_write_stream stream;
> + odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
> +
> + if (flags & INDEX_WRITE_OBJECT) {
> + struct object_database *odb = the_repository->objects;
> + struct odb_transaction_files *files_transaction;
> + struct odb_transaction *transaction;
> +
> + transaction = odb_transaction_begin(odb);
> + files_transaction = container_of(odb->transaction,
> + struct odb_transaction_files,
> + base);
> + ret = index_blob_packfile_transaction(files_transaction, oid, fd,
> + xsize_t(st->st_size), path);
> + odb_transaction_commit(transaction);
> + } else {
> + ret = hash_blob_stream(&stream,
> + the_repository->hash_algo, oid,
> + xsize_t(st->st_size));
> + }
> +
> + odb_write_stream_release(&stream);
Probably not a big deal, but I notice that "stream" is not used in half
of the conditional. Should its initialization and cleanup be pushed down
into the else clause?
-Peff
^ permalink raw reply [flat|nested] 57+ messages in thread* Re: [PATCH v3 4/7] object-file: remove flags from transaction packfile writes
2026-04-06 20:16 ` Jeff King
@ 2026-04-06 20:19 ` Jeff King
0 siblings, 0 replies; 57+ messages in thread
From: Jeff King @ 2026-04-06 20:19 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, ps, gitster
On Mon, Apr 06, 2026 at 04:16:28PM -0400, Jeff King wrote:
> On Thu, Apr 02, 2026 at 04:32:17PM -0500, Justin Tobler wrote:
>
> > @@ -1667,18 +1685,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
> > [...]
> > + struct odb_write_stream stream;
> > + odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
> > +
> > + if (flags & INDEX_WRITE_OBJECT) {
> > + struct object_database *odb = the_repository->objects;
> > + struct odb_transaction_files *files_transaction;
> > + struct odb_transaction *transaction;
> > +
> > + transaction = odb_transaction_begin(odb);
> > + files_transaction = container_of(odb->transaction,
> > + struct odb_transaction_files,
> > + base);
> > + ret = index_blob_packfile_transaction(files_transaction, oid, fd,
> > + xsize_t(st->st_size), path);
> > + odb_transaction_commit(transaction);
> > + } else {
> > + ret = hash_blob_stream(&stream,
> > + the_repository->hash_algo, oid,
> > + xsize_t(st->st_size));
> > + }
> > +
> > + odb_write_stream_release(&stream);
>
> Probably not a big deal, but I notice that "stream" is not used in half
> of the conditional. Should its initialization and cleanup be pushed down
> into the else clause?
Ah, never mind. I was looking at this patch in isolation as a solution
to the segfault problem. But later in the series, you end up converting
index_blob_packfile_transaction() to use stream, too, which seems like a
good direction.
So it is a little funny at this step, but it reduces the diff later.
-Peff
^ permalink raw reply [flat|nested] 57+ messages in thread
* [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
` (3 preceding siblings ...)
2026-04-02 21:32 ` [PATCH v3 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
` (3 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
In certain scenarios, Git handles writing blobs that exceed
"core.bigFileThreshold" differently by streaming the object directly
into a packfile. When there is an active ODB transaction, these blobs
are streamed to the same packfile instead of using a separate packfile
for each. If "pack.packSizeLimit" is configured and streaming another
object causes the packfile to exceed the configured limit, the packfile
is truncated back to the previous object and the object write is
restarted in a new packfile.
This works fine, but requires the fd being read from to save a
checkpoint so it becomes possible to rewind the input source via seeking
back to a known offset at the beginning. In a subsequent commit, blob
streaming is converted to use `struct odb_write_stream` as a more
generic input source instead of an fd which doesn't provide a mechanism
for rewinding.
For this use case though, rewinding the fd is not strictly necessary
because the inflated size of the object is known and can be used to
approximate whether writing the object would cause the packfile to
exceed the configured limit prior to writing anything. These blobs
written to the packfile are never deltified thus the size difference
between what is written versus the inflated size is due to zlib
compression. While this does prevent packfiles from being filled to the
potential maximum is some cases, it should be good enough and still
prevents the packfile from exceeding any configured limit.
Use the inflated blob size to determine whether writing an object to a
packfile will exceed the configured "pack.packSizeLimit".
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 86 +++++++++++++++------------------------------------
1 file changed, 25 insertions(+), 61 deletions(-)
diff --git a/object-file.c b/object-file.c
index 382d14c8c0..0284d5434b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1447,29 +1447,17 @@ static int hash_blob_stream(struct odb_write_stream *stream,
/*
* Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
+ * packfile in state while updating the hash in ctx.
*/
-static int stream_blob_to_pack(struct transaction_packfile *state,
- struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path)
+static void stream_blob_to_pack(struct transaction_packfile *state,
+ struct git_hash_ctx *ctx, int fd, size_t size,
+ const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1486,15 +1474,9 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
if ((size_t)read_result != rsize)
die("failed to read %u bytes from '%s'",
(unsigned)rsize, path);
- offset += rsize;
- if (*already_hashed_to < offset) {
- size_t hsize = offset - *already_hashed_to;
- if (rsize < hsize)
- hsize = rsize;
- if (hsize)
- git_hash_update(ctx, ibuf, hsize);
- *already_hashed_to = offset;
- }
+
+ git_hash_update(ctx, ibuf, rsize);
+
s.next_in = ibuf;
s.avail_in = rsize;
size -= rsize;
@@ -1505,14 +1487,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
if (!s.avail_out || status == Z_STREAM_END) {
size_t written = s.next_out - obuf;
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
hashwrite(state->f, obuf, written);
state->offset += written;
s.next_out = obuf;
@@ -1529,7 +1503,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
}
}
git_deflate_end(&s);
- return 0;
}
static void flush_packfile_transaction(struct odb_transaction_files *transaction)
@@ -1605,48 +1578,39 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
- off_t seekback, already_hashed_to;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
struct pack_idx_entry *idx;
- seekback = lseek(fd, 0, SEEK_CUR);
- if (seekback == (off_t)-1)
- return error("cannot find the current offset");
-
header_len = format_object_header((char *)obuf, sizeof(obuf),
OBJ_BLOB, size);
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
+ /*
+ * If writing another object to the packfile could result in it
+ * exceeding the configured size limit, flush the current packfile
+ * transaction.
+ *
+ * Note that this uses the inflated object size as an approximation.
+ * Blob objects written in this manner are not delta-compressed, so
+ * the difference between the inflated and on-disk size is limited
+ * to zlib compression and is sufficient for this check.
+ */
+ if (state->nr_written && pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + size)
+ flush_packfile_transaction(transaction);
+
CALLOC_ARRAY(idx, 1);
prepare_packfile_transaction(transaction);
hashfile_checkpoint_init(state->f, &checkpoint);
- already_hashed_to = 0;
-
- while (1) {
- prepare_packfile_transaction(transaction);
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
-
- if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path))
- break;
- /*
- * Writing this object to the current pack will make
- * it too big; we need to truncate it, start a new
- * pack, and write into it.
- */
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- flush_packfile_transaction(transaction);
- if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
- return error("cannot seek back");
- }
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+ stream_blob_to_pack(state, &ctx, fd, size, path);
git_hash_final_oid(result_oid, &ctx);
idx->crc32 = crc32_end(state->f);
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
` (4 preceding siblings ...)
2026-04-02 21:32 ` [PATCH v3 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-02 21:32 ` [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
` (2 subsequent siblings)
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
The `index_blob_packfile_transaction()` function streams blob data
directly from an fd. This makes it difficult to reuse as part of a
generic transactional object writing interface.
Refactor the packfile write path to operate on a `struct
odb_write_stream`, allowing callers to supply data from arbitrary
sources.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 56 +++++++++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 26 deletions(-)
diff --git a/object-file.c b/object-file.c
index 0284d5434b..7fa2b9239f 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1446,18 +1446,19 @@ static int hash_blob_stream(struct odb_write_stream *stream,
}
/*
- * Read the contents from fd for size bytes, streaming it to the
+ * Read the contents from the stream provided, streaming it to the
* packfile in state while updating the hash in ctx.
*/
static void stream_blob_to_pack(struct transaction_packfile *state,
- struct git_hash_ctx *ctx, int fd, size_t size,
- const char *path)
+ struct git_hash_ctx *ctx, size_t size,
+ struct odb_write_stream *stream)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
+ size_t bytes_read = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1466,23 +1467,21 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
s.avail_out = sizeof(obuf) - hdrlen;
while (status != Z_STREAM_END) {
- if (size && !s.avail_in) {
- size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
- ssize_t read_result = read_in_full(fd, ibuf, rsize);
- if (read_result < 0)
- die_errno("failed to read from '%s'", path);
- if ((size_t)read_result != rsize)
- die("failed to read %u bytes from '%s'",
- (unsigned)rsize, path);
+ if (!stream->is_finished && !s.avail_in) {
+ ssize_t rsize = odb_write_stream_read(stream, ibuf,
+ sizeof(ibuf));
+
+ if (rsize < 0)
+ die("failed to read blob data");
git_hash_update(ctx, ibuf, rsize);
s.next_in = ibuf;
s.avail_in = rsize;
- size -= rsize;
+ bytes_read += rsize;
}
- status = git_deflate(&s, size ? 0 : Z_FINISH);
+ status = git_deflate(&s, stream->is_finished ? Z_FINISH : 0);
if (!s.avail_out || status == Z_STREAM_END) {
size_t written = s.next_out - obuf;
@@ -1502,6 +1501,11 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
die("unexpected deflate failure: %d", status);
}
}
+
+ if (bytes_read != size)
+ die("read %" PRIuMAX " bytes of blob data, but expected %" PRIuMAX " bytes",
+ (uintmax_t)bytes_read, (uintmax_t)size);
+
git_deflate_end(&s);
}
@@ -1573,10 +1577,13 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
- struct object_id *result_oid, int fd,
- size_t size, const char *path)
+static int index_blob_packfile_transaction(struct odb_transaction *base,
+ struct odb_write_stream *stream,
+ size_t size, struct object_id *result_oid)
{
+ struct odb_transaction_files *transaction = container_of(base,
+ struct odb_transaction_files,
+ base);
struct transaction_packfile *state = &transaction->packfile;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
@@ -1610,7 +1617,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
hashfile_checkpoint(state->f, &checkpoint);
idx->offset = state->offset;
crc32_begin(state->f);
- stream_blob_to_pack(state, &ctx, fd, size, path);
+ stream_blob_to_pack(state, &ctx, size, stream);
git_hash_final_oid(result_oid, &ctx);
idx->crc32 = crc32_end(state->f);
@@ -1654,15 +1661,12 @@ int index_fd(struct index_state *istate, struct object_id *oid,
if (flags & INDEX_WRITE_OBJECT) {
struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size), path);
+ struct odb_transaction *transaction = odb_transaction_begin(odb);
+
+ ret = index_blob_packfile_transaction(odb->transaction,
+ &stream,
+ xsize_t(st->st_size),
+ oid);
odb_transaction_commit(transaction);
} else {
ret = hash_blob_stream(&stream,
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
` (5 preceding siblings ...)
2026-04-02 21:32 ` [PATCH v3 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
@ 2026-04-02 21:32 ` Justin Tobler
2026-04-08 7:25 ` [PATCH v3 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
8 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-04-02 21:32 UTC (permalink / raw)
To: git; +Cc: ps, gitster, Justin Tobler
How an ODB transaction handles writing objects is expected to vary
between implementations. Introduce a new `write_object_stream()`
callback in `struct odb_transaction` to make this function pluggable.
Rename `index_blob_packfile_transaction()` to
`odb_transaction_files_write_object_stream()` and wire it up for use
with `struct odb_transaction_files` accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 16 +++++++++-------
odb/transaction.c | 7 +++++++
odb/transaction.h | 25 ++++++++++++++++++++++---
3 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/object-file.c b/object-file.c
index 7fa2b9239f..65356998f3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1577,9 +1577,10 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_packfile_transaction(struct odb_transaction *base,
- struct odb_write_stream *stream,
- size_t size, struct object_id *result_oid)
+static int odb_transaction_files_write_object_stream(struct odb_transaction *base,
+ struct odb_write_stream *stream,
+ size_t size,
+ struct object_id *result_oid)
{
struct odb_transaction_files *transaction = container_of(base,
struct odb_transaction_files,
@@ -1663,10 +1664,10 @@ int index_fd(struct index_state *istate, struct object_id *oid,
struct object_database *odb = the_repository->objects;
struct odb_transaction *transaction = odb_transaction_begin(odb);
- ret = index_blob_packfile_transaction(odb->transaction,
- &stream,
- xsize_t(st->st_size),
- oid);
+ ret = odb_transaction_write_object_stream(odb->transaction,
+ &stream,
+ xsize_t(st->st_size),
+ oid);
odb_transaction_commit(transaction);
} else {
ret = hash_blob_stream(&stream,
@@ -2131,6 +2132,7 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
transaction = xcalloc(1, sizeof(*transaction));
transaction->base.source = source;
transaction->base.commit = odb_transaction_files_commit;
+ transaction->base.write_object_stream = odb_transaction_files_write_object_stream;
return &transaction->base;
}
diff --git a/odb/transaction.c b/odb/transaction.c
index 592ac84075..b16e07aebf 100644
--- a/odb/transaction.c
+++ b/odb/transaction.c
@@ -26,3 +26,10 @@ void odb_transaction_commit(struct odb_transaction *transaction)
transaction->source->odb->transaction = NULL;
free(transaction);
}
+
+int odb_transaction_write_object_stream(struct odb_transaction *transaction,
+ struct odb_write_stream *stream,
+ size_t len, struct object_id *oid)
+{
+ return transaction->write_object_stream(transaction, stream, len, oid);
+}
diff --git a/odb/transaction.h b/odb/transaction.h
index a56e392f21..854fda06f5 100644
--- a/odb/transaction.h
+++ b/odb/transaction.h
@@ -12,14 +12,24 @@
*
* Each ODB source is expected to implement its own transaction handling.
*/
-struct odb_transaction;
-typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
struct odb_transaction {
/* The ODB source the transaction is opened against. */
struct odb_source *source;
/* The ODB source specific callback invoked to commit a transaction. */
- odb_transaction_commit_fn commit;
+ void (*commit)(struct odb_transaction *transaction);
+
+ /*
+ * This callback is expected to write the given object stream into
+ * the ODB transaction. Note that for now, only blobs support streaming.
+ *
+ * The resulting object ID shall be written into the out pointer. The
+ * callback is expected to return 0 on success, a negative error code
+ * otherwise.
+ */
+ int (*write_object_stream)(struct odb_transaction *transaction,
+ struct odb_write_stream *stream, size_t len,
+ struct object_id *oid);
};
/*
@@ -35,4 +45,13 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb);
*/
void odb_transaction_commit(struct odb_transaction *transaction);
+/*
+ * Writes the object in the provided stream into the transaction. The resulting
+ * object ID is written into the out pointer. Returns 0 on success, a negative
+ * error code otherwise.
+ */
+int odb_transaction_write_object_stream(struct odb_transaction *transaction,
+ struct odb_write_stream *stream,
+ size_t len, struct object_id *oid);
+
#endif
--
2.53.0.381.g628a66ccf6
^ permalink raw reply related [flat|nested] 57+ messages in thread* Re: [PATCH v3 0/7] odb: add write operation to ODB transaction interface
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
` (6 preceding siblings ...)
2026-04-02 21:32 ` [PATCH v3 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
@ 2026-04-08 7:25 ` Patrick Steinhardt
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
8 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2026-04-08 7:25 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, gitster
On Thu, Apr 02, 2026 at 04:32:13PM -0500, Justin Tobler wrote:
> Greetings,
>
> This series lays the groundwork for introducing write operations to the
> ODB transaction interface. The eventual goal is for all object writes
> performed within a transaction to go through this interface explicitly,
> rather than implicitly relying on the transaction to reconfigure ODB
> sources so that writes are redirected to a temporary location.
>
> For now, only `odb_transaction_write_object_stream()` is implemented and
> wires up the existing logic for streaming "large" blobs directly into a
> packfile as part of the transaction.
>
> Most of the patches are structural refactorings to enable this, but
> patch 4 introduces a behavioral change in how packfiles that would
> exceed "pack.packSizeLimit" are handled.
>
> Changes since V2:
> - Renamed some variables to improve clarity
> - Make `odb_write_stream_from_fd()` fully initialize the underlying
> `struct odb_write_stream`
> - Move `struct odb_write_stream` to "odb/streaming.h"
> - Make the `hash_blob_stream()` helper more generic by operating on a
> `struct odb_write_stream` instead of reading from an fd directly.
> - Introduce an `odb_write_stream_release()` helper to free the
> underlying stream data.
All these changes here look sensible to me, and the range-diff does,
too. I'm happy with the state of this series, thanks!
Patrick
^ permalink raw reply [flat|nested] 57+ messages in thread* [PATCH v4 0/7] odb: add write operation to ODB transaction interface
2026-04-02 21:32 ` [PATCH v3 " Justin Tobler
` (7 preceding siblings ...)
2026-04-08 7:25 ` [PATCH v3 0/7] odb: add write operation to ODB transaction interface Patrick Steinhardt
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
` (7 more replies)
8 siblings, 8 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
Greetings,
This series lays the groundwork for introducing write operations to the
ODB transaction interface. The eventual goal is for all object writes
performed within a transaction to go through this interface explicitly,
rather than implicitly relying on the transaction to reconfigure ODB
sources so that writes are redirected to a temporary location.
For now, only `odb_transaction_write_object_stream()` is implemented and
wires up the existing logic for streaming "large" blobs directly into a
packfile as part of the transaction.
Most of the patches are structural refactorings to enable this, but
patch 4 introduces a behavioral change in how packfiles that would
exceed "pack.packSizeLimit" are handled.
Changes since V3:
- Fixed leak due to an fd not being closed when exiting prior to
close_loose_object() being invoked.
Changes since V2:
- Renamed some variables to improve clarity
- Make `odb_write_stream_from_fd()` fully initialize the underlying
`struct odb_write_stream`
- Move `struct odb_write_stream` to "odb/streaming.h"
- Make the `hash_blob_stream()` helper more generic by operating on a
`struct odb_write_stream` instead of reading from an fd directly.
- Introduce an `odb_write_stream_release()` helper to free the
underlying stream data.
Changes since V1:
- Fixed some typos
- Improved error handling
- Removed unnecessary guard statement
- Documented in comments why inflated object size is used to approximate
if object write will exceed "pack.packSizeLimit".
- Updated `struct odb_write_stream` read() callback to support returning
errors and using caller provided buffer
- Updated the `hash_blob_stream()` function signature to operate on a
`struct odb_write_stream` instead of an fd directly
- Renamed some variables/functions for better clarity
Thanks,
-Justin
Justin Tobler (7):
odb: split `struct odb_transaction` into separate header
odb/transaction: use pluggable `begin_transaction()`
odb: update `struct odb_write_stream` read() callback
object-file: remove flags from transaction packfile writes
object-file: avoid fd seekback by checking object size upfront
object-file: generalize packfile writes to use odb_write_stream
odb/transaction: make `write_object_stream()` pluggable
Makefile | 1 +
builtin/add.c | 1 +
builtin/unpack-objects.c | 21 ++--
builtin/update-index.c | 1 +
cache-tree.c | 1 +
meson.build | 1 +
object-file.c | 238 ++++++++++++++++++++-------------------
odb.c | 25 ----
odb.h | 37 +-----
odb/streaming.c | 51 +++++++++
odb/streaming.h | 30 +++++
odb/transaction.c | 35 ++++++
odb/transaction.h | 57 ++++++++++
read-cache.c | 1 +
14 files changed, 312 insertions(+), 188 deletions(-)
create mode 100644 odb/transaction.c
create mode 100644 odb/transaction.h
Range-diff against v3:
1: eee372b426 = 1: eee372b426 odb: split `struct odb_transaction` into separate header
2: 57ac075560 = 2: 57ac075560 odb/transaction: use pluggable `begin_transaction()`
3: 11321ad607 ! 3: d53ad95712 odb: update `struct odb_write_stream` read() callback
@@ object-file.c: int odb_source_loose_write_stream(struct odb_source *source,
+ ssize_t read_len = odb_write_stream_read(in_stream, buf,
+ sizeof(buf));
+ if (read_len < 0) {
++ close(fd);
+ err = -1;
+ goto cleanup;
+ }
4: 72d4656eee = 4: fa7a3ad5dc object-file: remove flags from transaction packfile writes
5: e4896101ff = 5: 1ca08e0590 object-file: avoid fd seekback by checking object size upfront
6: b3cb0a707c = 6: a548401057 object-file: generalize packfile writes to use odb_write_stream
7: e1d292a7ed = 7: 4765b1024a odb/transaction: make `write_object_stream()` pluggable
base-commit: 5361983c075154725be47b65cca9a2421789e410
--
2.54.0.105.g59ff4886a5
^ permalink raw reply [flat|nested] 57+ messages in thread* [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
` (6 subsequent siblings)
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
The current ODB transaction interface is colocated with other ODB
interfaces in "odb.{c,h}". Subsequent commits will expand `struct
odb_transaction` to support write operations on the transaction
directly. To keep things organized and prevent "odb.{c,h}" from becoming
more unwieldy, split out `struct odb_transaction` into a separate
header.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Makefile | 1 +
builtin/add.c | 1 +
builtin/unpack-objects.c | 1 +
builtin/update-index.c | 1 +
cache-tree.c | 1 +
meson.build | 1 +
object-file.c | 1 +
odb.c | 25 -------------------------
odb.h | 31 -------------------------------
odb/transaction.c | 28 ++++++++++++++++++++++++++++
odb/transaction.h | 38 ++++++++++++++++++++++++++++++++++++++
read-cache.c | 1 +
12 files changed, 74 insertions(+), 56 deletions(-)
create mode 100644 odb/transaction.c
create mode 100644 odb/transaction.h
diff --git a/Makefile b/Makefile
index dbf0022054..6342db13e5 100644
--- a/Makefile
+++ b/Makefile
@@ -1219,6 +1219,7 @@ LIB_OBJS += odb.o
LIB_OBJS += odb/source.o
LIB_OBJS += odb/source-files.o
LIB_OBJS += odb/streaming.o
+LIB_OBJS += odb/transaction.o
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
diff --git a/builtin/add.c b/builtin/add.c
index 7737ab878b..c859f66519 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -16,6 +16,7 @@
#include "run-command.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 6fc64e9e4b..bc9b1e047e 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -9,6 +9,7 @@
#include "hex.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 8a5907767b..bcc43852ef 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -19,6 +19,7 @@
#include "tree-walk.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "refs.h"
#include "resolve-undo.h"
#include "parse-options.h"
diff --git a/cache-tree.c b/cache-tree.c
index 60bcc07c3b..f056869cfd 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -10,6 +10,7 @@
#include "cache-tree.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "read-cache-ll.h"
#include "replace-object.h"
#include "repository.h"
diff --git a/meson.build b/meson.build
index 8309942d18..6dc23b3af2 100644
--- a/meson.build
+++ b/meson.build
@@ -405,6 +405,7 @@ libgit_sources = [
'odb/source.c',
'odb/source-files.c',
'odb/streaming.c',
+ 'odb/transaction.c',
'oid-array.c',
'oidmap.c',
'oidset.c',
diff --git a/object-file.c b/object-file.c
index f0b029ff0b..bfbb632cf8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -21,6 +21,7 @@
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
+#include "odb/transaction.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
diff --git a/odb.c b/odb.c
index 350e23f3c0..8c3cbc1b53 100644
--- a/odb.c
+++ b/odb.c
@@ -1069,28 +1069,3 @@ void odb_reprepare(struct object_database *o)
obj_read_unlock();
}
-
-struct odb_transaction *odb_transaction_begin(struct object_database *odb)
-{
- if (odb->transaction)
- return NULL;
-
- odb->transaction = odb_transaction_files_begin(odb->sources);
-
- return odb->transaction;
-}
-
-void odb_transaction_commit(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- /*
- * Ensure the transaction ending matches the pending transaction.
- */
- ASSERT(transaction == transaction->source->odb->transaction);
-
- transaction->commit(transaction);
- transaction->source->odb->transaction = NULL;
- free(transaction);
-}
diff --git a/odb.h b/odb.h
index 9aee260105..ec5367b13e 100644
--- a/odb.h
+++ b/odb.h
@@ -35,24 +35,6 @@ struct packed_git;
struct packfile_store;
struct cached_object_entry;
-/*
- * A transaction may be started for an object database prior to writing new
- * objects via odb_transaction_begin(). These objects are not committed until
- * odb_transaction_commit() is invoked. Only a single transaction may be pending
- * at a time.
- *
- * Each ODB source is expected to implement its own transaction handling.
- */
-struct odb_transaction;
-typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
-struct odb_transaction {
- /* The ODB source the transaction is opened against. */
- struct odb_source *source;
-
- /* The ODB source specific callback invoked to commit a transaction. */
- odb_transaction_commit_fn commit;
-};
-
/*
* The object database encapsulates access to objects in a repository. It
* manages one or more sources that store the actual objects which are
@@ -154,19 +136,6 @@ void odb_close(struct object_database *o);
*/
void odb_reprepare(struct object_database *o);
-/*
- * Starts an ODB transaction. Subsequent objects are written to the transaction
- * and not committed until odb_transaction_commit() is invoked on the
- * transaction. If the ODB already has a pending transaction, NULL is returned.
- */
-struct odb_transaction *odb_transaction_begin(struct object_database *odb);
-
-/*
- * Commits an ODB transaction making the written objects visible. If the
- * specified transaction is NULL, the function is a no-op.
- */
-void odb_transaction_commit(struct odb_transaction *transaction);
-
/*
* Find source by its object directory path. Returns a `NULL` pointer in case
* the source could not be found.
diff --git a/odb/transaction.c b/odb/transaction.c
new file mode 100644
index 0000000000..9bf3f347dc
--- /dev/null
+++ b/odb/transaction.c
@@ -0,0 +1,28 @@
+#include "git-compat-util.h"
+#include "object-file.h"
+#include "odb/transaction.h"
+
+struct odb_transaction *odb_transaction_begin(struct object_database *odb)
+{
+ if (odb->transaction)
+ return NULL;
+
+ odb->transaction = odb_transaction_files_begin(odb->sources);
+
+ return odb->transaction;
+}
+
+void odb_transaction_commit(struct odb_transaction *transaction)
+{
+ if (!transaction)
+ return;
+
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->source->odb->transaction);
+
+ transaction->commit(transaction);
+ transaction->source->odb->transaction = NULL;
+ free(transaction);
+}
diff --git a/odb/transaction.h b/odb/transaction.h
new file mode 100644
index 0000000000..a56e392f21
--- /dev/null
+++ b/odb/transaction.h
@@ -0,0 +1,38 @@
+#ifndef ODB_TRANSACTION_H
+#define ODB_TRANSACTION_H
+
+#include "odb.h"
+#include "odb/source.h"
+
+/*
+ * A transaction may be started for an object database prior to writing new
+ * objects via odb_transaction_begin(). These objects are not committed until
+ * odb_transaction_commit() is invoked. Only a single transaction may be pending
+ * at a time.
+ *
+ * Each ODB source is expected to implement its own transaction handling.
+ */
+struct odb_transaction;
+typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
+struct odb_transaction {
+ /* The ODB source the transaction is opened against. */
+ struct odb_source *source;
+
+ /* The ODB source specific callback invoked to commit a transaction. */
+ odb_transaction_commit_fn commit;
+};
+
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
+ * transaction. If the ODB already has a pending transaction, NULL is returned.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
+/*
+ * Commits an ODB transaction making the written objects visible. If the
+ * specified transaction is NULL, the function is a no-op.
+ */
+void odb_transaction_commit(struct odb_transaction *transaction);
+
+#endif
diff --git a/read-cache.c b/read-cache.c
index 5049f9baca..8147c7e94a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -20,6 +20,7 @@
#include "dir.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/transaction.h"
#include "oid-array.h"
#include "tree.h"
#include "commit.h"
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()`
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
2026-05-14 18:37 ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
` (5 subsequent siblings)
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
Each ODB source is expected to provide an ODB transaction implementation
that should be used when starting a transaction. With d6fc6fe6f8
(odb/source: make `begin_transaction()` function pluggable, 2026-03-05),
the `struct odb_source` now provides a pluggable callback for beginning
transactions. Use the callback provided by the ODB source accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
odb/transaction.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/odb/transaction.c b/odb/transaction.c
index 9bf3f347dc..592ac84075 100644
--- a/odb/transaction.c
+++ b/odb/transaction.c
@@ -1,5 +1,5 @@
#include "git-compat-util.h"
-#include "object-file.h"
+#include "odb/source.h"
#include "odb/transaction.h"
struct odb_transaction *odb_transaction_begin(struct object_database *odb)
@@ -7,7 +7,7 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb)
if (odb->transaction)
return NULL;
- odb->transaction = odb_transaction_files_begin(odb->sources);
+ odb_source_begin_transaction(odb->sources, &odb->transaction);
return odb->transaction;
}
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
2026-05-14 18:37 ` [PATCH v4 1/7] odb: split `struct odb_transaction` into separate header Justin Tobler
2026-05-14 18:37 ` [PATCH v4 2/7] odb/transaction: use pluggable `begin_transaction()` Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
` (4 subsequent siblings)
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
The `read()` callback used by `struct odb_write_stream` currently
returns a pointer to an internal buffer along with the number of bytes
read. This makes buffer ownership unclear and provides no way to report
errors.
Update the interface to instead require the caller to provide a buffer,
and have the callback return the number of bytes written to it or a
negative value on error. While at it, also move the `struct
odb_write_stream` definition to "odb/streaming.h". Call sites are
updated accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/unpack-objects.c | 20 ++++++++------------
object-file.c | 15 ++++++++++++---
odb.h | 6 +-----
odb/streaming.c | 5 +++++
odb/streaming.h | 18 ++++++++++++++++++
5 files changed, 44 insertions(+), 20 deletions(-)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index bc9b1e047e..64e58e79fd 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -9,6 +9,7 @@
#include "hex.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "odb/transaction.h"
#include "object.h"
#include "delta.h"
@@ -360,24 +361,21 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
struct input_zstream_data {
git_zstream *zstream;
- unsigned char buf[8192];
int status;
};
-static const void *feed_input_zstream(struct odb_write_stream *in_stream,
- unsigned long *readlen)
+static ssize_t feed_input_zstream(struct odb_write_stream *in_stream,
+ unsigned char *buf, size_t buf_len)
{
struct input_zstream_data *data = in_stream->data;
git_zstream *zstream = data->zstream;
void *in = fill(1);
- if (in_stream->is_finished) {
- *readlen = 0;
- return NULL;
- }
+ if (in_stream->is_finished)
+ return 0;
- zstream->next_out = data->buf;
- zstream->avail_out = sizeof(data->buf);
+ zstream->next_out = buf;
+ zstream->avail_out = buf_len;
zstream->next_in = in;
zstream->avail_in = len;
@@ -385,9 +383,7 @@ static const void *feed_input_zstream(struct odb_write_stream *in_stream,
in_stream->is_finished = data->status != Z_OK;
use(len - zstream->avail_in);
- *readlen = sizeof(data->buf) - zstream->avail_out;
-
- return data->buf;
+ return buf_len - zstream->avail_out;
}
static void stream_blob(unsigned long size, unsigned nr)
diff --git a/object-file.c b/object-file.c
index bfbb632cf8..a1afca23c5 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1066,6 +1066,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
struct git_hash_ctx c, compat_c;
struct strbuf tmp_file = STRBUF_INIT;
struct strbuf filename = STRBUF_INIT;
+ unsigned char buf[8192];
int dirlen;
char hdr[MAX_HEADER_LEN];
int hdrlen;
@@ -1098,9 +1099,17 @@ int odb_source_loose_write_stream(struct odb_source *source,
unsigned char *in0 = stream.next_in;
if (!stream.avail_in && !in_stream->is_finished) {
- const void *in = in_stream->read(in_stream, &stream.avail_in);
- stream.next_in = (void *)in;
- in0 = (unsigned char *)in;
+ ssize_t read_len = odb_write_stream_read(in_stream, buf,
+ sizeof(buf));
+ if (read_len < 0) {
+ close(fd);
+ err = -1;
+ goto cleanup;
+ }
+
+ stream.avail_in = read_len;
+ stream.next_in = buf;
+ in0 = buf;
/* All data has been read. */
if (in_stream->is_finished)
flush = 1;
diff --git a/odb.h b/odb.h
index ec5367b13e..6faeaa0589 100644
--- a/odb.h
+++ b/odb.h
@@ -529,11 +529,7 @@ static inline int odb_write_object(struct object_database *odb,
return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0);
}
-struct odb_write_stream {
- const void *(*read)(struct odb_write_stream *, unsigned long *len);
- void *data;
- int is_finished;
-};
+struct odb_write_stream;
int odb_write_object_stream(struct object_database *odb,
struct odb_write_stream *stream, size_t len,
diff --git a/odb/streaming.c b/odb/streaming.c
index 5927a12954..a68dd2cbe3 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -232,6 +232,11 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
return st;
}
+ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
+{
+ return st->read(st, buf, sz);
+}
+
int odb_stream_blob_to_fd(struct object_database *odb,
int fd,
const struct object_id *oid,
diff --git a/odb/streaming.h b/odb/streaming.h
index c7861f7e13..65ced911fe 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -47,6 +47,24 @@ int odb_read_stream_close(struct odb_read_stream *stream);
*/
ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);
+/*
+ * A stream that provides an object to be written to the object database without
+ * loading all of it into memory.
+ */
+struct odb_write_stream {
+ ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t);
+ void *data;
+ int is_finished;
+};
+
+/*
+ * Read data from the stream into the buffer. Returns 0 when finished and the
+ * number of bytes read on success. Returns a negative error code in case
+ * reading from the stream fails.
+ */
+ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
+ size_t len);
+
/*
* Look up the object by its ID and write the full contents to the file
* descriptor. The object must be a blob, or the function will fail. When
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v4 4/7] object-file: remove flags from transaction packfile writes
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
` (2 preceding siblings ...)
2026-05-14 18:37 ` [PATCH v4 3/7] odb: update `struct odb_write_stream` read() callback Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
` (3 subsequent siblings)
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
The `index_blob_packfile_transaction()` function handles streaming a
blob from an fd to compute its object ID and conditionally writes the
object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A
subsequent commit will make these packfile object writes part of the
transaction interface. Consequently, having the object write be
conditional on this flag is a bit awkward.
In preparation for this change, introduce a dedicated
`hash_blob_stream()` helper that only computes the OID from a `struct
odb_write_stream`. This is invoked by `index_fd()` instead when the
INDEX_WRITE_OBJECT is not set. The object write performed via
`index_blob_packfile_transaction()` is made unconditional accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 132 +++++++++++++++++++++++++++++-------------------
odb/streaming.c | 46 +++++++++++++++++
odb/streaming.h | 12 +++++
3 files changed, 138 insertions(+), 52 deletions(-)
diff --git a/object-file.c b/object-file.c
index a1afca23c5..a59030911f 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1397,11 +1397,10 @@ static int already_written(struct odb_transaction_files *transaction,
}
/* Lazily create backing packfile for the state */
-static void prepare_packfile_transaction(struct odb_transaction_files *transaction,
- unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction_files *transaction)
{
struct transaction_packfile *state = &transaction->packfile;
- if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+ if (state->f)
return;
state->f = create_tmp_packfile(transaction->base.source->odb->repo,
@@ -1414,6 +1413,39 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
die_errno("unable to write pack header");
}
+static int hash_blob_stream(struct odb_write_stream *stream,
+ const struct git_hash_algo *hash_algo,
+ struct object_id *result_oid, size_t size)
+{
+ unsigned char buf[16384];
+ struct git_hash_ctx ctx;
+ unsigned header_len;
+ size_t bytes_hashed = 0;
+
+ header_len = format_object_header((char *)buf, sizeof(buf),
+ OBJ_BLOB, size);
+ hash_algo->init_fn(&ctx);
+ git_hash_update(&ctx, buf, header_len);
+
+ while (!stream->is_finished) {
+ ssize_t read_result = odb_write_stream_read(stream, buf,
+ sizeof(buf));
+
+ if (read_result < 0)
+ return -1;
+
+ git_hash_update(&ctx, buf, read_result);
+ bytes_hashed += read_result;
+ }
+
+ if (bytes_hashed != size)
+ return -1;
+
+ git_hash_final_oid(result_oid, &ctx);
+
+ return 0;
+}
+
/*
* Read the contents from fd for size bytes, streaming it to the
* packfile in state while updating the hash in ctx. Signal a failure
@@ -1431,15 +1463,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti
*/
static int stream_blob_to_pack(struct transaction_packfile *state,
struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path,
- unsigned flags)
+ int fd, size_t size, const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- int write_object = (flags & INDEX_WRITE_OBJECT);
off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1474,20 +1504,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
status = git_deflate(&s, size ? 0 : Z_FINISH);
if (!s.avail_out || status == Z_STREAM_END) {
- if (write_object) {
- size_t written = s.next_out - obuf;
-
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
- hashwrite(state->f, obuf, written);
- state->offset += written;
+ size_t written = s.next_out - obuf;
+
+ /* would we bust the size limit? */
+ if (state->nr_written &&
+ pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + written) {
+ git_deflate_abort(&s);
+ return -1;
}
+
+ hashwrite(state->f, obuf, written);
+ state->offset += written;
s.next_out = obuf;
s.avail_out = sizeof(obuf);
}
@@ -1575,8 +1603,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
*/
static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
struct object_id *result_oid, int fd,
- size_t size, const char *path,
- unsigned flags)
+ size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
off_t seekback, already_hashed_to;
@@ -1584,7 +1611,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
- struct pack_idx_entry *idx = NULL;
+ struct pack_idx_entry *idx;
seekback = lseek(fd, 0, SEEK_CUR);
if (seekback == (off_t)-1)
@@ -1595,33 +1622,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
- /* Note: idx is non-NULL when we are writing */
- if ((flags & INDEX_WRITE_OBJECT) != 0) {
- CALLOC_ARRAY(idx, 1);
-
- prepare_packfile_transaction(transaction, flags);
- hashfile_checkpoint_init(state->f, &checkpoint);
- }
+ CALLOC_ARRAY(idx, 1);
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint_init(state->f, &checkpoint);
already_hashed_to = 0;
while (1) {
- prepare_packfile_transaction(transaction, flags);
- if (idx) {
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
- }
+ prepare_packfile_transaction(transaction);
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+
if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path, flags))
+ fd, size, path))
break;
/*
* Writing this object to the current pack will make
* it too big; we need to truncate it, start a new
* pack, and write into it.
*/
- if (!idx)
- BUG("should not happen");
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
flush_packfile_transaction(transaction);
@@ -1629,8 +1649,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
return error("cannot seek back");
}
git_hash_final_oid(result_oid, &ctx);
- if (!idx)
- return 0;
idx->crc32 = crc32_end(state->f);
if (already_written(transaction, result_oid)) {
@@ -1668,18 +1686,28 @@ int index_fd(struct index_state *istate, struct object_id *oid,
ret = index_core(istate, oid, fd, xsize_t(st->st_size),
type, path, flags);
} else {
- struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size),
- path, flags);
- odb_transaction_commit(transaction);
+ struct odb_write_stream stream;
+ odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size));
+
+ if (flags & INDEX_WRITE_OBJECT) {
+ struct object_database *odb = the_repository->objects;
+ struct odb_transaction_files *files_transaction;
+ struct odb_transaction *transaction;
+
+ transaction = odb_transaction_begin(odb);
+ files_transaction = container_of(odb->transaction,
+ struct odb_transaction_files,
+ base);
+ ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+ xsize_t(st->st_size), path);
+ odb_transaction_commit(transaction);
+ } else {
+ ret = hash_blob_stream(&stream,
+ the_repository->hash_algo, oid,
+ xsize_t(st->st_size));
+ }
+
+ odb_write_stream_release(&stream);
}
close(fd);
diff --git a/odb/streaming.c b/odb/streaming.c
index a68dd2cbe3..20531e864c 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -237,6 +237,11 @@ ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz)
return st->read(st, buf, sz);
}
+void odb_write_stream_release(struct odb_write_stream *st)
+{
+ free(st->data);
+}
+
int odb_stream_blob_to_fd(struct object_database *odb,
int fd,
const struct object_id *oid,
@@ -292,3 +297,44 @@ int odb_stream_blob_to_fd(struct object_database *odb,
odb_read_stream_close(st);
return result;
}
+
+struct read_object_fd_data {
+ int fd;
+ size_t remaining;
+};
+
+static ssize_t read_object_fd(struct odb_write_stream *stream,
+ unsigned char *buf, size_t len)
+{
+ struct read_object_fd_data *data = stream->data;
+ ssize_t read_result;
+ size_t count;
+
+ if (stream->is_finished)
+ return 0;
+
+ count = data->remaining < len ? data->remaining : len;
+ read_result = read_in_full(data->fd, buf, count);
+ if (read_result < 0 || (size_t)read_result != count)
+ return -1;
+
+ data->remaining -= count;
+ if (!data->remaining)
+ stream->is_finished = 1;
+
+ return read_result;
+}
+
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size)
+{
+ struct read_object_fd_data *data;
+
+ CALLOC_ARRAY(data, 1);
+ data->fd = fd;
+ data->remaining = size;
+
+ stream->data = data;
+ stream->read = read_object_fd;
+ stream->is_finished = 0;
+}
diff --git a/odb/streaming.h b/odb/streaming.h
index 65ced911fe..2a8cac19a4 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -5,6 +5,7 @@
#define STREAMING_H 1
#include "object.h"
+#include "odb.h"
struct object_database;
struct odb_read_stream;
@@ -65,6 +66,11 @@ struct odb_write_stream {
ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf,
size_t len);
+/*
+ * Releases memory allocated for underlying stream data.
+ */
+void odb_write_stream_release(struct odb_write_stream *stream);
+
/*
* Look up the object by its ID and write the full contents to the file
* descriptor. The object must be a blob, or the function will fail. When
@@ -82,4 +88,10 @@ int odb_stream_blob_to_fd(struct object_database *odb,
struct stream_filter *filter,
int can_seek);
+/*
+ * Sets up an ODB write stream that reads from an fd.
+ */
+void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd,
+ size_t size);
+
#endif /* STREAMING_H */
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
` (3 preceding siblings ...)
2026-05-14 18:37 ` [PATCH v4 4/7] object-file: remove flags from transaction packfile writes Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
` (2 subsequent siblings)
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
In certain scenarios, Git handles writing blobs that exceed
"core.bigFileThreshold" differently by streaming the object directly
into a packfile. When there is an active ODB transaction, these blobs
are streamed to the same packfile instead of using a separate packfile
for each. If "pack.packSizeLimit" is configured and streaming another
object causes the packfile to exceed the configured limit, the packfile
is truncated back to the previous object and the object write is
restarted in a new packfile.
This works fine, but requires the fd being read from to save a
checkpoint so it becomes possible to rewind the input source via seeking
back to a known offset at the beginning. In a subsequent commit, blob
streaming is converted to use `struct odb_write_stream` as a more
generic input source instead of an fd which doesn't provide a mechanism
for rewinding.
For this use case though, rewinding the fd is not strictly necessary
because the inflated size of the object is known and can be used to
approximate whether writing the object would cause the packfile to
exceed the configured limit prior to writing anything. These blobs
written to the packfile are never deltified thus the size difference
between what is written versus the inflated size is due to zlib
compression. While this does prevent packfiles from being filled to the
potential maximum is some cases, it should be good enough and still
prevents the packfile from exceeding any configured limit.
Use the inflated blob size to determine whether writing an object to a
packfile will exceed the configured "pack.packSizeLimit".
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 86 +++++++++++++++------------------------------------
1 file changed, 25 insertions(+), 61 deletions(-)
diff --git a/object-file.c b/object-file.c
index a59030911f..6d7afdb723 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1448,29 +1448,17 @@ static int hash_blob_stream(struct odb_write_stream *stream,
/*
* Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
+ * packfile in state while updating the hash in ctx.
*/
-static int stream_blob_to_pack(struct transaction_packfile *state,
- struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path)
+static void stream_blob_to_pack(struct transaction_packfile *state,
+ struct git_hash_ctx *ctx, int fd, size_t size,
+ const char *path)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
- off_t offset = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1487,15 +1475,9 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
if ((size_t)read_result != rsize)
die("failed to read %u bytes from '%s'",
(unsigned)rsize, path);
- offset += rsize;
- if (*already_hashed_to < offset) {
- size_t hsize = offset - *already_hashed_to;
- if (rsize < hsize)
- hsize = rsize;
- if (hsize)
- git_hash_update(ctx, ibuf, hsize);
- *already_hashed_to = offset;
- }
+
+ git_hash_update(ctx, ibuf, rsize);
+
s.next_in = ibuf;
s.avail_in = rsize;
size -= rsize;
@@ -1506,14 +1488,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
if (!s.avail_out || status == Z_STREAM_END) {
size_t written = s.next_out - obuf;
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
hashwrite(state->f, obuf, written);
state->offset += written;
s.next_out = obuf;
@@ -1530,7 +1504,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state,
}
}
git_deflate_end(&s);
- return 0;
}
static void flush_packfile_transaction(struct odb_transaction_files *transaction)
@@ -1606,48 +1579,39 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
size_t size, const char *path)
{
struct transaction_packfile *state = &transaction->packfile;
- off_t seekback, already_hashed_to;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint;
struct pack_idx_entry *idx;
- seekback = lseek(fd, 0, SEEK_CUR);
- if (seekback == (off_t)-1)
- return error("cannot find the current offset");
-
header_len = format_object_header((char *)obuf, sizeof(obuf),
OBJ_BLOB, size);
transaction->base.source->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
+ /*
+ * If writing another object to the packfile could result in it
+ * exceeding the configured size limit, flush the current packfile
+ * transaction.
+ *
+ * Note that this uses the inflated object size as an approximation.
+ * Blob objects written in this manner are not delta-compressed, so
+ * the difference between the inflated and on-disk size is limited
+ * to zlib compression and is sufficient for this check.
+ */
+ if (state->nr_written && pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + size)
+ flush_packfile_transaction(transaction);
+
CALLOC_ARRAY(idx, 1);
prepare_packfile_transaction(transaction);
hashfile_checkpoint_init(state->f, &checkpoint);
- already_hashed_to = 0;
-
- while (1) {
- prepare_packfile_transaction(transaction);
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
-
- if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path))
- break;
- /*
- * Writing this object to the current pack will make
- * it too big; we need to truncate it, start a new
- * pack, and write into it.
- */
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- flush_packfile_transaction(transaction);
- if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
- return error("cannot seek back");
- }
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+ stream_blob_to_pack(state, &ctx, fd, size, path);
git_hash_final_oid(result_oid, &ctx);
idx->crc32 = crc32_end(state->f);
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
` (4 preceding siblings ...)
2026-05-14 18:37 ` [PATCH v4 5/7] object-file: avoid fd seekback by checking object size upfront Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-14 18:37 ` [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
2026-05-15 3:56 ` [PATCH v4 0/7] odb: add write operation to ODB transaction interface Jeff King
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
The `index_blob_packfile_transaction()` function streams blob data
directly from an fd. This makes it difficult to reuse as part of a
generic transactional object writing interface.
Refactor the packfile write path to operate on a `struct
odb_write_stream`, allowing callers to supply data from arbitrary
sources.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 56 +++++++++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 26 deletions(-)
diff --git a/object-file.c b/object-file.c
index 6d7afdb723..0d492e6962 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1447,18 +1447,19 @@ static int hash_blob_stream(struct odb_write_stream *stream,
}
/*
- * Read the contents from fd for size bytes, streaming it to the
+ * Read the contents from the stream provided, streaming it to the
* packfile in state while updating the hash in ctx.
*/
static void stream_blob_to_pack(struct transaction_packfile *state,
- struct git_hash_ctx *ctx, int fd, size_t size,
- const char *path)
+ struct git_hash_ctx *ctx, size_t size,
+ struct odb_write_stream *stream)
{
git_zstream s;
unsigned char ibuf[16384];
unsigned char obuf[16384];
unsigned hdrlen;
int status = Z_OK;
+ size_t bytes_read = 0;
git_deflate_init(&s, pack_compression_level);
@@ -1467,23 +1468,21 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
s.avail_out = sizeof(obuf) - hdrlen;
while (status != Z_STREAM_END) {
- if (size && !s.avail_in) {
- size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
- ssize_t read_result = read_in_full(fd, ibuf, rsize);
- if (read_result < 0)
- die_errno("failed to read from '%s'", path);
- if ((size_t)read_result != rsize)
- die("failed to read %u bytes from '%s'",
- (unsigned)rsize, path);
+ if (!stream->is_finished && !s.avail_in) {
+ ssize_t rsize = odb_write_stream_read(stream, ibuf,
+ sizeof(ibuf));
+
+ if (rsize < 0)
+ die("failed to read blob data");
git_hash_update(ctx, ibuf, rsize);
s.next_in = ibuf;
s.avail_in = rsize;
- size -= rsize;
+ bytes_read += rsize;
}
- status = git_deflate(&s, size ? 0 : Z_FINISH);
+ status = git_deflate(&s, stream->is_finished ? Z_FINISH : 0);
if (!s.avail_out || status == Z_STREAM_END) {
size_t written = s.next_out - obuf;
@@ -1503,6 +1502,11 @@ static void stream_blob_to_pack(struct transaction_packfile *state,
die("unexpected deflate failure: %d", status);
}
}
+
+ if (bytes_read != size)
+ die("read %" PRIuMAX " bytes of blob data, but expected %" PRIuMAX " bytes",
+ (uintmax_t)bytes_read, (uintmax_t)size);
+
git_deflate_end(&s);
}
@@ -1574,10 +1578,13 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_packfile_transaction(struct odb_transaction_files *transaction,
- struct object_id *result_oid, int fd,
- size_t size, const char *path)
+static int index_blob_packfile_transaction(struct odb_transaction *base,
+ struct odb_write_stream *stream,
+ size_t size, struct object_id *result_oid)
{
+ struct odb_transaction_files *transaction = container_of(base,
+ struct odb_transaction_files,
+ base);
struct transaction_packfile *state = &transaction->packfile;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
@@ -1611,7 +1618,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
hashfile_checkpoint(state->f, &checkpoint);
idx->offset = state->offset;
crc32_begin(state->f);
- stream_blob_to_pack(state, &ctx, fd, size, path);
+ stream_blob_to_pack(state, &ctx, size, stream);
git_hash_final_oid(result_oid, &ctx);
idx->crc32 = crc32_end(state->f);
@@ -1655,15 +1662,12 @@ int index_fd(struct index_state *istate, struct object_id *oid,
if (flags & INDEX_WRITE_OBJECT) {
struct object_database *odb = the_repository->objects;
- struct odb_transaction_files *files_transaction;
- struct odb_transaction *transaction;
-
- transaction = odb_transaction_begin(odb);
- files_transaction = container_of(odb->transaction,
- struct odb_transaction_files,
- base);
- ret = index_blob_packfile_transaction(files_transaction, oid, fd,
- xsize_t(st->st_size), path);
+ struct odb_transaction *transaction = odb_transaction_begin(odb);
+
+ ret = index_blob_packfile_transaction(odb->transaction,
+ &stream,
+ xsize_t(st->st_size),
+ oid);
odb_transaction_commit(transaction);
} else {
ret = hash_blob_stream(&stream,
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
` (5 preceding siblings ...)
2026-05-14 18:37 ` [PATCH v4 6/7] object-file: generalize packfile writes to use odb_write_stream Justin Tobler
@ 2026-05-14 18:37 ` Justin Tobler
2026-05-15 3:56 ` [PATCH v4 0/7] odb: add write operation to ODB transaction interface Jeff King
7 siblings, 0 replies; 57+ messages in thread
From: Justin Tobler @ 2026-05-14 18:37 UTC (permalink / raw)
To: git; +Cc: ps, gitster, peff, Justin Tobler
How an ODB transaction handles writing objects is expected to vary
between implementations. Introduce a new `write_object_stream()`
callback in `struct odb_transaction` to make this function pluggable.
Rename `index_blob_packfile_transaction()` to
`odb_transaction_files_write_object_stream()` and wire it up for use
with `struct odb_transaction_files` accordingly.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 16 +++++++++-------
odb/transaction.c | 7 +++++++
odb/transaction.h | 25 ++++++++++++++++++++++---
3 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/object-file.c b/object-file.c
index 0d492e6962..23f665df90 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1578,9 +1578,10 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_packfile_transaction(struct odb_transaction *base,
- struct odb_write_stream *stream,
- size_t size, struct object_id *result_oid)
+static int odb_transaction_files_write_object_stream(struct odb_transaction *base,
+ struct odb_write_stream *stream,
+ size_t size,
+ struct object_id *result_oid)
{
struct odb_transaction_files *transaction = container_of(base,
struct odb_transaction_files,
@@ -1664,10 +1665,10 @@ int index_fd(struct index_state *istate, struct object_id *oid,
struct object_database *odb = the_repository->objects;
struct odb_transaction *transaction = odb_transaction_begin(odb);
- ret = index_blob_packfile_transaction(odb->transaction,
- &stream,
- xsize_t(st->st_size),
- oid);
+ ret = odb_transaction_write_object_stream(odb->transaction,
+ &stream,
+ xsize_t(st->st_size),
+ oid);
odb_transaction_commit(transaction);
} else {
ret = hash_blob_stream(&stream,
@@ -2132,6 +2133,7 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
transaction = xcalloc(1, sizeof(*transaction));
transaction->base.source = source;
transaction->base.commit = odb_transaction_files_commit;
+ transaction->base.write_object_stream = odb_transaction_files_write_object_stream;
return &transaction->base;
}
diff --git a/odb/transaction.c b/odb/transaction.c
index 592ac84075..b16e07aebf 100644
--- a/odb/transaction.c
+++ b/odb/transaction.c
@@ -26,3 +26,10 @@ void odb_transaction_commit(struct odb_transaction *transaction)
transaction->source->odb->transaction = NULL;
free(transaction);
}
+
+int odb_transaction_write_object_stream(struct odb_transaction *transaction,
+ struct odb_write_stream *stream,
+ size_t len, struct object_id *oid)
+{
+ return transaction->write_object_stream(transaction, stream, len, oid);
+}
diff --git a/odb/transaction.h b/odb/transaction.h
index a56e392f21..854fda06f5 100644
--- a/odb/transaction.h
+++ b/odb/transaction.h
@@ -12,14 +12,24 @@
*
* Each ODB source is expected to implement its own transaction handling.
*/
-struct odb_transaction;
-typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
struct odb_transaction {
/* The ODB source the transaction is opened against. */
struct odb_source *source;
/* The ODB source specific callback invoked to commit a transaction. */
- odb_transaction_commit_fn commit;
+ void (*commit)(struct odb_transaction *transaction);
+
+ /*
+ * This callback is expected to write the given object stream into
+ * the ODB transaction. Note that for now, only blobs support streaming.
+ *
+ * The resulting object ID shall be written into the out pointer. The
+ * callback is expected to return 0 on success, a negative error code
+ * otherwise.
+ */
+ int (*write_object_stream)(struct odb_transaction *transaction,
+ struct odb_write_stream *stream, size_t len,
+ struct object_id *oid);
};
/*
@@ -35,4 +45,13 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb);
*/
void odb_transaction_commit(struct odb_transaction *transaction);
+/*
+ * Writes the object in the provided stream into the transaction. The resulting
+ * object ID is written into the out pointer. Returns 0 on success, a negative
+ * error code otherwise.
+ */
+int odb_transaction_write_object_stream(struct odb_transaction *transaction,
+ struct odb_write_stream *stream,
+ size_t len, struct object_id *oid);
+
#endif
--
2.54.0.105.g59ff4886a5
^ permalink raw reply related [flat|nested] 57+ messages in thread* Re: [PATCH v4 0/7] odb: add write operation to ODB transaction interface
2026-05-14 18:37 ` [PATCH v4 " Justin Tobler
` (6 preceding siblings ...)
2026-05-14 18:37 ` [PATCH v4 7/7] odb/transaction: make `write_object_stream()` pluggable Justin Tobler
@ 2026-05-15 3:56 ` Jeff King
7 siblings, 0 replies; 57+ messages in thread
From: Jeff King @ 2026-05-15 3:56 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, ps, gitster
On Thu, May 14, 2026 at 01:37:33PM -0500, Justin Tobler wrote:
> Changes since V3:
> - Fixed leak due to an fd not being closed when exiting prior to
> close_loose_object() being invoked.
> [...]
> 3: 11321ad607 ! 3: d53ad95712 odb: update `struct odb_write_stream` read() callback
> @@ object-file.c: int odb_source_loose_write_stream(struct odb_source *source,
> + ssize_t read_len = odb_write_stream_read(in_stream, buf,
> + sizeof(buf));
> + if (read_len < 0) {
> ++ close(fd);
> + err = -1;
> + goto cleanup;
> + }
This fix looks good to me (and I think is the best way to write it,
given the rest of the function).
I briefly wondered whether callers might care about errno being
preserved, but I couldn't find any indication that they do.
-Peff
^ permalink raw reply [flat|nested] 57+ messages in thread