[PATCH 0/3] bulk-checkin: remove global transaction state

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/3] bulk-checkin: remove global transaction state
@ 2025-08-20 22:55 Justin Tobler
  2025-08-20 22:55 ` [PATCH 1/3] bulk-checkin: introduce object database transaction structure Justin Tobler
                   ` (4 more replies)
  0 siblings, 5 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-20 22:55 UTC (permalink / raw)
  To: git; +Cc: ps, Justin Tobler

Greetings,

The bulk-checkin subsystem provides an interface to write objects to the
object database in a bulk transaction. The state of an ongoing
transaction is stored across several global variables. This series aims
to remove this global transaction state in favor of storing state in in
`struct object_database`. This is done in preparation for a follow-up
change where the goal is to eventually move these transaction interfaces
into "odb.h".

Thanks,
-Justin

Justin Tobler (3):
  bulk-checkin: introduce object database transaction structure
  bulk-checkin: remove global transaction state
  bulk-checkin: wire repository variable

 builtin/add.c            |   5 +-
 builtin/unpack-objects.c |   5 +-
 builtin/update-index.c   |   7 +-
 bulk-checkin.c           | 136 ++++++++++++++++++++++++---------------
 bulk-checkin.h           |  19 ++++--
 cache-tree.c             |   5 +-
 object-file.c            |  12 ++--
 odb.h                    |   8 +++
 read-cache.c             |   5 +-
 9 files changed, 128 insertions(+), 74 deletions(-)


base-commit: c44beea485f0f2feaf460e2ac87fdd5608d63cf0
-- 
2.51.0


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/3] bulk-checkin: introduce object database transaction structure
  2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
@ 2025-08-20 22:55 ` Justin Tobler
  2025-08-20 22:55 ` [PATCH 2/3] bulk-checkin: remove global transaction state Justin Tobler
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-20 22:55 UTC (permalink / raw)
  To: git; +Cc: ps, Justin Tobler

Object database transaction state is stored across several global
variables in the bulk-checkin subsystem. Consolidate this state into a
single `struct odb_transaction` global. In a subsequent commit, the
transactional interfaces will be updated to wire this structure instead
of relying on a global variable.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index b2809ab0398..82a73da79e8 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -19,11 +19,7 @@
 #include "object-file.h"
 #include "odb.h"
 
-static int odb_transaction_nesting;
-
-static struct tmp_objdir *bulk_fsync_objdir;
-
-static struct bulk_checkin_packfile {
+struct bulk_checkin_packfile {
 	char *pack_tmp_name;
 	struct hashfile *f;
 	off_t offset;
@@ -32,7 +28,13 @@ static struct bulk_checkin_packfile {
 	struct pack_idx_entry **written;
 	uint32_t alloc_written;
 	uint32_t nr_written;
-} bulk_checkin_packfile;
+};
+
+static struct odb_transaction {
+	int nesting;
+	struct tmp_objdir *objdir;
+	struct bulk_checkin_packfile packfile;
+} transaction;
 
 static void finish_tmp_packfile(struct strbuf *basename,
 				const char *pack_tmp_name,
@@ -101,7 +103,7 @@ static void flush_batch_fsync(void)
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!bulk_fsync_objdir)
+	if (!transaction.objdir)
 		return;
 
 	/*
@@ -123,8 +125,8 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(bulk_fsync_objdir);
-	bulk_fsync_objdir = NULL;
+	tmp_objdir_migrate(transaction.objdir);
+	transaction.objdir = NULL;
 }
 
 static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
@@ -331,12 +333,12 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!odb_transaction_nesting || bulk_fsync_objdir)
+	if (!transaction.nesting || transaction.objdir)
 		return;
 
-	bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (bulk_fsync_objdir)
-		tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
+	transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	if (transaction.objdir)
+		tmp_objdir_replace_primary_odb(transaction.objdir, 0);
 }
 
 void fsync_loose_object_bulk_checkin(int fd, const char *filename)
@@ -348,7 +350,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!bulk_fsync_objdir ||
+	if (!transaction.objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -360,31 +362,31 @@ int index_blob_bulk_checkin(struct object_id *oid,
 			    int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,
+	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
 					  path, flags);
-	if (!odb_transaction_nesting)
-		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	if (!transaction.nesting)
+		flush_bulk_checkin_packfile(&transaction.packfile);
 	return status;
 }
 
 void begin_odb_transaction(void)
 {
-	odb_transaction_nesting += 1;
+	transaction.nesting += 1;
 }
 
 void flush_odb_transaction(void)
 {
 	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	flush_bulk_checkin_packfile(&transaction.packfile);
 }
 
 void end_odb_transaction(void)
 {
-	odb_transaction_nesting -= 1;
-	if (odb_transaction_nesting < 0)
+	transaction.nesting -= 1;
+	if (transaction.nesting < 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (odb_transaction_nesting)
+	if (transaction.nesting)
 		return;
 
 	flush_odb_transaction();
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/3] bulk-checkin: remove global transaction state
  2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
  2025-08-20 22:55 ` [PATCH 1/3] bulk-checkin: introduce object database transaction structure Justin Tobler
@ 2025-08-20 22:55 ` Justin Tobler
  2025-08-20 22:55 ` [PATCH 3/3] bulk-checkin: wire repository variable Justin Tobler
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-20 22:55 UTC (permalink / raw)
  To: git; +Cc: ps, Justin Tobler

Object database transactions in the bulk-checkin subsystem rely on
global state to track transaction status. Stop relying on global state
and instead store the transaction in the `struct object_database`.
Functions that operate on transactions are updated to now wire
transaction state.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 builtin/add.c            |  5 ++-
 builtin/unpack-objects.c |  5 ++-
 builtin/update-index.c   |  7 ++--
 bulk-checkin.c           | 82 ++++++++++++++++++++++++++--------------
 bulk-checkin.h           | 18 +++++----
 cache-tree.c             |  5 ++-
 object-file.c            | 11 +++---
 odb.h                    |  8 ++++
 read-cache.c             |  5 ++-
 9 files changed, 94 insertions(+), 52 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index 0235854f809..740c7c45817 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -389,6 +389,7 @@ int cmd_add(int argc,
 	char *seen = NULL;
 	char *ps_matched = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	struct odb_transaction *transaction;
 
 	repo_config(repo, add_config, NULL);
 
@@ -574,7 +575,7 @@ int cmd_add(int argc,
 		string_list_clear(&only_match_skip_worktree, 0);
 	}
 
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 
 	ps_matched = xcalloc(pathspec.nr, 1);
 	if (add_renormalize)
@@ -593,7 +594,7 @@ int cmd_add(int argc,
 
 	if (chmod_arg && pathspec.nr)
 		exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 finish:
 	if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 7ae7c82b6c0..28124b324d2 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -584,6 +584,7 @@ static void unpack_all(void)
 {
 	int i;
 	unsigned char *hdr = fill(sizeof(struct pack_header));
+	struct odb_transaction *transaction;
 
 	if (get_be32(hdr) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -599,12 +600,12 @@ static void unpack_all(void)
 		progress = start_progress(the_repository,
 					  _("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	stop_progress(&progress);
 
 	if (delta_list)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2380f3ccd68..2ba2d29c959 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -77,7 +77,7 @@ static void report(const char *fmt, ...)
 	 * objects invisible while a transaction is active, so flush the
 	 * transaction here before reporting a change made by update-index.
 	 */
-	flush_odb_transaction();
+	flush_odb_transaction(the_repository->objects->transaction);
 	va_start(vp, fmt);
 	vprintf(fmt, vp);
 	putchar('\n');
@@ -940,6 +940,7 @@ int cmd_update_index(int argc,
 	strbuf_getline_fn getline_fn;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
 	struct repository *r = the_repository;
+	struct odb_transaction *transaction;
 	struct option options[] = {
 		OPT_BIT('q', NULL, &refresh_args.flags,
 			N_("continue refresh even when index needs update"),
@@ -1130,7 +1131,7 @@ int cmd_update_index(int argc,
 	 * Allow the object layer to optimize adding multiple objects in
 	 * a batch.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	while (ctx.argc) {
 		if (parseopt_state != PARSE_OPT_DONE)
 			parseopt_state = parse_options_step(&ctx, options,
@@ -1213,7 +1214,7 @@ int cmd_update_index(int argc,
 	/*
 	 * By now we have added all of the new objects
 	 */
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	if (split_index > 0) {
 		if (repo_config_get_split_index(the_repository) == 0)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 82a73da79e8..53a20a2d92f 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -30,11 +30,13 @@ struct bulk_checkin_packfile {
 	uint32_t nr_written;
 };
 
-static struct odb_transaction {
+struct odb_transaction {
+	struct object_database *odb;
+
 	int nesting;
 	struct tmp_objdir *objdir;
 	struct bulk_checkin_packfile packfile;
-} transaction;
+};
 
 static void finish_tmp_packfile(struct strbuf *basename,
 				const char *pack_tmp_name,
@@ -98,12 +100,12 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 /*
  * Cleanup after batch-mode fsync_object_files.
  */
-static void flush_batch_fsync(void)
+static void flush_batch_fsync(struct odb_transaction *transaction)
 {
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!transaction.objdir)
+	if (!transaction->objdir)
 		return;
 
 	/*
@@ -125,8 +127,8 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(transaction.objdir);
-	transaction.objdir = NULL;
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
 }
 
 static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
@@ -325,7 +327,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	return 0;
 }
 
-void prepare_loose_object_bulk_checkin(void)
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 {
 	/*
 	 * We lazily create the temporary object directory
@@ -333,15 +335,16 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!transaction.nesting || transaction.objdir)
+	if (!transaction || transaction->objdir)
 		return;
 
-	transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (transaction.objdir)
-		tmp_objdir_replace_primary_odb(transaction.objdir, 0);
+	transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
 
-void fsync_loose_object_bulk_checkin(int fd, const char *filename)
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename)
 {
 	/*
 	 * If we have an active ODB transaction, we issue a call that
@@ -350,7 +353,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!transaction.objdir ||
+	if (!transaction || !transaction->objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -358,36 +361,57 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	}
 }
 
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
-					  path, flags);
-	if (!transaction.nesting)
-		flush_bulk_checkin_packfile(&transaction.packfile);
+	int status;
+
+	if (transaction) {
+		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
+					      size, path, flags);
+	} else {
+		struct bulk_checkin_packfile state = { 0 };
+
+		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
+		flush_bulk_checkin_packfile(&state);
+	}
+
 	return status;
 }
 
-void begin_odb_transaction(void)
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
-	transaction.nesting += 1;
+	if (!odb->transaction) {
+		CALLOC_ARRAY(odb->transaction, 1);
+		odb->transaction->odb = odb;
+	}
+
+	odb->transaction->nesting += 1;
+
+	return odb->transaction;
 }
 
-void flush_odb_transaction(void)
+void flush_odb_transaction(struct odb_transaction *transaction)
 {
-	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&transaction.packfile);
+	if (!transaction)
+		return;
+
+	flush_batch_fsync(transaction);
+	flush_bulk_checkin_packfile(&transaction->packfile);
 }
 
-void end_odb_transaction(void)
+void end_odb_transaction(struct odb_transaction *transaction)
 {
-	transaction.nesting -= 1;
-	if (transaction.nesting < 0)
+	if (!transaction || transaction->nesting == 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (transaction.nesting)
+	transaction->nesting -= 1;
+
+	if (transaction->nesting)
 		return;
 
-	flush_odb_transaction();
+	flush_odb_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
 }
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 7246ea58dcf..16254ce6a70 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -5,9 +5,13 @@
 #define BULK_CHECKIN_H
 
 #include "object.h"
+#include "odb.h"
 
-void prepare_loose_object_bulk_checkin(void);
-void fsync_loose_object_bulk_checkin(int fd, const char *filename);
+struct odb_transaction;
+
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename);
 
 /*
  * This creates one packfile per large blob unless bulk-checkin
@@ -24,8 +28,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename);
  * binary blobs, they generally do not want to get any conversion, and
  * callers should avoid this code path when filters are requested.
  */
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags);
 
 /*
@@ -35,20 +39,20 @@ int index_blob_bulk_checkin(struct object_id *oid,
  * and objects are only visible after the outermost transaction
  * is complete or the transaction is flushed.
  */
-void begin_odb_transaction(void);
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
 
 /*
  * Make any objects that are currently part of a pending object
  * database transaction visible. It is valid to call this function
  * even if no transaction is active.
  */
-void flush_odb_transaction(void);
+void flush_odb_transaction(struct odb_transaction *transaction);
 
 /*
  * Tell the object database to make any objects from the
  * current transaction visible if this is the final nested
  * transaction.
  */
-void end_odb_transaction(void);
+void end_odb_transaction(struct odb_transaction *transaction);
 
 #endif
diff --git a/cache-tree.c b/cache-tree.c
index 66ef2becbe0..d225554eedd 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it,
 
 int cache_tree_update(struct index_state *istate, int flags)
 {
+	struct odb_transaction *transaction;
 	int skip, i;
 
 	i = verify_cache(istate, flags);
@@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags)
 
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
 		       "", 0, &skip, flags);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	trace2_region_leave("cache_tree", "update", the_repository);
 	trace_performance_leave("cache_tree_update");
 	if (i < 0)
diff --git a/object-file.c b/object-file.c
index 2bc36ab3ee8..1740aa2b2e3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source,
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	odb_loose_path(source, &filename, oid);
 
@@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
 	strbuf_addf(&filename, "%s/", source->path);
@@ -1263,8 +1263,9 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					     flags);
+		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+					      oid, fd, xsize_t(st->st_size),
+					      path, flags);
 	close(fd);
 	return ret;
 }
diff --git a/odb.h b/odb.h
index 3dfc66d75a3..a89b2143909 100644
--- a/odb.h
+++ b/odb.h
@@ -84,6 +84,7 @@ struct odb_source {
 
 struct packed_git;
 struct cached_object_entry;
+struct odb_transaction;
 
 /*
  * The object database encapsulates access to objects in a repository. It
@@ -94,6 +95,13 @@ struct object_database {
 	/* Repository that owns this database. */
 	struct repository *repo;
 
+	/*
+	 * State of current current object database transaction. Only one
+	 * transaction may be pending at a time. Is NULL when no transaction is
+	 * configured.
+	 */
+	struct odb_transaction *transaction;
+
 	/*
 	 * Set of all object directories; the main directory is first (and
 	 * cannot be NULL after initialization). Subsequent directories are
diff --git a/read-cache.c b/read-cache.c
index 06ad74db228..229b8ef11c9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 		       const struct pathspec *pathspec, char *ps_matched,
 		       int include_sparse, int flags)
 {
+	struct odb_transaction *transaction;
 	struct update_callback_data data;
 	struct rev_info rev;
 
@@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 	 * This function is invoked from commands other than 'add', which
 	 * may not have their own transaction active.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	release_revisions(&rev);
 	return !!data.add_errors;
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 3/3] bulk-checkin: wire repository variable
  2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
  2025-08-20 22:55 ` [PATCH 1/3] bulk-checkin: introduce object database transaction structure Justin Tobler
  2025-08-20 22:55 ` [PATCH 2/3] bulk-checkin: remove global transaction state Justin Tobler
@ 2025-08-20 22:55 ` Justin Tobler
  2025-08-21  0:15   ` Junio C Hamano
  2025-08-21  0:00 ` [PATCH 0/3] bulk-checkin: remove global transaction state Junio C Hamano
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
  4 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-20 22:55 UTC (permalink / raw)
  To: git; +Cc: ps, Justin Tobler

The bulk-checkin subsystem depends on `the_repository`. Adapt functions
and call sites to wire the repository variable where needed. The
`USE_THE_REPOSITORY_VARIBALE` is still required as the
`pack_compression_level` and `pack_size_limit_cfg` globals are still
used.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 58 ++++++++++++++++++++++++++++----------------------
 bulk-checkin.h |  3 ++-
 object-file.c  |  3 ++-
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 53a20a2d92f..a1185883837 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -38,7 +38,8 @@ struct odb_transaction {
 	struct bulk_checkin_packfile packfile;
 };
 
-static void finish_tmp_packfile(struct strbuf *basename,
+static void finish_tmp_packfile(struct repository *repo,
+				struct strbuf *basename,
 				const char *pack_tmp_name,
 				struct pack_idx_entry **written_list,
 				uint32_t nr_written,
@@ -47,15 +48,16 @@ static void finish_tmp_packfile(struct strbuf *basename,
 {
 	char *idx_tmp_name = NULL;
 
-	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
+	stage_tmp_packfiles(repo, basename, pack_tmp_name,
 			    written_list, nr_written, NULL, pack_idx_opts, hash,
 			    &idx_tmp_name);
-	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
 
 	free(idx_tmp_name);
 }
 
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
+static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state,
+					struct repository *repo)
 {
 	unsigned char hash[GIT_MAX_RAWSZ];
 	struct strbuf packname = STRBUF_INIT;
@@ -73,15 +75,15 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
 		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name,
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 					 state->nr_written, hash,
 					 state->offset);
 		close(fd);
 	}
 
-	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
-		    hash_to_hex(hash));
-	finish_tmp_packfile(&packname, state->pack_tmp_name,
+	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(repo),
+		    hash_to_hex_algop(hash, repo->hash_algo));
+	finish_tmp_packfile(repo, &packname, state->pack_tmp_name,
 			    state->written, state->nr_written,
 			    &state->pack_idx_opts, hash);
 	for (uint32_t i = 0; i < state->nr_written; i++)
@@ -94,7 +96,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 
 	strbuf_release(&packname);
 	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(the_repository);
+	reprepare_packed_git(repo);
 }
 
 /*
@@ -117,7 +119,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
 	 * to ensure that the data in each new object file is durable before
 	 * the final name is visible.
 	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    repo_get_object_directory(transaction->odb->repo));
 	temp = xmks_tempfile(temp_path.buf);
 	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
 	delete_tempfile(&temp);
@@ -131,10 +134,11 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
 	transaction->objdir = NULL;
 }
 
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
+static int already_written(struct bulk_checkin_packfile *state,
+			   struct repository *repo, struct object_id *oid)
 {
 	/* The object may already exist in the repository */
-	if (odb_has_object(the_repository->objects, oid,
+	if (odb_has_object(repo->objects, oid,
 			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 		return 1;
 
@@ -240,12 +244,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 
 /* Lazily create backing packfile for the state */
 static void prepare_to_stream(struct bulk_checkin_packfile *state,
+			      struct repository *repo,
 			      unsigned flags)
 {
 	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 		return;
 
-	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
+	state->f = create_tmp_packfile(repo, &state->pack_tmp_name);
 	reset_pack_idx_option(&state->pack_idx_opts);
 
 	/* Pretend we are going to write only one object */
@@ -255,6 +260,7 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state,
 }
 
 static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
+				struct repository *repo,
 				struct object_id *result_oid,
 				int fd, size_t size,
 				const char *path, unsigned flags)
@@ -272,21 +278,21 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 
 	header_len = format_object_header((char *)obuf, sizeof(obuf),
 					  OBJ_BLOB, size);
-	the_hash_algo->init_fn(&ctx);
+	repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
 	/* Note: idx is non-NULL when we are writing */
 	if ((flags & INDEX_WRITE_OBJECT) != 0) {
 		CALLOC_ARRAY(idx, 1);
 
-		prepare_to_stream(state, flags);
+		prepare_to_stream(state, repo, flags);
 		hashfile_checkpoint_init(state->f, &checkpoint);
 	}
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_to_stream(state, flags);
+		prepare_to_stream(state, repo, flags);
 		if (idx) {
 			hashfile_checkpoint(state->f, &checkpoint);
 			idx->offset = state->offset;
@@ -304,7 +310,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 			BUG("should not happen");
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
-		flush_bulk_checkin_packfile(state);
+		flush_bulk_checkin_packfile(state, repo);
 		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
 			return error("cannot seek back");
 	}
@@ -313,7 +319,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 		return 0;
 
 	idx->crc32 = crc32_end(state->f);
-	if (already_written(state, result_oid)) {
+	if (already_written(state, repo, result_oid)) {
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		free(idx);
@@ -338,7 +344,7 @@ void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 	if (!transaction || transaction->objdir)
 		return;
 
-	transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
 	if (transaction->objdir)
 		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
@@ -361,20 +367,21 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
 	}
 }
 
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
+int index_blob_bulk_checkin(struct repository *repo,
+			    struct odb_transaction *transaction,
 			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags)
 {
 	int status;
 
 	if (transaction) {
-		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
-					      size, path, flags);
+		status = deflate_blob_to_pack(&transaction->packfile,
+					      repo, oid, fd, size, path, flags);
 	} else {
 		struct bulk_checkin_packfile state = { 0 };
 
-		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
-		flush_bulk_checkin_packfile(&state);
+		status = deflate_blob_to_pack(&state, repo, oid, fd, size, path, flags);
+		flush_bulk_checkin_packfile(&state, repo);
 	}
 
 	return status;
@@ -398,7 +405,8 @@ void flush_odb_transaction(struct odb_transaction *transaction)
 		return;
 
 	flush_batch_fsync(transaction);
-	flush_bulk_checkin_packfile(&transaction->packfile);
+	flush_bulk_checkin_packfile(&transaction->packfile,
+				    transaction->odb->repo);
 }
 
 void end_odb_transaction(struct odb_transaction *transaction)
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 16254ce6a70..ac8dbf3523f 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -28,7 +28,8 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
  * binary blobs, they generally do not want to get any conversion, and
  * callers should avoid this code path when filters are requested.
  */
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
+int index_blob_bulk_checkin(struct repository *repo,
+			    struct odb_transaction *transaction,
 			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags);
 
diff --git a/object-file.c b/object-file.c
index 1740aa2b2e3..35f33e466c2 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1263,7 +1263,8 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	else
-		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+		ret = index_blob_bulk_checkin(the_repository,
+					      the_repository->objects->transaction,
 					      oid, fd, xsize_t(st->st_size),
 					      path, flags);
 	close(fd);
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] bulk-checkin: remove global transaction state
  2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
                   ` (2 preceding siblings ...)
  2025-08-20 22:55 ` [PATCH 3/3] bulk-checkin: wire repository variable Justin Tobler
@ 2025-08-21  0:00 ` Junio C Hamano
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
  4 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2025-08-21  0:00 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> The bulk-checkin subsystem provides an interface to write objects to the
> object database in a bulk transaction. The state of an ongoing
> transaction is stored across several global variables. This series aims
> to remove this global transaction state in favor of storing state in in
> `struct object_database`. This is done in preparation for a follow-up
> change where the goal is to eventually move these transaction interfaces
> into "odb.h".

Wonderful.  I admit these global variables are left-over dropping of
my making.  Thanks for cleaning after me ;-)


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] bulk-checkin: wire repository variable
  2025-08-20 22:55 ` [PATCH 3/3] bulk-checkin: wire repository variable Justin Tobler
@ 2025-08-21  0:15   ` Junio C Hamano
  2025-08-21 20:26     ` Justin Tobler
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2025-08-21  0:15 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> The bulk-checkin subsystem depends on `the_repository`. Adapt functions
> and call sites to wire the repository variable where needed. The
> `USE_THE_REPOSITORY_VARIBALE` is still required as the
> `pack_compression_level` and `pack_size_limit_cfg` globals are still
> used.

Hmph.

I somehow expected that odb would know what repository it was
instanciated to work with, or in the worst case where in-core odb is
in theory sharable among multiple in-core repositories, at least
begin_odb_transaction() would take <repository, odb> pair and the
transaction would know for which repository the transaction is
working for.

Do we need bulk_checkin_packfile as a separate structure and pass it
around, or would these internal functions be better off passing an
instance of odb_transaction around and learn the repository from
odb->repo?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] bulk-checkin: wire repository variable
  2025-08-21  0:15   ` Junio C Hamano
@ 2025-08-21 20:26     ` Justin Tobler
  2025-08-21 20:32       ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-21 20:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, ps

On 25/08/20 05:15PM, Junio C Hamano wrote:
> I somehow expected that odb would know what repository it was
> instanciated to work with, or in the worst case where in-core odb is
> in theory sharable among multiple in-core repositories, at least
> begin_odb_transaction() would take <repository, odb> pair and the
> transaction would know for which repository the transaction is
> working for.

The `struct odb_transaction` maintains a pointer to its parent `struct
object_database`. This ODB also has a pointer to the `struct
repository`. Thus, from an ongoing transaction we should be able to get
the repository we want.

For `begin_odb_transaction()` we should only have to provide `struct
object_database` and from that the transaction can access the
corresponding repository. In the general cases, this is exactly what we
do, but this doesn't work for `index_blob_bulk_checkin()` and its
accompanying internal functions as they may be invoked from outside of a
transaction.

> Do we need bulk_checkin_packfile as a separate structure and pass it
> around, or would these internal functions be better off passing an
> instance of odb_transaction around and learn the repository from
> odb->repo?

In its current form, the bulk-checkin mechanism for packfiles may be
used outside of transactions. For example when calling
`index_blob_bulk_checkin()` without a transaction, the single object is
written to the packfile moved into the primary ODB. With a transaction,
multiple objects may be written to a single packfile. This whole setup
is rather awkward though.

Thinking about this more, we should probably just require
`index_blob_bulk_checkin()` be provided a transaction. Callers will need
to ensure a transaction is running so that a `struct
bulk_checkin_packfile` gets set up, but this shouldn't be a big deal.
With this we could easily just propagate the transaction for all these
function as you suggested.

I'll do this in the next version. Thanks!

-Justin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] bulk-checkin: wire repository variable
  2025-08-21 20:26     ` Justin Tobler
@ 2025-08-21 20:32       ` Junio C Hamano
  0 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2025-08-21 20:32 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> Thinking about this more, we should probably just require
> `index_blob_bulk_checkin()` be provided a transaction. Callers will need
> to ensure a transaction is running so that a `struct
> bulk_checkin_packfile` gets set up, but this shouldn't be a big deal.

Thanks for thinking this through.  I think reducing the number of
oddball callers-from-sideways leads us to good code hygiene.

> With this we could easily just propagate the transaction for all these
> function as you suggested.
>
> I'll do this in the next version. Thanks!
>
> -Justin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v2 0/4] bulk-checkin: remove global transaction state
  2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
                   ` (3 preceding siblings ...)
  2025-08-21  0:00 ` [PATCH 0/3] bulk-checkin: remove global transaction state Junio C Hamano
@ 2025-08-21 23:22 ` Justin Tobler
  2025-08-21 23:22   ` [PATCH v2 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
                     ` (4 more replies)
  4 siblings, 5 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-21 23:22 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

Greetings,

The bulk-checkin subsystem provides an interface to write objects to the
object database in a bulk transaction. The state of an ongoing
transaction is stored across several global variables. This series aims
to remove this global transaction state in favor of storing state in in
`struct object_database`. This is done in preparation for a follow-up
change where the goal is to eventually move these transaction interfaces
into "odb.h".

Changes since V1:

- `index_blob_bulk_checkin()` now assumes that the caller always
  provides a setup `struct odb_transaction`. Callers are adjusted to
  ensure this.
- Functions in bulk-checkin.c now consistently access the repository
  through the provided `odb_transaction`.

Thanks,
-Justin

Justin Tobler (4):
  bulk-checkin: introduce object database transaction structure
  bulk-checkin: remove global transaction state
  bulk-checkin: require transaction for index_blob_bulk_checkin()
  bulk-checkin: use repository variable from transaction

 builtin/add.c            |   5 +-
 builtin/unpack-objects.c |   5 +-
 builtin/update-index.c   |   7 +-
 bulk-checkin.c           | 153 +++++++++++++++++++++------------------
 bulk-checkin.h           |  25 ++++---
 cache-tree.c             |   5 +-
 object-file.c            |  30 +++++---
 odb.h                    |   8 ++
 read-cache.c             |   5 +-
 9 files changed, 142 insertions(+), 101 deletions(-)

Range-diff against v1:
1:  5c9358e0b03 = 1:  5c9358e0b03 bulk-checkin: introduce object database transaction structure
2:  4a1b80a6baf = 2:  4a1b80a6baf bulk-checkin: remove global transaction state
-:  ----------- > 3:  ce329932fdd bulk-checkin: require transaction for index_blob_bulk_checkin()
3:  2ca78c8d343 ! 4:  08e26647915 bulk-checkin: wire repository variable
    @@ Metadata
     Author: Justin Tobler <jltobler@gmail.com>
     
      ## Commit message ##
    -    bulk-checkin: wire repository variable
    +    bulk-checkin: use repository variable from transaction
     
         The bulk-checkin subsystem depends on `the_repository`. Adapt functions
    -    and call sites to wire the repository variable where needed. The
    -    `USE_THE_REPOSITORY_VARIBALE` is still required as the
    +    and call sites to access the repository through `struct odb_transaction`
    +    instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
         `pack_compression_level` and `pack_size_limit_cfg` globals are still
         used.
     
    @@ bulk-checkin.c: struct odb_transaction {
      };
      
     -static void finish_tmp_packfile(struct strbuf *basename,
    -+static void finish_tmp_packfile(struct repository *repo,
    +-				const char *pack_tmp_name,
    +-				struct pack_idx_entry **written_list,
    +-				uint32_t nr_written,
    +-				struct pack_idx_option *pack_idx_opts,
    ++static void finish_tmp_packfile(struct odb_transaction *transaction,
     +				struct strbuf *basename,
    - 				const char *pack_tmp_name,
    - 				struct pack_idx_entry **written_list,
    - 				uint32_t nr_written,
    -@@ bulk-checkin.c: static void finish_tmp_packfile(struct strbuf *basename,
    + 				unsigned char hash[])
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    ++	struct repository *repo = transaction->odb->repo;
      	char *idx_tmp_name = NULL;
      
     -	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
    -+	stage_tmp_packfiles(repo, basename, pack_tmp_name,
    - 			    written_list, nr_written, NULL, pack_idx_opts, hash,
    - 			    &idx_tmp_name);
    +-			    written_list, nr_written, NULL, pack_idx_opts, hash,
    +-			    &idx_tmp_name);
     -	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
    ++	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
    ++			    state->written, state->nr_written, NULL,
    ++			    &state->pack_idx_opts, hash, &idx_tmp_name);
     +	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
      
      	free(idx_tmp_name);
      }
      
     -static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
    -+static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state,
    -+					struct repository *repo)
    ++static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    ++	struct repository *repo = transaction->odb->repo;
      	unsigned char hash[GIT_MAX_RAWSZ];
      	struct strbuf packname = STRBUF_INIT;
    + 
     @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
      				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
      	} else {
    @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_pack
     -	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
     -		    hash_to_hex(hash));
     -	finish_tmp_packfile(&packname, state->pack_tmp_name,
    -+	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(repo),
    +-			    state->written, state->nr_written,
    +-			    &state->pack_idx_opts, hash);
    ++	strbuf_addf(&packname, "%s/pack/pack-%s.",
    ++		    transaction->odb->sources->path,
     +		    hash_to_hex_algop(hash, repo->hash_algo));
    -+	finish_tmp_packfile(repo, &packname, state->pack_tmp_name,
    - 			    state->written, state->nr_written,
    - 			    &state->pack_idx_opts, hash);
    ++
    ++	finish_tmp_packfile(transaction, &packname, hash);
      	for (uint32_t i = 0; i < state->nr_written; i++)
    + 		free(state->written[i]);
    + 
     @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
      
      	strbuf_release(&packname);
    @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
      	 */
     -	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
     +	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
    -+		    repo_get_object_directory(transaction->odb->repo));
    ++		    transaction->odb->sources->path);
      	temp = xmks_tempfile(temp_path.buf);
      	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
      	delete_tempfile(&temp);
    @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
      }
      
     -static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
    -+static int already_written(struct bulk_checkin_packfile *state,
    -+			   struct repository *repo, struct object_id *oid)
    ++static int already_written(struct odb_transaction *transaction,
    ++			   struct object_id *oid)
      {
      	/* The object may already exist in the repository */
     -	if (odb_has_object(the_repository->objects, oid,
    -+	if (odb_has_object(repo->objects, oid,
    ++	if (odb_has_object(transaction->odb, oid,
      			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
      		return 1;
      
    + 	/* Might want to keep the list sorted */
    +-	for (uint32_t i = 0; i < state->nr_written; i++)
    +-		if (oideq(&state->written[i]->oid, oid))
    ++	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
    ++		if (oideq(&transaction->packfile.written[i]->oid, oid))
    + 			return 1;
    + 
    + 	/* This is a new object we need to keep */
     @@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
    + }
      
      /* Lazily create backing packfile for the state */
    - static void prepare_to_stream(struct bulk_checkin_packfile *state,
    -+			      struct repository *repo,
    +-static void prepare_to_stream(struct bulk_checkin_packfile *state,
    ++static void prepare_to_stream(struct odb_transaction *transaction,
      			      unsigned flags)
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
      	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
      		return;
      
     -	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
    -+	state->f = create_tmp_packfile(repo, &state->pack_tmp_name);
    ++	state->f = create_tmp_packfile(transaction->odb->repo,
    ++				       &state->pack_tmp_name);
      	reset_pack_idx_option(&state->pack_idx_opts);
      
      	/* Pretend we are going to write only one object */
     @@ bulk-checkin.c: static void prepare_to_stream(struct bulk_checkin_packfile *state,
    + 		die_errno("unable to write pack header");
      }
      
    - static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    -+				struct repository *repo,
    - 				struct object_id *result_oid,
    - 				int fd, size_t size,
    - 				const char *path, unsigned flags)
    +-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    +-				struct object_id *result_oid,
    +-				int fd, size_t size,
    +-				const char *path, unsigned flags)
    ++int index_blob_bulk_checkin(struct odb_transaction *transaction,
    ++			    struct object_id *result_oid,
    ++			    int fd, size_t size,
    ++			    const char *path, unsigned flags)
    + {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    + 	off_t seekback, already_hashed_to;
    + 	struct git_hash_ctx ctx;
    + 	unsigned char obuf[16384];
     @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
      
      	header_len = format_object_header((char *)obuf, sizeof(obuf),
      					  OBJ_BLOB, size);
     -	the_hash_algo->init_fn(&ctx);
    -+	repo->hash_algo->init_fn(&ctx);
    ++	transaction->odb->repo->hash_algo->init_fn(&ctx);
      	git_hash_update(&ctx, obuf, header_len);
      
      	/* Note: idx is non-NULL when we are writing */
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      		CALLOC_ARRAY(idx, 1);
      
     -		prepare_to_stream(state, flags);
    -+		prepare_to_stream(state, repo, flags);
    ++		prepare_to_stream(transaction, flags);
      		hashfile_checkpoint_init(state->f, &checkpoint);
      	}
      
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      
      	while (1) {
     -		prepare_to_stream(state, flags);
    -+		prepare_to_stream(state, repo, flags);
    ++		prepare_to_stream(transaction, flags);
      		if (idx) {
      			hashfile_checkpoint(state->f, &checkpoint);
      			idx->offset = state->offset;
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      		hashfile_truncate(state->f, &checkpoint);
      		state->offset = checkpoint.offset;
     -		flush_bulk_checkin_packfile(state);
    -+		flush_bulk_checkin_packfile(state, repo);
    ++		flush_bulk_checkin_packfile(transaction);
      		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
      			return error("cannot seek back");
      	}
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      
      	idx->crc32 = crc32_end(state->f);
     -	if (already_written(state, result_oid)) {
    -+	if (already_written(state, repo, result_oid)) {
    ++	if (already_written(transaction, result_oid)) {
      		hashfile_truncate(state->f, &checkpoint);
      		state->offset = checkpoint.offset;
      		free(idx);
    @@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *tra
      }
      
     -int index_blob_bulk_checkin(struct odb_transaction *transaction,
    -+int index_blob_bulk_checkin(struct repository *repo,
    -+			    struct odb_transaction *transaction,
    - 			    struct object_id *oid, int fd, size_t size,
    - 			    const char *path, unsigned flags)
    +-			    struct object_id *oid, int fd, size_t size,
    +-			    const char *path, unsigned flags)
    +-{
    +-	return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
    +-				    flags);
    +-}
    +-
    + struct odb_transaction *begin_odb_transaction(struct object_database *odb)
      {
    - 	int status;
    - 
    - 	if (transaction) {
    --		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
    --					      size, path, flags);
    -+		status = deflate_blob_to_pack(&transaction->packfile,
    -+					      repo, oid, fd, size, path, flags);
    - 	} else {
    - 		struct bulk_checkin_packfile state = { 0 };
    - 
    --		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
    --		flush_bulk_checkin_packfile(&state);
    -+		status = deflate_blob_to_pack(&state, repo, oid, fd, size, path, flags);
    -+		flush_bulk_checkin_packfile(&state, repo);
    - 	}
    - 
    - 	return status;
    + 	if (!odb->transaction) {
     @@ bulk-checkin.c: void flush_odb_transaction(struct odb_transaction *transaction)
      		return;
      
      	flush_batch_fsync(transaction);
     -	flush_bulk_checkin_packfile(&transaction->packfile);
    -+	flush_bulk_checkin_packfile(&transaction->packfile,
    -+				    transaction->odb->repo);
    ++	flush_bulk_checkin_packfile(transaction);
      }
      
      void end_odb_transaction(struct odb_transaction *transaction)
    -
    - ## bulk-checkin.h ##
    -@@ bulk-checkin.h: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
    -  * binary blobs, they generally do not want to get any conversion, and
    -  * callers should avoid this code path when filters are requested.
    -  */
    --int index_blob_bulk_checkin(struct odb_transaction *transaction,
    -+int index_blob_bulk_checkin(struct repository *repo,
    -+			    struct odb_transaction *transaction,
    - 			    struct object_id *oid, int fd, size_t size,
    - 			    const char *path, unsigned flags);
    - 
    -
    - ## object-file.c ##
    -@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
    - 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
    - 				 type, path, flags);
    - 	else
    --		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
    -+		ret = index_blob_bulk_checkin(the_repository,
    -+					      the_repository->objects->transaction,
    - 					      oid, fd, xsize_t(st->st_size),
    - 					      path, flags);
    - 	close(fd);

base-commit: c44beea485f0f2feaf460e2ac87fdd5608d63cf0
-- 
2.51.0


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v2 1/4] bulk-checkin: introduce object database transaction structure
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
@ 2025-08-21 23:22   ` Justin Tobler
  2025-08-21 23:22   ` [PATCH v2 2/4] bulk-checkin: remove global transaction state Justin Tobler
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-21 23:22 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

Object database transaction state is stored across several global
variables in the bulk-checkin subsystem. Consolidate this state into a
single `struct odb_transaction` global. In a subsequent commit, the
transactional interfaces will be updated to wire this structure instead
of relying on a global variable.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index b2809ab0398..82a73da79e8 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -19,11 +19,7 @@
 #include "object-file.h"
 #include "odb.h"
 
-static int odb_transaction_nesting;
-
-static struct tmp_objdir *bulk_fsync_objdir;
-
-static struct bulk_checkin_packfile {
+struct bulk_checkin_packfile {
 	char *pack_tmp_name;
 	struct hashfile *f;
 	off_t offset;
@@ -32,7 +28,13 @@ static struct bulk_checkin_packfile {
 	struct pack_idx_entry **written;
 	uint32_t alloc_written;
 	uint32_t nr_written;
-} bulk_checkin_packfile;
+};
+
+static struct odb_transaction {
+	int nesting;
+	struct tmp_objdir *objdir;
+	struct bulk_checkin_packfile packfile;
+} transaction;
 
 static void finish_tmp_packfile(struct strbuf *basename,
 				const char *pack_tmp_name,
@@ -101,7 +103,7 @@ static void flush_batch_fsync(void)
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!bulk_fsync_objdir)
+	if (!transaction.objdir)
 		return;
 
 	/*
@@ -123,8 +125,8 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(bulk_fsync_objdir);
-	bulk_fsync_objdir = NULL;
+	tmp_objdir_migrate(transaction.objdir);
+	transaction.objdir = NULL;
 }
 
 static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
@@ -331,12 +333,12 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!odb_transaction_nesting || bulk_fsync_objdir)
+	if (!transaction.nesting || transaction.objdir)
 		return;
 
-	bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (bulk_fsync_objdir)
-		tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
+	transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	if (transaction.objdir)
+		tmp_objdir_replace_primary_odb(transaction.objdir, 0);
 }
 
 void fsync_loose_object_bulk_checkin(int fd, const char *filename)
@@ -348,7 +350,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!bulk_fsync_objdir ||
+	if (!transaction.objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -360,31 +362,31 @@ int index_blob_bulk_checkin(struct object_id *oid,
 			    int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,
+	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
 					  path, flags);
-	if (!odb_transaction_nesting)
-		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	if (!transaction.nesting)
+		flush_bulk_checkin_packfile(&transaction.packfile);
 	return status;
 }
 
 void begin_odb_transaction(void)
 {
-	odb_transaction_nesting += 1;
+	transaction.nesting += 1;
 }
 
 void flush_odb_transaction(void)
 {
 	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	flush_bulk_checkin_packfile(&transaction.packfile);
 }
 
 void end_odb_transaction(void)
 {
-	odb_transaction_nesting -= 1;
-	if (odb_transaction_nesting < 0)
+	transaction.nesting -= 1;
+	if (transaction.nesting < 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (odb_transaction_nesting)
+	if (transaction.nesting)
 		return;
 
 	flush_odb_transaction();
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v2 2/4] bulk-checkin: remove global transaction state
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
  2025-08-21 23:22   ` [PATCH v2 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
@ 2025-08-21 23:22   ` Justin Tobler
  2025-08-22 16:37     ` Junio C Hamano
  2025-08-21 23:22   ` [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-21 23:22 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

Object database transactions in the bulk-checkin subsystem rely on
global state to track transaction status. Stop relying on global state
and instead store the transaction in the `struct object_database`.
Functions that operate on transactions are updated to now wire
transaction state.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 builtin/add.c            |  5 ++-
 builtin/unpack-objects.c |  5 ++-
 builtin/update-index.c   |  7 ++--
 bulk-checkin.c           | 82 ++++++++++++++++++++++++++--------------
 bulk-checkin.h           | 18 +++++----
 cache-tree.c             |  5 ++-
 object-file.c            | 11 +++---
 odb.h                    |  8 ++++
 read-cache.c             |  5 ++-
 9 files changed, 94 insertions(+), 52 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index 0235854f809..740c7c45817 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -389,6 +389,7 @@ int cmd_add(int argc,
 	char *seen = NULL;
 	char *ps_matched = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	struct odb_transaction *transaction;
 
 	repo_config(repo, add_config, NULL);
 
@@ -574,7 +575,7 @@ int cmd_add(int argc,
 		string_list_clear(&only_match_skip_worktree, 0);
 	}
 
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 
 	ps_matched = xcalloc(pathspec.nr, 1);
 	if (add_renormalize)
@@ -593,7 +594,7 @@ int cmd_add(int argc,
 
 	if (chmod_arg && pathspec.nr)
 		exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 finish:
 	if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 7ae7c82b6c0..28124b324d2 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -584,6 +584,7 @@ static void unpack_all(void)
 {
 	int i;
 	unsigned char *hdr = fill(sizeof(struct pack_header));
+	struct odb_transaction *transaction;
 
 	if (get_be32(hdr) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -599,12 +600,12 @@ static void unpack_all(void)
 		progress = start_progress(the_repository,
 					  _("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	stop_progress(&progress);
 
 	if (delta_list)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2380f3ccd68..2ba2d29c959 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -77,7 +77,7 @@ static void report(const char *fmt, ...)
 	 * objects invisible while a transaction is active, so flush the
 	 * transaction here before reporting a change made by update-index.
 	 */
-	flush_odb_transaction();
+	flush_odb_transaction(the_repository->objects->transaction);
 	va_start(vp, fmt);
 	vprintf(fmt, vp);
 	putchar('\n');
@@ -940,6 +940,7 @@ int cmd_update_index(int argc,
 	strbuf_getline_fn getline_fn;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
 	struct repository *r = the_repository;
+	struct odb_transaction *transaction;
 	struct option options[] = {
 		OPT_BIT('q', NULL, &refresh_args.flags,
 			N_("continue refresh even when index needs update"),
@@ -1130,7 +1131,7 @@ int cmd_update_index(int argc,
 	 * Allow the object layer to optimize adding multiple objects in
 	 * a batch.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	while (ctx.argc) {
 		if (parseopt_state != PARSE_OPT_DONE)
 			parseopt_state = parse_options_step(&ctx, options,
@@ -1213,7 +1214,7 @@ int cmd_update_index(int argc,
 	/*
 	 * By now we have added all of the new objects
 	 */
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	if (split_index > 0) {
 		if (repo_config_get_split_index(the_repository) == 0)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 82a73da79e8..53a20a2d92f 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -30,11 +30,13 @@ struct bulk_checkin_packfile {
 	uint32_t nr_written;
 };
 
-static struct odb_transaction {
+struct odb_transaction {
+	struct object_database *odb;
+
 	int nesting;
 	struct tmp_objdir *objdir;
 	struct bulk_checkin_packfile packfile;
-} transaction;
+};
 
 static void finish_tmp_packfile(struct strbuf *basename,
 				const char *pack_tmp_name,
@@ -98,12 +100,12 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 /*
  * Cleanup after batch-mode fsync_object_files.
  */
-static void flush_batch_fsync(void)
+static void flush_batch_fsync(struct odb_transaction *transaction)
 {
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!transaction.objdir)
+	if (!transaction->objdir)
 		return;
 
 	/*
@@ -125,8 +127,8 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(transaction.objdir);
-	transaction.objdir = NULL;
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
 }
 
 static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
@@ -325,7 +327,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	return 0;
 }
 
-void prepare_loose_object_bulk_checkin(void)
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 {
 	/*
 	 * We lazily create the temporary object directory
@@ -333,15 +335,16 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!transaction.nesting || transaction.objdir)
+	if (!transaction || transaction->objdir)
 		return;
 
-	transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (transaction.objdir)
-		tmp_objdir_replace_primary_odb(transaction.objdir, 0);
+	transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
 
-void fsync_loose_object_bulk_checkin(int fd, const char *filename)
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename)
 {
 	/*
 	 * If we have an active ODB transaction, we issue a call that
@@ -350,7 +353,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!transaction.objdir ||
+	if (!transaction || !transaction->objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -358,36 +361,57 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	}
 }
 
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
-					  path, flags);
-	if (!transaction.nesting)
-		flush_bulk_checkin_packfile(&transaction.packfile);
+	int status;
+
+	if (transaction) {
+		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
+					      size, path, flags);
+	} else {
+		struct bulk_checkin_packfile state = { 0 };
+
+		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
+		flush_bulk_checkin_packfile(&state);
+	}
+
 	return status;
 }
 
-void begin_odb_transaction(void)
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
-	transaction.nesting += 1;
+	if (!odb->transaction) {
+		CALLOC_ARRAY(odb->transaction, 1);
+		odb->transaction->odb = odb;
+	}
+
+	odb->transaction->nesting += 1;
+
+	return odb->transaction;
 }
 
-void flush_odb_transaction(void)
+void flush_odb_transaction(struct odb_transaction *transaction)
 {
-	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&transaction.packfile);
+	if (!transaction)
+		return;
+
+	flush_batch_fsync(transaction);
+	flush_bulk_checkin_packfile(&transaction->packfile);
 }
 
-void end_odb_transaction(void)
+void end_odb_transaction(struct odb_transaction *transaction)
 {
-	transaction.nesting -= 1;
-	if (transaction.nesting < 0)
+	if (!transaction || transaction->nesting == 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (transaction.nesting)
+	transaction->nesting -= 1;
+
+	if (transaction->nesting)
 		return;
 
-	flush_odb_transaction();
+	flush_odb_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
 }
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 7246ea58dcf..16254ce6a70 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -5,9 +5,13 @@
 #define BULK_CHECKIN_H
 
 #include "object.h"
+#include "odb.h"
 
-void prepare_loose_object_bulk_checkin(void);
-void fsync_loose_object_bulk_checkin(int fd, const char *filename);
+struct odb_transaction;
+
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename);
 
 /*
  * This creates one packfile per large blob unless bulk-checkin
@@ -24,8 +28,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename);
  * binary blobs, they generally do not want to get any conversion, and
  * callers should avoid this code path when filters are requested.
  */
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags);
 
 /*
@@ -35,20 +39,20 @@ int index_blob_bulk_checkin(struct object_id *oid,
  * and objects are only visible after the outermost transaction
  * is complete or the transaction is flushed.
  */
-void begin_odb_transaction(void);
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
 
 /*
  * Make any objects that are currently part of a pending object
  * database transaction visible. It is valid to call this function
  * even if no transaction is active.
  */
-void flush_odb_transaction(void);
+void flush_odb_transaction(struct odb_transaction *transaction);
 
 /*
  * Tell the object database to make any objects from the
  * current transaction visible if this is the final nested
  * transaction.
  */
-void end_odb_transaction(void);
+void end_odb_transaction(struct odb_transaction *transaction);
 
 #endif
diff --git a/cache-tree.c b/cache-tree.c
index 66ef2becbe0..d225554eedd 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it,
 
 int cache_tree_update(struct index_state *istate, int flags)
 {
+	struct odb_transaction *transaction;
 	int skip, i;
 
 	i = verify_cache(istate, flags);
@@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags)
 
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
 		       "", 0, &skip, flags);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	trace2_region_leave("cache_tree", "update", the_repository);
 	trace_performance_leave("cache_tree_update");
 	if (i < 0)
diff --git a/object-file.c b/object-file.c
index 2bc36ab3ee8..1740aa2b2e3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source,
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	odb_loose_path(source, &filename, oid);
 
@@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
 	strbuf_addf(&filename, "%s/", source->path);
@@ -1263,8 +1263,9 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					     flags);
+		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+					      oid, fd, xsize_t(st->st_size),
+					      path, flags);
 	close(fd);
 	return ret;
 }
diff --git a/odb.h b/odb.h
index 3dfc66d75a3..a89b2143909 100644
--- a/odb.h
+++ b/odb.h
@@ -84,6 +84,7 @@ struct odb_source {
 
 struct packed_git;
 struct cached_object_entry;
+struct odb_transaction;
 
 /*
  * The object database encapsulates access to objects in a repository. It
@@ -94,6 +95,13 @@ struct object_database {
 	/* Repository that owns this database. */
 	struct repository *repo;
 
+	/*
+	 * State of current current object database transaction. Only one
+	 * transaction may be pending at a time. Is NULL when no transaction is
+	 * configured.
+	 */
+	struct odb_transaction *transaction;
+
 	/*
 	 * Set of all object directories; the main directory is first (and
 	 * cannot be NULL after initialization). Subsequent directories are
diff --git a/read-cache.c b/read-cache.c
index 06ad74db228..229b8ef11c9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 		       const struct pathspec *pathspec, char *ps_matched,
 		       int include_sparse, int flags)
 {
+	struct odb_transaction *transaction;
 	struct update_callback_data data;
 	struct rev_info rev;
 
@@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 	 * This function is invoked from commands other than 'add', which
 	 * may not have their own transaction active.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	release_revisions(&rev);
 	return !!data.add_errors;
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin()
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
  2025-08-21 23:22   ` [PATCH v2 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
  2025-08-21 23:22   ` [PATCH v2 2/4] bulk-checkin: remove global transaction state Justin Tobler
@ 2025-08-21 23:22   ` Justin Tobler
  2025-08-22 16:49     ` Junio C Hamano
  2025-08-21 23:22   ` [PATCH v2 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
  4 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-21 23:22 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

The bulk-checkin subsystem provides a mechanism to write blobs directly
to a packfile via `index_blob_bulk_checkin()`. If there is an ongoing
transaction when invoked, objects written via this function are stored
in the same packfile. The packfile is not flushed until the transaction
itself is flushed. If there is no transaction, the single object is
written to a packfile and immediately flushed. This complicates
`index_blob_bulk_checkin()` as it cannot reliably use the provided
transaction to get the associated repository.

Update `index_blob_bulk_checkin()` to assume that a valid transaction is
always provided. Callers are now expected to ensure a transaction is set
up beforehand. The single call site in `object-file.c:index_fd()` is
updated accordingly. Due to how `{begin,end}_odb_transaction()` handles
nested transactions, a new transaction is only created and committed if
there is not already an ongoing transaction.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 15 ++-------------
 bulk-checkin.h |  7 +++++--
 object-file.c  | 21 ++++++++++++++-------
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 53a20a2d92f..0e3747640b9 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -365,19 +365,8 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
 			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status;
-
-	if (transaction) {
-		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
-					      size, path, flags);
-	} else {
-		struct bulk_checkin_packfile state = { 0 };
-
-		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
-		flush_bulk_checkin_packfile(&state);
-	}
-
-	return status;
+	return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
+				    flags);
 }
 
 struct odb_transaction *begin_odb_transaction(struct object_database *odb)
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 16254ce6a70..ac8887f476b 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
 				     int fd, const char *filename);
 
 /*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
diff --git a/object-file.c b/object-file.c
index 1740aa2b2e3..bc15af42450 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
 	 * die() for large files.
 	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
 		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
+	} else if (!S_ISREG(st->st_mode)) {
 		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
+	} else if ((st->st_size >= 0 &&
+		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+		   type != OBJ_BLOB ||
+		   (path && would_convert_to_git(istate, path))) {
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
-	else
-		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+	} else {
+		struct odb_transaction *transaction;
+
+		transaction = begin_odb_transaction(the_repository->objects);
+		ret = index_blob_bulk_checkin(transaction,
 					      oid, fd, xsize_t(st->st_size),
 					      path, flags);
+		end_odb_transaction(transaction);
+	}
+
 	close(fd);
 	return ret;
 }
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v2 4/4] bulk-checkin: use repository variable from transaction
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
                     ` (2 preceding siblings ...)
  2025-08-21 23:22   ` [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
@ 2025-08-21 23:22   ` Justin Tobler
  2025-08-22 17:03     ` Junio C Hamano
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
  4 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-21 23:22 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

The bulk-checkin subsystem depends on `the_repository`. Adapt functions
and call sites to access the repository through `struct odb_transaction`
instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
`pack_compression_level` and `pack_size_limit_cfg` globals are still
used.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 84 ++++++++++++++++++++++++--------------------------
 1 file changed, 41 insertions(+), 43 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 0e3747640b9..8402d9637c3 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -38,25 +38,26 @@ struct odb_transaction {
 	struct bulk_checkin_packfile packfile;
 };
 
-static void finish_tmp_packfile(struct strbuf *basename,
-				const char *pack_tmp_name,
-				struct pack_idx_entry **written_list,
-				uint32_t nr_written,
-				struct pack_idx_option *pack_idx_opts,
+static void finish_tmp_packfile(struct odb_transaction *transaction,
+				struct strbuf *basename,
 				unsigned char hash[])
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	char *idx_tmp_name = NULL;
 
-	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
-			    written_list, nr_written, NULL, pack_idx_opts, hash,
-			    &idx_tmp_name);
-	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
+	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
+			    state->written, state->nr_written, NULL,
+			    &state->pack_idx_opts, hash, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
 
 	free(idx_tmp_name);
 }
 
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
+static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	unsigned char hash[GIT_MAX_RAWSZ];
 	struct strbuf packname = STRBUF_INIT;
 
@@ -73,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
 		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name,
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 					 state->nr_written, hash,
 					 state->offset);
 		close(fd);
 	}
 
-	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
-		    hash_to_hex(hash));
-	finish_tmp_packfile(&packname, state->pack_tmp_name,
-			    state->written, state->nr_written,
-			    &state->pack_idx_opts, hash);
+	strbuf_addf(&packname, "%s/pack/pack-%s.",
+		    transaction->odb->sources->path,
+		    hash_to_hex_algop(hash, repo->hash_algo));
+
+	finish_tmp_packfile(transaction, &packname, hash);
 	for (uint32_t i = 0; i < state->nr_written; i++)
 		free(state->written[i]);
 
@@ -94,7 +95,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 
 	strbuf_release(&packname);
 	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(the_repository);
+	reprepare_packed_git(repo);
 }
 
 /*
@@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
 	 * to ensure that the data in each new object file is durable before
 	 * the final name is visible.
 	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    transaction->odb->sources->path);
 	temp = xmks_tempfile(temp_path.buf);
 	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
 	delete_tempfile(&temp);
@@ -131,16 +133,17 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
 	transaction->objdir = NULL;
 }
 
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
+static int already_written(struct odb_transaction *transaction,
+			   struct object_id *oid)
 {
 	/* The object may already exist in the repository */
-	if (odb_has_object(the_repository->objects, oid,
+	if (odb_has_object(transaction->odb, oid,
 			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 		return 1;
 
 	/* Might want to keep the list sorted */
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		if (oideq(&state->written[i]->oid, oid))
+	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+		if (oideq(&transaction->packfile.written[i]->oid, oid))
 			return 1;
 
 	/* This is a new object we need to keep */
@@ -239,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 }
 
 /* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct bulk_checkin_packfile *state,
+static void prepare_to_stream(struct odb_transaction *transaction,
 			      unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 		return;
 
-	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
+	state->f = create_tmp_packfile(transaction->odb->repo,
+				       &state->pack_tmp_name);
 	reset_pack_idx_option(&state->pack_idx_opts);
 
 	/* Pretend we are going to write only one object */
@@ -254,11 +259,12 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state,
 		die_errno("unable to write pack header");
 }
 
-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-				struct object_id *result_oid,
-				int fd, size_t size,
-				const char *path, unsigned flags)
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *result_oid,
+			    int fd, size_t size,
+			    const char *path, unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	off_t seekback, already_hashed_to;
 	struct git_hash_ctx ctx;
 	unsigned char obuf[16384];
@@ -272,21 +278,21 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 
 	header_len = format_object_header((char *)obuf, sizeof(obuf),
 					  OBJ_BLOB, size);
-	the_hash_algo->init_fn(&ctx);
+	transaction->odb->repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
 	/* Note: idx is non-NULL when we are writing */
 	if ((flags & INDEX_WRITE_OBJECT) != 0) {
 		CALLOC_ARRAY(idx, 1);
 
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		hashfile_checkpoint_init(state->f, &checkpoint);
 	}
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		if (idx) {
 			hashfile_checkpoint(state->f, &checkpoint);
 			idx->offset = state->offset;
@@ -304,7 +310,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 			BUG("should not happen");
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
-		flush_bulk_checkin_packfile(state);
+		flush_bulk_checkin_packfile(transaction);
 		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
 			return error("cannot seek back");
 	}
@@ -313,7 +319,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 		return 0;
 
 	idx->crc32 = crc32_end(state->f);
-	if (already_written(state, result_oid)) {
+	if (already_written(transaction, result_oid)) {
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		free(idx);
@@ -338,7 +344,7 @@ void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 	if (!transaction || transaction->objdir)
 		return;
 
-	transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
 	if (transaction->objdir)
 		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
@@ -361,14 +367,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
 	}
 }
 
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
-			    struct object_id *oid, int fd, size_t size,
-			    const char *path, unsigned flags)
-{
-	return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
-				    flags);
-}
-
 struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
 	if (!odb->transaction) {
@@ -387,7 +385,7 @@ void flush_odb_transaction(struct odb_transaction *transaction)
 		return;
 
 	flush_batch_fsync(transaction);
-	flush_bulk_checkin_packfile(&transaction->packfile);
+	flush_bulk_checkin_packfile(transaction);
 }
 
 void end_odb_transaction(struct odb_transaction *transaction)
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/4] bulk-checkin: remove global transaction state
  2025-08-21 23:22   ` [PATCH v2 2/4] bulk-checkin: remove global transaction state Justin Tobler
@ 2025-08-22 16:37     ` Junio C Hamano
  2025-08-22 18:07       ` Justin Tobler
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2025-08-22 16:37 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> diff --git a/bulk-checkin.c b/bulk-checkin.c
> index 82a73da79e8..53a20a2d92f 100644
> --- a/bulk-checkin.c
> +++ b/bulk-checkin.c
> @@ -30,11 +30,13 @@ struct bulk_checkin_packfile {
>  	uint32_t nr_written;
>  };
>  
> -static struct odb_transaction {
> +struct odb_transaction {
> +	struct object_database *odb;
> +
>  	int nesting;
>  	struct tmp_objdir *objdir;
>  	struct bulk_checkin_packfile packfile;
> -} transaction;
> +};

Very nice to see that this singleton instance is now gone.  

The singleton was really handy way to allow calls into this
subsystem to lazily initialize bulk_checkin_packfile in the call
chain starting from deflate_blob_to_pack() -> prepare_to_stream().

We now need to be a lot more careful to make sure that everybody has
access to a valid bulk_checkin_packfile struct, which makes the
implementation of index_blob_bulk_checkin() a bit awkward and we
need to invent one bulk_checkin_packfile instance right there.
Luckily it goes away in the next step, I guess?

> +int index_blob_bulk_checkin(struct odb_transaction *transaction,
> +			    struct object_id *oid, int fd, size_t size,
>  			    const char *path, unsigned flags)
>  {
> -	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
> -					  path, flags);
> -	if (!transaction.nesting)
> -		flush_bulk_checkin_packfile(&transaction.packfile);
> +	int status;
> +
> +	if (transaction) {
> +		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
> +					      size, path, flags);
> +	} else {
> +		struct bulk_checkin_packfile state = { 0 };
> +
> +		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
> +		flush_bulk_checkin_packfile(&state);
> +	}
> +
>  	return status;
>  }

OK.  If we do not have a transaction (i.e. if nobody called 
begin_odb_transaction() more times than end_odb_transaction()
got called), we let the underlying machinery do the right thing on
an empty state and clean up after ourselves; otherwise we follow
exactly the same code path as we used to.

> diff --git a/cache-tree.c b/cache-tree.c
> index 66ef2becbe0..d225554eedd 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it,
>  
>  int cache_tree_update(struct index_state *istate, int flags)
>  {
> +	struct odb_transaction *transaction;
>  	int skip, i;

OK, doing the odb-transaction here would cover both this one and
write_index_as_tree and its friends, that are public interfaces into
the cache-tree subsystem.  

> diff --git a/odb.h b/odb.h
> index 3dfc66d75a3..a89b2143909 100644
> --- a/odb.h
> +++ b/odb.h
> @@ -84,6 +84,7 @@ struct odb_source {
>  
>  struct packed_git;
>  struct cached_object_entry;
> +struct odb_transaction;
>  
>  /*
>   * The object database encapsulates access to objects in a repository. It
> @@ -94,6 +95,13 @@ struct object_database {
>  	/* Repository that owns this database. */
>  	struct repository *repo;
>  
> +	/*
> +	 * State of current current object database transaction. Only one
> +	 * transaction may be pending at a time. Is NULL when no transaction is
> +	 * configured.
> +	 */
> +	struct odb_transaction *transaction;

Once dust from this topic settles, we may want to rename the
bulk-checkin.[ch] to have "odb" somewhere in its name, perhaps?  I
usually do not like renaming file for the sake of renaming to make
the result look "pretty" (people may use "consistent naming" ),
though.  I dunno.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin()
  2025-08-21 23:22   ` [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
@ 2025-08-22 16:49     ` Junio C Hamano
  2025-08-22 19:13       ` Justin Tobler
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2025-08-22 16:49 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

>  /*
> - * This creates one packfile per large blob unless bulk-checkin
> - * machinery is "plugged".
> + * This writes the specified object to a packfile. Objects written here
> + * during the same transaction are written to the same packfile. The
> + * packfile is not flushed until the transaction is flushed. The caller
> + * is expected to ensure a valid transaction is setup for objects to be
> + * recorded to.
>   *
>   * This also bypasses the usual "convert-to-git" dance, and that is on
>   * purpose. We could write a streaming version of the converting
> diff --git a/object-file.c b/object-file.c
> index 1740aa2b2e3..bc15af42450 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
>  	 * Call xsize_t() only when needed to avoid potentially unnecessary
>  	 * die() for large files.
>  	 */
> -	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
> +	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
>  		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
> -	else if (!S_ISREG(st->st_mode))
> +	} else if (!S_ISREG(st->st_mode)) {
>  		ret = index_pipe(istate, oid, fd, type, path, flags);
> -	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
> -		 type != OBJ_BLOB ||
> -		 (path && would_convert_to_git(istate, path)))
> +	} else if ((st->st_size >= 0 &&
> +		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
> +		   type != OBJ_BLOB ||
> +		   (path && would_convert_to_git(istate, path))) {
>  		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
>  				 type, path, flags);
> -	else
> -		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
> +	} else {
> +		struct odb_transaction *transaction;
> +
> +		transaction = begin_odb_transaction(the_repository->objects);
> +		ret = index_blob_bulk_checkin(transaction,
>  					      oid, fd, xsize_t(st->st_size),
>  					      path, flags);
> +		end_odb_transaction(transaction);
> +	}

Interesting.  If the caller does the odb transaction management
itself by calling begin/end before calling this function and fed two
or more large objects, the original code did the right thing with
.nesting set to 1.  In the new code, it still does the right thing
even during the call to index_blob_bulk_checkin we raise .nesting by
one, because the all non-zero .nesting values mean the same thing to
the machinery, thanks to lazy initialization of the .objdir.

So, isn't the comment above the function now less accurate than
before?  The caller of this function does not have to do anything
and we do not expect the caller to "ensure a valid transaction" at
all, no?

Thanks.





^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 4/4] bulk-checkin: use repository variable from transaction
  2025-08-21 23:22   ` [PATCH v2 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
@ 2025-08-22 17:03     ` Junio C Hamano
  2025-08-22 19:38       ` Justin Tobler
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2025-08-22 17:03 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> The bulk-checkin subsystem depends on `the_repository`. Adapt functions
> and call sites to access the repository through `struct odb_transaction`
> instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
> `pack_compression_level` and `pack_size_limit_cfg` globals are still
> used.

Also we grab the details of the new packfile the bulk-checkin
machinery is building out of the transaction, which made some
redundant parameters functions take go away, ...

> -static void finish_tmp_packfile(struct strbuf *basename,
> -				const char *pack_tmp_name,
> -				struct pack_idx_entry **written_list,
> -				uint32_t nr_written,
> -				struct pack_idx_option *pack_idx_opts,
> +static void finish_tmp_packfile(struct odb_transaction *transaction,
> +				struct strbuf *basename,
>  				unsigned char hash[])

... like this one.  Very nice.

> @@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
>  	 * to ensure that the data in each new object file is durable before
>  	 * the final name is visible.
>  	 */
> -	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
> +	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
> +		    transaction->odb->sources->path);

This is doing a lot more than a simple "the_repository" ->
"odb->repo" replacement.  How much confidence do we have
that the internal detail of repo_get_object_directory() will stay
the same and our developers in the future would spot that this open
coded copy needs to be updated if they have to change it?

> -static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
> -				struct object_id *result_oid,
> -				int fd, size_t size,
> -				const char *path, unsigned flags)
> +int index_blob_bulk_checkin(struct odb_transaction *transaction,
> +			    struct object_id *result_oid,
> +			    int fd, size_t size,
> +			    const char *path, unsigned flags)

Ahh, OK, with the simplification to always take transaction, there
is no need to have the deflate_blob_to_pack() function, which had
only one caller, as an internal implementation detail anymore.

This change could have been in the previous step and it would have
been less surprising; the above "Ahh, OK" is my reaction to the
surprise ;-)


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/4] bulk-checkin: remove global transaction state
  2025-08-22 16:37     ` Junio C Hamano
@ 2025-08-22 18:07       ` Justin Tobler
  2025-08-22 20:25         ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 18:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, ps

On 25/08/22 09:37AM, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:

> We now need to be a lot more careful to make sure that everybody has
> access to a valid bulk_checkin_packfile struct, which makes the
> implementation of index_blob_bulk_checkin() a bit awkward and we
> need to invent one bulk_checkin_packfile instance right there.
> Luckily it goes away in the next step, I guess?

Ya, now that we cannot rely on the transaction singleton to provide a
bulk_checkin_packfile, we have create one if there is no transaction. I
completely agree that this is awkward which is why the next step
requires index_blob_bulk_checkin() callers to ensure a transaction is
created ahead of time.

If it is preferable, I can combine these two patches together. I kept
them separate as I thought it would better explain the steps, but maybe
that isn't the best way to structure the patch as we are largely undoing
the change in the next patch anyway.

> > diff --git a/odb.h b/odb.h
> > index 3dfc66d75a3..a89b2143909 100644
> > --- a/odb.h
> > +++ b/odb.h
> > @@ -84,6 +84,7 @@ struct odb_source {
> >  
> >  struct packed_git;
> >  struct cached_object_entry;
> > +struct odb_transaction;
> >  
> >  /*
> >   * The object database encapsulates access to objects in a repository. It
> > @@ -94,6 +95,13 @@ struct object_database {
> >  	/* Repository that owns this database. */
> >  	struct repository *repo;
> >  
> > +	/*
> > +	 * State of current current object database transaction. Only one
> > +	 * transaction may be pending at a time. Is NULL when no transaction is
> > +	 * configured.
> > +	 */
> > +	struct odb_transaction *transaction;
> 
> Once dust from this topic settles, we may want to rename the
> bulk-checkin.[ch] to have "odb" somewhere in its name, perhaps?  I
> usually do not like renaming file for the sake of renaming to make
> the result look "pretty" (people may use "consistent naming" ),
> though.  I dunno.

I'm hoping for an eventual state where the transactional interfaces,
{begin,end}_odb_transaction(), are moved directly into odb.{h,c}. The
current implementation of transaction handling is specific to the
current object database source. In a pluggable object database future
where we could have different types of object database sources,
transaction handling will likely have to implemented separately.

With this in mind, we could move the current transaction implementation
into something like object-files.c. This is already where the vast
majority of its call sites are and would enable us to further simply the
interface we expose.

I plan do submit a followup series to do this. :)

-Justin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin()
  2025-08-22 16:49     ` Junio C Hamano
@ 2025-08-22 19:13       ` Justin Tobler
  2025-08-22 20:33         ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 19:13 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, ps

On 25/08/22 09:49AM, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:
> 
> >  /*
> > - * This creates one packfile per large blob unless bulk-checkin
> > - * machinery is "plugged".
> > + * This writes the specified object to a packfile. Objects written here
> > + * during the same transaction are written to the same packfile. The
> > + * packfile is not flushed until the transaction is flushed. The caller
> > + * is expected to ensure a valid transaction is setup for objects to be
> > + * recorded to.
> >   *
> >   * This also bypasses the usual "convert-to-git" dance, and that is on
> >   * purpose. We could write a streaming version of the converting
> > diff --git a/object-file.c b/object-file.c
> > index 1740aa2b2e3..bc15af42450 100644
> > --- a/object-file.c
> > +++ b/object-file.c
> > @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
> >  	 * Call xsize_t() only when needed to avoid potentially unnecessary
> >  	 * die() for large files.
> >  	 */
> > -	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
> > +	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
> >  		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
> > -	else if (!S_ISREG(st->st_mode))
> > +	} else if (!S_ISREG(st->st_mode)) {
> >  		ret = index_pipe(istate, oid, fd, type, path, flags);
> > -	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
> > -		 type != OBJ_BLOB ||
> > -		 (path && would_convert_to_git(istate, path)))
> > +	} else if ((st->st_size >= 0 &&
> > +		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
> > +		   type != OBJ_BLOB ||
> > +		   (path && would_convert_to_git(istate, path))) {
> >  		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
> >  				 type, path, flags);
> > -	else
> > -		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
> > +	} else {
> > +		struct odb_transaction *transaction;
> > +
> > +		transaction = begin_odb_transaction(the_repository->objects);
> > +		ret = index_blob_bulk_checkin(transaction,
> >  					      oid, fd, xsize_t(st->st_size),
> >  					      path, flags);
> > +		end_odb_transaction(transaction);
> > +	}
> 
> Interesting.  If the caller does the odb transaction management
> itself by calling begin/end before calling this function and fed two
> or more large objects, the original code did the right thing with
> .nesting set to 1.  In the new code, it still does the right thing
> even during the call to index_blob_bulk_checkin we raise .nesting by
> one, because the all non-zero .nesting values mean the same thing to
> the machinery, thanks to lazy initialization of the .objdir.

In the original code, if no transaction was setup prior to invoking
index_blob_bulk_checkin() (i.e. nesting == 0), the object would be
written to its own packfile and flushed immediately. If a transaction
had been started further upstream (i.e nesting > 0), the object would be
written to a packfile, but not flushed. This allowed for subseqent calls
to index_blob_bulk_checkin() to write objects to the same packfile. Only
when transaction ended would the packfile be flushed.

With this change, index_blob_bulk_checkin() must be provided a
transaction so it can write the object to the packfile. As there is now
always a transaction involved, there is no longer any automatic packfile
flushing. The caller is required to ensure a transaction handling as
appropriate.

> So, isn't the comment above the function now less accurate than
> before?  The caller of this function does not have to do anything
> and we do not expect the caller to "ensure a valid transaction" at
> all, no?

I'm not quite sure I follow. index_blob_bulk_checkin() now expects a
transaction to be setup even if we intend to only write a single object.
Thus the call site in index_fd() is adjusted to ensure there is a
transaction via invoking begin_odb_transaction() and
end_odb_transaction() before/after the function respectively.

Just to clarify, are we talking about the comment above
index_blob_bulk_checkin()?

-Justin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 4/4] bulk-checkin: use repository variable from transaction
  2025-08-22 17:03     ` Junio C Hamano
@ 2025-08-22 19:38       ` Justin Tobler
  0 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 19:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, ps

On 25/08/22 10:03AM, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:
> 
> > The bulk-checkin subsystem depends on `the_repository`. Adapt functions
> > and call sites to access the repository through `struct odb_transaction`
> > instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
> > `pack_compression_level` and `pack_size_limit_cfg` globals are still
> > used.
> 
> Also we grab the details of the new packfile the bulk-checkin
> machinery is building out of the transaction, which made some
> redundant parameters functions take go away, ...

I'll mention this in the commit message in the next version.

> > @@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
> >  	 * to ensure that the data in each new object file is durable before
> >  	 * the final name is visible.
> >  	 */
> > -	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
> > +	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
> > +		    transaction->odb->sources->path);
> 
> This is doing a lot more than a simple "the_repository" ->
> "odb->repo" replacement.  How much confidence do we have
> that the internal detail of repo_get_object_directory() will stay
> the same and our developers in the future would spot that this open
> coded copy needs to be updated if they have to change it?

That's fair. With this change we are less defensive since we are not
bugging out if the object source is not set up. There are other
instances throughout the codebase where the object directory is accessed
directly through the repository in an open coded manner instead of going
through repo_get_object_directory(). Regardless, it's probably best to
just keep using repo_get_object_directory() here anyway.

I'll update in the next version.

> > -static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
> > -				struct object_id *result_oid,
> > -				int fd, size_t size,
> > -				const char *path, unsigned flags)
> > +int index_blob_bulk_checkin(struct odb_transaction *transaction,
> > +			    struct object_id *result_oid,
> > +			    int fd, size_t size,
> > +			    const char *path, unsigned flags)
> 
> Ahh, OK, with the simplification to always take transaction, there
> is no need to have the deflate_blob_to_pack() function, which had
> only one caller, as an internal implementation detail anymore.
> 
> This change could have been in the previous step and it would have
> been less surprising; the above "Ahh, OK" is my reaction to the
> surprise ;-)

In the next version, I'll perform this in the previous step instead. :)

Thanks,
-Justin

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 2/4] bulk-checkin: remove global transaction state
  2025-08-22 18:07       ` Justin Tobler
@ 2025-08-22 20:25         ` Junio C Hamano
  0 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2025-08-22 20:25 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> If it is preferable, I can combine these two patches together. I kept
> them separate as I thought it would better explain the steps, but maybe
> that isn't the best way to structure the patch as we are largely undoing
> the change in the next patch anyway.

This shouldn't make much difference, but I prefer the current
"awkared but done as two steps" arrangement slightly better.

> I'm hoping for an eventual state where the transactional interfaces,
> {begin,end}_odb_transaction(), are moved directly into odb.{h,c}.

Yes.

> The
> current implementation of transaction handling is specific to the
> current object database source. In a pluggable object database future
> where we could have different types of object database sources,
> transaction handling will likely have to implemented separately.

Yup.

> With this in mind, we could move the current transaction implementation
> into something like object-files.c. This is already where the vast
> majority of its call sites are and would enable us to further simply the
> interface we expose.

Great.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin()
  2025-08-22 19:13       ` Justin Tobler
@ 2025-08-22 20:33         ` Junio C Hamano
  0 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2025-08-22 20:33 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> On 25/08/22 09:49AM, Junio C Hamano wrote:
>> Justin Tobler <jltobler@gmail.com> writes:
>> 
>> >  /*
>> > - * This creates one packfile per large blob unless bulk-checkin
>> > - * machinery is "plugged".
>> > + * This writes the specified object to a packfile. Objects written here
>> > + * during the same transaction are written to the same packfile. The
>> > + * packfile is not flushed until the transaction is flushed. The caller
>> > + * is expected to ensure a valid transaction is setup for objects to be
>> > + * recorded to.
>> >   *
>> >   * This also bypasses the usual "convert-to-git" dance, and that is on
>> >   * purpose. We could write a streaming version of the converting
>> > diff --git a/object-file.c b/object-file.c
>> > index 1740aa2b2e3..bc15af42450 100644
>> > --- a/object-file.c
>> > +++ b/object-file.c
>> > @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
>> >  	 * Call xsize_t() only when needed to avoid potentially unnecessary
>> >  	 * die() for large files.
>> >  	 */
>> > -	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
>> > +	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
>> >  		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
>> > -	else if (!S_ISREG(st->st_mode))
>> > +	} else if (!S_ISREG(st->st_mode)) {
>> >  		ret = index_pipe(istate, oid, fd, type, path, flags);
>> > -	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
>> > -		 type != OBJ_BLOB ||
>> > -		 (path && would_convert_to_git(istate, path)))
>> > +	} else if ((st->st_size >= 0 &&
>> > +		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
>> > +		   type != OBJ_BLOB ||
>> > +		   (path && would_convert_to_git(istate, path))) {
>> >  		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
>> >  				 type, path, flags);
>> > -	else
>> > -		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
>> > +	} else {
>> > +		struct odb_transaction *transaction;
>> > +
>> > +		transaction = begin_odb_transaction(the_repository->objects);
>> > +		ret = index_blob_bulk_checkin(transaction,
>> >  					      oid, fd, xsize_t(st->st_size),
>> >  					      path, flags);
>> > +		end_odb_transaction(transaction);
>> > +	}
>> 
>> Interesting.  If the caller does the odb transaction management
>> itself by calling begin/end before calling this function and fed two
>> or more large objects, the original code did the right thing with
>> .nesting set to 1.  In the new code, it still does the right thing
>> even during the call to index_blob_bulk_checkin we raise .nesting by
>> one, because the all non-zero .nesting values mean the same thing to
>> the machinery, thanks to lazy initialization of the .objdir.
>
> In the original code, if no transaction was setup prior to invoking
> index_blob_bulk_checkin() (i.e. nesting == 0), the object would be
> written to its own packfile and flushed immediately. If a transaction
> had been started further upstream (i.e nesting > 0), the object would be
> written to a packfile, but not flushed. This allowed for subseqent calls
> to index_blob_bulk_checkin() to write objects to the same packfile. Only
> when transaction ended would the packfile be flushed.
>
> With this change, index_blob_bulk_checkin() must be provided a
> transaction so it can write the object to the packfile. As there is now
> always a transaction involved, there is no longer any automatic packfile
> flushing. The caller is required to ensure a transaction handling as
> appropriate.
>
>> So, isn't the comment above the function now less accurate than
>> before?  The caller of this function does not have to do anything
>> and we do not expect the caller to "ensure a valid transaction" at
>> all, no?
>
> I'm not quite sure I follow. index_blob_bulk_checkin() now expects a
> transaction to be setup even if we intend to only write a single object.
> Thus the call site in index_fd() is adjusted to ensure there is a
> transaction via invoking begin_odb_transaction() and
> end_odb_transaction() before/after the function respectively.
>
> Just to clarify, are we talking about the comment above
> index_blob_bulk_checkin()?

Ahh, sorry, I misread the patch and missed the hunk/file boundary;
iow, thought the comment was about index_fd(), which was the source
of my confusion.

Thanks for correcting me.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v3 0/4] bulk-checkin: remove global transaction state
  2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
                     ` (3 preceding siblings ...)
  2025-08-21 23:22   ` [PATCH v2 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
@ 2025-08-22 21:34   ` Justin Tobler
  2025-08-22 21:34     ` [PATCH v3 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
                       ` (4 more replies)
  4 siblings, 5 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 21:34 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

Greetings,

The bulk-checkin subsystem provides an interface to write objects to the
object database in a bulk transaction. The state of an ongoing
transaction is stored across several global variables. This series aims
to remove this global transaction state in favor of storing state in in
`struct object_database`. This is done in preparation for a follow-up
change where the goal is to eventually move these transaction interfaces
into "odb.h".

Changes since V2:

- `index_blob_bulk_checkin()` is combined with
  `deflate_blob_bulk_checkin()` in patch 3 instead of 4.
- Continue to use `repo_get_object_directory()` instead of open coding.

Changes since V1:

- `index_blob_bulk_checkin()` now assumes that the caller always
  provides a setup `struct odb_transaction`. Callers are adjusted to
  ensure this.
- Functions in bulk-checkin.c now consistently access the repository
  through the provided `odb_transaction`.

Thanks,
-Justin

Justin Tobler (4):
  bulk-checkin: introduce object database transaction structure
  bulk-checkin: remove global transaction state
  bulk-checkin: require transaction for index_blob_bulk_checkin()
  bulk-checkin: use repository variable from transaction

 builtin/add.c            |   5 +-
 builtin/unpack-objects.c |   5 +-
 builtin/update-index.c   |   7 +-
 bulk-checkin.c           | 152 +++++++++++++++++++++------------------
 bulk-checkin.h           |  25 ++++---
 cache-tree.c             |   5 +-
 object-file.c            |  30 +++++---
 odb.h                    |   8 +++
 read-cache.c             |   5 +-
 9 files changed, 141 insertions(+), 101 deletions(-)

Range-diff against v2:
1:  5c9358e0b03 = 1:  5c9358e0b03 bulk-checkin: introduce object database transaction structure
2:  4a1b80a6baf = 2:  4a1b80a6baf bulk-checkin: remove global transaction state
3:  ce329932fdd ! 3:  ae5dbd0e1af bulk-checkin: require transaction for index_blob_bulk_checkin()
    @@ Commit message
     
         Update `index_blob_bulk_checkin()` to assume that a valid transaction is
         always provided. Callers are now expected to ensure a transaction is set
    -    up beforehand. The single call site in `object-file.c:index_fd()` is
    -    updated accordingly. Due to how `{begin,end}_odb_transaction()` handles
    -    nested transactions, a new transaction is only created and committed if
    -    there is not already an ongoing transaction.
    +    up beforehand. With this simplification, `deflate_blob_bulk_checkin()`
    +    is no longer needed as a standalone internal function and is combined
    +    with `index_blob_bulk_checkin()`. The single call site in
    +    `object-file.c:index_fd()` is updated accordingly. Due to how
    +    `{begin,end}_odb_transaction()` handles nested transactions, a new
    +    transaction is only created and committed if there is not already an
    +    ongoing transaction.
     
         Signed-off-by: Justin Tobler <jltobler@gmail.com>
     
      ## bulk-checkin.c ##
    -@@ bulk-checkin.c: int index_blob_bulk_checkin(struct odb_transaction *transaction,
    - 			    struct object_id *oid, int fd, size_t size,
    - 			    const char *path, unsigned flags)
    +@@ bulk-checkin.c: static void prepare_to_stream(struct bulk_checkin_packfile *state,
    + 		die_errno("unable to write pack header");
    + }
    + 
    +-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    +-				struct object_id *result_oid,
    +-				int fd, size_t size,
    +-				const char *path, unsigned flags)
    ++int index_blob_bulk_checkin(struct odb_transaction *transaction,
    ++			    struct object_id *result_oid, int fd, size_t size,
    ++			    const char *path, unsigned flags)
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    + 	off_t seekback, already_hashed_to;
    + 	struct git_hash_ctx ctx;
    + 	unsigned char obuf[16384];
    +@@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
    + 	}
    + }
    + 
    +-int index_blob_bulk_checkin(struct odb_transaction *transaction,
    +-			    struct object_id *oid, int fd, size_t size,
    +-			    const char *path, unsigned flags)
    +-{
     -	int status;
     -
     -	if (transaction) {
    @@ bulk-checkin.c: int index_blob_bulk_checkin(struct odb_transaction *transaction,
     -	}
     -
     -	return status;
    -+	return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
    -+				    flags);
    - }
    - 
    +-}
    +-
      struct odb_transaction *begin_odb_transaction(struct object_database *odb)
    + {
    + 	if (!odb->transaction) {
     
      ## bulk-checkin.h ##
     @@ bulk-checkin.h: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
4:  08e26647915 ! 4:  a05af82fddb bulk-checkin: use repository variable from transaction
    @@ Commit message
         `pack_compression_level` and `pack_size_limit_cfg` globals are still
         used.
     
    +    Also adapt functions using packfile state to instead access it through
    +    the transaction. This makes some function parameters redundant and go
    +    away.
    +
         Signed-off-by: Justin Tobler <jltobler@gmail.com>
     
      ## bulk-checkin.c ##
    @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_pack
     -			    state->written, state->nr_written,
     -			    &state->pack_idx_opts, hash);
     +	strbuf_addf(&packname, "%s/pack/pack-%s.",
    -+		    transaction->odb->sources->path,
    ++		    repo_get_object_directory(transaction->odb->repo),
     +		    hash_to_hex_algop(hash, repo->hash_algo));
     +
     +	finish_tmp_packfile(transaction, &packname, hash);
    @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
      	 */
     -	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
     +	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
    -+		    transaction->odb->sources->path);
    ++		    repo_get_object_directory(transaction->odb->repo));
      	temp = xmks_tempfile(temp_path.buf);
      	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
      	delete_tempfile(&temp);
    @@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *sta
      	reset_pack_idx_option(&state->pack_idx_opts);
      
      	/* Pretend we are going to write only one object */
    -@@ bulk-checkin.c: static void prepare_to_stream(struct bulk_checkin_packfile *state,
    - 		die_errno("unable to write pack header");
    - }
    - 
    --static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    --				struct object_id *result_oid,
    --				int fd, size_t size,
    --				const char *path, unsigned flags)
    -+int index_blob_bulk_checkin(struct odb_transaction *transaction,
    -+			    struct object_id *result_oid,
    -+			    int fd, size_t size,
    -+			    const char *path, unsigned flags)
    - {
    -+	struct bulk_checkin_packfile *state = &transaction->packfile;
    - 	off_t seekback, already_hashed_to;
    - 	struct git_hash_ctx ctx;
    - 	unsigned char obuf[16384];
    -@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    +@@ bulk-checkin.c: int index_blob_bulk_checkin(struct odb_transaction *transaction,
      
      	header_len = format_object_header((char *)obuf, sizeof(obuf),
      					  OBJ_BLOB, size);
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      		if (idx) {
      			hashfile_checkpoint(state->f, &checkpoint);
      			idx->offset = state->offset;
    -@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    +@@ bulk-checkin.c: int index_blob_bulk_checkin(struct odb_transaction *transaction,
      			BUG("should not happen");
      		hashfile_truncate(state->f, &checkpoint);
      		state->offset = checkpoint.offset;
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
      			return error("cannot seek back");
      	}
    -@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    +@@ bulk-checkin.c: int index_blob_bulk_checkin(struct odb_transaction *transaction,
      		return 0;
      
      	idx->crc32 = crc32_end(state->f);
    @@ bulk-checkin.c: void prepare_loose_object_bulk_checkin(struct odb_transaction *t
      	if (transaction->objdir)
      		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
      }
    -@@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
    - 	}
    - }
    - 
    --int index_blob_bulk_checkin(struct odb_transaction *transaction,
    --			    struct object_id *oid, int fd, size_t size,
    --			    const char *path, unsigned flags)
    --{
    --	return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
    --				    flags);
    --}
    --
    - struct odb_transaction *begin_odb_transaction(struct object_database *odb)
    - {
    - 	if (!odb->transaction) {
     @@ bulk-checkin.c: void flush_odb_transaction(struct odb_transaction *transaction)
      		return;
      

base-commit: c44beea485f0f2feaf460e2ac87fdd5608d63cf0
-- 
2.51.0


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v3 1/4] bulk-checkin: introduce object database transaction structure
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
@ 2025-08-22 21:34     ` Justin Tobler
  2025-08-22 21:34     ` [PATCH v3 2/4] bulk-checkin: remove global transaction state Justin Tobler
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 21:34 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

Object database transaction state is stored across several global
variables in the bulk-checkin subsystem. Consolidate this state into a
single `struct odb_transaction` global. In a subsequent commit, the
transactional interfaces will be updated to wire this structure instead
of relying on a global variable.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index b2809ab0398..82a73da79e8 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -19,11 +19,7 @@
 #include "object-file.h"
 #include "odb.h"
 
-static int odb_transaction_nesting;
-
-static struct tmp_objdir *bulk_fsync_objdir;
-
-static struct bulk_checkin_packfile {
+struct bulk_checkin_packfile {
 	char *pack_tmp_name;
 	struct hashfile *f;
 	off_t offset;
@@ -32,7 +28,13 @@ static struct bulk_checkin_packfile {
 	struct pack_idx_entry **written;
 	uint32_t alloc_written;
 	uint32_t nr_written;
-} bulk_checkin_packfile;
+};
+
+static struct odb_transaction {
+	int nesting;
+	struct tmp_objdir *objdir;
+	struct bulk_checkin_packfile packfile;
+} transaction;
 
 static void finish_tmp_packfile(struct strbuf *basename,
 				const char *pack_tmp_name,
@@ -101,7 +103,7 @@ static void flush_batch_fsync(void)
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!bulk_fsync_objdir)
+	if (!transaction.objdir)
 		return;
 
 	/*
@@ -123,8 +125,8 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(bulk_fsync_objdir);
-	bulk_fsync_objdir = NULL;
+	tmp_objdir_migrate(transaction.objdir);
+	transaction.objdir = NULL;
 }
 
 static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
@@ -331,12 +333,12 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!odb_transaction_nesting || bulk_fsync_objdir)
+	if (!transaction.nesting || transaction.objdir)
 		return;
 
-	bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (bulk_fsync_objdir)
-		tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
+	transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	if (transaction.objdir)
+		tmp_objdir_replace_primary_odb(transaction.objdir, 0);
 }
 
 void fsync_loose_object_bulk_checkin(int fd, const char *filename)
@@ -348,7 +350,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!bulk_fsync_objdir ||
+	if (!transaction.objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -360,31 +362,31 @@ int index_blob_bulk_checkin(struct object_id *oid,
 			    int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,
+	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
 					  path, flags);
-	if (!odb_transaction_nesting)
-		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	if (!transaction.nesting)
+		flush_bulk_checkin_packfile(&transaction.packfile);
 	return status;
 }
 
 void begin_odb_transaction(void)
 {
-	odb_transaction_nesting += 1;
+	transaction.nesting += 1;
 }
 
 void flush_odb_transaction(void)
 {
 	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	flush_bulk_checkin_packfile(&transaction.packfile);
 }
 
 void end_odb_transaction(void)
 {
-	odb_transaction_nesting -= 1;
-	if (odb_transaction_nesting < 0)
+	transaction.nesting -= 1;
+	if (transaction.nesting < 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (odb_transaction_nesting)
+	if (transaction.nesting)
 		return;
 
 	flush_odb_transaction();
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 2/4] bulk-checkin: remove global transaction state
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
  2025-08-22 21:34     ` [PATCH v3 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
@ 2025-08-22 21:34     ` Justin Tobler
  2025-08-22 21:34     ` [PATCH v3 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 21:34 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

Object database transactions in the bulk-checkin subsystem rely on
global state to track transaction status. Stop relying on global state
and instead store the transaction in the `struct object_database`.
Functions that operate on transactions are updated to now wire
transaction state.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 builtin/add.c            |  5 ++-
 builtin/unpack-objects.c |  5 ++-
 builtin/update-index.c   |  7 ++--
 bulk-checkin.c           | 82 ++++++++++++++++++++++++++--------------
 bulk-checkin.h           | 18 +++++----
 cache-tree.c             |  5 ++-
 object-file.c            | 11 +++---
 odb.h                    |  8 ++++
 read-cache.c             |  5 ++-
 9 files changed, 94 insertions(+), 52 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index 0235854f809..740c7c45817 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -389,6 +389,7 @@ int cmd_add(int argc,
 	char *seen = NULL;
 	char *ps_matched = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	struct odb_transaction *transaction;
 
 	repo_config(repo, add_config, NULL);
 
@@ -574,7 +575,7 @@ int cmd_add(int argc,
 		string_list_clear(&only_match_skip_worktree, 0);
 	}
 
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 
 	ps_matched = xcalloc(pathspec.nr, 1);
 	if (add_renormalize)
@@ -593,7 +594,7 @@ int cmd_add(int argc,
 
 	if (chmod_arg && pathspec.nr)
 		exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 finish:
 	if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 7ae7c82b6c0..28124b324d2 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -584,6 +584,7 @@ static void unpack_all(void)
 {
 	int i;
 	unsigned char *hdr = fill(sizeof(struct pack_header));
+	struct odb_transaction *transaction;
 
 	if (get_be32(hdr) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -599,12 +600,12 @@ static void unpack_all(void)
 		progress = start_progress(the_repository,
 					  _("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	stop_progress(&progress);
 
 	if (delta_list)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2380f3ccd68..2ba2d29c959 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -77,7 +77,7 @@ static void report(const char *fmt, ...)
 	 * objects invisible while a transaction is active, so flush the
 	 * transaction here before reporting a change made by update-index.
 	 */
-	flush_odb_transaction();
+	flush_odb_transaction(the_repository->objects->transaction);
 	va_start(vp, fmt);
 	vprintf(fmt, vp);
 	putchar('\n');
@@ -940,6 +940,7 @@ int cmd_update_index(int argc,
 	strbuf_getline_fn getline_fn;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
 	struct repository *r = the_repository;
+	struct odb_transaction *transaction;
 	struct option options[] = {
 		OPT_BIT('q', NULL, &refresh_args.flags,
 			N_("continue refresh even when index needs update"),
@@ -1130,7 +1131,7 @@ int cmd_update_index(int argc,
 	 * Allow the object layer to optimize adding multiple objects in
 	 * a batch.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	while (ctx.argc) {
 		if (parseopt_state != PARSE_OPT_DONE)
 			parseopt_state = parse_options_step(&ctx, options,
@@ -1213,7 +1214,7 @@ int cmd_update_index(int argc,
 	/*
 	 * By now we have added all of the new objects
 	 */
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	if (split_index > 0) {
 		if (repo_config_get_split_index(the_repository) == 0)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 82a73da79e8..53a20a2d92f 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -30,11 +30,13 @@ struct bulk_checkin_packfile {
 	uint32_t nr_written;
 };
 
-static struct odb_transaction {
+struct odb_transaction {
+	struct object_database *odb;
+
 	int nesting;
 	struct tmp_objdir *objdir;
 	struct bulk_checkin_packfile packfile;
-} transaction;
+};
 
 static void finish_tmp_packfile(struct strbuf *basename,
 				const char *pack_tmp_name,
@@ -98,12 +100,12 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 /*
  * Cleanup after batch-mode fsync_object_files.
  */
-static void flush_batch_fsync(void)
+static void flush_batch_fsync(struct odb_transaction *transaction)
 {
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!transaction.objdir)
+	if (!transaction->objdir)
 		return;
 
 	/*
@@ -125,8 +127,8 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(transaction.objdir);
-	transaction.objdir = NULL;
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
 }
 
 static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
@@ -325,7 +327,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	return 0;
 }
 
-void prepare_loose_object_bulk_checkin(void)
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 {
 	/*
 	 * We lazily create the temporary object directory
@@ -333,15 +335,16 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!transaction.nesting || transaction.objdir)
+	if (!transaction || transaction->objdir)
 		return;
 
-	transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (transaction.objdir)
-		tmp_objdir_replace_primary_odb(transaction.objdir, 0);
+	transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
 
-void fsync_loose_object_bulk_checkin(int fd, const char *filename)
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename)
 {
 	/*
 	 * If we have an active ODB transaction, we issue a call that
@@ -350,7 +353,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!transaction.objdir ||
+	if (!transaction || !transaction->objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -358,36 +361,57 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	}
 }
 
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags)
 {
-	int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size,
-					  path, flags);
-	if (!transaction.nesting)
-		flush_bulk_checkin_packfile(&transaction.packfile);
+	int status;
+
+	if (transaction) {
+		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
+					      size, path, flags);
+	} else {
+		struct bulk_checkin_packfile state = { 0 };
+
+		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
+		flush_bulk_checkin_packfile(&state);
+	}
+
 	return status;
 }
 
-void begin_odb_transaction(void)
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
-	transaction.nesting += 1;
+	if (!odb->transaction) {
+		CALLOC_ARRAY(odb->transaction, 1);
+		odb->transaction->odb = odb;
+	}
+
+	odb->transaction->nesting += 1;
+
+	return odb->transaction;
 }
 
-void flush_odb_transaction(void)
+void flush_odb_transaction(struct odb_transaction *transaction)
 {
-	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&transaction.packfile);
+	if (!transaction)
+		return;
+
+	flush_batch_fsync(transaction);
+	flush_bulk_checkin_packfile(&transaction->packfile);
 }
 
-void end_odb_transaction(void)
+void end_odb_transaction(struct odb_transaction *transaction)
 {
-	transaction.nesting -= 1;
-	if (transaction.nesting < 0)
+	if (!transaction || transaction->nesting == 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (transaction.nesting)
+	transaction->nesting -= 1;
+
+	if (transaction->nesting)
 		return;
 
-	flush_odb_transaction();
+	flush_odb_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
 }
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 7246ea58dcf..16254ce6a70 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -5,9 +5,13 @@
 #define BULK_CHECKIN_H
 
 #include "object.h"
+#include "odb.h"
 
-void prepare_loose_object_bulk_checkin(void);
-void fsync_loose_object_bulk_checkin(int fd, const char *filename);
+struct odb_transaction;
+
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename);
 
 /*
  * This creates one packfile per large blob unless bulk-checkin
@@ -24,8 +28,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename);
  * binary blobs, they generally do not want to get any conversion, and
  * callers should avoid this code path when filters are requested.
  */
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags);
 
 /*
@@ -35,20 +39,20 @@ int index_blob_bulk_checkin(struct object_id *oid,
  * and objects are only visible after the outermost transaction
  * is complete or the transaction is flushed.
  */
-void begin_odb_transaction(void);
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
 
 /*
  * Make any objects that are currently part of a pending object
  * database transaction visible. It is valid to call this function
  * even if no transaction is active.
  */
-void flush_odb_transaction(void);
+void flush_odb_transaction(struct odb_transaction *transaction);
 
 /*
  * Tell the object database to make any objects from the
  * current transaction visible if this is the final nested
  * transaction.
  */
-void end_odb_transaction(void);
+void end_odb_transaction(struct odb_transaction *transaction);
 
 #endif
diff --git a/cache-tree.c b/cache-tree.c
index 66ef2becbe0..d225554eedd 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it,
 
 int cache_tree_update(struct index_state *istate, int flags)
 {
+	struct odb_transaction *transaction;
 	int skip, i;
 
 	i = verify_cache(istate, flags);
@@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags)
 
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
 		       "", 0, &skip, flags);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	trace2_region_leave("cache_tree", "update", the_repository);
 	trace_performance_leave("cache_tree_update");
 	if (i < 0)
diff --git a/object-file.c b/object-file.c
index 2bc36ab3ee8..1740aa2b2e3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source,
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	odb_loose_path(source, &filename, oid);
 
@@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
 	strbuf_addf(&filename, "%s/", source->path);
@@ -1263,8 +1263,9 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					     flags);
+		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+					      oid, fd, xsize_t(st->st_size),
+					      path, flags);
 	close(fd);
 	return ret;
 }
diff --git a/odb.h b/odb.h
index 3dfc66d75a3..a89b2143909 100644
--- a/odb.h
+++ b/odb.h
@@ -84,6 +84,7 @@ struct odb_source {
 
 struct packed_git;
 struct cached_object_entry;
+struct odb_transaction;
 
 /*
  * The object database encapsulates access to objects in a repository. It
@@ -94,6 +95,13 @@ struct object_database {
 	/* Repository that owns this database. */
 	struct repository *repo;
 
+	/*
+	 * State of current current object database transaction. Only one
+	 * transaction may be pending at a time. Is NULL when no transaction is
+	 * configured.
+	 */
+	struct odb_transaction *transaction;
+
 	/*
 	 * Set of all object directories; the main directory is first (and
 	 * cannot be NULL after initialization). Subsequent directories are
diff --git a/read-cache.c b/read-cache.c
index 06ad74db228..229b8ef11c9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 		       const struct pathspec *pathspec, char *ps_matched,
 		       int include_sparse, int flags)
 {
+	struct odb_transaction *transaction;
 	struct update_callback_data data;
 	struct rev_info rev;
 
@@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 	 * This function is invoked from commands other than 'add', which
 	 * may not have their own transaction active.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	release_revisions(&rev);
 	return !!data.add_errors;
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin()
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
  2025-08-22 21:34     ` [PATCH v3 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
  2025-08-22 21:34     ` [PATCH v3 2/4] bulk-checkin: remove global transaction state Justin Tobler
@ 2025-08-22 21:34     ` Justin Tobler
  2025-08-22 21:35     ` [PATCH v3 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
  2025-08-25 20:25     ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Junio C Hamano
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 21:34 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

The bulk-checkin subsystem provides a mechanism to write blobs directly
to a packfile via `index_blob_bulk_checkin()`. If there is an ongoing
transaction when invoked, objects written via this function are stored
in the same packfile. The packfile is not flushed until the transaction
itself is flushed. If there is no transaction, the single object is
written to a packfile and immediately flushed. This complicates
`index_blob_bulk_checkin()` as it cannot reliably use the provided
transaction to get the associated repository.

Update `index_blob_bulk_checkin()` to assume that a valid transaction is
always provided. Callers are now expected to ensure a transaction is set
up beforehand. With this simplification, `deflate_blob_bulk_checkin()`
is no longer needed as a standalone internal function and is combined
with `index_blob_bulk_checkin()`. The single call site in
`object-file.c:index_fd()` is updated accordingly. Due to how
`{begin,end}_odb_transaction()` handles nested transactions, a new
transaction is only created and committed if there is not already an
ongoing transaction.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 27 ++++-----------------------
 bulk-checkin.h |  7 +++++--
 object-file.c  | 21 ++++++++++++++-------
 3 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 53a20a2d92f..542d8125a86 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -254,11 +254,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state,
 		die_errno("unable to write pack header");
 }
 
-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-				struct object_id *result_oid,
-				int fd, size_t size,
-				const char *path, unsigned flags)
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *result_oid, int fd, size_t size,
+			    const char *path, unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	off_t seekback, already_hashed_to;
 	struct git_hash_ctx ctx;
 	unsigned char obuf[16384];
@@ -361,25 +361,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
 	}
 }
 
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
-			    struct object_id *oid, int fd, size_t size,
-			    const char *path, unsigned flags)
-{
-	int status;
-
-	if (transaction) {
-		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
-					      size, path, flags);
-	} else {
-		struct bulk_checkin_packfile state = { 0 };
-
-		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
-		flush_bulk_checkin_packfile(&state);
-	}
-
-	return status;
-}
-
 struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
 	if (!odb->transaction) {
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 16254ce6a70..ac8887f476b 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
 				     int fd, const char *filename);
 
 /*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
diff --git a/object-file.c b/object-file.c
index 1740aa2b2e3..bc15af42450 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
 	 * die() for large files.
 	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
 		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
+	} else if (!S_ISREG(st->st_mode)) {
 		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
+	} else if ((st->st_size >= 0 &&
+		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+		   type != OBJ_BLOB ||
+		   (path && would_convert_to_git(istate, path))) {
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
-	else
-		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+	} else {
+		struct odb_transaction *transaction;
+
+		transaction = begin_odb_transaction(the_repository->objects);
+		ret = index_blob_bulk_checkin(transaction,
 					      oid, fd, xsize_t(st->st_size),
 					      path, flags);
+		end_odb_transaction(transaction);
+	}
+
 	close(fd);
 	return ret;
 }
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 4/4] bulk-checkin: use repository variable from transaction
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
                       ` (2 preceding siblings ...)
  2025-08-22 21:34     ` [PATCH v3 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
@ 2025-08-22 21:35     ` Justin Tobler
  2025-08-25 20:25     ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Junio C Hamano
  4 siblings, 0 replies; 27+ messages in thread
From: Justin Tobler @ 2025-08-22 21:35 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, Justin Tobler

The bulk-checkin subsystem depends on `the_repository`. Adapt functions
and call sites to access the repository through `struct odb_transaction`
instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
`pack_compression_level` and `pack_size_limit_cfg` globals are still
used.

Also adapt functions using packfile state to instead access it through
the transaction. This makes some function parameters redundant and go
away.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 bulk-checkin.c | 67 +++++++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 542d8125a86..124c4930676 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -38,25 +38,26 @@ struct odb_transaction {
 	struct bulk_checkin_packfile packfile;
 };
 
-static void finish_tmp_packfile(struct strbuf *basename,
-				const char *pack_tmp_name,
-				struct pack_idx_entry **written_list,
-				uint32_t nr_written,
-				struct pack_idx_option *pack_idx_opts,
+static void finish_tmp_packfile(struct odb_transaction *transaction,
+				struct strbuf *basename,
 				unsigned char hash[])
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	char *idx_tmp_name = NULL;
 
-	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
-			    written_list, nr_written, NULL, pack_idx_opts, hash,
-			    &idx_tmp_name);
-	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
+	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
+			    state->written, state->nr_written, NULL,
+			    &state->pack_idx_opts, hash, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
 
 	free(idx_tmp_name);
 }
 
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
+static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	unsigned char hash[GIT_MAX_RAWSZ];
 	struct strbuf packname = STRBUF_INIT;
 
@@ -73,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
 		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name,
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 					 state->nr_written, hash,
 					 state->offset);
 		close(fd);
 	}
 
-	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
-		    hash_to_hex(hash));
-	finish_tmp_packfile(&packname, state->pack_tmp_name,
-			    state->written, state->nr_written,
-			    &state->pack_idx_opts, hash);
+	strbuf_addf(&packname, "%s/pack/pack-%s.",
+		    repo_get_object_directory(transaction->odb->repo),
+		    hash_to_hex_algop(hash, repo->hash_algo));
+
+	finish_tmp_packfile(transaction, &packname, hash);
 	for (uint32_t i = 0; i < state->nr_written; i++)
 		free(state->written[i]);
 
@@ -94,7 +95,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 
 	strbuf_release(&packname);
 	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(the_repository);
+	reprepare_packed_git(repo);
 }
 
 /*
@@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
 	 * to ensure that the data in each new object file is durable before
 	 * the final name is visible.
 	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    repo_get_object_directory(transaction->odb->repo));
 	temp = xmks_tempfile(temp_path.buf);
 	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
 	delete_tempfile(&temp);
@@ -131,16 +133,17 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
 	transaction->objdir = NULL;
 }
 
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
+static int already_written(struct odb_transaction *transaction,
+			   struct object_id *oid)
 {
 	/* The object may already exist in the repository */
-	if (odb_has_object(the_repository->objects, oid,
+	if (odb_has_object(transaction->odb, oid,
 			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 		return 1;
 
 	/* Might want to keep the list sorted */
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		if (oideq(&state->written[i]->oid, oid))
+	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+		if (oideq(&transaction->packfile.written[i]->oid, oid))
 			return 1;
 
 	/* This is a new object we need to keep */
@@ -239,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 }
 
 /* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct bulk_checkin_packfile *state,
+static void prepare_to_stream(struct odb_transaction *transaction,
 			      unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 		return;
 
-	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
+	state->f = create_tmp_packfile(transaction->odb->repo,
+				       &state->pack_tmp_name);
 	reset_pack_idx_option(&state->pack_idx_opts);
 
 	/* Pretend we are going to write only one object */
@@ -272,21 +277,21 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
 
 	header_len = format_object_header((char *)obuf, sizeof(obuf),
 					  OBJ_BLOB, size);
-	the_hash_algo->init_fn(&ctx);
+	transaction->odb->repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
 	/* Note: idx is non-NULL when we are writing */
 	if ((flags & INDEX_WRITE_OBJECT) != 0) {
 		CALLOC_ARRAY(idx, 1);
 
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		hashfile_checkpoint_init(state->f, &checkpoint);
 	}
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		if (idx) {
 			hashfile_checkpoint(state->f, &checkpoint);
 			idx->offset = state->offset;
@@ -304,7 +309,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
 			BUG("should not happen");
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
-		flush_bulk_checkin_packfile(state);
+		flush_bulk_checkin_packfile(transaction);
 		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
 			return error("cannot seek back");
 	}
@@ -313,7 +318,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
 		return 0;
 
 	idx->crc32 = crc32_end(state->f);
-	if (already_written(state, result_oid)) {
+	if (already_written(transaction, result_oid)) {
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		free(idx);
@@ -338,7 +343,7 @@ void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 	if (!transaction || transaction->objdir)
 		return;
 
-	transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync");
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
 	if (transaction->objdir)
 		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
@@ -379,7 +384,7 @@ void flush_odb_transaction(struct odb_transaction *transaction)
 		return;
 
 	flush_batch_fsync(transaction);
-	flush_bulk_checkin_packfile(&transaction->packfile);
+	flush_bulk_checkin_packfile(transaction);
 }
 
 void end_odb_transaction(struct odb_transaction *transaction)
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 0/4] bulk-checkin: remove global transaction state
  2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
                       ` (3 preceding siblings ...)
  2025-08-22 21:35     ` [PATCH v3 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
@ 2025-08-25 20:25     ` Junio C Hamano
  4 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2025-08-25 20:25 UTC (permalink / raw)
  To: Justin Tobler; +Cc: git, ps

Justin Tobler <jltobler@gmail.com> writes:

> Changes since V2:
>
> - `index_blob_bulk_checkin()` is combined with
>   `deflate_blob_bulk_checkin()` in patch 3 instead of 4.
> - Continue to use `repo_get_object_directory()` instead of open coding.
>
> Changes since V1:
>
> - `index_blob_bulk_checkin()` now assumes that the caller always
>   provides a setup `struct odb_transaction`. Callers are adjusted to
>   ensure this.
> - Functions in bulk-checkin.c now consistently access the repository
>   through the provided `odb_transaction`.
>
> Thanks,
> -Justin
>
> Justin Tobler (4):
>   bulk-checkin: introduce object database transaction structure
>   bulk-checkin: remove global transaction state
>   bulk-checkin: require transaction for index_blob_bulk_checkin()
>   bulk-checkin: use repository variable from transaction
>
>  builtin/add.c            |   5 +-
>  builtin/unpack-objects.c |   5 +-
>  builtin/update-index.c   |   7 +-
>  bulk-checkin.c           | 152 +++++++++++++++++++++------------------
>  bulk-checkin.h           |  25 ++++---
>  cache-tree.c             |   5 +-
>  object-file.c            |  30 +++++---
>  odb.h                    |   8 +++
>  read-cache.c             |   5 +-
>  9 files changed, 141 insertions(+), 101 deletions(-)

Looking good.  Will queue.  Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2025-08-25 20:25 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
2025-08-20 22:55 ` [PATCH 1/3] bulk-checkin: introduce object database transaction structure Justin Tobler
2025-08-20 22:55 ` [PATCH 2/3] bulk-checkin: remove global transaction state Justin Tobler
2025-08-20 22:55 ` [PATCH 3/3] bulk-checkin: wire repository variable Justin Tobler
2025-08-21  0:15   ` Junio C Hamano
2025-08-21 20:26     ` Justin Tobler
2025-08-21 20:32       ` Junio C Hamano
2025-08-21  0:00 ` [PATCH 0/3] bulk-checkin: remove global transaction state Junio C Hamano
2025-08-21 23:22 ` [PATCH v2 0/4] " Justin Tobler
2025-08-21 23:22   ` [PATCH v2 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
2025-08-21 23:22   ` [PATCH v2 2/4] bulk-checkin: remove global transaction state Justin Tobler
2025-08-22 16:37     ` Junio C Hamano
2025-08-22 18:07       ` Justin Tobler
2025-08-22 20:25         ` Junio C Hamano
2025-08-21 23:22   ` [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
2025-08-22 16:49     ` Junio C Hamano
2025-08-22 19:13       ` Justin Tobler
2025-08-22 20:33         ` Junio C Hamano
2025-08-21 23:22   ` [PATCH v2 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
2025-08-22 17:03     ` Junio C Hamano
2025-08-22 19:38       ` Justin Tobler
2025-08-22 21:34   ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
2025-08-22 21:34     ` [PATCH v3 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
2025-08-22 21:34     ` [PATCH v3 2/4] bulk-checkin: remove global transaction state Justin Tobler
2025-08-22 21:34     ` [PATCH v3 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
2025-08-22 21:35     ` [PATCH v3 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
2025-08-25 20:25     ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).