* [PATCH v2 1/6] bulk-checkin: remove ODB transaction nesting
From: Justin Tobler @ 2025-09-15 20:29 UTC (permalink / raw)
To: git; +Cc: ps, Justin Tobler
ODB transactions support being nested. Only the outermost
{begin,end}_odb_transaction() start and finish a transaction. This is
done so that certain object write codepaths that occur internally can be
optimized via ODB transactions without having to worry if a transaction
has already been started or not. This can make the interface a bit
awkward to use, as calling {begin,end}_odb_transaction() does not
guarantee that a transaction is actually started or ended. Thus, in
situations where a transaction must be explicitly flushed,
flush_odb_transaction() must be used.
To better clarify ownership sematics around a transaction and further
remove the need for flush_odb_transaction() as part of the transaction
interface, instead be more explicit and require callers who use ODB
transactions internally to ensure there is not already a pending
transaction before beginning or ending a transaction.
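As a minimal sketch of the resulting caller pattern (mirroring the
cache-tree.c change in this patch; surrounding code elided), internal
call sites now guard the begin/end pair themselves:

    struct odb_transaction *transaction = NULL;

    /* Begin a transaction only if none is already pending. */
    if (!the_repository->objects->transaction)
            transaction = begin_odb_transaction(the_repository->objects);

    /* ... object writes ... */

    /* Ends only the transaction we began; a NULL transaction is a no-op. */
    end_odb_transaction(transaction);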
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
bulk-checkin.c | 22 ++++++++++------------
bulk-checkin.h | 9 ++++-----
cache-tree.c | 9 +++++++--
object-file.c | 9 ++++++---
read-cache.c | 7 +++++--
5 files changed, 32 insertions(+), 24 deletions(-)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 124c493067..6299d1c9b3 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -33,7 +33,6 @@ struct bulk_checkin_packfile {
struct odb_transaction {
struct object_database *odb;
- int nesting;
struct tmp_objdir *objdir;
struct bulk_checkin_packfile packfile;
};
@@ -368,12 +367,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
struct odb_transaction *begin_odb_transaction(struct object_database *odb)
{
- if (!odb->transaction) {
- CALLOC_ARRAY(odb->transaction, 1);
- odb->transaction->odb = odb;
- }
+ if (odb->transaction)
+ BUG("ODB transaction already started");
- odb->transaction->nesting += 1;
+ CALLOC_ARRAY(odb->transaction, 1);
+ odb->transaction->odb = odb;
return odb->transaction;
}
@@ -389,14 +387,14 @@ void flush_odb_transaction(struct odb_transaction *transaction)
void end_odb_transaction(struct odb_transaction *transaction)
{
- if (!transaction || transaction->nesting == 0)
- BUG("Unbalanced ODB transaction nesting");
-
- transaction->nesting -= 1;
-
- if (transaction->nesting)
+ if (!transaction)
return;
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->odb->transaction);
+
flush_odb_transaction(transaction);
transaction->odb->transaction = NULL;
free(transaction);
diff --git a/bulk-checkin.h b/bulk-checkin.h
index ac8887f476..b4536d81fc 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -38,9 +38,9 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
/*
* Tell the object database to optimize for adding
* multiple objects. end_odb_transaction must be called
- * to make new objects visible. Transactions can be nested,
- * and objects are only visible after the outermost transaction
- * is complete or the transaction is flushed.
+ * to make new objects visible. Only a single transaction
+ * can be pending at a time and must be ended before
+ * beginning another.
*/
struct odb_transaction *begin_odb_transaction(struct object_database *odb);
@@ -53,8 +53,7 @@ void flush_odb_transaction(struct odb_transaction *transaction);
/*
* Tell the object database to make any objects from the
- * current transaction visible if this is the final nested
- * transaction.
+ * current transaction visible.
*/
void end_odb_transaction(struct odb_transaction *transaction);
diff --git a/cache-tree.c b/cache-tree.c
index d225554eed..f88555a773 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -474,7 +474,7 @@ static int update_one(struct cache_tree *it,
int cache_tree_update(struct index_state *istate, int flags)
{
- struct odb_transaction *transaction;
+ struct odb_transaction *transaction = NULL;
int skip, i;
i = verify_cache(istate, flags);
@@ -490,10 +490,15 @@ int cache_tree_update(struct index_state *istate, int flags)
trace_performance_enter();
trace2_region_enter("cache_tree", "update", the_repository);
- transaction = begin_odb_transaction(the_repository->objects);
+
+ if (!the_repository->objects->transaction)
+ transaction = begin_odb_transaction(the_repository->objects);
+
i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
"", 0, &skip, flags);
+
end_odb_transaction(transaction);
+
trace2_region_leave("cache_tree", "update", the_repository);
trace_performance_leave("cache_tree_update");
if (i < 0)
diff --git a/object-file.c b/object-file.c
index bc15af4245..c2db58d62e 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1264,12 +1264,15 @@ int index_fd(struct index_state *istate, struct object_id *oid,
ret = index_core(istate, oid, fd, xsize_t(st->st_size),
type, path, flags);
} else {
- struct odb_transaction *transaction;
+ struct odb_transaction *transaction = NULL;
- transaction = begin_odb_transaction(the_repository->objects);
- ret = index_blob_bulk_checkin(transaction,
+ if (!the_repository->objects->transaction)
+ transaction = begin_odb_transaction(the_repository->objects);
+
+ ret = index_blob_bulk_checkin(the_repository->objects->transaction,
oid, fd, xsize_t(st->st_size),
path, flags);
+
end_odb_transaction(transaction);
}
diff --git a/read-cache.c b/read-cache.c
index 229b8ef11c..6d2ff487f6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3947,7 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
const struct pathspec *pathspec, char *ps_matched,
int include_sparse, int flags)
{
- struct odb_transaction *transaction;
+ struct odb_transaction *transaction = NULL;
struct update_callback_data data;
struct rev_info rev;
@@ -3973,8 +3973,11 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
* This function is invoked from commands other than 'add', which
* may not have their own transaction active.
*/
- transaction = begin_odb_transaction(repo->objects);
+ if (!repo->objects->transaction)
+ transaction = begin_odb_transaction(repo->objects);
+
run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
+
end_odb_transaction(transaction);
release_revisions(&rev);
--
2.51.0.193.g4975ec3473b
* Re: [PATCH v2 1/6] bulk-checkin: remove ODB transaction nesting
From: Karthik Nayak @ 2025-09-16 7:57 UTC (permalink / raw)
To: Justin Tobler, git; +Cc: ps
Justin Tobler <jltobler@gmail.com> writes:
> ODB transactions support being nested. Only the outermost
> {begin,end}_odb_transaction() start and finish a transaction. This is
> done so that certain object write codepaths that occur internally can be
> optimized via ODB transactions without having to worry if a transaction
> has already been started or not. This can make the interface a bit
> awkward to use, as calling {begin,end}_odb_transaction() does not
> guarantee that a transaction is actually started or ended. Thus, in
> situations where a transaction must be explicitly flushed,
> flush_odb_transaction() must be used.
>
> To better clarify ownership sematics around a transaction and further
s/sematics/semantics
> remove the need for flush_odb_transaction() as part of the transaction
> interface, instead be more explicit and require callers who use ODB
The first sentence doesn't flow into the second here. Perhaps s/instead//
> transactions internally to ensure there is not already a pending
> transaction before beginning or ending a transaction.
[snip]
> diff --git a/cache-tree.c b/cache-tree.c
> index d225554eed..f88555a773 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -474,7 +474,7 @@ static int update_one(struct cache_tree *it,
>
> int cache_tree_update(struct index_state *istate, int flags)
> {
> - struct odb_transaction *transaction;
> + struct odb_transaction *transaction = NULL;
> int skip, i;
>
> i = verify_cache(istate, flags);
> @@ -490,10 +490,15 @@ int cache_tree_update(struct index_state *istate, int flags)
>
> trace_performance_enter();
> trace2_region_enter("cache_tree", "update", the_repository);
> - transaction = begin_odb_transaction(the_repository->objects);
> +
> + if (!the_repository->objects->transaction)
> + transaction = begin_odb_transaction(the_repository->objects);
> +
> i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
> "", 0, &skip, flags);
> +
> end_odb_transaction(transaction);
> +
> trace2_region_leave("cache_tree", "update", the_repository);
> trace_performance_leave("cache_tree_update");
> if (i < 0)
>
If there is an ongoing transaction, we don't create a new one. If we
create a new transaction, we ensure we also close it. Makes sense.
I wish the parent transaction would be passed through to make it easier
to understand, instead of deriving from a global variable. Nevertheless,
this is a great improvement.
[snip]
* Re: [PATCH v2 1/6] bulk-checkin: remove ODB transaction nesting
From: Justin Tobler @ 2025-09-16 15:00 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, ps
On 25/09/16 12:57AM, Karthik Nayak wrote:
> Justin Tobler <jltobler@gmail.com> writes:
>
> > ODB transactions support being nested. Only the outermost
> > {begin,end}_odb_transaction() start and finish a transaction. This is
> > done so that certain object write codepaths that occur internally can be
> > optimized via ODB transactions without having to worry if a transaction
> > has already been started or not. This can make the interface a bit
> > awkward to use, as calling {begin,end}_odb_transaction() does not
> > guarantee that a transaction is actually started or ended. Thus, in
> > situations where a transaction must be explicitly flushed,
> > flush_odb_transaction() must be used.
> >
> > To better clarify ownership sematics around a transaction and further
>
> s/smatics/semantics
>
> > remove the need for flush_odb_transaction() as part of the transaction
> > interface, instead be more explicit and require callers who use ODB
>
> The first sentence doesn't flow into the second here. Perhaps s/instead//
Thanks. I'll fix these in the next version.
> > diff --git a/cache-tree.c b/cache-tree.c
> > index d225554eed..f88555a773 100644
> > --- a/cache-tree.c
> > +++ b/cache-tree.c
> > @@ -474,7 +474,7 @@ static int update_one(struct cache_tree *it,
> >
> > int cache_tree_update(struct index_state *istate, int flags)
> > {
> > - struct odb_transaction *transaction;
> > + struct odb_transaction *transaction = NULL;
> > int skip, i;
> >
> > i = verify_cache(istate, flags);
> > @@ -490,10 +490,15 @@ int cache_tree_update(struct index_state *istate, int flags)
> >
> > trace_performance_enter();
> > trace2_region_enter("cache_tree", "update", the_repository);
> > - transaction = begin_odb_transaction(the_repository->objects);
> > +
> > + if (!the_repository->objects->transaction)
> > + transaction = begin_odb_transaction(the_repository->objects);
> > +
> > i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
> > "", 0, &skip, flags);
> > +
> > end_odb_transaction(transaction);
> > +
> > trace2_region_leave("cache_tree", "update", the_repository);
> > trace_performance_leave("cache_tree_update");
> > if (i < 0)
> >
> I wish the parent transaction would be passed through to make it easier
> to understand, instead of deriving from a global variable. Nevertheless,
> this is a great improvement.
This should be cleaned up a bit in the next version by making
begin_odb_transaction() function as a noop that returns NULL when the
ODB already has a pending transaction. This allows us to at least avoid
checking the transaction state from the global here. We will still have
to derive the ODB from the_repository global for now though.
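Roughly something like this (just a sketch of the idea, not the final
code):

    struct odb_transaction *begin_odb_transaction(struct object_database *odb)
    {
            /* Proposed: a nested begin becomes a noop, signalled by NULL. */
            if (odb->transaction)
                    return NULL;

            CALLOC_ARRAY(odb->transaction, 1);
            odb->transaction->odb = odb;

            return odb->transaction;
    }

Since end_odb_transaction() already treats NULL as a no-op, callers can
then call the begin/end pair unconditionally and only the outermost
caller ends up owning the transaction.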
-Justin
* [PATCH v2 2/6] builtin/update-index: end ODB transaction when --verbose is specified
From: Justin Tobler @ 2025-09-15 20:29 UTC (permalink / raw)
To: git; +Cc: ps, Justin Tobler
With 23a3a303 (update-index: use the bulk-checkin infrastructure,
2022-04-04), object database transactions were added to
git-update-index(1) to facilitate writing objects in bulk. With
transactions, newly added objects are instead written to a temporary
object directory and migrated to the primary object database upon
transaction commit.
When the --verbose option is specified, each of the following objects is
explicitly flushed via flush_odb_transaction() prior to reporting the
update. Flushing the object database transaction migrates pending
objects to the primary object database without marking the transaction
as complete. This is done so objects are immediately visible to
git-update-index(1) callers using the --verbose option and that rely on
parsing verbose output to know when objects are written.
Due to how git-update-index(1) parses options, each filename argument is
evaluated with only the set of options that precede it. Therefore, it is
possible for an initial set of objects to be written in a transaction
before a --verbose option is encountered.
As soon as the --verbose option is parsed in git-update-index(1), all
subsequent object writes are flushed prior to being reported and thus no
longer benefit from being transactional. Furthermore, the mechanism to
flush a transaction without committing is rather awkward. Drop the call
to flush_odb_transaction() in favor of ending the transaction early when
the --verbose flag is encountered.
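As an illustration (hypothetical invocation), given

    git update-index --add one.c --verbose --add two.c

the object for one.c is written while the transaction is still active;
once --verbose has been parsed, the transaction is ended before the next
update is processed, and the object for two.c is written outside of any
transaction.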
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/update-index.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2ba2d29c95..d36bc55752 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -70,14 +70,6 @@ static void report(const char *fmt, ...)
if (!verbose)
return;
- /*
- * It is possible, though unlikely, that a caller could use the verbose
- * output to synchronize with addition of objects to the object
- * database. The current implementation of ODB transactions leaves
- * objects invisible while a transaction is active, so flush the
- * transaction here before reporting a change made by update-index.
- */
- flush_odb_transaction(the_repository->objects->transaction);
va_start(vp, fmt);
vprintf(fmt, vp);
putchar('\n');
@@ -1150,6 +1142,21 @@ int cmd_update_index(int argc,
const char *path = ctx.argv[0];
char *p;
+ /*
+ * It is possible, though unlikely, that a caller could
+ * use the verbose output to synchronize with addition
+ * of objects to the object database. The current
+ * implementation of ODB transactions leaves objects
+ * invisible while a transaction is active, so end the
+ * transaction here early before processing the next
+ * update. All further updates are performed outside of
+ * a transaction.
+ */
+ if (transaction && verbose) {
+ end_odb_transaction(transaction);
+ transaction = NULL;
+ }
+
setup_work_tree();
p = prefix_path(prefix, prefix_length, path);
update_one(p);
--
2.51.0.193.g4975ec3473b
* Re: [PATCH v2 2/6] builtin/update-index: end ODB transaction when --verbose is specified
From: Karthik Nayak @ 2025-09-16 9:07 UTC (permalink / raw)
To: Justin Tobler, git; +Cc: ps
Justin Tobler <jltobler@gmail.com> writes:
> With 23a3a303 (update-index: use the bulk-checkin infrastructure,
> 2022-04-04), object database transactions were added to
> git-update-index(1) to facilitate writing objects in bulk. With
> transactions, newly added objects are instead written to a temporary
> object directory and migrated to the primary object database upon
> transaction commit.
>
> When the --verbose option is specified, each of the following objects is
> explicitly flushed via flush_odb_transaction() prior to reporting the
> update. Flushing the object database transaction migrates pending
> objects to the primary object database without marking the transaction
> as complete. This is done so objects are immediately visible to
> git-update-index(1) callers using the --verbose option and that rely on
> parsing verbose output to know when objects are written.
>
> Due to how git-update-index(1) parses options, each filename argument is
> evaluated with only the set of options that precede it. Therefore, it is
> possible for an initial set of objects to be written in a transaction
> before a --verbose option is encountered.
>
> As soon as the --verbose option is parsed in git-update-index(1), all
> subsequent object writes are flushed prior to being reported and thus no
> longer benefit from being transactional. Furthermore, the mechanism to
> flush a transaction without committing is rather awkward. Drop the call
> to flush_odb_transaction() in favor of ending the transaction early when
> the --verbose flag is encountered.
>
> Signed-off-by: Justin Tobler <jltobler@gmail.com>
> ---
> builtin/update-index.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/update-index.c b/builtin/update-index.c
> index 2ba2d29c95..d36bc55752 100644
> --- a/builtin/update-index.c
> +++ b/builtin/update-index.c
> @@ -70,14 +70,6 @@ static void report(const char *fmt, ...)
> if (!verbose)
> return;
>
> - /*
> - * It is possible, though unlikely, that a caller could use the verbose
> - * output to synchronize with addition of objects to the object
> - * database. The current implementation of ODB transactions leaves
> - * objects invisible while a transaction is active, so flush the
> - * transaction here before reporting a change made by update-index.
> - */
> - flush_odb_transaction(the_repository->objects->transaction);
> va_start(vp, fmt);
> vprintf(fmt, vp);
> putchar('\n');
> @@ -1150,6 +1142,21 @@ int cmd_update_index(int argc,
> const char *path = ctx.argv[0];
> char *p;
>
> + /*
> + * It is possible, though unlikely, that a caller could
> + * use the verbose output to synchronize with addition
> + * of objects to the object database. The current
> + * implementation of ODB transactions leaves objects
> + * invisible while a transaction is active, so end the
> + * transaction here early before processing the next
> + * update. All further updates are performed outside of
> + * a transaction.
> + */
> + if (transaction && verbose) {
> + end_odb_transaction(transaction);
> + transaction = NULL;
> + }
> +
So with this change, all objects updated before the `--verbose` flag is
encountered are written via a single transaction. Updates after the
`--verbose` flag will no longer use a transaction.
The older version would flush the transaction on every report; are there
any benefits to the new flow with regards to performance?
> setup_work_tree();
> p = prefix_path(prefix, prefix_length, path);
> update_one(p);
> --
> 2.51.0.193.g4975ec3473b
* Re: [PATCH v2 2/6] builtin/update-index: end ODB transaction when --verbose is specified
From: Justin Tobler @ 2025-09-16 15:17 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, ps
On 25/09/16 02:07AM, Karthik Nayak wrote:
> Justin Tobler <jltobler@gmail.com> writes:
> > + /*
> > + * It is possible, though unlikely, that a caller could
> > + * use the verbose output to synchronize with addition
> > + * of objects to the object database. The current
> > + * implementation of ODB transactions leaves objects
> > + * invisible while a transaction is active, so end the
> > + * transaction here early before processing the next
> > + * update. All further updates are performed outside of
> > + * a transaction.
> > + */
> > + if (transaction && verbose) {
> > + end_odb_transaction(transaction);
> > + transaction = NULL;
> > + }
> > +
>
> So with this change, all objects updated before the `--verbose` flag is
> encountered are written via a single transaction. Updates after the
> `--verbose` flag will no longer use a transaction.
>
> The older version would flush the transaction on every report; are
> there any benefits to the new flow with regards to performance?
The older version would only flush the transaction during a report if the
--verbose option is enabled. Objects written without --verbose would not
be immediately flushed on report.
In this version we have functionally the same thing, but instead of
flushing the transaction on every report, the transaction is ended and
subsequent objects are written outside of a transaction.
I'll try to clarify this in the commit message.
-Justin
* [PATCH v2 3/6] bulk-checkin: drop flush_odb_transaction()
From: Justin Tobler @ 2025-09-15 20:29 UTC (permalink / raw)
To: git; +Cc: ps, Justin Tobler
Object database transactions can be explicitly flushed via
flush_odb_transaction() without actually completing the transaction.
This makes the provided transactional interface a bit awkward. Now that
there are no longer any flush_odb_transaction() call sites, drop the
function to simplify the interface and further ensure that a transaction
is only finalized when end_odb_transaction() is invoked.
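For callers, the surviving interface is just the begin/end pair; a
minimal usage sketch (assuming a struct repository *repo in scope):

    struct odb_transaction *transaction = begin_odb_transaction(repo->objects);

    /* ... object writes are collected in the transaction ... */

    /* Only now do the newly written objects become visible. */
    end_odb_transaction(transaction);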
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
bulk-checkin.c | 12 ++----------
bulk-checkin.h | 7 -------
2 files changed, 2 insertions(+), 17 deletions(-)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 6299d1c9b3..e1d8367967 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -376,15 +376,6 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb)
return odb->transaction;
}
-void flush_odb_transaction(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(transaction);
-}
-
void end_odb_transaction(struct odb_transaction *transaction)
{
if (!transaction)
@@ -395,7 +386,8 @@ void end_odb_transaction(struct odb_transaction *transaction)
*/
ASSERT(transaction == transaction->odb->transaction);
- flush_odb_transaction(transaction);
+ flush_batch_fsync(transaction);
+ flush_bulk_checkin_packfile(transaction);
transaction->odb->transaction = NULL;
free(transaction);
}
diff --git a/bulk-checkin.h b/bulk-checkin.h
index b4536d81fc..35e0564082 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -44,13 +44,6 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
*/
struct odb_transaction *begin_odb_transaction(struct object_database *odb);
-/*
- * Make any objects that are currently part of a pending object
- * database transaction visible. It is valid to call this function
- * even if no transaction is active.
- */
-void flush_odb_transaction(struct odb_transaction *transaction);
-
/*
* Tell the object database to make any objects from the
* current transaction visible.
--
2.51.0.193.g4975ec3473b
* [PATCH v2 4/6] object-file: relocate ODB transaction code
From: Justin Tobler @ 2025-09-15 20:29 UTC (permalink / raw)
To: git; +Cc: ps, Justin Tobler
The bulk-checkin subsystem provides various functions to manage ODB
transactions. Apart from {begin,end}_odb_transaction(), these functions
are only used by the object-file subsystem to manage aspects of a
transaction implementation specific to the files object source.
Relocate all the transaction code in bulk-checkin to object-file.
This simplifies the exposed transaction interface by reducing it to only
{begin,end}_odb_transaction(). Function and type names are adjusted in
the subsequent commit to better fit the new location.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Makefile | 1 -
builtin/add.c | 2 +-
builtin/unpack-objects.c | 1 -
builtin/update-index.c | 1 -
bulk-checkin.c | 393 --------------------------------------
bulk-checkin.h | 53 ------
cache-tree.c | 1 -
meson.build | 1 -
object-file.c | 394 ++++++++++++++++++++++++++++++++++++++-
object-file.h | 17 ++
read-cache.c | 1 -
11 files changed, 411 insertions(+), 454 deletions(-)
delete mode 100644 bulk-checkin.c
delete mode 100644 bulk-checkin.h
diff --git a/Makefile b/Makefile
index 4c95affadb..d25d4255f8 100644
--- a/Makefile
+++ b/Makefile
@@ -974,7 +974,6 @@ LIB_OBJS += blame.o
LIB_OBJS += blob.o
LIB_OBJS += bloom.o
LIB_OBJS += branch.o
-LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle-uri.o
LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o
diff --git a/builtin/add.c b/builtin/add.c
index 740c7c4581..8294366d68 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -14,13 +14,13 @@
#include "gettext.h"
#include "pathspec.h"
#include "run-command.h"
+#include "object-file.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
#include "diff.h"
#include "read-cache.h"
#include "revision.h"
-#include "bulk-checkin.h"
#include "strvec.h"
#include "submodule.h"
#include "add-interactive.h"
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 28124b324d..4596fff0da 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -2,7 +2,6 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
-#include "bulk-checkin.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
diff --git a/builtin/update-index.c b/builtin/update-index.c
index d36bc55752..ee01c4e423 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -8,7 +8,6 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
-#include "bulk-checkin.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
diff --git a/bulk-checkin.c b/bulk-checkin.c
deleted file mode 100644
index e1d8367967..0000000000
--- a/bulk-checkin.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-
-#define USE_THE_REPOSITORY_VARIABLE
-
-#include "git-compat-util.h"
-#include "bulk-checkin.h"
-#include "environment.h"
-#include "gettext.h"
-#include "hex.h"
-#include "lockfile.h"
-#include "repository.h"
-#include "csum-file.h"
-#include "pack.h"
-#include "strbuf.h"
-#include "tmp-objdir.h"
-#include "packfile.h"
-#include "object-file.h"
-#include "odb.h"
-
-struct bulk_checkin_packfile {
- char *pack_tmp_name;
- struct hashfile *f;
- off_t offset;
- struct pack_idx_option pack_idx_opts;
-
- struct pack_idx_entry **written;
- uint32_t alloc_written;
- uint32_t nr_written;
-};
-
-struct odb_transaction {
- struct object_database *odb;
-
- struct tmp_objdir *objdir;
- struct bulk_checkin_packfile packfile;
-};
-
-static void finish_tmp_packfile(struct odb_transaction *transaction,
- struct strbuf *basename,
- unsigned char hash[])
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- struct repository *repo = transaction->odb->repo;
- char *idx_tmp_name = NULL;
-
- stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
- state->written, state->nr_written, NULL,
- &state->pack_idx_opts, hash, &idx_tmp_name);
- rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
-
- free(idx_tmp_name);
-}
-
-static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- struct repository *repo = transaction->odb->repo;
- unsigned char hash[GIT_MAX_RAWSZ];
- struct strbuf packname = STRBUF_INIT;
-
- if (!state->f)
- return;
-
- if (state->nr_written == 0) {
- close(state->f->fd);
- free_hashfile(state->f);
- unlink(state->pack_tmp_name);
- goto clear_exit;
- } else if (state->nr_written == 1) {
- finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
- CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
- } else {
- int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
- fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
- state->nr_written, hash,
- state->offset);
- close(fd);
- }
-
- strbuf_addf(&packname, "%s/pack/pack-%s.",
- repo_get_object_directory(transaction->odb->repo),
- hash_to_hex_algop(hash, repo->hash_algo));
-
- finish_tmp_packfile(transaction, &packname, hash);
- for (uint32_t i = 0; i < state->nr_written; i++)
- free(state->written[i]);
-
-clear_exit:
- free(state->pack_tmp_name);
- free(state->written);
- memset(state, 0, sizeof(*state));
-
- strbuf_release(&packname);
- /* Make objects we just wrote available to ourselves */
- reprepare_packed_git(repo);
-}
-
-/*
- * Cleanup after batch-mode fsync_object_files.
- */
-static void flush_batch_fsync(struct odb_transaction *transaction)
-{
- struct strbuf temp_path = STRBUF_INIT;
- struct tempfile *temp;
-
- if (!transaction->objdir)
- return;
-
- /*
- * Issue a full hardware flush against a temporary file to ensure
- * that all objects are durable before any renames occur. The code in
- * fsync_loose_object_bulk_checkin has already issued a writeout
- * request, but it has not flushed any writeback cache in the storage
- * hardware or any filesystem logs. This fsync call acts as a barrier
- * to ensure that the data in each new object file is durable before
- * the final name is visible.
- */
- strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
- repo_get_object_directory(transaction->odb->repo));
- temp = xmks_tempfile(temp_path.buf);
- fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
- delete_tempfile(&temp);
- strbuf_release(&temp_path);
-
- /*
- * Make the object files visible in the primary ODB after their data is
- * fully durable.
- */
- tmp_objdir_migrate(transaction->objdir);
- transaction->objdir = NULL;
-}
-
-static int already_written(struct odb_transaction *transaction,
- struct object_id *oid)
-{
- /* The object may already exist in the repository */
- if (odb_has_object(transaction->odb, oid,
- HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
- return 1;
-
- /* Might want to keep the list sorted */
- for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
- if (oideq(&transaction->packfile.written[i]->oid, oid))
- return 1;
-
- /* This is a new object we need to keep */
- return 0;
-}
-
-/*
- * Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
- */
-static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
- struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path,
- unsigned flags)
-{
- git_zstream s;
- unsigned char ibuf[16384];
- unsigned char obuf[16384];
- unsigned hdrlen;
- int status = Z_OK;
- int write_object = (flags & INDEX_WRITE_OBJECT);
- off_t offset = 0;
-
- git_deflate_init(&s, pack_compression_level);
-
- hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
- s.next_out = obuf + hdrlen;
- s.avail_out = sizeof(obuf) - hdrlen;
-
- while (status != Z_STREAM_END) {
- if (size && !s.avail_in) {
- size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
- ssize_t read_result = read_in_full(fd, ibuf, rsize);
- if (read_result < 0)
- die_errno("failed to read from '%s'", path);
- if ((size_t)read_result != rsize)
- die("failed to read %u bytes from '%s'",
- (unsigned)rsize, path);
- offset += rsize;
- if (*already_hashed_to < offset) {
- size_t hsize = offset - *already_hashed_to;
- if (rsize < hsize)
- hsize = rsize;
- if (hsize)
- git_hash_update(ctx, ibuf, hsize);
- *already_hashed_to = offset;
- }
- s.next_in = ibuf;
- s.avail_in = rsize;
- size -= rsize;
- }
-
- status = git_deflate(&s, size ? 0 : Z_FINISH);
-
- if (!s.avail_out || status == Z_STREAM_END) {
- if (write_object) {
- size_t written = s.next_out - obuf;
-
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
- hashwrite(state->f, obuf, written);
- state->offset += written;
- }
- s.next_out = obuf;
- s.avail_out = sizeof(obuf);
- }
-
- switch (status) {
- case Z_OK:
- case Z_BUF_ERROR:
- case Z_STREAM_END:
- continue;
- default:
- die("unexpected deflate failure: %d", status);
- }
- }
- git_deflate_end(&s);
- return 0;
-}
-
-/* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct odb_transaction *transaction,
- unsigned flags)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- if (!(flags & INDEX_WRITE_OBJECT) || state->f)
- return;
-
- state->f = create_tmp_packfile(transaction->odb->repo,
- &state->pack_tmp_name);
- reset_pack_idx_option(&state->pack_idx_opts);
-
- /* Pretend we are going to write only one object */
- state->offset = write_pack_header(state->f, 1);
- if (!state->offset)
- die_errno("unable to write pack header");
-}
-
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
- struct object_id *result_oid, int fd, size_t size,
- const char *path, unsigned flags)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- off_t seekback, already_hashed_to;
- struct git_hash_ctx ctx;
- unsigned char obuf[16384];
- unsigned header_len;
- struct hashfile_checkpoint checkpoint;
- struct pack_idx_entry *idx = NULL;
-
- seekback = lseek(fd, 0, SEEK_CUR);
- if (seekback == (off_t) -1)
- return error("cannot find the current offset");
-
- header_len = format_object_header((char *)obuf, sizeof(obuf),
- OBJ_BLOB, size);
- transaction->odb->repo->hash_algo->init_fn(&ctx);
- git_hash_update(&ctx, obuf, header_len);
-
- /* Note: idx is non-NULL when we are writing */
- if ((flags & INDEX_WRITE_OBJECT) != 0) {
- CALLOC_ARRAY(idx, 1);
-
- prepare_to_stream(transaction, flags);
- hashfile_checkpoint_init(state->f, &checkpoint);
- }
-
- already_hashed_to = 0;
-
- while (1) {
- prepare_to_stream(transaction, flags);
- if (idx) {
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
- }
- if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path, flags))
- break;
- /*
- * Writing this object to the current pack will make
- * it too big; we need to truncate it, start a new
- * pack, and write into it.
- */
- if (!idx)
- BUG("should not happen");
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- flush_bulk_checkin_packfile(transaction);
- if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
- return error("cannot seek back");
- }
- git_hash_final_oid(result_oid, &ctx);
- if (!idx)
- return 0;
-
- idx->crc32 = crc32_end(state->f);
- if (already_written(transaction, result_oid)) {
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- free(idx);
- } else {
- oidcpy(&idx->oid, result_oid);
- ALLOC_GROW(state->written,
- state->nr_written + 1,
- state->alloc_written);
- state->written[state->nr_written++] = idx;
- }
- return 0;
-}
-
-void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
-{
- /*
- * We lazily create the temporary object directory
- * the first time an object might be added, since
- * callers may not know whether any objects will be
- * added at the time they call begin_odb_transaction.
- */
- if (!transaction || transaction->objdir)
- return;
-
- transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
- if (transaction->objdir)
- tmp_objdir_replace_primary_odb(transaction->objdir, 0);
-}
-
-void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
- int fd, const char *filename)
-{
- /*
- * If we have an active ODB transaction, we issue a call that
- * cleans the filesystem page cache but avoids a hardware flush
- * command. Later on we will issue a single hardware flush
- * before renaming the objects to their final names as part of
- * flush_batch_fsync.
- */
- if (!transaction || !transaction->objdir ||
- git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
- if (errno == ENOSYS)
- warning(_("core.fsyncMethod = batch is unsupported on this platform"));
- fsync_or_die(fd, filename);
- }
-}
-
-struct odb_transaction *begin_odb_transaction(struct object_database *odb)
-{
- if (odb->transaction)
- BUG("ODB transaction already started");
-
- CALLOC_ARRAY(odb->transaction, 1);
- odb->transaction->odb = odb;
-
- return odb->transaction;
-}
-
-void end_odb_transaction(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- /*
- * Ensure the transaction ending matches the pending transaction.
- */
- ASSERT(transaction == transaction->odb->transaction);
-
- flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(transaction);
- transaction->odb->transaction = NULL;
- free(transaction);
-}
diff --git a/bulk-checkin.h b/bulk-checkin.h
deleted file mode 100644
index 35e0564082..0000000000
--- a/bulk-checkin.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-#ifndef BULK_CHECKIN_H
-#define BULK_CHECKIN_H
-
-#include "object.h"
-#include "odb.h"
-
-struct odb_transaction;
-
-void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
-void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
- int fd, const char *filename);
-
-/*
- * This writes the specified object to a packfile. Objects written here
- * during the same transaction are written to the same packfile. The
- * packfile is not flushed until the transaction is flushed. The caller
- * is expected to ensure a valid transaction is setup for objects to be
- * recorded to.
- *
- * This also bypasses the usual "convert-to-git" dance, and that is on
- * purpose. We could write a streaming version of the converting
- * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above). However, that is
- * somewhat complicated, as we do not know the size of the filter
- * result, which we need to know beforehand when writing a git object.
- * Since the primary motivation for trying to stream from the working
- * tree file and to avoid mmaping it in core is to deal with large
- * binary blobs, they generally do not want to get any conversion, and
- * callers should avoid this code path when filters are requested.
- */
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
- struct object_id *oid, int fd, size_t size,
- const char *path, unsigned flags);
-
-/*
- * Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
- * to make new objects visible. Only a single transaction
- * can be pending at a time and must be ended before
- * beginning another.
- */
-struct odb_transaction *begin_odb_transaction(struct object_database *odb);
-
-/*
- * Tell the object database to make any objects from the
- * current transaction visible.
- */
-void end_odb_transaction(struct odb_transaction *transaction);
-
-#endif
diff --git a/cache-tree.c b/cache-tree.c
index f88555a773..7ad3225a71 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -8,7 +8,6 @@
#include "tree.h"
#include "tree-walk.h"
#include "cache-tree.h"
-#include "bulk-checkin.h"
#include "object-file.h"
#include "odb.h"
#include "read-cache-ll.h"
diff --git a/meson.build b/meson.build
index b3dfcc0497..fccb6d2eec 100644
--- a/meson.build
+++ b/meson.build
@@ -287,7 +287,6 @@ libgit_sources = [
'blob.c',
'bloom.c',
'branch.c',
- 'bulk-checkin.c',
'bundle-uri.c',
'bundle.c',
'cache-tree.c',
diff --git a/object-file.c b/object-file.c
index c2db58d62e..3958063e48 100644
--- a/object-file.c
+++ b/object-file.c
@@ -10,7 +10,6 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
-#include "bulk-checkin.h"
#include "convert.h"
#include "dir.h"
#include "environment.h"
@@ -28,6 +27,8 @@
#include "read-cache-ll.h"
#include "setup.h"
#include "streaming.h"
+#include "tempfile.h"
+#include "tmp-objdir.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
@@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
}
+struct bulk_checkin_packfile {
+ char *pack_tmp_name;
+ struct hashfile *f;
+ off_t offset;
+ struct pack_idx_option pack_idx_opts;
+
+ struct pack_idx_entry **written;
+ uint32_t alloc_written;
+ uint32_t nr_written;
+};
+
+struct odb_transaction {
+ struct object_database *odb;
+
+ struct tmp_objdir *objdir;
+ struct bulk_checkin_packfile packfile;
+};
+
+static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
+{
+ /*
+ * We lazily create the temporary object directory
+ * the first time an object might be added, since
+ * callers may not know whether any objects will be
+ * added at the time they call begin_odb_transaction.
+ */
+ if (!transaction || transaction->objdir)
+ return;
+
+ transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
+ if (transaction->objdir)
+ tmp_objdir_replace_primary_odb(transaction->objdir, 0);
+}
+
+static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+ int fd, const char *filename)
+{
+ /*
+ * If we have an active ODB transaction, we issue a call that
+ * cleans the filesystem page cache but avoids a hardware flush
+ * command. Later on we will issue a single hardware flush
+ * before renaming the objects to their final names as part of
+ * flush_batch_fsync.
+ */
+ if (!transaction || !transaction->objdir ||
+ git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
+ if (errno == ENOSYS)
+ warning(_("core.fsyncMethod = batch is unsupported on this platform"));
+ fsync_or_die(fd, filename);
+ }
+}
+
+/*
+ * Cleanup after batch-mode fsync_object_files.
+ */
+static void flush_batch_fsync(struct odb_transaction *transaction)
+{
+ struct strbuf temp_path = STRBUF_INIT;
+ struct tempfile *temp;
+
+ if (!transaction->objdir)
+ return;
+
+ /*
+ * Issue a full hardware flush against a temporary file to ensure
+ * that all objects are durable before any renames occur. The code in
+ * fsync_loose_object_bulk_checkin has already issued a writeout
+ * request, but it has not flushed any writeback cache in the storage
+ * hardware or any filesystem logs. This fsync call acts as a barrier
+ * to ensure that the data in each new object file is durable before
+ * the final name is visible.
+ */
+ strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+ repo_get_object_directory(transaction->odb->repo));
+ temp = xmks_tempfile(temp_path.buf);
+ fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
+ delete_tempfile(&temp);
+ strbuf_release(&temp_path);
+
+ /*
+ * Make the object files visible in the primary ODB after their data is
+ * fully durable.
+ */
+ tmp_objdir_migrate(transaction->objdir);
+ transaction->objdir = NULL;
+}
+
/* Finalize a file on disk, and close it. */
static void close_loose_object(struct odb_source *source,
int fd, const char *filename)
@@ -1243,6 +1331,283 @@ static int index_core(struct index_state *istate,
return ret;
}
+static int already_written(struct odb_transaction *transaction,
+ struct object_id *oid)
+{
+ /* The object may already exist in the repository */
+ if (odb_has_object(transaction->odb, oid,
+ HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
+ return 1;
+
+ /* Might want to keep the list sorted */
+ for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+ if (oideq(&transaction->packfile.written[i]->oid, oid))
+ return 1;
+
+ /* This is a new object we need to keep */
+ return 0;
+}
+
+/* Lazily create backing packfile for the state */
+static void prepare_to_stream(struct odb_transaction *transaction,
+ unsigned flags)
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+ return;
+
+ state->f = create_tmp_packfile(transaction->odb->repo,
+ &state->pack_tmp_name);
+ reset_pack_idx_option(&state->pack_idx_opts);
+
+ /* Pretend we are going to write only one object */
+ state->offset = write_pack_header(state->f, 1);
+ if (!state->offset)
+ die_errno("unable to write pack header");
+}
+
+/*
+ * Read the contents from fd for size bytes, streaming it to the
+ * packfile in state while updating the hash in ctx. Signal a failure
+ * by returning a negative value when the resulting pack would exceed
+ * the pack size limit and this is not the first object in the pack,
+ * so that the caller can discard what we wrote from the current pack
+ * by truncating it and opening a new one. The caller will then call
+ * us again after rewinding the input fd.
+ *
+ * The already_hashed_to pointer is kept untouched by the caller to
+ * make sure we do not hash the same byte when we are called
+ * again. This way, the caller does not have to checkpoint its hash
+ * status before calling us just in case we ask it to call us again
+ * with a new pack.
+ */
+static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
+ struct git_hash_ctx *ctx, off_t *already_hashed_to,
+ int fd, size_t size, const char *path,
+ unsigned flags)
+{
+ git_zstream s;
+ unsigned char ibuf[16384];
+ unsigned char obuf[16384];
+ unsigned hdrlen;
+ int status = Z_OK;
+ int write_object = (flags & INDEX_WRITE_OBJECT);
+ off_t offset = 0;
+
+ git_deflate_init(&s, pack_compression_level);
+
+ hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
+ s.next_out = obuf + hdrlen;
+ s.avail_out = sizeof(obuf) - hdrlen;
+
+ while (status != Z_STREAM_END) {
+ if (size && !s.avail_in) {
+ size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
+ ssize_t read_result = read_in_full(fd, ibuf, rsize);
+ if (read_result < 0)
+ die_errno("failed to read from '%s'", path);
+ if ((size_t)read_result != rsize)
+ die("failed to read %u bytes from '%s'",
+ (unsigned)rsize, path);
+ offset += rsize;
+ if (*already_hashed_to < offset) {
+ size_t hsize = offset - *already_hashed_to;
+ if (rsize < hsize)
+ hsize = rsize;
+ if (hsize)
+ git_hash_update(ctx, ibuf, hsize);
+ *already_hashed_to = offset;
+ }
+ s.next_in = ibuf;
+ s.avail_in = rsize;
+ size -= rsize;
+ }
+
+ status = git_deflate(&s, size ? 0 : Z_FINISH);
+
+ if (!s.avail_out || status == Z_STREAM_END) {
+ if (write_object) {
+ size_t written = s.next_out - obuf;
+
+ /* would we bust the size limit? */
+ if (state->nr_written &&
+ pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + written) {
+ git_deflate_abort(&s);
+ return -1;
+ }
+
+ hashwrite(state->f, obuf, written);
+ state->offset += written;
+ }
+ s.next_out = obuf;
+ s.avail_out = sizeof(obuf);
+ }
+
+ switch (status) {
+ case Z_OK:
+ case Z_BUF_ERROR:
+ case Z_STREAM_END:
+ continue;
+ default:
+ die("unexpected deflate failure: %d", status);
+ }
+ }
+ git_deflate_end(&s);
+ return 0;
+}
+
+static void finish_tmp_packfile(struct odb_transaction *transaction,
+ struct strbuf *basename,
+ unsigned char hash[])
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct repository *repo = transaction->odb->repo;
+ char *idx_tmp_name = NULL;
+
+ stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
+ state->written, state->nr_written, NULL,
+ &state->pack_idx_opts, hash, &idx_tmp_name);
+ rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
+
+ free(idx_tmp_name);
+}
+
+static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct repository *repo = transaction->odb->repo;
+ unsigned char hash[GIT_MAX_RAWSZ];
+ struct strbuf packname = STRBUF_INIT;
+
+ if (!state->f)
+ return;
+
+ if (state->nr_written == 0) {
+ close(state->f->fd);
+ free_hashfile(state->f);
+ unlink(state->pack_tmp_name);
+ goto clear_exit;
+ } else if (state->nr_written == 1) {
+ finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
+ CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+ } else {
+ int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
+ fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
+ state->nr_written, hash,
+ state->offset);
+ close(fd);
+ }
+
+ strbuf_addf(&packname, "%s/pack/pack-%s.",
+ repo_get_object_directory(transaction->odb->repo),
+ hash_to_hex_algop(hash, repo->hash_algo));
+
+ finish_tmp_packfile(transaction, &packname, hash);
+ for (uint32_t i = 0; i < state->nr_written; i++)
+ free(state->written[i]);
+
+clear_exit:
+ free(state->pack_tmp_name);
+ free(state->written);
+ memset(state, 0, sizeof(*state));
+
+ strbuf_release(&packname);
+ /* Make objects we just wrote available to ourselves */
+ reprepare_packed_git(repo);
+}
+
+/*
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
+ *
+ * This also bypasses the usual "convert-to-git" dance, and that is on
+ * purpose. We could write a streaming version of the converting
+ * functions and insert that before feeding the data to fast-import
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
+ */
+static int index_blob_bulk_checkin(struct odb_transaction *transaction,
+ struct object_id *result_oid, int fd, size_t size,
+ const char *path, unsigned flags)
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ off_t seekback, already_hashed_to;
+ struct git_hash_ctx ctx;
+ unsigned char obuf[16384];
+ unsigned header_len;
+ struct hashfile_checkpoint checkpoint;
+ struct pack_idx_entry *idx = NULL;
+
+ seekback = lseek(fd, 0, SEEK_CUR);
+ if (seekback == (off_t)-1)
+ return error("cannot find the current offset");
+
+ header_len = format_object_header((char *)obuf, sizeof(obuf),
+ OBJ_BLOB, size);
+ transaction->odb->repo->hash_algo->init_fn(&ctx);
+ git_hash_update(&ctx, obuf, header_len);
+
+ /* Note: idx is non-NULL when we are writing */
+ if ((flags & INDEX_WRITE_OBJECT) != 0) {
+ CALLOC_ARRAY(idx, 1);
+
+ prepare_to_stream(transaction, flags);
+ hashfile_checkpoint_init(state->f, &checkpoint);
+ }
+
+ already_hashed_to = 0;
+
+ while (1) {
+ prepare_to_stream(transaction, flags);
+ if (idx) {
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+ }
+ if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
+ fd, size, path, flags))
+ break;
+ /*
+ * Writing this object to the current pack will make
+ * it too big; we need to truncate it, start a new
+ * pack, and write into it.
+ */
+ if (!idx)
+ BUG("should not happen");
+ hashfile_truncate(state->f, &checkpoint);
+ state->offset = checkpoint.offset;
+ flush_bulk_checkin_packfile(transaction);
+ if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
+ return error("cannot seek back");
+ }
+ git_hash_final_oid(result_oid, &ctx);
+ if (!idx)
+ return 0;
+
+ idx->crc32 = crc32_end(state->f);
+ if (already_written(transaction, result_oid)) {
+ hashfile_truncate(state->f, &checkpoint);
+ state->offset = checkpoint.offset;
+ free(idx);
+ } else {
+ oidcpy(&idx->oid, result_oid);
+ ALLOC_GROW(state->written,
+ state->nr_written + 1,
+ state->alloc_written);
+ state->written[state->nr_written++] = idx;
+ }
+ return 0;
+}
+
int index_fd(struct index_state *istate, struct object_id *oid,
int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
@@ -1612,3 +1977,30 @@ int read_loose_object(struct repository *repo,
munmap(map, mapsize);
return ret;
}
+
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
+{
+ if (odb->transaction)
+ BUG("ODB transaction already started");
+
+ CALLOC_ARRAY(odb->transaction, 1);
+ odb->transaction->odb = odb;
+
+ return odb->transaction;
+}
+
+void end_odb_transaction(struct odb_transaction *transaction)
+{
+ if (!transaction)
+ return;
+
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->odb->transaction);
+
+ flush_batch_fsync(transaction);
+ flush_bulk_checkin_packfile(transaction);
+ transaction->odb->transaction = NULL;
+ free(transaction);
+}
diff --git a/object-file.h b/object-file.h
index 15d97630d3..5925563f83 100644
--- a/object-file.h
+++ b/object-file.h
@@ -218,4 +218,21 @@ int read_loose_object(struct repository *repo,
void **contents,
struct object_info *oi);
+struct odb_transaction;
+
+/*
+ * Tell the object database to optimize for adding
+ * multiple objects. end_odb_transaction must be called
+ * to make new objects visible. Only a single transaction
+ * can be pending at a time and must be ended before
+ * beginning another.
+ */
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
+
+/*
+ * Tell the object database to make any objects from the
+ * current transaction visible.
+ */
+void end_odb_transaction(struct odb_transaction *transaction);
+
#endif /* OBJECT_FILE_H */
diff --git a/read-cache.c b/read-cache.c
index 6d2ff487f6..00acd8c858 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -8,7 +8,6 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "git-compat-util.h"
-#include "bulk-checkin.h"
#include "config.h"
#include "date.h"
#include "diff.h"
--
2.51.0.193.g4975ec3473b
* [PATCH v2 5/6] object-file: update naming from bulk-checkin
6 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-15 20:29 UTC (permalink / raw)
To: git; +Cc: ps, Justin Tobler
Update the names of several functions and types relocated from the
bulk-checkin subsystem for better clarity. Also drop
finish_tmp_packfile() as a standalone function in favor of embedding it
in flush_packfile_transaction() directly.
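For reference, the renames in this patch are:
- struct bulk_checkin_packfile -> struct transaction_packfile
- prepare_loose_object_bulk_checkin() -> prepare_loose_object_transaction()
- fsync_loose_object_bulk_checkin() -> fsync_loose_object_transaction()
- flush_batch_fsync() -> flush_loose_object_transaction()
- prepare_to_stream() -> prepare_packfile_transaction()
- flush_bulk_checkin_packfile() -> flush_packfile_transaction()
- index_blob_bulk_checkin() -> index_blob_packfile_transaction()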
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 80 +++++++++++++++++++++++----------------------------
1 file changed, 36 insertions(+), 44 deletions(-)
diff --git a/object-file.c b/object-file.c
index 3958063e48..63195da2e8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -667,7 +667,7 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
}
-struct bulk_checkin_packfile {
+struct transaction_packfile {
char *pack_tmp_name;
struct hashfile *f;
off_t offset;
@@ -682,10 +682,10 @@ struct odb_transaction {
struct object_database *odb;
struct tmp_objdir *objdir;
- struct bulk_checkin_packfile packfile;
+ struct transaction_packfile packfile;
};
-static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
+static void prepare_loose_object_transaction(struct odb_transaction *transaction)
{
/*
* We lazily create the temporary object directory
@@ -701,7 +701,7 @@ static void prepare_loose_object_bulk_checkin(struct odb_transaction *transactio
tmp_objdir_replace_primary_odb(transaction->objdir, 0);
}
-static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+static void fsync_loose_object_transaction(struct odb_transaction *transaction,
int fd, const char *filename)
{
/*
@@ -722,7 +722,7 @@ static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
/*
* Cleanup after batch-mode fsync_object_files.
*/
-static void flush_batch_fsync(struct odb_transaction *transaction)
+static void flush_loose_object_transaction(struct odb_transaction *transaction)
{
struct strbuf temp_path = STRBUF_INIT;
struct tempfile *temp;
@@ -733,7 +733,7 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
/*
* Issue a full hardware flush against a temporary file to ensure
* that all objects are durable before any renames occur. The code in
- * fsync_loose_object_bulk_checkin has already issued a writeout
+ * fsync_loose_object_transaction has already issued a writeout
* request, but it has not flushed any writeback cache in the storage
* hardware or any filesystem logs. This fsync call acts as a barrier
* to ensure that the data in each new object file is durable before
@@ -762,7 +762,7 @@ static void close_loose_object(struct odb_source *source,
goto out;
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
- fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
+ fsync_loose_object_transaction(source->odb->transaction, fd, filename);
else if (fsync_object_files > 0)
fsync_or_die(fd, filename);
else
@@ -940,7 +940,7 @@ static int write_loose_object(struct odb_source *source,
static struct strbuf filename = STRBUF_INIT;
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
- prepare_loose_object_bulk_checkin(source->odb->transaction);
+ prepare_loose_object_transaction(source->odb->transaction);
odb_loose_path(source, &filename, oid);
@@ -1029,7 +1029,7 @@ int stream_loose_object(struct odb_source *source,
int hdrlen;
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
- prepare_loose_object_bulk_checkin(source->odb->transaction);
+ prepare_loose_object_transaction(source->odb->transaction);
/* Since oid is not determined, save tmp file to odb path. */
strbuf_addf(&filename, "%s/", source->path);
@@ -1349,10 +1349,10 @@ static int already_written(struct odb_transaction *transaction,
}
/* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct odb_transaction *transaction,
- unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction *transaction,
+ unsigned flags)
{
- struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct transaction_packfile *state = &transaction->packfile;
if (!(flags & INDEX_WRITE_OBJECT) || state->f)
return;
@@ -1381,7 +1381,7 @@ static void prepare_to_stream(struct odb_transaction *transaction,
* status before calling us just in case we ask it to call us again
* with a new pack.
*/
-static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
+static int stream_blob_to_pack(struct transaction_packfile *state,
struct git_hash_ctx *ctx, off_t *already_hashed_to,
int fd, size_t size, const char *path,
unsigned flags)
@@ -1457,28 +1457,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
return 0;
}
-static void finish_tmp_packfile(struct odb_transaction *transaction,
- struct strbuf *basename,
- unsigned char hash[])
+static void flush_packfile_transaction(struct odb_transaction *transaction)
{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- struct repository *repo = transaction->odb->repo;
- char *idx_tmp_name = NULL;
-
- stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
- state->written, state->nr_written, NULL,
- &state->pack_idx_opts, hash, &idx_tmp_name);
- rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
-
- free(idx_tmp_name);
-}
-
-static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct transaction_packfile *state = &transaction->packfile;
struct repository *repo = transaction->odb->repo;
unsigned char hash[GIT_MAX_RAWSZ];
struct strbuf packname = STRBUF_INIT;
+ char *idx_tmp_name = NULL;
if (!state->f)
return;
@@ -1503,11 +1488,16 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
repo_get_object_directory(transaction->odb->repo),
hash_to_hex_algop(hash, repo->hash_algo));
- finish_tmp_packfile(transaction, &packname, hash);
+ stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
+ state->written, state->nr_written, NULL,
+ &state->pack_idx_opts, hash, &idx_tmp_name);
+ rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);
+
for (uint32_t i = 0; i < state->nr_written; i++)
free(state->written[i]);
clear_exit:
+ free(idx_tmp_name);
free(state->pack_tmp_name);
free(state->written);
memset(state, 0, sizeof(*state));
@@ -1535,11 +1525,12 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_bulk_checkin(struct odb_transaction *transaction,
- struct object_id *result_oid, int fd, size_t size,
- const char *path, unsigned flags)
+static int index_blob_packfile_transaction(struct odb_transaction *transaction,
+ struct object_id *result_oid, int fd,
+ size_t size, const char *path,
+ unsigned flags)
{
- struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct transaction_packfile *state = &transaction->packfile;
off_t seekback, already_hashed_to;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
@@ -1560,14 +1551,14 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction,
if ((flags & INDEX_WRITE_OBJECT) != 0) {
CALLOC_ARRAY(idx, 1);
- prepare_to_stream(transaction, flags);
+ prepare_packfile_transaction(transaction, flags);
hashfile_checkpoint_init(state->f, &checkpoint);
}
already_hashed_to = 0;
while (1) {
- prepare_to_stream(transaction, flags);
+ prepare_packfile_transaction(transaction, flags);
if (idx) {
hashfile_checkpoint(state->f, &checkpoint);
idx->offset = state->offset;
@@ -1585,7 +1576,7 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction,
BUG("should not happen");
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
- flush_bulk_checkin_packfile(transaction);
+ flush_packfile_transaction(transaction);
if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
return error("cannot seek back");
}
@@ -1634,9 +1625,10 @@ int index_fd(struct index_state *istate, struct object_id *oid,
if (!the_repository->objects->transaction)
transaction = begin_odb_transaction(the_repository->objects);
- ret = index_blob_bulk_checkin(the_repository->objects->transaction,
- oid, fd, xsize_t(st->st_size),
- path, flags);
+ ret = index_blob_packfile_transaction(the_repository->objects->transaction,
+ oid, fd,
+ xsize_t(st->st_size),
+ path, flags);
end_odb_transaction(transaction);
}
@@ -1999,8 +1991,8 @@ void end_odb_transaction(struct odb_transaction *transaction)
*/
ASSERT(transaction == transaction->odb->transaction);
- flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(transaction);
+ flush_loose_object_transaction(transaction);
+ flush_packfile_transaction(transaction);
transaction->odb->transaction = NULL;
free(transaction);
}
--
2.51.0.193.g4975ec3473b
^ permalink raw reply related [flat|nested] 38+ messages in thread

* [PATCH v2 6/6] odb: add transaction interface
2025-09-15 20:29 ` [PATCH v2 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
` (4 preceding siblings ...)
2025-09-15 20:29 ` [PATCH v2 5/6] object-file: update naming from bulk-checkin Justin Tobler
@ 2025-09-15 20:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
6 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-15 20:29 UTC (permalink / raw)
To: git; +Cc: ps, Justin Tobler
Transactions are managed via the {begin,end}_odb_transaction() functions
in the object-file subsystem, and their implementation is specific to the
files object source. Introduce odb_transaction_{begin,commit}() in the
odb subsystem to provide what will eventually become an object source
agnostic means to manage transactions.
Update call sites to instead manage transactions through the odb
subsystem. Also rename the {begin,end}_odb_transaction() functions to
object_file_transaction_{begin,commit}() to clarify the object source
they support.
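For illustration, a call site under the new odb-level interface brackets
its object writes roughly as follows (sketch only; `repo` stands in for
whatever `struct repository` handle the caller already has):

	struct odb_transaction *transaction;

	/* Batch subsequent object writes into a single transaction. */
	transaction = odb_transaction_begin(repo->objects);

	/* ... write any number of objects ... */

	/* Commit the transaction and make the new objects visible. */
	odb_transaction_commit(transaction);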
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/add.c | 5 +++--
builtin/unpack-objects.c | 4 ++--
builtin/update-index.c | 7 ++++---
cache-tree.c | 4 ++--
object-file.c | 12 +++++++-----
object-file.h | 6 +++---
odb.c | 10 ++++++++++
odb.h | 14 ++++++++++++++
read-cache.c | 4 ++--
9 files changed, 47 insertions(+), 19 deletions(-)
diff --git a/builtin/add.c b/builtin/add.c
index 8294366d68..bf312c40be 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -15,6 +15,7 @@
#include "pathspec.h"
#include "run-command.h"
#include "object-file.h"
+#include "odb.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
@@ -575,7 +576,7 @@ int cmd_add(int argc,
string_list_clear(&only_match_skip_worktree, 0);
}
- transaction = begin_odb_transaction(repo->objects);
+ transaction = odb_transaction_begin(repo->objects);
ps_matched = xcalloc(pathspec.nr, 1);
if (add_renormalize)
@@ -594,7 +595,7 @@ int cmd_add(int argc,
if (chmod_arg && pathspec.nr)
exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
finish:
if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 4596fff0da..ef79e43715 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -599,12 +599,12 @@ static void unpack_all(void)
progress = start_progress(the_repository,
_("Unpacking objects"), nr_objects);
CALLOC_ARRAY(obj_list, nr_objects);
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
for (i = 0; i < nr_objects; i++) {
unpack_one(i);
display_progress(progress, i + 1);
}
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
stop_progress(&progress);
if (delta_list)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index ee01c4e423..8a5907767b 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -18,6 +18,7 @@
#include "cache-tree.h"
#include "tree-walk.h"
#include "object-file.h"
+#include "odb.h"
#include "refs.h"
#include "resolve-undo.h"
#include "parse-options.h"
@@ -1122,7 +1123,7 @@ int cmd_update_index(int argc,
* Allow the object layer to optimize adding multiple objects in
* a batch.
*/
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
while (ctx.argc) {
if (parseopt_state != PARSE_OPT_DONE)
parseopt_state = parse_options_step(&ctx, options,
@@ -1152,7 +1153,7 @@ int cmd_update_index(int argc,
* a transaction.
*/
if (transaction && verbose) {
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
transaction = NULL;
}
@@ -1220,7 +1221,7 @@ int cmd_update_index(int argc,
/*
* By now we have added all of the new objects
*/
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
if (split_index > 0) {
if (repo_config_get_split_index(the_repository) == 0)
diff --git a/cache-tree.c b/cache-tree.c
index 7ad3225a71..f4a8753e27 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -491,12 +491,12 @@ int cache_tree_update(struct index_state *istate, int flags)
trace2_region_enter("cache_tree", "update", the_repository);
if (!the_repository->objects->transaction)
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
"", 0, &skip, flags);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
trace2_region_leave("cache_tree", "update", the_repository);
trace_performance_leave("cache_tree_update");
diff --git a/object-file.c b/object-file.c
index 63195da2e8..0fc24b48da 100644
--- a/object-file.c
+++ b/object-file.c
@@ -691,7 +691,7 @@ static void prepare_loose_object_transaction(struct odb_transaction *transaction
* We lazily create the temporary object directory
* the first time an object might be added, since
* callers may not know whether any objects will be
- * added at the time they call begin_odb_transaction.
+ * added at the time they call object_file_transaction_begin.
*/
if (!transaction || transaction->objdir)
return;
@@ -1623,14 +1623,14 @@ int index_fd(struct index_state *istate, struct object_id *oid,
struct odb_transaction *transaction = NULL;
if (!the_repository->objects->transaction)
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
ret = index_blob_packfile_transaction(the_repository->objects->transaction,
oid, fd,
xsize_t(st->st_size),
path, flags);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
}
close(fd);
@@ -1970,8 +1970,10 @@ int read_loose_object(struct repository *repo,
return ret;
}
-struct odb_transaction *begin_odb_transaction(struct object_database *odb)
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source)
{
+ struct object_database *odb = source->odb;
+
if (odb->transaction)
BUG("ODB transaction already started");
@@ -1981,7 +1983,7 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb)
return odb->transaction;
}
-void end_odb_transaction(struct odb_transaction *transaction)
+void object_file_transaction_commit(struct odb_transaction *transaction)
{
if (!transaction)
return;
diff --git a/object-file.h b/object-file.h
index 5925563f83..965f437ada 100644
--- a/object-file.h
+++ b/object-file.h
@@ -222,17 +222,17 @@ struct odb_transaction;
/*
* Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
+ * multiple objects. object_file_transaction_commit must be called
* to make new objects visible. Only a single transaction
* can be pending at a time and must be ended before
* beginning another.
*/
-struct odb_transaction *begin_odb_transaction(struct object_database *odb);
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source);
/*
* Tell the object database to make any objects from the
* current transaction visible.
*/
-void end_odb_transaction(struct odb_transaction *transaction);
+void object_file_transaction_commit(struct odb_transaction *transaction);
#endif /* OBJECT_FILE_H */
diff --git a/odb.c b/odb.c
index 2a92a018c4..af9534bfe1 100644
--- a/odb.c
+++ b/odb.c
@@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o)
hashmap_clear(&o->pack_map);
string_list_clear(&o->submodule_source_paths, 0);
}
+
+struct odb_transaction *odb_transaction_begin(struct object_database *odb)
+{
+ return object_file_transaction_begin(odb->sources);
+}
+
+void odb_transaction_commit(struct odb_transaction *transaction)
+{
+ object_file_transaction_commit(transaction);
+}
diff --git a/odb.h b/odb.h
index a89b214390..48621f9402 100644
--- a/odb.h
+++ b/odb.h
@@ -185,6 +185,20 @@ struct object_database {
struct object_database *odb_new(struct repository *repo);
void odb_clear(struct object_database *o);
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
+ * transaction. Caller are responsible to ensure there is only a single ODB
+ * transaction pending at a time.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
+/*
+ * Commits an ODB transaction making the written objects visible. If the
+ * specified transaction is NULL, the function is a no-op.
+ */
+void odb_transaction_commit(struct odb_transaction *transaction);
+
/*
* Find source by its object directory path. Dies in case the source couldn't
* be found.
diff --git a/read-cache.c b/read-cache.c
index 00acd8c858..e9a0ddfeb1 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3973,11 +3973,11 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
* may not have their own transaction active.
*/
if (!repo->objects->transaction)
- transaction = begin_odb_transaction(repo->objects);
+ transaction = odb_transaction_begin(repo->objects);
run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
release_revisions(&rev);
return !!data.add_errors;
--
2.51.0.193.g4975ec3473b
^ permalink raw reply related [flat|nested] 38+ messages in thread

* [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem
2025-09-15 20:29 ` [PATCH v2 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
` (5 preceding siblings ...)
2025-09-15 20:29 ` [PATCH v2 6/6] odb: add transaction interface Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 1/6] bulk-checkin: remove ODB transaction nesting Justin Tobler
` (5 more replies)
6 siblings, 6 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
Greetings,
This series is a followup to [1] and continues iterating on the ODB
transaction interfaces.
The bulk-checkin subsystem provides an interface to manage ODB
transactions. Apart from {begin,end}_odb_transaction(), these functions
are only used by the object-file subsystem to manage aspects of a
transaction implementation specific to the files object source.
In a pluggable object database future where we could have different
types of object database sources, transaction handling will have to be
implemented separately per source. Thus, the primary focus of this
series is to simplify the existing ODB transaction interface and to
provide a means to manage transactions via the ODB subsystem in a
manner that can eventually become object source agnostic.
This series is built on top of 4975ec3473b (The seventh batch,
2025-09-08) with jt/de-global-bulk-checkin merged into it at ddc0b56ad77
(bulk-checkin: use repository variable from transaction, 2025-08-22).
Changes since V1:
- end_odb_transaction() is now a no-op when no transaction is specified
and also guards against ending a transaction that is not set in the
object database.
- object_file_transaction_begin() now accepts a `struct odb_source`
instead of a `struct object_database`, which is more in line with it
being a transaction implementation specific to an object source.
- Further clarified some commit messages.
- Renamed object_file_transaction_end() to
object_file_transaction_commit() to match the corresponding function
in the ODB subsystem.
- Added some missing documentation for odb_transaction_{begin,commit}().
Changes since V2:
- begin_odb_transaction() now behaves as a no-op and returns a NULL
transaction value when the ODB already has an active transaction.
Callers are no longer required to manually check if there is an active
transaction beforehand.
- Reworded several commit messages.
Thanks,
-Justin
[1]: <20250820225531.1212935-1-jltobler@gmail.com>
Justin Tobler (6):
bulk-checkin: remove ODB transaction nesting
builtin/update-index: end ODB transaction when --verbose is specified
bulk-checkin: drop flush_odb_transaction()
object-file: relocate ODB transaction code
object-file: update naming from bulk-checkin
odb: add transaction interface
Makefile | 1 -
builtin/add.c | 7 +-
builtin/unpack-objects.c | 5 +-
builtin/update-index.c | 29 +--
bulk-checkin.c | 403 --------------------------------------
bulk-checkin.h | 61 ------
cache-tree.c | 5 +-
meson.build | 1 -
object-file.c | 404 ++++++++++++++++++++++++++++++++++++++-
object-file.h | 16 ++
odb.c | 10 +
odb.h | 13 ++
read-cache.c | 5 +-
13 files changed, 462 insertions(+), 498 deletions(-)
delete mode 100644 bulk-checkin.c
delete mode 100644 bulk-checkin.h
Range-diff against v2:
1: 217f3eb6d2 ! 1: 9fe2ab4198 bulk-checkin: remove ODB transaction nesting
@@ Commit message
bulk-checkin: remove ODB transaction nesting
ODB transactions support being nested. Only the outermost
- {begin,end}_odb_transaction() start and finish a transaction. This is
- done so that certain object write codepaths that occur internally can be
- optimized via ODB transactions without having to worry if a transaction
- has already been started or not. This can make the interface a bit
- awkward to use, as calling {begin,end}_odb_transaction() does not
- guarantee that a transaction is actually started or ended. Thus, in
- situations where a transaction must be explicitly flushed,
- flush_odb_transaction() must be used.
+ {begin,end}_odb_transaction() start and finish a transaction. This
+ allows internal object write codepaths to be optimized with ODB
+ transactions without worrying about whether a transaction is already
+ active. When {begin,end}_odb_transaction() is invoked during an active
+ transaction, these operations are essentially treated as no-ops. This
+ can make the interface a bit awkward to use, as calling
+ end_odb_transaction() does not guarantee that a transaction is actually
+ ended. Thus, in situations where a transaction needs to be explicitly
+ flushed, flush_odb_transaction() must be used.
- To better clarify ownership sematics around a transaction and further
- remove the need for flush_odb_transaction() as part of the transaction
- interface, instead be more explicit and require callers who use ODB
- transactions internally to ensure there is not already a pending
- transaction before beginning or ending a transaction.
+ To remove the need for an explicit transaction flush operation via
+ flush_odb_transaction() and better clarify transaction semantics, drop
+ the transaction nesting mechanism in favor of begin_odb_transaction()
+ returning a NULL transaction value to signal it was a no-op, and
+ end_odb_transaction() behaving as a no-op when a NULL transaction value
+ is passed. This is safe for existing callers as the transaction value
+ wired to end_odb_transaction() already comes from
+ begin_odb_transaction() and thus continues the same no-op behavior when
+ a transaction is already pending. With this model, passing a pending
+ transaction to end_odb_transaction() ensures it is committed at that
+ point in time.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
@@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *tra
- odb->transaction->odb = odb;
- }
+ if (odb->transaction)
-+ BUG("ODB transaction already started");
++ return NULL;
- odb->transaction->nesting += 1;
+ CALLOC_ARRAY(odb->transaction, 1);
@@ bulk-checkin.h: int index_blob_bulk_checkin(struct odb_transaction *transaction,
- * to make new objects visible. Transactions can be nested,
- * and objects are only visible after the outermost transaction
- * is complete or the transaction is flushed.
-+ * to make new objects visible. Only a single transaction
-+ * can be pending at a time and must be ended before
-+ * beginning another.
++ * to make new objects visible. If a transaction is already
++ * pending, NULL is returned.
*/
struct odb_transaction *begin_odb_transaction(struct object_database *odb);
@@ bulk-checkin.h: void flush_odb_transaction(struct odb_transaction *transaction);
void end_odb_transaction(struct odb_transaction *transaction);
- ## cache-tree.c ##
-@@ cache-tree.c: static int update_one(struct cache_tree *it,
-
- int cache_tree_update(struct index_state *istate, int flags)
- {
-- struct odb_transaction *transaction;
-+ struct odb_transaction *transaction = NULL;
- int skip, i;
-
- i = verify_cache(istate, flags);
-@@ cache-tree.c: int cache_tree_update(struct index_state *istate, int flags)
-
- trace_performance_enter();
- trace2_region_enter("cache_tree", "update", the_repository);
-- transaction = begin_odb_transaction(the_repository->objects);
-+
-+ if (!the_repository->objects->transaction)
-+ transaction = begin_odb_transaction(the_repository->objects);
-+
- i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
- "", 0, &skip, flags);
-+
- end_odb_transaction(transaction);
-+
- trace2_region_leave("cache_tree", "update", the_repository);
- trace_performance_leave("cache_tree_update");
- if (i < 0)
-
## object-file.c ##
@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
- ret = index_core(istate, oid, fd, xsize_t(st->st_size),
- type, path, flags);
- } else {
-- struct odb_transaction *transaction;
-+ struct odb_transaction *transaction = NULL;
+ struct odb_transaction *transaction;
-- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = begin_odb_transaction(the_repository->objects);
- ret = index_blob_bulk_checkin(transaction,
-+ if (!the_repository->objects->transaction)
-+ transaction = begin_odb_transaction(the_repository->objects);
-+
+ ret = index_blob_bulk_checkin(the_repository->objects->transaction,
oid, fd, xsize_t(st->st_size),
path, flags);
-+
end_odb_transaction(transaction);
- }
-
-
- ## read-cache.c ##
-@@ read-cache.c: int add_files_to_cache(struct repository *repo, const char *prefix,
- const struct pathspec *pathspec, char *ps_matched,
- int include_sparse, int flags)
- {
-- struct odb_transaction *transaction;
-+ struct odb_transaction *transaction = NULL;
- struct update_callback_data data;
- struct rev_info rev;
-
-@@ read-cache.c: int add_files_to_cache(struct repository *repo, const char *prefix,
- * This function is invoked from commands other than 'add', which
- * may not have their own transaction active.
- */
-- transaction = begin_odb_transaction(repo->objects);
-+ if (!repo->objects->transaction)
-+ transaction = begin_odb_transaction(repo->objects);
-+
- run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-+
- end_odb_transaction(transaction);
-
- release_revisions(&rev);
2: 16af9f1169 ! 2: f24c28ec56 builtin/update-index: end ODB transaction when --verbose is specified
@@ Commit message
object directory and migrated to the primary object database upon
transaction commit.
- When the --verbose option is specified, each of the following objects is
- explicitly flushed via flush_odb_transaction() prior to reporting the
- update. Flushing the object database transaction migrates pending
- objects to the primary object database without marking the transaction
- as complete. This is done so objects are immediately visible to
- git-update-index(1) callers using the --verbose option and that rely on
- parsing verbose output to know when objects are written.
+ When the --verbose option is specified, the subsequent set of objects
+ written are explicitly flushed via flush_odb_transaction() prior to
+ reporting the update. Flushing the object database transaction migrates
+ pending objects to the primary object database without marking the
+ transaction as complete. This is done so objects are immediately visible
+ to git-update-index(1) callers using the --verbose option and that rely
+ on parsing verbose output to know when objects are written.
- Due to how git-update-index(1) parses options, each filename argument is
- evaluated with only the set of options that precede it. Therefore, it is
- possible for an initial set of objects to be written in a transaction
- before a --verbose option is encountered.
+ Due to how git-update-index(1) parses arguments, options that come after
+ a filename are not considered during the object update. Therefore, it
+ may not be known ahead of time whether the --verbose option is present
+ and thus object writes are considered transactional by default until a
+ --verbose option is parsed.
- As soon as the --verbose option is parsed in git-update-index(1), all
- subsequent object writes are flushed prior to being reported and thus no
- longer benefit from being transactional. Furthermore, the mechanism to
- flush a transaction without committing is rather awkward. Drop the call
- to flush_odb_transaction() in favor of ending the transaction early when
- the --verbose flag is encountered.
+ Flushing a transaction after individual object writes negates the
+ benefit of writing objects to a transaction in the first place.
+ Furthermore, the mechanism to flush a transaction without actually
+ committing is rather awkward. Drop the call to flush_odb_transaction()
+ in favor of ending the transaction altogether when the --verbose flag is
+ encountered. Subsequent object writes occur outside of a transaction and
+ are therefore immediately visible which matches the current behavior.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
3: ae6199a3c8 = 3: 6268b2934a bulk-checkin: drop flush_odb_transaction()
4: 01f5485441 ! 4: e5f1d93797 object-file: relocate ODB transaction code
@@ Commit message
are only used by the object-file subsystem to manage aspects of a
transaction implementation specific to the files object source.
- Relocate all the transaction code in in bulk-checkin to object-file.
- This simplifies the exposed transaction interface by reducing it to only
+ Relocate all the transaction code in bulk-checkin to object-file. This
+ simplifies the exposed transaction interface by reducing it to only
{begin,end}_odb_transaction(). Function and type names are adjusted in
the subsequent commit to better fit the new location.
@@ bulk-checkin.c (deleted)
-struct odb_transaction *begin_odb_transaction(struct object_database *odb)
-{
- if (odb->transaction)
-- BUG("ODB transaction already started");
+- return NULL;
-
- CALLOC_ARRAY(odb->transaction, 1);
- odb->transaction->odb = odb;
@@ bulk-checkin.h (deleted)
-/*
- * Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
-- * to make new objects visible. Only a single transaction
-- * can be pending at a time and must be ended before
-- * beginning another.
+- * to make new objects visible. If a transaction is already
+- * pending, NULL is returned.
- */
-struct odb_transaction *begin_odb_transaction(struct object_database *odb);
-
@@ object-file.c: int read_loose_object(struct repository *repo,
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
+{
+ if (odb->transaction)
-+ BUG("ODB transaction already started");
++ return NULL;
+
+ CALLOC_ARRAY(odb->transaction, 1);
+ odb->transaction->odb = odb;
@@ object-file.h: int read_loose_object(struct repository *repo,
+/*
+ * Tell the object database to optimize for adding
+ * multiple objects. end_odb_transaction must be called
-+ * to make new objects visible. Only a single transaction
-+ * can be pending at a time and must be ended before
-+ * beginning another.
++ * to make new objects visible. If a transaction is already
++ * pending, NULL is returned.
+ */
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
+
5: 333319a63d ! 5: 76eabfb6a1 object-file: update naming from bulk-checkin
@@ object-file.c: static int index_blob_bulk_checkin(struct odb_transaction *transa
return error("cannot seek back");
}
@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
- if (!the_repository->objects->transaction)
- transaction = begin_odb_transaction(the_repository->objects);
+ struct odb_transaction *transaction;
+ transaction = begin_odb_transaction(the_repository->objects);
- ret = index_blob_bulk_checkin(the_repository->objects->transaction,
- oid, fd, xsize_t(st->st_size),
- path, flags);
@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
+ oid, fd,
+ xsize_t(st->st_size),
+ path, flags);
-
end_odb_transaction(transaction);
}
+
@@ object-file.c: void end_odb_transaction(struct odb_transaction *transaction)
*/
ASSERT(transaction == transaction->odb->transaction);
6: 30759bbd0d ! 6: 9accbfdbf0 odb: add transaction interface
@@ builtin/update-index.c: int cmd_update_index(int argc,
## cache-tree.c ##
@@ cache-tree.c: int cache_tree_update(struct index_state *istate, int flags)
- trace2_region_enter("cache_tree", "update", the_repository);
-
- if (!the_repository->objects->transaction)
-- transaction = begin_odb_transaction(the_repository->objects);
-+ transaction = odb_transaction_begin(the_repository->objects);
+ trace_performance_enter();
+ trace2_region_enter("cache_tree", "update", the_repository);
+- transaction = begin_odb_transaction(the_repository->objects);
++ transaction = odb_transaction_begin(the_repository->objects);
i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
"", 0, &skip, flags);
-
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
-
trace2_region_leave("cache_tree", "update", the_repository);
trace_performance_leave("cache_tree_update");
+ if (i < 0)
## object-file.c ##
@@ object-file.c: static void prepare_loose_object_transaction(struct odb_transaction *transaction
@@ object-file.c: static void prepare_loose_object_transaction(struct odb_transacti
if (!transaction || transaction->objdir)
return;
@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
- struct odb_transaction *transaction = NULL;
-
- if (!the_repository->objects->transaction)
-- transaction = begin_odb_transaction(the_repository->objects);
-+ transaction = odb_transaction_begin(the_repository->objects);
+ } else {
+ struct odb_transaction *transaction;
+- transaction = begin_odb_transaction(the_repository->objects);
++ transaction = odb_transaction_begin(the_repository->objects);
ret = index_blob_packfile_transaction(the_repository->objects->transaction,
oid, fd,
xsize_t(st->st_size),
path, flags);
-
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
}
@@ object-file.c: int read_loose_object(struct repository *repo,
+ struct object_database *odb = source->odb;
+
if (odb->transaction)
- BUG("ODB transaction already started");
+ return NULL;
@@ object-file.c: struct odb_transaction *begin_odb_transaction(struct object_database *odb)
return odb->transaction;
@@ object-file.h: struct odb_transaction;
* Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
+ * multiple objects. object_file_transaction_commit must be called
- * to make new objects visible. Only a single transaction
- * can be pending at a time and must be ended before
- * beginning another.
+ * to make new objects visible. If a transaction is already
+ * pending, NULL is returned.
*/
-struct odb_transaction *begin_odb_transaction(struct object_database *odb);
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source);
@@ odb.h: struct object_database {
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
-+ * transaction. Caller are responsible to ensure there is only a single ODB
-+ * transaction pending at a time.
++ * transaction. If the ODB already has a pending transaction, NULL is returned.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
@@ odb.h: struct object_database {
## read-cache.c ##
@@ read-cache.c: int add_files_to_cache(struct repository *repo, const char *prefix,
+ * This function is invoked from commands other than 'add', which
* may not have their own transaction active.
*/
- if (!repo->objects->transaction)
-- transaction = begin_odb_transaction(repo->objects);
-+ transaction = odb_transaction_begin(repo->objects);
-
+- transaction = begin_odb_transaction(repo->objects);
++ transaction = odb_transaction_begin(repo->objects);
run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
--
2.51.0.193.g4975ec3473b
^ permalink raw reply [flat|nested] 38+ messages in thread

* [PATCH v3 1/6] bulk-checkin: remove ODB transaction nesting
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 2/6] builtin/update-index: end ODB transaction when --verbose is specified Justin Tobler
` (4 subsequent siblings)
5 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
ODB transactions support being nested. Only the outermost
{begin,end}_odb_transaction() start and finish a transaction. This
allows internal object write codepaths to be optimized with ODB
transactions without worrying about whether a transaction is already
active. When {begin,end}_odb_transaction() is invoked during an active
transaction, these operations are essentially treated as no-ops. This
can make the interface a bit awkward to use, as calling
end_odb_transaction() does not guarantee that a transaction is actually
ended. Thus, in situations where a transaction needs to be explicitly
flushed, flush_odb_transaction() must be used.
To remove the need for an explicit transaction flush operation via
flush_odb_transaction() and better clarify transaction semantics, drop
the transaction nesting mechanism in favor of begin_odb_transaction()
returning a NULL transaction value to signal it was a no-op, and
end_odb_transaction() behaving as a no-op when a NULL transaction value
is passed. This is safe for existing callers as the transaction value
wired to end_odb_transaction() already comes from
begin_odb_transaction() and thus continues the same no-op behavior when
a transaction is already pending. With this model, passing a pending
transaction to end_odb_transaction() ensures it is committed at that
point in time.
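As a rough sketch of the resulting contract (using the_repository here
only for illustration):

	struct odb_transaction *transaction;

	/*
	 * Returns NULL when a transaction is already pending; callers
	 * may pass that NULL along unchanged.
	 */
	transaction = begin_odb_transaction(the_repository->objects);

	/* ... object writes ... */

	/*
	 * No-op for NULL; otherwise commits the pending transaction
	 * and makes the new objects visible.
	 */
	end_odb_transaction(transaction);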
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
bulk-checkin.c | 22 ++++++++++------------
bulk-checkin.h | 8 +++-----
object-file.c | 2 +-
3 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 124c493067..eb6ef704c3 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -33,7 +33,6 @@ struct bulk_checkin_packfile {
struct odb_transaction {
struct object_database *odb;
- int nesting;
struct tmp_objdir *objdir;
struct bulk_checkin_packfile packfile;
};
@@ -368,12 +367,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
struct odb_transaction *begin_odb_transaction(struct object_database *odb)
{
- if (!odb->transaction) {
- CALLOC_ARRAY(odb->transaction, 1);
- odb->transaction->odb = odb;
- }
+ if (odb->transaction)
+ return NULL;
- odb->transaction->nesting += 1;
+ CALLOC_ARRAY(odb->transaction, 1);
+ odb->transaction->odb = odb;
return odb->transaction;
}
@@ -389,14 +387,14 @@ void flush_odb_transaction(struct odb_transaction *transaction)
void end_odb_transaction(struct odb_transaction *transaction)
{
- if (!transaction || transaction->nesting == 0)
- BUG("Unbalanced ODB transaction nesting");
-
- transaction->nesting -= 1;
-
- if (transaction->nesting)
+ if (!transaction)
return;
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->odb->transaction);
+
flush_odb_transaction(transaction);
transaction->odb->transaction = NULL;
free(transaction);
diff --git a/bulk-checkin.h b/bulk-checkin.h
index ac8887f476..51d0ac6134 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -38,9 +38,8 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
/*
* Tell the object database to optimize for adding
* multiple objects. end_odb_transaction must be called
- * to make new objects visible. Transactions can be nested,
- * and objects are only visible after the outermost transaction
- * is complete or the transaction is flushed.
+ * to make new objects visible. If a transaction is already
+ * pending, NULL is returned.
*/
struct odb_transaction *begin_odb_transaction(struct object_database *odb);
@@ -53,8 +52,7 @@ void flush_odb_transaction(struct odb_transaction *transaction);
/*
* Tell the object database to make any objects from the
- * current transaction visible if this is the final nested
- * transaction.
+ * current transaction visible.
*/
void end_odb_transaction(struct odb_transaction *transaction);
diff --git a/object-file.c b/object-file.c
index bc15af4245..5e76573549 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1267,7 +1267,7 @@ int index_fd(struct index_state *istate, struct object_id *oid,
struct odb_transaction *transaction;
transaction = begin_odb_transaction(the_repository->objects);
- ret = index_blob_bulk_checkin(transaction,
+ ret = index_blob_bulk_checkin(the_repository->objects->transaction,
oid, fd, xsize_t(st->st_size),
path, flags);
end_odb_transaction(transaction);
--
2.51.0.193.g4975ec3473b
^ permalink raw reply related [flat|nested] 38+ messages in thread

* [PATCH v3 2/6] builtin/update-index: end ODB transaction when --verbose is specified
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
2025-09-16 18:29 ` [PATCH v3 1/6] bulk-checkin: remove ODB transaction nesting Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 3/6] bulk-checkin: drop flush_odb_transaction() Justin Tobler
` (3 subsequent siblings)
5 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
With 23a3a303 (update-index: use the bulk-checkin infrastructure,
2022-04-04), object database transactions were added to
git-update-index(1) to facilitate writing objects in bulk. With
transactions, newly added objects are instead written to a temporary
object directory and migrated to the primary object database upon
transaction commit.
When the --verbose option is specified, the subsequent set of objects
written are explicitly flushed via flush_odb_transaction() prior to
reporting the update. Flushing the object database transaction migrates
pending objects to the primary object database without marking the
transaction as complete. This is done so objects are immediately visible
to git-update-index(1) callers using the --verbose option and that rely
on parsing verbose output to know when objects are written.
Due to how git-update-index(1) parses arguments, options that come after
a filename are not considered during the object update. Therefore, it
may not be known ahead of time whether the --verbose option is present
and thus object writes are considered transactional by default until a
--verbose option is parsed.
Flushing a transaction after individual object writes negates the
benefit of writing objects to a transaction in the first place.
Furthermore, the mechanism to flush a transaction without actually
committing is rather awkward. Drop the call to flush_odb_transaction()
in favor of ending the transaction altogether when the --verbose flag is
encountered. Subsequent object writes occur outside of a transaction and
are therefore immediately visible which matches the current behavior.
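For a hypothetical invocation such as

	git update-index --add one.txt --verbose two.txt

the object for one.txt is written inside the transaction, the
transaction is ended once --verbose is parsed, and two.txt is then
written outside of any transaction and reported immediately, so any
reported object is already visible, as before.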
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/update-index.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2ba2d29c95..d36bc55752 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -70,14 +70,6 @@ static void report(const char *fmt, ...)
if (!verbose)
return;
- /*
- * It is possible, though unlikely, that a caller could use the verbose
- * output to synchronize with addition of objects to the object
- * database. The current implementation of ODB transactions leaves
- * objects invisible while a transaction is active, so flush the
- * transaction here before reporting a change made by update-index.
- */
- flush_odb_transaction(the_repository->objects->transaction);
va_start(vp, fmt);
vprintf(fmt, vp);
putchar('\n');
@@ -1150,6 +1142,21 @@ int cmd_update_index(int argc,
const char *path = ctx.argv[0];
char *p;
+ /*
+ * It is possible, though unlikely, that a caller could
+ * use the verbose output to synchronize with addition
+ * of objects to the object database. The current
+ * implementation of ODB transactions leaves objects
+ * invisible while a transaction is active, so end the
+ * transaction here early before processing the next
+ * update. All further updates are performed outside of
+ * a transaction.
+ */
+ if (transaction && verbose) {
+ end_odb_transaction(transaction);
+ transaction = NULL;
+ }
+
setup_work_tree();
p = prefix_path(prefix, prefix_length, path);
update_one(p);
--
2.51.0.193.g4975ec3473b
^ permalink raw reply related [flat|nested] 38+ messages in thread

* [PATCH v3 3/6] bulk-checkin: drop flush_odb_transaction()
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
2025-09-16 18:29 ` [PATCH v3 1/6] bulk-checkin: remove ODB transaction nesting Justin Tobler
2025-09-16 18:29 ` [PATCH v3 2/6] builtin/update-index: end ODB transaction when --verbose is specified Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 4/6] object-file: relocate ODB transaction code Justin Tobler
` (2 subsequent siblings)
5 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
Object database transactions can be explicitly flushed via
flush_odb_transaction() without actually completing the transaction.
This makes the provided transactional interface a bit awkward. Now that
there are no longer any flush_odb_transaction() call sites, drop the
function to simplify the interface and further ensure that a transaction
is only finalized when end_odb_transaction() is invoked.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
bulk-checkin.c | 12 ++----------
bulk-checkin.h | 7 -------
2 files changed, 2 insertions(+), 17 deletions(-)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index eb6ef704c3..5de848deff 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -376,15 +376,6 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb)
return odb->transaction;
}
-void flush_odb_transaction(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(transaction);
-}
-
void end_odb_transaction(struct odb_transaction *transaction)
{
if (!transaction)
@@ -395,7 +386,8 @@ void end_odb_transaction(struct odb_transaction *transaction)
*/
ASSERT(transaction == transaction->odb->transaction);
- flush_odb_transaction(transaction);
+ flush_batch_fsync(transaction);
+ flush_bulk_checkin_packfile(transaction);
transaction->odb->transaction = NULL;
free(transaction);
}
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 51d0ac6134..eea728f0d4 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -43,13 +43,6 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction,
*/
struct odb_transaction *begin_odb_transaction(struct object_database *odb);
-/*
- * Make any objects that are currently part of a pending object
- * database transaction visible. It is valid to call this function
- * even if no transaction is active.
- */
-void flush_odb_transaction(struct odb_transaction *transaction);
-
/*
* Tell the object database to make any objects from the
* current transaction visible.
--
2.51.0.193.g4975ec3473b
^ permalink raw reply related [flat|nested] 38+ messages in thread

* [PATCH v3 4/6] object-file: relocate ODB transaction code
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
` (2 preceding siblings ...)
2025-09-16 18:29 ` [PATCH v3 3/6] bulk-checkin: drop flush_odb_transaction() Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 5/6] object-file: update naming from bulk-checkin Justin Tobler
2025-09-16 18:29 ` [PATCH v3 6/6] odb: add transaction interface Justin Tobler
5 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
The bulk-checkin subsystem provides various functions to manage ODB
transactions. Apart from {begin,end}_odb_transaction(), these functions
are only used by the object-file subsystem to manage aspects of a
transaction implementation specific to the files object source.
Relocate all the transaction code in bulk-checkin to object-file. This
simplifies the exposed transaction interface by reducing it to only
{begin,end}_odb_transaction(). Function and type names are adjusted in
the subsequent commit to better fit the new location.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Makefile | 1 -
builtin/add.c | 2 +-
builtin/unpack-objects.c | 1 -
builtin/update-index.c | 1 -
bulk-checkin.c | 393 --------------------------------------
bulk-checkin.h | 52 ------
cache-tree.c | 1 -
meson.build | 1 -
object-file.c | 394 ++++++++++++++++++++++++++++++++++++++-
object-file.h | 16 ++
read-cache.c | 1 -
11 files changed, 410 insertions(+), 453 deletions(-)
delete mode 100644 bulk-checkin.c
delete mode 100644 bulk-checkin.h
diff --git a/Makefile b/Makefile
index 4c95affadb..d25d4255f8 100644
--- a/Makefile
+++ b/Makefile
@@ -974,7 +974,6 @@ LIB_OBJS += blame.o
LIB_OBJS += blob.o
LIB_OBJS += bloom.o
LIB_OBJS += branch.o
-LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle-uri.o
LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o
diff --git a/builtin/add.c b/builtin/add.c
index 740c7c4581..8294366d68 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -14,13 +14,13 @@
#include "gettext.h"
#include "pathspec.h"
#include "run-command.h"
+#include "object-file.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
#include "diff.h"
#include "read-cache.h"
#include "revision.h"
-#include "bulk-checkin.h"
#include "strvec.h"
#include "submodule.h"
#include "add-interactive.h"
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 28124b324d..4596fff0da 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -2,7 +2,6 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
-#include "bulk-checkin.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
diff --git a/builtin/update-index.c b/builtin/update-index.c
index d36bc55752..ee01c4e423 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -8,7 +8,6 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
-#include "bulk-checkin.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
diff --git a/bulk-checkin.c b/bulk-checkin.c
deleted file mode 100644
index 5de848deff..0000000000
--- a/bulk-checkin.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-
-#define USE_THE_REPOSITORY_VARIABLE
-
-#include "git-compat-util.h"
-#include "bulk-checkin.h"
-#include "environment.h"
-#include "gettext.h"
-#include "hex.h"
-#include "lockfile.h"
-#include "repository.h"
-#include "csum-file.h"
-#include "pack.h"
-#include "strbuf.h"
-#include "tmp-objdir.h"
-#include "packfile.h"
-#include "object-file.h"
-#include "odb.h"
-
-struct bulk_checkin_packfile {
- char *pack_tmp_name;
- struct hashfile *f;
- off_t offset;
- struct pack_idx_option pack_idx_opts;
-
- struct pack_idx_entry **written;
- uint32_t alloc_written;
- uint32_t nr_written;
-};
-
-struct odb_transaction {
- struct object_database *odb;
-
- struct tmp_objdir *objdir;
- struct bulk_checkin_packfile packfile;
-};
-
-static void finish_tmp_packfile(struct odb_transaction *transaction,
- struct strbuf *basename,
- unsigned char hash[])
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- struct repository *repo = transaction->odb->repo;
- char *idx_tmp_name = NULL;
-
- stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
- state->written, state->nr_written, NULL,
- &state->pack_idx_opts, hash, &idx_tmp_name);
- rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
-
- free(idx_tmp_name);
-}
-
-static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- struct repository *repo = transaction->odb->repo;
- unsigned char hash[GIT_MAX_RAWSZ];
- struct strbuf packname = STRBUF_INIT;
-
- if (!state->f)
- return;
-
- if (state->nr_written == 0) {
- close(state->f->fd);
- free_hashfile(state->f);
- unlink(state->pack_tmp_name);
- goto clear_exit;
- } else if (state->nr_written == 1) {
- finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
- CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
- } else {
- int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
- fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
- state->nr_written, hash,
- state->offset);
- close(fd);
- }
-
- strbuf_addf(&packname, "%s/pack/pack-%s.",
- repo_get_object_directory(transaction->odb->repo),
- hash_to_hex_algop(hash, repo->hash_algo));
-
- finish_tmp_packfile(transaction, &packname, hash);
- for (uint32_t i = 0; i < state->nr_written; i++)
- free(state->written[i]);
-
-clear_exit:
- free(state->pack_tmp_name);
- free(state->written);
- memset(state, 0, sizeof(*state));
-
- strbuf_release(&packname);
- /* Make objects we just wrote available to ourselves */
- reprepare_packed_git(repo);
-}
-
-/*
- * Cleanup after batch-mode fsync_object_files.
- */
-static void flush_batch_fsync(struct odb_transaction *transaction)
-{
- struct strbuf temp_path = STRBUF_INIT;
- struct tempfile *temp;
-
- if (!transaction->objdir)
- return;
-
- /*
- * Issue a full hardware flush against a temporary file to ensure
- * that all objects are durable before any renames occur. The code in
- * fsync_loose_object_bulk_checkin has already issued a writeout
- * request, but it has not flushed any writeback cache in the storage
- * hardware or any filesystem logs. This fsync call acts as a barrier
- * to ensure that the data in each new object file is durable before
- * the final name is visible.
- */
- strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
- repo_get_object_directory(transaction->odb->repo));
- temp = xmks_tempfile(temp_path.buf);
- fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
- delete_tempfile(&temp);
- strbuf_release(&temp_path);
-
- /*
- * Make the object files visible in the primary ODB after their data is
- * fully durable.
- */
- tmp_objdir_migrate(transaction->objdir);
- transaction->objdir = NULL;
-}
-
-static int already_written(struct odb_transaction *transaction,
- struct object_id *oid)
-{
- /* The object may already exist in the repository */
- if (odb_has_object(transaction->odb, oid,
- HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
- return 1;
-
- /* Might want to keep the list sorted */
- for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
- if (oideq(&transaction->packfile.written[i]->oid, oid))
- return 1;
-
- /* This is a new object we need to keep */
- return 0;
-}
-
-/*
- * Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
- */
-static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
- struct git_hash_ctx *ctx, off_t *already_hashed_to,
- int fd, size_t size, const char *path,
- unsigned flags)
-{
- git_zstream s;
- unsigned char ibuf[16384];
- unsigned char obuf[16384];
- unsigned hdrlen;
- int status = Z_OK;
- int write_object = (flags & INDEX_WRITE_OBJECT);
- off_t offset = 0;
-
- git_deflate_init(&s, pack_compression_level);
-
- hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
- s.next_out = obuf + hdrlen;
- s.avail_out = sizeof(obuf) - hdrlen;
-
- while (status != Z_STREAM_END) {
- if (size && !s.avail_in) {
- size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
- ssize_t read_result = read_in_full(fd, ibuf, rsize);
- if (read_result < 0)
- die_errno("failed to read from '%s'", path);
- if ((size_t)read_result != rsize)
- die("failed to read %u bytes from '%s'",
- (unsigned)rsize, path);
- offset += rsize;
- if (*already_hashed_to < offset) {
- size_t hsize = offset - *already_hashed_to;
- if (rsize < hsize)
- hsize = rsize;
- if (hsize)
- git_hash_update(ctx, ibuf, hsize);
- *already_hashed_to = offset;
- }
- s.next_in = ibuf;
- s.avail_in = rsize;
- size -= rsize;
- }
-
- status = git_deflate(&s, size ? 0 : Z_FINISH);
-
- if (!s.avail_out || status == Z_STREAM_END) {
- if (write_object) {
- size_t written = s.next_out - obuf;
-
- /* would we bust the size limit? */
- if (state->nr_written &&
- pack_size_limit_cfg &&
- pack_size_limit_cfg < state->offset + written) {
- git_deflate_abort(&s);
- return -1;
- }
-
- hashwrite(state->f, obuf, written);
- state->offset += written;
- }
- s.next_out = obuf;
- s.avail_out = sizeof(obuf);
- }
-
- switch (status) {
- case Z_OK:
- case Z_BUF_ERROR:
- case Z_STREAM_END:
- continue;
- default:
- die("unexpected deflate failure: %d", status);
- }
- }
- git_deflate_end(&s);
- return 0;
-}
-
-/* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct odb_transaction *transaction,
- unsigned flags)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- if (!(flags & INDEX_WRITE_OBJECT) || state->f)
- return;
-
- state->f = create_tmp_packfile(transaction->odb->repo,
- &state->pack_tmp_name);
- reset_pack_idx_option(&state->pack_idx_opts);
-
- /* Pretend we are going to write only one object */
- state->offset = write_pack_header(state->f, 1);
- if (!state->offset)
- die_errno("unable to write pack header");
-}
-
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
- struct object_id *result_oid, int fd, size_t size,
- const char *path, unsigned flags)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- off_t seekback, already_hashed_to;
- struct git_hash_ctx ctx;
- unsigned char obuf[16384];
- unsigned header_len;
- struct hashfile_checkpoint checkpoint;
- struct pack_idx_entry *idx = NULL;
-
- seekback = lseek(fd, 0, SEEK_CUR);
- if (seekback == (off_t) -1)
- return error("cannot find the current offset");
-
- header_len = format_object_header((char *)obuf, sizeof(obuf),
- OBJ_BLOB, size);
- transaction->odb->repo->hash_algo->init_fn(&ctx);
- git_hash_update(&ctx, obuf, header_len);
-
- /* Note: idx is non-NULL when we are writing */
- if ((flags & INDEX_WRITE_OBJECT) != 0) {
- CALLOC_ARRAY(idx, 1);
-
- prepare_to_stream(transaction, flags);
- hashfile_checkpoint_init(state->f, &checkpoint);
- }
-
- already_hashed_to = 0;
-
- while (1) {
- prepare_to_stream(transaction, flags);
- if (idx) {
- hashfile_checkpoint(state->f, &checkpoint);
- idx->offset = state->offset;
- crc32_begin(state->f);
- }
- if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
- fd, size, path, flags))
- break;
- /*
- * Writing this object to the current pack will make
- * it too big; we need to truncate it, start a new
- * pack, and write into it.
- */
- if (!idx)
- BUG("should not happen");
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- flush_bulk_checkin_packfile(transaction);
- if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
- return error("cannot seek back");
- }
- git_hash_final_oid(result_oid, &ctx);
- if (!idx)
- return 0;
-
- idx->crc32 = crc32_end(state->f);
- if (already_written(transaction, result_oid)) {
- hashfile_truncate(state->f, &checkpoint);
- state->offset = checkpoint.offset;
- free(idx);
- } else {
- oidcpy(&idx->oid, result_oid);
- ALLOC_GROW(state->written,
- state->nr_written + 1,
- state->alloc_written);
- state->written[state->nr_written++] = idx;
- }
- return 0;
-}
-
-void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
-{
- /*
- * We lazily create the temporary object directory
- * the first time an object might be added, since
- * callers may not know whether any objects will be
- * added at the time they call begin_odb_transaction.
- */
- if (!transaction || transaction->objdir)
- return;
-
- transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
- if (transaction->objdir)
- tmp_objdir_replace_primary_odb(transaction->objdir, 0);
-}
-
-void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
- int fd, const char *filename)
-{
- /*
- * If we have an active ODB transaction, we issue a call that
- * cleans the filesystem page cache but avoids a hardware flush
- * command. Later on we will issue a single hardware flush
- * before renaming the objects to their final names as part of
- * flush_batch_fsync.
- */
- if (!transaction || !transaction->objdir ||
- git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
- if (errno == ENOSYS)
- warning(_("core.fsyncMethod = batch is unsupported on this platform"));
- fsync_or_die(fd, filename);
- }
-}
-
-struct odb_transaction *begin_odb_transaction(struct object_database *odb)
-{
- if (odb->transaction)
- return NULL;
-
- CALLOC_ARRAY(odb->transaction, 1);
- odb->transaction->odb = odb;
-
- return odb->transaction;
-}
-
-void end_odb_transaction(struct odb_transaction *transaction)
-{
- if (!transaction)
- return;
-
- /*
- * Ensure the transaction ending matches the pending transaction.
- */
- ASSERT(transaction == transaction->odb->transaction);
-
- flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(transaction);
- transaction->odb->transaction = NULL;
- free(transaction);
-}
diff --git a/bulk-checkin.h b/bulk-checkin.h
deleted file mode 100644
index eea728f0d4..0000000000
--- a/bulk-checkin.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-#ifndef BULK_CHECKIN_H
-#define BULK_CHECKIN_H
-
-#include "object.h"
-#include "odb.h"
-
-struct odb_transaction;
-
-void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
-void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
- int fd, const char *filename);
-
-/*
- * This writes the specified object to a packfile. Objects written here
- * during the same transaction are written to the same packfile. The
- * packfile is not flushed until the transaction is flushed. The caller
- * is expected to ensure a valid transaction is setup for objects to be
- * recorded to.
- *
- * This also bypasses the usual "convert-to-git" dance, and that is on
- * purpose. We could write a streaming version of the converting
- * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above). However, that is
- * somewhat complicated, as we do not know the size of the filter
- * result, which we need to know beforehand when writing a git object.
- * Since the primary motivation for trying to stream from the working
- * tree file and to avoid mmaping it in core is to deal with large
- * binary blobs, they generally do not want to get any conversion, and
- * callers should avoid this code path when filters are requested.
- */
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
- struct object_id *oid, int fd, size_t size,
- const char *path, unsigned flags);
-
-/*
- * Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
- * to make new objects visible. If a transaction is already
- * pending, NULL is returned.
- */
-struct odb_transaction *begin_odb_transaction(struct object_database *odb);
-
-/*
- * Tell the object database to make any objects from the
- * current transaction visible.
- */
-void end_odb_transaction(struct odb_transaction *transaction);
-
-#endif
diff --git a/cache-tree.c b/cache-tree.c
index d225554eed..79ddf6b727 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -8,7 +8,6 @@
#include "tree.h"
#include "tree-walk.h"
#include "cache-tree.h"
-#include "bulk-checkin.h"
#include "object-file.h"
#include "odb.h"
#include "read-cache-ll.h"
diff --git a/meson.build b/meson.build
index b3dfcc0497..fccb6d2eec 100644
--- a/meson.build
+++ b/meson.build
@@ -287,7 +287,6 @@ libgit_sources = [
'blob.c',
'bloom.c',
'branch.c',
- 'bulk-checkin.c',
'bundle-uri.c',
'bundle.c',
'cache-tree.c',
diff --git a/object-file.c b/object-file.c
index 5e76573549..03f9931b83 100644
--- a/object-file.c
+++ b/object-file.c
@@ -10,7 +10,6 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
-#include "bulk-checkin.h"
#include "convert.h"
#include "dir.h"
#include "environment.h"
@@ -28,6 +27,8 @@
#include "read-cache-ll.h"
#include "setup.h"
#include "streaming.h"
+#include "tempfile.h"
+#include "tmp-objdir.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
@@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
}
+struct bulk_checkin_packfile {
+ char *pack_tmp_name;
+ struct hashfile *f;
+ off_t offset;
+ struct pack_idx_option pack_idx_opts;
+
+ struct pack_idx_entry **written;
+ uint32_t alloc_written;
+ uint32_t nr_written;
+};
+
+struct odb_transaction {
+ struct object_database *odb;
+
+ struct tmp_objdir *objdir;
+ struct bulk_checkin_packfile packfile;
+};
+
+static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
+{
+ /*
+ * We lazily create the temporary object directory
+ * the first time an object might be added, since
+ * callers may not know whether any objects will be
+ * added at the time they call begin_odb_transaction.
+ */
+ if (!transaction || transaction->objdir)
+ return;
+
+ transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
+ if (transaction->objdir)
+ tmp_objdir_replace_primary_odb(transaction->objdir, 0);
+}
+
+static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+ int fd, const char *filename)
+{
+ /*
+ * If we have an active ODB transaction, we issue a call that
+ * cleans the filesystem page cache but avoids a hardware flush
+ * command. Later on we will issue a single hardware flush
+ * before renaming the objects to their final names as part of
+ * flush_batch_fsync.
+ */
+ if (!transaction || !transaction->objdir ||
+ git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
+ if (errno == ENOSYS)
+ warning(_("core.fsyncMethod = batch is unsupported on this platform"));
+ fsync_or_die(fd, filename);
+ }
+}
+
+/*
+ * Cleanup after batch-mode fsync_object_files.
+ */
+static void flush_batch_fsync(struct odb_transaction *transaction)
+{
+ struct strbuf temp_path = STRBUF_INIT;
+ struct tempfile *temp;
+
+ if (!transaction->objdir)
+ return;
+
+ /*
+ * Issue a full hardware flush against a temporary file to ensure
+ * that all objects are durable before any renames occur. The code in
+ * fsync_loose_object_bulk_checkin has already issued a writeout
+ * request, but it has not flushed any writeback cache in the storage
+ * hardware or any filesystem logs. This fsync call acts as a barrier
+ * to ensure that the data in each new object file is durable before
+ * the final name is visible.
+ */
+ strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+ repo_get_object_directory(transaction->odb->repo));
+ temp = xmks_tempfile(temp_path.buf);
+ fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
+ delete_tempfile(&temp);
+ strbuf_release(&temp_path);
+
+ /*
+ * Make the object files visible in the primary ODB after their data is
+ * fully durable.
+ */
+ tmp_objdir_migrate(transaction->objdir);
+ transaction->objdir = NULL;
+}
+
/* Finalize a file on disk, and close it. */
static void close_loose_object(struct odb_source *source,
int fd, const char *filename)
@@ -1243,6 +1331,283 @@ static int index_core(struct index_state *istate,
return ret;
}
+static int already_written(struct odb_transaction *transaction,
+ struct object_id *oid)
+{
+ /* The object may already exist in the repository */
+ if (odb_has_object(transaction->odb, oid,
+ HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
+ return 1;
+
+ /* Might want to keep the list sorted */
+ for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+ if (oideq(&transaction->packfile.written[i]->oid, oid))
+ return 1;
+
+ /* This is a new object we need to keep */
+ return 0;
+}
+
+/* Lazily create backing packfile for the state */
+static void prepare_to_stream(struct odb_transaction *transaction,
+ unsigned flags)
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ if (!(flags & INDEX_WRITE_OBJECT) || state->f)
+ return;
+
+ state->f = create_tmp_packfile(transaction->odb->repo,
+ &state->pack_tmp_name);
+ reset_pack_idx_option(&state->pack_idx_opts);
+
+ /* Pretend we are going to write only one object */
+ state->offset = write_pack_header(state->f, 1);
+ if (!state->offset)
+ die_errno("unable to write pack header");
+}
+
+/*
+ * Read the contents from fd for size bytes, streaming it to the
+ * packfile in state while updating the hash in ctx. Signal a failure
+ * by returning a negative value when the resulting pack would exceed
+ * the pack size limit and this is not the first object in the pack,
+ * so that the caller can discard what we wrote from the current pack
+ * by truncating it and opening a new one. The caller will then call
+ * us again after rewinding the input fd.
+ *
+ * The already_hashed_to pointer is kept untouched by the caller to
+ * make sure we do not hash the same byte when we are called
+ * again. This way, the caller does not have to checkpoint its hash
+ * status before calling us just in case we ask it to call us again
+ * with a new pack.
+ */
+static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
+ struct git_hash_ctx *ctx, off_t *already_hashed_to,
+ int fd, size_t size, const char *path,
+ unsigned flags)
+{
+ git_zstream s;
+ unsigned char ibuf[16384];
+ unsigned char obuf[16384];
+ unsigned hdrlen;
+ int status = Z_OK;
+ int write_object = (flags & INDEX_WRITE_OBJECT);
+ off_t offset = 0;
+
+ git_deflate_init(&s, pack_compression_level);
+
+ hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
+ s.next_out = obuf + hdrlen;
+ s.avail_out = sizeof(obuf) - hdrlen;
+
+ while (status != Z_STREAM_END) {
+ if (size && !s.avail_in) {
+ size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
+ ssize_t read_result = read_in_full(fd, ibuf, rsize);
+ if (read_result < 0)
+ die_errno("failed to read from '%s'", path);
+ if ((size_t)read_result != rsize)
+ die("failed to read %u bytes from '%s'",
+ (unsigned)rsize, path);
+ offset += rsize;
+ if (*already_hashed_to < offset) {
+ size_t hsize = offset - *already_hashed_to;
+ if (rsize < hsize)
+ hsize = rsize;
+ if (hsize)
+ git_hash_update(ctx, ibuf, hsize);
+ *already_hashed_to = offset;
+ }
+ s.next_in = ibuf;
+ s.avail_in = rsize;
+ size -= rsize;
+ }
+
+ status = git_deflate(&s, size ? 0 : Z_FINISH);
+
+ if (!s.avail_out || status == Z_STREAM_END) {
+ if (write_object) {
+ size_t written = s.next_out - obuf;
+
+ /* would we bust the size limit? */
+ if (state->nr_written &&
+ pack_size_limit_cfg &&
+ pack_size_limit_cfg < state->offset + written) {
+ git_deflate_abort(&s);
+ return -1;
+ }
+
+ hashwrite(state->f, obuf, written);
+ state->offset += written;
+ }
+ s.next_out = obuf;
+ s.avail_out = sizeof(obuf);
+ }
+
+ switch (status) {
+ case Z_OK:
+ case Z_BUF_ERROR:
+ case Z_STREAM_END:
+ continue;
+ default:
+ die("unexpected deflate failure: %d", status);
+ }
+ }
+ git_deflate_end(&s);
+ return 0;
+}
+
+static void finish_tmp_packfile(struct odb_transaction *transaction,
+ struct strbuf *basename,
+ unsigned char hash[])
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct repository *repo = transaction->odb->repo;
+ char *idx_tmp_name = NULL;
+
+ stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
+ state->written, state->nr_written, NULL,
+ &state->pack_idx_opts, hash, &idx_tmp_name);
+ rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
+
+ free(idx_tmp_name);
+}
+
+static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct repository *repo = transaction->odb->repo;
+ unsigned char hash[GIT_MAX_RAWSZ];
+ struct strbuf packname = STRBUF_INIT;
+
+ if (!state->f)
+ return;
+
+ if (state->nr_written == 0) {
+ close(state->f->fd);
+ free_hashfile(state->f);
+ unlink(state->pack_tmp_name);
+ goto clear_exit;
+ } else if (state->nr_written == 1) {
+ finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
+ CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+ } else {
+ int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
+ fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
+ state->nr_written, hash,
+ state->offset);
+ close(fd);
+ }
+
+ strbuf_addf(&packname, "%s/pack/pack-%s.",
+ repo_get_object_directory(transaction->odb->repo),
+ hash_to_hex_algop(hash, repo->hash_algo));
+
+ finish_tmp_packfile(transaction, &packname, hash);
+ for (uint32_t i = 0; i < state->nr_written; i++)
+ free(state->written[i]);
+
+clear_exit:
+ free(state->pack_tmp_name);
+ free(state->written);
+ memset(state, 0, sizeof(*state));
+
+ strbuf_release(&packname);
+ /* Make objects we just wrote available to ourselves */
+ reprepare_packed_git(repo);
+}
+
+/*
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
+ *
+ * This also bypasses the usual "convert-to-git" dance, and that is on
+ * purpose. We could write a streaming version of the converting
+ * functions and insert that before feeding the data to fast-import
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
+ */
+static int index_blob_bulk_checkin(struct odb_transaction *transaction,
+ struct object_id *result_oid, int fd, size_t size,
+ const char *path, unsigned flags)
+{
+ struct bulk_checkin_packfile *state = &transaction->packfile;
+ off_t seekback, already_hashed_to;
+ struct git_hash_ctx ctx;
+ unsigned char obuf[16384];
+ unsigned header_len;
+ struct hashfile_checkpoint checkpoint;
+ struct pack_idx_entry *idx = NULL;
+
+ seekback = lseek(fd, 0, SEEK_CUR);
+ if (seekback == (off_t)-1)
+ return error("cannot find the current offset");
+
+ header_len = format_object_header((char *)obuf, sizeof(obuf),
+ OBJ_BLOB, size);
+ transaction->odb->repo->hash_algo->init_fn(&ctx);
+ git_hash_update(&ctx, obuf, header_len);
+
+ /* Note: idx is non-NULL when we are writing */
+ if ((flags & INDEX_WRITE_OBJECT) != 0) {
+ CALLOC_ARRAY(idx, 1);
+
+ prepare_to_stream(transaction, flags);
+ hashfile_checkpoint_init(state->f, &checkpoint);
+ }
+
+ already_hashed_to = 0;
+
+ while (1) {
+ prepare_to_stream(transaction, flags);
+ if (idx) {
+ hashfile_checkpoint(state->f, &checkpoint);
+ idx->offset = state->offset;
+ crc32_begin(state->f);
+ }
+ if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
+ fd, size, path, flags))
+ break;
+ /*
+ * Writing this object to the current pack will make
+ * it too big; we need to truncate it, start a new
+ * pack, and write into it.
+ */
+ if (!idx)
+ BUG("should not happen");
+ hashfile_truncate(state->f, &checkpoint);
+ state->offset = checkpoint.offset;
+ flush_bulk_checkin_packfile(transaction);
+ if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
+ return error("cannot seek back");
+ }
+ git_hash_final_oid(result_oid, &ctx);
+ if (!idx)
+ return 0;
+
+ idx->crc32 = crc32_end(state->f);
+ if (already_written(transaction, result_oid)) {
+ hashfile_truncate(state->f, &checkpoint);
+ state->offset = checkpoint.offset;
+ free(idx);
+ } else {
+ oidcpy(&idx->oid, result_oid);
+ ALLOC_GROW(state->written,
+ state->nr_written + 1,
+ state->alloc_written);
+ state->written[state->nr_written++] = idx;
+ }
+ return 0;
+}
+
int index_fd(struct index_state *istate, struct object_id *oid,
int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
@@ -1609,3 +1974,30 @@ int read_loose_object(struct repository *repo,
munmap(map, mapsize);
return ret;
}
+
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
+{
+ if (odb->transaction)
+ return NULL;
+
+ CALLOC_ARRAY(odb->transaction, 1);
+ odb->transaction->odb = odb;
+
+ return odb->transaction;
+}
+
+void end_odb_transaction(struct odb_transaction *transaction)
+{
+ if (!transaction)
+ return;
+
+ /*
+ * Ensure the transaction ending matches the pending transaction.
+ */
+ ASSERT(transaction == transaction->odb->transaction);
+
+ flush_batch_fsync(transaction);
+ flush_bulk_checkin_packfile(transaction);
+ transaction->odb->transaction = NULL;
+ free(transaction);
+}
diff --git a/object-file.h b/object-file.h
index 15d97630d3..6323d2e63c 100644
--- a/object-file.h
+++ b/object-file.h
@@ -218,4 +218,20 @@ int read_loose_object(struct repository *repo,
void **contents,
struct object_info *oi);
+struct odb_transaction;
+
+/*
+ * Tell the object database to optimize for adding
+ * multiple objects. end_odb_transaction must be called
+ * to make new objects visible. If a transaction is already
+ * pending, NULL is returned.
+ */
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
+
+/*
+ * Tell the object database to make any objects from the
+ * current transaction visible.
+ */
+void end_odb_transaction(struct odb_transaction *transaction);
+
#endif /* OBJECT_FILE_H */
diff --git a/read-cache.c b/read-cache.c
index 229b8ef11c..80591eeced 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -8,7 +8,6 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "git-compat-util.h"
-#include "bulk-checkin.h"
#include "config.h"
#include "date.h"
#include "diff.h"
--
2.51.0.193.g4975ec3473b
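For clarity, a minimal caller-side sketch of the contract kept by the
relocated code (not part of the patch; "repo" stands for any struct
repository pointer): begin_odb_transaction() returns NULL when the ODB
already has a pending transaction, and end_odb_transaction() treats a
NULL transaction as a no-op, so a caller that may run under an outer
transaction can still bracket its writes unconditionally.

        struct odb_transaction *txn;

        /* NULL when a transaction is already pending on this ODB. */
        txn = begin_odb_transaction(repo->objects);

        /* ... write objects; they stay invisible until the transaction ends ... */

        /* Flushes and frees the transaction; silently ignores a NULL txn. */
        end_odb_transaction(txn);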
* [PATCH v3 5/6] object-file: update naming from bulk-checkin
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
` (3 preceding siblings ...)
2025-09-16 18:29 ` [PATCH v3 4/6] object-file: relocate ODB transaction code Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
2025-09-16 18:29 ` [PATCH v3 6/6] odb: add transaction interface Justin Tobler
5 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
Update the names of several functions and types relocated from the
bulk-checkin subsystem to better reflect that they now implement ODB
transactions. Also drop finish_tmp_packfile() as a standalone function,
inlining it directly into flush_packfile_transaction().
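For reviewers, the renames in this patch (collected from the hunks
below) map as follows; finish_tmp_packfile() has no new counterpart
because it is folded into flush_packfile_transaction():

    bulk_checkin_packfile             -> transaction_packfile
    prepare_loose_object_bulk_checkin -> prepare_loose_object_transaction
    fsync_loose_object_bulk_checkin   -> fsync_loose_object_transaction
    flush_batch_fsync                 -> flush_loose_object_transaction
    prepare_to_stream                 -> prepare_packfile_transaction
    flush_bulk_checkin_packfile       -> flush_packfile_transaction
    index_blob_bulk_checkin           -> index_blob_packfile_transaction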
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
object-file.c | 80 +++++++++++++++++++++++----------------------------
1 file changed, 36 insertions(+), 44 deletions(-)
diff --git a/object-file.c b/object-file.c
index 03f9931b83..8103a2bf41 100644
--- a/object-file.c
+++ b/object-file.c
@@ -667,7 +667,7 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
}
-struct bulk_checkin_packfile {
+struct transaction_packfile {
char *pack_tmp_name;
struct hashfile *f;
off_t offset;
@@ -682,10 +682,10 @@ struct odb_transaction {
struct object_database *odb;
struct tmp_objdir *objdir;
- struct bulk_checkin_packfile packfile;
+ struct transaction_packfile packfile;
};
-static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
+static void prepare_loose_object_transaction(struct odb_transaction *transaction)
{
/*
* We lazily create the temporary object directory
@@ -701,7 +701,7 @@ static void prepare_loose_object_bulk_checkin(struct odb_transaction *transactio
tmp_objdir_replace_primary_odb(transaction->objdir, 0);
}
-static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+static void fsync_loose_object_transaction(struct odb_transaction *transaction,
int fd, const char *filename)
{
/*
@@ -722,7 +722,7 @@ static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
/*
* Cleanup after batch-mode fsync_object_files.
*/
-static void flush_batch_fsync(struct odb_transaction *transaction)
+static void flush_loose_object_transaction(struct odb_transaction *transaction)
{
struct strbuf temp_path = STRBUF_INIT;
struct tempfile *temp;
@@ -733,7 +733,7 @@ static void flush_batch_fsync(struct odb_transaction *transaction)
/*
* Issue a full hardware flush against a temporary file to ensure
* that all objects are durable before any renames occur. The code in
- * fsync_loose_object_bulk_checkin has already issued a writeout
+ * fsync_loose_object_transaction has already issued a writeout
* request, but it has not flushed any writeback cache in the storage
* hardware or any filesystem logs. This fsync call acts as a barrier
* to ensure that the data in each new object file is durable before
@@ -762,7 +762,7 @@ static void close_loose_object(struct odb_source *source,
goto out;
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
- fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
+ fsync_loose_object_transaction(source->odb->transaction, fd, filename);
else if (fsync_object_files > 0)
fsync_or_die(fd, filename);
else
@@ -940,7 +940,7 @@ static int write_loose_object(struct odb_source *source,
static struct strbuf filename = STRBUF_INIT;
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
- prepare_loose_object_bulk_checkin(source->odb->transaction);
+ prepare_loose_object_transaction(source->odb->transaction);
odb_loose_path(source, &filename, oid);
@@ -1029,7 +1029,7 @@ int stream_loose_object(struct odb_source *source,
int hdrlen;
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
- prepare_loose_object_bulk_checkin(source->odb->transaction);
+ prepare_loose_object_transaction(source->odb->transaction);
/* Since oid is not determined, save tmp file to odb path. */
strbuf_addf(&filename, "%s/", source->path);
@@ -1349,10 +1349,10 @@ static int already_written(struct odb_transaction *transaction,
}
/* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct odb_transaction *transaction,
- unsigned flags)
+static void prepare_packfile_transaction(struct odb_transaction *transaction,
+ unsigned flags)
{
- struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct transaction_packfile *state = &transaction->packfile;
if (!(flags & INDEX_WRITE_OBJECT) || state->f)
return;
@@ -1381,7 +1381,7 @@ static void prepare_to_stream(struct odb_transaction *transaction,
* status before calling us just in case we ask it to call us again
* with a new pack.
*/
-static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
+static int stream_blob_to_pack(struct transaction_packfile *state,
struct git_hash_ctx *ctx, off_t *already_hashed_to,
int fd, size_t size, const char *path,
unsigned flags)
@@ -1457,28 +1457,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
return 0;
}
-static void finish_tmp_packfile(struct odb_transaction *transaction,
- struct strbuf *basename,
- unsigned char hash[])
+static void flush_packfile_transaction(struct odb_transaction *transaction)
{
- struct bulk_checkin_packfile *state = &transaction->packfile;
- struct repository *repo = transaction->odb->repo;
- char *idx_tmp_name = NULL;
-
- stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
- state->written, state->nr_written, NULL,
- &state->pack_idx_opts, hash, &idx_tmp_name);
- rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
-
- free(idx_tmp_name);
-}
-
-static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
-{
- struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct transaction_packfile *state = &transaction->packfile;
struct repository *repo = transaction->odb->repo;
unsigned char hash[GIT_MAX_RAWSZ];
struct strbuf packname = STRBUF_INIT;
+ char *idx_tmp_name = NULL;
if (!state->f)
return;
@@ -1503,11 +1488,16 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
repo_get_object_directory(transaction->odb->repo),
hash_to_hex_algop(hash, repo->hash_algo));
- finish_tmp_packfile(transaction, &packname, hash);
+ stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
+ state->written, state->nr_written, NULL,
+ &state->pack_idx_opts, hash, &idx_tmp_name);
+ rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);
+
for (uint32_t i = 0; i < state->nr_written; i++)
free(state->written[i]);
clear_exit:
+ free(idx_tmp_name);
free(state->pack_tmp_name);
free(state->written);
memset(state, 0, sizeof(*state));
@@ -1535,11 +1525,12 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
-static int index_blob_bulk_checkin(struct odb_transaction *transaction,
- struct object_id *result_oid, int fd, size_t size,
- const char *path, unsigned flags)
+static int index_blob_packfile_transaction(struct odb_transaction *transaction,
+ struct object_id *result_oid, int fd,
+ size_t size, const char *path,
+ unsigned flags)
{
- struct bulk_checkin_packfile *state = &transaction->packfile;
+ struct transaction_packfile *state = &transaction->packfile;
off_t seekback, already_hashed_to;
struct git_hash_ctx ctx;
unsigned char obuf[16384];
@@ -1560,14 +1551,14 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction,
if ((flags & INDEX_WRITE_OBJECT) != 0) {
CALLOC_ARRAY(idx, 1);
- prepare_to_stream(transaction, flags);
+ prepare_packfile_transaction(transaction, flags);
hashfile_checkpoint_init(state->f, &checkpoint);
}
already_hashed_to = 0;
while (1) {
- prepare_to_stream(transaction, flags);
+ prepare_packfile_transaction(transaction, flags);
if (idx) {
hashfile_checkpoint(state->f, &checkpoint);
idx->offset = state->offset;
@@ -1585,7 +1576,7 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction,
BUG("should not happen");
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
- flush_bulk_checkin_packfile(transaction);
+ flush_packfile_transaction(transaction);
if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
return error("cannot seek back");
}
@@ -1632,9 +1623,10 @@ int index_fd(struct index_state *istate, struct object_id *oid,
struct odb_transaction *transaction;
transaction = begin_odb_transaction(the_repository->objects);
- ret = index_blob_bulk_checkin(the_repository->objects->transaction,
- oid, fd, xsize_t(st->st_size),
- path, flags);
+ ret = index_blob_packfile_transaction(the_repository->objects->transaction,
+ oid, fd,
+ xsize_t(st->st_size),
+ path, flags);
end_odb_transaction(transaction);
}
@@ -1996,8 +1988,8 @@ void end_odb_transaction(struct odb_transaction *transaction)
*/
ASSERT(transaction == transaction->odb->transaction);
- flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(transaction);
+ flush_loose_object_transaction(transaction);
+ flush_packfile_transaction(transaction);
transaction->odb->transaction = NULL;
free(transaction);
}
--
2.51.0.193.g4975ec3473b
* [PATCH v3 6/6] odb: add transaction interface
2025-09-16 18:29 ` [PATCH v3 0/6] odb: add transaction interfaces to ODB subsystem Justin Tobler
` (4 preceding siblings ...)
2025-09-16 18:29 ` [PATCH v3 5/6] object-file: update naming from bulk-checkin Justin Tobler
@ 2025-09-16 18:29 ` Justin Tobler
5 siblings, 0 replies; 38+ messages in thread
From: Justin Tobler @ 2025-09-16 18:29 UTC (permalink / raw)
To: git; +Cc: ps, gitster, me, karthik.188, Justin Tobler
Transactions are managed via the {begin,end}_odb_transaction() functions
in the object-file subsystem, and their implementation is specific to
the files object source. Introduce odb_transaction_{begin,commit}() in
the odb subsystem to provide an eventually object-source-agnostic means
of managing transactions.
Update call sites to manage transactions through the odb subsystem
instead. Also rename the {begin,end}_odb_transaction() functions to
object_file_transaction_{begin,commit}() to clarify the object source
they support.
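As a minimal usage sketch, modeled on the call sites updated below
("repo" stands for any struct repository pointer):

        struct odb_transaction *transaction;

        transaction = odb_transaction_begin(repo->objects);
        /* ... queued object writes are not yet visible ... */
        odb_transaction_commit(transaction); /* no-op when transaction is NULL */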
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
builtin/add.c | 5 +++--
builtin/unpack-objects.c | 4 ++--
builtin/update-index.c | 7 ++++---
cache-tree.c | 4 ++--
object-file.c | 12 +++++++-----
object-file.h | 6 +++---
odb.c | 10 ++++++++++
odb.h | 13 +++++++++++++
read-cache.c | 4 ++--
9 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/builtin/add.c b/builtin/add.c
index 8294366d68..bf312c40be 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -15,6 +15,7 @@
#include "pathspec.h"
#include "run-command.h"
#include "object-file.h"
+#include "odb.h"
#include "parse-options.h"
#include "path.h"
#include "preload-index.h"
@@ -575,7 +576,7 @@ int cmd_add(int argc,
string_list_clear(&only_match_skip_worktree, 0);
}
- transaction = begin_odb_transaction(repo->objects);
+ transaction = odb_transaction_begin(repo->objects);
ps_matched = xcalloc(pathspec.nr, 1);
if (add_renormalize)
@@ -594,7 +595,7 @@ int cmd_add(int argc,
if (chmod_arg && pathspec.nr)
exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
finish:
if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 4596fff0da..ef79e43715 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -599,12 +599,12 @@ static void unpack_all(void)
progress = start_progress(the_repository,
_("Unpacking objects"), nr_objects);
CALLOC_ARRAY(obj_list, nr_objects);
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
for (i = 0; i < nr_objects; i++) {
unpack_one(i);
display_progress(progress, i + 1);
}
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
stop_progress(&progress);
if (delta_list)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index ee01c4e423..8a5907767b 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -18,6 +18,7 @@
#include "cache-tree.h"
#include "tree-walk.h"
#include "object-file.h"
+#include "odb.h"
#include "refs.h"
#include "resolve-undo.h"
#include "parse-options.h"
@@ -1122,7 +1123,7 @@ int cmd_update_index(int argc,
* Allow the object layer to optimize adding multiple objects in
* a batch.
*/
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
while (ctx.argc) {
if (parseopt_state != PARSE_OPT_DONE)
parseopt_state = parse_options_step(&ctx, options,
@@ -1152,7 +1153,7 @@ int cmd_update_index(int argc,
* a transaction.
*/
if (transaction && verbose) {
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
transaction = NULL;
}
@@ -1220,7 +1221,7 @@ int cmd_update_index(int argc,
/*
* By now we have added all of the new objects
*/
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
if (split_index > 0) {
if (repo_config_get_split_index(the_repository) == 0)
diff --git a/cache-tree.c b/cache-tree.c
index 79ddf6b727..2aba47060e 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -489,10 +489,10 @@ int cache_tree_update(struct index_state *istate, int flags)
trace_performance_enter();
trace2_region_enter("cache_tree", "update", the_repository);
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
"", 0, &skip, flags);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
trace2_region_leave("cache_tree", "update", the_repository);
trace_performance_leave("cache_tree_update");
if (i < 0)
diff --git a/object-file.c b/object-file.c
index 8103a2bf41..17a236d2fe 100644
--- a/object-file.c
+++ b/object-file.c
@@ -691,7 +691,7 @@ static void prepare_loose_object_transaction(struct odb_transaction *transaction
* We lazily create the temporary object directory
* the first time an object might be added, since
* callers may not know whether any objects will be
- * added at the time they call begin_odb_transaction.
+ * added at the time they call object_file_transaction_begin.
*/
if (!transaction || transaction->objdir)
return;
@@ -1622,12 +1622,12 @@ int index_fd(struct index_state *istate, struct object_id *oid,
} else {
struct odb_transaction *transaction;
- transaction = begin_odb_transaction(the_repository->objects);
+ transaction = odb_transaction_begin(the_repository->objects);
ret = index_blob_packfile_transaction(the_repository->objects->transaction,
oid, fd,
xsize_t(st->st_size),
path, flags);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
}
close(fd);
@@ -1967,8 +1967,10 @@ int read_loose_object(struct repository *repo,
return ret;
}
-struct odb_transaction *begin_odb_transaction(struct object_database *odb)
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source)
{
+ struct object_database *odb = source->odb;
+
if (odb->transaction)
return NULL;
@@ -1978,7 +1980,7 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb)
return odb->transaction;
}
-void end_odb_transaction(struct odb_transaction *transaction)
+void object_file_transaction_commit(struct odb_transaction *transaction)
{
if (!transaction)
return;
diff --git a/object-file.h b/object-file.h
index 6323d2e63c..3fd48dcafb 100644
--- a/object-file.h
+++ b/object-file.h
@@ -222,16 +222,16 @@ struct odb_transaction;
/*
* Tell the object database to optimize for adding
- * multiple objects. end_odb_transaction must be called
+ * multiple objects. object_file_transaction_commit must be called
* to make new objects visible. If a transaction is already
* pending, NULL is returned.
*/
-struct odb_transaction *begin_odb_transaction(struct object_database *odb);
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source);
/*
* Tell the object database to make any objects from the
* current transaction visible.
*/
-void end_odb_transaction(struct odb_transaction *transaction);
+void object_file_transaction_commit(struct odb_transaction *transaction);
#endif /* OBJECT_FILE_H */
diff --git a/odb.c b/odb.c
index 2a92a018c4..af9534bfe1 100644
--- a/odb.c
+++ b/odb.c
@@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o)
hashmap_clear(&o->pack_map);
string_list_clear(&o->submodule_source_paths, 0);
}
+
+struct odb_transaction *odb_transaction_begin(struct object_database *odb)
+{
+ return object_file_transaction_begin(odb->sources);
+}
+
+void odb_transaction_commit(struct odb_transaction *transaction)
+{
+ object_file_transaction_commit(transaction);
+}
diff --git a/odb.h b/odb.h
index a89b214390..82093753c8 100644
--- a/odb.h
+++ b/odb.h
@@ -185,6 +185,19 @@ struct object_database {
struct object_database *odb_new(struct repository *repo);
void odb_clear(struct object_database *o);
+/*
+ * Starts an ODB transaction. Subsequent objects are written to the transaction
+ * and not committed until odb_transaction_commit() is invoked on the
+ * transaction. If the ODB already has a pending transaction, NULL is returned.
+ */
+struct odb_transaction *odb_transaction_begin(struct object_database *odb);
+
+/*
+ * Commits an ODB transaction making the written objects visible. If the
+ * specified transaction is NULL, the function is a no-op.
+ */
+void odb_transaction_commit(struct odb_transaction *transaction);
+
/*
* Find source by its object directory path. Dies in case the source couldn't
* be found.
diff --git a/read-cache.c b/read-cache.c
index 80591eeced..94098a3861 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3972,9 +3972,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
* This function is invoked from commands other than 'add', which
* may not have their own transaction active.
*/
- transaction = begin_odb_transaction(repo->objects);
+ transaction = odb_transaction_begin(repo->objects);
run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
- end_odb_transaction(transaction);
+ odb_transaction_commit(transaction);
release_revisions(&rev);
return !!data.add_errors;
--
2.51.0.193.g4975ec3473b