Git development
 help / color / mirror / Atom feed
* [PATCH] object-file: avoid ODB transaction when not writing objects
@ 2026-04-07 20:17 Justin Tobler
  2026-04-07 21:18 ` Junio C Hamano
  0 siblings, 1 reply; 10+ messages in thread
From: Justin Tobler @ 2026-04-07 20:17 UTC (permalink / raw)
  To: git; +Cc: ps, gitster, peff, luca.stefani.ge1, Justin Tobler

In ce1661f9da (odb: add transaction interface, 2025-09-16), existing ODB
transaction logic is adapted to create a transaction interface at the
ODB layer. The intent here is for the ODB transaction interface to
eventually provide an object source agnostic means to manage
transactions.

An unintended consequence of this change, though, is that
`object-file.c:index_fd()` may enter the ODB transaction path even when
no object write is requested. In non-repository contexts, this can
result in a NULL dereference and segfault. One such case occurs when
running git-diff(1) outside of a repository with "core.bigFileThreshold"
forcing the streaming path in `index_fd()`:

        $ echo foo >foo
        $ echo bar >bar
        $ git -c core.bigFileThreshold=1 diff -- foo bar

In this scenario, the caller only needs to compute the object ID. Object
hashing does not require an ODB, so starting a transaction is both
unnecessary and invalid.

Fix the bug by avoiding the use of ODB transactions in `index_fd()` when
callers are only interested in computing the object hash.

Reported-by: Luca Stefani <luca.stefani.ge1@gmail.com>
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---

Greetings,

This patch addresses a bug report[1] where performing git-diff(1) on
files that exceed "core.bigFileThreshold" outside of a repository causes
a segfault. Originally this patch was included in another series sent to
the mailing list[2] as a preparatory refactor. Since it happens to fix
the reported bug, though, I've extracted it from that series in the
hope of upstreaming it more quickly.

I wasn't entirely sure if this patch should be based on master or maint.
I went with master, but am happy to resend if this is incorrect.

Thanks,
-Justin

[1]: <CAO0HQ0X_pQmew5tJReOL=u+CMxCjAQynx8JfjykoYAUE59YNzw@mail.gmail.com>
[2]: <20260331033835.2863514-1-jltobler@gmail.com>

---
 object-file.c           | 57 ++++++++++++++++++++++++++++++++---------
 t/t1517-outside-repo.sh |  8 ++++++
 2 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/object-file.c b/object-file.c
index 4f77ce0982..63408fc290 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1640,6 +1640,34 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac
 	return 0;
 }
 
+static int hash_blob_stream(const struct git_hash_algo *hash_algo,
+			    struct object_id *result_oid, int fd, size_t size)
+{
+	unsigned char buf[16384];
+	struct git_hash_ctx ctx;
+	unsigned header_len;
+
+	header_len = format_object_header((char *)buf, sizeof(buf),
+					  OBJ_BLOB, size);
+	hash_algo->init_fn(&ctx);
+	git_hash_update(&ctx, buf, header_len);
+
+	while (size) {
+		size_t rsize = size < sizeof(buf) ? size : sizeof(buf);
+		ssize_t read_result = read_in_full(fd, buf, rsize);
+
+		if ((read_result < 0) || ((size_t)read_result != rsize))
+			return -1;
+
+		git_hash_update(&ctx, buf, rsize);
+		size -= read_result;
+	}
+
+	git_hash_final_oid(result_oid, &ctx);
+
+	return 0;
+}
+
 int index_fd(struct index_state *istate, struct object_id *oid,
 	     int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
@@ -1661,18 +1689,23 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
 	} else {
-		struct object_database *odb = the_repository->objects;
-		struct odb_transaction_files *files_transaction;
-		struct odb_transaction *transaction;
-
-		transaction = odb_transaction_begin(odb);
-		files_transaction = container_of(odb->transaction,
-						 struct odb_transaction_files,
-						 base);
-		ret = index_blob_packfile_transaction(files_transaction, oid, fd,
-						      xsize_t(st->st_size),
-						      path, flags);
-		odb_transaction_commit(transaction);
+		if (flags & INDEX_WRITE_OBJECT) {
+			struct object_database *odb = the_repository->objects;
+			struct odb_transaction_files *files_transaction;
+			struct odb_transaction *transaction;
+
+			transaction = odb_transaction_begin(odb);
+			files_transaction = container_of(odb->transaction,
+							 struct odb_transaction_files,
+							 base);
+			ret = index_blob_packfile_transaction(files_transaction, oid, fd,
+							      xsize_t(st->st_size),
+							      path, flags);
+			odb_transaction_commit(transaction);
+		} else {
+			ret = hash_blob_stream(the_repository->hash_algo, oid,
+					       fd, xsize_t(st->st_size));
+		}
 	}
 
 	close(fd);
diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh
index c824c1a25c..c1dbc6359a 100755
--- a/t/t1517-outside-repo.sh
+++ b/t/t1517-outside-repo.sh
@@ -93,6 +93,14 @@ test_expect_success 'diff outside repository' '
 	test_cmp expect actual
 '
 
+test_expect_success 'diff files exceeding bigFileThreshold outside repository' '
+	cd non-repo &&
+	echo foo >foo &&
+	echo bar >bar &&
+	test_must_fail git -c core.bigFileThreshold=1 diff -- foo bar >actual &&
+	test_grep "diff --git a/foo b/bar" actual
+'
+
 test_expect_success 'stripspace outside repository' '
 	nongit git stripspace -s </dev/null
 '

base-commit: 1adf5bca8c3cf778103548b9355777cf2d12efdd
-- 
2.53.0.381.g628a66ccf6


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-04-08  0:42 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-07 20:17 [PATCH] object-file: avoid ODB transaction when not writing objects Justin Tobler
2026-04-07 21:18 ` Junio C Hamano
2026-04-07 21:29   ` Jeff King
2026-04-07 21:43     ` Junio C Hamano
2026-04-07 21:43   ` Justin Tobler
2026-04-07 21:53   ` Junio C Hamano
2026-04-07 22:08     ` Justin Tobler
2026-04-07 22:24       ` Junio C Hamano
2026-04-07 22:41         ` Justin Tobler
2026-04-08  0:42           ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox