All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] diff: batch fetching of missing blobs
@ 2019-03-26 22:09 Jonathan Tan
  2019-03-27 10:10 ` SZEDER Gábor
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Jonathan Tan @ 2019-03-26 22:09 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan

When running a command like "git show" or "git diff" in a partial clone,
batch all missing blobs to be fetched as one request.

This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
blobs", 2017-12-08), but for another command.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
Here's an improvement for those having partial clones.

I couldn't find a good place to place the test (a place that checks how
diff interfaces with the object store would be ideal), so I created a
new one. Let me know if there's a better place to put it.
---
 diff.c                        | 27 +++++++++++++
 t/t4067-diff-partial-clone.sh | 76 +++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100755 t/t4067-diff-partial-clone.sh

diff --git a/diff.c b/diff.c
index ec5c095199..0e08d05b14 100644
--- a/diff.c
+++ b/diff.c
@@ -25,6 +25,7 @@
 #include "packfile.h"
 #include "parse-options.h"
 #include "help.h"
+#include "fetch-object.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6067,6 +6068,32 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 	if (o->color_moved)
 		o->emitted_symbols = &esm;
 
+	if (repository_format_partial_clone) {
+		/*
+		 * Prefetch the diff pairs that are about to be flushed.
+		 */
+		struct oid_array to_fetch = OID_ARRAY_INIT;
+		int fetch_if_missing_store = fetch_if_missing;
+
+		fetch_if_missing = 0;
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+			if (!check_pair_status(p))
+				continue;
+			if (p->one && p->one->oid_valid &&
+			    !has_object_file(&p->one->oid))
+				oid_array_append(&to_fetch, &p->one->oid);
+			if (p->two && p->two->oid_valid &&
+			    !has_object_file(&p->two->oid))
+				oid_array_append(&to_fetch, &p->two->oid);
+		}
+		if (to_fetch.nr)
+			fetch_objects(repository_format_partial_clone,
+				      to_fetch.oid, to_fetch.nr);
+		fetch_if_missing = fetch_if_missing_store;
+		oid_array_clear(&to_fetch);
+	}
+
 	for (i = 0; i < q->nr; i++) {
 		struct diff_filepair *p = q->queue[i];
 		if (check_pair_status(p))
diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
new file mode 100755
index 0000000000..38f03be114
--- /dev/null
+++ b/t/t4067-diff-partial-clone.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+test_description='behavior of diff when reading objects in a partial clone'
+
+. ./test-lib.sh
+
+test_expect_success 'git show batches blobs' '
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff batches blobs' '
+	rm -rf server client trace &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo c >server/c &&
+	echo d >server/d &&
+	git -C server add c d &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff skips same-OID blobs' '
+	rm -rf server client trace &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo another-a >server/a &&
+	git -C server add a &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	echo a | git hash-object --stdin >hash-old-a &&
+	echo another-a | git hash-object --stdin >hash-new-a &&
+	echo b | git hash-object --stdin >hash-b &&
+
+	# Ensure that only a and another-a are fetched.
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "want $(cat hash-old-a)" trace &&
+	grep "want $(cat hash-new-a)" trace &&
+	! grep "want $(cat hash-b)" trace
+'
+
+test_done
-- 
2.21.0.155.ge902e9bcae.dirty


^ permalink raw reply related	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2019-04-09  6:36 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-03-26 22:09 [PATCH] diff: batch fetching of missing blobs Jonathan Tan
2019-03-27 10:10 ` SZEDER Gábor
2019-03-27 22:02 ` Johannes Schindelin
2019-03-28  6:52 ` Jeff King
2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
2019-04-05 14:13     ` Johannes Schindelin
2019-04-05 22:00     ` Jeff King
2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
2019-04-04  2:47     ` SZEDER Gábor
2019-04-05 13:38       ` Johannes Schindelin
2019-04-07  6:00         ` Christian Couder
2019-04-08  2:36           ` Junio C Hamano
2019-04-08  5:51             ` Junio C Hamano
2019-04-08  6:03               ` Junio C Hamano
2019-04-08  6:45                 ` Christian Couder
2019-04-08  6:40             ` Christian Couder
2019-04-08  7:59               ` Junio C Hamano
2019-04-08  9:56                 ` Christian Couder
2019-04-05  9:39     ` Duy Nguyen
2019-04-05 17:09       ` [PATCH] fixup! " Jonathan Tan
2019-04-05 20:16         ` Johannes Schindelin
2019-04-06  4:17         ` Duy Nguyen
2019-04-08  3:46           ` Junio C Hamano
2019-04-08  4:06           ` Junio C Hamano
2019-04-08  9:58             ` Duy Nguyen
2019-04-09  6:36               ` Junio C Hamano
2019-04-05 14:17     ` [PATCH v2 2/2] " Johannes Schindelin
2019-04-05 22:12   ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jeff King

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.