git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/1] name-rev: use generation numbers if available
@ 2022-02-28 21:50 Jacob Keller
  2022-02-28 21:50 ` [PATCH v2 1/1] " Jacob Keller
  2022-02-28 21:50 ` [PATCH] " Jacob Keller
  0 siblings, 2 replies; 23+ messages in thread
From: Jacob Keller @ 2022-02-28 21:50 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee, Jacob Keller

From: Jacob Keller <jacob.keller@gmail.com>

Thanks for the review, Stolee!

Here's the range-diff since v1:

1:  e7e302376dd6 ! 1:  8ab5d987bf93 name-rev: use generation numbers if available
    @@ Commit message
         This heuristic impacts git name-rev, and by extension git describe
         --contains which is built on top of name-rev.
     
    -    Further more, if --annotate-stdin is used, the heuristic is not enabled
    -    because the full history has to be analyzed anyways. This results in
    -    some confusion if a user sees that --annotate-stdin works but a normal
    -    name-rev does not.
    +    Further more, if --all or --annotate-stdin is used, the heuristic is not
    +    enabled because the full history has to be analyzed anyways. This
    +    results in some confusion if a user sees that --annotate-stdin works but
    +    a normal name-rev does not.
     
         If the repository has a commit graph, we can use the generation numbers
         instead of using the commit dates. This is essentially the same check
         except that generation numbers make it exact, where the commit date
         heuristic could be incorrect due to clock errors.
     
    -    Add a test case which covers this behavior and shows how the commit
    -    graph makes the name-rev process work.
    +    Since we're extending the notion of cutoff to more than one variable,
    +    create a series of functions for setting and checking the cutoff. This
    +    avoids duplication and moves access of the global cutoff and
    +    generation_cutoff to as few functions as possible.
    +
    +    Add several test cases including coverage of --all, --annotate-stdin,
    +    and the normal case with the cutoff heuristic, both with and without
    +    commit graphs enabled.
     
         Signed-off-by: Jacob Keller <jacob.keller@gmail.com>
     
    @@ builtin/name-rev.c: struct rev_name {
      static timestamp_t cutoff = TIME_MAX;
      static struct commit_rev_name rev_names;
      
    ++/* Disable the cutoff checks entirely */
    ++static void disable_cutoff(void)
    ++{
    ++	generation_cutoff = 0;
    ++	cutoff = 0;
    ++}
    ++
    ++/* Cutoff searching any commits older than this one */
     +static void set_commit_cutoff(struct commit *commit)
     +{
    -+	timestamp_t generation;
     +
     +	if (cutoff > commit->date)
     +		cutoff = commit->date;
     +
    -+	generation = commit_graph_generation(commit);
    -+	if (generation_cutoff > generation)
    -+		generation_cutoff = generation;
    ++	if (generation_cutoff) {
    ++		timestamp_t generation = commit_graph_generation(commit);
    ++
    ++		if (generation_cutoff > generation)
    ++			generation_cutoff = generation;
    ++	}
    ++}
    ++
    ++/* adjust the commit date cutoff with a slop to allow for slightly incorrect
    ++ * commit timestamps in case of clock skew.
    ++ */
    ++static void adjust_cutoff_timestamp_for_slop(void)
    ++{
    ++	if (cutoff) {
    ++		/* check for undeflow */
    ++		if (cutoff > TIME_MIN + CUTOFF_DATE_SLOP)
    ++			cutoff = cutoff - CUTOFF_DATE_SLOP;
    ++		else
    ++			cutoff = TIME_MIN;
    ++	}
     +}
     +
     +/* Check if a commit is before the cutoff. Prioritize generation numbers
    @@ builtin/name-rev.c: struct rev_name {
     +static int commit_is_before_cutoff(struct commit *commit)
     +{
     +	if (generation_cutoff < GENERATION_NUMBER_INFINITY)
    -+		return commit_graph_generation(commit) < generation_cutoff;
    ++		return generation_cutoff &&
    ++			commit_graph_generation(commit) < generation_cutoff;
     +
     +	return commit->date < cutoff;
     +}
    @@ builtin/name-rev.c: static void name_rev(struct commit *start_commit,
      
      			if (parent_number > 1) {
     @@ builtin/name-rev.c: int cmd_name_rev(int argc, const char **argv, const char *prefix)
    - 		error("Specify either a list, or --all, not both!");
      		usage_with_options(name_rev_usage, opts);
      	}
    --	if (all || annotate_stdin)
    -+	if (all || annotate_stdin) {
    -+		generation_cutoff = 0;
    - 		cutoff = 0;
    -+	}
    + 	if (all || annotate_stdin)
    +-		cutoff = 0;
    ++		disable_cutoff();
      
      	for (; argc; argc--, argv++) {
      		struct object_id oid;
    @@ builtin/name-rev.c: int cmd_name_rev(int argc, const char **argv, const char *pr
      
      		if (peel_tag) {
      			if (!commit) {
    +@@ builtin/name-rev.c: int cmd_name_rev(int argc, const char **argv, const char *prefix)
    + 		add_object_array(object, *argv, &revs);
    + 	}
    + 
    +-	if (cutoff) {
    +-		/* check for undeflow */
    +-		if (cutoff > TIME_MIN + CUTOFF_DATE_SLOP)
    +-			cutoff = cutoff - CUTOFF_DATE_SLOP;
    +-		else
    +-			cutoff = TIME_MIN;
    +-	}
    ++	adjust_cutoff_timestamp_for_slop();
    ++
    + 	for_each_ref(name_ref, &data);
    + 	name_tips();
    + 
     
      ## t/t6120-describe.sh ##
     @@ t/t6120-describe.sh: test_expect_success 'name-rev covers all conditions while looking at parents' '
    @@ t/t6120-describe.sh: test_expect_success 'name-rev covers all conditions while l
     +	test_commit -C non-monotonic E
     +'
     +
    -+test_expect_success 'name-rev with commitGraph handles non-monotonic timestamps' '
    -+	test_config -C non-monotonic core.commitGraph true &&
    ++test_expect_success 'name-rev without commitGraph does not handle non-monotonic timestamps' '
    ++	test_config -C non-monotonic core.commitGraph false &&
     +	(
     +		cd non-monotonic &&
     +
    -+		# Ensure commit graph is up to date
    -+		git -c gc.writeCommitGraph=true gc &&
    ++		rm -rf .git/info/commit-graph* &&
     +
    -+		echo "main~3 tags/D~2" >expect &&
    ++		echo "main~3 undefined" >expect &&
     +		git name-rev --tags main~3 >actual &&
     +
     +		test_cmp expect actual
     +	)
     +'
     +
    -+test_expect_success 'name-rev --all works with non-monotonic' '
    ++test_expect_success 'name-rev --all works with non-monotonic timestamps' '
    ++	test_config -C non-monotonic core.commitGraph false &&
     +	(
     +		cd non-monotonic &&
     +
    ++		rm -rf .git/info/commit-graph* &&
    ++
    ++		cat >tags <<-\EOF &&
    ++		tags/E
    ++		tags/D
    ++		tags/D~1
    ++		tags/D~2
    ++		tags/A
    ++		EOF
    ++
    ++		git log --pretty=%H >revs &&
    ++
    ++		paste -d" " revs tags | sort >expect &&
    ++
    ++		git name-rev --tags --all | sort >actual &&
    ++		test_cmp expect actual
    ++	)
    ++'
    ++
    ++test_expect_success 'name-rev --annotate-stdin works with non-monotonic timestamps' '
    ++	test_config -C non-monotonic core.commitGraph false &&
    ++	(
    ++		cd non-monotonic &&
    ++
    ++		rm -rf .git/info/commit-graph* &&
    ++
     +		cat >expect <<-\EOF &&
     +		E
     +		D
    @@ t/t6120-describe.sh: test_expect_success 'name-rev covers all conditions while l
     +
     +		git log --pretty=%H >revs &&
     +		git name-rev --tags --annotate-stdin --name-only <revs >actual &&
    ++		test_cmp expect actual
    ++	)
    ++'
     +
    ++test_expect_success 'name-rev with commitGraph handles non-monotonic timestamps' '
    ++	test_config -C non-monotonic core.commitGraph true &&
    ++	(
    ++		cd non-monotonic &&
    ++
    ++		git commit-graph write --reachable &&
    ++
    ++		echo "main~3 tags/D~2" >expect &&
    ++		git name-rev --tags main~3 >actual &&
    ++
    ++		test_cmp expect actual
    ++	)
    ++'
    ++
    ++test_expect_success 'name-rev --all works with commitGraph' '
    ++	test_config -C non-monotonic core.commitGraph true &&
    ++	(
    ++		cd non-monotonic &&
    ++
    ++		git commit-graph write --reachable &&
    ++
    ++		cat >tags <<-\EOF &&
    ++		tags/E
    ++		tags/D
    ++		tags/D~1
    ++		tags/D~2
    ++		tags/A
    ++		EOF
    ++
    ++		git log --pretty=%H >revs &&
    ++
    ++		paste -d" " revs tags | sort >expect &&
    ++
    ++		git name-rev --tags --all | sort >actual &&
    ++		test_cmp expect actual
    ++	)
    ++'
    ++
    ++test_expect_success 'name-rev --annotate-stdin works with commitGraph' '
    ++	test_config -C non-monotonic core.commitGraph true &&
    ++	(
    ++		cd non-monotonic &&
    ++
    ++		git commit-graph write --reachable &&
    ++
    ++		cat >expect <<-\EOF &&
    ++		E
    ++		D
    ++		D~1
    ++		D~2
    ++		A
    ++		EOF
    ++
    ++		git log --pretty=%H >revs &&
    ++		git name-rev --tags --annotate-stdin --name-only <revs >actual &&
     +		test_cmp expect actual
     +	)
     +'

Jacob Keller (1):
  name-rev: use generation numbers if available

 builtin/name-rev.c  |  71 +++++++++++++++++++-----
 t/t6120-describe.sh | 132 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+), 14 deletions(-)

-- 
2.35.1.355.ge7e302376dd6


^ permalink raw reply	[flat|nested] 23+ messages in thread
* [PATCH] name-rev: use generation numbers if available
@ 2022-02-28 19:07 Jacob Keller
  2022-02-28 19:50 ` Derrick Stolee
  0 siblings, 1 reply; 23+ messages in thread
From: Jacob Keller @ 2022-02-28 19:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee, Jacob Keller

From: Jacob Keller <jacob.keller@gmail.com>

If a commit in a sequence of linear history has a non-monotonically
increasing commit timestamp, git name-rev might not properly name the
commit.

This occurs because name-rev uses a heuristic of the commit date to
avoid searching down tags which lead to commits that are older than the
named commit. This is intended to avoid work on larger repositories.

This heuristic impacts git name-rev, and by extension git describe
--contains which is built on top of name-rev.

Furthermore, if --annotate-stdin is used, the heuristic is not enabled
because the full history has to be analyzed anyway. This results in
some confusion if a user sees that --annotate-stdin works but a normal
name-rev does not.

If the repository has a commit graph, we can use the generation numbers
instead of using the commit dates. This is essentially the same check
except that generation numbers make it exact, where the commit date
heuristic could be incorrect due to clock errors.

Add a test case which covers this behavior and shows how the commit
graph makes the name-rev process work.

Signed-off-by: Jacob Keller <jacob.keller@gmail.com>
---
The initial implementation of this came from [1]. Should this have Stolee's
sign-off?

[1]: https://lore.kernel.org/git/42d2a9fe-a3f2-f841-dcd1-27a0440521b6@github.com/


 builtin/name-rev.c  | 39 +++++++++++++++++++++++++++++-------
 t/t6120-describe.sh | 48 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 138e3c30a2b9..eda06697ac9f 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -9,6 +9,7 @@
 #include "prio-queue.h"
 #include "hash-lookup.h"
 #include "commit-slab.h"
+#include "commit-graph.h"
 
 /*
  * One day.  See the 'name a rev shortly after epoch' test in t6120 when
@@ -26,9 +27,33 @@ struct rev_name {
 
 define_commit_slab(commit_rev_name, struct rev_name);
 
+static timestamp_t generation_cutoff = GENERATION_NUMBER_INFINITY;
 static timestamp_t cutoff = TIME_MAX;
 static struct commit_rev_name rev_names;
 
+static void set_commit_cutoff(struct commit *commit)
+{
+	timestamp_t generation;
+
+	if (cutoff > commit->date)
+		cutoff = commit->date;
+
+	generation = commit_graph_generation(commit);
+	if (generation_cutoff > generation)
+		generation_cutoff = generation;
+}
+
+/* Check if a commit is before the cutoff. Prioritize generation numbers
+ * first, but use the commit timestamp if we lack generation data.
+ */
+static int commit_is_before_cutoff(struct commit *commit)
+{
+	if (generation_cutoff < GENERATION_NUMBER_INFINITY)
+		return commit_graph_generation(commit) < generation_cutoff;
+
+	return commit->date < cutoff;
+}
+
 /* How many generations are maximally preferred over _one_ merge traversal? */
 #define MERGE_TRAVERSAL_WEIGHT 65535
 
@@ -151,7 +176,7 @@ static void name_rev(struct commit *start_commit,
 	struct rev_name *start_name;
 
 	parse_commit(start_commit);
-	if (start_commit->date < cutoff)
+	if (commit_is_before_cutoff(start_commit))
 		return;
 
 	start_name = create_or_update_name(start_commit, taggerdate, 0, 0,
@@ -181,7 +206,7 @@ static void name_rev(struct commit *start_commit,
 			int generation, distance;
 
 			parse_commit(parent);
-			if (parent->date < cutoff)
+			if (commit_is_before_cutoff(parent))
 				continue;
 
 			if (parent_number > 1) {
@@ -567,8 +592,10 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 		error("Specify either a list, or --all, not both!");
 		usage_with_options(name_rev_usage, opts);
 	}
-	if (all || annotate_stdin)
+	if (all || annotate_stdin) {
+		generation_cutoff = 0;
 		cutoff = 0;
+	}
 
 	for (; argc; argc--, argv++) {
 		struct object_id oid;
@@ -596,10 +623,8 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 			continue;
 		}
 
-		if (commit) {
-			if (cutoff > commit->date)
-				cutoff = commit->date;
-		}
+		if (commit)
+			set_commit_cutoff(commit);
 
 		if (peel_tag) {
 			if (!commit) {
diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh
index 9781b92aeddf..1af29b6824ba 100755
--- a/t/t6120-describe.sh
+++ b/t/t6120-describe.sh
@@ -488,6 +488,54 @@ test_expect_success 'name-rev covers all conditions while looking at parents' '
 	)
 '
 
+# A-B-C-D-E-main
+#
+# Where C has a non-monotonically increasing commit timestamp w.r.t. other
+# commits
+test_expect_success 'non-monotonic commit dates setup' '
+	UNIX_EPOCH_ZERO="@0 +0000" &&
+	git init non-monotonic &&
+	test_commit -C non-monotonic A &&
+	test_commit -C non-monotonic --no-tag B &&
+	test_commit -C non-monotonic --no-tag --date "$UNIX_EPOCH_ZERO" C &&
+	test_commit -C non-monotonic D &&
+	test_commit -C non-monotonic E
+'
+
+test_expect_success 'name-rev with commitGraph handles non-monotonic timestamps' '
+	test_config -C non-monotonic core.commitGraph true &&
+	(
+		cd non-monotonic &&
+
+		# Ensure commit graph is up to date
+		git -c gc.writeCommitGraph=true gc &&
+
+		echo "main~3 tags/D~2" >expect &&
+		git name-rev --tags main~3 >actual &&
+
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'name-rev --all works with non-monotonic' '
+	(
+		cd non-monotonic &&
+
+		cat >expect <<-\EOF &&
+		E
+		D
+		D~1
+		D~2
+		A
+		EOF
+
+		git log --pretty=%H >revs &&
+		git name-rev --tags --annotate-stdin --name-only <revs >actual &&
+
+		test_cmp expect actual
+	)
+'
+
 #               B
 #               o
 #                \
-- 
2.35.1.129.gb80121027d12


^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2022-03-07 22:52 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-28 21:50 [PATCH v2 0/1] name-rev: use generation numbers if available Jacob Keller
2022-02-28 21:50 ` [PATCH v2 1/1] " Jacob Keller
2022-02-28 21:50 ` [PATCH] " Jacob Keller
2022-03-01  2:36   ` Junio C Hamano
2022-03-01  7:08     ` Jacob Keller
2022-03-01  7:09       ` Jacob Keller
2022-03-01  7:33       ` Junio C Hamano
2022-03-01 15:09         ` Derrick Stolee
2022-03-01 19:52           ` Keller, Jacob E
2022-03-01 19:56             ` Derrick Stolee
2022-03-01 20:22               ` Junio C Hamano
2022-03-01 22:46                 ` Keller, Jacob E
2022-03-03  1:10                   ` Junio C Hamano
2022-03-07 20:22                     ` Jacob Keller
2022-03-07 20:26                       ` Derrick Stolee
2022-03-07 22:30                         ` Keller, Jacob E
2022-03-07 22:43                           ` Derrick Stolee
2022-03-07 22:52                           ` Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2022-02-28 19:07 Jacob Keller
2022-02-28 19:50 ` Derrick Stolee
2022-02-28 20:20   ` Keller, Jacob E
2022-02-28 20:24     ` Derrick Stolee
2022-02-28 20:59       ` Keller, Jacob E

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).