Git development

Git development
 help / color / mirror / Atom feed

* [PATCH 11/12] reftable/record: decode keys in place
From: Patrick Steinhardt @ 2024-02-14  7:46 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1707895758.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 7644 bytes --]

When reading a record from a block, we need to decode the record's key.
As reftable keys are prefix-compressed, meaning they reuse a prefix from
the preceding record's key, this is a bit more involved than just having
to copy the relevant bytes: we need to figure out the prefix and suffix
lengths, copy the prefix from the preceding record and finally copy the
suffix from the current record.

This is done by passing three buffers to `reftable_decode_key()`: one
buffer that holds the result, one buffer that holds the last key, and
one buffer that points to the current record. The final key is then
assembled by calling `strbuf_add()` twice to copy over the prefix and
suffix.

Performing two memory copies is inefficient though. And we can indeed do
better by decoding keys in place. Instead of providing two buffers, the
caller may only call a single buffer that is already pre-populated with
the last key. Like this, we only have to call `strbuf_setlen()` to trim
the record to its prefix and then `strbuf_add()` to add the suffix.

This refactoring leads to a noticeable performance bump when iterating
over 1 million refs:

  Benchmark 1: show-ref: single matching ref (revision = HEAD~)
    Time (mean ± σ):     112.2 ms ±   3.9 ms    [User: 109.3 ms, System: 2.8 ms]
    Range (min … max):   109.2 ms … 149.6 ms    1000 runs

  Benchmark 2: show-ref: single matching ref (revision = HEAD)
    Time (mean ± σ):     106.0 ms ±   3.5 ms    [User: 103.2 ms, System: 2.7 ms]
    Range (min … max):   103.2 ms … 133.7 ms    1000 runs

  Summary
    show-ref: single matching ref (revision = HEAD) ran
      1.06 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/block.c       | 25 +++++++++++--------------
 reftable/block.h       |  2 --
 reftable/record.c      | 19 +++++++++----------
 reftable/record.h      |  9 ++++++---
 reftable/record_test.c |  3 ++-
 5 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/reftable/block.c b/reftable/block.c
index 72eb73b380..ad9074dba6 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -291,9 +291,8 @@ static int restart_key_less(size_t idx, void *args)
 	/* the restart key is verbatim in the block, so this could avoid the
 	   alloc for decoding the key */
 	struct strbuf rkey = STRBUF_INIT;
-	struct strbuf last_key = STRBUF_INIT;
 	uint8_t unused_extra;
-	int n = reftable_decode_key(&rkey, &unused_extra, last_key, in);
+	int n = reftable_decode_key(&rkey, &unused_extra, in);
 	int result;
 	if (n < 0) {
 		a->error = 1;
@@ -326,35 +325,34 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
 	if (it->next_off >= it->br->block_len)
 		return 1;
 
-	n = reftable_decode_key(&it->key, &extra, it->last_key, in);
+	n = reftable_decode_key(&it->last_key, &extra, in);
 	if (n < 0)
 		return -1;
-
-	if (!it->key.len)
+	if (!it->last_key.len)
 		return REFTABLE_FORMAT_ERROR;
 
 	string_view_consume(&in, n);
-	n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size);
+	n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size);
 	if (n < 0)
 		return -1;
 	string_view_consume(&in, n);
 
-	strbuf_swap(&it->last_key, &it->key);
 	it->next_off += start.len - in.len;
 	return 0;
 }
 
 int block_reader_first_key(struct block_reader *br, struct strbuf *key)
 {
-	struct strbuf empty = STRBUF_INIT;
-	int off = br->header_off + 4;
+	int off = br->header_off + 4, n;
 	struct string_view in = {
 		.buf = br->block.data + off,
 		.len = br->block_len - off,
 	};
-
 	uint8_t extra = 0;
-	int n = reftable_decode_key(key, &extra, empty, in);
+
+	strbuf_reset(key);
+
+	n = reftable_decode_key(key, &extra, in);
 	if (n < 0)
 		return n;
 	if (!key->len)
@@ -371,7 +369,6 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want)
 void block_iter_close(struct block_iter *it)
 {
 	strbuf_release(&it->last_key);
-	strbuf_release(&it->key);
 }
 
 int block_reader_seek(struct block_reader *br, struct block_iter *it,
@@ -408,8 +405,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
 		if (err < 0)
 			goto done;
 
-		reftable_record_key(&rec, &it->key);
-		if (err > 0 || strbuf_cmp(&it->key, want) >= 0) {
+		reftable_record_key(&rec, &it->last_key);
+		if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) {
 			err = 0;
 			goto done;
 		}
diff --git a/reftable/block.h b/reftable/block.h
index 17481e6331..51699af233 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -84,12 +84,10 @@ struct block_iter {
 
 	/* key for last entry we read. */
 	struct strbuf last_key;
-	struct strbuf key;
 };
 
 #define BLOCK_ITER_INIT { \
 	.last_key = STRBUF_INIT, \
-	.key = STRBUF_INIT, \
 }
 
 /* initializes a block reader. */
diff --git a/reftable/record.c b/reftable/record.c
index 3f2a639036..37682cc7d0 100644
--- a/reftable/record.c
+++ b/reftable/record.c
@@ -159,20 +159,19 @@ int reftable_encode_key(int *restart, struct string_view dest,
 	return start.len - dest.len;
 }
 
-int reftable_decode_key(struct strbuf *key, uint8_t *extra,
-			struct strbuf last_key, struct string_view in)
+int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
+			struct string_view in)
 {
 	int start_len = in.len;
 	uint64_t prefix_len = 0;
 	uint64_t suffix_len = 0;
-	int n = get_var_int(&prefix_len, &in);
+	int n;
+
+	n = get_var_int(&prefix_len, &in);
 	if (n < 0)
 		return -1;
 	string_view_consume(&in, n);
 
-	if (prefix_len > last_key.len)
-		return -1;
-
 	n = get_var_int(&suffix_len, &in);
 	if (n <= 0)
 		return -1;
@@ -181,12 +180,12 @@ int reftable_decode_key(struct strbuf *key, uint8_t *extra,
 	*extra = (uint8_t)(suffix_len & 0x7);
 	suffix_len >>= 3;
 
-	if (in.len < suffix_len)
+	if (in.len < suffix_len ||
+	    prefix_len > last_key->len)
 		return -1;
 
-	strbuf_reset(key);
-	strbuf_add(key, last_key.buf, prefix_len);
-	strbuf_add(key, in.buf, suffix_len);
+	strbuf_setlen(last_key, prefix_len);
+	strbuf_add(last_key, in.buf, suffix_len);
 	string_view_consume(&in, suffix_len);
 
 	return start_len - in.len;
diff --git a/reftable/record.h b/reftable/record.h
index a05e2be179..91c9c6ebfd 100644
--- a/reftable/record.h
+++ b/reftable/record.h
@@ -81,9 +81,12 @@ int reftable_encode_key(int *is_restart, struct string_view dest,
 			struct strbuf prev_key, struct strbuf key,
 			uint8_t extra);
 
-/* Decode into `key` and `extra` from `in` */
-int reftable_decode_key(struct strbuf *key, uint8_t *extra,
-			struct strbuf last_key, struct string_view in);
+/*
+ * Decode into `last_key` and `extra` from `in`. `last_key` is expected to
+ * contain the decoded key of the preceding record, if any.
+ */
+int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
+			struct string_view in);
 
 /* reftable_index_record are used internally to speed up lookups. */
 struct reftable_index_record {
diff --git a/reftable/record_test.c b/reftable/record_test.c
index a86cff5526..89209894d8 100644
--- a/reftable/record_test.c
+++ b/reftable/record_test.c
@@ -295,7 +295,8 @@ static void test_key_roundtrip(void)
 	EXPECT(!restart);
 	EXPECT(n > 0);
 
-	m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest);
+	strbuf_addstr(&roundtrip, "refs/heads/master");
+	m = reftable_decode_key(&roundtrip, &rt_extra, dest);
 	EXPECT(n == m);
 	EXPECT(0 == strbuf_cmp(&key, &roundtrip));
 	EXPECT(rt_extra == extra);
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH 12/12] reftable: allow inlining of a few functions
From: Patrick Steinhardt @ 2024-02-14  7:46 UTC (permalink / raw)
  To: git
In-Reply-To: <cover.1707895758.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 5032 bytes --]

We have a few functions which are basically just accessors to
structures. As those functions are executed inside the hot loop when
iterating through many refs, the fact that they cannot be inlined is
costing us some performance.

Move the function definitions into their respective headers so that they
can be inlined. This results in a performance improvement when iterating
over 1 million refs:

  Benchmark 1: show-ref: single matching ref (revision = HEAD~)
    Time (mean ± σ):     102.4 ms ±   3.7 ms    [User: 99.6 ms, System: 2.7 ms]
    Range (min … max):    99.1 ms … 129.1 ms    1000 runs

  Benchmark 2: show-ref: single matching ref (revision = HEAD)
    Time (mean ± σ):      98.3 ms ±   3.7 ms    [User: 95.4 ms, System: 2.7 ms]
    Range (min … max):    94.9 ms … 126.2 ms    1000 runs

  Summary
    show-ref: single matching ref (revision = HEAD) ran
      1.04 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/pq.c     | 10 ----------
 reftable/pq.h     | 12 ++++++++++--
 reftable/record.c | 11 -----------
 reftable/record.h | 12 ++++++++++--
 4 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/reftable/pq.c b/reftable/pq.c
index 0074d6bc43..7fb45d8c60 100644
--- a/reftable/pq.c
+++ b/reftable/pq.c
@@ -20,16 +20,6 @@ int pq_less(struct pq_entry *a, struct pq_entry *b)
 	return cmp < 0;
 }
 
-struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq)
-{
-	return pq.heap[0];
-}
-
-int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq)
-{
-	return pq.len == 0;
-}
-
 struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq)
 {
 	int i = 0;
diff --git a/reftable/pq.h b/reftable/pq.h
index ce23972c16..f796c23179 100644
--- a/reftable/pq.h
+++ b/reftable/pq.h
@@ -22,12 +22,20 @@ struct merged_iter_pqueue {
 	size_t cap;
 };
 
-struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq);
-int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq);
 void merged_iter_pqueue_check(struct merged_iter_pqueue pq);
 struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq);
 void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e);
 void merged_iter_pqueue_release(struct merged_iter_pqueue *pq);
 int pq_less(struct pq_entry *a, struct pq_entry *b);
 
+static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq)
+{
+	return pq.heap[0];
+}
+
+static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq)
+{
+	return pq.len == 0;
+}
+
 #endif
diff --git a/reftable/record.c b/reftable/record.c
index 37682cc7d0..fdda28645c 100644
--- a/reftable/record.c
+++ b/reftable/record.c
@@ -1176,11 +1176,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest)
 	reftable_record_vtable(rec)->key(reftable_record_data(rec), dest);
 }
 
-uint8_t reftable_record_type(struct reftable_record *rec)
-{
-	return rec->type;
-}
-
 int reftable_record_encode(struct reftable_record *rec, struct string_view dest,
 			   int hash_size)
 {
@@ -1302,12 +1297,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log)
 	return (log->value_type == REFTABLE_LOG_DELETION);
 }
 
-void string_view_consume(struct string_view *s, int n)
-{
-	s->buf += n;
-	s->len -= n;
-}
-
 static void *reftable_record_data(struct reftable_record *rec)
 {
 	switch (rec->type) {
diff --git a/reftable/record.h b/reftable/record.h
index 91c9c6ebfd..5e8304e052 100644
--- a/reftable/record.h
+++ b/reftable/record.h
@@ -25,7 +25,11 @@ struct string_view {
 };
 
 /* Advance `s.buf` by `n`, and decrease length. */
-void string_view_consume(struct string_view *s, int n);
+static inline void string_view_consume(struct string_view *s, int n)
+{
+	s->buf += n;
+	s->len -= n;
+}
 
 /* utilities for de/encoding varints */
 
@@ -127,7 +131,6 @@ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b);
 int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size);
 void reftable_record_print(struct reftable_record *rec, int hash_size);
 void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);
-uint8_t reftable_record_type(struct reftable_record *rec);
 void reftable_record_copy_from(struct reftable_record *rec,
 			       struct reftable_record *src, int hash_size);
 uint8_t reftable_record_val_type(struct reftable_record *rec);
@@ -138,6 +141,11 @@ int reftable_record_decode(struct reftable_record *rec, struct strbuf key,
 			   int hash_size);
 int reftable_record_is_deletion(struct reftable_record *rec);
 
+static inline uint8_t reftable_record_type(struct reftable_record *rec)
+{
+	return rec->type;
+}
+
 /* frees and zeroes out the embedded record */
 void reftable_record_release(struct reftable_record *rec);
 
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* Re: [PATCH 06/12] fsmonitor: clarify handling of directory events in callback
From: Junio C Hamano @ 2024-02-14  7:47 UTC (permalink / raw)
  To: Jeff Hostetler via GitGitGadget; +Cc: git, Jeff Hostetler
In-Reply-To: <5b6f8bd1fe7b6c742b25a5a1ed95b528f352215e.1707857541.git.gitgitgadget@gmail.com>

"Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Jeff Hostetler <jeffhostetler@github.com>
>
> Signed-off-by: Jeff Hostetler <jeffhostetler@github.com>
> ---
>  fsmonitor.c | 47 +++++++++++++++++++++++++++++++++--------------
>  1 file changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/fsmonitor.c b/fsmonitor.c
> index 614270fa5e8..754fe20cfd0 100644
> --- a/fsmonitor.c
> +++ b/fsmonitor.c
> @@ -219,24 +219,40 @@ static void fsmonitor_refresh_callback_unqualified(
>  	}
>  }
>  
> -static void fsmonitor_refresh_callback_slash(
> +/*
> + * The daemon can decorate directory events, such as a move or rename,
> + * by adding a trailing slash to the observed name.  Use this to
> + * explicitly invalidate the entire cone under that directory.
> + *
> + * The daemon can only reliably do that if the OS FSEvent contains
> + * sufficient information in the event.
> + *
> + * macOS FSEvents have enough information.
> + *
> + * Other platforms may or may not be able to do it (and it might
> + * depend on the type of event (for example, a daemon could lstat() an
> + * observed pathname after a rename, but not after a delete)).
> + *
> + * If we find an exact match in the index for a path with a trailing
> + * slash, it means that we matched a sparse-index directory in a
> + * cone-mode sparse-checkout (since that's the only time we have
> + * directories in the index).  We should never see this in practice
> + * (because sparse directories should not be present and therefore
> + * not generating FS events).  Either way, we can treat them in the
> + * same way and just invalidate the cache-entry and the untracked
> + * cache (and in this case, the forward cache-entry scan won't find
> + * anything and it doesn't hurt to let it run).
> + *
> + * Return the number of cache-entries that we invalidated.  We will
> + * use this later to determine if we need to attempt a second
> + * case-insensitive search.
> + */
> +static int fsmonitor_refresh_callback_slash(
>  	struct index_state *istate, const char *name, int len, int pos)
>  {

This was split out a few patches ago, and the caller of course
ignored the return value (void), but now it turns an integer, and
this change is without a corresponding update to the caller, which
leaves readers puzzled.

Perhaps a future patch either updates the existing caller or adds a
new caller that utilize the returned value, but then at least the
proposed commit message for this step should hint how it helps the
caller(s) we will see in the future steps if this function returns
the number of entries invalidated, iow, how the caller is expected
to use the returned value from here, no?

Alternatively, this step can limit itself to what the commit title
claims to do---to clarify what the helper does with enhanced in-code
comments.  Then a future step that updates the caller to care about
the return value can have both the changes to this callee as well as
the caller---which may make it easier to see how the returned info
helps the caller.  I dunno which is more reasonable.

Thanks.



>  	int i;
> +	int nr_in_cone = 0;
>  
> -	/*
> -	 * The daemon can decorate directory events, such as
> -	 * moves or renames, with a trailing slash if the OS
> -	 * FS Event contains sufficient information, such as
> -	 * MacOS.
> -	 *
> -	 * Use this to invalidate the entire cone under that
> -	 * directory.
> -	 *
> -	 * We do not expect an exact match because the index
> -	 * does not normally contain directory entries, so we
> -	 * start at the insertion point and scan.
> -	 */
>  	if (pos < 0)
>  		pos = -pos - 1;
>  
> @@ -245,7 +261,10 @@ static void fsmonitor_refresh_callback_slash(
>  		if (!starts_with(istate->cache[i]->name, name))
>  			break;
>  		istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
> +		nr_in_cone++;
>  	}
> +
> +	return nr_in_cone;
>  }
>  
>  static void fsmonitor_refresh_callback(struct index_state *istate, char *name)

^ permalink raw reply

* [PATCH v2 0/1] diff: mark param1 and param2 as placeholders
From: Jiang Xin @ 2024-02-14  8:46 UTC (permalink / raw)
  To: Git List, Junio C Hamano, Jean-Noël Avila; +Cc: Jiang Xin
In-Reply-To: <6e33b2b2-f0b1-46ba-bbd8-3ae4c87d35ba@free.fr>

Some l10n translators translated the parameters "files", "param1" and
"param2" in the following message:

    "synonym for --dirstat=files,param1,param2..."

Translating "param1" and "param2" is OK, but changing the parameter
"files" is wrong. The parameters that are not meant to be used verbatim
should be marked as placeholders, but the verbatim parameter not marked
as a placeholder should be left as is.

This change is a complement for commit 51e846e673 (doc: enforce
placeholders in documentation, 2023-12-25).

With the help of Jean-Noël,some parameter combinations in one
placeholder (e.g. "<param1,param2>...") are splited into seperate
placeholders.

# range-diff v1...v2

1:  c65bca7f6f ! 1:  3a82f72f33 diff: mark param1 and param2 as placeholders
    @@ Commit message
         This change is a complement for commit 51e846e673 (doc: enforce
         placeholders in documentation, 2023-12-25).
     
    +    With the help of Jean-Noël,some parameter combinations in one
    +    placeholder (e.g. "<param1,param2>...") are splited into seperate
    +    placeholders.
    +
    +    Helped-by: Jean-Noël Avila <jn.avila@free.fr>
         Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
     
      ## diff.c ##
     @@ diff.c: struct option *add_diff_options(const struct option *opts,
    + 		OPT_BITOP(0, "shortstat", &options->output_format,
    + 			  N_("output only the last line of --stat"),
    + 			  DIFF_FORMAT_SHORTSTAT, DIFF_FORMAT_NO_OUTPUT),
    +-		OPT_CALLBACK_F('X', "dirstat", options, N_("<param1,param2>..."),
    ++		OPT_CALLBACK_F('X', "dirstat", options, N_("<param1>,<param2>..."),
    + 			       N_("output the distribution of relative amount of changes for each sub-directory"),
    + 			       PARSE_OPT_NONEG | PARSE_OPT_OPTARG,
    + 			       diff_opt_dirstat),
    +@@ diff.c: struct option *add_diff_options(const struct option *opts,
    + 			       N_("synonym for --dirstat=cumulative"),
      			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
      			       diff_opt_dirstat),
    - 		OPT_CALLBACK_F(0, "dirstat-by-file", options, N_("<param1,param2>..."),
    +-		OPT_CALLBACK_F(0, "dirstat-by-file", options, N_("<param1,param2>..."),
     -			       N_("synonym for --dirstat=files,param1,param2..."),
    ++		OPT_CALLBACK_F(0, "dirstat-by-file", options, N_("<param1>,<param2>..."),
     +			       N_("synonym for --dirstat=files,<param1>,<param2>..."),
      			       PARSE_OPT_NONEG | PARSE_OPT_OPTARG,
      			       diff_opt_dirstat),
---
Jiang Xin (1):
  diff: mark param1 and param2 as placeholders

 diff.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

-- 
2.44.0.rc0


^ permalink raw reply

* [PATCH v2 1/1] diff: mark param1 and param2 as placeholders
From: Jiang Xin @ 2024-02-14  8:46 UTC (permalink / raw)
  To: Git List, Junio C Hamano, Jean-Noël Avila; +Cc: Jiang Xin
In-Reply-To: <cover.1707900029.git.worldhello.net@gmail.com>

Some l10n translators translated the parameters "files", "param1" and
"param2" in the following message:

    "synonym for --dirstat=files,param1,param2..."

Translating "param1" and "param2" is OK, but changing the parameter
"files" is wrong. The parameters that are not meant to be used verbatim
should be marked as placeholders, but the verbatim parameter not marked
as a placeholder should be left as is.

This change is a complement for commit 51e846e673 (doc: enforce
placeholders in documentation, 2023-12-25).

With the help of Jean-Noël,some parameter combinations in one
placeholder (e.g. "<param1,param2>...") are splited into seperate
placeholders.

Helped-by: Jean-Noël Avila <jn.avila@free.fr>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 diff.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/diff.c b/diff.c
index ccfa1fca0d..e50def4538 100644
--- a/diff.c
+++ b/diff.c
@@ -5590,7 +5590,7 @@ struct option *add_diff_options(const struct option *opts,
 		OPT_BITOP(0, "shortstat", &options->output_format,
 			  N_("output only the last line of --stat"),
 			  DIFF_FORMAT_SHORTSTAT, DIFF_FORMAT_NO_OUTPUT),
-		OPT_CALLBACK_F('X', "dirstat", options, N_("<param1,param2>..."),
+		OPT_CALLBACK_F('X', "dirstat", options, N_("<param1>,<param2>..."),
 			       N_("output the distribution of relative amount of changes for each sub-directory"),
 			       PARSE_OPT_NONEG | PARSE_OPT_OPTARG,
 			       diff_opt_dirstat),
@@ -5598,8 +5598,8 @@ struct option *add_diff_options(const struct option *opts,
 			       N_("synonym for --dirstat=cumulative"),
 			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
 			       diff_opt_dirstat),
-		OPT_CALLBACK_F(0, "dirstat-by-file", options, N_("<param1,param2>..."),
-			       N_("synonym for --dirstat=files,param1,param2..."),
+		OPT_CALLBACK_F(0, "dirstat-by-file", options, N_("<param1>,<param2>..."),
+			       N_("synonym for --dirstat=files,<param1>,<param2>..."),
 			       PARSE_OPT_NONEG | PARSE_OPT_OPTARG,
 			       diff_opt_dirstat),
 		OPT_BIT_F(0, "check", &options->output_format,
-- 
2.44.0.rc0


^ permalink raw reply related

* Re: [GSOC][RFC] microproject: use test_path_is_* functions in test scripts
From: vincenzo mezzela @ 2024-02-14 10:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <xmqqy1bo5k5h.fsf@gitster.g>

> One thing to note is that you'd need to make sure if "test -e" for
> example originally written really means "we want to see something
> there and it does not matter if it is a file or a directory" while
> turning it into test_path_exists.  The operations that such a test
> follows may be expected to create a file and never a directory, in
> which case the condition the original code is testing may need to
> be corrected first to expect a more specific type (e.g. "test -f").
> The same comment applies for the other two.

Thanks for this clarification!

> Some tests check with "! test -f <path>", which often would want to
> be turned into "test_path_is_missing", but you'd need to make sure
> that is what the original test really meant to do.

I'd have changed them into:
-  ! test -f <path> --> ! test_path_is_file <path>
-  ! test -e <path> --> test_path_is_missing <path>
Since '! test -f <path>' and 'test_path_is_missing <path>' can return
different values.

But as you said, I'll check what they are really meant to do and
modify them accordingly :)

>
> A microproject is not about "doing the real work to help the
> project".  It is a practice session to come up with a set of small
> changes and explain them well, to send the result to the list to get
> reviewed, to respond to reviews and possibly send updates, and to
> repeat the cycle until completion.  So most likely you'd pick a
> single file or two and do the above conversion, while leaving the
> others as practice material for other GSoC participants.
>
> Enjoy.
>

Thanks for this clarification too. :)

Vincenzo

^ permalink raw reply

* Re: [PATCH v4 1/5] refs: introduce `is_pseudoref()` and `is_headref()`
From: Karthik Nayak @ 2024-02-14 10:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Patrick Steinhardt, git, phillip.wood123, Jeff King
In-Reply-To: <xmqqh6ic5ex3.fsf@gitster.g>

Hello,

On Tue, Feb 13, 2024 at 8:42 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Patrick Steinhardt <ps@pks.im> writes:
>
> > I wonder whether we can maybe consolidate the interface into one or
> > maybe even two functions where the behaviour can be tweaked with a flag
> > field. Something like `refname_is_valid()` with a bunch of flags:
> >
> >   - REFNAME_ACCEPT_HEAD to accept "HEAD"
> >   - REFNAME_ACCEPT_PSEUDOREF to accept all of the refs ending with
> >     "_HEAD" or being one of the irregular pseudorefs.
> >   - REFNAME_ACCEPT_INVALID_BUT_SAFE to accept refnames which aren't
> >     valid, but which would pass `refname_is_safe()`.
>
> I am certain we _can_, but it will take an actual patch to see if
> such a refactoring makes the callers easier to follow, which is the
> real test.  FWIW, I am much less skeptical than hopeful in this
> particular case.

I was trying to implement this and realized that the changes sprawl
multiple files and
and have a fair bit of complexity since `check_refname_format()`
implements its own
flags. Overall, adding it to this patch series would overshadow what
we're trying to do here.

I think it would be best to tackle this problem after this series has landed.

Junio, let me know if you want me to reroll for the whitespace issues.
Otherwise, I'll wait
for reviews here.

^ permalink raw reply

* Re: Bug report: Incorrect GIT_FLUSH behavior in 2.43.1 (regression and breaking)
From: Phillip Wood @ 2024-02-14 10:57 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Xiaoguang WANG, Taylor Blau, Torsten Bögershausen,
	Chandra Pratap, git
In-Reply-To: <xmqqbk8k5eo0.fsf@gitster.g>

Hi Junio

On 13/02/2024 19:48, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
>> Unfortunately the minimum fix is already in 'next', so let me turn
>> what you wrote into an update relative to that.  I'll assume your
>> patch in the discussion is signed-off already?
> 
> Nah, my mistake.  The topic still is outside 'next', so I'll replace
> it with the attached while queuing.
 >
> Thanks.
> 
> ------- >8 ------------- >8 ------------- >8 -------
> Subject: [PATCH] write-or-die: fix the polarity of GIT_FLUSH environment variable
> 
> When GIT_FLUSH is set to 1, true, on, yes, then we should disable
> skip_stdout_flush, but the conversion somehow did the opposite.
> 
> With the understanding of the original motivation behind "skip" in
> 06f59e9f (Don't fflush(stdout) when it's not helpful, 2007-06-29),
> we can sympathize with the current naming (we wanted to avoid
> useless flushing of stdout by default, with an escape hatch to
> always flush), but it is still not a good excuse.
> 
> Retire the "skip_stdout_flush" variable and replace it with "flush_stdout"
> that tells if we do or do not want to run fflush().

I think the patch looks good and the commit message nicely explains why 
we want to change the name of the variable.

Best Wishes

Phillip

> Reported-by: Xiaoguang WANG <wxiaoguang@gmail.com>
> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
>   write-or-die.c | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/write-or-die.c b/write-or-die.c
> index 3942152865..01a9a51fa2 100644
> --- a/write-or-die.c
> +++ b/write-or-die.c
> @@ -18,20 +18,20 @@
>    */
>   void maybe_flush_or_die(FILE *f, const char *desc)
>   {
> -	static int skip_stdout_flush = -1;
> -
>   	if (f == stdout) {
> -		if (skip_stdout_flush < 0) {
> -			skip_stdout_flush = git_env_bool("GIT_FLUSH", -1);
> -			if (skip_stdout_flush < 0) {
> +		static int force_flush_stdout = -1;
> +
> +		if (force_flush_stdout < 0) {
> +			force_flush_stdout = git_env_bool("GIT_FLUSH", -1);
> +			if (force_flush_stdout < 0) {
>   				struct stat st;
>   				if (fstat(fileno(stdout), &st))
> -					skip_stdout_flush = 0;
> +					force_flush_stdout = 1;
>   				else
> -					skip_stdout_flush = S_ISREG(st.st_mode);
> +					force_flush_stdout = !S_ISREG(st.st_mode);
>   			}
>   		}
> -		if (skip_stdout_flush && !ferror(f))
> +		if (!force_flush_stdout && !ferror(f))
>   			return;
>   	}
>   	if (fflush(f)) {


^ permalink raw reply

* Re: [PATCH v4 2/2] revision: implement `git log --merge` also for rebase/cherry-pick/revert
From: Phillip Wood @ 2024-02-14 11:02 UTC (permalink / raw)
  To: Philippe Blain, phillip.wood, git
  Cc: Johannes Sixt, Elijah Newren, Michael Lohmann, Patrick Steinhardt,
	Michael Lohmann, Junio C Hamano
In-Reply-To: <790a3f11-5a8c-42f2-7a35-f2900c0299b4@gmail.com>

Hi Philippe

On 13/02/2024 13:27, Philippe Blain wrote:
> Le 2024-02-12 à 06:02, Phillip Wood a écrit :
>> Hi Philippe
>> On 10/02/2024 23:35, Philippe Blain wrote:
>>> From: Michael Lohmann <mi.al.lohmann@gmail.com>
>>> diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
>>> index 2bf239ff03..5b4672c346 100644
>>> --- a/Documentation/rev-list-options.txt
>>> +++ b/Documentation/rev-list-options.txt
>>> @@ -341,8 +341,10 @@ See also linkgit:git-reflog[1].
>>>    Under `--pretty=reference`, this information will not be shown at all.
>>>      --merge::
>>> -    After a failed merge, show refs that touch files having a
>>> -    conflict and don't exist on all heads to merge.
>>> +    Show commits touching conflicted paths in the range `HEAD...$OTHER`,
>>> +    where `$OTHER` is the first existing pseudoref in `MERGE_HEAD`,
>>> +    `CHERRY_PICK_HEAD`, `REVERT_HEAD` or `REBASE_HEAD`. Only works
>>> +    when the index has unmerged entries.
>>
>> Do you know what "and don't exist on all heads to merge" in the original 
> is referring to? The new text doesn't mention anything that sounds like 
>that but I don't understand what the original was trying to say.
> 
> Yes, it took me a while to understand what that meant. I think it is simply
> describing the range of commits shown. If we substitute "refs" for "commits"
> and switch the order of the sentence, it reads:
> 
>      After a failed merge, show commits that don't exist on all heads to merge
>      and that touch files having a conflict.
> 
> So it's just describing (a bit awkwardly) the HEAD...MERGE_HEAD range.

Ah, that makes sense, thanks for explaining


>>> +    for (i = 0; i < ARRAY_SIZE(other_head); i++)
>>> +        if (!read_ref_full(other_head[i],
>>> +                RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
>>> +                oid, NULL)) {
>>> +            if (is_null_oid(oid))
>>> +                die("%s is a symbolic ref???", other_head[i]);
>>
>> This would benefit from being translated and I think one '?' would suffice 
>> (I'm not sure we even need that - are there other possible causes of a null 
>> oid here?)
> 
> This bit was suggested by Junio upthread in <xmqqzfxa9usx.fsf@gitster.g>.
> I'm not sure if the are other causes of null oid, as I don't know well this
> part of the code.
> I agree that a single '?' would be enough, but I'm not sure about marking
> this for translation, I think maybe this situation would be best handled with
> BUG() ?

I think it would be a bug for git to create MERGE_HEAD as a symbolic ref 
but when we read MERGE_HEAD and find it is a symbolic ref we don't know 
if git created it or some third-party script so I think we should just 
report an error.

Best Wishes

Phillip


^ permalink raw reply

* Re: [PATCH v2 3/8] rebase: update `--empty=ask` to `--empty=drop`
From: Phillip Wood @ 2024-02-14 11:05 UTC (permalink / raw)
  To: Brian Lyles, git; +Cc: newren, me, gitster
In-Reply-To: <17b2b5fb5acd8fad.70b1dd9aae081c6e.203dcd72f6563036@zivdesk>

Hi Brian

On 11/02/2024 04:54, Brian Lyles wrote:
> I just noticed that I incorrectly specified `--empty=drop` in the
> subject of this commit. It should read "rebase: update `--empty=ask` to
> `--empty=stop`". This will need to be corrected in a v3 reroll.

Thanks for flagging that, I'm afraid it will probably be next week 
before I take a proper look at these

Best Wishes

Phillip



^ permalink raw reply

* Re: [PATCH v6 0/2] add-patch: classify '@' as a synonym for 'HEAD'
From: Phillip Wood @ 2024-02-14 11:06 UTC (permalink / raw)
  To: Ghanshyam Thakkar, git; +Cc: gitster, sunshine, ps
In-Reply-To: <20240213000601.520731-2-shyamthakkar001@gmail.com>

Hi Ghanshyam

On 13/02/2024 00:05, Ghanshyam Thakkar wrote:
> I just noticed after sending v5 that, in one of the tests, while
> moving it inside the loop for also testing '@', set_and_save_state was
> not used therefore the subsequent tests after the first $opt will not have
> the changes for which we prove 'y' thereby making the tests useless.
> (Although it would not fail regardless of this). This got unnoticed by
> the previous reviews since v1 and me as well.

This version all looks fine to me, thanks for working on it. Thanks for 
removing the PERL prerequisite from the remaining tests. I think the 
change to "git checkout @" is a good idea as it makes "@" act like 
"HEAD", hopefully there aren't too many users relying on "git checkout 
@" to detach HEAD.

Best Wishes

Phillip

> Apologies for the oversight and noise.
> 
> Ghanshyam Thakkar (2):
>    add-patch: classify '@' as a synonym for 'HEAD'
>    add -p tests: remove PERL prerequisites
> 
>   add-patch.c                    |  8 ------
>   builtin/checkout.c             |  4 ++-
>   builtin/reset.c                |  4 ++-
>   t/t2016-checkout-patch.sh      | 46 +++++++++++++++++++---------------
>   t/t2020-checkout-detach.sh     | 12 +++++++++
>   t/t2024-checkout-dwim.sh       |  2 +-
>   t/t2071-restore-patch.sh       | 46 ++++++++++++++++++----------------
>   t/t3904-stash-patch.sh         |  6 -----
>   t/t7105-reset-patch.sh         | 38 +++++++++++++++-------------
>   t/t7106-reset-unborn-branch.sh |  2 +-
>   t/t7514-commit-patch.sh        |  6 -----
>   11 files changed, 92 insertions(+), 82 deletions(-)
> 
> change since v5:
> diff --git a/t/t7105-reset-patch.sh b/t/t7105-reset-patch.sh
> index 0f597416d8..453872c8ba 100755
> --- a/t/t7105-reset-patch.sh
> +++ b/t/t7105-reset-patch.sh
> @@ -29,6 +29,7 @@ test_expect_success PERL 'saying "n" does nothing' '
>   for opt in "HEAD" "@" ""
>   do
>          test_expect_success PERL "git reset -p $opt" '
> +               set_and_save_state dir/foo work work &&
>                  test_write_lines n y | git reset -p $opt >output &&
>                  verify_state dir/foo work head &&
>                  verify_saved_state bar &&
> 

^ permalink raw reply

* [PATCH v3 1/5] t9210: do not rely on lazy fetching to fail
From: Christian Couder @ 2024-02-14 14:25 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Patrick Steinhardt, John Cai, Linus Arver,
	Eric Sunshine
In-Reply-To: <20240214142513.4002639-1-christian.couder@gmail.com>

From: Junio C Hamano <gitster@pobox.com>

With "rev-list --missing=print $start", where "$start" is a 40-hex
object name, the object may or may not be lazily fetched from the
promisor.  Make sure it fails by forcing dereference of "$start"
at that point.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t9210-scalar.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh
index 4432a30d10..428339e342 100755
--- a/t/t9210-scalar.sh
+++ b/t/t9210-scalar.sh
@@ -154,7 +154,14 @@ test_expect_success 'scalar clone' '
 		test_cmp expect actual &&
 
 		test_path_is_missing 1/2 &&
-		test_must_fail git rev-list --missing=print $second &&
+
+		# This relies on the fact that the presence of "--missing"
+		# on the command line forces lazy fetching off before
+		# "$second^{blob}" gets parsed.  Without "^{blob}", a
+		# bare object name "$second" is taken into the queue and
+		# the command may not fail with a fixed "rev-list --missing".
+		test_must_fail git rev-list --missing=print "$second^{blob}" -- &&
+
 		git rev-list $second &&
 		git cat-file blob $second >actual &&
 		echo "second" >expect &&
-- 
2.44.0.rc0.51.gda36843b44


^ permalink raw reply related

* [PATCH v3 0/5] rev-list: allow missing tips with --missing
From: Christian Couder @ 2024-02-14 14:25 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Patrick Steinhardt, John Cai, Linus Arver,
	Eric Sunshine, Christian Couder
In-Reply-To: <20240208135055.2705260-1-christian.couder@gmail.com>

# Intro

A recent patch series, kn/rev-list-missing-fix [1] extended the
`--missing` option in git-rev-list(1) to support commit objects.

Unfortunately, git-rev-list(1) with `--missing` set to something other
than 'error' still fails, usually with a "fatal: bad object <oid>"
error message, when a missing object is passed as an argument.

This patch series removes this limitation and when using
`--missing=print` allows all missing objects to be printed including
those that are passed as arguments.

[1] https://lore.kernel.org/git/20231026101109.43110-1-karthik.188@gmail.com/

# Patch overview

Patch 1/5 (t9210: do not rely on lazy fetching to fail) is a test fix
suggested by Junio, so that a mostly unrelated test will not wrongly
fail when this series is merged.

Patches 2/5 (revision: clarify a 'return NULL' in get_reference()),
3/5 (oidset: refactor oidset_insert_from_set()) and
4/5 (t6022: fix 'test' style and 'even though' typo) are very small
preparatory cleanups.

Patch 5/5 (rev-list: allow missing tips with --missing=[print|allow*])
allows git-rev-list(1) with `--missing=<arg>` when <arg> is not
'error' to not fail if some tips it is passed are missing.

# Changes since V2

Thanks to Linus Arver, and Junio who commented on V2!

The changes since V2 are the following:

  - Patch 1/5 (t9210: do not rely on lazy fetching to fail) was added
    to fix a test that wrongly failed when this series was applied,
    thanks to Junio who authored it.

  - In patch 5/5 (rev-list: allow missing tips with
    --missing=[print|allow*]), some grammos and typos were fixed in the
    commit message, thanks to Junio and Linus.

  - In patch 5/5 (rev-list: allow missing tips with
    --missing=[print|allow*]), the NEEDSWORK comment was improved, thanks
    to Junio. In particular, it doesn't use "ugly" and "dumb" anymore
    and gives an example of what's broken.

  - In patch 5/5 (rev-list: allow missing tips with
    --missing=[print|allow*]), a code comment as been clarified by removing
    "already" from it.

# Range-diff since V2

-:  ---------- > 1:  6e6f2cc26b t9210: do not rely on lazy fetching to fail
1:  5233a83181 = 2:  733c78144e revision: clarify a 'return NULL' in get_reference()
2:  cfa72c8cf1 = 3:  4c9e032456 oidset: refactor oidset_insert_from_set()
3:  5668340516 = 4:  35ca6e7c3d t6022: fix 'test' style and 'even though' typo
4:  55792110ca ! 5:  da36843b44 rev-list: allow missing tips with --missing=[print|allow*]
    @@ Commit message
         We could introduce a new option to make it work like this, but most
         users are likely to prefer the command to have this behavior as the
         default one. Introducing a new option would require another dumb loop
    -    to look for that options early, which isn't nice.
    +    to look for that option early, which isn't nice.
     
         Also we made `git rev-list` work with missing commits very recently
         and the command is most often passed commits as arguments. So let's
    -    consider this as a bug fix related to these previous change.
    +    consider this as a bug fix related to these recent changes.
     
         While at it let's add a NEEDSWORK comment to say that we should get
         rid of the existing ugly dumb loops that parse the
    @@ builtin/rev-list.c: int cmd_rev_list(int argc, const char **argv, const char *pr
         * Let "--missing" to conditionally set fetch_if_missing.
         */
     +  /*
    -+   * NEEDSWORK: These dump loops to look for some options early
    -+   * are ugly. We really need setup_revisions() to have a
    -+   * mechanism to allow and disallow some sets of options for
    -+   * different commands (like rev-list, replay, etc). Such
    -+   * mechanism should do an early parsing of option and be able
    -+   * to manage the `--exclude-promisor-objects` and `--missing=...`
    -+   * options below.
    ++   * NEEDSWORK: These loops that attempt to find presence of
    ++   * options without understanding that the options they are
    ++   * skipping are broken (e.g., it would not know "--grep
    ++   * --exclude-promisor-objects" is not triggering
    ++   * "--exclude-promisor-objects" option).  We really need
    ++   * setup_revisions() to have a mechanism to allow and disallow
    ++   * some sets of options for different commands (like rev-list,
    ++   * replay, etc). Such a mechanism should do an early parsing
    ++   * of options and be able to manage the `--missing=...` and
    ++   * `--exclude-promisor-objects` options below.
     +   */
        for (i = 1; i < argc; i++) {
                const char *arg = argv[i];
    @@ builtin/rev-list.c: int cmd_rev_list(int argc, const char **argv, const char *pr
     -  if (arg_missing_action == MA_PRINT)
     +  if (arg_missing_action == MA_PRINT) {
                oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
    -+          /* Already add missing tips */
    ++          /* Add missing tips */
     +          oidset_insert_from_set(&missing_objects, &revs.missing_commits);
     +          oidset_clear(&revs.missing_commits);
     +  }


Christian Couder (4):
  revision: clarify a 'return NULL' in get_reference()
  oidset: refactor oidset_insert_from_set()
  t6022: fix 'test' style and 'even though' typo
  rev-list: allow missing tips with --missing=[print|allow*]

Junio C Hamano (1):
  t9210: do not rely on lazy fetching to fail

 Documentation/rev-list-options.txt |  4 ++
 builtin/rev-list.c                 | 18 ++++++++-
 list-objects-filter.c              | 11 +-----
 oidset.c                           | 10 +++++
 oidset.h                           |  6 +++
 revision.c                         | 16 ++++++--
 t/t6022-rev-list-missing.sh        | 61 +++++++++++++++++++++++++++++-
 t/t9210-scalar.sh                  |  9 ++++-
 8 files changed, 117 insertions(+), 18 deletions(-)

-- 
2.44.0.rc0.51.gda36843b44

^ permalink raw reply

* [PATCH v3 2/5] revision: clarify a 'return NULL' in get_reference()
From: Christian Couder @ 2024-02-14 14:25 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Patrick Steinhardt, John Cai, Linus Arver,
	Eric Sunshine, Christian Couder, Christian Couder
In-Reply-To: <20240214142513.4002639-1-christian.couder@gmail.com>

In general when we know a pointer variable is NULL, it's clearer to
explicitly return NULL than to return that variable.

In get_reference() when 'object' is NULL, we already return NULL
when 'revs->exclude_promisor_objects && is_promisor_object(oid)' is
true, but we return 'object' when 'revs->ignore_missing' is true.

Let's make the code clearer and more uniform by also explicitly
returning NULL when 'revs->ignore_missing' is true.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 revision.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/revision.c b/revision.c
index 2424c9bd67..4c5cd7c3ce 100644
--- a/revision.c
+++ b/revision.c
@@ -385,7 +385,7 @@ static struct object *get_reference(struct rev_info *revs, const char *name,

 	if (!object) {
 		if (revs->ignore_missing)
-			return object;
+			return NULL;
 		if (revs->exclude_promisor_objects && is_promisor_object(oid))
 			return NULL;
 		die("bad object %s", name);
-- 
2.44.0.rc0.51.gda36843b44

^ permalink raw reply related

* [PATCH v3 3/5] oidset: refactor oidset_insert_from_set()
From: Christian Couder @ 2024-02-14 14:25 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Patrick Steinhardt, John Cai, Linus Arver,
	Eric Sunshine, Christian Couder, Christian Couder
In-Reply-To: <20240214142513.4002639-1-christian.couder@gmail.com>

In a following commit, we will need to add all the oids from a set into
another set. In "list-objects-filter.c", there is already a static
function called add_all() to do that.

Let's rename this function oidset_insert_from_set() and move it into
oidset.{c,h} to make it generally available.

While at it, let's remove a useless `!= NULL`.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 list-objects-filter.c | 11 +----------
 oidset.c              | 10 ++++++++++
 oidset.h              |  6 ++++++
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/list-objects-filter.c b/list-objects-filter.c
index da287cc8e0..4346f8da45 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -711,15 +711,6 @@ static void filter_combine__free(void *filter_data)
 	free(d);
 }
 
-static void add_all(struct oidset *dest, struct oidset *src) {
-	struct oidset_iter iter;
-	struct object_id *src_oid;
-
-	oidset_iter_init(src, &iter);
-	while ((src_oid = oidset_iter_next(&iter)) != NULL)
-		oidset_insert(dest, src_oid);
-}
-
 static void filter_combine__finalize_omits(
 	struct oidset *omits,
 	void *filter_data)
@@ -728,7 +719,7 @@ static void filter_combine__finalize_omits(
 	size_t sub;
 
 	for (sub = 0; sub < d->nr; sub++) {
-		add_all(omits, &d->sub[sub].omits);
+		oidset_insert_from_set(omits, &d->sub[sub].omits);
 		oidset_clear(&d->sub[sub].omits);
 	}
 }
diff --git a/oidset.c b/oidset.c
index d1e5376316..91d1385910 100644
--- a/oidset.c
+++ b/oidset.c
@@ -23,6 +23,16 @@ int oidset_insert(struct oidset *set, const struct object_id *oid)
 	return !added;
 }
 
+void oidset_insert_from_set(struct oidset *dest, struct oidset *src)
+{
+	struct oidset_iter iter;
+	struct object_id *src_oid;
+
+	oidset_iter_init(src, &iter);
+	while ((src_oid = oidset_iter_next(&iter)))
+		oidset_insert(dest, src_oid);
+}
+
 int oidset_remove(struct oidset *set, const struct object_id *oid)
 {
 	khiter_t pos = kh_get_oid_set(&set->set, *oid);
diff --git a/oidset.h b/oidset.h
index ba4a5a2cd3..262f4256d6 100644
--- a/oidset.h
+++ b/oidset.h
@@ -47,6 +47,12 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid);
  */
 int oidset_insert(struct oidset *set, const struct object_id *oid);
 
+/**
+ * Insert all the oids that are in set 'src' into set 'dest'; a copy
+ * is made of each oid inserted into set 'dest'.
+ */
+void oidset_insert_from_set(struct oidset *dest, struct oidset *src);
+
 /**
  * Remove the oid from the set.
  *
-- 
2.44.0.rc0.51.gda36843b44


^ permalink raw reply related

* [PATCH v3 4/5] t6022: fix 'test' style and 'even though' typo
From: Christian Couder @ 2024-02-14 14:25 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Patrick Steinhardt, John Cai, Linus Arver,
	Eric Sunshine, Christian Couder, Christian Couder
In-Reply-To: <20240214142513.4002639-1-christian.couder@gmail.com>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 t/t6022-rev-list-missing.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 211672759a..5f1be7abb5 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -46,9 +46,10 @@ do
 			git rev-list --objects --no-object-names \
 				HEAD ^$obj >expect.raw &&
 
-			# Blobs are shared by all commits, so evethough a commit/tree
+			# Blobs are shared by all commits, so even though a commit/tree
 			# might be skipped, its blob must be accounted for.
-			if [ $obj != "HEAD:1.t" ]; then
+			if test $obj != "HEAD:1.t"
+			then
 				echo $(git rev-parse HEAD:1.t) >>expect.raw &&
 				echo $(git rev-parse HEAD:2.t) >>expect.raw
 			fi &&
-- 
2.44.0.rc0.51.gda36843b44


^ permalink raw reply related

* [PATCH v3 5/5] rev-list: allow missing tips with --missing=[print|allow*]
From: Christian Couder @ 2024-02-14 14:25 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Patrick Steinhardt, John Cai, Linus Arver,
	Eric Sunshine, Christian Couder, Christian Couder
In-Reply-To: <20240214142513.4002639-1-christian.couder@gmail.com>

In 9830926c7d (rev-list: add commit object support in `--missing`
option, 2023-10-27) we fixed the `--missing` option in `git rev-list`
so that it works with with missing commits, not just blobs/trees.

Unfortunately, such a command would still fail with a "fatal: bad
object <oid>" if it is passed a missing commit, blob or tree as an
argument (before the rev walking even begins).

When such a command is used to find the dependencies of some objects,
for example the dependencies of quarantined objects (see the
"QUARANTINE ENVIRONMENT" section in the git-receive-pack(1)
documentation), it would be better if the command would instead
consider such missing objects, especially commits, in the same way as
other missing objects.

If, for example `--missing=print` is used, it would be nice for some
use cases if the missing tips passed as arguments were reported in
the same way as other missing objects instead of the command just
failing.

We could introduce a new option to make it work like this, but most
users are likely to prefer the command to have this behavior as the
default one. Introducing a new option would require another dumb loop
to look for that option early, which isn't nice.

Also we made `git rev-list` work with missing commits very recently
and the command is most often passed commits as arguments. So let's
consider this as a bug fix related to these recent changes.

While at it let's add a NEEDSWORK comment to say that we should get
rid of the existing ugly dumb loops that parse the
`--exclude-promisor-objects` and `--missing=...` options early.

Helped-by: Linus Arver <linusa@google.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/rev-list-options.txt |  4 +++
 builtin/rev-list.c                 | 18 +++++++++-
 revision.c                         | 14 ++++++--
 t/t6022-rev-list-missing.sh        | 56 ++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index a583b52c61..bb753b6292 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1019,6 +1019,10 @@ Unexpected missing objects will raise an error.
 +
 The form '--missing=print' is like 'allow-any', but will also print a
 list of the missing objects.  Object IDs are prefixed with a ``?'' character.
++
+If some tips passed to the traversal are missing, they will be
+considered as missing too, and the traversal will ignore them. In case
+we cannot get their Object ID though, an error will be raised.
 
 --exclude-promisor-objects::
 	(For internal use only.)  Prefilter object traversal at
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index b3f4783858..ec455aa972 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -545,6 +545,18 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 	 *
 	 * Let "--missing" to conditionally set fetch_if_missing.
 	 */
+	/*
+	 * NEEDSWORK: These loops that attempt to find presence of
+	 * options without understanding that the options they are
+	 * skipping are broken (e.g., it would not know "--grep
+	 * --exclude-promisor-objects" is not triggering
+	 * "--exclude-promisor-objects" option).  We really need
+	 * setup_revisions() to have a mechanism to allow and disallow
+	 * some sets of options for different commands (like rev-list,
+	 * replay, etc). Such a mechanism should do an early parsing
+	 * of options and be able to manage the `--missing=...` and
+	 * `--exclude-promisor-objects` options below.
+	 */
 	for (i = 1; i < argc; i++) {
 		const char *arg = argv[i];
 		if (!strcmp(arg, "--exclude-promisor-objects")) {
@@ -753,8 +765,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 
 	if (arg_print_omitted)
 		oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
-	if (arg_missing_action == MA_PRINT)
+	if (arg_missing_action == MA_PRINT) {
 		oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+		/* Add missing tips */
+		oidset_insert_from_set(&missing_objects, &revs.missing_commits);
+		oidset_clear(&revs.missing_commits);
+	}
 
 	traverse_commit_list_filtered(
 		&revs, show_commit, show_object, &info,
diff --git a/revision.c b/revision.c
index 4c5cd7c3ce..80c349d347 100644
--- a/revision.c
+++ b/revision.c
@@ -388,6 +388,10 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
 			return NULL;
 		if (revs->exclude_promisor_objects && is_promisor_object(oid))
 			return NULL;
+		if (revs->do_not_die_on_missing_objects) {
+			oidset_insert(&revs->missing_commits, oid);
+			return NULL;
+		}
 		die("bad object %s", name);
 	}
 	object->flags |= flags;
@@ -1947,6 +1951,7 @@ void repo_init_revisions(struct repository *r,
 	init_display_notes(&revs->notes_opt);
 	list_objects_filter_init(&revs->filter);
 	init_ref_exclusions(&revs->ref_excludes);
+	oidset_init(&revs->missing_commits, 0);
 }
 
 static void add_pending_commit_list(struct rev_info *revs,
@@ -2178,13 +2183,18 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl
 	if (revarg_opt & REVARG_COMMITTISH)
 		get_sha1_flags |= GET_OID_COMMITTISH;
 
+	/*
+	 * Even if revs->do_not_die_on_missing_objects is set, we
+	 * should error out if we can't even get an oid, as
+	 * `--missing=print` should be able to report missing oids.
+	 */
 	if (get_oid_with_context(revs->repo, arg, get_sha1_flags, &oid, &oc))
 		return revs->ignore_missing ? 0 : -1;
 	if (!cant_be_filename)
 		verify_non_filename(revs->prefix, arg);
 	object = get_reference(revs, arg, &oid, flags ^ local_flags);
 	if (!object)
-		return revs->ignore_missing ? 0 : -1;
+		return (revs->ignore_missing || revs->do_not_die_on_missing_objects) ? 0 : -1;
 	add_rev_cmdline(revs, object, arg_, REV_CMD_REV, flags ^ local_flags);
 	add_pending_object_with_path(revs, object, arg, oc.mode, oc.path);
 	free(oc.path);
@@ -3830,8 +3840,6 @@ int prepare_revision_walk(struct rev_info *revs)
 				       FOR_EACH_OBJECT_PROMISOR_ONLY);
 	}
 
-	oidset_init(&revs->missing_commits, 0);
-
 	if (!revs->reflog_info)
 		prepare_to_use_bloom_filter(revs);
 	if (!revs->unsorted_input)
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 5f1be7abb5..78387eebb3 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -78,4 +78,60 @@ do
 	done
 done
 
+for missing_tip in "HEAD~1" "HEAD~1^{tree}" "HEAD:1.t"
+do
+	# We want to check that things work when both
+	#   - all the tips passed are missing (case existing_tip = ""), and
+	#   - there is one missing tip and one existing tip (case existing_tip = "HEAD")
+	for existing_tip in "" "HEAD"
+	do
+		for action in "allow-any" "print"
+		do
+			test_expect_success "--missing=$action with tip '$missing_tip' missing and tip '$existing_tip'" '
+				oid="$(git rev-parse $missing_tip)" &&
+				path=".git/objects/$(test_oid_to_path $oid)" &&
+
+				# Before the object is made missing, we use rev-list to
+				# get the expected oids.
+				if test "$existing_tip" = "HEAD"
+				then
+					git rev-list --objects --no-object-names \
+						HEAD ^$missing_tip >expect.raw
+				else
+					>expect.raw
+				fi &&
+
+				# Blobs are shared by all commits, so even though a commit/tree
+				# might be skipped, its blob must be accounted for.
+				if test "$existing_tip" = "HEAD" && test $missing_tip != "HEAD:1.t"
+				then
+					echo $(git rev-parse HEAD:1.t) >>expect.raw &&
+					echo $(git rev-parse HEAD:2.t) >>expect.raw
+				fi &&
+
+				mv "$path" "$path.hidden" &&
+				test_when_finished "mv $path.hidden $path" &&
+
+				git rev-list --missing=$action --objects --no-object-names \
+				     $oid $existing_tip >actual.raw &&
+
+				# When the action is to print, we should also add the missing
+				# oid to the expect list.
+				case $action in
+				allow-any)
+					;;
+				print)
+					grep ?$oid actual.raw &&
+					echo ?$oid >>expect.raw
+					;;
+				esac &&
+
+				sort actual.raw >actual &&
+				sort expect.raw >expect &&
+				test_cmp expect actual
+			'
+		done
+	done
+done
+
 test_done
-- 
2.44.0.rc0.51.gda36843b44


^ permalink raw reply related

* Re: [PATCH v2 0/4] rev-list: allow missing tips with --missing
From: Christian Couder @ 2024-02-14 14:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Patrick Steinhardt, John Cai, Linus Arver
In-Reply-To: <xmqq7cjemttr.fsf@gitster.g>

On Fri, Feb 9, 2024 at 12:15 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> > # Patch overview
> >
> > Patches 1/4 (revision: clarify a 'return NULL' in get_reference()),
> > 2/4 (oidset: refactor oidset_insert_from_set()) and
> > 3/4 (t6022: fix 'test' style and 'even though' typo) are very small
> > preparatory cleanups.
> >
> > Patch 4/4 (rev-list: allow missing tips with --missing=[print|allow*])
> > allows git-rev-list(1) with `--missing=<arg>` when <arg> is not
> > 'error' to not fail if some tips it is passed are missing.
>
> Thanks.  There is an existing test that makes a bad assumption and
> fails with these patches.  We may need something like this patch as
> a preliminary fix before these four patches.

Thanks, I have added your patch to the V3 I just sent.

^ permalink raw reply

* Re: [PATCH v2 2/4] oidset: refactor oidset_insert_from_set()
From: Christian Couder @ 2024-02-14 14:33 UTC (permalink / raw)
  To: Linus Arver
  Cc: git, Junio C Hamano, Patrick Steinhardt, John Cai,
	Christian Couder
In-Reply-To: <owlyle7o9iyf.fsf@fine.c.googlers.com>

On Tue, Feb 13, 2024 at 10:02 PM Linus Arver <linusa@google.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> > In a following commit, we will need to add all the oids from a set into
> > another set. In "list-objects-filter.c", there is already a static
> > function called add_all() to do that.
>
> Nice find.
>
> > Let's rename this function oidset_insert_from_set() and move it into
> > oidset.{c,h} to make it generally available.
>
> At some point (I don't ask for it in this series) we should add unit
> tests for this newly-exposed function. Presumably the stuff around
> object/oid handling is stable enough to receive unit tests.

Yeah, ideally there should be unit tests for oidset and all its
features, but it seems to me that there aren't any. Also oidset is
based on khash.h which was originally imported from
https://github.com/attractivechaos/klib/ without tests. So I think
it's a different topic to add tests from scratch to oidset, khash.h or
both.

Actually after taking another look, it looks like khash.h or some of
its features are tested through "helper/test-oidmap.c" and
"t0016-oidmap.sh". I still think it's another topic to test oidset.

> > +void oidset_insert_from_set(struct oidset *dest, struct oidset *src)
> > +{
> > +     struct oidset_iter iter;
> > +     struct object_id *src_oid;
> > +
> > +     oidset_iter_init(src, &iter);
> > +     while ((src_oid = oidset_iter_next(&iter)))
>
> Are the extra parentheses necessary?

Yes. Without them gcc errors out with:

oidset.c: In function ‘oidset_insert_from_set’:
oidset.c:32:16: error: suggest parentheses around assignment used as
truth value [-Werror=parentheses]
  32 |         while (src_oid = oidset_iter_next(&iter))
     |                ^~~~~~~

Having extra parentheses is a way to tell the compiler that we do want
to use '=' and not '=='. This helps avoid the very common mistake of
using '=' where '==' was intended.

> > +/**
> > + * Insert all the oids that are in set 'src' into set 'dest'; a copy
> > + * is made of each oid inserted into set 'dest'.
> > + */
>
> Just above in oid_insert() there is already a comment about needing to
> copy each oid.

(It's "oidset_insert()" not "oid_insert()".)

>     /**
>      * Insert the oid into the set; a copy is made, so "oid" does not need
>      * to persist after this function is called.
>      *
>      * Returns 1 if the oid was already in the set, 0 otherwise. This can be used
>      * to perform an efficient check-and-add.
>      */
>
> so perhaps the following wording is simpler?
>
>     Like oid_insert(), but insert all oids found in 'src'. Calls
>     oid_insert() internally.

(What you suggest would need s/oid_insert/oidset_insert/)

Yeah, it's a bit simpler and shorter, but on the other hand a reader
might have to read both this and the oidset_insert() doc, so in the
end I am not sure it's a big win for readability. And if they don't
read the oidset_insert() doc, they might miss the fact that we are
copying the oids we insert, which might result in a bug.

Also your wording ties the implementation with oidset_insert(), which
we might not want if we could find something more performant. See
Junio's comment on this patch saying his initial reaction was that
copying underlying bits may even be more efficient.

So I prefer not to change this.

> > +void oidset_insert_from_set(struct oidset *dest, struct oidset *src);
>
> Perhaps "oidset_insert_all" would be a simpler name? I generally prefer
> to reuse any descriptors in comments to guide the names. Plus this
> function used to be called "add_all()" so keeping the "all" naming style
> feels right.

We already have other related types like 'struct oid-array' and
'struct oidmap' to store oids, as well as code that inserts many oids
into an oidset from a 'struct ref *' linked list or array in a tight
loop. So if we want to add functions inserting all the oids from
instances of such types, how should we call them?

I would say we should use suffixes like: "_from_set", "_from_map",
"from_array", "_from_ref_list", "_from_ref_array", etc.

If we start using just "_all" for oidset, then what should we use for
the other types? I don't see a good answer to that, so I prefer to
stick with "_from_set" for oidset.

Thanks.

^ permalink raw reply

* Re: [PATCH v2 4/4] rev-list: allow missing tips with --missing=[print|allow*]
From: Christian Couder @ 2024-02-14 14:34 UTC (permalink / raw)
  To: Linus Arver
  Cc: Junio C Hamano, git, Patrick Steinhardt, John Cai,
	Christian Couder
In-Reply-To: <owlyfrxw9ei3.fsf@fine.c.googlers.com>

On Tue, Feb 13, 2024 at 11:38 PM Linus Arver <linusa@google.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> > Christian Couder <christian.couder@gmail.com> writes:
> >
> >> +     * mechanism to allow and disallow some sets of options for
> >> +     * different commands (like rev-list, replay, etc). Such
> >> +     * mechanism should do an early parsing of option and be able
> >> +     * to manage the `--exclude-promisor-objects` and `--missing=...`
> >> +     * options below.
> >> +     */
> >>      for (i = 1; i < argc; i++) {
> >>              const char *arg = argv[i];
> >>              if (!strcmp(arg, "--exclude-promisor-objects")) {
> >> @@ -753,8 +762,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
> >>
> >>      if (arg_print_omitted)
> >>              oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
> >> -    if (arg_missing_action == MA_PRINT)
> >> +    if (arg_missing_action == MA_PRINT) {
> >>              oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
> >> +            /* Already add missing tips */
> >> +            oidset_insert_from_set(&missing_objects, &revs.missing_commits);
> >> +            oidset_clear(&revs.missing_commits);
> >> +    }
> >
> > It is unclear what "already" here refers to, at least to me.

I wanted to hint that we already have some missing objects that we can
add to the set. But it's not an important detail and I agree it can be
confusing.

> It's grammatically correct but perhaps a bit "over eager" (gives the
> impression that we get these missing tips all the time and is a common
> "happy" path). I do still think my earlier v1 comments
>
>     Did you mean "Add already-missing commits"? Perhaps even more explicit
>     would be "Add missing tips"?

"Add missing tips" is used in the V3 I just sent. Thanks.

> are relevant here.

^ permalink raw reply

* Re: [PATCH v2 4/4] rev-list: allow missing tips with --missing=[print|allow*]
From: Christian Couder @ 2024-02-14 14:38 UTC (permalink / raw)
  To: Linus Arver
  Cc: git, Junio C Hamano, Patrick Steinhardt, John Cai,
	Christian Couder
In-Reply-To: <owlyil2s9eq6.fsf@fine.c.googlers.com>

On Tue, Feb 13, 2024 at 11:33 PM Linus Arver <linusa@google.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:

> > We could introduce a new option to make it work like this, but most
> > users are likely to prefer the command to have this behavior as the
> > default one. Introducing a new option would require another dumb loop
> > to look for that options early, which isn't nice.
>
> s/options/option

Fixed in the V3 I just sent.

> > Also we made `git rev-list` work with missing commits very recently
> > and the command is most often passed commits as arguments. So let's
> > consider this as a bug fix related to these previous change.
>
> s/previous change/recent changes

Fixed in V3, thanks.

> > While at it let's add a NEEDSWORK comment to say that we should get
> > rid of the existing ugly dumb loops that parse the
> > `--exclude-promisor-objects` and `--missing=...` options early.

> > @@ -1019,6 +1019,10 @@ Unexpected missing objects will raise an error.
> >  +
> >  The form '--missing=print' is like 'allow-any', but will also print a
> >  list of the missing objects.  Object IDs are prefixed with a ``?'' character.
> > ++
> > +If some tips passed to the traversal are missing, they will be
> > +considered as missing too, and the traversal will ignore them. In case
> > +we cannot get their Object ID though, an error will be raised.
>
> The only other mention of the term "tips" is for the "--alternate-refs"
> flag on line 215 which uses "ref tips". Perhaps this is obvious to
> veteran Git developers but I do wonder if we need to somehow define it
> (however briefly) the first time we mention it (either in the document
> as a whole, or just within this newly added paragraph).

I did a quick grep in Documentation/git*.txt and found more than 130
instances of the 'tip' word. So I think it is quite common in the
whole Git documentation and our glossary would likely be the right
place to document it if we decide to do so. Anyway I think that topic
is independent from this small series.

> Here's an alternate wording
>
>     Ref tips given on the command line are a special case.

`git rev-list` has a `--stdin` mode which makes it accept tips from
stdin, so talking about the command line is not enough. Also maybe one
day some config option could be added that makes the command include
additional tips.

> They are
>     first dereferenced to Object IDs (if this is not possible, an error
>     will be raised). Then these IDs are checked to see if the objects
>     they refer to exist. If so, the traversal happens starting with
>     these tips. Otherwise, then such tips will not be used for
>     traversal.
>
>     Even though such missing tips won't be included for traversal, for
>     purposes of the `--missing` flag they will be treated the same way
>     as those objects that did get traversed (and were determined to be
>     missing). For example, if `--missing=print` is given then the Object
>     IDs of these tips will be printed just like all other missing
>     objects encountered during traversal.

Your wording describes what happens correctly, but I don't see much
added value for this specific patch compared to my wording which is
shorter.

Here I think, we only need to describe the result of the command in
the special case that the patch is fixing. We don't need to go into
details of how the command or even --missing works. It could be
interesting to go into details of how things work, but I think it's a
separate topic. Or perhaps it's even actually counter productive to go
into too much detail as it could prevent us from finding other ways to
make it work better. Anyway it seems to me to be a separate topic to
discuss.

> But also, I realize that these documentation touch-ups might be better
> served by an overall pass over the whole document, so it's fine if we
> decide not to take this suggestion at this time.

Right, I agree. Thanks for telling this.

> Aside: unfortunately we don't seem to define the relationship between
> ref tips (e.g., "HEAD"), object IDs (40-char hex string), and the actual
> objects (the real data that we traverse over). It's probably another
> thing that could be fixed up in the docs in the future.

Yeah, and for rev-list a tip could also be a tree or a blob. It
doesn't need to be a "ref tip". (Even though a ref can point to a tree
or a blog, it's very rare in practice.)

> >  --exclude-promisor-objects::
> >       (For internal use only.)  Prefilter object traversal at
> > diff --git a/builtin/rev-list.c b/builtin/rev-list.c
> > index b3f4783858..ec9556f135 100644
> > --- a/builtin/rev-list.c
> > +++ b/builtin/rev-list.c
> > @@ -545,6 +545,15 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
> >        *
> >        * Let "--missing" to conditionally set fetch_if_missing.
> >        */
> > +     /*
> > +      * NEEDSWORK: These dump loops to look for some options early
> > +      * are ugly.
>
> I agree with Junio's suggestion to use more objective language.
>
> > We really need setup_revisions() to have a
> > +      * mechanism to allow and disallow some sets of options for
> > +      * different commands (like rev-list, replay, etc). Such
>
> s/Such/Such a

Fixed in V3

> > +      * mechanism should do an early parsing of option and be able
>
> s/option/options

Fixed in V3, thanks.

> > +      * to manage the `--exclude-promisor-objects` and `--missing=...`
> > +      * options below.
> > +      */
> >       for (i = 1; i < argc; i++) {
> >               const char *arg = argv[i];
> >               if (!strcmp(arg, "--exclude-promisor-objects")) {
> >
> > [...]
> >
> > @@ -2178,13 +2183,18 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl
> >       if (revarg_opt & REVARG_COMMITTISH)
> >               get_sha1_flags |= GET_OID_COMMITTISH;
> >
> > +     /*
> > +      * Even if revs->do_not_die_on_missing_objects is set, we
> > +      * should error out if we can't even get an oid, ...
> > +      */
>
> Perhaps this wording is more precise?
>
>     If we can't even get an oid, we are forced to error out (regardless
>     of revs->do_not_die_on_missing_objects) because a valid traversal
>     must start from *some* valid oid. OTOH we ignore the ref tip
>     altogether with revs->ignore_missing.

This uses "valid oid" and "valid traversal", but I am not sure it's
easy to understand what "valid" means in both of these expressions.

Also if all the tips passed to the command are missing, the traversal
doesn't need to actually start. The command, assuming
`--missing=print` is passed, just needs to output the oids of the tips
as missing oids.

I also think that "ref tip" might be misleading as trees and blos can
be passed as tips.

So I prefer to keep the wording I used.

> > +      * ... as
> > +      * `--missing=print` should be able to report missing oids.
>
> I think this comment would be better placed ...
>
> >       if (get_oid_with_context(revs->repo, arg, get_sha1_flags, &oid, &oc))
> >               return revs->ignore_missing ? 0 : -1;
> >       if (!cant_be_filename)
> >               verify_non_filename(revs->prefix, arg);
> >       object = get_reference(revs, arg, &oid, flags ^ local_flags);
>
> ... around here.

In a previous round, I was asked to put such a comment before `if
(get_oid_with_context(...))`. So I prefer to avoid some back and forth
here.

> > +++ b/t/t6022-rev-list-missing.sh
> > @@ -78,4 +78,60 @@ do
> >       done
> >  done
> >
> > +for missing_tip in "HEAD~1" "HEAD~1^{tree}" "HEAD:1.t"
> > +do
> > +     # We want to check that things work when both
> > +     #   - all the tips passed are missing (case existing_tip = ""), and
> > +     #   - there is one missing tip and one existing tip (case existing_tip = "HEAD")
> > +     for existing_tip in "" "HEAD"
> > +     do
>
> Though I am biased, these new variable names do make this test that much
> easier to read. Thanks.

Thanks for suggesting them and for your reviews.

^ permalink raw reply

* Re: [PATCH v2 4/4] rev-list: allow missing tips with --missing=[print|allow*]
From: Christian Couder @ 2024-02-14 14:39 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, Patrick Steinhardt, John Cai, Linus Arver, Christian Couder
In-Reply-To: <xmqqeddmonq0.fsf@gitster.g>

On Thu, Feb 8, 2024 at 6:44 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:

> > diff --git a/builtin/rev-list.c b/builtin/rev-list.c
> > index b3f4783858..ec9556f135 100644
> > --- a/builtin/rev-list.c
> > +++ b/builtin/rev-list.c
> > @@ -545,6 +545,15 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
> >        *
> >        * Let "--missing" to conditionally set fetch_if_missing.
> >        */
> > +     /*
> > +      * NEEDSWORK: These dump loops to look for some options early
> > +      * are ugly. We really need setup_revisions() to have a
>
> I would drop the "dump" and "ugly" from here.  It OK to make value
> judgement with such words in casual conversations on a proposed
> patch, but when we make a request to future developers to fix our
> mess, we should be more objective and stick to the techincal facts,
> so that they have better understanding on why we think this area
> needs more work.
>
> Perhaps something like:
>
>     These loops that attempt to find presence of options without
>     understanding what the options they are skipping are broken
>     (e.g., it would not know "--grep --exclude-promisor-objects" is
>     not triggering "--exclude-promisor-objects" option).

I have used what you suggest in V3, except for s/what/that/.

> Everything after "We really need" is good (modulo possible grammos),
> I think.  Thanks for writing it.
>
> > +      * mechanism to allow and disallow some sets of options for
> > +      * different commands (like rev-list, replay, etc). Such
> > +      * mechanism should do an early parsing of option and be able
> > +      * to manage the `--exclude-promisor-objects` and `--missing=...`
> > +      * options below.
> > +      */
> >       for (i = 1; i < argc; i++) {
> >               const char *arg = argv[i];
> >               if (!strcmp(arg, "--exclude-promisor-objects")) {
> > @@ -753,8 +762,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
> >
> >       if (arg_print_omitted)
> >               oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
> > -     if (arg_missing_action == MA_PRINT)
> > +     if (arg_missing_action == MA_PRINT) {
> >               oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
> > +             /* Already add missing tips */
> > +             oidset_insert_from_set(&missing_objects, &revs.missing_commits);
> > +             oidset_clear(&revs.missing_commits);
> > +     }
>
> It is unclear what "already" here refers to, at least to me.

I removed it and changed the comment to just "/* Add missing tips */"
in the V3 I just sent.

Thanks.

^ permalink raw reply

* Re: What's cooking in git.git (Feb 2024, #05; Tue, 13)
From: Ghanshyam Thakkar @ 2024-02-14 15:08 UTC (permalink / raw)
  To: Junio C Hamano, git
In-Reply-To: <xmqqo7cjzhl0.fsf@gitster.g>

On Wed Feb 14, 2024 at 11:57 AM IST, Junio C Hamano wrote:
> * gt/at-is-synonym-for-head-in-add-patch (2024-02-13) 2 commits
>  - add -p tests: remove PERL prerequisites
>  - add-patch: classify '@' as a synonym for 'HEAD'
>
>  Teach "git checkout -p" and friends that "@" is a synonym for
>  "HEAD".
>
>  Will merge to 'next'?
>  source: <20240211202035.7196-2-shyamthakkar001@gmail.com>

Hello Junio,

I see that it is already in 'next'. However, I have rerolled it for a
single line change. If find it is worth it, here it is: 
https://lore.kernel.org/git/20240213000601.520731-2-shyamthakkar001@gmail.com/

Thanks.

^ permalink raw reply

* Re: [PATCH v3 1/8] reftable/stack: do not overwrite errors when compacting
From: Han-Wen Nienhuys @ 2024-02-14 15:12 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, Junio C Hamano
In-Reply-To: <1dc8ddf04a112c38f41d573a48dac3f99b4b51e9.1704262787.git.ps@pks.im>

Good catch!

Sorry for messing this up.

> In the worst case,
> this can lead to a compacted stack that is missing records.

Yeah, that would be an insidious corruption. Have you considered
writing a test to reproduce this (and thus verify that the fix really
fixes the problem?)

I think it wouldn't be too difficult: you could create a custom
blocksource wrapper that returns I/O error on the Nth read, and then
create a reftable with two ref blocks (could just be 2 records if you
use a small blocksize and a large refname) and two log blocks.  Merge
that with an empty table, and see if the compacted result is what you
got in. Loop over N to get coverage for all error paths.

On Wed, Jan 3, 2024 at 7:22 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> In order to compact multiple stacks we iterate through the merged ref
> and log records. When there is any error either when reading the records
> from the old merged table or when writing the records to the new table
> then we break out of the respective loops. When breaking out of the loop
> for the ref records though the error code will be overwritten, which may
> cause us to inadvertently skip over bad ref records. In the worst case,
> this can lead to a compacted stack that is missing records.
>
> Fix the code by using `goto done` instead so that any potential error
> codes are properly returned to the caller.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  reftable/stack.c | 20 ++++++++------------
>  1 file changed, 8 insertions(+), 12 deletions(-)
>
> diff --git a/reftable/stack.c b/reftable/stack.c
> index 16bab82063..8729508dc3 100644
> --- a/reftable/stack.c
> +++ b/reftable/stack.c
> @@ -801,18 +801,16 @@ static int stack_write_compact(struct reftable_stack *st,
>                         err = 0;
>                         break;
>                 }
> -               if (err < 0) {
> -                       break;
> -               }
> +               if (err < 0)
> +                       goto done;
>
>                 if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
>                         continue;
>                 }
>
>                 err = reftable_writer_add_ref(wr, &ref);
> -               if (err < 0) {
> -                       break;
> -               }
> +               if (err < 0)
> +                       goto done;
>                 entries++;
>         }
>         reftable_iterator_destroy(&it);
> @@ -827,9 +825,8 @@ static int stack_write_compact(struct reftable_stack *st,
>                         err = 0;
>                         break;
>                 }
> -               if (err < 0) {
> -                       break;
> -               }
> +               if (err < 0)
> +                       goto done;
>                 if (first == 0 && reftable_log_record_is_deletion(&log)) {
>                         continue;
>                 }
> @@ -845,9 +842,8 @@ static int stack_write_compact(struct reftable_stack *st,
>                 }
>
>                 err = reftable_writer_add_log(wr, &log);
> -               if (err < 0) {
> -                       break;
> -               }
> +               if (err < 0)
> +                       goto done;
>                 entries++;
>         }
>
> --
> 2.43.GIT
>


-- 
Han-Wen Nienhuys - hanwenn@gmail.com - http://www.xs4all.nl/~hanwen

^ permalink raw reply

* Re: [PATCH 07/12] fsmonitor: refactor untracked-cache invalidation
From: Junio C Hamano @ 2024-02-14 16:46 UTC (permalink / raw)
  To: Jeff Hostetler via GitGitGadget; +Cc: git, Jeff Hostetler
In-Reply-To: <1df4019931c29824b174defb75e09823d604219e.1707857541.git.gitgitgadget@gmail.com>

"Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Jeff Hostetler <jeffhostetler@github.com>
>
> Signed-off-by: Jeff Hostetler <jeffhostetler@github.com>
> ---
>  fsmonitor.c | 38 ++++++++++++++++++++++++++------------
>  1 file changed, 26 insertions(+), 12 deletions(-)

Sorry, but the proposed commit log is way lacking for this
particular step.  Readers have already understood, after reading
steps like [04/12] and [05/12], that you use the verb "refactor" in
its usual sense, i.e. reorganize the code around without changing
behaviour in order to enhance readability and to make it easier for
code reuse in future steps, and these two steps did exactly that:
helper functions are split out of larger functions, presumably
either to allow adding new callers to the helpers, or to make the
result of adding more code to the caller easier to follow [*].

However, the changes in this step look vastly different, and it is
not even clear if this change intends to keep the behaviour before
and after it the same, or if it does, how they are the same.

I can sort-of see that the original code made a call to
untracked_cache_invalidate_path() at the very end of the
fsmonitor_refresh_callback(), but the updated code no longer does
so.  Why?  Is it because it is the root cause of an unstated bug
that we don't do so until the end in the current code?  Is it
because the order does not matter (how and why?) and the resulting
code becomes better (how?  simpler to follow? more performant?
avoids duplicated work?  something else)?

It does not help to call a new helper function with a cryptic "my_"
name, either.

Please try again?  Thanks.

[Footnote] 

 * These two are vastly different goals, and there may be other
   reasons why you are doing such refactoring.  It would have been
   nicer if such a preliminary refactoring steps had explained what
   the intended course of evolution for the code involved in the
   refactoring is.

>
> diff --git a/fsmonitor.c b/fsmonitor.c
> index 754fe20cfd0..14585b6c516 100644
> --- a/fsmonitor.c
> +++ b/fsmonitor.c
> @@ -183,11 +183,35 @@ static int query_fsmonitor_hook(struct repository *r,
>  	return result;
>  }
>  
> +/*
> + * Invalidate the untracked cache for the given pathname.  Copy the
> + * buffer to a proper null-terminated string (since the untracked
> + * cache code does not use (buf, len) style argument).  Also strip any
> + * trailing slash.
> + */
> +static void my_invalidate_untracked_cache(
> +	struct index_state *istate, const char *name, int len)
> +{
> +	struct strbuf work_path = STRBUF_INIT;
> +
> +	if (!len)
> +		return;
> +
> +	if (name[len-1] == '/')
> +		len--;
> +
> +	strbuf_add(&work_path, name, len);
> +	untracked_cache_invalidate_path(istate, work_path.buf, 0);
> +	strbuf_release(&work_path);
> +}
> +
>  static void fsmonitor_refresh_callback_unqualified(
>  	struct index_state *istate, const char *name, int len, int pos)
>  {
>  	int i;
>  
> +	my_invalidate_untracked_cache(istate, name, len);
> +
>  	if (pos >= 0) {
>  		/*
>  		 * We have an exact match for this path and can just
> @@ -253,6 +277,8 @@ static int fsmonitor_refresh_callback_slash(
>  	int i;
>  	int nr_in_cone = 0;
>  
> +	my_invalidate_untracked_cache(istate, name, len);
> +
>  	if (pos < 0)
>  		pos = -pos - 1;
>  
> @@ -278,21 +304,9 @@ static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
>  
>  	if (name[len - 1] == '/') {
>  		fsmonitor_refresh_callback_slash(istate, name, len, pos);
> -
> -		/*
> -		 * We need to remove the traling "/" from the path
> -		 * for the untracked cache.
> -		 */
> -		name[len - 1] = '\0';
>  	} else {
>  		fsmonitor_refresh_callback_unqualified(istate, name, len, pos);
>  	}
> -
> -	/*
> -	 * Mark the untracked cache dirty even if it wasn't found in the index
> -	 * as it could be a new untracked file.
> -	 */
> -	untracked_cache_invalidate_path(istate, name, 0);
>  }
>  
>  /*

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox