* [PATCH] rev-list: print missing object type with --missing=print-type
@ 2025-01-08 3:40 Justin Tobler
2025-01-08 10:08 ` Christian Couder
` (4 more replies)
0 siblings, 5 replies; 37+ messages in thread
From: Justin Tobler @ 2025-01-08 3:40 UTC (permalink / raw)
To: git; +Cc: karthik.188, Justin Tobler
Handling of missing objects encounted by git-rev-list(1) can be
configured with the `--missing=<action>` option and specifying the
desired action. Of the available missing actions, none provide a way to
print additional information about the missing object such as its type.
Add a new missing action called `print-type`. Similar to `print`, this
action prints a list of missing objects but also includes the object
type if available in the form: `?<oid> [type]`.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Instead of adding an additional missing action type for the explicit
purpose of printing type info, I also considered a separate option
(something like `--object-types`, or maybe a `--missing-format`) that
could be used in combination with the `--missing=print` option to
achieve the same result. The main intent is for the missing object type
and I wasn't sure if type info would be useful in the general case so I
opted to choose the former approach.
Thanks,
-Justin
---
Documentation/rev-list-options.txt | 3 ++
builtin/rev-list.c | 74 +++++++++++++++++++++++++-----
t/t6022-rev-list-missing.sh | 13 +++++-
3 files changed, 77 insertions(+), 13 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 459e5a02f5..277a0b645e 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1024,6 +1024,9 @@ Unexpected missing objects will raise an error.
The form '--missing=print' is like 'allow-any', but will also print a
list of the missing objects. Object IDs are prefixed with a ``?'' character.
+
+The form '--missing=print-type' is like 'print', but will also print the
+missing object type information if available in the form `?<oid> [type]`.
++
If some tips passed to the traversal are missing, they will be
considered as missing too, and the traversal will ignore them. In case
we cannot get their Object ID though, an error will be raised.
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 3196da7b2d..ee473da52c 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -22,6 +22,7 @@
#include "progress.h"
#include "reflog-walk.h"
#include "oidset.h"
+#include "oidmap.h"
#include "packfile.h"
static const char rev_list_usage[] =
@@ -73,11 +74,16 @@ static unsigned progress_counter;
static struct oidset omitted_objects;
static int arg_print_omitted; /* print objects omitted by filter */
-static struct oidset missing_objects;
+struct missing_objects_map_entry {
+ struct oidmap_entry entry;
+ unsigned type : TYPE_BITS;
+};
+static struct oidmap missing_objects;
enum missing_action {
MA_ERROR = 0, /* fail if any missing objects are encountered */
MA_ALLOW_ANY, /* silently allow ALL missing objects */
MA_PRINT, /* print ALL missing objects in special section */
+ MA_PRINT_TYPE, /* same as MA_PRINT but includes available type info */
MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
@@ -103,6 +109,8 @@ static off_t get_object_disk_usage(struct object *obj)
static inline void finish_object__ma(struct object *obj)
{
+ struct missing_objects_map_entry *entry, *old;
+
/*
* Whether or not we try to dynamically fetch missing objects
* from the server, we currently DO NOT have the object. We
@@ -119,7 +127,12 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_PRINT:
- oidset_insert(&missing_objects, &obj->oid);
+ case MA_PRINT_TYPE:
+ CALLOC_ARRAY(entry, 1);
+ entry->entry.oid = obj->oid;
+ entry->type = obj->type;
+ old = oidmap_put(&missing_objects, entry);
+ free(old);
return;
case MA_ALLOW_PROMISOR:
@@ -414,6 +427,12 @@ static inline int parse_missing_action_value(const char *value)
return 1;
}
+ if (!strcmp(value, "print-type")) {
+ arg_missing_action = MA_PRINT_TYPE;
+ fetch_if_missing = 0;
+ return 1;
+ }
+
if (!strcmp(value, "allow-promisor")) {
arg_missing_action = MA_ALLOW_PROMISOR;
fetch_if_missing = 0;
@@ -781,10 +800,26 @@ int cmd_rev_list(int argc,
if (arg_print_omitted)
oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
- if (arg_missing_action == MA_PRINT) {
- oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
- /* Add missing tips */
- oidset_insert_from_set(&missing_objects, &revs.missing_commits);
+ if (arg_missing_action == MA_PRINT ||
+ arg_missing_action == MA_PRINT_TYPE) {
+ struct oidset_iter iter;
+ struct object_id *oid;
+
+ oidmap_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ oidset_iter_init(&revs.missing_commits, &iter);
+
+ /*
+ * Revisions pointing to missing objects lack the context
+ * required to determine object type.
+ */
+ while ((oid = oidset_iter_next(&iter))) {
+ struct missing_objects_map_entry *entry;
+
+ CALLOC_ARRAY(entry, 1);
+ oidcpy(&entry->entry.oid, oid);
+ oidmap_put(&missing_objects, entry);
+ }
+
oidset_clear(&revs.missing_commits);
}
@@ -801,12 +836,27 @@ int cmd_rev_list(int argc,
oidset_clear(&omitted_objects);
}
if (arg_missing_action == MA_PRINT) {
- struct oidset_iter iter;
- struct object_id *oid;
- oidset_iter_init(&missing_objects, &iter);
- while ((oid = oidset_iter_next(&iter)))
- printf("?%s\n", oid_to_hex(oid));
- oidset_clear(&missing_objects);
+ struct missing_objects_map_entry *entry;
+ struct oidmap_iter iter;
+
+ oidmap_iter_init(&missing_objects, &iter);
+
+ while ((entry = oidmap_iter_next(&iter)))
+ printf("?%s\n", oid_to_hex(&entry->entry.oid));
+
+ oidmap_free(&missing_objects, true);
+ }
+ if (arg_missing_action == MA_PRINT_TYPE) {
+ struct missing_objects_map_entry *entry;
+ struct oidmap_iter iter;
+
+ oidmap_iter_init(&missing_objects, &iter);
+
+ while ((entry = oidmap_iter_next(&iter)))
+ printf("?%s %s\n", oid_to_hex(&entry->entry.oid),
+ entry->type ? type_name(entry->type) : "");
+
+ oidmap_free(&missing_objects, true);
}
stop_progress(&progress);
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 7553a9cca2..1606082d4b 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -37,7 +37,7 @@ done
for obj in "HEAD~1" "HEAD~1^{tree}" "HEAD:1.t"
do
- for action in "allow-any" "print"
+ for action in "allow-any" "print" "print-type"
do
test_expect_success "rev-list --missing=$action with missing $obj" '
oid="$(git rev-parse $obj)" &&
@@ -48,6 +48,13 @@ do
git rev-list --objects --no-object-names \
HEAD ^$obj >expect.raw &&
+ # When the action is to print the type, we should also
+ # record the expected type of the missing object.
+ if test "$action" = "print-type"
+ then
+ type="$(git cat-file -t $obj)"
+ fi &&
+
# Blobs are shared by all commits, so even though a commit/tree
# might be skipped, its blob must be accounted for.
if test $obj != "HEAD:1.t"
@@ -71,6 +78,10 @@ do
grep ?$oid actual.raw &&
echo ?$oid >>expect.raw
;;
+ print-type)
+ grep "?$oid $type" actual.raw &&
+ echo "?$oid $type" >>expect.raw
+ ;;
esac &&
sort actual.raw >actual &&
base-commit: b74ff38af58464688b211140b90ec90598d340c6
--
2.47.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [PATCH] rev-list: print missing object type with --missing=print-type
2025-01-08 3:40 [PATCH] rev-list: print missing object type with --missing=print-type Justin Tobler
@ 2025-01-08 10:08 ` Christian Couder
2025-01-08 22:28 ` Justin Tobler
2025-01-08 15:17 ` Junio C Hamano
` (3 subsequent siblings)
4 siblings, 1 reply; 37+ messages in thread
From: Christian Couder @ 2025-01-08 10:08 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, karthik.188
On Wed, Jan 8, 2025 at 4:43 AM Justin Tobler <jltobler@gmail.com> wrote:
>
> Handling of missing objects encounted by git-rev-list(1) can be
> configured with the `--missing=<action>` option and specifying the
> desired action. Of the available missing actions, none provide a way to
> print additional information about the missing object such as its type.
What kind of additional information could we also print except for the type?
> Add a new missing action called `print-type`. Similar to `print`, this
> action prints a list of missing objects but also includes the object
> type if available in the form: `?<oid> [type]`.
>
> Signed-off-by: Justin Tobler <jltobler@gmail.com>
> ---
> Instead of adding an additional missing action type for the explicit
> purpose of printing type info, I also considered a separate option
> (something like `--object-types`, or maybe a `--missing-format`) that
> could be used in combination with the `--missing=print` option to
> achieve the same result. The main intent is for the missing object type
> and I wasn't sure if type info would be useful in the general case so I
> opted to choose the former approach.
If there are many other kinds of information we could also print, then
maybe a `--missing-format=<format>` option would make more sense
rather than adding `print-X`, `print-Y`, `print-X-Y`, etc. Otherwise,
yeah, I would say that `print-type` makes sense.
> @@ -103,6 +109,8 @@ static off_t get_object_disk_usage(struct object *obj)
>
> static inline void finish_object__ma(struct object *obj)
> {
> + struct missing_objects_map_entry *entry, *old;
> +
> /*
> * Whether or not we try to dynamically fetch missing objects
> * from the server, we currently DO NOT have the object. We
> @@ -119,7 +127,12 @@ static inline void finish_object__ma(struct object *obj)
> return;
>
> case MA_PRINT:
> - oidset_insert(&missing_objects, &obj->oid);
> + case MA_PRINT_TYPE:
> + CALLOC_ARRAY(entry, 1);
> + entry->entry.oid = obj->oid;
> + entry->type = obj->type;
> + old = oidmap_put(&missing_objects, entry);
> + free(old);
> return;
Maybe a function like:
static void add_missing_object_entry(struct object_id *oid, unsigned int type)
{
struct missing_objects_map_entry *entry, *old;
CALLOC_ARRAY(entry, 1);
entry->entry.oid = *oid;
entry->type = type;
old = oidmap_put(&missing_objects, entry);
free(old);
}
and then:
case MA_PRINT:
case MA_PRINT_TYPE:
add_missing_object_entry(&obj->oid, obj->type);
return;
could help keep finish_object__ma() (which is inlined) short and also
avoid some code duplication in the code below.
> case MA_ALLOW_PROMISOR:
> @@ -414,6 +427,12 @@ static inline int parse_missing_action_value(const char *value)
> return 1;
> }
>
> + if (!strcmp(value, "print-type")) {
> + arg_missing_action = MA_PRINT_TYPE;
> + fetch_if_missing = 0;
> + return 1;
> + }
> +
> if (!strcmp(value, "allow-promisor")) {
> arg_missing_action = MA_ALLOW_PROMISOR;
> fetch_if_missing = 0;
> @@ -781,10 +800,26 @@ int cmd_rev_list(int argc,
>
> if (arg_print_omitted)
> oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
> - if (arg_missing_action == MA_PRINT) {
> - oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
> - /* Add missing tips */
> - oidset_insert_from_set(&missing_objects, &revs.missing_commits);
> + if (arg_missing_action == MA_PRINT ||
> + arg_missing_action == MA_PRINT_TYPE) {
> + struct oidset_iter iter;
> + struct object_id *oid;
> +
> + oidmap_init(&missing_objects, DEFAULT_OIDSET_SIZE);
> + oidset_iter_init(&revs.missing_commits, &iter);
> +
> + /*
> + * Revisions pointing to missing objects lack the context
> + * required to determine object type.
> + */
> + while ((oid = oidset_iter_next(&iter))) {
> + struct missing_objects_map_entry *entry;
> +
> + CALLOC_ARRAY(entry, 1);
> + oidcpy(&entry->entry.oid, oid);
> + oidmap_put(&missing_objects, entry);
> + }
Using the function suggested above this could be:
/*
* Revisions pointing to missing objects lack the context
* required to determine object type.
*/
while ((oid = oidset_iter_next(&iter)))
add_missing_object_entry(oid, 0);
> +
> oidset_clear(&revs.missing_commits);
> }
>
> @@ -801,12 +836,27 @@ int cmd_rev_list(int argc,
> oidset_clear(&omitted_objects);
> }
> if (arg_missing_action == MA_PRINT) {
> - struct oidset_iter iter;
> - struct object_id *oid;
> - oidset_iter_init(&missing_objects, &iter);
> - while ((oid = oidset_iter_next(&iter)))
> - printf("?%s\n", oid_to_hex(oid));
> - oidset_clear(&missing_objects);
> + struct missing_objects_map_entry *entry;
> + struct oidmap_iter iter;
> +
> + oidmap_iter_init(&missing_objects, &iter);
> +
> + while ((entry = oidmap_iter_next(&iter)))
> + printf("?%s\n", oid_to_hex(&entry->entry.oid));
> +
> + oidmap_free(&missing_objects, true);
> + }
> + if (arg_missing_action == MA_PRINT_TYPE) {
> + struct missing_objects_map_entry *entry;
> + struct oidmap_iter iter;
> +
> + oidmap_iter_init(&missing_objects, &iter);
> +
> + while ((entry = oidmap_iter_next(&iter)))
> + printf("?%s %s\n", oid_to_hex(&entry->entry.oid),
> + entry->type ? type_name(entry->type) : "");
> +
> + oidmap_free(&missing_objects, true);
> }
Maybe a function like:
static void print_missing_objects(unsigned int with_type)
{
struct missing_objects_map_entry *entry;
struct oidmap_iter iter;
oidmap_iter_init(&missing_objects, &iter);
while ((entry = oidmap_iter_next(&iter)))
if (with_type && entry->type)
printf("?%s %s\n", oid_to_hex(&entry->entry.oid),
type_name(entry->type));
else
printf("?%s\n", oid_to_hex(&entry->entry.oid));
oidmap_free(&missing_objects, true);
}
could avoid some code duplication. It could also make the output a bit
cleaner as when there is no type, there would be no space after the
oid in the output.
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] rev-list: print missing object type with --missing=print-type
2025-01-08 3:40 [PATCH] rev-list: print missing object type with --missing=print-type Justin Tobler
2025-01-08 10:08 ` Christian Couder
@ 2025-01-08 15:17 ` Junio C Hamano
2025-01-08 22:18 ` Justin Tobler
2025-01-10 5:34 ` [PATCH v2 0/2] rev-list: print additional missing object information Justin Tobler
` (2 subsequent siblings)
4 siblings, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2025-01-08 15:17 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, karthik.188
Justin Tobler <jltobler@gmail.com> writes:
> Handling of missing objects encounted by git-rev-list(1) can be
> configured with the `--missing=<action>` option and specifying the
> desired action. Of the available missing actions, none provide a way to
> print additional information about the missing object such as its type.
>
> Add a new missing action called `print-type`. Similar to `print`, this
> action prints a list of missing objects but also includes the object
> type if available in the form: `?<oid> [type]`.
This part needs to explain where the type information comes from and
what its significance is (see below for more details).
> diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
> index 459e5a02f5..277a0b645e 100644
> --- a/Documentation/rev-list-options.txt
> +++ b/Documentation/rev-list-options.txt
> @@ -1024,6 +1024,9 @@ Unexpected missing objects will raise an error.
> The form '--missing=print' is like 'allow-any', but will also print a
> list of the missing objects. Object IDs are prefixed with a ``?'' character.
> +
> +The form '--missing=print-type' is like 'print', but will also print the
> +missing object type information if available in the form `?<oid> [type]`.
> ++
The users need to be told what this "type" information really means,
as its meaning is quite different from what "git cat-file -t <oid>"
would give them. We do not have the object, so we are not learning
its type from the object itself. How much trust should the users
put in this information, for example?
That comes back to the "where does it come from" that the future
readers of "git log" and reviewers need to be told by the proposed
log message. Knowing the internals, I know you'd be getting it from
the "containing" objects, e.g., an object name that was found on the
"parent" object header field of another commit, which is _expected_
to be a commit, or an object name that was found in a tree entry
whose mode bits were 100644, which is _expected_ to be a blob, etc.
There are other places that you _could_ glean information about
(possibly missing) objects. An object that is found during
"rev-list --objects" traversal (which is the topic of this patch
after all) but turned out to be missing may not just have an
expected type (because it was found in a tree object that we
successfully read) but also the full path to the object in the
top-level tree, for example.
In modern Git, there are even more places that you may be able to
use, like commit-graph that not just hints the object itself is a
commit, but what its parents are and when the commit was created.
Note that I am not suggesting to implement more code to learn "type"
information from more places than the current patch is doing. At
least not in this iteration of the patch. What I am getting at is
that it would help us to avoid unnecessarily limiting ourselves by
stressing on "type" too much if we at least imagine what the
possible sources of these extra pieces of information are and what
they could provide.
As I suspect that we would want to leave the door open for us to
extend this later, I would perhaps suggest an output format format
like:
?<object name> [<token>=<value>]...
where <token> tells what kind of extra information it is. I expect
that the initial implementation only knows about "type" as the
<token>. For future extensibility, we only need to say that under
the syntax:
(1) How multiple attributes are shown?
(2) How would a <value> with SP or LF in it is represented?
My suggestion is to have multiple <token>=<value> on the same line,
with a SP in between, and problematic bytes in <value> are quoted,
using cquote(). i.e. a <token>=<value> whose <value> part does not
begin with a double-quote ends at the first SP after it, otherwise
<value> is taken as a C-quoted string inside a pair of double-quote.
If you are adventurous, I would not mind seeing "path" implemented
as another token, since that would be fairly easily obtainable, but
it does not have to be in the initial attempt.
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] rev-list: print missing object type with --missing=print-type
2025-01-08 15:17 ` Junio C Hamano
@ 2025-01-08 22:18 ` Justin Tobler
2025-01-08 22:43 ` Junio C Hamano
0 siblings, 1 reply; 37+ messages in thread
From: Justin Tobler @ 2025-01-08 22:18 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, karthik.188
On 25/01/08 07:17AM, Junio C Hamano wrote:
> The users need to be told what this "type" information really means,
> as its meaning is quite different from what "git cat-file -t <oid>"
> would give them. We do not have the object, so we are not learning
> its type from the object itself. How much trust should the users
> put in this information, for example?
>
> That comes back to the "where does it come from" that the future
> readers of "git log" and reviewers need to be told by the proposed
> log message. Knowing the internals, I know you'd be getting it from
> the "containing" objects, e.g., an object name that was found on the
> "parent" object header field of another commit, which is _expected_
> to be a commit, or an object name that was found in a tree entry
> whose mode bits were 100644, which is _expected_ to be a blob, etc.
I'll update the log message in the next version to explain how
information about a missing object gets inferred. As you explained, this
is relying on the containing object to figure this out. This is why for
some missing objects it may not be possible to infer the type. For
example, if a missing object is only reffered to by a reference, there
is not a containing object that can be used.
> There are other places that you _could_ glean information about
> (possibly missing) objects. An object that is found during
> "rev-list --objects" traversal (which is the topic of this patch
> after all) but turned out to be missing may not just have an
> expected type (because it was found in a tree object that we
> successfully read) but also the full path to the object in the
> top-level tree, for example.
>
> In modern Git, there are even more places that you may be able to
> use, like commit-graph that not just hints the object itself is a
> commit, but what its parents are and when the commit was created.
>
> Note that I am not suggesting to implement more code to learn "type"
> information from more places than the current patch is doing. At
> least not in this iteration of the patch. What I am getting at is
> that it would help us to avoid unnecessarily limiting ourselves by
> stressing on "type" too much if we at least imagine what the
> possible sources of these extra pieces of information are and what
> they could provide.
>
> As I suspect that we would want to leave the door open for us to
> extend this later, I would perhaps suggest an output format format
> like:
>
> ?<object name> [<token>=<value>]...
I think this is a great idea. To select which attributes get printed
with the missing object we could add an option. Something like:
$ git rev-list --objects --missing=print \
--missing-attr=path --missing-attr=type
I like the idea of also adding a path attribute, but this raises a
couple of questions. The way `--missing=print` currently works is that
it prints the unique set of missing object IDs. A missing object could
possibly be referenced by multiple trees and thus have multiple valid
paths. There are a couple of different ways this situation could be
handled:
- We could record each of the encounted paths for an object and print
out each. Something like: `?<oid> path=foo path=bar`
- Historically, `--missing=print` would only ever print a single
instance of the OID, but we could print a missing object with
multiple paths each on a separate line. Something like this:
?e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 path=foo
?e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 path=bar
- We could keep it simple a just report a single path per missing
object. It doesn't capture the whole picture, but it does provide
some insight into the missing object.
Since this missing object type is also inferred from the containing
object, in theory different containing objects could indicate different
types for the missing object. I'm not sure though if this is a scenario
worth accounting for. Maybe it would be fine to rely on a single
containing object to provide the type and assume it is consistent across
the others.
For paths, I'm currently leaning towards having each identified path
printed out as a separate attribute on the same line. For types, I'm
thinking we can just print a single type and assume it is consistent.
I'm certainly open to suggestions though. :)
> where <token> tells what kind of extra information it is. I expect
> that the initial implementation only knows about "type" as the
> <token>. For future extensibility, we only need to say that under
> the syntax:
>
> (1) How multiple attributes are shown?
> (2) How would a <value> with SP or LF in it is represented?
>
> My suggestion is to have multiple <token>=<value> on the same line,
> with a SP in between, and problematic bytes in <value> are quoted,
> using cquote(). i.e. a <token>=<value> whose <value> part does not
> begin with a double-quote ends at the first SP after it, otherwise
> <value> is taken as a C-quoted string inside a pair of double-quote.
Ok, so using a path value as an example, if it contains a SP, or a
special character that would be handled by `quote_c_style()` it should
be wrapped in double-quotes to indicate that it is all part of the
single attribute. That makes sense.
> If you are adventurous, I would not mind seeing "path" implemented
> as another token, since that would be fairly easily obtainable, but
> it does not have to be in the initial attempt.
Handling paths appears pretty straight-forward to add and seems like a
good idea. In my next version I'll also add support for a path
attribute. Thanks for the feedback.
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] rev-list: print missing object type with --missing=print-type
2025-01-08 10:08 ` Christian Couder
@ 2025-01-08 22:28 ` Justin Tobler
0 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-01-08 22:28 UTC (permalink / raw)
To: Christian Couder; +Cc: git, karthik.188
On 25/01/08 11:08AM, Christian Couder wrote:
> On Wed, Jan 8, 2025 at 4:43 AM Justin Tobler <jltobler@gmail.com> wrote:
> >
> > Handling of missing objects encounted by git-rev-list(1) can be
> > configured with the `--missing=<action>` option and specifying the
> > desired action. Of the available missing actions, none provide a way to
> > print additional information about the missing object such as its type.
>
> What kind of additional information could we also print except for the type?
In [1], Junio suggested path information as another option.
>
> > Add a new missing action called `print-type`. Similar to `print`, this
> > action prints a list of missing objects but also includes the object
> > type if available in the form: `?<oid> [type]`.
> >
> > Signed-off-by: Justin Tobler <jltobler@gmail.com>
> > ---
> > Instead of adding an additional missing action type for the explicit
> > purpose of printing type info, I also considered a separate option
> > (something like `--object-types`, or maybe a `--missing-format`) that
> > could be used in combination with the `--missing=print` option to
> > achieve the same result. The main intent is for the missing object type
> > and I wasn't sure if type info would be useful in the general case so I
> > opted to choose the former approach.
>
> If there are many other kinds of information we could also print, then
> maybe a `--missing-format=<format>` option would make more sense
> rather than adding `print-X`, `print-Y`, `print-X-Y`, etc. Otherwise,
> yeah, I would say that `print-type` makes sense.
Since there is some other information that we may also want to print, I
think it makes sense to follow a more generic approach now. I'm
currently thinking could continue to use `--missing=print`, but add a
`--missing-attr` option to specify additional information we want to
print. Something like:
$ git rev-list --objects --missing=print \
--missing-attr=type --missing-attr=type
> > @@ -103,6 +109,8 @@ static off_t get_object_disk_usage(struct object *obj)
> >
> > static inline void finish_object__ma(struct object *obj)
> > {
> > + struct missing_objects_map_entry *entry, *old;
> > +
> > /*
> > * Whether or not we try to dynamically fetch missing objects
> > * from the server, we currently DO NOT have the object. We
> > @@ -119,7 +127,12 @@ static inline void finish_object__ma(struct object *obj)
> > return;
> >
> > case MA_PRINT:
> > - oidset_insert(&missing_objects, &obj->oid);
> > + case MA_PRINT_TYPE:
> > + CALLOC_ARRAY(entry, 1);
> > + entry->entry.oid = obj->oid;
> > + entry->type = obj->type;
> > + old = oidmap_put(&missing_objects, entry);
> > + free(old);
> > return;
>
> Maybe a function like:
>
> static void add_missing_object_entry(struct object_id *oid, unsigned int type)
> {
> struct missing_objects_map_entry *entry, *old;
>
> CALLOC_ARRAY(entry, 1);
> entry->entry.oid = *oid;
> entry->type = type;
> old = oidmap_put(&missing_objects, entry);
> free(old);
> }
>
> and then:
>
> case MA_PRINT:
> case MA_PRINT_TYPE:
> add_missing_object_entry(&obj->oid, obj->type);
> return;
>
> could help keep finish_object__ma() (which is inlined) short and also
> avoid some code duplication in the code below.
Good idea, I'll factor this out in the next version.
>
> > case MA_ALLOW_PROMISOR:
> > @@ -414,6 +427,12 @@ static inline int parse_missing_action_value(const char *value)
> > return 1;
> > }
> >
> > + if (!strcmp(value, "print-type")) {
> > + arg_missing_action = MA_PRINT_TYPE;
> > + fetch_if_missing = 0;
> > + return 1;
> > + }
> > +
> > if (!strcmp(value, "allow-promisor")) {
> > arg_missing_action = MA_ALLOW_PROMISOR;
> > fetch_if_missing = 0;
> > @@ -781,10 +800,26 @@ int cmd_rev_list(int argc,
> >
> > if (arg_print_omitted)
> > oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
> > - if (arg_missing_action == MA_PRINT) {
> > - oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
> > - /* Add missing tips */
> > - oidset_insert_from_set(&missing_objects, &revs.missing_commits);
> > + if (arg_missing_action == MA_PRINT ||
> > + arg_missing_action == MA_PRINT_TYPE) {
> > + struct oidset_iter iter;
> > + struct object_id *oid;
> > +
> > + oidmap_init(&missing_objects, DEFAULT_OIDSET_SIZE);
> > + oidset_iter_init(&revs.missing_commits, &iter);
> > +
> > + /*
> > + * Revisions pointing to missing objects lack the context
> > + * required to determine object type.
> > + */
> > + while ((oid = oidset_iter_next(&iter))) {
> > + struct missing_objects_map_entry *entry;
> > +
> > + CALLOC_ARRAY(entry, 1);
> > + oidcpy(&entry->entry.oid, oid);
> > + oidmap_put(&missing_objects, entry);
> > + }
>
> Using the function suggested above this could be:
>
> /*
> * Revisions pointing to missing objects lack the context
> * required to determine object type.
> */
> while ((oid = oidset_iter_next(&iter)))
> add_missing_object_entry(oid, 0);
>
> > +
> > oidset_clear(&revs.missing_commits);
> > }
> >
> > @@ -801,12 +836,27 @@ int cmd_rev_list(int argc,
> > oidset_clear(&omitted_objects);
> > }
> > if (arg_missing_action == MA_PRINT) {
> > - struct oidset_iter iter;
> > - struct object_id *oid;
> > - oidset_iter_init(&missing_objects, &iter);
> > - while ((oid = oidset_iter_next(&iter)))
> > - printf("?%s\n", oid_to_hex(oid));
> > - oidset_clear(&missing_objects);
> > + struct missing_objects_map_entry *entry;
> > + struct oidmap_iter iter;
> > +
> > + oidmap_iter_init(&missing_objects, &iter);
> > +
> > + while ((entry = oidmap_iter_next(&iter)))
> > + printf("?%s\n", oid_to_hex(&entry->entry.oid));
> > +
> > + oidmap_free(&missing_objects, true);
> > + }
> > + if (arg_missing_action == MA_PRINT_TYPE) {
> > + struct missing_objects_map_entry *entry;
> > + struct oidmap_iter iter;
> > +
> > + oidmap_iter_init(&missing_objects, &iter);
> > +
> > + while ((entry = oidmap_iter_next(&iter)))
> > + printf("?%s %s\n", oid_to_hex(&entry->entry.oid),
> > + entry->type ? type_name(entry->type) : "");
> > +
> > + oidmap_free(&missing_objects, true);
> > }
>
> Maybe a function like:
>
> static void print_missing_objects(unsigned int with_type)
> {
> struct missing_objects_map_entry *entry;
> struct oidmap_iter iter;
>
> oidmap_iter_init(&missing_objects, &iter);
>
> while ((entry = oidmap_iter_next(&iter)))
> if (with_type && entry->type)
> printf("?%s %s\n", oid_to_hex(&entry->entry.oid),
> type_name(entry->type));
> else
> printf("?%s\n", oid_to_hex(&entry->entry.oid));
>
> oidmap_free(&missing_objects, true);
> }
>
> could avoid some code duplication. It could also make the output a bit
> cleaner as when there is no type, there would be no space after the
> oid in the output.
I agree with this suggestion and will also factor this out in the next
version. Thanks!
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] rev-list: print missing object type with --missing=print-type
2025-01-08 22:18 ` Justin Tobler
@ 2025-01-08 22:43 ` Junio C Hamano
2025-01-08 23:13 ` Justin Tobler
0 siblings, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2025-01-08 22:43 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, karthik.188
Justin Tobler <jltobler@gmail.com> writes:
>> As I suspect that we would want to leave the door open for us to
>> extend this later, I would perhaps suggest an output format format
>> like:
>>
>> ?<object name> [<token>=<value>]...
>
> I think this is a great idea. To select which attributes get printed
> with the missing object we could add an option. Something like:
>
> $ git rev-list --objects --missing=print \
> --missing-attr=path --missing-attr=type
My knee-jerk reaction is that this is over-engineered; wouldn't it
be possible for us to simply dump everything we know about the
object, and let the receiving end pick and choose?
> I like the idea of also adding a path attribute, but this raises a
> couple of questions. The way `--missing=print` currently works is that
> it prints the unique set of missing object IDs. A missing object could
> possibly be referenced by multiple trees and thus have multiple valid
> paths.
That is not an issue at all, I think. "rev-list --objects" that
shows objects that are not missing already has the same issue, and
the solution is "show the path when the object gets shown for the
first time". Even when the same object is encountered during the
history-and-then-tree walk later, that object is simply not listed.
The code path that collects "I thought this blob should exist
because a tree wants to see it at this path, but the repository is
corrupt and I cannot see it there" into the missing object table
with attributes should do the same. If the table does not yet have
the object, record the attributes (like "expected type", "path at
which the object was found") when inserting the object into the
table for the first time. If you have a missing object and the
table already has it recorded there, don't do anything extra.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] rev-list: print missing object type with --missing=print-type
2025-01-08 22:43 ` Junio C Hamano
@ 2025-01-08 23:13 ` Justin Tobler
0 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-01-08 23:13 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, karthik.188
On 25/01/08 02:43PM, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:
>
> >> As I suspect that we would want to leave the door open for us to
> >> extend this later, I would perhaps suggest an output format format
> >> like:
> >>
> >> ?<object name> [<token>=<value>]...
> >
> > I think this is a great idea. To select which attributes get printed
> > with the missing object we could add an option. Something like:
> >
> > $ git rev-list --objects --missing=print \
> > --missing-attr=path --missing-attr=type
>
> My knee-jerk reaction is that this is over-engineered; wouldn't it
> be possible for us to simply dump everything we know about the
> object, and let the receiving end pick and choose?
I think that should also be fine and much simpler. We may not want to
effect the existing output of `--missing=print` though, so we could have
a simple boolean option like `--missing-attr` or just add a new missing
action type like `--missing=print-attr`. When enabled it would just
print all the identified attributes.
If we don't care though, we could keep it very simple and just add all
the information whenever `--missing=print` is set.
> > I like the idea of also adding a path attribute, but this raises a
> > couple of questions. The way `--missing=print` currently works is that
> > it prints the unique set of missing object IDs. A missing object could
> > possibly be referenced by multiple trees and thus have multiple valid
> > paths.
>
> That is not an issue at all, I think. "rev-list --objects" that
> shows objects that are not missing already has the same issue, and
> the solution is "show the path when the object gets shown for the
> first time". Even when the same object is encountered during the
> history-and-then-tree walk later, that object is simply not listed.
>
> The code path that collects "I thought this blob should exist
> because a tree wants to see it at this path, but the repository is
> corrupt and I cannot see it there" into the missing object table
> with attributes should do the same. If the table does not yet have
> the object, record the attributes (like "expected type", "path at
> which the object was found") when inserting the object into the
> table for the first time. If you have a missing object and the
> table already has it recorded there, don't do anything extra.
Good to know! That makes things a bit simpler. I'll follow this approach
and only add the missing object to the table if an object with the same
OID is not already recorded.
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 0/2] rev-list: print additional missing object information
2025-01-08 3:40 [PATCH] rev-list: print missing object type with --missing=print-type Justin Tobler
2025-01-08 10:08 ` Christian Couder
2025-01-08 15:17 ` Junio C Hamano
@ 2025-01-10 5:34 ` Justin Tobler
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
2025-01-10 5:34 ` [PATCH v2 1/2] rev-list: add --missing-info to print missing object path Justin Tobler
2025-01-10 5:34 ` [PATCH v2 2/2] rev-list: extend --missing-info to print missing object type Justin Tobler
4 siblings, 1 reply; 37+ messages in thread
From: Justin Tobler @ 2025-01-10 5:34 UTC (permalink / raw)
To: git; +Cc: karthik.188, christian.couder, Justin Tobler
Greetings,
It is possible to configure git-rev-list(1) to print the OID of missing
objects by setting the `--missing=print` option. While it is useful
knowing about these objects, it would be nice to have even more context
about the objects that are missing. Luckily, from an object containing
the missing object, it is possible to infer additional information the
missing object. For example, if the tree containing a missing blob still
exists, the tree entry for the missing object should contain path and
type information.
This series aims to provide a git-rev-list(1) option that, when combined
with the existing `--missing=print` option, prints other potentially
interesting information about the missing object.
- Patch 1 introduces the `--missing-info` option and supports printing
the missing object path information.
- Patch 2 extends the `--missing-info` option to also print object type
information about the missing object.
Changes in V2:
- A `--missing-info` option is now used instead of an explicit
`--missing=print-type` option to print additional missing object info
in a more generalized fashion. This enables other use cases outside of
just the missing object type.
- Support for printing the full path of the missing object from the
top-level tree is also added with `--missing-info`.
Due to all rearranging/changes in this version, I opted not to include a
range-diff.
Thanks,
-Justin
Justin Tobler (2):
rev-list: add --missing-info to print missing object path
rev-list: extend --missing-info to print missing object type
Documentation/rev-list-options.txt | 15 +++++
builtin/rev-list.c | 103 ++++++++++++++++++++++++-----
t/t6022-rev-list-missing.sh | 53 +++++++++++++++
3 files changed, 156 insertions(+), 15 deletions(-)
base-commit: b74ff38af58464688b211140b90ec90598d340c6
--
2.47.1
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 1/2] rev-list: add --missing-info to print missing object path
2025-01-08 3:40 [PATCH] rev-list: print missing object type with --missing=print-type Justin Tobler
` (2 preceding siblings ...)
2025-01-10 5:34 ` [PATCH v2 0/2] rev-list: print additional missing object information Justin Tobler
@ 2025-01-10 5:34 ` Justin Tobler
2025-01-10 8:47 ` Christian Couder
2025-01-10 5:34 ` [PATCH v2 2/2] rev-list: extend --missing-info to print missing object type Justin Tobler
4 siblings, 1 reply; 37+ messages in thread
From: Justin Tobler @ 2025-01-10 5:34 UTC (permalink / raw)
To: git; +Cc: karthik.188, christian.couder, Justin Tobler
Missing objects identified through git-rev-list(1) can be printed by
setting the `--missing=print` option. Additional information about the
missing object, such as its path and type, may be present in its
containing object.
Add the `--missing-info` option that, when also set with
`--missing=print`, prints additional insight about the missing object
inferred from its containing object. Each line of output for a missing
object is in the form: `?<oid> [<token>=<value>]...`. This format is
kept generic so it can support multiple different attributes and be
extended in the future as needed. Each additional attribute is separated
by a SP. Any value containing a SP or special character is enclosed in
double-quotes in the C style as needed.
For now, only a missing object path attribute is implemented. It follows
the form `path=<path>` and specifies the full path to the object from
the top-level tree. In a subsequent commit, a missing object type
attribute will also be added.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/rev-list-options.txt | 12 ++++
builtin/rev-list.c | 98 +++++++++++++++++++++++++-----
t/t6022-rev-list-missing.sh | 52 ++++++++++++++++
3 files changed, 147 insertions(+), 15 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 459e5a02f5..8e363f5ae7 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1028,6 +1028,18 @@ If some tips passed to the traversal are missing, they will be
considered as missing too, and the traversal will ignore them. In case
we cannot get their Object ID though, an error will be raised.
+--missing-info::
+ Only useful with `--missing=print`; prints any additional information
+ about the missing object inferred from its containing object. The
+ information is all printed on the same line with the missing object ID
+ in the form: `?<oid> [<token>=<value>]...`. Additional attributes are
+ each separated by a SP. Any value containing a SP or special character
+ is enclosed in double-quotes in the C style as needed. Each
+ `<token>=<value>` may be one of the following:
++
+The `path=<path>` shows the path of the missing object inferred from a
+containing object.
+
--exclude-promisor-objects::
(For internal use only.) Prefilter object traversal at
promisor boundary. This is used with partial clone. This is
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 3196da7b2d..861ffdaf21 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -22,7 +22,10 @@
#include "progress.h"
#include "reflog-walk.h"
#include "oidset.h"
+#include "oidmap.h"
#include "packfile.h"
+#include "quote.h"
+#include "strbuf.h"
static const char rev_list_usage[] =
"git rev-list [<options>] <commit>... [--] [<path>...]\n"
@@ -73,7 +76,11 @@ static unsigned progress_counter;
static struct oidset omitted_objects;
static int arg_print_omitted; /* print objects omitted by filter */
-static struct oidset missing_objects;
+struct missing_objects_map_entry {
+ struct oidmap_entry entry;
+ const char *path;
+};
+static struct oidmap missing_objects;
enum missing_action {
MA_ERROR = 0, /* fail if any missing objects are encountered */
MA_ALLOW_ANY, /* silently allow ALL missing objects */
@@ -101,7 +108,47 @@ static off_t get_object_disk_usage(struct object *obj)
return size;
}
-static inline void finish_object__ma(struct object *obj)
+static void add_missing_object_entry(struct object_id *oid, const char *path)
+{
+ struct missing_objects_map_entry *entry;
+
+ if (oidmap_get(&missing_objects, oid))
+ return;
+
+ CALLOC_ARRAY(entry, 1);
+ entry->entry.oid = *oid;
+ if (path)
+ entry->path = xstrdup(path);
+ oidmap_put(&missing_objects, entry);
+}
+
+static void print_missing_object(struct missing_objects_map_entry *entry,
+ int print_missing_info)
+{
+ struct strbuf sb;
+
+ if (!print_missing_info) {
+ printf("?%s\n", oid_to_hex(&entry->entry.oid));
+ return;
+ }
+
+ strbuf_init(&sb, 0);
+ if (entry->path && *entry->path) {
+ strbuf_addstr(&sb, " path=");
+
+ if (quote_c_style(entry->path, NULL, NULL, 0))
+ quote_c_style(entry->path, &sb, NULL, 0);
+ else if (strchr(entry->path, ' '))
+ strbuf_addf(&sb, "\"%s\"", entry->path);
+ else
+ strbuf_addstr(&sb, entry->path);
+ }
+
+ printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
+ strbuf_release(&sb);
+}
+
+static inline void finish_object__ma(struct object *obj, const char *name)
{
/*
* Whether or not we try to dynamically fetch missing objects
@@ -119,7 +166,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_PRINT:
- oidset_insert(&missing_objects, &obj->oid);
+ add_missing_object_entry(&obj->oid, name);
return;
case MA_ALLOW_PROMISOR:
@@ -152,7 +199,7 @@ static void show_commit(struct commit *commit, void *data)
if (revs->do_not_die_on_missing_objects &&
oidset_contains(&revs->missing_commits, &commit->object.oid)) {
- finish_object__ma(&commit->object);
+ finish_object__ma(&commit->object, NULL);
return;
}
@@ -268,12 +315,11 @@ static void show_commit(struct commit *commit, void *data)
finish_commit(commit);
}
-static int finish_object(struct object *obj, const char *name UNUSED,
- void *cb_data)
+static int finish_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
if (oid_object_info_extended(the_repository, &obj->oid, NULL, 0) < 0) {
- finish_object__ma(obj);
+ finish_object__ma(obj, name);
return 1;
}
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
@@ -538,6 +584,7 @@ int cmd_rev_list(int argc,
int bisect_show_vars = 0;
int bisect_find_all = 0;
int use_bitmap_index = 0;
+ int print_missing_info = 0;
int filter_provided_objects = 0;
const char *show_progress = NULL;
int ret = 0;
@@ -656,6 +703,15 @@ int cmd_rev_list(int argc,
if (skip_prefix(arg, "--missing=", &arg))
continue; /* already handled above */
+ if (!strcmp(arg, "--missing-info")) {
+ if (arg_missing_action != MA_PRINT)
+ die(_("the option '%s' requires '%s'"),
+ "--missing-info", "--missing=print");
+
+ print_missing_info = 1;
+ continue;
+ }
+
if (!strcmp(arg, ("--no-object-names"))) {
arg_show_object_names = 0;
continue;
@@ -782,9 +838,16 @@ int cmd_rev_list(int argc,
if (arg_print_omitted)
oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
if (arg_missing_action == MA_PRINT) {
- oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ struct oidset_iter iter;
+ struct object_id *oid;
+
+ oidmap_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ oidset_iter_init(&revs.missing_commits, &iter);
+
/* Add missing tips */
- oidset_insert_from_set(&missing_objects, &revs.missing_commits);
+ while ((oid = oidset_iter_next(&iter)))
+ add_missing_object_entry(oid, NULL);
+
oidset_clear(&revs.missing_commits);
}
@@ -801,12 +864,17 @@ int cmd_rev_list(int argc,
oidset_clear(&omitted_objects);
}
if (arg_missing_action == MA_PRINT) {
- struct oidset_iter iter;
- struct object_id *oid;
- oidset_iter_init(&missing_objects, &iter);
- while ((oid = oidset_iter_next(&iter)))
- printf("?%s\n", oid_to_hex(oid));
- oidset_clear(&missing_objects);
+ struct missing_objects_map_entry *entry;
+ struct oidmap_iter iter;
+
+ oidmap_iter_init(&missing_objects, &iter);
+
+ while ((entry = oidmap_iter_next(&iter))) {
+ print_missing_object(entry, print_missing_info);
+ free((void *)entry->path);
+ }
+
+ oidmap_free(&missing_objects, true);
}
stop_progress(&progress);
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 7553a9cca2..2eb051028e 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -145,4 +145,56 @@ do
done
done
+for obj in "HEAD~1" "HEAD^{tree}" "HEAD:foo" "HEAD:foo/bar" "HEAD:baz baz" "HEAD:bat\""
+do
+ test_expect_success "--missing-info with missing '$obj'" '
+ test_when_finished rm -rf missing-info &&
+
+ git init missing-info &&
+ (
+ cd missing-info &&
+ git commit --allow-empty -m first &&
+
+ mkdir foo &&
+ echo bar >foo/bar &&
+ echo baz >"baz baz" &&
+ echo bat >bat\" &&
+ git add -A &&
+ git commit -m second &&
+
+ oid="$(git rev-parse "$obj")" &&
+ path=".git/objects/$(test_oid_to_path $oid)" &&
+
+ case $obj in
+ HEAD:foo)
+ path_info=" path=foo"
+ ;;
+ HEAD:foo/bar)
+ path_info=" path=foo/bar"
+ ;;
+ "HEAD:baz baz")
+ path_info=" path=\"baz baz\""
+ ;;
+ "HEAD:bat\"")
+ path_info=" path=\"bat\\\"\""
+ ;;
+ esac &&
+
+ # Before the object is made missing, we use rev-list to
+ # get the expected oids.
+ git rev-list --objects --no-object-names \
+ HEAD ^"$obj" >expect.raw &&
+ echo "?$oid$path_info" >>expect.raw &&
+
+ mv "$path" "$path.hidden" &&
+ git rev-list --objects --no-object-names \
+ --missing=print --missing-info HEAD >actual.raw &&
+
+ sort actual.raw >actual &&
+ sort expect.raw >expect &&
+ test_cmp expect actual
+ )
+ '
+done
+
test_done
--
2.47.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH v2 2/2] rev-list: extend --missing-info to print missing object type
2025-01-08 3:40 [PATCH] rev-list: print missing object type with --missing=print-type Justin Tobler
` (3 preceding siblings ...)
2025-01-10 5:34 ` [PATCH v2 1/2] rev-list: add --missing-info to print missing object path Justin Tobler
@ 2025-01-10 5:34 ` Justin Tobler
4 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-01-10 5:34 UTC (permalink / raw)
To: git; +Cc: karthik.188, christian.couder, Justin Tobler
Additional information about missing objects found in git-rev-list(1)
can be printed by specifying the `--missing=print` and `--missing-info`
options. Extend this option to also print missing object type
information inferred from its containing object. This attribute follows
the form `type=<type>` and specifies the expected object type of the
missing object.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/rev-list-options.txt | 3 +++
builtin/rev-list.c | 11 ++++++++---
t/t6022-rev-list-missing.sh | 3 ++-
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 8e363f5ae7..9722eefc79 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1039,6 +1039,9 @@ we cannot get their Object ID though, an error will be raised.
+
The `path=<path>` shows the path of the missing object inferred from a
containing object.
++
+The `type=<type>` shows the type of the missing object inferred from a
+containing object.
--exclude-promisor-objects::
(For internal use only.) Prefilter object traversal at
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 861ffdaf21..93d173039d 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -79,6 +79,7 @@ static int arg_print_omitted; /* print objects omitted by filter */
struct missing_objects_map_entry {
struct oidmap_entry entry;
const char *path;
+ unsigned type;
};
static struct oidmap missing_objects;
enum missing_action {
@@ -108,7 +109,8 @@ static off_t get_object_disk_usage(struct object *obj)
return size;
}
-static void add_missing_object_entry(struct object_id *oid, const char *path)
+static void add_missing_object_entry(struct object_id *oid, const char *path,
+ unsigned type)
{
struct missing_objects_map_entry *entry;
@@ -117,6 +119,7 @@ static void add_missing_object_entry(struct object_id *oid, const char *path)
CALLOC_ARRAY(entry, 1);
entry->entry.oid = *oid;
+ entry->type = type;
if (path)
entry->path = xstrdup(path);
oidmap_put(&missing_objects, entry);
@@ -143,6 +146,8 @@ static void print_missing_object(struct missing_objects_map_entry *entry,
else
strbuf_addstr(&sb, entry->path);
}
+ if (entry->type)
+ strbuf_addf(&sb, " type=%s", type_name(entry->type));
printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
strbuf_release(&sb);
@@ -166,7 +171,7 @@ static inline void finish_object__ma(struct object *obj, const char *name)
return;
case MA_PRINT:
- add_missing_object_entry(&obj->oid, name);
+ add_missing_object_entry(&obj->oid, name, obj->type);
return;
case MA_ALLOW_PROMISOR:
@@ -846,7 +851,7 @@ int cmd_rev_list(int argc,
/* Add missing tips */
while ((oid = oidset_iter_next(&iter)))
- add_missing_object_entry(oid, NULL);
+ add_missing_object_entry(oid, NULL, 0);
oidset_clear(&revs.missing_commits);
}
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 2eb051028e..95bee17cab 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -164,6 +164,7 @@ do
oid="$(git rev-parse "$obj")" &&
path=".git/objects/$(test_oid_to_path $oid)" &&
+ type_info=" type=$(git cat-file -t $oid)" &&
case $obj in
HEAD:foo)
@@ -184,7 +185,7 @@ do
# get the expected oids.
git rev-list --objects --no-object-names \
HEAD ^"$obj" >expect.raw &&
- echo "?$oid$path_info" >>expect.raw &&
+ echo "?$oid$path_info$type_info" >>expect.raw &&
mv "$path" "$path.hidden" &&
git rev-list --objects --no-object-names \
--
2.47.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [PATCH v2 1/2] rev-list: add --missing-info to print missing object path
2025-01-10 5:34 ` [PATCH v2 1/2] rev-list: add --missing-info to print missing object path Justin Tobler
@ 2025-01-10 8:47 ` Christian Couder
2025-01-10 15:22 ` Junio C Hamano
0 siblings, 1 reply; 37+ messages in thread
From: Christian Couder @ 2025-01-10 8:47 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, karthik.188
On Fri, Jan 10, 2025 at 6:38 AM Justin Tobler <jltobler@gmail.com> wrote:
> +--missing-info::
> + Only useful with `--missing=print`; prints any additional information
> + about the missing object inferred from its containing object. The
> + information is all printed on the same line with the missing object ID
> + in the form: `?<oid> [<token>=<value>]...`. Additional attributes are
> + each separated by a SP.
Nit: I'd rather say "The `<token>=<value>` pairs containing additional
information are separated from each other by a SP." to avoid
introducing "attributes" which might not be very clear.
> Any value containing a SP or special character
> + is enclosed in double-quotes in the C style as needed. Each
> + `<token>=<value>` may be one of the following:
It might be a bit better to decide for each token-value pair how the
value is encoded, instead of deciding in advance for all of them.
> ++
> +The `path=<path>` shows the path of the missing object inferred from a
> +containing object.
For example for the path, I think it might be easier to always enclose
it in double-quotes in the C style rather than checking first if it
contains spaces or other special characters, see below.
> +static void print_missing_object(struct missing_objects_map_entry *entry,
> + int print_missing_info)
> +{
> + struct strbuf sb;
> +
> + if (!print_missing_info) {
> + printf("?%s\n", oid_to_hex(&entry->entry.oid));
> + return;
> + }
> +
> + strbuf_init(&sb, 0);
I am not sure it's worth initializing the sb separately from its
declaration above. Using "struct strbuf sb = STRBUF_INIT;" is more
standard in the code base and I think most compilers these days are
likely to be able to optimize away the initialization in the
"!print_missing_info" case.
> + if (entry->path && *entry->path) {
> + strbuf_addstr(&sb, " path=");
> +
> + if (quote_c_style(entry->path, NULL, NULL, 0))
> + quote_c_style(entry->path, &sb, NULL, 0);
> + else if (strchr(entry->path, ' '))
> + strbuf_addf(&sb, "\"%s\"", entry->path);
> + else
> + strbuf_addstr(&sb, entry->path);
I think the above code paragraph could be simplified to just:
quote_c_style(entry->path, &sb, NULL, 0);
if we decided to always quote the path. The decoding part would likely
be simplified too.
> + }
> +
> + printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
> + strbuf_release(&sb);
> +}
> @@ -656,6 +703,15 @@ int cmd_rev_list(int argc,
> if (skip_prefix(arg, "--missing=", &arg))
> continue; /* already handled above */
>
> + if (!strcmp(arg, "--missing-info")) {
> + if (arg_missing_action != MA_PRINT)
> + die(_("the option '%s' requires '%s'"),
> + "--missing-info", "--missing=print");
It seems to me that this check should be performed outside the arg
parsing loop so that this check passes if the user passes
"--missing-info" before "--missing=print" on the command line.
> + print_missing_info = 1;
> + continue;
> + }
> +
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v2 1/2] rev-list: add --missing-info to print missing object path
2025-01-10 8:47 ` Christian Couder
@ 2025-01-10 15:22 ` Junio C Hamano
0 siblings, 0 replies; 37+ messages in thread
From: Junio C Hamano @ 2025-01-10 15:22 UTC (permalink / raw)
To: Christian Couder; +Cc: Justin Tobler, git, karthik.188
Christian Couder <christian.couder@gmail.com> writes:
> On Fri, Jan 10, 2025 at 6:38 AM Justin Tobler <jltobler@gmail.com> wrote:
>
>> +--missing-info::
>> + Only useful with `--missing=print`; prints any additional information
>> + about the missing object inferred from its containing object. The
>> + information is all printed on the same line with the missing object ID
>> + in the form: `?<oid> [<token>=<value>]...`. Additional attributes are
>> + each separated by a SP.
>
> Nit: I'd rather say "The `<token>=<value>` pairs containing additional
> information are separated from each other by a SP." to avoid
> introducing "attributes" which might not be very clear.
Excellent.
>> Any value containing a SP or special character
>> + is enclosed in double-quotes in the C style as needed. Each
>> + `<token>=<value>` may be one of the following:
>
> It might be a bit better to decide for each token-value pair how the
> value is encoded, instead of deciding in advance for all of them.
I strongly advise against it.
Declaring the syntax we will use for forseeable future for new
attributes upfront would allow the receiving end, the scripts that
interpret our output, to be written in a futureproof way.
An alternative is "value is encoded in token specific fashion, but
one thing that is common across these future encodings is that SP or
LF contained in value will be represented in such a way that the
resulting encoded value will not have either of these two
problematic bytes".
>> + if (entry->path && *entry->path) {
>> + strbuf_addstr(&sb, " path=");
>> +
>> + if (quote_c_style(entry->path, NULL, NULL, 0))
>> + quote_c_style(entry->path, &sb, NULL, 0);
>> + else if (strchr(entry->path, ' '))
>> + strbuf_addf(&sb, "\"%s\"", entry->path);
>> + else
>> + strbuf_addstr(&sb, entry->path);
>
> I think the above code paragraph could be simplified to just:
>
> quote_c_style(entry->path, &sb, NULL, 0);
Hmph, you two may be both wrong ;-) It is unfortunate that you
cannot easily configure what is considered "must be quoted" bytes
per invocation of the quote_c_style() function. Most problematic
is that in the cq_lookup[] table, SP and "!" are treated the same
way, i.e., <a b c> does not need to be quoted. This comes from the
fact that quote_c_style() was written for the sole purpose of
showing the pathnames on the header lines of "git diff" output.
We may be able to (ab)use quote_path() with QUOTE_PATH_QUOTE_SP
bit set for this purpose, though, to work around the above.
cq_must_quote() also is affected by core.quotepath, which is not a
desirable feature here---we want our machine readable program output
stable regardless of end-user preference.
By the way, perhaps we should propose to make the default value for
core.quotepath to "no" at Git 3.0 boundary?
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v3 0/4] rev-list: print additional missing object information
2025-01-10 5:34 ` [PATCH v2 0/2] rev-list: print additional missing object information Justin Tobler
@ 2025-02-01 20:16 ` Justin Tobler
2025-02-01 20:16 ` [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath Justin Tobler
` (5 more replies)
0 siblings, 6 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-01 20:16 UTC (permalink / raw)
To: git; +Cc: christian.couder, Justin Tobler
Greetings,
It is possible to configure git-rev-list(1) to print the OID of missing
objects by setting the `--missing=print` option. While it is useful
knowing about these objects, it would be nice to have even more context
about the objects that are missing. Luckily, from an object containing
the missing object, it is possible to infer additional information the
missing object. For example, if the tree containing a missing blob still
exists, the tree entry for the missing object should contain path and
type information.
This series aims to provide git-rev-list(1) with a new `print-info`
missing action for the `--missing` option that, when set, behaves like
the existing `print` action but also prints other potentially
interesting information about the missing object.
Missing object info is printed in the form `?<oid> [<token>=<value>]...`
where multiple `<token>=<value>` pairs may be specified each separated
from each other with a SP. Values that contain SP or LF characters are
expected to be encoded in a manner such that these problematic bytes are
handled. For missing object path information this is handled by quoting
the path in the C style if it contains SP or special characters.
One concern I currently have with this quoting approach is that it is a
bit more challenging to machine parse compared to something like using a
null byte to delimit between missing info. One option is, in a followup
series, introduce a git-for-each-ref(1) style format syntax. Maybe
something like: `--missing=print-info:%(path)%00%(type)`. I'm curious if
anyone may have thoughts around this. My goal is to ensure that there is
an easy to use machine parsable interface to get this information. I
could see something like `?<oid> path="foo \"bar" type=blob`, being a
bit complex.
The series is set up as follows:
- Pathes 1 and 2 provide the `quote_c_style` and `quote_path` functions
with a way print consistent output regardless of how core.quotePath is
set.
- Patch 3 introduces the `print-info` missing action and supports
printing missing object path information.
- Patch 4 extends the `print-info` missing action to also print object
type information about the missing object.
Changes in V3:
- This missing info is provided once again through an explicit
`--missing=print-info` action instead of combining the
`--missing=print` and `--missing-info` flag. This was done to make the
interface a bit straightforward to use and not introduce a flag and is
dependent on another.
- Added flag to `quote_path()` to disable core.quotePath from impacting
quoted output and use it when print missing object path info.
- Update documentation to not use the term "attribute" which could be
confused with gitattributes.
Due to all rearranging/changes in this version, I opted not to include a
range-diff.
Thanks,
-Justin
Justin Tobler (4):
quote: add c quote flag to ignore core.quotePath
quote: add quote_path() flag to ignore config
rev-list: add print-info action to print missing object path
rev-list: extend print-info to print missing object type
Documentation/rev-list-options.txt | 19 +++++
builtin/rev-list.c | 107 ++++++++++++++++++++++++-----
quote.c | 27 +++++---
quote.h | 6 +-
t/t6022-rev-list-missing.sh | 53 ++++++++++++++
5 files changed, 184 insertions(+), 28 deletions(-)
base-commit: b74ff38af58464688b211140b90ec90598d340c6
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
@ 2025-02-01 20:16 ` Justin Tobler
2025-02-03 9:51 ` Christian Couder
2025-02-03 22:33 ` Junio C Hamano
2025-02-01 20:16 ` [PATCH v3 2/4] quote: add quote_path() flag to ignore config Justin Tobler
` (4 subsequent siblings)
5 siblings, 2 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-01 20:16 UTC (permalink / raw)
To: git; +Cc: christian.couder, Justin Tobler
The output of `cq_must_quote()` is affected by `core.quotePath`. This is
undesirable for operations that want to ensure consistent output
independent of config settings.
Introduce the `CQUOTE_IGNORE_CONFIG` flag for the `quote_c_style*`
functions which when set makes `cq_must_quote()` always follow the
default behavior (core.quotePath=true) regardless of how its set in the
config.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
quote.c | 14 ++++++++------
quote.h | 3 ++-
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/quote.c b/quote.c
index b9f6bdc775..d129c1de70 100644
--- a/quote.c
+++ b/quote.c
@@ -232,21 +232,22 @@ static signed char const cq_lookup[256] = {
/* 0x80 */ /* set to 0 */
};
-static inline int cq_must_quote(char c)
+static inline int cq_must_quote(char c, int ignore_config)
{
- return cq_lookup[(unsigned char)c] + quote_path_fully > 0;
+ return cq_lookup[(unsigned char)c] + (quote_path_fully || ignore_config) > 0;
}
/* returns the longest prefix not needing a quote up to maxlen if positive.
This stops at the first \0 because it's marked as a character needing an
escape */
-static size_t next_quote_pos(const char *s, ssize_t maxlen)
+static size_t next_quote_pos(const char *s, ssize_t maxlen, int ignore_config)
{
size_t len;
if (maxlen < 0) {
- for (len = 0; !cq_must_quote(s[len]); len++);
+ for (len = 0; !cq_must_quote(s[len], ignore_config); len++);
} else {
- for (len = 0; len < maxlen && !cq_must_quote(s[len]); len++);
+ for (len = 0;
+ len < maxlen && !cq_must_quote(s[len], ignore_config); len++);
}
return len;
}
@@ -282,13 +283,14 @@ static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
} while (0)
int no_dq = !!(flags & CQUOTE_NODQ);
+ int ignore_config = !!(flags & CQUOTE_IGNORE_CONFIG);
size_t len, count = 0;
const char *p = name;
for (;;) {
int ch;
- len = next_quote_pos(p, maxlen);
+ len = next_quote_pos(p, maxlen, ignore_config);
if (len == maxlen || (maxlen < 0 && !p[len]))
break;
diff --git a/quote.h b/quote.h
index 0300c29104..2a793fbef6 100644
--- a/quote.h
+++ b/quote.h
@@ -83,7 +83,8 @@ int sq_dequote_to_strvec(char *arg, struct strvec *);
int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
/* Bits in the flags parameter to quote_c_style() */
-#define CQUOTE_NODQ 01
+#define CQUOTE_NODQ (1u << 0)
+#define CQUOTE_IGNORE_CONFIG (1u << 1)
size_t quote_c_style(const char *name, struct strbuf *, FILE *, unsigned);
void quote_two_c_style(struct strbuf *, const char *, const char *, unsigned);
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH v3 2/4] quote: add quote_path() flag to ignore config
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
2025-02-01 20:16 ` [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath Justin Tobler
@ 2025-02-01 20:16 ` Justin Tobler
2025-02-02 10:52 ` Phillip Wood
` (2 more replies)
2025-02-01 20:16 ` [PATCH v3 3/4] rev-list: add print-info action to print missing object path Justin Tobler
` (3 subsequent siblings)
5 siblings, 3 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-01 20:16 UTC (permalink / raw)
To: git; +Cc: christian.couder, Justin Tobler
The `quote_path()` function invokes `quote_c_style_counted()` to handle
quoting. This means the output `quote_path()` is ultimately affected by
`core.quotePath` configuration. In a subsequent commit, `quote_path()`
will be used in a scenario where the output should remain consistent
regardless of the current configuration.
Introduce the `QUOTE_PATH_IGNORE_CONFIG` flag for `quote_path()`which
when set instructs the underlying `quote_c_style_counted()` to also
ignore the `core.quotePath` configuration when executed.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
quote.c | 13 ++++++++++---
quote.h | 3 ++-
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/quote.c b/quote.c
index d129c1de70..baec34ca94 100644
--- a/quote.c
+++ b/quote.c
@@ -370,10 +370,18 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
{
struct strbuf sb = STRBUF_INIT;
const char *rel = relative_path(in, prefix, &sb);
- int force_dq = ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' '));
+ unsigned cquote_flags = 0;
+ int force_dq = 0;
strbuf_reset(out);
+ if ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' ')) {
+ force_dq = 1;
+ cquote_flags &= CQUOTE_NODQ;
+ }
+ if (flags & QUOTE_PATH_IGNORE_CONFIG)
+ cquote_flags &= CQUOTE_IGNORE_CONFIG;
+
/*
* If the caller wants us to enclose the output in a dq-pair
* whether quote_c_style_counted() needs to, we do it ourselves
@@ -381,8 +389,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
*/
if (force_dq)
strbuf_addch(out, '"');
- quote_c_style_counted(rel, strlen(rel), out, NULL,
- force_dq ? CQUOTE_NODQ : 0);
+ quote_c_style_counted(rel, strlen(rel), out, NULL, cquote_flags);
if (force_dq)
strbuf_addch(out, '"');
strbuf_release(&sb);
diff --git a/quote.h b/quote.h
index 2a793fbef6..84903951ef 100644
--- a/quote.h
+++ b/quote.h
@@ -94,7 +94,8 @@ void write_name_quoted_relative(const char *name, const char *prefix,
/* quote path as relative to the given prefix */
char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigned flags);
-#define QUOTE_PATH_QUOTE_SP 01
+#define QUOTE_PATH_QUOTE_SP (1u << 0)
+#define QUOTE_PATH_IGNORE_CONFIG (1u << 1)
/* quoting as a string literal for other languages */
void perl_quote_buf(struct strbuf *sb, const char *src);
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH v3 3/4] rev-list: add print-info action to print missing object path
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
2025-02-01 20:16 ` [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath Justin Tobler
2025-02-01 20:16 ` [PATCH v3 2/4] quote: add quote_path() flag to ignore config Justin Tobler
@ 2025-02-01 20:16 ` Justin Tobler
2025-02-01 20:16 ` [PATCH v3 4/4] rev-list: extend print-info to print missing object type Justin Tobler
` (2 subsequent siblings)
5 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-01 20:16 UTC (permalink / raw)
To: git; +Cc: christian.couder, Justin Tobler
Missing objects identified through git-rev-list(1) can be printed by
setting the `--missing=print` option. Additional information about the
missing object, such as its path and type, may be present in its
containing object.
Add the `print-info` missing action for the `--missing` option that,
when set, prints additional insight about the missing object inferred
from its containing object. Each line of output for a missing object is
in the form: `?<oid> [<token>=<value>]...`. The `<token>=<value>` pairs
containing additional information are separated from each other by a SP.
The value is encoded in a token specific fashion, but SP or LF contained
in value are always expected to be represented in such a way that the
resulting encoded value does not have either of these two problematic
bytes. This format is kept generic so it can be extended in the future
to support additional information.
For now, only a missing object path info is implemented. It follows the
form `path=<path>` and specifies the full path to the object from the
top-level tree. A path containing SP or special characters is enclosed
in double-quotes in the C style as needed. In a subsequent commit,
missing object type info will also be added.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/rev-list-options.txt | 16 +++++
builtin/rev-list.c | 102 ++++++++++++++++++++++++-----
t/t6022-rev-list-missing.sh | 52 +++++++++++++++
3 files changed, 153 insertions(+), 17 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 459e5a02f5..0bea9d4ad3 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1024,6 +1024,22 @@ Unexpected missing objects will raise an error.
The form '--missing=print' is like 'allow-any', but will also print a
list of the missing objects. Object IDs are prefixed with a ``?'' character.
+
+The form '--missing=print-info' is like 'print', but will also print additional
+information about the missing object inferred from its containing object. The
+information is all printed on the same line with the missing object ID in the
+form: `?<oid> [<token>=<value>]...`. The `<token>=<value>` pairs containing
+additional information are separated from each other by a SP. The value is
+encoded in a token specific fashion, but SP or LF contained in value are always
+expected to be represented in such a way that the resulting encoded value does
+not have either of these two problematic bytes. Each `<token>=<value>` may be
+one of the following:
++
+--
+* The `path=<path>` shows the path of the missing object inferred from a
+ containing object. A path containing SP or special characters is enclosed in
+ double-quotes in the C style as needed.
+--
++
If some tips passed to the traversal are missing, they will be
considered as missing too, and the traversal will ignore them. In case
we cannot get their Object ID though, an error will be raised.
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 3196da7b2d..4a45a4e555 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -22,7 +22,10 @@
#include "progress.h"
#include "reflog-walk.h"
#include "oidset.h"
+#include "oidmap.h"
#include "packfile.h"
+#include "quote.h"
+#include "strbuf.h"
static const char rev_list_usage[] =
"git rev-list [<options>] <commit>... [--] [<path>...]\n"
@@ -73,11 +76,16 @@ static unsigned progress_counter;
static struct oidset omitted_objects;
static int arg_print_omitted; /* print objects omitted by filter */
-static struct oidset missing_objects;
+struct missing_objects_map_entry {
+ struct oidmap_entry entry;
+ const char *path;
+};
+static struct oidmap missing_objects;
enum missing_action {
MA_ERROR = 0, /* fail if any missing objects are encountered */
MA_ALLOW_ANY, /* silently allow ALL missing objects */
MA_PRINT, /* print ALL missing objects in special section */
+ MA_PRINT_INFO, /* same as MA_PRINT but also prints missing object info */
MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
@@ -101,7 +109,46 @@ static off_t get_object_disk_usage(struct object *obj)
return size;
}
-static inline void finish_object__ma(struct object *obj)
+static void add_missing_object_entry(struct object_id *oid, const char *path)
+{
+ struct missing_objects_map_entry *entry;
+
+ if (oidmap_get(&missing_objects, oid))
+ return;
+
+ CALLOC_ARRAY(entry, 1);
+ entry->entry.oid = *oid;
+ if (path)
+ entry->path = xstrdup(path);
+ oidmap_put(&missing_objects, entry);
+}
+
+static void print_missing_object(struct missing_objects_map_entry *entry,
+ int print_missing_info)
+{
+ struct strbuf sb = STRBUF_INIT;
+
+ if (!print_missing_info) {
+ printf("?%s\n", oid_to_hex(&entry->entry.oid));
+ return;
+ }
+
+ if (entry->path && *entry->path) {
+ struct strbuf path = STRBUF_INIT;
+
+ strbuf_addstr(&sb, " path=");
+ quote_path(entry->path, NULL, &path,
+ QUOTE_PATH_QUOTE_SP | QUOTE_PATH_IGNORE_CONFIG);
+ strbuf_addbuf(&sb, &path);
+
+ strbuf_release(&path);
+ }
+
+ printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
+ strbuf_release(&sb);
+}
+
+static inline void finish_object__ma(struct object *obj, const char *name)
{
/*
* Whether or not we try to dynamically fetch missing objects
@@ -119,7 +166,8 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_PRINT:
- oidset_insert(&missing_objects, &obj->oid);
+ case MA_PRINT_INFO:
+ add_missing_object_entry(&obj->oid, name);
return;
case MA_ALLOW_PROMISOR:
@@ -152,7 +200,7 @@ static void show_commit(struct commit *commit, void *data)
if (revs->do_not_die_on_missing_objects &&
oidset_contains(&revs->missing_commits, &commit->object.oid)) {
- finish_object__ma(&commit->object);
+ finish_object__ma(&commit->object, NULL);
return;
}
@@ -268,12 +316,11 @@ static void show_commit(struct commit *commit, void *data)
finish_commit(commit);
}
-static int finish_object(struct object *obj, const char *name UNUSED,
- void *cb_data)
+static int finish_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
if (oid_object_info_extended(the_repository, &obj->oid, NULL, 0) < 0) {
- finish_object__ma(obj);
+ finish_object__ma(obj, name);
return 1;
}
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
@@ -414,6 +461,12 @@ static inline int parse_missing_action_value(const char *value)
return 1;
}
+ if (!strcmp(value, "print-info")) {
+ arg_missing_action = MA_PRINT_INFO;
+ fetch_if_missing = 0;
+ return 1;
+ }
+
if (!strcmp(value, "allow-promisor")) {
arg_missing_action = MA_ALLOW_PROMISOR;
fetch_if_missing = 0;
@@ -781,10 +834,18 @@ int cmd_rev_list(int argc,
if (arg_print_omitted)
oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
- if (arg_missing_action == MA_PRINT) {
- oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ if (arg_missing_action == MA_PRINT ||
+ arg_missing_action == MA_PRINT_INFO) {
+ struct oidset_iter iter;
+ struct object_id *oid;
+
+ oidmap_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ oidset_iter_init(&revs.missing_commits, &iter);
+
/* Add missing tips */
- oidset_insert_from_set(&missing_objects, &revs.missing_commits);
+ while ((oid = oidset_iter_next(&iter)))
+ add_missing_object_entry(oid, NULL);
+
oidset_clear(&revs.missing_commits);
}
@@ -800,13 +861,20 @@ int cmd_rev_list(int argc,
printf("~%s\n", oid_to_hex(oid));
oidset_clear(&omitted_objects);
}
- if (arg_missing_action == MA_PRINT) {
- struct oidset_iter iter;
- struct object_id *oid;
- oidset_iter_init(&missing_objects, &iter);
- while ((oid = oidset_iter_next(&iter)))
- printf("?%s\n", oid_to_hex(oid));
- oidset_clear(&missing_objects);
+ if (arg_missing_action == MA_PRINT ||
+ arg_missing_action == MA_PRINT_INFO) {
+ struct missing_objects_map_entry *entry;
+ struct oidmap_iter iter;
+
+ oidmap_iter_init(&missing_objects, &iter);
+
+ while ((entry = oidmap_iter_next(&iter))) {
+ print_missing_object(entry, arg_missing_action ==
+ MA_PRINT_INFO);
+ free((void *)entry->path);
+ }
+
+ oidmap_free(&missing_objects, true);
}
stop_progress(&progress);
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 7553a9cca2..38afca6f09 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -145,4 +145,56 @@ do
done
done
+for obj in "HEAD~1" "HEAD^{tree}" "HEAD:foo" "HEAD:foo/bar" "HEAD:baz baz"
+do
+ test_expect_success "--missing=print-info with missing '$obj'" '
+ test_when_finished rm -rf missing-info &&
+
+ git init missing-info &&
+ (
+ cd missing-info &&
+ git commit --allow-empty -m first &&
+
+ mkdir foo &&
+ echo bar >foo/bar &&
+ echo baz >"baz baz" &&
+ echo bat >bat\" &&
+ git add -A &&
+ git commit -m second &&
+
+ oid="$(git rev-parse "$obj")" &&
+ path=".git/objects/$(test_oid_to_path $oid)" &&
+
+ case $obj in
+ HEAD:foo)
+ path_info=" path=foo"
+ ;;
+ HEAD:foo/bar)
+ path_info=" path=foo/bar"
+ ;;
+ "HEAD:baz baz")
+ path_info=" path=\"baz baz\""
+ ;;
+ "HEAD:bat\"")
+ path_info=" path=\"bat\\\"\""
+ ;;
+ esac &&
+
+ # Before the object is made missing, we use rev-list to
+ # get the expected oids.
+ git rev-list --objects --no-object-names \
+ HEAD ^"$obj" >expect.raw &&
+ echo "?$oid$path_info" >>expect.raw &&
+
+ mv "$path" "$path.hidden" &&
+ git rev-list --objects --no-object-names \
+ --missing=print-info HEAD >actual.raw &&
+
+ sort actual.raw >actual &&
+ sort expect.raw >expect &&
+ test_cmp expect actual
+ )
+ '
+done
+
test_done
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH v3 4/4] rev-list: extend print-info to print missing object type
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
` (2 preceding siblings ...)
2025-02-01 20:16 ` [PATCH v3 3/4] rev-list: add print-info action to print missing object path Justin Tobler
@ 2025-02-01 20:16 ` Justin Tobler
2025-02-03 10:45 ` [PATCH v3 0/4] rev-list: print additional missing object information Christian Couder
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
5 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-01 20:16 UTC (permalink / raw)
To: git; +Cc: christian.couder, Justin Tobler
Additional information about missing objects found in git-rev-list(1)
can be printed by specifying the `print-info` missing action for the
`--missing` option. Extend this action to also print missing object type
information inferred from its containing object. This token follows the
form `type=<type>` and specifies the expected object type of the missing
object.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/rev-list-options.txt | 3 +++
builtin/rev-list.c | 11 ++++++++---
t/t6022-rev-list-missing.sh | 3 ++-
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 0bea9d4ad3..f10f78c600 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1038,6 +1038,9 @@ one of the following:
* The `path=<path>` shows the path of the missing object inferred from a
containing object. A path containing SP or special characters is enclosed in
double-quotes in the C style as needed.
++
+* The `type=<type>` shows the type of the missing object inferred from a
+ containing object.
--
+
If some tips passed to the traversal are missing, they will be
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 4a45a4e555..963f96d031 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -79,6 +79,7 @@ static int arg_print_omitted; /* print objects omitted by filter */
struct missing_objects_map_entry {
struct oidmap_entry entry;
const char *path;
+ unsigned type;
};
static struct oidmap missing_objects;
enum missing_action {
@@ -109,7 +110,8 @@ static off_t get_object_disk_usage(struct object *obj)
return size;
}
-static void add_missing_object_entry(struct object_id *oid, const char *path)
+static void add_missing_object_entry(struct object_id *oid, const char *path,
+ unsigned type)
{
struct missing_objects_map_entry *entry;
@@ -118,6 +120,7 @@ static void add_missing_object_entry(struct object_id *oid, const char *path)
CALLOC_ARRAY(entry, 1);
entry->entry.oid = *oid;
+ entry->type = type;
if (path)
entry->path = xstrdup(path);
oidmap_put(&missing_objects, entry);
@@ -143,6 +146,8 @@ static void print_missing_object(struct missing_objects_map_entry *entry,
strbuf_release(&path);
}
+ if (entry->type)
+ strbuf_addf(&sb, " type=%s", type_name(entry->type));
printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
strbuf_release(&sb);
@@ -167,7 +172,7 @@ static inline void finish_object__ma(struct object *obj, const char *name)
case MA_PRINT:
case MA_PRINT_INFO:
- add_missing_object_entry(&obj->oid, name);
+ add_missing_object_entry(&obj->oid, name, obj->type);
return;
case MA_ALLOW_PROMISOR:
@@ -844,7 +849,7 @@ int cmd_rev_list(int argc,
/* Add missing tips */
while ((oid = oidset_iter_next(&iter)))
- add_missing_object_entry(oid, NULL);
+ add_missing_object_entry(oid, NULL, 0);
oidset_clear(&revs.missing_commits);
}
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 38afca6f09..3e2790d4c8 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -164,6 +164,7 @@ do
oid="$(git rev-parse "$obj")" &&
path=".git/objects/$(test_oid_to_path $oid)" &&
+ type_info=" type=$(git cat-file -t $oid)" &&
case $obj in
HEAD:foo)
@@ -184,7 +185,7 @@ do
# get the expected oids.
git rev-list --objects --no-object-names \
HEAD ^"$obj" >expect.raw &&
- echo "?$oid$path_info" >>expect.raw &&
+ echo "?$oid$path_info$type_info" >>expect.raw &&
mv "$path" "$path.hidden" &&
git rev-list --objects --no-object-names \
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [PATCH v3 2/4] quote: add quote_path() flag to ignore config
2025-02-01 20:16 ` [PATCH v3 2/4] quote: add quote_path() flag to ignore config Justin Tobler
@ 2025-02-02 10:52 ` Phillip Wood
2025-02-04 22:39 ` Justin Tobler
2025-02-03 10:07 ` Christian Couder
2025-02-03 22:52 ` Junio C Hamano
2 siblings, 1 reply; 37+ messages in thread
From: Phillip Wood @ 2025-02-02 10:52 UTC (permalink / raw)
To: Justin Tobler, git; +Cc: christian.couder
Hi Justin
On 01/02/2025 20:16, Justin Tobler wrote:
> The `quote_path()` function invokes `quote_c_style_counted()` to handle
> quoting. This means the output `quote_path()` is ultimately affected by
> `core.quotePath` configuration. In a subsequent commit, `quote_path()`
> will be used in a scenario where the output should remain consistent
> regardless of the current configuration.
>
> Introduce the `QUOTE_PATH_IGNORE_CONFIG` flag for `quote_path()`which
> when set instructs the underlying `quote_c_style_counted()` to also
> ignore the `core.quotePath` configuration when executed.
I'm confused as to why this is necessary. All of our existing plumbing
commands that print paths respect core.quotePath so why is rev-list
different? The config setting only affects the representation used for
bytes 0x80 and above, control characters, backslash and double-quote are
always quoted.
Best Wishes
Phillip
> Signed-off-by: Justin Tobler <jltobler@gmail.com>
> ---
> quote.c | 13 ++++++++++---
> quote.h | 3 ++-
> 2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/quote.c b/quote.c
> index d129c1de70..baec34ca94 100644
> --- a/quote.c
> +++ b/quote.c
> @@ -370,10 +370,18 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
> {
> struct strbuf sb = STRBUF_INIT;
> const char *rel = relative_path(in, prefix, &sb);
> - int force_dq = ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' '));
> + unsigned cquote_flags = 0;
> + int force_dq = 0;
>
> strbuf_reset(out);
>
> + if ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' ')) {
> + force_dq = 1;
> + cquote_flags &= CQUOTE_NODQ;
> + }
> + if (flags & QUOTE_PATH_IGNORE_CONFIG)
> + cquote_flags &= CQUOTE_IGNORE_CONFIG;
> +
> /*
> * If the caller wants us to enclose the output in a dq-pair
> * whether quote_c_style_counted() needs to, we do it ourselves
> @@ -381,8 +389,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
> */
> if (force_dq)
> strbuf_addch(out, '"');
> - quote_c_style_counted(rel, strlen(rel), out, NULL,
> - force_dq ? CQUOTE_NODQ : 0);
> + quote_c_style_counted(rel, strlen(rel), out, NULL, cquote_flags);
> if (force_dq)
> strbuf_addch(out, '"');
> strbuf_release(&sb);
> diff --git a/quote.h b/quote.h
> index 2a793fbef6..84903951ef 100644
> --- a/quote.h
> +++ b/quote.h
> @@ -94,7 +94,8 @@ void write_name_quoted_relative(const char *name, const char *prefix,
>
> /* quote path as relative to the given prefix */
> char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigned flags);
> -#define QUOTE_PATH_QUOTE_SP 01
> +#define QUOTE_PATH_QUOTE_SP (1u << 0)
> +#define QUOTE_PATH_IGNORE_CONFIG (1u << 1)
>
> /* quoting as a string literal for other languages */
> void perl_quote_buf(struct strbuf *sb, const char *src);
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath
2025-02-01 20:16 ` [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath Justin Tobler
@ 2025-02-03 9:51 ` Christian Couder
2025-02-03 22:14 ` Junio C Hamano
2025-02-03 22:33 ` Junio C Hamano
1 sibling, 1 reply; 37+ messages in thread
From: Christian Couder @ 2025-02-03 9:51 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Sat, Feb 1, 2025 at 9:20 PM Justin Tobler <jltobler@gmail.com> wrote:
> -static inline int cq_must_quote(char c)
> +static inline int cq_must_quote(char c, int ignore_config)
I think it's a bit better to use 'unsigned int' instead of just 'int'
for such flags, but it's fine here to use an 'int' because both
`quote_path_fully` and `no_dq` below already use that type.
> - for (len = 0; len < maxlen && !cq_must_quote(s[len]); len++);
> + for (len = 0;
> + len < maxlen && !cq_must_quote(s[len], ignore_config); len++);
Micronit: If you really want to split the line into many lines, I
think it might be better to go all the way like this:
for (len = 0;
len < maxlen && !cq_must_quote(s[len], ignore_config);
len++);
> @@ -83,7 +83,8 @@ int sq_dequote_to_strvec(char *arg, struct strvec *);
> int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
>
> /* Bits in the flags parameter to quote_c_style() */
> -#define CQUOTE_NODQ 01
> +#define CQUOTE_NODQ (1u << 0)
Nice small cleanup which could be mentioned in the commit message if
you reroll this series.
> +#define CQUOTE_IGNORE_CONFIG (1u << 1)
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 2/4] quote: add quote_path() flag to ignore config
2025-02-01 20:16 ` [PATCH v3 2/4] quote: add quote_path() flag to ignore config Justin Tobler
2025-02-02 10:52 ` Phillip Wood
@ 2025-02-03 10:07 ` Christian Couder
2025-02-03 22:52 ` Junio C Hamano
2 siblings, 0 replies; 37+ messages in thread
From: Christian Couder @ 2025-02-03 10:07 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Sat, Feb 1, 2025 at 9:20 PM Justin Tobler <jltobler@gmail.com> wrote:
> @@ -370,10 +370,18 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
> {
> struct strbuf sb = STRBUF_INIT;
> const char *rel = relative_path(in, prefix, &sb);
> - int force_dq = ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' '));
> + unsigned cquote_flags = 0;
> + int force_dq = 0;
I don't think you needed to change how force_dq was computed.
> strbuf_reset(out);
>
> + if ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' ')) {
> + force_dq = 1;
> + cquote_flags &= CQUOTE_NODQ;
> + }
If you didn't change how force_dq was computed above, then this could be just:
if (force_dq)
cquote_flags &= CQUOTE_NODQ;
> /* quote path as relative to the given prefix */
> char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigned flags);
> -#define QUOTE_PATH_QUOTE_SP 01
> +#define QUOTE_PATH_QUOTE_SP (1u << 0)
Nice cleanup that could be mentioned in the commit message if you reroll.
> +#define QUOTE_PATH_IGNORE_CONFIG (1u << 1)
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 0/4] rev-list: print additional missing object information
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
` (3 preceding siblings ...)
2025-02-01 20:16 ` [PATCH v3 4/4] rev-list: extend print-info to print missing object type Justin Tobler
@ 2025-02-03 10:45 ` Christian Couder
2025-02-04 22:51 ` Justin Tobler
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
5 siblings, 1 reply; 37+ messages in thread
From: Christian Couder @ 2025-02-03 10:45 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Sat, Feb 1, 2025 at 9:20 PM Justin Tobler <jltobler@gmail.com> wrote:
>
> Greetings,
>
> It is possible to configure git-rev-list(1) to print the OID of missing
> objects by setting the `--missing=print` option. While it is useful
> knowing about these objects, it would be nice to have even more context
> about the objects that are missing. Luckily, from an object containing
> the missing object, it is possible to infer additional information the
> missing object. For example, if the tree containing a missing blob still
> exists, the tree entry for the missing object should contain path and
> type information.
>
> This series aims to provide git-rev-list(1) with a new `print-info`
> missing action for the `--missing` option that, when set, behaves like
> the existing `print` action but also prints other potentially
> interesting information about the missing object.
I took a look and commented a bit on patches 1/4 and 2/4. Not sure my
comments are worth a reroll on their own. The other patches look good
to me.
Anyway I think you might want to address Phillip Wood's concerns too:
https://lore.kernel.org/git/76390e3b-e749-4d28-98a5-05db7c5fbcd3@gmail.com/
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath
2025-02-03 9:51 ` Christian Couder
@ 2025-02-03 22:14 ` Junio C Hamano
0 siblings, 0 replies; 37+ messages in thread
From: Junio C Hamano @ 2025-02-03 22:14 UTC (permalink / raw)
To: Christian Couder; +Cc: Justin Tobler, git
Christian Couder <christian.couder@gmail.com> writes:
> On Sat, Feb 1, 2025 at 9:20 PM Justin Tobler <jltobler@gmail.com> wrote:
>
>
>> -static inline int cq_must_quote(char c)
>> +static inline int cq_must_quote(char c, int ignore_config)
>
> I think it's a bit better to use 'unsigned int' instead of just 'int'
> for such flags, but it's fine here to use an 'int' because both
> `quote_path_fully` and `no_dq` below already use that type.
Yup, good forward thinking to suggest using unsigned as these "this
is just a single bit, so let's use platform natural int" tend to
grow into "there is this another bit that is orthogonal, so pass it
as well in the same flag word", at which point unsigned would work
better for us. But until that happens, plain platform natural int
is fine.
>> - for (len = 0; len < maxlen && !cq_must_quote(s[len]); len++);
>> + for (len = 0;
>> + len < maxlen && !cq_must_quote(s[len], ignore_config); len++);
>
> Micronit: If you really want to split the line into many lines, I
> think it might be better to go all the way like this:
>
> for (len = 0;
> len < maxlen && !cq_must_quote(s[len], ignore_config);
> len++);
Good style suggestion.
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath
2025-02-01 20:16 ` [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath Justin Tobler
2025-02-03 9:51 ` Christian Couder
@ 2025-02-03 22:33 ` Junio C Hamano
2025-02-04 16:40 ` Junio C Hamano
1 sibling, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2025-02-03 22:33 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, christian.couder
Justin Tobler <jltobler@gmail.com> writes:
> The output of `cq_must_quote()` is affected by `core.quotePath`. This is
> undesirable for operations that want to ensure consistent output
> independent of config settings.
>
> Introduce the `CQUOTE_IGNORE_CONFIG` flag for the `quote_c_style*`
> functions which when set makes `cq_must_quote()` always follow the
> default behavior (core.quotePath=true) regardless of how its set in the
> config.
Hmph.
I was hoping that we can flip the default for 'core.quotePath' to
'no' at Git 3.0 boundary, to help folks in non-ASCII locale. If
this is about emitting pipe-able output out of rev-list, unlike a
patch that is to be e-mailed (and being 8-bit clean was a risky
assumption to make in 2005) that core.quotePath was originally
invented for, it is more so that we would not want to force the
receiving end to unquote, no?
So regardless of what the future default value of core.quotePath
would be, I am not convinced that it is a good idea to octal quote
any and all bytes outside the ASCII range in the rev-list output.
After all, "git rev-list --objects" would show such a path without
quoting, no [*]?
Side note: the path in the output from "git rev-list --objects"
is a hack to allow receiving end to compute a path hash, and
does not have to be strictly reversible, so it emits verbatim
bytes but truncates the output at LF to preserve the one-line
one-object output format.
We do need to quote certain bytes (e.g., LF cannot be allowed
verbatim, when the output is line-oriented, and we use C-quote,
which means literal double-quote needs to be also quoted), so we
cannot mimic paths emitted by "git rev-list --objects", but I do not
think it buys us much to quote non-ASCII bytes these days.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 2/4] quote: add quote_path() flag to ignore config
2025-02-01 20:16 ` [PATCH v3 2/4] quote: add quote_path() flag to ignore config Justin Tobler
2025-02-02 10:52 ` Phillip Wood
2025-02-03 10:07 ` Christian Couder
@ 2025-02-03 22:52 ` Junio C Hamano
2 siblings, 0 replies; 37+ messages in thread
From: Junio C Hamano @ 2025-02-03 22:52 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, christian.couder
Justin Tobler <jltobler@gmail.com> writes:
> The `quote_path()` function invokes `quote_c_style_counted()` to handle
> quoting. This means the output `quote_path()` is ultimately affected by
> `core.quotePath` configuration. In a subsequent commit, `quote_path()`
> will be used in a scenario where the output should remain consistent
> regardless of the current configuration.
>
> Introduce the `QUOTE_PATH_IGNORE_CONFIG` flag for `quote_path()`which
> when set instructs the underlying `quote_c_style_counted()` to also
> ignore the `core.quotePath` configuration when executed.
>
> Signed-off-by: Justin Tobler <jltobler@gmail.com>
> ---
I've already read what [3/4] does using this, but I do not quite see
why the scenario the quote_path() function is called requires that
the output should remain consistent.
As it is possible that a path has any byte other than NUL in it, the
recipient MUST be prepared to unquote it before being able to use
it. So if a run of "rev-list --info" in the same repository with
the same arguments tells a path as "abc" (without quotes) and
another one shows the same path as "abc" (with quotes) or even as
"\141\142\143", the consumer should be able to cope with the
variations and see that they are the same path, no? At least, I do
not quite see why we require consistency across different settings
of the configuration.
If we drop the first two patches, as long as we use a known value in
core.quotepath in tests in t6022, everything should reliably work,
no?
When we do need to quote SP (i.e. outside the "diff" output, for
which originally cquote machinery was invented), the default cquote
mechanism does not help, so we need to allow _some_ tweak from the
caller's side, but I do not know if I agree that "no, we do not
allow bytes with 8-bit set without quoting" is a good idea in this
age.
Thanks.
> quote.c | 13 ++++++++++---
> quote.h | 3 ++-
> 2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/quote.c b/quote.c
> index d129c1de70..baec34ca94 100644
> --- a/quote.c
> +++ b/quote.c
> @@ -370,10 +370,18 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
> {
> struct strbuf sb = STRBUF_INIT;
> const char *rel = relative_path(in, prefix, &sb);
> - int force_dq = ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' '));
> + unsigned cquote_flags = 0;
> + int force_dq = 0;
>
> strbuf_reset(out);
>
> + if ((flags & QUOTE_PATH_QUOTE_SP) && strchr(rel, ' ')) {
> + force_dq = 1;
> + cquote_flags &= CQUOTE_NODQ;
> + }
> + if (flags & QUOTE_PATH_IGNORE_CONFIG)
> + cquote_flags &= CQUOTE_IGNORE_CONFIG;
> +
> /*
> * If the caller wants us to enclose the output in a dq-pair
> * whether quote_c_style_counted() needs to, we do it ourselves
> @@ -381,8 +389,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
> */
> if (force_dq)
> strbuf_addch(out, '"');
> - quote_c_style_counted(rel, strlen(rel), out, NULL,
> - force_dq ? CQUOTE_NODQ : 0);
> + quote_c_style_counted(rel, strlen(rel), out, NULL, cquote_flags);
> if (force_dq)
> strbuf_addch(out, '"');
> strbuf_release(&sb);
> diff --git a/quote.h b/quote.h
> index 2a793fbef6..84903951ef 100644
> --- a/quote.h
> +++ b/quote.h
> @@ -94,7 +94,8 @@ void write_name_quoted_relative(const char *name, const char *prefix,
>
> /* quote path as relative to the given prefix */
> char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigned flags);
> -#define QUOTE_PATH_QUOTE_SP 01
> +#define QUOTE_PATH_QUOTE_SP (1u << 0)
> +#define QUOTE_PATH_IGNORE_CONFIG (1u << 1)
>
> /* quoting as a string literal for other languages */
> void perl_quote_buf(struct strbuf *sb, const char *src);
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath
2025-02-03 22:33 ` Junio C Hamano
@ 2025-02-04 16:40 ` Junio C Hamano
2025-02-04 22:50 ` Justin Tobler
0 siblings, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2025-02-04 16:40 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, christian.couder
Junio C Hamano <gitster@pobox.com> writes:
> So regardless of what the future default value of core.quotePath
> would be, I am not convinced that it is a good idea to octal quote
> any and all bytes outside the ASCII range in the rev-list output.
>
> After all, "git rev-list --objects" would show such a path without
> quoting, no [*]?
>
> Side note: the path in the output from "git rev-list --objects"
> is a hack to allow receiving end to compute a path hash, and
> does not have to be strictly reversible, so it emits verbatim
> bytes but truncates the output at LF to preserve the one-line
> one-object output format.
>
> We do need to quote certain bytes (e.g., LF cannot be allowed
> verbatim, when the output is line-oriented, and we use C-quote,
> which means literal double-quote needs to be also quoted), so we
> cannot mimic paths emitted by "git rev-list --objects", but I do not
> think it buys us much to quote non-ASCII bytes these days.
Rereading this I realize that I was not quite making sense.
A short version of what I wanted to say is:
- The output format need to do some quoting anyway because it is
inevitable to make the string stuffed as "value" in a space
separated list of var=value on a single line.
- It does not really matter if core.quotePath allows us to pass
bytes with 8-bit set (e.g. UTF-8 outside ASCII) unquoted or
require quoting. The receiving end must be prepared to unquote,
so it is dubious that a new feature to ignore core.quotePath is
needed.
- We do need to quote SP that cquote does not require quoting, so
some wrapping around quote_c_style() like quote_path() does is
needed.
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 2/4] quote: add quote_path() flag to ignore config
2025-02-02 10:52 ` Phillip Wood
@ 2025-02-04 22:39 ` Justin Tobler
2025-02-11 16:51 ` Phillip Wood
0 siblings, 1 reply; 37+ messages in thread
From: Justin Tobler @ 2025-02-04 22:39 UTC (permalink / raw)
To: phillip.wood; +Cc: git, christian.couder
On 25/02/02 10:52AM, Phillip Wood wrote:
> Hi Justin
>
> On 01/02/2025 20:16, Justin Tobler wrote:
> > The `quote_path()` function invokes `quote_c_style_counted()` to handle
> > quoting. This means the output `quote_path()` is ultimately affected by
> > `core.quotePath` configuration. In a subsequent commit, `quote_path()`
> > will be used in a scenario where the output should remain consistent
> > regardless of the current configuration.
> >
> > Introduce the `QUOTE_PATH_IGNORE_CONFIG` flag for `quote_path()`which
> > when set instructs the underlying `quote_c_style_counted()` to also
> > ignore the `core.quotePath` configuration when executed.
>
> I'm confused as to why this is necessary. All of our existing plumbing
> commands that print paths respect core.quotePath so why is rev-list
> different? The config setting only affects the representation used for bytes
> 0x80 and above, control characters, backslash and double-quote are always
> quoted.
You are correct that it isn't neccesary. From a previous discussion, I
initally thought it would preferrable to have consistent output for this
plumbing operation, but as you pointed out other plumbing commands
respect core.quotePath. I'll drop these first two patches in the next
version.
Thanks for the feedback!
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath
2025-02-04 16:40 ` Junio C Hamano
@ 2025-02-04 22:50 ` Justin Tobler
0 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-04 22:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, christian.couder
On 25/02/04 08:40AM, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
> > So regardless of what the future default value of core.quotePath
> > would be, I am not convinced that it is a good idea to octal quote
> > any and all bytes outside the ASCII range in the rev-list output.
> >
> > After all, "git rev-list --objects" would show such a path without
> > quoting, no [*]?
> >
> > Side note: the path in the output from "git rev-list --objects"
> > is a hack to allow receiving end to compute a path hash, and
> > does not have to be strictly reversible, so it emits verbatim
> > bytes but truncates the output at LF to preserve the one-line
> > one-object output format.
> >
> > We do need to quote certain bytes (e.g., LF cannot be allowed
> > verbatim, when the output is line-oriented, and we use C-quote,
> > which means literal double-quote needs to be also quoted), so we
> > cannot mimic paths emitted by "git rev-list --objects", but I do not
> > think it buys us much to quote non-ASCII bytes these days.
>
> Rereading this I realize that I was not quite making sense.
>
> A short version of what I wanted to say is:
>
> - The output format need to do some quoting anyway because it is
> inevitable to make the string stuffed as "value" in a space
> separated list of var=value on a single line.
>
> - It does not really matter if core.quotePath allows us to pass
> bytes with 8-bit set (e.g. UTF-8 outside ASCII) unquoted or
> require quoting. The receiving end must be prepared to unquote,
> so it is dubious that a new feature to ignore core.quotePath is
> needed.
I agree, thinking about this some more, forcing the value quoting
behavior to act as if core.quotePath is always enabled is a step in the
wrong direction. If anything, we could force it disabled to be more
friendly to non-ASCII characters, but that is probably not worth it
either.
> - We do need to quote SP that cquote does not require quoting, so
> some wrapping around quote_c_style() like quote_path() does is
> needed.
I think using quote_path() as-is should be sufficient. I'll drop the
first two patches from this series in the next version I send and adapt
accordingly.
Thanks
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 0/4] rev-list: print additional missing object information
2025-02-03 10:45 ` [PATCH v3 0/4] rev-list: print additional missing object information Christian Couder
@ 2025-02-04 22:51 ` Justin Tobler
0 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-04 22:51 UTC (permalink / raw)
To: Christian Couder; +Cc: git
On 25/02/03 11:45AM, Christian Couder wrote:
> On Sat, Feb 1, 2025 at 9:20 PM Justin Tobler <jltobler@gmail.com> wrote:
> >
> > Greetings,
> >
> > It is possible to configure git-rev-list(1) to print the OID of missing
> > objects by setting the `--missing=print` option. While it is useful
> > knowing about these objects, it would be nice to have even more context
> > about the objects that are missing. Luckily, from an object containing
> > the missing object, it is possible to infer additional information the
> > missing object. For example, if the tree containing a missing blob still
> > exists, the tree entry for the missing object should contain path and
> > type information.
> >
> > This series aims to provide git-rev-list(1) with a new `print-info`
> > missing action for the `--missing` option that, when set, behaves like
> > the existing `print` action but also prints other potentially
> > interesting information about the missing object.
>
> I took a look and commented a bit on patches 1/4 and 2/4. Not sure my
> comments are worth a reroll on their own. The other patches look good
> to me.
>
> Anyway I think you might want to address Phillip Wood's concerns too:
>
> https://lore.kernel.org/git/76390e3b-e749-4d28-98a5-05db7c5fbcd3@gmail.com/
>
> Thanks.
Thanks Christian for the review!
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v4 0/2] rev-list: print additional missing object information
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
` (4 preceding siblings ...)
2025-02-03 10:45 ` [PATCH v3 0/4] rev-list: print additional missing object information Christian Couder
@ 2025-02-05 0:41 ` Justin Tobler
2025-02-05 0:41 ` [PATCH v4 1/2] rev-list: add print-info action to print missing object path Justin Tobler
` (3 more replies)
5 siblings, 4 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-05 0:41 UTC (permalink / raw)
To: git; +Cc: christian.couder, phillip.wood123, Justin Tobler
Greetings,
It is possible to configure git-rev-list(1) to print the OID of missing
objects by setting the `--missing=print` option. While it is useful
knowing about these objects, it would be nice to have even more context
about the objects that are missing. Luckily, from an object containing
the missing object, it is possible to infer additional information the
missing object. For example, if the tree containing a missing blob still
exists, the tree entry for the missing object should contain path and
type information.
This series aims to provide git-rev-list(1) with a new `print-info`
missing action for the `--missing` option that, when set, behaves like
the existing `print` action but also prints other potentially
interesting information about the missing object.
Missing object info is printed in the form `?<oid> [<token>=<value>]...`
where multiple `<token>=<value>` pairs may be specified each separated
from each other with a SP. Values that contain SP or LF characters are
expected to be encoded in a manner such that these problematic bytes are
handled. For missing object path information this is handled by quoting
the path in the C style if it contains SP or special characters.
One concern I currently have with this quoting approach is that it is a
bit more challenging to machine parse compared to something like using a
null byte to delimit between missing info. One option is, in a followup
series, introduce a git-for-each-ref(1) style format syntax. Maybe
something like: `--missing=print-info:%(path)%00%(type)`. I'm curious if
anyone may have thoughts around this. My goal is to ensure that there is
an easy to use machine parsable interface to get this information. I
could see something like `?<oid> path="foo \"bar" type=blob`, being a
bit complex.
The series is set up as follows:
- Patch 1 introduces the `print-info` missing action and supports
printing missing object path information.
- Patch 2 extends the `print-info` missing action to also print object
type information about the missing object.
Changes in V4:
- The core.quotePath behavior is no longer force enabled for the missing
info values. Consequently the first two patches from the previous
version are dropped.
Thanks,
-Justin
Justin Tobler (2):
rev-list: add print-info action to print missing object path
rev-list: extend print-info to print missing object type
Documentation/rev-list-options.txt | 19 ++++++
builtin/rev-list.c | 106 ++++++++++++++++++++++++-----
t/t6022-rev-list-missing.sh | 53 +++++++++++++++
3 files changed, 161 insertions(+), 17 deletions(-)
Range-diff against v3:
1: f628728300 < -: ---------- quote: add c quote flag to ignore core.quotePath
2: 53a3811d8f < -: ---------- quote: add quote_path() flag to ignore config
3: fe7a3da8de ! 1: e3d5295b4d rev-list: add print-info action to print missing object path
@@ builtin/rev-list.c: static off_t get_object_disk_usage(struct object *obj)
+ struct strbuf path = STRBUF_INIT;
+
+ strbuf_addstr(&sb, " path=");
-+ quote_path(entry->path, NULL, &path,
-+ QUOTE_PATH_QUOTE_SP | QUOTE_PATH_IGNORE_CONFIG);
++ quote_path(entry->path, NULL, &path, QUOTE_PATH_QUOTE_SP);
+ strbuf_addbuf(&sb, &path);
+
+ strbuf_release(&path);
4: 788b497d00 = 2: 6aa71444d3 rev-list: extend print-info to print missing object type
base-commit: b74ff38af58464688b211140b90ec90598d340c6
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v4 1/2] rev-list: add print-info action to print missing object path
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
@ 2025-02-05 0:41 ` Justin Tobler
2025-02-05 0:41 ` [PATCH v4 2/2] rev-list: extend print-info to print missing object type Justin Tobler
` (2 subsequent siblings)
3 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-05 0:41 UTC (permalink / raw)
To: git; +Cc: christian.couder, phillip.wood123, Justin Tobler
Missing objects identified through git-rev-list(1) can be printed by
setting the `--missing=print` option. Additional information about the
missing object, such as its path and type, may be present in its
containing object.
Add the `print-info` missing action for the `--missing` option that,
when set, prints additional insight about the missing object inferred
from its containing object. Each line of output for a missing object is
in the form: `?<oid> [<token>=<value>]...`. The `<token>=<value>` pairs
containing additional information are separated from each other by a SP.
The value is encoded in a token specific fashion, but SP or LF contained
in value are always expected to be represented in such a way that the
resulting encoded value does not have either of these two problematic
bytes. This format is kept generic so it can be extended in the future
to support additional information.
For now, only a missing object path info is implemented. It follows the
form `path=<path>` and specifies the full path to the object from the
top-level tree. A path containing SP or special characters is enclosed
in double-quotes in the C style as needed. In a subsequent commit,
missing object type info will also be added.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/rev-list-options.txt | 16 +++++
builtin/rev-list.c | 101 ++++++++++++++++++++++++-----
t/t6022-rev-list-missing.sh | 52 +++++++++++++++
3 files changed, 152 insertions(+), 17 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 459e5a02f5..0bea9d4ad3 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1024,6 +1024,22 @@ Unexpected missing objects will raise an error.
The form '--missing=print' is like 'allow-any', but will also print a
list of the missing objects. Object IDs are prefixed with a ``?'' character.
+
+The form '--missing=print-info' is like 'print', but will also print additional
+information about the missing object inferred from its containing object. The
+information is all printed on the same line with the missing object ID in the
+form: `?<oid> [<token>=<value>]...`. The `<token>=<value>` pairs containing
+additional information are separated from each other by a SP. The value is
+encoded in a token specific fashion, but SP or LF contained in value are always
+expected to be represented in such a way that the resulting encoded value does
+not have either of these two problematic bytes. Each `<token>=<value>` may be
+one of the following:
++
+--
+* The `path=<path>` shows the path of the missing object inferred from a
+ containing object. A path containing SP or special characters is enclosed in
+ double-quotes in the C style as needed.
+--
++
If some tips passed to the traversal are missing, they will be
considered as missing too, and the traversal will ignore them. In case
we cannot get their Object ID though, an error will be raised.
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 3196da7b2d..00f157eb68 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -22,7 +22,10 @@
#include "progress.h"
#include "reflog-walk.h"
#include "oidset.h"
+#include "oidmap.h"
#include "packfile.h"
+#include "quote.h"
+#include "strbuf.h"
static const char rev_list_usage[] =
"git rev-list [<options>] <commit>... [--] [<path>...]\n"
@@ -73,11 +76,16 @@ static unsigned progress_counter;
static struct oidset omitted_objects;
static int arg_print_omitted; /* print objects omitted by filter */
-static struct oidset missing_objects;
+struct missing_objects_map_entry {
+ struct oidmap_entry entry;
+ const char *path;
+};
+static struct oidmap missing_objects;
enum missing_action {
MA_ERROR = 0, /* fail if any missing objects are encountered */
MA_ALLOW_ANY, /* silently allow ALL missing objects */
MA_PRINT, /* print ALL missing objects in special section */
+ MA_PRINT_INFO, /* same as MA_PRINT but also prints missing object info */
MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
@@ -101,7 +109,45 @@ static off_t get_object_disk_usage(struct object *obj)
return size;
}
-static inline void finish_object__ma(struct object *obj)
+static void add_missing_object_entry(struct object_id *oid, const char *path)
+{
+ struct missing_objects_map_entry *entry;
+
+ if (oidmap_get(&missing_objects, oid))
+ return;
+
+ CALLOC_ARRAY(entry, 1);
+ entry->entry.oid = *oid;
+ if (path)
+ entry->path = xstrdup(path);
+ oidmap_put(&missing_objects, entry);
+}
+
+static void print_missing_object(struct missing_objects_map_entry *entry,
+ int print_missing_info)
+{
+ struct strbuf sb = STRBUF_INIT;
+
+ if (!print_missing_info) {
+ printf("?%s\n", oid_to_hex(&entry->entry.oid));
+ return;
+ }
+
+ if (entry->path && *entry->path) {
+ struct strbuf path = STRBUF_INIT;
+
+ strbuf_addstr(&sb, " path=");
+ quote_path(entry->path, NULL, &path, QUOTE_PATH_QUOTE_SP);
+ strbuf_addbuf(&sb, &path);
+
+ strbuf_release(&path);
+ }
+
+ printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
+ strbuf_release(&sb);
+}
+
+static inline void finish_object__ma(struct object *obj, const char *name)
{
/*
* Whether or not we try to dynamically fetch missing objects
@@ -119,7 +165,8 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_PRINT:
- oidset_insert(&missing_objects, &obj->oid);
+ case MA_PRINT_INFO:
+ add_missing_object_entry(&obj->oid, name);
return;
case MA_ALLOW_PROMISOR:
@@ -152,7 +199,7 @@ static void show_commit(struct commit *commit, void *data)
if (revs->do_not_die_on_missing_objects &&
oidset_contains(&revs->missing_commits, &commit->object.oid)) {
- finish_object__ma(&commit->object);
+ finish_object__ma(&commit->object, NULL);
return;
}
@@ -268,12 +315,11 @@ static void show_commit(struct commit *commit, void *data)
finish_commit(commit);
}
-static int finish_object(struct object *obj, const char *name UNUSED,
- void *cb_data)
+static int finish_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
if (oid_object_info_extended(the_repository, &obj->oid, NULL, 0) < 0) {
- finish_object__ma(obj);
+ finish_object__ma(obj, name);
return 1;
}
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
@@ -414,6 +460,12 @@ static inline int parse_missing_action_value(const char *value)
return 1;
}
+ if (!strcmp(value, "print-info")) {
+ arg_missing_action = MA_PRINT_INFO;
+ fetch_if_missing = 0;
+ return 1;
+ }
+
if (!strcmp(value, "allow-promisor")) {
arg_missing_action = MA_ALLOW_PROMISOR;
fetch_if_missing = 0;
@@ -781,10 +833,18 @@ int cmd_rev_list(int argc,
if (arg_print_omitted)
oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
- if (arg_missing_action == MA_PRINT) {
- oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ if (arg_missing_action == MA_PRINT ||
+ arg_missing_action == MA_PRINT_INFO) {
+ struct oidset_iter iter;
+ struct object_id *oid;
+
+ oidmap_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+ oidset_iter_init(&revs.missing_commits, &iter);
+
/* Add missing tips */
- oidset_insert_from_set(&missing_objects, &revs.missing_commits);
+ while ((oid = oidset_iter_next(&iter)))
+ add_missing_object_entry(oid, NULL);
+
oidset_clear(&revs.missing_commits);
}
@@ -800,13 +860,20 @@ int cmd_rev_list(int argc,
printf("~%s\n", oid_to_hex(oid));
oidset_clear(&omitted_objects);
}
- if (arg_missing_action == MA_PRINT) {
- struct oidset_iter iter;
- struct object_id *oid;
- oidset_iter_init(&missing_objects, &iter);
- while ((oid = oidset_iter_next(&iter)))
- printf("?%s\n", oid_to_hex(oid));
- oidset_clear(&missing_objects);
+ if (arg_missing_action == MA_PRINT ||
+ arg_missing_action == MA_PRINT_INFO) {
+ struct missing_objects_map_entry *entry;
+ struct oidmap_iter iter;
+
+ oidmap_iter_init(&missing_objects, &iter);
+
+ while ((entry = oidmap_iter_next(&iter))) {
+ print_missing_object(entry, arg_missing_action ==
+ MA_PRINT_INFO);
+ free((void *)entry->path);
+ }
+
+ oidmap_free(&missing_objects, true);
}
stop_progress(&progress);
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 7553a9cca2..38afca6f09 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -145,4 +145,56 @@ do
done
done
+for obj in "HEAD~1" "HEAD^{tree}" "HEAD:foo" "HEAD:foo/bar" "HEAD:baz baz"
+do
+ test_expect_success "--missing=print-info with missing '$obj'" '
+ test_when_finished rm -rf missing-info &&
+
+ git init missing-info &&
+ (
+ cd missing-info &&
+ git commit --allow-empty -m first &&
+
+ mkdir foo &&
+ echo bar >foo/bar &&
+ echo baz >"baz baz" &&
+ echo bat >bat\" &&
+ git add -A &&
+ git commit -m second &&
+
+ oid="$(git rev-parse "$obj")" &&
+ path=".git/objects/$(test_oid_to_path $oid)" &&
+
+ case $obj in
+ HEAD:foo)
+ path_info=" path=foo"
+ ;;
+ HEAD:foo/bar)
+ path_info=" path=foo/bar"
+ ;;
+ "HEAD:baz baz")
+ path_info=" path=\"baz baz\""
+ ;;
+ "HEAD:bat\"")
+ path_info=" path=\"bat\\\"\""
+ ;;
+ esac &&
+
+ # Before the object is made missing, we use rev-list to
+ # get the expected oids.
+ git rev-list --objects --no-object-names \
+ HEAD ^"$obj" >expect.raw &&
+ echo "?$oid$path_info" >>expect.raw &&
+
+ mv "$path" "$path.hidden" &&
+ git rev-list --objects --no-object-names \
+ --missing=print-info HEAD >actual.raw &&
+
+ sort actual.raw >actual &&
+ sort expect.raw >expect &&
+ test_cmp expect actual
+ )
+ '
+done
+
test_done
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH v4 2/2] rev-list: extend print-info to print missing object type
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
2025-02-05 0:41 ` [PATCH v4 1/2] rev-list: add print-info action to print missing object path Justin Tobler
@ 2025-02-05 0:41 ` Justin Tobler
2025-02-05 10:35 ` [PATCH v4 0/2] rev-list: print additional missing object information Christian Couder
2025-02-05 13:18 ` Junio C Hamano
3 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-05 0:41 UTC (permalink / raw)
To: git; +Cc: christian.couder, phillip.wood123, Justin Tobler
Additional information about missing objects found in git-rev-list(1)
can be printed by specifying the `print-info` missing action for the
`--missing` option. Extend this action to also print missing object type
information inferred from its containing object. This token follows the
form `type=<type>` and specifies the expected object type of the missing
object.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/rev-list-options.txt | 3 +++
builtin/rev-list.c | 11 ++++++++---
t/t6022-rev-list-missing.sh | 3 ++-
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 0bea9d4ad3..f10f78c600 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -1038,6 +1038,9 @@ one of the following:
* The `path=<path>` shows the path of the missing object inferred from a
containing object. A path containing SP or special characters is enclosed in
double-quotes in the C style as needed.
++
+* The `type=<type>` shows the type of the missing object inferred from a
+ containing object.
--
+
If some tips passed to the traversal are missing, they will be
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 00f157eb68..ead43a34ef 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -79,6 +79,7 @@ static int arg_print_omitted; /* print objects omitted by filter */
struct missing_objects_map_entry {
struct oidmap_entry entry;
const char *path;
+ unsigned type;
};
static struct oidmap missing_objects;
enum missing_action {
@@ -109,7 +110,8 @@ static off_t get_object_disk_usage(struct object *obj)
return size;
}
-static void add_missing_object_entry(struct object_id *oid, const char *path)
+static void add_missing_object_entry(struct object_id *oid, const char *path,
+ unsigned type)
{
struct missing_objects_map_entry *entry;
@@ -118,6 +120,7 @@ static void add_missing_object_entry(struct object_id *oid, const char *path)
CALLOC_ARRAY(entry, 1);
entry->entry.oid = *oid;
+ entry->type = type;
if (path)
entry->path = xstrdup(path);
oidmap_put(&missing_objects, entry);
@@ -142,6 +145,8 @@ static void print_missing_object(struct missing_objects_map_entry *entry,
strbuf_release(&path);
}
+ if (entry->type)
+ strbuf_addf(&sb, " type=%s", type_name(entry->type));
printf("?%s%s\n", oid_to_hex(&entry->entry.oid), sb.buf);
strbuf_release(&sb);
@@ -166,7 +171,7 @@ static inline void finish_object__ma(struct object *obj, const char *name)
case MA_PRINT:
case MA_PRINT_INFO:
- add_missing_object_entry(&obj->oid, name);
+ add_missing_object_entry(&obj->oid, name, obj->type);
return;
case MA_ALLOW_PROMISOR:
@@ -843,7 +848,7 @@ int cmd_rev_list(int argc,
/* Add missing tips */
while ((oid = oidset_iter_next(&iter)))
- add_missing_object_entry(oid, NULL);
+ add_missing_object_entry(oid, NULL, 0);
oidset_clear(&revs.missing_commits);
}
diff --git a/t/t6022-rev-list-missing.sh b/t/t6022-rev-list-missing.sh
index 38afca6f09..3e2790d4c8 100755
--- a/t/t6022-rev-list-missing.sh
+++ b/t/t6022-rev-list-missing.sh
@@ -164,6 +164,7 @@ do
oid="$(git rev-parse "$obj")" &&
path=".git/objects/$(test_oid_to_path $oid)" &&
+ type_info=" type=$(git cat-file -t $oid)" &&
case $obj in
HEAD:foo)
@@ -184,7 +185,7 @@ do
# get the expected oids.
git rev-list --objects --no-object-names \
HEAD ^"$obj" >expect.raw &&
- echo "?$oid$path_info" >>expect.raw &&
+ echo "?$oid$path_info$type_info" >>expect.raw &&
mv "$path" "$path.hidden" &&
git rev-list --objects --no-object-names \
--
2.48.1.157.g3b0d05c4a7
^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [PATCH v4 0/2] rev-list: print additional missing object information
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
2025-02-05 0:41 ` [PATCH v4 1/2] rev-list: add print-info action to print missing object path Justin Tobler
2025-02-05 0:41 ` [PATCH v4 2/2] rev-list: extend print-info to print missing object type Justin Tobler
@ 2025-02-05 10:35 ` Christian Couder
2025-02-05 17:18 ` Justin Tobler
2025-02-05 13:18 ` Junio C Hamano
3 siblings, 1 reply; 37+ messages in thread
From: Christian Couder @ 2025-02-05 10:35 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, phillip.wood123, Junio C Hamano
On Wed, Feb 5, 2025 at 1:45 AM Justin Tobler <jltobler@gmail.com> wrote:
> Changes in V4:
>
> - The core.quotePath behavior is no longer force enabled for the missing
> info values. Consequently the first two patches from the previous
> version are dropped.
This v4 looks good to me. Ack!
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v4 0/2] rev-list: print additional missing object information
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
` (2 preceding siblings ...)
2025-02-05 10:35 ` [PATCH v4 0/2] rev-list: print additional missing object information Christian Couder
@ 2025-02-05 13:18 ` Junio C Hamano
2025-02-05 17:17 ` Justin Tobler
3 siblings, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2025-02-05 13:18 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, christian.couder, phillip.wood123
Justin Tobler <jltobler@gmail.com> writes:
> One concern I currently have with this quoting approach is that it is a
> bit more challenging to machine parse compared to something like using a
> null byte to delimit between missing info. One option is, in a followup
> series, introduce a git-for-each-ref(1) style format syntax. Maybe
> something like: `--missing=print-info:%(path)%00%(type)`. I'm curious if
> anyone may have thoughts around this.
Would it be so bad if we said that in -z mode with --info option,
each record is terminated with two NUL bytes, and elements on a list
of var=value pairs have a single NUL in between, or something silly
like that? The point is to get away with just a fixed format,
without any customization.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v4 0/2] rev-list: print additional missing object information
2025-02-05 13:18 ` Junio C Hamano
@ 2025-02-05 17:17 ` Justin Tobler
2025-02-05 18:29 ` Junio C Hamano
0 siblings, 1 reply; 37+ messages in thread
From: Justin Tobler @ 2025-02-05 17:17 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, christian.couder, phillip.wood123
On 25/02/05 05:18AM, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:
>
> > One concern I currently have with this quoting approach is that it is a
> > bit more challenging to machine parse compared to something like using a
> > null byte to delimit between missing info. One option is, in a followup
> > series, introduce a git-for-each-ref(1) style format syntax. Maybe
> > something like: `--missing=print-info:%(path)%00%(type)`. I'm curious if
> > anyone may have thoughts around this.
>
> Would it be so bad if we said that in -z mode with --info option,
> each record is terminated with two NUL bytes, and elements on a list
> of var=value pairs have a single NUL in between, or something silly
> like that? The point is to get away with just a fixed format,
> without any customization.
I agree that some sort of fixed format would be preferable as it's less
complex while also being simpler to implement. I originally considered
using NUL but realized a single NUL byte to delimit between entries
wouldn't be sufficient to determine where each record would end. Using
two NUL bytes next to each other to mark the end of a record would work
though.
Since even a normal rev-list record may have an object name entry in
addition to its OID when the `--objects` option is set, maybe we could
introduce a `-z` option that always terminates a record with two NUL
bytes?
The output for `git rev-list -z --objects --missing=print-info` could
look something like the following (no LF at EOL):
6aa71444d3d41315509c3f2cfe2d45d86cea20d7<NUL><NUL>
f009994f5d7fc97c1e87b4dc7ad69057a07e85c4<NUL>foo/bar<NUL><NUL>
?f10f78c60046b2be841c9e2403960663439296c3<NUL>path=foo/bar/baz<NUL>type=blob<NUL><NUL>
?ead43a34efd775b58d6b3e86db6bc71bbedd2c1c<NUL>path=foo/bar/baz 2<NUL>type=blob<NUL><NUL>
Having two NUL bytes to delimit between records might be a bit odd in
the common case for git-rev-list(1) without the `--objects` and
`--missing` options since we would only expect a list of OIDs. Having
consistent `-z` option output irrespective of other options might be
preferable though.
If this approach seems reasonable, I can do so in a followup series.
Thanks
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v4 0/2] rev-list: print additional missing object information
2025-02-05 10:35 ` [PATCH v4 0/2] rev-list: print additional missing object information Christian Couder
@ 2025-02-05 17:18 ` Justin Tobler
0 siblings, 0 replies; 37+ messages in thread
From: Justin Tobler @ 2025-02-05 17:18 UTC (permalink / raw)
To: Christian Couder; +Cc: git, phillip.wood123, Junio C Hamano
On 25/02/05 11:35AM, Christian Couder wrote:
> On Wed, Feb 5, 2025 at 1:45 AM Justin Tobler <jltobler@gmail.com> wrote:
>
> > Changes in V4:
> >
> > - The core.quotePath behavior is no longer force enabled for the missing
> > info values. Consequently the first two patches from the previous
> > version are dropped.
>
> This v4 looks good to me. Ack!
Thanks for the review!
-Justin
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v4 0/2] rev-list: print additional missing object information
2025-02-05 17:17 ` Justin Tobler
@ 2025-02-05 18:29 ` Junio C Hamano
0 siblings, 0 replies; 37+ messages in thread
From: Junio C Hamano @ 2025-02-05 18:29 UTC (permalink / raw)
To: Justin Tobler; +Cc: git, christian.couder, phillip.wood123
Justin Tobler <jltobler@gmail.com> writes:
> wouldn't be sufficient to determine where each record would end. Using
> two NUL bytes next to each other to mark the end of a record would work
> though.
I think we already use that convention elsewhere, and that is why I
brought it up as a potential approach to take.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v3 2/4] quote: add quote_path() flag to ignore config
2025-02-04 22:39 ` Justin Tobler
@ 2025-02-11 16:51 ` Phillip Wood
0 siblings, 0 replies; 37+ messages in thread
From: Phillip Wood @ 2025-02-11 16:51 UTC (permalink / raw)
To: Justin Tobler, phillip.wood; +Cc: git, christian.couder
Hi Justin
On 04/02/2025 22:39, Justin Tobler wrote:
>
> You are correct that it isn't neccesary. From a previous discussion, I
> initally thought it would preferrable to have consistent output for this
> plumbing operation, but as you pointed out other plumbing commands
> respect core.quotePath. I'll drop these first two patches in the next
> version.
That's great, I'm afraid I don't know enough about the rev-list
machinery to comment on the meat of this series.
Best Wishes
Phillip
^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads:[~2025-02-11 16:51 UTC | newest]
Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-01-08 3:40 [PATCH] rev-list: print missing object type with --missing=print-type Justin Tobler
2025-01-08 10:08 ` Christian Couder
2025-01-08 22:28 ` Justin Tobler
2025-01-08 15:17 ` Junio C Hamano
2025-01-08 22:18 ` Justin Tobler
2025-01-08 22:43 ` Junio C Hamano
2025-01-08 23:13 ` Justin Tobler
2025-01-10 5:34 ` [PATCH v2 0/2] rev-list: print additional missing object information Justin Tobler
2025-02-01 20:16 ` [PATCH v3 0/4] " Justin Tobler
2025-02-01 20:16 ` [PATCH v3 1/4] quote: add c quote flag to ignore core.quotePath Justin Tobler
2025-02-03 9:51 ` Christian Couder
2025-02-03 22:14 ` Junio C Hamano
2025-02-03 22:33 ` Junio C Hamano
2025-02-04 16:40 ` Junio C Hamano
2025-02-04 22:50 ` Justin Tobler
2025-02-01 20:16 ` [PATCH v3 2/4] quote: add quote_path() flag to ignore config Justin Tobler
2025-02-02 10:52 ` Phillip Wood
2025-02-04 22:39 ` Justin Tobler
2025-02-11 16:51 ` Phillip Wood
2025-02-03 10:07 ` Christian Couder
2025-02-03 22:52 ` Junio C Hamano
2025-02-01 20:16 ` [PATCH v3 3/4] rev-list: add print-info action to print missing object path Justin Tobler
2025-02-01 20:16 ` [PATCH v3 4/4] rev-list: extend print-info to print missing object type Justin Tobler
2025-02-03 10:45 ` [PATCH v3 0/4] rev-list: print additional missing object information Christian Couder
2025-02-04 22:51 ` Justin Tobler
2025-02-05 0:41 ` [PATCH v4 0/2] " Justin Tobler
2025-02-05 0:41 ` [PATCH v4 1/2] rev-list: add print-info action to print missing object path Justin Tobler
2025-02-05 0:41 ` [PATCH v4 2/2] rev-list: extend print-info to print missing object type Justin Tobler
2025-02-05 10:35 ` [PATCH v4 0/2] rev-list: print additional missing object information Christian Couder
2025-02-05 17:18 ` Justin Tobler
2025-02-05 13:18 ` Junio C Hamano
2025-02-05 17:17 ` Justin Tobler
2025-02-05 18:29 ` Junio C Hamano
2025-01-10 5:34 ` [PATCH v2 1/2] rev-list: add --missing-info to print missing object path Justin Tobler
2025-01-10 8:47 ` Christian Couder
2025-01-10 15:22 ` Junio C Hamano
2025-01-10 5:34 ` [PATCH v2 2/2] rev-list: extend --missing-info to print missing object type Justin Tobler
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).