* [PATCH 1/2] shortlog: introduce `--group-filter` to restrict output
2023-06-07 23:02 [PATCH 0/2] shortlog: introduce --email-only, --group-filter options Taylor Blau
@ 2023-06-07 23:02 ` Taylor Blau
2023-06-08 14:34 ` Derrick Stolee
2023-06-07 23:02 ` [PATCH 2/2] shortlog: introduce `--email-only` to only show emails Taylor Blau
1 sibling, 1 reply; 8+ messages in thread
From: Taylor Blau @ 2023-06-07 23:02 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Victoria Dye, Jeff King, Junio C Hamano
Sometimes it is useful to ask, "how many commits have I authored or been
mentioned in via the Co-authored-by trailer?" `git shortlog` is a
reasonable tool for this, and the answer can easily be obtained by
running:
$ git shortlog -ns --group=author --group=trailer:Co-authored-by
and reading off the corresponding value from its output. But what if you
want to know which commits contribute to the above count? You can drop
the `-s` option and parse through the results, but you'll have to skip
past everything you don't want (and stop reading after matching
everything you do want).
That is a script-able task, but it is cumbersome, and potentially very
slow, if there is a large amount of output that does not match the given
query.
Instead, this patch introduces `--group-filter` in order to restrict the
output of `git shortlog` to only matching group(s). Items match if they
are in a group which is strictly equal to one of the specified filters.
This means that you could easily view the hashes of all commits you
either wrote or co-authored with something like:
$ git shortlog -n --group=author --group=trailer:Co-authored-by \
--group-filter="$(git config user.name)"
When filtering just by trailers, it is tempting to want to introduce a
new grep mode for matching a given trailer, like `--author=<pattern>`
for matching the author header. But this would not be suitable for the
above, since we want commits which match either the author or the
Co-authored-by trailer, not ones which match both.
An alternative approach might be to implement trailer filtering as
above in revision.c, and show commits matching either the `--author`
value or some hypothetical `--trailer` filter. That would give shortlog
fewer commits, which may improve its performance. But it would restrict
the interpretation of these options to be an OR (i.e. show commits
matching either the `--author` or `--trailer` field).
In fact, such an OR interpretation is already ruled out, since the
`--author` and `--committer` options are documented as:
> Limit the commits output to ones with author/committer header lines
> that match the specified pattern
So introducing another option which changes the behavior of existing
ones is a non-starter.
Instead, `git shortlog` will process more commits than necessary. But
this is a marginal cost, since implementing the hypothetical revision
options from above as an OR would mean that revision.c has to process
every commit anyway.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/git-shortlog.txt | 5 ++++
builtin/shortlog.c | 20 ++++++++++++-
shortlog.h | 2 ++
t/t4201-shortlog.sh | 54 ++++++++++++++++++++++++++++++++++
4 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt
index 7d0277d033..dab6d09648 100644
--- a/Documentation/git-shortlog.txt
+++ b/Documentation/git-shortlog.txt
@@ -97,6 +97,11 @@ counts both authors and co-authors.
If width is `0` (zero) then indent the lines of the output without wrapping
them.
+--group-filter=<group>::
+ Only show output from the given group. If given more than once,
+ show output from any of the previously specified groups. May be
+ cleared with `--no-group-filter`.
+
<revision-range>::
Show only commits in the specified revision range. When no
<revision-range> is specified, it defaults to `HEAD` (i.e. the
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 46f4e0832a..679db22c57 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -365,6 +365,7 @@ void shortlog_init(struct shortlog *log)
log->trailers.strdup_strings = 1;
log->trailers.cmp = strcasecmp;
log->format.strdup_strings = 1;
+ log->group_filter.strdup_strings = 1;
}
void shortlog_finish_setup(struct shortlog *log)
@@ -377,6 +378,7 @@ void shortlog_finish_setup(struct shortlog *log)
log->email ? "%cN <%cE>" : "%cN");
string_list_sort(&log->trailers);
+ string_list_sort(&log->group_filter);
}
int cmd_shortlog(int argc, const char **argv, const char *prefix)
@@ -400,6 +402,8 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
&parse_wrap_args),
OPT_CALLBACK(0, "group", &log, N_("field"),
N_("group by field"), parse_group_option),
+ OPT_STRING_LIST(0, "group-filter", &log.group_filter,
+ N_("group"), N_("only show matching groups")),
OPT_END(),
};
@@ -476,6 +480,13 @@ static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s,
strbuf_addch(sb, '\n');
}
+static int want_shortlog_group(struct shortlog *log, const char *group)
+{
+ if (!log->group_filter.nr)
+ return 1;
+ return string_list_has_string(&log->group_filter, group);
+}
+
void shortlog_output(struct shortlog *log)
{
size_t i, j;
@@ -486,6 +497,9 @@ void shortlog_output(struct shortlog *log)
log->summary ? compare_by_counter : compare_by_list);
for (i = 0; i < log->list.nr; i++) {
const struct string_list_item *item = &log->list.items[i];
+ if (!want_shortlog_group(log, item->string))
+ goto next;
+
if (log->summary) {
fprintf(log->file, "%6d\t%s\n",
(int)UTIL_TO_INT(item), item->string);
@@ -505,11 +519,15 @@ void shortlog_output(struct shortlog *log)
fprintf(log->file, " %s\n", msg);
}
putc('\n', log->file);
+ }
+
+next:
+ if (!log->summary) {
+ struct string_list *onelines = item->util;
onelines->strdup_strings = 1;
string_list_clear(onelines, 0);
free(onelines);
}
-
log->list.items[i].util = NULL;
}
diff --git a/shortlog.h b/shortlog.h
index 28d04f951a..8ebee0e2d6 100644
--- a/shortlog.h
+++ b/shortlog.h
@@ -18,6 +18,8 @@ struct shortlog {
int abbrev;
struct date_mode date_mode;
+ struct string_list group_filter;
+
enum {
SHORTLOG_GROUP_AUTHOR = (1 << 0),
SHORTLOG_GROUP_COMMITTER = (1 << 1),
diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh
index 8e4effebdb..0695c42ca8 100755
--- a/t/t4201-shortlog.sh
+++ b/t/t4201-shortlog.sh
@@ -346,6 +346,60 @@ test_expect_success 'shortlog can match multiple groups' '
test_cmp expect actual
'
+test_expect_success '--group-filter shows only matching groups (single groups)' '
+ cat >expect <<-\EOF &&
+ 1 A U Thor
+ EOF
+ git shortlog -ns \
+ --group=trailer:another-trailer \
+ --group-filter="A U Thor" \
+ -2 HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '--group-filter shows only matching groups (multiple groups)' '
+ cat >expect <<-\EOF &&
+ 2 A U Thor
+ EOF
+ git shortlog -ns \
+ --group=author \
+ --group=trailer:some-trailer \
+ --group=trailer:another-trailer \
+ --group-filter="A U Thor" \
+ -2 HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '--group-filter can be specified more than once' '
+ cat >expect <<-\EOF &&
+ 2 User B
+ 1 User A
+ EOF
+ git shortlog -ns \
+ --group=author \
+ --group=trailer:some-trailer \
+ --group=trailer:another-trailer \
+ --group-filter="User A" \
+ --group-filter="User B" \
+ -2 HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '--no-group-filter reset group filters' '
+ cat >expect <<-\EOF &&
+ 2 A U Thor
+ 2 User B
+ 1 User A
+ EOF
+ git shortlog -ns \
+ --group=author \
+ --group=trailer:some-trailer \
+ --group=trailer:another-trailer \
+ --group-filter="A U Thor" --no-group-filter \
+ -2 HEAD >actual &&
+ test_cmp expect actual
+'
+
test_expect_success 'shortlog can match multiple format groups' '
GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME" \
git commit --allow-empty -m "identical names" &&
--
2.41.0.1.gf123b68cb8
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] shortlog: introduce `--email-only` to only show emails
2023-06-07 23:02 [PATCH 0/2] shortlog: introduce --email-only, --group-filter options Taylor Blau
2023-06-07 23:02 ` [PATCH 1/2] shortlog: introduce `--group-filter` to restrict output Taylor Blau
@ 2023-06-07 23:02 ` Taylor Blau
2023-06-08 7:35 ` Johannes Sixt
1 sibling, 1 reply; 8+ messages in thread
From: Taylor Blau @ 2023-06-07 23:02 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Victoria Dye, Jeff King, Junio C Hamano
When a shortlog caller wants to group output by, say, author email, they
can easily express this with:
$ git shortlog --group=format:%ae
and restrict output to specific email(s) with the new `--group-filter`
option introduced by the previous commit.
But they are not able to apply the same treatment to identities that
appear in trailers. Doing:
$ git shortlog -e --group=format:%ae --group=trailer:Co-authored-by
will produce funky results, interspersing proper emails with full "Name
<email>" identities from the Co-authored-by trailer (or anything else
that might appear there), like:
461 me@ttaylorr.com
11 Taylor Blau <me@ttaylorr.com>
So if the caller wants to restrict output to a set of matching email
addresses (say, "me@ttaylorr.com"), they cannot do it with a
`--group-filter`, since it would discard the group "Taylor Blau
<me@ttaylorr.com>".
Introduce a new `--email-only` option, which extracts the email
component of an identity from all shortlog groups, including trailers.
It behaves similarly to the `-e` option, but replaces its output with
just the email component, instead of adding it on to the end.
Now, `shortlog` callers can run:
$ git shortlog -s --group=author --group=trailer:Co-authored-by \
--email-only --group-filter="<me@ttaylorr.com>"
472 <me@ttaylorr.com>
to obtain the output they want.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/git-shortlog.txt | 5 +++++
builtin/shortlog.c | 33 ++++++++++++++++++++++++---------
shortlog.h | 1 +
t/t4201-shortlog.sh | 28 ++++++++++++++++++++++++++++
4 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt
index dab6d09648..160c11aead 100644
--- a/Documentation/git-shortlog.txt
+++ b/Documentation/git-shortlog.txt
@@ -39,6 +39,11 @@ OPTIONS
--email::
Show the email address of each author.
+--email-only::
+ Show only the email address of each author, or committer. If
+ using a different kind of group (e.g. trailers, custom format,
+ etc.), only values which contain name/email-pairs will be shown.
+
--format[=<format>]::
Instead of the commit subject, use some other information to
describe each commit. '<format>' can be any string accepted
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 679db22c57..bbe01a376f 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -108,9 +108,13 @@ static int parse_ident(struct shortlog *log,
maillen = ident.mail_end - ident.mail_begin;
map_user(&log->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
- strbuf_add(out, namebuf, namelen);
- if (log->email)
- strbuf_addf(out, " <%.*s>", (int)maillen, mailbuf);
+ if (log->email_only) {
+ strbuf_addf(out, "<%.*s>", (int)maillen, mailbuf);
+ } else {
+ strbuf_add(out, namebuf, namelen);
+ if (log->email)
+ strbuf_addf(out, " <%.*s>", (int)maillen, mailbuf);
+ }
return 0;
}
@@ -198,6 +202,8 @@ static void insert_records_from_trailers(struct shortlog *log,
strbuf_reset(&ident);
if (!parse_ident(log, &ident, value))
value = ident.buf;
+ else if (log->email_only)
+ continue;
if (!strset_add(dups, value))
continue;
@@ -370,12 +376,19 @@ void shortlog_init(struct shortlog *log)
void shortlog_finish_setup(struct shortlog *log)
{
- if (log->groups & SHORTLOG_GROUP_AUTHOR)
- string_list_append(&log->format,
- log->email ? "%aN <%aE>" : "%aN");
- if (log->groups & SHORTLOG_GROUP_COMMITTER)
- string_list_append(&log->format,
- log->email ? "%cN <%cE>" : "%cN");
+ if (log->email_only) {
+ if (log->groups & SHORTLOG_GROUP_AUTHOR)
+ string_list_append(&log->format, "<%aE>");
+ if (log->groups & SHORTLOG_GROUP_COMMITTER)
+ string_list_append(&log->format, "<%cE>");
+ } else {
+ if (log->groups & SHORTLOG_GROUP_AUTHOR)
+ string_list_append(&log->format,
+ log->email ? "%aN <%aE>" : "%aN");
+ if (log->groups & SHORTLOG_GROUP_COMMITTER)
+ string_list_append(&log->format,
+ log->email ? "%cN <%cE>" : "%cN");
+ }
string_list_sort(&log->trailers);
string_list_sort(&log->group_filter);
@@ -397,6 +410,8 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
N_("suppress commit descriptions, only provides commit count")),
OPT_BOOL('e', "email", &log.email,
N_("show the email address of each author")),
+ OPT_BOOL(0, "email-only", &log.email_only,
+ N_("only show the email address of each author")),
OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
N_("linewrap output"), PARSE_OPT_OPTARG,
&parse_wrap_args),
diff --git a/shortlog.h b/shortlog.h
index 8ebee0e2d6..3fb28639c3 100644
--- a/shortlog.h
+++ b/shortlog.h
@@ -30,6 +30,7 @@ struct shortlog {
struct string_list format;
int email;
+ int email_only;
struct string_list mailmap;
FILE *file;
};
diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh
index 0695c42ca8..d747a402ff 100755
--- a/t/t4201-shortlog.sh
+++ b/t/t4201-shortlog.sh
@@ -400,6 +400,34 @@ test_expect_success '--no-group-filter reset group filters' '
test_cmp expect actual
'
+test_expect_success '--email-only shows emails from author' '
+ cat >expect <<-\EOF &&
+ 2 <author@example.com>
+ EOF
+ git shortlog -ns --group=author --email-only -2 HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '--email-only shows emails from committer' '
+ cat >expect <<-\EOF &&
+ 2 <committer@example.com>
+ EOF
+ git shortlog -ns --group=committer --email-only -2 HEAD >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '--email-only shows emails from trailers with idents' '
+ cat >expect <<-\EOF &&
+ 1 <a@example.com>
+ 1 <b@example.com>
+ EOF
+ # at this point, HEAD~3 has a trailer "Repeated-trailer: Foo",
+ # which is not shown here since it cannot be parsed as an ident
+ git shortlog -ns --group=trailer:some-trailer --email-only -3 \
+ HEAD >actual &&
+ test_cmp expect actual
+'
+
test_expect_success 'shortlog can match multiple format groups' '
GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME" \
git commit --allow-empty -m "identical names" &&
--
2.41.0.1.gf123b68cb8
^ permalink raw reply related [flat|nested] 8+ messages in thread