From: Alex Riesen <raa.lkml@gmail.com>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org,
Johannes Schindelin <Johannes.Schindelin@gmx.de>,
Mikael Magnusson <mikachu@gmail.com>
Subject: [PATCH] git wrapper: DWIM mistyped commands
Date: Sun, 31 Aug 2008 15:50:23 +0200 [thread overview]
Message-ID: <20080831135023.GA6616@blimp.local> (raw)
In-Reply-To: <7vprnqifd2.fsf@gitster.siamese.dyndns.org> <237967ef0808300333t2cd4e354xd461f7bfead40f4c@mail.gmail.com>
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
This patch introduces a modified Damerau-Levenshtein algorithm into
Git's code base, and uses it with the following penalties to show some
similar commands when an unknown command was encountered:
swap = 0, insertion = 1, substitution = 2, deletion = 4
A typical output would now look like this:
$ git sm
git: 'sm' is not a git-command. See 'git --help'.
Did you mean one of these?
am
rm
The cut-off is at similarity rating 6, which was empirically determined
to give sensible results.
As a convenience, if there is only one candidate, Git continues under
the assumption that the user mistyped it. Example:
$ git reabse
WARNING: You called a Git program named 'reabse', which does
not exist.
Continuing under the assumption that you meant 'rebase'
[...]
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
---
Junio C Hamano, Sat, Aug 30, 2008 19:26:17 +0200:
> I think you do not need the file-scope static levenshtein_cmd anymore with
> this change, if you make similarity() take two command names. No?
Yes :)
> Please reroll the whole f66dd34 (git wrapper: DWIM mistyped commands,
> 2008-08-28), as it is not part of any solid integration branch yet.
I think I better reroll (now) both
> You might also want to update the commit log message to talk about the
> "len" reuse hack, but you already have in-code comment which might be
> sufficient.
I believe it is (and I added one against the member in the
declaration)
Mikael, this also might fix the crash you're seeing: the heap was
corrupted by clean_cmdnames(&other_cmds) names members of which were
moved to main_cmds.
Makefile | 2 +
builtin.h | 2 +-
git.c | 4 ++-
help.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
help.h | 2 +-
levenshtein.c | 47 +++++++++++++++++++++++++++++++++++++
levenshtein.h | 8 ++++++
7 files changed, 133 insertions(+), 4 deletions(-)
create mode 100644 levenshtein.c
create mode 100644 levenshtein.h
diff --git a/Makefile b/Makefile
index bf400e6..3daa6dc 100644
--- a/Makefile
+++ b/Makefile
@@ -358,6 +358,7 @@ LIB_H += graph.h
LIB_H += grep.h
LIB_H += hash.h
LIB_H += help.h
+LIB_H += levenshtein.h
LIB_H += list-objects.h
LIB_H += ll-merge.h
LIB_H += log-tree.h
@@ -433,6 +434,7 @@ LIB_OBJS += hash.o
LIB_OBJS += help.o
LIB_OBJS += ident.o
LIB_OBJS += interpolate.o
+LIB_OBJS += levenshtein.o
LIB_OBJS += list-objects.o
LIB_OBJS += ll-merge.o
LIB_OBJS += lockfile.o
diff --git a/builtin.h b/builtin.h
index f3502d3..e67cb20 100644
--- a/builtin.h
+++ b/builtin.h
@@ -11,7 +11,7 @@ extern const char git_usage_string[];
extern const char git_more_info_string[];
extern void list_common_cmds_help(void);
-extern void help_unknown_cmd(const char *cmd);
+extern const char *help_unknown_cmd(const char *cmd);
extern void prune_packed_objects(int);
extern int read_line_with_nul(char *buf, int size, FILE *file);
extern int fmt_merge_msg(int merge_summary, struct strbuf *in,
diff --git a/git.c b/git.c
index 37b1d76..54c5bfa 100644
--- a/git.c
+++ b/git.c
@@ -499,7 +499,9 @@ int main(int argc, const char **argv)
cmd, argv[0]);
exit(1);
}
- help_unknown_cmd(cmd);
+ argv[0] = help_unknown_cmd(cmd);
+ handle_internal_command(argc, argv);
+ execv_dashed_external(argv);
}
fprintf(stderr, "Failed to run command '%s': %s\n",
diff --git a/help.c b/help.c
index b278257..aaba809 100644
--- a/help.c
+++ b/help.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "builtin.h"
#include "exec_cmd.h"
+#include "levenshtein.h"
#include "help.h"
/* most GUI terminals set COLUMNS (although some don't export it) */
@@ -37,6 +38,16 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len)
cmds->names[cmds->cnt++] = ent;
}
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+ int i;
+ for (i = 0; i < cmds->cnt; ++i)
+ free(cmds->names[i]);
+ free(cmds->names);
+ cmds->cnt = 0;
+ cmds->alloc = 0;
+}
+
static int cmdname_compare(const void *a_, const void *b_)
{
struct cmdname *a = *(struct cmdname **)a_;
@@ -250,9 +261,68 @@ int is_in_cmdlist(struct cmdnames *c, const char *s)
return 0;
}
-void help_unknown_cmd(const char *cmd)
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+ const struct cmdname *const *c1 = p1, *const *c2 = p2;
+ const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+ int l1 = (*c1)->len;
+ int l2 = (*c2)->len;
+ return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+const char *help_unknown_cmd(const char *cmd)
{
+ int i, n, best_similarity = 0;
+ struct cmdnames main_cmds, other_cmds;
+
+ memset(&main_cmds, 0, sizeof(main_cmds));
+ memset(&other_cmds, 0, sizeof(main_cmds));
+
+ load_command_list("git-", &main_cmds, &other_cmds);
+
+ ALLOC_GROW(main_cmds.names, main_cmds.cnt + other_cmds.cnt,
+ main_cmds.alloc);
+ memcpy(main_cmds.names + main_cmds.cnt, other_cmds.names,
+ other_cmds.cnt * sizeof(other_cmds.names[0]));
+ main_cmds.cnt += other_cmds.cnt;
+ free(other_cmds.names);
+
+ /* This reuses cmdname->len for similarity index */
+ for (i = 0; i < main_cmds.cnt; ++i)
+ main_cmds.names[i]->len =
+ levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(*main_cmds.names), levenshtein_compare);
+
+ if (!main_cmds.cnt)
+ die ("Uh oh. Your system reports no Git commands at all.");
+
+ best_similarity = main_cmds.names[0]->len;
+ n = 1;
+ while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+ ++n;
+ if (n == 1) {
+ const char *assumed = main_cmds.names[0]->name;
+ main_cmds.names[0] = NULL;
+ clean_cmdnames(&main_cmds);
+ fprintf(stderr, "WARNING: You called a Git program named '%s', "
+ "which does not exist.\n"
+ "Continuing under the assumption that you meant '%s'\n",
+ cmd, assumed);
+ return assumed;
+ }
+
fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd);
+
+ if (best_similarity < 6) {
+ fprintf(stderr, "\nDid you mean %s?\n",
+ n < 2 ? "this": "one of these");
+
+ for (i = 0; i < n; i++)
+ fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+ }
+
exit(1);
}
diff --git a/help.h b/help.h
index 2733433..56bc154 100644
--- a/help.h
+++ b/help.h
@@ -5,7 +5,7 @@ struct cmdnames {
int alloc;
int cnt;
struct cmdname {
- size_t len;
+ size_t len; /* also used for similarity index in help.c */
char name[FLEX_ARRAY];
} **names;
};
diff --git a/levenshtein.c b/levenshtein.c
new file mode 100644
index 0000000..db52f2c
--- /dev/null
+++ b/levenshtein.c
@@ -0,0 +1,47 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+int levenshtein(const char *string1, const char *string2,
+ int w, int s, int a, int d)
+{
+ int len1 = strlen(string1), len2 = strlen(string2);
+ int *row0 = xmalloc(sizeof(int) * (len2 + 1));
+ int *row1 = xmalloc(sizeof(int) * (len2 + 1));
+ int *row2 = xmalloc(sizeof(int) * (len2 + 1));
+ int i, j;
+
+ for (j = 0; j <= len2; j++)
+ row1[j] = j * a;
+ for (i = 0; i < len1; i++) {
+ int *dummy;
+
+ row2[0] = (i + 1) * d;
+ for (j = 0; j < len2; j++) {
+ /* substitution */
+ row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+ /* swap */
+ if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+ string1[i] == string2[j - 1] &&
+ row2[j + 1] > row0[j - 1] + w)
+ row2[j + 1] = row0[j - 1] + w;
+ /* deletion */
+ if (j + 1 < len2 && row2[j + 1] > row1[j + 1] + d)
+ row2[j + 1] = row1[j + 1] + d;
+ /* insertion */
+ if (row2[j + 1] > row2[j] + a)
+ row2[j + 1] = row2[j] + a;
+ }
+
+ dummy = row0;
+ row0 = row1;
+ row1 = row2;
+ row2 = dummy;
+ }
+
+ i = row1[len2];
+ free(row0);
+ free(row1);
+ free(row2);
+
+ return i;
+}
diff --git a/levenshtein.h b/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+ int swap_penalty, int substition_penalty,
+ int insertion_penalty, int deletion_penalty);
+
+#endif
--
1.6.0.1.168.gdf6f0
next prev parent reply other threads:[~2008-08-31 13:52 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-28 17:15 [PATCH] Remove calculation of the longest command name from where it is not used Alex Riesen, Alex Riesen
2008-08-28 21:27 ` [PATCH updated] git wrapper: DWIM mistyped commands Alex Riesen
2008-08-28 21:28 ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-08-29 10:11 ` Andreas Ericsson
2008-09-08 6:50 ` Junio C Hamano
2008-08-29 14:58 ` [PATCH updated] git wrapper: DWIM mistyped commands Mikael Magnusson
2008-08-30 10:12 ` Alex Riesen
2008-08-30 10:33 ` Mikael Magnusson
2008-08-31 13:50 ` Alex Riesen [this message]
2008-08-31 13:54 ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-08-31 14:49 ` Matthieu Moy
2008-08-31 16:33 ` Junio C Hamano
2008-09-01 14:42 ` [PATCH] git wrapper: DWIM mistyped commands Mikael Magnusson
2008-08-31 13:57 ` [PATCH updated] " Alex Riesen
2008-08-30 15:36 ` Junio C Hamano
2008-08-30 16:44 ` Alex Riesen
2008-08-30 17:13 ` [PATCH] Reuse cmdname->len to store pre-calculated similarity indexes Alex Riesen
2008-08-30 17:26 ` Junio C Hamano
[not found] ` <a2075f4c0808301510g1af01b14kd58da12dc2e80f93@mail.gmail.com>
2008-08-30 22:17 ` [PATCH updated] git wrapper: DWIM mistyped commands Felipe Carvalho Oliveira
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080831135023.GA6616@blimp.local \
--to=raa.lkml@gmail.com \
--cc=Johannes.Schindelin@gmx.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mikachu@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).