All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Riesen <raa.lkml@gmail.com>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org,
	Johannes Schindelin <Johannes.Schindelin@gmx.de>,
	Mikael Magnusson <mikachu@gmail.com>
Subject: [PATCH] git wrapper: DWIM mistyped commands
Date: Sun, 31 Aug 2008 15:50:23 +0200	[thread overview]
Message-ID: <20080831135023.GA6616@blimp.local> (raw)
In-Reply-To: <7vprnqifd2.fsf@gitster.siamese.dyndns.org> <237967ef0808300333t2cd4e354xd461f7bfead40f4c@mail.gmail.com>

From: Johannes Schindelin <Johannes.Schindelin@gmx.de>

This patch introduces a modified Damerau-Levenshtein algorithm into
Git's code base, and uses it with the following penalties to show some
similar commands when an unknown command was encountered:

	swap = 0, insertion = 1, substitution = 2, deletion = 4

A typical output would now look like this:

	$ git sm
	git: 'sm' is not a git-command. See 'git --help'.

	Did you mean one of these?
		am
		rm

The cut-off is at similarity rating 6, which was empirically determined
to give sensible results.

As a convenience, if there is only one candidate, Git continues under
the assumption that the user mistyped it.  Example:

	$ git reabse
	WARNING: You called a Git program named 'reabse', which does
	not exist.
	Continuing under the assumption that you meant 'rebase'
	[...]

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
---

Junio C Hamano, Sat, Aug 30, 2008 19:26:17 +0200:
> I think you do not need the file-scope static levenshtein_cmd anymore with
> this change, if you make similarity() take two command names.  No?

Yes :)

> Please reroll the whole f66dd34 (git wrapper: DWIM mistyped commands,
> 2008-08-28), as it is not part of any solid integration branch yet.

I think I better reroll (now) both

> You might also want to update the commit log message to talk about the
> "len" reuse hack, but you already have in-code comment which might be
> sufficient.

I believe it is (and I added one against the member in the
declaration)

Mikael, this also might fix the crash you're seeing: the heap was
corrupted by clean_cmdnames(&other_cmds) names members of which were
moved to main_cmds.

 Makefile      |    2 +
 builtin.h     |    2 +-
 git.c         |    4 ++-
 help.c        |   72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 help.h        |    2 +-
 levenshtein.c |   47 +++++++++++++++++++++++++++++++++++++
 levenshtein.h |    8 ++++++
 7 files changed, 133 insertions(+), 4 deletions(-)
 create mode 100644 levenshtein.c
 create mode 100644 levenshtein.h

diff --git a/Makefile b/Makefile
index bf400e6..3daa6dc 100644
--- a/Makefile
+++ b/Makefile
@@ -358,6 +358,7 @@ LIB_H += graph.h
 LIB_H += grep.h
 LIB_H += hash.h
 LIB_H += help.h
+LIB_H += levenshtein.h
 LIB_H += list-objects.h
 LIB_H += ll-merge.h
 LIB_H += log-tree.h
@@ -433,6 +434,7 @@ LIB_OBJS += hash.o
 LIB_OBJS += help.o
 LIB_OBJS += ident.o
 LIB_OBJS += interpolate.o
+LIB_OBJS += levenshtein.o
 LIB_OBJS += list-objects.o
 LIB_OBJS += ll-merge.o
 LIB_OBJS += lockfile.o
diff --git a/builtin.h b/builtin.h
index f3502d3..e67cb20 100644
--- a/builtin.h
+++ b/builtin.h
@@ -11,7 +11,7 @@ extern const char git_usage_string[];
 extern const char git_more_info_string[];
 
 extern void list_common_cmds_help(void);
-extern void help_unknown_cmd(const char *cmd);
+extern const char *help_unknown_cmd(const char *cmd);
 extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int fmt_merge_msg(int merge_summary, struct strbuf *in,
diff --git a/git.c b/git.c
index 37b1d76..54c5bfa 100644
--- a/git.c
+++ b/git.c
@@ -499,7 +499,9 @@ int main(int argc, const char **argv)
 				cmd, argv[0]);
 			exit(1);
 		}
-		help_unknown_cmd(cmd);
+		argv[0] = help_unknown_cmd(cmd);
+		handle_internal_command(argc, argv);
+		execv_dashed_external(argv);
 	}
 
 	fprintf(stderr, "Failed to run command '%s': %s\n",
diff --git a/help.c b/help.c
index b278257..aaba809 100644
--- a/help.c
+++ b/help.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "builtin.h"
 #include "exec_cmd.h"
+#include "levenshtein.h"
 #include "help.h"
 
 /* most GUI terminals set COLUMNS (although some don't export it) */
@@ -37,6 +38,16 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len)
 	cmds->names[cmds->cnt++] = ent;
 }
 
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+	int i;
+	for (i = 0; i < cmds->cnt; ++i)
+		free(cmds->names[i]);
+	free(cmds->names);
+	cmds->cnt = 0;
+	cmds->alloc = 0;
+}
+
 static int cmdname_compare(const void *a_, const void *b_)
 {
 	struct cmdname *a = *(struct cmdname **)a_;
@@ -250,9 +261,68 @@ int is_in_cmdlist(struct cmdnames *c, const char *s)
 	return 0;
 }
 
-void help_unknown_cmd(const char *cmd)
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = (*c1)->len;
+	int l2 = (*c2)->len;
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+const char *help_unknown_cmd(const char *cmd)
 {
+	int i, n, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(main_cmds));
+
+	load_command_list("git-", &main_cmds, &other_cmds);
+
+	ALLOC_GROW(main_cmds.names, main_cmds.cnt + other_cmds.cnt,
+		   main_cmds.alloc);
+	memcpy(main_cmds.names + main_cmds.cnt, other_cmds.names,
+	       other_cmds.cnt * sizeof(other_cmds.names[0]));
+	main_cmds.cnt += other_cmds.cnt;
+	free(other_cmds.names);
+
+	/* This reuses cmdname->len for similarity index */
+	for (i = 0; i < main_cmds.cnt; ++i)
+		main_cmds.names[i]->len =
+			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(*main_cmds.names), levenshtein_compare);
+
+	if (!main_cmds.cnt)
+		die ("Uh oh. Your system reports no Git commands at all.");
+
+	best_similarity = main_cmds.names[0]->len;
+	n = 1;
+	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+		++n;
+	if (n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a Git program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		return assumed;
+	}
+
 	fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd);
+
+	if (best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+
 	exit(1);
 }
 
diff --git a/help.h b/help.h
index 2733433..56bc154 100644
--- a/help.h
+++ b/help.h
@@ -5,7 +5,7 @@ struct cmdnames {
 	int alloc;
 	int cnt;
 	struct cmdname {
-		size_t len;
+		size_t len; /* also used for similarity index in help.c */
 		char name[FLEX_ARRAY];
 	} **names;
 };
diff --git a/levenshtein.c b/levenshtein.c
new file mode 100644
index 0000000..db52f2c
--- /dev/null
+++ b/levenshtein.c
@@ -0,0 +1,47 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = xmalloc(sizeof(int) * (len2 + 1));
+	int *row1 = xmalloc(sizeof(int) * (len2 + 1));
+	int *row2 = xmalloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (j + 1 < len2 && row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/levenshtein.h b/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif
-- 
1.6.0.1.168.gdf6f0

  reply	other threads:[~2008-08-31 13:52 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-28 17:15 [PATCH] Remove calculation of the longest command name from where it is not used Alex Riesen, Alex Riesen
2008-08-28 21:27 ` [PATCH updated] git wrapper: DWIM mistyped commands Alex Riesen
2008-08-28 21:28   ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-08-29 10:11     ` Andreas Ericsson
2008-09-08  6:50     ` Junio C Hamano
2008-08-29 14:58   ` [PATCH updated] git wrapper: DWIM mistyped commands Mikael Magnusson
2008-08-30 10:12     ` Alex Riesen
2008-08-30 10:33       ` Mikael Magnusson
2008-08-31 13:50         ` Alex Riesen [this message]
2008-08-31 13:54           ` [PATCH] Add help.autocorrect to enable/disable autocorrecting Alex Riesen
2008-08-31 14:49             ` Matthieu Moy
2008-08-31 16:33               ` Junio C Hamano
2008-09-01 14:42           ` [PATCH] git wrapper: DWIM mistyped commands Mikael Magnusson
2008-08-31 13:57         ` [PATCH updated] " Alex Riesen
2008-08-30 15:36   ` Junio C Hamano
2008-08-30 16:44     ` Alex Riesen
2008-08-30 17:13       ` [PATCH] Reuse cmdname->len to store pre-calculated similarity indexes Alex Riesen
2008-08-30 17:26         ` Junio C Hamano
     [not found]   ` <a2075f4c0808301510g1af01b14kd58da12dc2e80f93@mail.gmail.com>
2008-08-30 22:17     ` [PATCH updated] git wrapper: DWIM mistyped commands Felipe Carvalho Oliveira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080831135023.GA6616@blimp.local \
    --to=raa.lkml@gmail.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mikachu@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.