git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] add a diff-files command
@ 2005-04-27 21:13 Nicolas Pitre
  2005-04-27 21:49 ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Nicolas Pitre @ 2005-04-27 21:13 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git


In the same spirit as diff-tree and diff-cache, here is a diff-files 
command that processes differences between the index cache and the 
working directory content.  It produces lists of files that are either 
changed (-c), deleted (-d) or outside (-o) from the current cache, or a 
combination of those, or all of them (-a).

The -p option can also be used to generate a patch describing the 
changes directly.

It also has the ability to accept exclude file patterns with -x and even 
a file containing a list of patterns to exclude with -X.  This is 
especially useful to use the famous dontdiff file when looking for 
uncommitted files in a compiled kernel tree.

Signed-off-by: Nicolas Pitre <nico@cam.org>

--- k/Makefile
+++ l/Makefile
@@ -18,7 +18,7 @@ PROG=   update-cache show-diff init-db w
 	cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
 	check-files ls-tree merge-base merge-cache unpack-file git-export \
 	diff-cache convert-cache http-pull rpush rpull rev-list git-mktag \
-	diff-tree-helper
+	diff-tree-helper diff-files
 
 all: $(PROG)
 
--- k/diff-files.c
+++ l/diff-files.c
@@ -0,0 +1,362 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+
+#include <dirent.h>
+#include <fnmatch.h>
+#include "cache.h"
+#include "diff.h"
+
+static const char *diff_files_usage = "diff-files [-a] [-c] [-d] [-o] [-p | -z]"
+				      " [-x <pattern>] [-X <file>] [paths...]";
+
+/* What paths are we interested in? */
+static int nr_paths = 0;
+static char **paths = NULL;
+static int *pathlens = NULL;
+
+static int nr_excludes;
+static const char **excludes;
+static int excludes_alloc;
+
+static void add_exclude(const char *string)
+{
+	if (nr_excludes == excludes_alloc) {
+		excludes_alloc = alloc_nr(excludes_alloc);
+		excludes = realloc(excludes, excludes_alloc*sizeof(char *));
+	}
+	excludes[nr_excludes++] = string;
+}
+
+static void add_excludes_from_file(const char *fname)
+{
+	int fd, i;
+	long size;
+	char *buf, *entry;
+
+	fd = open(fname, O_RDONLY);
+	if (fd < 0)
+		goto err;
+	size = lseek(fd, 0, SEEK_END);
+	if (size < 0)
+		goto err;
+	lseek(fd, 0, SEEK_SET);
+	if (size == 0) {
+		close(fd);
+		return;
+	}
+	buf = malloc(size);
+	if (!buf) {
+		errno = ENOMEM;
+		goto err;
+	}
+	if (read(fd, buf, size) != size)
+		goto err;
+	close(fd);
+
+	entry = buf;
+	for (i = 0; i < size; i++) {
+		if (buf[i] == '\n') {
+			if (entry != buf + i) {
+				buf[i] = 0;
+				add_exclude(entry);
+			}
+			entry = buf + i + 1;
+		}
+	}
+	return;
+
+err:	perror(fname);
+	exit(1);
+}
+
+/*
+ * See if name matches our specified paths and is not excluded.
+ * return value:
+ *	-1 if no match
+ *	0 if partial match (name is a directory component)
+ *	1 = exact match
+ *	2 = name is under a specified directory path with no excludes
+ */
+static int path_match(const char *name, int namelen)
+{
+	int i, ret;
+
+	/* fast case: no path list and no exclude list */
+	if (!nr_paths && !nr_excludes)
+		return 2;
+
+	ret = (nr_paths) ? -1 : 1;
+	for (i = 0; i < nr_paths; i++) {
+		int pathlen = pathlens[i];
+		if (pathlen == namelen &&
+		    strncmp(paths[i], name, pathlen) == 0) {
+			ret = 1;
+			break;
+		} else if (pathlen > namelen && 
+			   strncmp(paths[i], name, namelen) == 0 &&
+			   paths[i][namelen] == '/') {
+			ret = 0;
+			break;
+		} else if (pathlen < namelen &&
+			   strncmp(paths[i], name, pathlen) == 0 &&
+			   name[pathlen] == '/') {
+			ret = (nr_excludes) ? 1 : 2;
+			break;
+		}
+	}
+
+	if (ret >= 0 && nr_excludes) {
+		const char *basename = strrchr(name, '/');
+		basename = (basename) ? basename+1 : name;
+		for (i = 0; i < nr_excludes; i++) {
+			if (fnmatch(excludes[i], basename, 0) == 0) {
+				ret = -1;
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
+static const char **others;
+static int nr_others;
+static int others_alloc;
+
+static void add_name(const char *pathname, int len)
+{
+	char *name;
+
+	if (cache_name_pos(pathname, len) >= 0)
+		return;
+
+	if (nr_others == others_alloc) {
+		others_alloc = alloc_nr(others_alloc);
+		others = realloc(others, others_alloc*sizeof(char *));
+	}
+	name = malloc(len + 1);
+	memcpy(name, pathname, len + 1);
+	others[nr_others++] = name;
+}
+
+/*
+ * Read a directory tree. We currently ignore anything but
+ * directories and regular files. That's because git doesn't
+ * handle them at all yet. Maybe that will change some day.
+ *
+ * Also, we currently ignore all names starting with a dot.
+ * That likely will not change.
+ */
+static void read_directory(const char *path, const char *base, int baselen, int match)
+{
+	DIR *dir = opendir(path);
+
+	if (dir) {
+		struct dirent *de;
+		char fullname[MAXPATHLEN + 1];
+		memcpy(fullname, base, baselen);
+
+		while ((de = readdir(dir)) != NULL) {
+			int len;
+
+			if (de->d_name[0] == '.')
+				continue;
+			len = strlen(de->d_name);
+			memcpy(fullname + baselen, de->d_name, len+1);
+			if (match < 2)
+				match = path_match(fullname, baselen+len);
+			if (match < 0)
+				continue;
+
+			switch (de->d_type) {
+			struct stat st;
+			default:
+				continue;
+			case DT_UNKNOWN:
+				if (lstat(fullname, &st))
+					continue;
+				if (S_ISREG(st.st_mode))
+					break;
+				if (!S_ISDIR(st.st_mode))
+					continue;
+				/* fallthrough */
+			case DT_DIR:
+				memcpy(fullname + baselen + len, "/", 2);
+				read_directory(fullname, fullname,
+					       baselen + len + 1, match);
+				continue;
+			case DT_REG:
+				break;
+			}
+			if (match > 0)
+				add_name(fullname, baselen + len);
+		}
+		closedir(dir);
+	}
+}
+
+static int cmp_name(const void *p1, const void *p2)
+{
+	const char *n1 = *(const char **)p1;
+	const char *n2 = *(const char **)p2;
+	int l1 = strlen(n1), l2 = strlen(n2);
+
+	return cache_name_compare(n1, l1, n2, l2);
+}
+
+static int show_changed = 0;
+static int show_deleted = 0;
+static int show_others = 0;
+static int generate_patch = 0;
+static int line_terminator = '\n';
+
+static const char null_sha1[20];
+static const char null_sha1_hex[] = "0000000000000000000000000000000000000000";
+
+static void show_file(int prefix, unsigned int mode,
+		      const char *sha1, const char *name)
+{
+	if (generate_patch)
+		diff_addremove(prefix, mode, sha1, name, NULL);
+	else
+		printf("%c%o\t%s\t%s\t%s%c", prefix, mode, "blob",
+		       sha1_to_hex(sha1), name, line_terminator);
+}
+
+int main(int argc, char **argv)
+{
+	int i, entries;
+
+	for (i = 1; i < argc; i++) {
+		char *arg = argv[i];
+
+		if (*arg != '-')
+			break;
+
+		if (!strcmp(arg, "-z")) {
+			line_terminator = 0;
+			continue;
+		}
+		if (!strcmp(arg, "-a")) {
+			show_changed = show_deleted = show_others = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-c")) {
+			show_changed = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-d")) {
+			show_deleted = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-o")) {
+			show_others = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-p")) {
+			generate_patch = 1;
+			continue;
+		}
+		if (!strcmp(arg, "-x") && i+1 < argc) {
+			arg = argv[++i];
+			add_exclude(arg);
+			continue;
+		}
+		if (!strcmp(arg, "-X") && i+1 < argc) {
+			arg = argv[++i];
+			add_excludes_from_file(arg);
+			continue;
+		}
+		if (!strcmp(arg, "--")) {
+			i++;
+			break;
+		}
+
+		usage(diff_files_usage);
+	}
+
+	/* default to -c if none of -c, -d nor -o have been specified */
+	if (!show_changed && !show_deleted && !show_others)
+		show_changed = 1;
+
+	if (i < argc) {
+		paths = &argv[i];
+		nr_paths = argc - i;
+		pathlens = malloc(nr_paths * sizeof(int));
+		for (i=0; i<nr_paths; i++) {
+			pathlens[i] = strlen(paths[i]);
+			if (paths[i][pathlens[i] - 1] == '/')
+				pathlens[i]--;
+		}
+	}
+
+	entries = read_cache();
+	if (entries < 0) {
+		perror("read_cache");
+		exit(1);
+	}
+
+	if (show_others) {
+		read_directory(".", "", 0, 0);
+		qsort(others, nr_others, sizeof(char *), cmp_name);
+		for (i = 0; i < nr_others; i++) {
+			struct stat st;
+			unsigned int mode;
+			if (stat(others[i], &st) < 0) {
+				perror(others[i]);
+			} else {
+				mode = S_IFREG | ce_permissions(st.st_mode);
+				show_file('+', mode, null_sha1, others[i]);
+			}
+		}
+	}
+
+	for (i = 0; i < entries; i++) {
+		struct stat st;
+		unsigned int ce_mode, mode;
+		struct cache_entry *ce = active_cache[i];
+
+		if (path_match(ce->name, ce_namelen(ce)) < 1)
+			continue;
+
+		if (show_changed && ce_stage(ce)) {
+			if (generate_patch)
+				diff_unmerge(ce->name);
+			else
+				printf("U %s%c", ce->name, line_terminator);
+			do {
+				i++;
+			} while (i < entries &&
+				 !strcmp(ce->name, active_cache[i]->name));
+			continue;
+		}
+
+		ce_mode = ntohl(ce->ce_mode);
+		if (stat(ce->name, &st) < 0) {
+			if (errno != ENOENT) {
+				perror(ce->name);
+			} else if (show_deleted) {
+				show_file('-', ce_mode, ce->sha1, ce->name);
+			}
+			continue;
+		}
+
+		if (!show_changed || !cache_match_stat(ce, &st))
+			continue;
+
+		mode = S_IFREG | ce_permissions(st.st_mode);
+		if (generate_patch)
+			diff_change(ce_mode, mode, ce->sha1,
+				    null_sha1, ce->name, NULL);
+		else
+			printf("*%o->%o\t%s\t%s->%s\t%s%c",
+			       ce_mode, mode, "blob",
+			       sha1_to_hex(ce->sha1), null_sha1_hex,
+			       ce->name, line_terminator);
+	}
+
+	return 0;
+}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] add a diff-files command
  2005-04-27 21:13 [PATCH] add a diff-files command Nicolas Pitre
@ 2005-04-27 21:49 ` Junio C Hamano
  2005-04-28  1:02   ` Nicolas Pitre
  0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2005-04-27 21:49 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Linus Torvalds, git

>>>>> "NP" == Nicolas Pitre <nico@cam.org> writes:

NP> It also has the ability to accept exclude file patterns with -x and even 
NP> a file containing a list of patterns to exclude with -X.  This is 
NP> especially useful to use the famous dontdiff file when looking for 
NP> uncommitted files in a compiled kernel tree.

I think show-diff with path restriction (if restriction is
simple), or piping its output to grep or filterdiff (otherwise),
would be enough to do what you do here, so personally I doubt
this new command is even useful that much.

That said, I have a couple of comments.  Other than these I do
not see anything majorly wrong (although I haven't even compiled
it yet ;-).

NP> +static const char *diff_files_usage = "diff-files [-a] [-c] [-d] [-o] [-p | -z]"
NP> +				      " [-x <pattern>] [-X <file>] [paths...]";
NP> +

If you are trying to do something similar to show-files by these
-[acdo] flags, matching these flags in both commands would be
less confusing to the users and script writers.  Either make
diff-files take fully spelled --others etc. that show-files
takes, or submit a patch for show-files to match these shorter
ones as well.  I personally prefer the latter.

NP> +/*
NP> + * Read a directory tree. We currently ignore anything but
NP> + * directories and regular files. That's because git doesn't
NP> + * handle them at all yet. Maybe that will change some day.
NP> + *
NP> + * Also, we currently ignore all names starting with a dot.
NP> + * That likely will not change.
NP> + */

For that logic, instead of doing de->d_name[0] == '.' and things
yourself, I'd rather see you lift verify_path() function from
update-cache.c into common library and call it.  Then if the
"likely will not change" part needs to be updated, you do not
have to worry about it; updates to verify_path() would take care
of it for you.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] add a diff-files command
  2005-04-27 21:49 ` Junio C Hamano
@ 2005-04-28  1:02   ` Nicolas Pitre
  2005-04-28 16:34     ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Nicolas Pitre @ 2005-04-28  1:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, git

On Wed, 27 Apr 2005, Junio C Hamano wrote:

> >>>>> "NP" == Nicolas Pitre <nico@cam.org> writes:
> 
> NP> It also has the ability to accept exclude file patterns with -x and even 
> NP> a file containing a list of patterns to exclude with -X.  This is 
> NP> especially useful to use the famous dontdiff file when looking for 
> NP> uncommitted files in a compiled kernel tree.
> 
> I think show-diff with path restriction (if restriction is
> simple), or piping its output to grep or filterdiff (otherwise),
> would be enough to do what you do here, so personally I doubt
> this new command is even useful that much.

First, show-diff doesn't handle files in the work tree which are not 
listed in the cache.

Have you ever looked at the dontdiff file?  You can get a sample of it 
from http://www.moses.uklinux.net/patches/dontdiff to give you an idea.  
Using grep or filterdiff is really backward in that case since out of 
all the junk that might appear in the output about 98% will be filtered 
away in most useful cases, which is rather inefficient.

Path restriction is inclusive, while the exclude list is, well, 
exclusive.  They serves separate purposes.  So trust me it _is_ pretty 
damn useful, unless you always run "make clean" on your kernel tree 
before checking for potentially uncommitted files then recompile 
everything afterwards which is a hassle.

> That said, I have a couple of comments.  Other than these I do
> not see anything majorly wrong (although I haven't even compiled
> it yet ;-).
> 
> NP> +static const char *diff_files_usage = "diff-files [-a] [-c] [-d] [-o] [-p | -z]"
> NP> +				      " [-x <pattern>] [-X <file>] [paths...]";
> NP> +
> 
> If you are trying to do something similar to show-files by these
> -[acdo] flags, matching these flags in both commands would be
> less confusing to the users and script writers.  Either make
> diff-files take fully spelled --others etc. that show-files
> takes, or submit a patch for show-files to match these shorter
> ones as well.  I personally prefer the latter.

Agreed.

> NP> +/*
> NP> + * Read a directory tree. We currently ignore anything but
> NP> + * directories and regular files. That's because git doesn't
> NP> + * handle them at all yet. Maybe that will change some day.
> NP> + *
> NP> + * Also, we currently ignore all names starting with a dot.
> NP> + * That likely will not change.
> NP> + */
> 
> For that logic, instead of doing de->d_name[0] == '.' and things
> yourself, I'd rather see you lift verify_path() function from
> update-cache.c into common library and call it.

Sure, but actually I'd prefer for that to be a separate patch since it's 
a tangential issue that might affect show-files as well.


Nicolas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] add a diff-files command
  2005-04-28  1:02   ` Nicolas Pitre
@ 2005-04-28 16:34     ` Junio C Hamano
  2005-04-28 16:56       ` Nicolas Pitre
  0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2005-04-28 16:34 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Linus Torvalds, git

>>>>> "NP" == Nicolas Pitre <nico@cam.org> writes:

Having thought about it more, although I praise your enthusiasm
to improve git, I suspect your diff-files is a solution to a
problem that does not exist.

NP> It also has the ability to accept exclude file patterns with
NP> -x and even a file containing a list of patterns to exclude
NP> with -X.  This is especially useful to use the famous
NP> dontdiff file when looking for uncommitted files in a
NP> compiled kernel tree.

If you want to see if working tree has some junk other than
those listed in dontdiff, wouldn't this be sufficient?

  $ show-files --others | grep -f dontdiff

NP> First, show-diff doesn't handle files in the work tree which
NP> are not listed in the cache.

That's the whole point of git (and show-diff).  If it is not
listed in the cache, it does not exist.

NP> ...  So trust me it _is_ pretty damn useful, unless you
NP> always run "make clean" on your kernel tree before checking
NP> for potentially uncommitted files then recompile everything
NP> afterwards which is a hassle.

Why do you need to "make clean"?  Is it because otherwise you
would get lots of output for things that are listed in dontdiff
but not listed in the cache, and the uncommitted but not listed
in dontdiff file that you care about would get lost in the
noise?

Earlier you complained show-diff does _not_ look at what are not
listed in the cache.  Now you want to exclude garbage that comes
out of it because you have tons of stuff that are not listed in
the cache, which implies show-diff _does_ look at what are not
listed in the cache.  Either you are contradicting yourself or I
am confused.

The truth is, as you said earlier, show-diff does not look at
them, so I do not understand what problem you are trying to
describe here.

Again, "checking for potentially uncommitted" files is what
you use show-files --others for, not show-diff.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] add a diff-files command
  2005-04-28 16:34     ` Junio C Hamano
@ 2005-04-28 16:56       ` Nicolas Pitre
  2005-04-28 17:16         ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Nicolas Pitre @ 2005-04-28 16:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, git

On Thu, 28 Apr 2005, Junio C Hamano wrote:

> If you want to see if working tree has some junk other than
> those listed in dontdiff, wouldn't this be sufficient?
> 
>   $ show-files --others | grep -f dontdiff

Well, it would work if the dontdiff file other people are maintaining 
was made up of regexps.  But it is made of shell wildcard patterns meant 
to be used with the -X switch of the diff command.

> Again, "checking for potentially uncommitted" files is what
> you use show-files --others for, not show-diff.

Indeed.  And yesterday I realized that the (currently unimplemented) 
--ignore switch to show-files, combined with the exclusion pattern list, 
whould be more logical than teaching show-diff (which I still think is a 
misnamer in the context of the other diff tools) about files unknown to 
the cache.  The patch to show-files is also much smaller and logical.

BTW, I don't do this out of pure entousiasm but rather trying to make my 
own workflow with the Linux kernel source tree more efficient in the 
context of git usage.  My pure coding entousiasm lies somewhere else.  


Nicolas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] add a diff-files command
  2005-04-28 16:56       ` Nicolas Pitre
@ 2005-04-28 17:16         ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2005-04-28 17:16 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Linus Torvalds, git

>>>>> "NP" == Nicolas Pitre <nico@cam.org> writes:

NP> ...  And yesterday I realized that the (currently unimplemented) 
NP> --ignore switch to show-files, combined with the exclusion pattern list, 
NP> whould be more logical than teaching show-diff (which I still think is a 
NP> misnamer in the context of the other diff tools) about files unknown to 
NP> the cache.  The patch to show-files is also much smaller and logical.

I agree wholeheartedly with both counts.  (1) Linus and I
discussed briefly about renaming show-diff to diff-files but it
is on hold, waiting for a big wholesale rename.  (2) the logical
place for the -X and -x is "show-files --ignore".


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2005-04-28 17:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-27 21:13 [PATCH] add a diff-files command Nicolas Pitre
2005-04-27 21:49 ` Junio C Hamano
2005-04-28  1:02   ` Nicolas Pitre
2005-04-28 16:34     ` Junio C Hamano
2005-04-28 16:56       ` Nicolas Pitre
2005-04-28 17:16         ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).