git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] builtin-add.c: restructure the code for maintainability
@ 2008-07-23  7:01 Junio C Hamano
  2008-07-23  7:01 ` [PATCH 2/2] builtin-add.c: optimize -A option and "git add $paths..." Junio C Hamano
  0 siblings, 1 reply; 2+ messages in thread
From: Junio C Hamano @ 2008-07-23  7:01 UTC (permalink / raw)
  To: git

A private function add_files_to_cache() in builtin-add.c was borrowed by
checkout and commit re-implementors without getting properly refactored to
more library-ish place.  This does the refactoring.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 * This is just code movement.

 builtin-add.c |   57 -----------------------------------------------------
 cache.h       |    1 +
 read-cache.c  |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 57 deletions(-)

diff --git a/builtin-add.c b/builtin-add.c
index fc3f96e..0de516a 100644
--- a/builtin-add.c
+++ b/builtin-add.c
@@ -8,10 +8,6 @@
 #include "dir.h"
 #include "exec_cmd.h"
 #include "cache-tree.h"
-#include "diff.h"
-#include "diffcore.h"
-#include "commit.h"
-#include "revision.h"
 #include "run-command.h"
 #include "parse-options.h"
 
@@ -79,59 +75,6 @@ static void fill_directory(struct dir_struct *dir, const char **pathspec,
 		prune_directory(dir, pathspec, baselen);
 }
 
-struct update_callback_data
-{
-	int flags;
-	int add_errors;
-};
-
-static void update_callback(struct diff_queue_struct *q,
-			    struct diff_options *opt, void *cbdata)
-{
-	int i;
-	struct update_callback_data *data = cbdata;
-
-	for (i = 0; i < q->nr; i++) {
-		struct diff_filepair *p = q->queue[i];
-		const char *path = p->one->path;
-		switch (p->status) {
-		default:
-			die("unexpected diff status %c", p->status);
-		case DIFF_STATUS_UNMERGED:
-		case DIFF_STATUS_MODIFIED:
-		case DIFF_STATUS_TYPE_CHANGED:
-			if (add_file_to_cache(path, data->flags)) {
-				if (!(data->flags & ADD_CACHE_IGNORE_ERRORS))
-					die("updating files failed");
-				data->add_errors++;
-			}
-			break;
-		case DIFF_STATUS_DELETED:
-			if (!(data->flags & ADD_CACHE_PRETEND))
-				remove_file_from_cache(path);
-			if (data->flags & (ADD_CACHE_PRETEND|ADD_CACHE_VERBOSE))
-				printf("remove '%s'\n", path);
-			break;
-		}
-	}
-}
-
-int add_files_to_cache(const char *prefix, const char **pathspec, int flags)
-{
-	struct update_callback_data data;
-	struct rev_info rev;
-	init_revisions(&rev, prefix);
-	setup_revisions(0, NULL, &rev, NULL);
-	rev.prune_data = pathspec;
-	rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
-	rev.diffopt.format_callback = update_callback;
-	data.flags = flags;
-	data.add_errors = 0;
-	rev.diffopt.format_callback_data = &data;
-	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	return !!data.add_errors;
-}
-
 static void refresh(int verbose, const char **pathspec)
 {
 	char *seen;
diff --git a/cache.h b/cache.h
index 38985aa..6f374ad 100644
--- a/cache.h
+++ b/cache.h
@@ -375,6 +375,7 @@ extern int remove_file_from_index(struct index_state *, const char *path);
 #define ADD_CACHE_VERBOSE 1
 #define ADD_CACHE_PRETEND 2
 #define ADD_CACHE_IGNORE_ERRORS	4
+#define ADD_CACHE_IGNORE_REMOVAL 8
 extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags);
 extern int add_file_to_index(struct index_state *, const char *path, int flags);
 extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, int refresh);
diff --git a/read-cache.c b/read-cache.c
index a50a851..6833af6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -8,6 +8,11 @@
 #include "cache-tree.h"
 #include "refs.h"
 #include "dir.h"
+#include "tree.h"
+#include "commit.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "revision.h"
 
 /* Index extensions.
  *
@@ -1444,3 +1449,59 @@ int read_index_unmerged(struct index_state *istate)
 	istate->cache_nr = dst - istate->cache;
 	return !!last;
 }
+
+struct update_callback_data
+{
+	int flags;
+	int add_errors;
+};
+
+static void update_callback(struct diff_queue_struct *q,
+			    struct diff_options *opt, void *cbdata)
+{
+	int i;
+	struct update_callback_data *data = cbdata;
+
+	for (i = 0; i < q->nr; i++) {
+		struct diff_filepair *p = q->queue[i];
+		const char *path = p->one->path;
+		switch (p->status) {
+		default:
+			die("unexpected diff status %c", p->status);
+		case DIFF_STATUS_UNMERGED:
+		case DIFF_STATUS_MODIFIED:
+		case DIFF_STATUS_TYPE_CHANGED:
+			if (add_file_to_index(&the_index, path, data->flags)) {
+				if (!(data->flags & ADD_CACHE_IGNORE_ERRORS))
+					die("updating files failed");
+				data->add_errors++;
+			}
+			break;
+		case DIFF_STATUS_DELETED:
+			if (data->flags & ADD_CACHE_IGNORE_REMOVAL)
+				break;
+			if (!(data->flags & ADD_CACHE_PRETEND))
+				remove_file_from_index(&the_index, path);
+			if (data->flags & (ADD_CACHE_PRETEND|ADD_CACHE_VERBOSE))
+				printf("remove '%s'\n", path);
+			break;
+		}
+	}
+}
+
+int add_files_to_cache(const char *prefix, const char **pathspec, int flags)
+{
+	struct update_callback_data data;
+	struct rev_info rev;
+	init_revisions(&rev, prefix);
+	setup_revisions(0, NULL, &rev, NULL);
+	rev.prune_data = pathspec;
+	rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
+	rev.diffopt.format_callback = update_callback;
+	data.flags = flags;
+	data.add_errors = 0;
+	rev.diffopt.format_callback_data = &data;
+	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
+	return !!data.add_errors;
+}
+
-- 
1.6.0.rc0.15.g0dda1

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/2] builtin-add.c: optimize -A option and "git add $paths..."
  2008-07-23  7:01 [PATCH 1/2] builtin-add.c: restructure the code for maintainability Junio C Hamano
@ 2008-07-23  7:01 ` Junio C Hamano
  0 siblings, 0 replies; 2+ messages in thread
From: Junio C Hamano @ 2008-07-23  7:01 UTC (permalink / raw)
  To: git

The earlier "git add -A" change was done in a quite inefficient
way (i.e. it is as unefficient as "git add -u && git add ." modulo
one fork/exec and read/write index).

For that matter, the original "git add $paths..." could be improved.

When the user asks "git add .", we do not have to examine all paths
we encounter and perform the excluded() and dir_add_name()
processing, both of which are slower code and use slower data structure
by git standards, especially when the index is already reasonably well
populated.

Instead, we implement "git add $pathspec..." as:

 - read the index;

 - read_directory() to process untracked, unignored files the current
   way, that is, recursively doing readdir(), filtering them by pathspec
   and excluded(), queueing them via dir_add_name() and finally do
   add_files(); and

 - iterate over the index, filtering them by pathspec, and update only
   the modified/type changed paths but not deleted ones.

And "git add -A" becomes exactly the same as above, modulo:

 - missing $pathspec means "." instead of being an error; and

 - "iterate over the index" part handles deleted ones as well,
   i.e. exactly what the current update_callback() in builtin-add.c does.

In either case, because fill_directory() does not use read_directory() to
read everything in, we need to add an extra logic to iterate over the
index to catch mistyped pathspec.

In a fully populated kernel tree (without any local change), best of ten
runs:

(with this patch)
$ /usr/bin/time ~/git-test/bin/git add .
0.16user 0.17system 0:00.34elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+3231minor)pagefaults 0swaps

(without this patch)
$ /usr/bin/time ~/git-master/bin/git add .
0.21user 0.21system 0:00.42elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+3647minor)pagefaults 0swaps

Under the same condition, but after "rm .git/index" to force re-indexing
everything (the patch should not make any difference):

(with this patch)
$ /usr/bin/time ~/git-test/bin/git add .
3.22user 2.10system 1:35.10elapsed 5%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (30037major+63003minor)pagefaults 0swaps

(without this patch)
$ /usr/bin/time ~/git-master/bin/git add .
3.31user 2.28system 1:44.77elapsed 5%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (30472major+62563minor)pagefaults 0swaps

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-add.c |   43 +++++++++++++++++++++++++++++++------------
 1 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/builtin-add.c b/builtin-add.c
index 0de516a..1834e2d 100644
--- a/builtin-add.c
+++ b/builtin-add.c
@@ -18,6 +18,27 @@ static const char * const builtin_add_usage[] = {
 static int patch_interactive = 0, add_interactive = 0;
 static int take_worktree_changes;
 
+static void fill_pathspec_matches(const char **pathspec, char *seen, int specs)
+{
+	int num_unmatched = 0, i;
+
+	/*
+	 * Since we are walking the index as if we are warlking the directory,
+	 * we have to mark the matched pathspec as seen; otherwise we will
+	 * mistakenly think that the user gave a pathspec that did not match
+	 * anything.
+	 */
+	for (i = 0; i < specs; i++)
+		if (!seen[i])
+			num_unmatched++;
+	if (!num_unmatched)
+		return;
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, seen);
+	}
+}
+
 static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix)
 {
 	char *seen;
@@ -37,6 +58,7 @@ static void prune_directory(struct dir_struct *dir, const char **pathspec, int p
 			*dst++ = entry;
 	}
 	dir->nr = dst - dir->entries;
+	fill_pathspec_matches(pathspec, seen, specs);
 
 	for (i = 0; i < specs; i++) {
 		if (!seen[i] && !file_exists(pathspec[i]))
@@ -201,7 +223,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 
 	if (addremove && take_worktree_changes)
 		die("-A and -u are mutually incompatible");
-	if (addremove && !argc) {
+	if ((addremove || take_worktree_changes) && !argc) {
 		static const char *here[2] = { ".", NULL };
 		argc = 1;
 		argv = here;
@@ -214,7 +236,9 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 
 	flags = ((verbose ? ADD_CACHE_VERBOSE : 0) |
 		 (show_only ? ADD_CACHE_PRETEND : 0) |
-		 (ignore_add_errors ? ADD_CACHE_IGNORE_ERRORS : 0));
+		 (ignore_add_errors ? ADD_CACHE_IGNORE_ERRORS : 0) |
+		 (!(addremove || take_worktree_changes)
+		  ? ADD_CACHE_IGNORE_REMOVAL : 0));
 
 	if (require_pathspec && argc == 0) {
 		fprintf(stderr, "Nothing specified, nothing added.\n");
@@ -223,24 +247,19 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 	}
 	pathspec = get_pathspec(prefix, argv);
 
-	/*
-	 * If we are adding new files, we need to scan the working
-	 * tree to find the ones that match pathspecs; this needs
-	 * to be done before we read the index.
-	 */
-	if (add_new_files)
-		fill_directory(&dir, pathspec, ignored_too);
-
 	if (read_cache() < 0)
 		die("index file corrupt");
 
+	if (add_new_files)
+		/* This picks up the paths that are not tracked */
+		fill_directory(&dir, pathspec, ignored_too);
+
 	if (refresh_only) {
 		refresh(verbose, pathspec);
 		goto finish;
 	}
 
-	if (take_worktree_changes || addremove)
-		exit_status |= add_files_to_cache(prefix, pathspec, flags);
+	exit_status |= add_files_to_cache(prefix, pathspec, flags);
 
 	if (add_new_files)
 		exit_status |= add_files(&dir, flags);
-- 
1.6.0.rc0.15.g0dda1

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2008-07-23  7:03 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-23  7:01 [PATCH 1/2] builtin-add.c: restructure the code for maintainability Junio C Hamano
2008-07-23  7:01 ` [PATCH 2/2] builtin-add.c: optimize -A option and "git add $paths..." Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).