All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] checkout: avoid unncessary match_pathspec calls
Date: Sat, 23 Mar 2013 17:55:42 +0700	[thread overview]
Message-ID: <1364036142-3031-1-git-send-email-pclouds@gmail.com> (raw)

In checkout_paths() we do this

 - for i = 0..active_nr, if not updated, call match_pathspec
 - for ..., call match_pathspec (inside unmerge_cache)
 - for ..., call match_pathspec (for showing "path .. is unmerged)
 - for ..., if not updated, call match_pathspec and update paths

That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per index entry, save the
result in ce_flags and reuse the results in the following loops.

This command is used on webkit, 215k entries. The pattern is chosen
mainly to make match_pathspec sweat:

git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"

        before      after
real    0m3.493s    0m2.737s
user    0m2.239s    0m1.586s
sys     0m1.252s    0m1.151s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Junio, this patch clearly conflicts wih nd/magic-pathspecs. Do you
 want me to:

  - hold it off until nd/magic-pathspecs graduates
  - rebase on top of nd/magic-pathspecs and repost
  - leave it to you to handle conflicts

 ?

 builtin/checkout.c | 23 +++++++++++++++++++----
 cache.h            |  1 +
 resolve-undo.c     | 19 ++++++++++++++++++-
 resolve-undo.h     |  1 +
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index a9c1b5a..fadc11b 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -273,22 +273,37 @@ static int checkout_paths(const struct checkout_opts *opts,
 
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
+		ce->ce_flags &= ~CE_MATCHED;
 		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
 			continue;
-		match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
+		if (match_pathspec(opts->pathspec, ce->name,
+				   ce_namelen(ce), 0, ps_matched))
+			ce->ce_flags |= CE_MATCHED;
 	}
 
 	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
 		return 1;
 
+	/*
+	 * call match_pathspec on the remaining entries that have not
+	 * been done in the previous loop
+	 */
+	for (pos = 0; pos < active_nr; pos++) {
+		struct cache_entry *ce = active_cache[pos];
+		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE) &&
+		    match_pathspec(opts->pathspec, ce->name,
+				   ce_namelen(ce), 0, ps_matched))
+			ce->ce_flags |= CE_MATCHED;
+	}
+
 	/* "checkout -m path" to recreate conflicted state */
 	if (opts->merge)
-		unmerge_cache(opts->pathspec);
+		unmerge_marked_index(&the_index);
 
 	/* Any unmerged paths? */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce))
 				continue;
 			if (opts->force) {
@@ -315,7 +330,7 @@ static int checkout_paths(const struct checkout_opts *opts,
 		struct cache_entry *ce = active_cache[pos];
 		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
 			continue;
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce)) {
 				errs |= checkout_entry(ce, &state, NULL);
 				continue;
diff --git a/cache.h b/cache.h
index c56315c..04e6090 100644
--- a/cache.h
+++ b/cache.h
@@ -161,6 +161,7 @@ struct cache_entry {
 
 #define CE_UNPACKED          (1 << 24)
 #define CE_NEW_SKIP_WORKTREE (1 << 25)
+#define CE_MATCHED           (1 << 26)
 
 /*
  * Extended on-disk flags
diff --git a/resolve-undo.c b/resolve-undo.c
index 72b4612..639eb9c 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -118,7 +118,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	struct cache_entry *ce;
 	struct string_list_item *item;
 	struct resolve_undo_info *ru;
-	int i, err = 0;
+	int i, err = 0, matched;
 
 	if (!istate->resolve_undo)
 		return pos;
@@ -137,6 +137,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	ru = item->util;
 	if (!ru)
 		return pos;
+	matched = ce->ce_flags & CE_MATCHED;
 	remove_index_entry_at(istate, pos);
 	for (i = 0; i < 3; i++) {
 		struct cache_entry *nce;
@@ -144,6 +145,8 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 			continue;
 		nce = make_cache_entry(ru->mode[i], ru->sha1[i],
 				       ce->name, i + 1, 0);
+		if (matched)
+			nce->ce_flags |= CE_MATCHED;
 		if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
 			err = 1;
 			error("cannot unmerge '%s'", ce->name);
@@ -156,6 +159,20 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	return unmerge_index_entry_at(istate, pos);
 }
 
+void unmerge_marked_index(struct index_state *istate)
+{
+	int i;
+
+	if (!istate->resolve_undo)
+		return;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		if (ce->ce_flags & CE_MATCHED)
+			i = unmerge_index_entry_at(istate, i);
+	}
+}
+
 void unmerge_index(struct index_state *istate, const char **pathspec)
 {
 	int i;
diff --git a/resolve-undo.h b/resolve-undo.h
index 8458769..7a30206 100644
--- a/resolve-undo.h
+++ b/resolve-undo.h
@@ -12,5 +12,6 @@ extern struct string_list *resolve_undo_read(const char *, unsigned long);
 extern void resolve_undo_clear_index(struct index_state *);
 extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
+extern void unmerge_marked_index(struct index_state *);
 
 #endif
-- 
1.8.2.83.gc99314b

             reply	other threads:[~2013-03-23 10:56 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-23 10:55 Nguyễn Thái Ngọc Duy [this message]
2013-03-24  2:45 ` [PATCH] checkout: avoid unncessary match_pathspec calls Eric Sunshine
2013-03-24  6:47 ` Junio C Hamano
2013-03-24 12:55   ` [PATCH v2] checkout: avoid unnecessary " Nguyễn Thái Ngọc Duy
2013-03-25 16:26     ` Junio C Hamano
2013-03-27  5:58       ` [PATCH v3] " Nguyễn Thái Ngọc Duy
2013-03-28 22:32         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1364036142-3031-1-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.