git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Jeff King <peff@peff.net>
Cc: git@vger.kernel.org, Tuncer Ayaz <tuncer.ayaz@gmail.com>
Subject: Re: [RFC/PATCH] shortstatus v1
Date: Tue, 10 Feb 2009 14:25:39 -0800	[thread overview]
Message-ID: <7vtz72kjz0.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <20090210191118.GA26651@coredump.intra.peff.net> (Jeff King's message of "Tue, 10 Feb 2009 14:11:18 -0500")

Jeff King <peff@peff.net> writes:

> Warm cache, it runs in .042s on my git repo, about half of which is the
> untracked files check. It takes about .49s on the kernel repo. The
> read_directory() bit is not optimized at all, and could probably benefit
> from an early return (OTOH, the worst case is still going to need to
> look at every path).

I suspect that with a large tree your have_untracked() would show
unnecessary overhead from dir_add_name(), because you only want one bit of
information but there is no way to stop with "ok, we know enough".  This
toy patch adds a trivial "early return" to read_directory() codepath, but
there are two sad things about it.

 * In order to cheaply run "is there a single other file", you really
   should scan the level you have already opened first before digging
   deeper.  I didn't bother because the primary use of read_directory is
   the depth first traversal.

 * In a cloned work tree, the tracked files and directories come early in
   the physical directory and then crufts you created yourself comes at
   the end in readdir() order.  We tend to read a lot of tracked ones
   first and the finally hit other files.

 builtin-ministatus.c |    4 +++-
 dir.c                |   32 +++++++++++++++++++++++++++-----
 dir.h                |    2 ++
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/builtin-ministatus.c b/builtin-ministatus.c
index aff9e5a..4b5a191 100644
--- a/builtin-ministatus.c
+++ b/builtin-ministatus.c
@@ -25,7 +25,7 @@ static int have_untracked(void)
 
 	read_directory(&dir, ".", "", 0, NULL);
 	/* XXX we are probably leaking memory from dir */
-	for (i = 0; i < dir.nr; i++)
+	for (i = 0; i < dir.nr; i++) {
 		struct dir_entry *ent = dir.entries[i];
 		if (cache_name_is_other(ent->name, ent->len))
 			return 1;
@@ -47,6 +47,8 @@ int cmd_ministatus(int argc, const char **argv, const char *prefix)
 		putchar('*');
 	if (have_untracked())
 		putchar('?');
+	if (untracked_files_exist())
+		putchar('%');
 
 	return 0;
 }
diff --git a/dir.c b/dir.c
index cfd1ea5..8d4fcdd 100644
--- a/dir.c
+++ b/dir.c
@@ -16,7 +16,10 @@ struct path_simplify {
 
 static int read_directory_recursive(struct dir_struct *dir,
 	const char *path, const char *base, int baselen,
-	int check_only, const struct path_simplify *simplify);
+	int mode, const struct path_simplify *simplify);
+#define READ_DIRECTORY_EMPTY_CHECK 1
+#define READ_DIRECTORY_OTHER_CHECK 2
+
 static int get_dtype(struct dirent *de, const char *path);
 
 int common_prefix(const char **pathspec)
@@ -505,7 +508,8 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
 	/* This is the "show_other_directories" case */
 	if (!dir->hide_empty_directories)
 		return show_directory;
-	if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify))
+	if (!read_directory_recursive(dir, dirname, dirname, len,
+				      READ_DIRECTORY_EMPTY_CHECK, simplify))
 		return ignore_directory;
 	return show_directory;
 }
@@ -574,10 +578,12 @@ static int get_dtype(struct dirent *de, const char *path)
  * Also, we ignore the name ".git" (even if it is not a directory).
  * That likely will not change.
  */
-static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify)
+static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int mode, const struct path_simplify *simplify)
 {
 	DIR *fdir = opendir(path);
 	int contents = 0;
+	int empty_check_only = (mode == READ_DIRECTORY_EMPTY_CHECK);
+	int other_check_only = (mode == READ_DIRECTORY_OTHER_CHECK);
 
 	if (fdir) {
 		struct dirent *de;
@@ -639,7 +645,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
 					break;
 				case recurse_into_directory:
 					contents += read_directory_recursive(dir,
-						fullname, fullname, baselen + len, 0, simplify);
+						fullname, fullname, baselen + len, mode, simplify);
 					continue;
 				case ignore_directory:
 					continue;
@@ -650,10 +656,12 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
 				break;
 			}
 			contents++;
-			if (check_only)
+			if (empty_check_only)
 				goto exit_early;
 			else
 				dir_add_name(dir, fullname, baselen + len);
+			if (other_check_only && dir->nr)
+				goto exit_early;
 		}
 exit_early:
 		closedir(fdir);
@@ -731,6 +739,20 @@ int read_directory(struct dir_struct *dir, const char *path, const char *base, i
 	return dir->nr;
 }
 
+int untracked_files_exist(void)
+{
+	struct dir_struct dir;
+	int i;
+
+	memset(&dir, 0, sizeof(dir));
+	setup_standard_excludes(&dir);
+	read_directory_recursive(&dir, ".", "", 0, READ_DIRECTORY_OTHER_CHECK,
+				 NULL);
+	for (i = 0; i < dir.nr; i++)
+		free(dir.entries[i]);
+	return !!dir.nr;
+}
+
 int file_exists(const char *f)
 {
 	struct stat sb;
diff --git a/dir.h b/dir.h
index bdc2d47..1f8b575 100644
--- a/dir.h
+++ b/dir.h
@@ -92,4 +92,6 @@ extern int remove_dir_recursively(struct strbuf *path, int only_empty);
 /* tries to remove the path with empty directories along it, ignores ENOENT */
 extern int remove_path(const char *path);
 
+extern int untracked_files_exist(void);
+
 #endif

  parent reply	other threads:[~2009-02-10 22:27 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-10  0:51 [RFC/PATCH] shortstatus v1 Tuncer Ayaz
2009-02-10  1:44 ` Junio C Hamano
2009-02-10  3:46   ` Sitaram Chamarty
2009-02-10 10:22     ` Spending time in PS1, was " Johannes Schindelin
2009-02-10 17:31       ` Sitaram Chamarty
2009-02-10 10:11   ` Tuncer Ayaz
2009-02-10 11:03 ` Jeff King
2009-02-10 11:29   ` Michael J Gruber
2009-02-10 11:31     ` Tuncer Ayaz
2009-02-10 11:45     ` Jeff King
2009-02-10 12:36       ` Michael J Gruber
2009-02-10 13:01         ` Jeff King
2009-02-10 15:58   ` Junio C Hamano
2009-02-10 18:10     ` Jeff King
2009-02-10 18:22       ` Jeff King
2009-02-10 19:11       ` Jeff King
2009-02-10 21:21         ` Tuncer Ayaz
2009-02-10 21:36           ` Jeff King
2009-02-10 22:25         ` Junio C Hamano [this message]
2009-02-10 22:52           ` Tuncer Ayaz
2009-02-10 22:55           ` Jeff King
2009-02-10 23:05             ` Junio C Hamano
2009-02-12  0:49               ` Jeff King
2009-02-10 23:52     ` Nanako Shiraishi
2009-02-11 21:24       ` Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2009-02-10 23:58 [RFC] New command: 'git snapshot' Ulrik Sverdrup
2009-02-11  0:08 ` [RFC/PATCH] shortstatus v1 Nanako Shiraishi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vtz72kjz0.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=peff@peff.net \
    --cc=tuncer.ayaz@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).