From: Marius Storm-Olsen <marius@trolltech.com>
To: git@vger.kernel.org, msysgit@googlegroups.com
Cc: Marius Storm-Olsen <marius@trolltech.com>
Subject: [PATCH] MinGW readdir reimplementation to support d_type
Date: Wed, 8 Apr 2009 23:01:47 +0200 [thread overview]
Message-ID: <1239224507-5372-1-git-send-email-marius@trolltech.com> (raw)
The original readdir implementation was fast, but didn't
support the d_type. This means that git would do additional
lstats for each entry, to figure out if the entry was a
directory or not. This needlessly slowed down many
operations, since Windows API provides this information
directly when walking the directories.
By running this implementation on Moe's repo structure:
mkdir bummer && cd bummer; for ((i=0;i<100;i++)); do
mkdir $i && pushd $i;
for ((j=0;j<1000;j++)); do echo "$j" >$j; done;
popd;
done
We see the following speedups:
git add .
-------------------
old: 00:00:23(.087)
new: 00:00:21(.512) 1.07x
git status
-------------------
old: 00:00:03(.306)
new: 00:00:01(.684) 1.96x
git clean -dxf
-------------------
old: 00:00:01(.918)
new: 00:00:00(.295) 6.50x
Signed-off-by: Marius Storm-Olsen <marius@trolltech.com>
---
It would be nice if MinGW/Windows people would give this a thorough
testing to ensure that it's pristine. It seems fine, and I've not
stumbled over anything myself.
Of course, if you have status.showUntrackedFiles = no, then you'll
not get any speedups, since the read_directory_recursive loop is
never entered. People with a standard setup, however, should
experience a significant speedup.
compat/mingw.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/mingw.h | 28 ++++++++++++++++++++++++++
2 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/compat/mingw.c b/compat/mingw.c
index 2839d9d..f52de3e 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -1139,3 +1139,62 @@ int link(const char *oldpath, const char *newpath)
}
return 0;
}
+
+#ifndef NO_MINGW_REPLACE_READDIR
+/* MinGW readdir implementation to avoid extra lstats for Git. mingw_readdir() casts its DIR * argument to struct mingw_DIR * so it can write d_type; NOTE(review): presumably this must match the C runtime's DIR layout -- confirm. */
+struct mingw_DIR
+{
+ struct _finddata_t dd_dta; /* disk transfer area for this dir */
+ struct mingw_dirent dd_dir; /* Our own implementation, including d_type; mingw_readdir() sets d_type through this member */
+ long dd_handle; /* _findnext handle; mingw_readdir() keeps a FindFirstFileA() HANDLE cast to long in DIR's field of the same name */
+ int dd_stat; /* 0 = next entry to read is first entry, -1 = off the end, positive = 0 based index of next entry */
+ char dd_name[1]; /* given path for dir with search pattern (struct is extended) */
+};
+
+struct dirent *mingw_readdir(DIR *dir) /* Returns the next entry with d_type set from the Win32 find data, or NULL at end of directory or on error. */
+{
+ WIN32_FIND_DATAA buf;
+ HANDLE handle;
+ struct mingw_DIR *mdir = (struct mingw_DIR*)dir; /* same object viewed through our d_type-aware layout */
+
+ if (!dir->dd_handle) { /* NOTE(review): dir itself is dereferenced without a NULL check -- confirm callers never pass NULL */
+ errno = EBADF; /* A zero handle is reported as a bad directory stream. No set_errno for mingw */
+ return NULL;
+ }
+
+ if (dir->dd_handle == (long)INVALID_HANDLE_VALUE && dir->dd_stat == 0) /* no search handle yet and nothing read: start the search */
+ {
+ handle = FindFirstFileA(dir->dd_name, &buf);
+ DWORD lasterr = GetLastError(); /* captured right after the call, before anything else can change it */
+ dir->dd_handle = (long)handle;
+ if (handle == INVALID_HANDLE_VALUE && (lasterr != ERROR_NO_MORE_FILES)) {
+ errno = err_win_to_posix(lasterr);
+ return NULL;
+ }
+ } else if (dir->dd_handle == (long)INVALID_HANDLE_VALUE) { /* entries were read before and the handle is invalid again: nothing left */
+ return NULL;
+ } else if (!FindNextFileA((HANDLE)dir->dd_handle, &buf)) {
+ DWORD lasterr = GetLastError(); /* read before FindClose() below */
+ FindClose((HANDLE)dir->dd_handle);
+ dir->dd_handle = (long)INVALID_HANDLE_VALUE; /* later calls take the "nothing left" branch above */
+ /* POSIX says you shouldn't set errno when readdir can't
+ find any more files; so errno is only set for other errors. */
+ if (lasterr != ERROR_NO_MORE_FILES)
+ errno = err_win_to_posix(lasterr);
+ return NULL;
+ }
+
+ /* We get here if `buf' contains valid data. NOTE(review): a FindFirstFileA() failure with ERROR_NO_MORE_FILES also reaches this point with `buf' unset -- confirm. */
+ strcpy(dir->dd_dir.d_name, buf.cFileName); /* NOTE(review): unbounded copy; assumes d_name can hold any cFileName -- confirm */
+ ++dir->dd_stat; /* non-zero from now on, so the search is never restarted */
+
+ /* Set file type, based on WIN32_FIND_DATA: directories become DT_DIR, everything else DT_REG */
+ mdir->dd_dir.d_type = 0;
+ if (buf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+ mdir->dd_dir.d_type |= DT_DIR;
+ else
+ mdir->dd_dir.d_type |= DT_REG;
+
+ return (struct dirent*)&dir->dd_dir; /* points into *dir, not a separately allocated entry */
+}
+#endif // !NO_MINGW_REPLACE_READDIR
diff --git a/compat/mingw.h b/compat/mingw.h
index 762eb14..104b310 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -233,3 +233,31 @@ int main(int argc, const char **argv) \
return mingw_main(argc, argv); \
} \
static int mingw_main(c,v)
+
+#ifndef NO_MINGW_REPLACE_READDIR
+/*
+ * A replacement for readdir, to ensure that it reads the file type at
+ * the same time. This avoids extra unneeded lstats in git on MinGW.
+ */
+#undef DT_UNKNOWN
+#undef DT_DIR
+#undef DT_REG
+#undef DT_LNK
+#define DT_UNKNOWN 0 /* file type not determined */
+#define DT_DIR 1 /* set by mingw_readdir() for entries with FILE_ATTRIBUTE_DIRECTORY */
+#define DT_REG 2 /* set by mingw_readdir() for every other entry */
+#define DT_LNK 3 /* defined here, but not produced by mingw_readdir() in this patch */
+
+struct mingw_dirent
+{
+ long d_ino; /* Always zero. */
+ union {
+ unsigned short d_reclen; /* Documented as always zero, but shares storage with d_type below. */
+ unsigned char d_type; /* Reimplementation adds this; overlays d_reclen so no new member is added */
+ };
+ unsigned short d_namlen; /* Length of name in d_name. NOTE(review): mingw_readdir() in this patch does not write this field -- confirm. */
+ char d_name[FILENAME_MAX]; /* File name. */
+};
+#define dirent mingw_dirent /* from here on, `struct dirent' names the d_type-aware struct above */
+#define readdir(x) mingw_readdir(x) /* NOTE(review): no prototype for mingw_readdir() is visible in this hunk -- confirm it is declared */
+#endif // !NO_MINGW_REPLACE_READDIR
--
1.6.2.2.472.gf61f7.dirty
next reply other threads:[~2009-04-08 21:03 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-04-08 21:01 Marius Storm-Olsen [this message]
2009-04-09 20:34 ` [msysGit] [PATCH] MinGW readdir reimplementation to support d_type Johannes Sixt
2009-04-10 7:50 ` Marius Storm-Olsen
2009-04-11 21:44 ` Johannes Sixt
2009-05-07 21:26 ` Heiko Voigt
2009-05-08 5:45 ` Marius Storm-Olsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1239224507-5372-1-git-send-email-marius@trolltech.com \
--to=marius@trolltech.com \
--cc=git@vger.kernel.org \
--cc=msysgit@googlegroups.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).