* [PATCH 1/2] clean: make is_git_repository a public function
2016-01-22 22:26 ` Jeff King
@ 2016-01-22 22:27 ` Jeff King
2016-01-22 22:29 ` [PATCH 2/2] resolve_gitlink_ref: ignore non-repository paths Jeff King
2016-01-25 13:13 ` Minor bug, git ls-files -o aborts because of broken submodules Andreas Krey
2 siblings, 0 replies; 9+ messages in thread
From: Jeff King @ 2016-01-22 22:27 UTC (permalink / raw)
To: Duy Nguyen; +Cc: Andreas Krey, Git Mailing List
We have always had is_git_directory(), for looking at a
specific directory to see if it contains a git repo. In
0179ca7 (clean: improve performance when removing lots of
directories, 2015-06-15), we added is_git_repository() which
checks for a non-bare repository by looking at its ".git"
entry.
However, the fix in 0179ca7 needs to be applied other
places, too. Let's make this new helper globally available.
We need to give it a better name, though, to avoid confusion
with is_git_directory(). This patch does that, documents
both functions with a comment to reduce confusion, and
removes the clean-specific references in the comments.
Based-on-a-patch-by: Andreas Krey <a.krey@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
---
builtin/clean.c | 26 +-------------------------
cache.h | 20 +++++++++++++++++++-
setup.c | 17 +++++++++++++++++
3 files changed, 37 insertions(+), 26 deletions(-)
diff --git a/builtin/clean.c b/builtin/clean.c
index d7acb94..919157b 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -147,30 +147,6 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset)
return 0;
}
-/*
- * Return 1 if the given path is the root of a git repository or
- * submodule else 0. Will not return 1 for bare repositories with the
- * exception of creating a bare repository in "foo/.git" and calling
- * is_git_repository("foo").
- */
-static int is_git_repository(struct strbuf *path)
-{
- int ret = 0;
- int gitfile_error;
- size_t orig_path_len = path->len;
- assert(orig_path_len != 0);
- strbuf_complete(path, '/');
- strbuf_addstr(path, ".git");
- if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf))
- ret = 1;
- if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED ||
- gitfile_error == READ_GITFILE_ERR_READ_FAILED)
- ret = 1; /* This could be a real .git file, take the
- * safe option and avoid cleaning */
- strbuf_setlen(path, orig_path_len);
- return ret;
-}
-
static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
int dry_run, int quiet, int *dir_gone)
{
@@ -182,7 +158,7 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
*dir_gone = 1;
- if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) {
+ if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_nonbare_repository_dir(path)) {
if (!quiet) {
quote_path_relative(path->buf, prefix, "ed);
printf(dry_run ? _(msg_would_skip_git_dir) : _(msg_skip_git_dir),
diff --git a/cache.h b/cache.h
index dfc459c..553b04b 100644
--- a/cache.h
+++ b/cache.h
@@ -458,7 +458,6 @@ extern char *git_work_tree_cfg;
extern int is_inside_work_tree(void);
extern const char *get_git_dir(void);
extern const char *get_git_common_dir(void);
-extern int is_git_directory(const char *path);
extern char *get_object_directory(void);
extern char *get_index_file(void);
extern char *get_graft_file(void);
@@ -469,6 +468,25 @@ extern const char *get_git_namespace(void);
extern const char *strip_namespace(const char *namespaced_ref);
extern const char *get_git_work_tree(void);
+/*
+ * Return true if the given path is a git directory; note that this _just_
+ * looks at the directory itself. If you want to know whether "foo/.git"
+ * is a repository, you must feed that path, not just "foo".
+ */
+extern int is_git_directory(const char *path);
+
+/*
+ * Return 1 if the given path is the root of a git repository or
+ * submodule, else 0. Will not return 1 for bare repositories with the
+ * exception of creating a bare repository in "foo/.git" and calling
+ * is_git_repository("foo").
+ *
+ * If we run into read errors, we err on the side of saying "yes, it is",
+ * as we usually consider sub-repos precious, and would prefer to err on the
+ * side of not disrupting or deleting them.
+ */
+extern int is_nonbare_repository_dir(struct strbuf *path);
+
#define READ_GITFILE_ERR_STAT_FAILED 1
#define READ_GITFILE_ERR_NOT_A_FILE 2
#define READ_GITFILE_ERR_OPEN_FAILED 3
diff --git a/setup.c b/setup.c
index d343725..2c4b22c 100644
--- a/setup.c
+++ b/setup.c
@@ -312,6 +312,23 @@ done:
return ret;
}
+int is_nonbare_repository_dir(struct strbuf *path)
+{
+ int ret = 0;
+ int gitfile_error;
+ size_t orig_path_len = path->len;
+ assert(orig_path_len != 0);
+ strbuf_complete(path, '/');
+ strbuf_addstr(path, ".git");
+ if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf))
+ ret = 1;
+ if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED ||
+ gitfile_error == READ_GITFILE_ERR_READ_FAILED)
+ ret = 1;
+ strbuf_setlen(path, orig_path_len);
+ return ret;
+}
+
int is_inside_git_dir(void)
{
if (inside_git_dir < 0)
--
2.7.0.384.gae54cb4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] resolve_gitlink_ref: ignore non-repository paths
2016-01-22 22:26 ` Jeff King
2016-01-22 22:27 ` [PATCH 1/2] clean: make is_git_repository a public function Jeff King
@ 2016-01-22 22:29 ` Jeff King
2016-01-22 22:31 ` Jeff King
2016-01-25 13:13 ` Minor bug, git ls-files -o aborts because of broken submodules Andreas Krey
2 siblings, 1 reply; 9+ messages in thread
From: Jeff King @ 2016-01-22 22:29 UTC (permalink / raw)
To: Duy Nguyen; +Cc: Andreas Krey, Git Mailing List
When we want to look up a submodule ref, we use
get_ref_cache(path) to find or auto-create its ref cache.
But if we feed a path that isn't actually a git repository,
we blindly create the ref cache, and then may die deeper in
the code when we try to access it. This is a problem because
many callers speculatively feed us a path that looks vaguely
like a repository, and expect us to tell them when it is
not.
This patch teaches resolve_gitlink_ref to reject
non-repository paths without creating a ref_cache. This
avoids the die(), and also performs better if you have a
large number of these faux-submodule directories (because
the ref_cache lookup is linear, under the assumption that
there won't be a large number of submodules).
To accomplish this, we also break get_ref_cache into two
pieces: the lookup and auto-creation (the latter is lumped
into create_ref_cache). This lets us first cheaply ask our
cache "is it a submodule we know about?" If so, we can avoid
repeating our filesystem lookup. So lookups of real
submodules are not penalized; they examine the submodule's
.git directory only once.
The test in t3000 demonstrates a case where this improves
correctness (we used to just die). The new perf case in
p7300 shows off the speed improvement in an admittedly
pathological repository:
Test HEAD^ HEAD
----------------------------------------------------------------
7300.4: ls-files -o 66.97(66.15+0.87) 0.33(0.08+0.24) -99.5%
Signed-off-by: Jeff King <peff@peff.net>
---
refs/files-backend.c | 46 ++++++++++++++++++++++++++++++++--------------
t/perf/p7300-clean.sh | 4 ++++
t/t3000-ls-files-others.sh | 7 +++++++
3 files changed, 43 insertions(+), 14 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index c648b5e..3a27f27 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -933,6 +933,10 @@ static void clear_loose_ref_cache(struct ref_cache *refs)
}
}
+/*
+ * Create a new submodule ref cache and add it to the internal
+ * set of caches.
+ */
static struct ref_cache *create_ref_cache(const char *submodule)
{
int len;
@@ -942,16 +946,12 @@ static struct ref_cache *create_ref_cache(const char *submodule)
len = strlen(submodule) + 1;
refs = xcalloc(1, sizeof(struct ref_cache) + len);
memcpy(refs->name, submodule, len);
+ refs->next = submodule_ref_caches;
+ submodule_ref_caches = refs;
return refs;
}
-/*
- * Return a pointer to a ref_cache for the specified submodule. For
- * the main repository, use submodule==NULL. The returned structure
- * will be allocated and initialized but not necessarily populated; it
- * should not be freed.
- */
-static struct ref_cache *get_ref_cache(const char *submodule)
+static struct ref_cache *lookup_ref_cache(const char *submodule)
{
struct ref_cache *refs;
@@ -961,10 +961,20 @@ static struct ref_cache *get_ref_cache(const char *submodule)
for (refs = submodule_ref_caches; refs; refs = refs->next)
if (!strcmp(submodule, refs->name))
return refs;
+ return NULL;
+}
- refs = create_ref_cache(submodule);
- refs->next = submodule_ref_caches;
- submodule_ref_caches = refs;
+/*
+ * Return a pointer to a ref_cache for the specified submodule. For
+ * the main repository, use submodule==NULL. The returned structure
+ * will be allocated and initialized but not necessarily populated; it
+ * should not be freed.
+ */
+static struct ref_cache *get_ref_cache(const char *submodule)
+{
+ struct ref_cache *refs = lookup_ref_cache(submodule);
+ if (!refs)
+ refs = create_ref_cache(submodule);
return refs;
}
@@ -1336,16 +1346,24 @@ static int resolve_gitlink_ref_recursive(struct ref_cache *refs,
int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *sha1)
{
int len = strlen(path), retval;
- char *submodule;
+ struct strbuf submodule = STRBUF_INIT;
struct ref_cache *refs;
while (len && path[len-1] == '/')
len--;
if (!len)
return -1;
- submodule = xstrndup(path, len);
- refs = get_ref_cache(submodule);
- free(submodule);
+
+ strbuf_add(&submodule, path, len);
+ refs = lookup_ref_cache(submodule.buf);
+ if (!refs) {
+ if (!is_nonbare_repository_dir(&submodule)) {
+ strbuf_release(&submodule);
+ return -1;
+ }
+ refs = create_ref_cache(submodule.buf);
+ }
+ strbuf_release(&submodule);
retval = resolve_gitlink_ref_recursive(refs, refname, sha1, 0);
return retval;
diff --git a/t/perf/p7300-clean.sh b/t/perf/p7300-clean.sh
index ec94cdd..7c1888a 100755
--- a/t/perf/p7300-clean.sh
+++ b/t/perf/p7300-clean.sh
@@ -28,4 +28,8 @@ test_perf 'clean many untracked sub dirs, ignore nested git' '
git clean -n -q -f -f -d 100000_sub_dirs/
'
+test_perf 'ls-files -o' '
+ git ls-files -o
+'
+
test_done
diff --git a/t/t3000-ls-files-others.sh b/t/t3000-ls-files-others.sh
index 88be904..c525656 100755
--- a/t/t3000-ls-files-others.sh
+++ b/t/t3000-ls-files-others.sh
@@ -65,6 +65,13 @@ test_expect_success '--no-empty-directory hides empty directory' '
test_cmp expected3 output
'
+test_expect_success 'ls-files --others handles non-submodule .git' '
+ mkdir not-a-submodule &&
+ echo foo >not-a-submodule/.git &&
+ git ls-files -o >output &&
+ test_cmp expected1 output
+'
+
test_expect_success SYMLINKS 'ls-files --others with symlinked submodule' '
git init super &&
git init sub &&
--
2.7.0.384.gae54cb4
^ permalink raw reply related [flat|nested] 9+ messages in thread