From: "René Scharfe" <l.s.r@web.de>
To: Junio C Hamano <gitster@pobox.com>, Jeff King <peff@peff.net>
Cc: "Michael Giuffrida" <michaelpg@chromium.org>,
git@vger.kernel.org, "SZEDER Gábor" <szeder.dev@gmail.com>
Subject: [PATCH] sha1_name: cache readdir(3) results in find_short_object_filename()
Date: Thu, 22 Jun 2017 20:19:48 +0200 [thread overview]
Message-ID: <d06fb033-294e-f364-3dde-394624e83cd6@web.de> (raw)
In-Reply-To: <xmqqd1a0vb2t.fsf@gitster.mtv.corp.google.com>
Read each loose object subdirectory at most once when looking for unique
abbreviated hashes. This speeds up commands like "git log --pretty=%h"
considerably, which previously caused one readdir(3) call for each
candidate, even for subdirectories that were visited before.
The new cache is kept until the program ends and never invalidated. The
same is already true for pack indexes. Finding unique short hashes is
inherently racy -- a conflicting new object may be added at any time --
so a possibly stale cache is still fit for this purpose. Tasks with
higher consistency requirements should not use it, though.
The cached object names are stored in an oid_array, which is quite
compact. The bitmap for remembering which subdir was already read is
stored as a char array, with one char per directory -- that's not quite
as compact as a real bit array, but really simple, and the extra 224
bytes it costs are equivalent to only about 11 hashes after all.
Suggested-by: Jeff King <peff@peff.net>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
cache.h | 17 +++++++++++++++++
sha1_file.c | 12 ++++++------
sha1_name.c | 50 ++++++++++++++++++++++++++++++--------------------
3 files changed, 53 insertions(+), 26 deletions(-)
diff --git a/cache.h b/cache.h
index d6ba8a2f11..00a017dfcb 100644
--- a/cache.h
+++ b/cache.h
@@ -11,6 +11,7 @@
#include "string-list.h"
#include "pack-revindex.h"
#include "hash.h"
+#include "sha1-array.h"
#ifndef platform_SHA_CTX
/*
@@ -1593,6 +1594,16 @@ extern struct alternate_object_database {
struct strbuf scratch;
size_t base_len;
+ /*
+ * Used to store the results of readdir(3) calls when searching
+ * for unique abbreviated hashes. This cache is never
+ * invalidated, thus it's racy and not necessarily accurate.
+ * That's fine for its purpose; don't use it for tasks requiring
+ * greater accuracy!
+ */
+ char loose_objects_subdir_seen[256];
+ struct oid_array loose_objects_cache;
+
char path[FLEX_ARRAY];
} *alt_odb_list;
extern void prepare_alt_odb(void);
@@ -1811,6 +1822,12 @@ typedef int each_loose_cruft_fn(const char *basename,
typedef int each_loose_subdir_fn(int nr,
const char *path,
void *data);
+int for_each_file_in_obj_subdir(int subdir_nr,
+ struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
int for_each_loose_file_in_objdir(const char *path,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
diff --git a/sha1_file.c b/sha1_file.c
index 59a4ed2ed3..5e0ee2b68b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3735,12 +3735,12 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
typename(expect));
}
-static int for_each_file_in_obj_subdir(int subdir_nr,
- struct strbuf *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data)
+int for_each_file_in_obj_subdir(int subdir_nr,
+ struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data)
{
size_t baselen = path->len;
DIR *dir = opendir(path->buf);
diff --git a/sha1_name.c b/sha1_name.c
index 5126853bb5..ccb5144d0d 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -77,10 +77,19 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
/* otherwise, current can be discarded and candidate is still good */
}
+static int append_loose_object(const struct object_id *oid, const char *path,
+ void *data)
+{
+ oid_array_append(data, oid);
+ return 0;
+}
+
+static int match_sha(unsigned, const unsigned char *, const unsigned char *);
+
static void find_short_object_filename(struct disambiguate_state *ds)
{
+ int subdir_nr = ds->bin_pfx.hash[0];
struct alternate_object_database *alt;
- char hex[GIT_MAX_HEXSZ];
static struct alternate_object_database *fakeent;
if (!fakeent) {
@@ -95,29 +104,30 @@ static void find_short_object_filename(struct disambiguate_state *ds)
}
fakeent->next = alt_odb_list;
- xsnprintf(hex, sizeof(hex), "%.2s", ds->hex_pfx);
for (alt = fakeent; alt && !ds->ambiguous; alt = alt->next) {
- struct strbuf *buf = alt_scratch_buf(alt);
- struct dirent *de;
- DIR *dir;
-
- strbuf_addf(buf, "%.2s/", ds->hex_pfx);
- dir = opendir(buf->buf);
- if (!dir)
- continue;
+ int pos;
- while (!ds->ambiguous && (de = readdir(dir)) != NULL) {
- struct object_id oid;
+ if (!alt->loose_objects_subdir_seen[subdir_nr]) {
+ struct strbuf *buf = alt_scratch_buf(alt);
+ strbuf_addf(buf, "%02x/", subdir_nr);
+ for_each_file_in_obj_subdir(subdir_nr, buf,
+ append_loose_object,
+ NULL, NULL,
+ &alt->loose_objects_cache);
+ alt->loose_objects_subdir_seen[subdir_nr] = 1;
+ }
- if (strlen(de->d_name) != GIT_SHA1_HEXSZ - 2)
- continue;
- if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
- continue;
- memcpy(hex + 2, de->d_name, GIT_SHA1_HEXSZ - 2);
- if (!get_oid_hex(hex, &oid))
- update_candidates(ds, &oid);
+ pos = oid_array_lookup(&alt->loose_objects_cache, &ds->bin_pfx);
+ if (pos < 0)
+ pos = -1 - pos;
+ while (!ds->ambiguous && pos < alt->loose_objects_cache.nr) {
+ const struct object_id *oid;
+ oid = alt->loose_objects_cache.oid + pos;
+ if (!match_sha(ds->len, ds->bin_pfx.hash, oid->hash))
+ break;
+ update_candidates(ds, oid);
+ pos++;
}
- closedir(dir);
}
}
--
2.13.1
next prev parent reply other threads:[~2017-06-22 18:20 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-12 3:13 [BUG] add_again() off-by-one error in custom format Michael Giuffrida
2017-06-12 22:49 ` Junio C Hamano
2017-06-13 18:09 ` René Scharfe
2017-06-13 18:29 ` Junio C Hamano
2017-06-13 20:29 ` René Scharfe
2017-06-13 21:20 ` Junio C Hamano
2017-06-14 18:24 ` René Scharfe
2017-06-15 5:56 ` Jeff King
2017-06-15 11:33 ` René Scharfe
2017-06-15 13:25 ` Jeff King
2017-06-18 10:58 ` René Scharfe
2017-06-18 11:49 ` Jeff King
2017-06-18 12:59 ` René Scharfe
2017-06-18 13:56 ` Jeff King
2017-06-22 18:19 ` René Scharfe
2017-06-22 23:15 ` Jeff King
2017-06-18 10:58 ` René Scharfe
2017-06-18 11:50 ` Jeff King
2017-06-19 4:46 ` Junio C Hamano
2017-06-22 18:19 ` René Scharfe [this message]
2017-06-22 23:10 ` [PATCH] sha1_name: cache readdir(3) results in find_short_object_filename() Jeff King
2017-06-24 12:12 ` René Scharfe
2017-06-24 12:14 ` Jeff King
2017-06-24 12:12 ` René Scharfe
2017-06-24 12:20 ` Jeff King
2017-06-24 14:09 ` René Scharfe
2017-06-24 14:12 ` Jeff King
2017-06-15 18:37 ` [BUG] add_again() off-by-one error in custom format Junio C Hamano
2017-06-13 22:24 ` SZEDER Gábor
2017-06-14 17:34 ` René Scharfe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d06fb033-294e-f364-3dde-394624e83cd6@web.de \
--to=l.s.r@web.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=michaelpg@chromium.org \
--cc=peff@peff.net \
--cc=szeder.dev@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).