From: Johan Herland <johan@herland.net>
To: gitster@pobox.com
Cc: git@vger.kernel.org, johannes.schindelin@gmx.de,
trast@student.ethz.ch, tavestbo@trolltech.com,
git@drmicha.warpmail.net, chriscool@tuxfamily.org,
spearce@spearce.org,
Johannes Schindelin <Johannes.Schindelin@gmx.de>,
Johan Herland <johan@herland.net>
Subject: [PATCHv3 3/8] Speed up git notes lookup
Date: Wed, 29 Jul 2009 04:25:21 +0200 [thread overview]
Message-ID: <1248834326-31488-4-git-send-email-johan@herland.net> (raw)
In-Reply-To: <1248834326-31488-1-git-send-email-johan@herland.net>
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
To avoid looking up each and every commit in the notes ref's tree
object, which is very expensive, speed things up by slurping the tree
object's contents into a hash_map.
The idea for the hashmap singleton is from David Reiss; initial
benchmarking was done by Jeff King.
Note: the implementation allows for arbitrary entries in the notes
tree object, ignoring those that do not reference a valid object. This
allows you to annotate arbitrary branches or objects.
This patch has been improved by the following contributions:
- Junio C Hamano: fixed an obvious error in initialize_hash_map()
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
notes.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 102 insertions(+), 11 deletions(-)
diff --git a/notes.c b/notes.c
index b0cf553..bd73784 100644
--- a/notes.c
+++ b/notes.c
@@ -4,16 +4,112 @@
#include "refs.h"
#include "utf8.h"
#include "strbuf.h"
+#include "tree-walk.h"
+
+struct entry {
+ unsigned char commit_sha1[20];
+ unsigned char notes_sha1[20];
+};
+
+struct hash_map {
+ struct entry *entries;
+ off_t count, size;
+};
static int initialized;
+static struct hash_map hash_map;
+
+static int hash_index(struct hash_map *map, const unsigned char *sha1)
+{
+ int i = ((*(unsigned int *)sha1) % map->size);
+
+ for (;;) {
+ unsigned char *current = map->entries[i].commit_sha1;
+
+ if (!hashcmp(sha1, current))
+ return i;
+
+ if (is_null_sha1(current))
+ return -1 - i;
+
+ if (++i == map->size)
+ i = 0;
+ }
+}
+
+static void add_entry(const unsigned char *commit_sha1,
+ const unsigned char *notes_sha1)
+{
+ int index;
+
+ if (hash_map.count + 1 > hash_map.size >> 1) {
+ int i, old_size = hash_map.size;
+ struct entry *old = hash_map.entries;
+
+ hash_map.size = old_size ? old_size << 1 : 64;
+ hash_map.entries = (struct entry *)
+ xcalloc(sizeof(struct entry), hash_map.size);
+
+ for (i = 0; i < old_size; i++)
+ if (!is_null_sha1(old[i].commit_sha1)) {
+ index = -1 - hash_index(&hash_map,
+ old[i].commit_sha1);
+ memcpy(hash_map.entries + index, old + i,
+ sizeof(struct entry));
+ }
+ free(old);
+ }
+
+ index = hash_index(&hash_map, commit_sha1);
+ if (index < 0) {
+ index = -1 - index;
+ hash_map.count++;
+ }
+
+ hashcpy(hash_map.entries[index].commit_sha1, commit_sha1);
+ hashcpy(hash_map.entries[index].notes_sha1, notes_sha1);
+}
+
+static void initialize_hash_map(const char *notes_ref_name)
+{
+ unsigned char sha1[20], commit_sha1[20];
+ unsigned mode;
+ struct tree_desc desc;
+ struct name_entry entry;
+ void *buf;
+
+ if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) ||
+ get_tree_entry(commit_sha1, "", sha1, &mode))
+ return;
+
+ buf = fill_tree_descriptor(&desc, sha1);
+ if (!buf)
+ die("Could not read %s for notes-index", sha1_to_hex(sha1));
+
+ while (tree_entry(&desc, &entry))
+ if (!get_sha1(entry.path, commit_sha1))
+ add_entry(commit_sha1, entry.sha1);
+ free(buf);
+}
+
+static unsigned char *lookup_notes(const unsigned char *commit_sha1)
+{
+ int index;
+
+ if (!hash_map.size)
+ return NULL;
+
+ index = hash_index(&hash_map, commit_sha1);
+ if (index < 0)
+ return NULL;
+ return hash_map.entries[index].notes_sha1;
+}
void get_commit_notes(const struct commit *commit, struct strbuf *sb,
const char *output_encoding)
{
static const char *utf8 = "utf-8";
- struct strbuf name = STRBUF_INIT;
- const char *hex;
- unsigned char sha1[20];
+ unsigned char *sha1;
char *msg, *msg_p;
unsigned long linelen, msglen;
enum object_type type;
@@ -24,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb,
notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT);
else if (!notes_ref_name)
notes_ref_name = GIT_NOTES_DEFAULT_REF;
- if (notes_ref_name && read_ref(notes_ref_name, sha1))
- notes_ref_name = NULL;
+ initialize_hash_map(notes_ref_name);
initialized = 1;
}
- if (!notes_ref_name)
- return;
-
- strbuf_addf(&name, "%s:%s", notes_ref_name,
- sha1_to_hex(commit->object.sha1));
- if (get_sha1(name.buf, sha1))
+ sha1 = lookup_notes(commit->object.sha1);
+ if (!sha1)
return;
if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen ||
--
1.6.4.rc3.138.ga6b98.dirty
next prev parent reply other threads:[~2009-07-29 2:27 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-07-29 2:25 [PATCHv3 0/8] RESEND: git notes Johan Herland
2009-07-29 2:25 ` [PATCHv3 1/8] Introduce commit notes Johan Herland
2009-07-29 8:52 ` Alex Riesen
2009-07-29 16:40 ` Johannes Schindelin
2009-07-30 0:50 ` Johan Herland
2009-07-30 1:14 ` Johannes Schindelin
2009-07-30 0:42 ` Johan Herland
2009-07-29 2:25 ` [PATCHv3 2/8] Add a script to edit/inspect notes Johan Herland
2009-07-29 2:25 ` Johan Herland [this message]
2009-07-29 2:25 ` [PATCHv3 4/8] Add an expensive test for git-notes Johan Herland
2009-07-29 2:25 ` [PATCHv3 5/8] Teach "-m <msg>" and "-F <file>" to "git notes edit" Johan Herland
2009-07-29 7:57 ` Thomas Rast
2009-07-30 1:02 ` Johan Herland
2009-07-29 2:25 ` [PATCHv3 6/8] First draft of notes tree parser with support for fanout subtrees Johan Herland
2009-07-29 16:45 ` Johannes Schindelin
2009-07-30 0:18 ` Testing performance of the notes lookup code (Was: [PATCHv3 6/8] First draft of notes tree parser with support for fanout subtrees) Johan Herland
2009-08-01 2:36 ` [RFC] First draft of 256-tree structure for storing notes Johan Herland
2009-08-13 3:00 ` [RFC] Store subtree entries in the same hash map as the note entries Johan Herland
2009-08-26 10:31 ` [RFC] Use a 16-tree instead of a 256-tree for storing notes Johan Herland
2009-08-26 12:05 ` Alex Riesen
2009-08-26 12:56 ` Johan Herland
2009-08-26 13:24 ` Alex Riesen
2009-08-26 13:27 ` Andreas Ericsson
2009-08-26 14:43 ` Johan Herland
2009-08-27 11:56 ` Johannes Schindelin
2009-07-29 2:25 ` [PATCHv3 7/8] fast-import: Add support for importing commit notes Johan Herland
2009-07-29 14:21 ` Shawn O. Pearce
2009-07-30 1:29 ` Johan Herland
2009-07-29 2:25 ` [PATCHv3 8/8] t3302-notes-index-expensive: Speed up create_repo() Johan Herland
2009-07-29 16:46 ` Johannes Schindelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1248834326-31488-4-git-send-email-johan@herland.net \
--to=johan@herland.net \
--cc=chriscool@tuxfamily.org \
--cc=git@drmicha.warpmail.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=spearce@spearce.org \
--cc=tavestbo@trolltech.com \
--cc=trast@student.ethz.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).