git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johan Herland <johan@herland.net>
To: gitster@pobox.com
Cc: git@vger.kernel.org, johannes.schindelin@gmx.de,
	trast@student.ethz.ch, tavestbo@trolltech.com,
	git@drmicha.warpmail.net, chriscool@tuxfamily.org,
	spearce@spearce.org,
	Johannes Schindelin <Johannes.Schindelin@gmx.de>,
	Johan Herland <johan@herland.net>
Subject: [PATCHv3 3/8] Speed up git notes lookup
Date: Wed, 29 Jul 2009 04:25:21 +0200	[thread overview]
Message-ID: <1248834326-31488-4-git-send-email-johan@herland.net> (raw)
In-Reply-To: <1248834326-31488-1-git-send-email-johan@herland.net>

From: Johannes Schindelin <Johannes.Schindelin@gmx.de>

To avoid looking up each and every commit in the notes ref's tree
object, which is very expensive, speed things up by slurping the tree
object's contents into a hash_map.

The idea for the hashmap singleton is from David Reiss, initial
benchmarking by Jeff King.

Note: the implementation allows for arbitrary entries in the notes
tree object, ignoring those that do not reference a valid object.  This
allows you to annotate arbitrary branches, or objects.

This patch has been improved by the following contributions:
- Junio C Hamano: fixed an obvious error in initialize_hash_map()

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 notes.c |  113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/notes.c b/notes.c
index b0cf553..bd73784 100644
--- a/notes.c
+++ b/notes.c
@@ -4,16 +4,112 @@
 #include "refs.h"
 #include "utf8.h"
 #include "strbuf.h"
+#include "tree-walk.h"
+
+struct entry {
+	unsigned char commit_sha1[20];
+	unsigned char notes_sha1[20];
+};
+
+struct hash_map {
+	struct entry *entries;
+	off_t count, size;
+};
 
 static int initialized;
+static struct hash_map hash_map;
+
+static int hash_index(struct hash_map *map, const unsigned char *sha1)
+{
+	int i = ((*(unsigned int *)sha1) % map->size);
+
+	for (;;) {
+		unsigned char *current = map->entries[i].commit_sha1;
+
+		if (!hashcmp(sha1, current))
+			return i;
+
+		if (is_null_sha1(current))
+			return -1 - i;
+
+		if (++i == map->size)
+			i = 0;
+	}
+}
+
+static void add_entry(const unsigned char *commit_sha1,
+		const unsigned char *notes_sha1)
+{
+	int index;
+
+	if (hash_map.count + 1 > hash_map.size >> 1) {
+		int i, old_size = hash_map.size;
+		struct entry *old = hash_map.entries;
+
+		hash_map.size = old_size ? old_size << 1 : 64;
+		hash_map.entries = (struct entry *)
+			xcalloc(sizeof(struct entry), hash_map.size);
+
+		for (i = 0; i < old_size; i++)
+			if (!is_null_sha1(old[i].commit_sha1)) {
+				index = -1 - hash_index(&hash_map,
+						old[i].commit_sha1);
+				memcpy(hash_map.entries + index, old + i,
+					sizeof(struct entry));
+			}
+		free(old);
+	}
+
+	index = hash_index(&hash_map, commit_sha1);
+	if (index < 0) {
+		index = -1 - index;
+		hash_map.count++;
+	}
+
+	hashcpy(hash_map.entries[index].commit_sha1, commit_sha1);
+	hashcpy(hash_map.entries[index].notes_sha1, notes_sha1);
+}
+
+static void initialize_hash_map(const char *notes_ref_name)
+{
+	unsigned char sha1[20], commit_sha1[20];
+	unsigned mode;
+	struct tree_desc desc;
+	struct name_entry entry;
+	void *buf;
+
+	if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) ||
+	    get_tree_entry(commit_sha1, "", sha1, &mode))
+		return;
+
+	buf = fill_tree_descriptor(&desc, sha1);
+	if (!buf)
+		die("Could not read %s for notes-index", sha1_to_hex(sha1));
+
+	while (tree_entry(&desc, &entry))
+		if (!get_sha1(entry.path, commit_sha1))
+			add_entry(commit_sha1, entry.sha1);
+	free(buf);
+}
+
+static unsigned char *lookup_notes(const unsigned char *commit_sha1)
+{
+	int index;
+
+	if (!hash_map.size)
+		return NULL;
+
+	index = hash_index(&hash_map, commit_sha1);
+	if (index < 0)
+		return NULL;
+	return hash_map.entries[index].notes_sha1;
+}
 
 void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 		const char *output_encoding)
 {
 	static const char *utf8 = "utf-8";
-	struct strbuf name = STRBUF_INIT;
-	const char *hex;
-	unsigned char sha1[20];
+	unsigned char *sha1;
 	char *msg, *msg_p;
 	unsigned long linelen, msglen;
 	enum object_type type;
@@ -24,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 			notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT);
 		else if (!notes_ref_name)
 			notes_ref_name = GIT_NOTES_DEFAULT_REF;
-		if (notes_ref_name && read_ref(notes_ref_name, sha1))
-			notes_ref_name = NULL;
+		initialize_hash_map(notes_ref_name);
 		initialized = 1;
 	}
 
-	if (!notes_ref_name)
-		return;
-
-	strbuf_addf(&name, "%s:%s", notes_ref_name,
-			sha1_to_hex(commit->object.sha1));
-	if (get_sha1(name.buf, sha1))
+	sha1 = lookup_notes(commit->object.sha1);
+	if (!sha1)
 		return;
 
 	if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen ||
-- 
1.6.4.rc3.138.ga6b98.dirty

  parent reply	other threads:[~2009-07-29  2:27 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-29  2:25 [PATCHv3 0/8] RESEND: git notes Johan Herland
2009-07-29  2:25 ` [PATCHv3 1/8] Introduce commit notes Johan Herland
2009-07-29  8:52   ` Alex Riesen
2009-07-29 16:40     ` Johannes Schindelin
2009-07-30  0:50       ` Johan Herland
2009-07-30  1:14         ` Johannes Schindelin
2009-07-30  0:42     ` Johan Herland
2009-07-29  2:25 ` [PATCHv3 2/8] Add a script to edit/inspect notes Johan Herland
2009-07-29  2:25 ` Johan Herland [this message]
2009-07-29  2:25 ` [PATCHv3 4/8] Add an expensive test for git-notes Johan Herland
2009-07-29  2:25 ` [PATCHv3 5/8] Teach "-m <msg>" and "-F <file>" to "git notes edit" Johan Herland
2009-07-29  7:57   ` Thomas Rast
2009-07-30  1:02     ` Johan Herland
2009-07-29  2:25 ` [PATCHv3 6/8] First draft of notes tree parser with support for fanout subtrees Johan Herland
2009-07-29 16:45   ` Johannes Schindelin
2009-07-30  0:18     ` Testing performance of the notes lookup code (Was: [PATCHv3 6/8] First draft of notes tree parser with support for fanout subtrees) Johan Herland
2009-08-01  2:36       ` [RFC] First draft of 256-tree structure for storing notes Johan Herland
2009-08-13  3:00         ` [RFC] Store subtree entries in the same hash map as the note entries Johan Herland
2009-08-26 10:31         ` [RFC] Use a 16-tree instead of a 256-tree for storing notes Johan Herland
2009-08-26 12:05           ` Alex Riesen
2009-08-26 12:56             ` Johan Herland
2009-08-26 13:24               ` Alex Riesen
2009-08-26 13:27                 ` Andreas Ericsson
2009-08-26 14:43                   ` Johan Herland
2009-08-27 11:56                   ` Johannes Schindelin
2009-07-29  2:25 ` [PATCHv3 7/8] fast-import: Add support for importing commit notes Johan Herland
2009-07-29 14:21   ` Shawn O. Pearce
2009-07-30  1:29     ` Johan Herland
2009-07-29  2:25 ` [PATCHv3 8/8] t3302-notes-index-expensive: Speed up create_repo() Johan Herland
2009-07-29 16:46   ` Johannes Schindelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1248834326-31488-4-git-send-email-johan@herland.net \
    --to=johan@herland.net \
    --cc=chriscool@tuxfamily.org \
    --cc=git@drmicha.warpmail.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=johannes.schindelin@gmx.de \
    --cc=spearce@spearce.org \
    --cc=tavestbo@trolltech.com \
    --cc=trast@student.ethz.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).