git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johan Herland <johan@herland.net>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johan@herland.net, Johannes.Schindelin@gmx.de,
	trast@student.ethz.ch, tavestbo@trolltech.com,
	git@drmicha.warpmail.net, chriscool@tuxfamily.org,
	spearce@spearce.org, sam@vilain.net
Subject: [RFC/PATCHv7 19/22] Notes API: for_each_note(): Traverse the entire notes tree with a callback
Date: Fri, 09 Oct 2009 12:22:15 +0200	[thread overview]
Message-ID: <1255083738-23263-21-git-send-email-johan@herland.net> (raw)
In-Reply-To: <1255083738-23263-1-git-send-email-johan@herland.net>

This includes a first attempt at creating an optimal fanout scheme (which
is created on-the-fly, while traversing).

Signed-off-by: Johan Herland <johan@herland.net>
---
 notes.c |  101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 notes.h |    9 +++++
 2 files changed, 110 insertions(+), 0 deletions(-)

diff --git a/notes.c b/notes.c
index 2196a5f..9581b98 100644
--- a/notes.c
+++ b/notes.c
@@ -339,6 +339,101 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node,
 	free(buf);
 }
 
+/*
+ * Determine optimal on-disk fanout for this part of the notes tree
+ *
+ * Given a (sub)tree and the level in the internal tree structure, determine
+ * whether or not the given existing fanout should be expanded for this
+ * (sub)tree.
+ *
+ * Values of the 'fanout' variable:
+ * - 0: No fanout (all notes are stored directly in the root notes tree)
+ * - 1: 2/38 fanout
+ * - 2: 2/2/36 fanout
+ * - 3: 2/2/2/34 fanout
+ * etc.
+ */
+static unsigned char determine_fanout(struct int_node *tree, unsigned char n,
+		unsigned char fanout)
+{
+	/*
+	 * The following is a simple heuristic that works well in practice:
+	 * For each even-numbered 16-tree level (remember that each on-disk
+	 * fanout level corresponds to two 16-tree levels), peek at all 16
+	 * entries at that tree level. If any of them are subtree entries, then
+	 * there are likely plenty of notes below this level, so we return an
+	 * incremented fanout immediately. Otherwise, we return an incremented
+	 * fanout only if all of the entries at this level are int_nodes.
+	 */
+	unsigned int i;
+	if ((n % 2) || (n > 2 * fanout))
+		return fanout;
+	for (i = 0; i < 16; i++) {
+		switch(GET_PTR_TYPE(tree->a[i])) {
+		case PTR_TYPE_SUBTREE:
+			return fanout + 1;
+		case PTR_TYPE_INTERNAL:
+			continue;
+		default:
+			return fanout;
+		}
+	}
+	return fanout + 1;
+}
+
+static void construct_path_with_fanout(const unsigned char *sha1,
+		unsigned char fanout, char *path)
+{
+	unsigned int i = 0, j = 0;
+	const char *hex_sha1 = sha1_to_hex(sha1);
+	assert(fanout < 20);
+	while (fanout) {
+		path[i++] = hex_sha1[j++];
+		path[i++] = hex_sha1[j++];
+		path[i++] = '/';
+		fanout--;
+	}
+	strcpy(path + i, hex_sha1 + j);
+}
+
+static int for_each_note_helper(struct int_node *tree, unsigned char n,
+		unsigned char fanout, each_note_fn fn, void *cb_data)
+{
+	unsigned int i;
+	void *p;
+	int ret = 0;
+	struct leaf_node *l;
+	static char path[40 + 19 + 1];  /* hex SHA1 + 19 * '/' + NUL */
+
+	fanout = determine_fanout(tree, n, fanout);
+	for (i = 0; i < 16; i++) {
+redo:
+		p = tree->a[i];
+		switch(GET_PTR_TYPE(p)) {
+		case PTR_TYPE_INTERNAL:
+			/* recurse into int_node */
+			ret = for_each_note_helper(
+				CLR_PTR_TYPE(p), n + 1, fanout, fn, cb_data);
+			break;
+		case PTR_TYPE_SUBTREE:
+			/* unpack subtree and resume traversal */
+			l = (struct leaf_node *) CLR_PTR_TYPE(p);
+			tree->a[i] = NULL;
+			load_subtree(l, tree, n);
+			free(l);
+			goto redo;
+		case PTR_TYPE_NOTE:
+			l = (struct leaf_node *) CLR_PTR_TYPE(p);
+			construct_path_with_fanout(l->key_sha1, fanout, path);
+			ret = fn(l->key_sha1, l->val_sha1, path, cb_data);
+			break;
+		}
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 void init_notes(const char *notes_ref, int flags)
 {
 	unsigned char sha1[20], object_sha1[20];
@@ -386,6 +481,12 @@ const unsigned char *get_note(const unsigned char *object_sha1)
 	return found ? found->val_sha1 : NULL;
 }
 
+int for_each_note(each_note_fn fn, void *cb_data)
+{
+	assert(initialized);
+	return for_each_note_helper(&root_node, 0, 0, fn, cb_data);
+}
+
 void free_notes(void)
 {
 	note_tree_free(&root_node);
diff --git a/notes.h b/notes.h
index 21a8930..28648fd 100644
--- a/notes.h
+++ b/notes.h
@@ -28,6 +28,15 @@ void add_note(const unsigned char *object_sha1,
 /* Get the note object SHA1 containing the note data for the given object */
 const unsigned char *get_note(const unsigned char *object_sha1);
 
+/*
+ * Calls the specified function for each note until it returns nonzero,
+ * and returns the value
+ */
+typedef int each_note_fn(const unsigned char *object_sha1,
+		const unsigned char *note_sha1, const char *note_tree_path,
+		void *cb_data);
+int for_each_note(each_note_fn fn, void *cb_data);
+
 /* Free (and de-initialize) the internal notes tree structure */
 void free_notes(void);
 
-- 
1.6.4.304.g1365c.dirty

  parent reply	other threads:[~2009-10-09 10:32 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-09 10:21 [RFC/PATCHv7 00/22] git notes Johan Herland
2009-10-09 10:21 ` Johan Herland
2009-10-09 10:32   ` Johan Herland
2009-10-09 10:21 ` [RFC/PATCHv7 01/22] Introduce commit notes Johan Herland
2009-10-09 10:21 ` [RFC/PATCHv7 02/22] Add a script to edit/inspect notes Johan Herland
2009-10-09 10:21 ` [RFC/PATCHv7 03/22] Speed up git notes lookup Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 04/22] Add an expensive test for git-notes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 05/22] Teach "-m <msg>" and "-F <file>" to "git notes edit" Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 06/22] fast-import: Add support for importing commit notes Johan Herland
2011-01-31 18:33   ` [RFC] fast-import: notemodify (N) command Jonathan Nieder
2011-01-31 18:48     ` [Vcs-fast-import-devs] " Sverre Rabbelier
2011-01-31 19:01       ` Jonathan Nieder
2011-01-31 21:19         ` Sverre Rabbelier
2011-01-31 22:37           ` Sam Vilain
2011-02-01  0:13             ` Sverre Rabbelier
2009-10-09 10:22 ` [RFC/PATCHv7 07/22] t3302-notes-index-expensive: Speed up create_repo() Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 08/22] Add flags to get_commit_notes() to control the format of the note string Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 09/22] Add '%N'-format for pretty-printing commit notes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 10/22] Teach notes code to free its internal data structures on request Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 11/22] Teach the notes lookup code to parse notes trees with various fanout schemes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 12/22] Add selftests verifying that we can parse notes trees with various fanouts Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 13/22] Refactor notes code to concatenate multiple notes annotating the same object Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 14/22] Add selftests verifying concatenation of multiple notes for the same commit Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 15/22] Notes API: get_commit_notes() -> format_note() + remove the commit restriction Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 16/22] Notes API: init_notes(): Initialize the notes tree from the given notes ref Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 17/22] Notes API: add_note(): Add note objects to the internal notes tree structure Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 18/22] Notes API: get_note(): Return the note annotating the given object Johan Herland
2009-10-09 10:22 ` Johan Herland [this message]
2009-10-09 10:22 ` [RFC/PATCHv7 20/22] Notes API: Allow multiple concurrent notes trees with new struct notes_tree Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 21/22] Refactor notes concatenation into a flexible interface for combining notes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 22/22] fast-import: Proper notes tree manipulation using the notes API Johan Herland
2009-10-09 14:25   ` Shawn O. Pearce
2009-11-20  1:43     ` Johan Herland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1255083738-23263-21-git-send-email-johan@herland.net \
    --to=johan@herland.net \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=chriscool@tuxfamily.org \
    --cc=git@drmicha.warpmail.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=sam@vilain.net \
    --cc=spearce@spearce.org \
    --cc=tavestbo@trolltech.com \
    --cc=trast@student.ethz.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).