git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johan Herland <johan@herland.net>
To: gitster@pobox.com
Cc: git@vger.kernel.org, johan@herland.net
Subject: [PATCHv12 08/23] Notes API: for_each_note(): Traverse the entire notes tree with a callback
Date: Wed, 27 Jan 2010 12:51:45 +0100	[thread overview]
Message-ID: <1264593120-4428-9-git-send-email-johan@herland.net> (raw)
In-Reply-To: <1264593120-4428-1-git-send-email-johan@herland.net>

This includes a first attempt at creating an optimal fanout scheme (which
is calculated on-the-fly, while traversing).

Signed-off-by: Johan Herland <johan@herland.net>
---
 notes.c |  133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 notes.h |   47 ++++++++++++++++++++++
 2 files changed, 180 insertions(+), 0 deletions(-)

diff --git a/notes.c b/notes.c
index a0a85b4..eabd6f3 100644
--- a/notes.c
+++ b/notes.c
@@ -413,6 +413,133 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node,
 	free(buf);
 }
 
+/*
+ * Determine optimal on-disk fanout for this part of the notes tree
+ *
+ * Given a (sub)tree and the level in the internal tree structure, determine
+ * whether or not the given existing fanout should be expanded for this
+ * (sub)tree.
+ *
+ * Values of the 'fanout' variable:
+ * - 0: No fanout (all notes are stored directly in the root notes tree)
+ * - 1: 2/38 fanout
+ * - 2: 2/2/36 fanout
+ * - 3: 2/2/2/34 fanout
+ * etc.
+ */
+static unsigned char determine_fanout(struct int_node *tree, unsigned char n,
+		unsigned char fanout)
+{
+	/*
+	 * The following is a simple heuristic that works well in practice:
+	 * For each even-numbered 16-tree level (remember that each on-disk
+	 * fanout level corresponds to _two_ 16-tree levels), peek at all 16
+	 * entries at that tree level. If all of them are either int_nodes or
+	 * subtree entries, then there are likely plenty of notes below this
+	 * level, so we return an incremented fanout.
+	 */
+	unsigned int i;
+	if ((n % 2) || (n > 2 * fanout))
+		return fanout;
+	for (i = 0; i < 16; i++) {
+		switch (GET_PTR_TYPE(tree->a[i])) {
+		case PTR_TYPE_SUBTREE:
+		case PTR_TYPE_INTERNAL:
+			continue;
+		default:
+			return fanout;
+		}
+	}
+	return fanout + 1;
+}
+
+static void construct_path_with_fanout(const unsigned char *sha1,
+		unsigned char fanout, char *path)
+{
+	unsigned int i = 0, j = 0;
+	const char *hex_sha1 = sha1_to_hex(sha1);
+	assert(fanout < 20);
+	while (fanout) {
+		path[i++] = hex_sha1[j++];
+		path[i++] = hex_sha1[j++];
+		path[i++] = '/';
+		fanout--;
+	}
+	strcpy(path + i, hex_sha1 + j);
+}
+
+static int for_each_note_helper(struct int_node *tree, unsigned char n,
+		unsigned char fanout, int flags, each_note_fn fn,
+		void *cb_data)
+{
+	unsigned int i;
+	void *p;
+	int ret = 0;
+	struct leaf_node *l;
+	static char path[40 + 19 + 1];  /* hex SHA1 + 19 * '/' + NUL */
+
+	fanout = determine_fanout(tree, n, fanout);
+	for (i = 0; i < 16; i++) {
+redo:
+		p = tree->a[i];
+		switch (GET_PTR_TYPE(p)) {
+		case PTR_TYPE_INTERNAL:
+			/* recurse into int_node */
+			ret = for_each_note_helper(CLR_PTR_TYPE(p), n + 1,
+				fanout, flags, fn, cb_data);
+			break;
+		case PTR_TYPE_SUBTREE:
+			l = (struct leaf_node *) CLR_PTR_TYPE(p);
+			/*
+			 * Subtree entries in the note tree represent parts of
+			 * the note tree that have not yet been explored. There
+			 * is a direct relationship between subtree entries at
+			 * level 'n' in the tree, and the 'fanout' variable:
+			 * Subtree entries at level 'n <= 2 * fanout' should be
+			 * preserved, since they correspond exactly to a fanout
+			 * directory in the on-disk structure. However, subtree
+			 * entries at level 'n > 2 * fanout' should NOT be
+			 * preserved, but rather consolidated into the above
+			 * notes tree level. We achieve this by unconditionally
+			 * unpacking subtree entries that exist below the
+			 * threshold level at 'n = 2 * fanout'.
+			 */
+			if (n <= 2 * fanout &&
+			    flags & FOR_EACH_NOTE_YIELD_SUBTREES) {
+				/* invoke callback with subtree */
+				unsigned int path_len =
+					l->key_sha1[19] * 2 + fanout;
+				assert(path_len < 40 + 19);
+				construct_path_with_fanout(l->key_sha1, fanout,
+							   path);
+				/* Create trailing slash, if needed */
+				if (path[path_len - 1] != '/')
+					path[path_len++] = '/';
+				path[path_len] = '\0';
+				ret = fn(l->key_sha1, l->val_sha1, path,
+					 cb_data);
+			}
+			if (n > fanout * 2 ||
+			    !(flags & FOR_EACH_NOTE_DONT_UNPACK_SUBTREES)) {
+				/* unpack subtree and resume traversal */
+				tree->a[i] = NULL;
+				load_subtree(l, tree, n);
+				free(l);
+				goto redo;
+			}
+			break;
+		case PTR_TYPE_NOTE:
+			l = (struct leaf_node *) CLR_PTR_TYPE(p);
+			construct_path_with_fanout(l->key_sha1, fanout, path);
+			ret = fn(l->key_sha1, l->val_sha1, path, cb_data);
+			break;
+		}
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 void init_notes(const char *notes_ref, int flags)
 {
 	unsigned char sha1[20], object_sha1[20];
@@ -471,6 +598,12 @@ const unsigned char *get_note(const unsigned char *object_sha1)
 	return found ? found->val_sha1 : NULL;
 }
 
+int for_each_note(int flags, each_note_fn fn, void *cb_data)
+{
+	assert(initialized);
+	return for_each_note_helper(&root_node, 0, 0, flags, fn, cb_data);
+}
+
 void free_notes(void)
 {
 	note_tree_free(&root_node);
diff --git a/notes.h b/notes.h
index c0714f4..c319fd8 100644
--- a/notes.h
+++ b/notes.h
@@ -31,6 +31,53 @@ void remove_note(const unsigned char *object_sha1);
 /* Get the note object SHA1 containing the note data for the given object */
 const unsigned char *get_note(const unsigned char *object_sha1);
 
+/*
+ * Flags controlling behaviour of for_each_note()
+ *
+ * Default behaviour of for_each_note() is to traverse every single note object
+ * in the notes tree, unpacking subtree entries along the way.
+ * The following flags can be used to alter the default behaviour:
+ *
+ * - DONT_UNPACK_SUBTREES causes for_each_note() NOT to unpack and recurse into
+ *   subtree entries while traversing the notes tree. This causes notes within
+ *   those subtrees NOT to be passed to the callback. Use this flag if you
+ *   don't want to traverse _all_ notes, but only want to traverse the parts
+ *   of the notes tree that have already been unpacked (this includes at least
+ *   all notes that have been added/changed).
+ *
+ * - YIELD_SUBTREES causes any subtree entries that are encountered to be
+ *   passed to the callback, before recursing into them. Subtree entries are
+ *   not note objects, but represent intermediate directories in the notes
+ *   tree. When passed to the callback, subtree entries will have a trailing
+ *   slash in their path, which the callback may use to differentiate between
+ *   note entries and subtree entries. Note that already-unpacked subtree
+ *   entries are not part of the notes tree, and will therefore not be yielded.
+ *   If this flag is used together with DONT_UNPACK_SUBTREES, for_each_note()
+ *   will yield the subtree entry, but not recurse into it.
+ */
+#define FOR_EACH_NOTE_DONT_UNPACK_SUBTREES 1
+#define FOR_EACH_NOTE_YIELD_SUBTREES 2
+
+/*
+ * Invoke the specified callback function for each note
+ *
+ * If the callback returns nonzero, the note walk is aborted, and the return
+ * value from the callback is returned from for_each_note(). Hence, a zero
+ * return value from for_each_note() indicates that all notes were walked
+ * successfully.
+ *
+ * IMPORTANT: The callback function is NOT allowed to change the notes tree.
+ * In other words, the following functions can NOT be invoked (on the current
+ * notes tree) from within the callback:
+ * - add_note()
+ * - remove_note()
+ * - free_notes()
+ */
+typedef int each_note_fn(const unsigned char *object_sha1,
+		const unsigned char *note_sha1, char *note_path,
+		void *cb_data);
+int for_each_note(int flags, each_note_fn fn, void *cb_data);
+
 /* Free (and de-initialize) the internal notes tree structure */
 void free_notes(void);
 
-- 
1.6.6.405.g80ed6

  parent reply	other threads:[~2010-01-27 11:53 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-27 11:51 [PATCHv12 00/23] git notes Johan Herland
2010-01-27 11:51 ` [PATCHv12 01/23] Minor non-functional fixes to notes.c Johan Herland
2010-01-27 21:20   ` Junio C Hamano
2010-01-27 22:33     ` Johan Herland
2010-01-27 11:51 ` [PATCHv12 02/23] Notes API: get_commit_notes() -> format_note() + remove the commit restriction Johan Herland
2010-01-27 11:51 ` [PATCHv12 03/23] Add tests for checking correct handling of $GIT_NOTES_REF and core.notesRef Johan Herland
2010-01-27 11:51 ` [PATCHv12 04/23] Notes API: init_notes(): Initialize the notes tree from the given notes ref Johan Herland
2010-01-27 11:51 ` [PATCHv12 05/23] Notes API: add_note(): Add note objects to the internal notes tree structure Johan Herland
2010-01-27 11:51 ` [PATCHv12 06/23] Notes API: remove_note(): Remove note objects from the " Johan Herland
2010-01-27 11:51 ` [PATCHv12 07/23] Notes API: get_note(): Return the note annotating the given object Johan Herland
2010-01-27 11:51 ` Johan Herland [this message]
2010-01-27 11:51 ` [PATCHv12 09/23] Notes API: write_notes_tree(): Store the notes tree in the database Johan Herland
2010-01-27 11:51 ` [PATCHv12 10/23] Notes API: Allow multiple concurrent notes trees with new struct notes_tree Johan Herland
2010-01-27 11:51 ` [PATCHv12 11/23] Refactor notes concatenation into a flexible interface for combining notes Johan Herland
2010-01-27 11:51 ` [PATCHv12 12/23] Builtin-ify git-notes Johan Herland
2010-01-27 11:51 ` [PATCHv12 13/23] t3301: Verify successful annotation of non-commits Johan Herland
2010-01-27 11:51 ` [PATCHv12 14/23] t3305: Verify that adding many notes with git-notes triggers increased fanout Johan Herland
2010-01-27 11:51 ` [PATCHv12 15/23] Teach notes code to properly preserve non-notes in the notes tree Johan Herland
2010-01-27 11:51 ` [PATCHv12 16/23] Teach builtin-notes to remove empty notes Johan Herland
2010-01-27 11:51 ` [PATCHv12 17/23] builtin-notes: Add "remove" subcommand for removing existing notes Johan Herland
2010-01-27 11:51 ` [PATCHv12 18/23] t3305: Verify that removing notes triggers automatic fanout consolidation Johan Herland
2010-01-27 11:51 ` [PATCHv12 19/23] Notes API: prune_notes(): Prune notes that belong to non-existing objects Johan Herland
2010-01-27 11:51 ` [PATCHv12 20/23] builtin-notes: Add "prune" subcommand for removing notes for missing objects Johan Herland
2010-01-27 11:51 ` [PATCHv12 21/23] Documentation: Generalize git-notes docs to 'objects' instead of 'commits' Johan Herland
2010-01-27 11:51 ` [PATCHv12 22/23] builtin-notes: Add "list" subcommand for listing note objects Johan Herland
2010-01-27 11:52 ` [PATCHv12 23/23] builtin-notes: Add "add" subcommand for appending to " Johan Herland
2010-01-27 20:00 ` [PATCHv12 00/23] git notes Junio C Hamano
2010-01-27 20:18   ` Sverre Rabbelier
2010-01-27 23:05   ` Johan Herland
2010-01-28  0:02     ` Junio C Hamano
2010-01-28  1:17       ` Johan Herland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1264593120-4428-9-git-send-email-johan@herland.net \
    --to=johan@herland.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).