git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: git@vger.kernel.org
Cc: Ramkumar Ramachandra <artagnon@gmail.com>,
	David Michael Barr <david.barr@cordelta.com>,
	Sverre Rabbelier <srabbelier@gmail.com>,
	Daniel Shahaf <daniel@shahaf.name>
Subject: [PATCH 4/9] Add treap implementation
Date: Thu, 24 Jun 2010 05:57:06 -0500	[thread overview]
Message-ID: <20100624105706.GD12376@burratino> (raw)
In-Reply-To: <20100624105004.GA12336@burratino>

From: Jason Evans <jasone@canonware.com>

Provide macros to generate a type-specific treap implementation and
various functions to operate on it. It uses obj_pool.h to store memory
nodes in a treap.  Previously committed nodes are never removed from
the pool; after any *_commit operation, it is assumed (correctly, in
the case of svn-fast-export) that someone else must care about them.

Treaps provide a memory-efficient binary search tree structure.
Insertion/deletion/search are about as about as fast in the average
case as red-black trees and the chances of worst-case behavior are
vanishingly small, thanks to (pseudo-)randomness.  The bad worst-case
behavior is a small price to pay, given that treaps are much simpler
to implement.

From http://www.canonware.com/download/trp/trp_hash/trp.h

[db: Altered to reference nodes by offset from a common base pointer]
[db: Bob Jenkins' hashing implementation dropped for Knuth's]
[db: Methods unnecessary for search and insert dropped]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 Makefile        |    2 +-
 vcs-svn/LICENSE |    3 +
 vcs-svn/trp.h   |  220 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/trp.txt |   90 ++++++++++++++++++++++
 4 files changed, 314 insertions(+), 1 deletions(-)
 create mode 100644 vcs-svn/trp.h
 create mode 100644 vcs-svn/trp.txt

diff --git a/Makefile b/Makefile
index fc31ee0..663a366 100644
--- a/Makefile
+++ b/Makefile
@@ -1864,7 +1864,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS): \
-	vcs-svn/obj_pool.h
+	vcs-svn/obj_pool.h vcs-svn/trp.h
 endif
 
 exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE
index 6e52372..a3d384c 100644
--- a/vcs-svn/LICENSE
+++ b/vcs-svn/LICENSE
@@ -1,6 +1,9 @@
 Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
 All rights reserved.
 
+Copyright (C) 2008 Jason Evans <jasone@canonware.com>.
+All rights reserved.
+
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h
new file mode 100644
index 0000000..dd7d5ee
--- /dev/null
+++ b/vcs-svn/trp.h
@@ -0,0 +1,220 @@
+/*
+ * C macro implementation of treaps.
+ *
+ * Usage:
+ *   #include <stdint.h>
+ *   #include "trp.h"
+ *   trp_gen(...)
+ *
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#ifndef TRP_H_
+#define TRP_H_
+
+#define MAYBE_UNUSED __attribute__((__unused__))
+
+/* Node structure. */
+struct trp_node {
+	uint32_t trpn_left;
+	uint32_t trpn_right;
+};
+
+/* Root structure. */
+struct trp_root {
+	uint32_t trp_root;
+};
+
+/* Pointer/Offset conversion. */
+#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
+#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))
+#define trpn_modify(a_base, a_offset) \
+	do { \
+		if ((a_offset) < a_base##_pool.committed) { \
+			uint32_t old_offset = (a_offset);\
+			(a_offset) = a_base##_alloc(1); \
+			*trpn_pointer(a_base, a_offset) = \
+				*trpn_pointer(a_base, old_offset); \
+		} \
+	} while (0);
+
+/* Left accessors. */
+#define trp_left_get(a_base, a_field, a_node) \
+	(trpn_pointer(a_base, a_node)->a_field.trpn_left)
+#define trp_left_set(a_base, a_field, a_node, a_left) \
+	do { \
+		trpn_modify(a_base, a_node); \
+		trp_left_get(a_base, a_field, a_node) = (a_left); \
+	} while(0)
+
+/* Right accessors. */
+#define trp_right_get(a_base, a_field, a_node) \
+	(trpn_pointer(a_base, a_node)->a_field.trpn_right)
+#define trp_right_set(a_base, a_field, a_node, a_right) \
+	do { \
+		trpn_modify(a_base, a_node); \
+		trp_right_get(a_base, a_field, a_node) = (a_right); \
+	} while(0)
+
+/*
+ * Fibonacci hash function.
+ * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2).
+ * See Knuth §6.4: volume 3, 3rd ed, p518.
+ */
+#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node))
+
+/* Priority accessors. */
+#define trp_prio_get(a_node) trpn_hash(a_node)
+
+/* Node initializer. */
+#define trp_node_new(a_base, a_field, a_node) \
+	do { \
+		trp_left_set(a_base, a_field, (a_node), ~0); \
+		trp_right_set(a_base, a_field, (a_node), ~0); \
+	} while(0)
+
+/* Internal utility macros. */
+#define trpn_first(a_base, a_field, a_root, r_node) \
+	do { \
+		(r_node) = (a_root); \
+		if ((r_node) == ~0) \
+			return NULL; \
+		while (~trp_left_get(a_base, a_field, (r_node))) \
+			(r_node) = trp_left_get(a_base, a_field, (r_node)); \
+	} while (0)
+
+#define trpn_rotate_left(a_base, a_field, a_node, r_node) \
+	do { \
+		(r_node) = trp_right_get(a_base, a_field, (a_node)); \
+		trp_right_set(a_base, a_field, (a_node), \
+			trp_left_get(a_base, a_field, (r_node))); \
+		trp_left_set(a_base, a_field, (r_node), (a_node)); \
+	} while(0)
+
+#define trpn_rotate_right(a_base, a_field, a_node, r_node) \
+	do { \
+		(r_node) = trp_left_get(a_base, a_field, (a_node)); \
+		trp_left_set(a_base, a_field, (a_node), \
+			trp_right_get(a_base, a_field, (r_node))); \
+		trp_right_set(a_base, a_field, (r_node), (a_node)); \
+	} while(0)
+
+#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
+a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \
+{ \
+	uint32_t ret; \
+	trpn_first(a_base, a_field, treap->trp_root, ret); \
+	return trpn_pointer(a_base, ret); \
+} \
+a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \
+{ \
+	uint32_t ret; \
+	uint32_t offset = trpn_offset(a_base, node); \
+	if (~trp_right_get(a_base, a_field, offset)) { \
+		trpn_first(a_base, a_field, \
+			trp_right_get(a_base, a_field, offset), ret); \
+	} else { \
+		uint32_t tnode = treap->trp_root; \
+		ret = ~0; \
+		while (1) { \
+			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
+				trpn_pointer(a_base, tnode)); \
+			if (cmp < 0) { \
+				ret = tnode; \
+				tnode = trp_left_get(a_base, a_field, tnode); \
+			} else if (cmp > 0) { \
+				tnode = trp_right_get(a_base, a_field, tnode); \
+			} else { \
+				break; \
+			} \
+		} \
+	} \
+	return trpn_pointer(a_base, ret); \
+} \
+a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
+{ \
+	int cmp; \
+	uint32_t ret = treap->trp_root; \
+	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \
+		if (cmp < 0) \
+			ret = trp_left_get(a_base, a_field, ret); \
+		else \
+			ret = trp_right_get(a_base, a_field, ret); \
+	} \
+	return trpn_pointer(a_base, ret); \
+} \
+a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
+{ \
+	if (cur_node == ~0) { \
+		return (ins_node); \
+	} else { \
+		uint32_t ret; \
+		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
+					trpn_pointer(a_base, cur_node)); \
+		if (cmp < 0) { \
+			uint32_t left = a_pre##insert_recurse( \
+				trp_left_get(a_base, a_field, cur_node), ins_node); \
+			trp_left_set(a_base, a_field, cur_node, left); \
+			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
+				trpn_rotate_right(a_base, a_field, cur_node, ret); \
+			else \
+				ret = cur_node; \
+		} else { \
+			uint32_t right = a_pre##insert_recurse( \
+				trp_right_get(a_base, a_field, cur_node), ins_node); \
+			trp_right_set(a_base, a_field, cur_node, right); \
+			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
+				trpn_rotate_left(a_base, a_field, cur_node, ret); \
+			else \
+				ret = cur_node; \
+		} \
+		return (ret); \
+	} \
+} \
+a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \
+{ \
+	uint32_t offset = trpn_offset(a_base, node); \
+	trp_node_new(a_base, a_field, offset); \
+	treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \
+} \
+a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
+{ \
+	int cmp = a_cmp(trpn_pointer(a_base, rem_node), \
+			trpn_pointer(a_base, cur_node)); \
+	if (cmp == 0) { \
+		uint32_t ret; \
+		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
+		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
+		if (left == ~0) { \
+			if (right == ~0) \
+				return (~0); \
+		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
+			trpn_rotate_right(a_base, a_field, cur_node, ret); \
+			right = a_pre##remove_recurse(cur_node, rem_node); \
+			trp_right_set(a_base, a_field, ret, right); \
+			return (ret); \
+		} \
+		trpn_rotate_left(a_base, a_field, cur_node, ret); \
+		left = a_pre##remove_recurse(cur_node, rem_node); \
+		trp_left_set(a_base, a_field, ret, left); \
+		return (ret); \
+	} else if (cmp < 0) { \
+		uint32_t left = a_pre##remove_recurse( \
+			trp_left_get(a_base, a_field, cur_node), rem_node); \
+		trp_left_set(a_base, a_field, cur_node, left); \
+		return (cur_node); \
+	} else { \
+		uint32_t right = a_pre##remove_recurse( \
+			trp_right_get(a_base, a_field, cur_node), rem_node); \
+		trp_right_set(a_base, a_field, cur_node, right); \
+		return (cur_node); \
+	} \
+} \
+a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \
+{ \
+	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
+		trpn_offset(a_base, node)); \
+} \
+
+#endif
diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt
new file mode 100644
index 0000000..f387aaa
--- /dev/null
+++ b/vcs-svn/trp.txt
@@ -0,0 +1,90 @@
+treap API
+=========
+
+The trp API generates a data structure and functions to handle a
+large growing set of objects stored in a pool.
+
+The caller:
+
+. Specifies parameters for the generated functions with the
+  trp_gen(static, foo_, ...) macro.
+
+. Allocates and clears a `struct trp_node` variable.
+
+. Adds new items to the set using `foo_insert`.
+
+. Can find a specific item in the set using `foo_search`.
+
+. Can iterate over items in the set using `foo_first` and `foo_next`.
+
+. Can remove an item from the set using `foo_remove`.
+
+. The set is never freed.
+
+Example:
+
+----
+struct ex_node {
+	const char *s;
+	struct trp_node ex_link;
+};
+static struct trp_root ex_base;
+obj_pool_gen(ex, struct ex_node, 4096);
+trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp)
+struct ex_node *item;
+
+item = ex_pointer(ex_alloc(1));
+item->s = "hello";
+ex_insert(&ex_base, item);
+item = ex_pointer(ex_alloc(1));
+item->s = "goodbye";
+ex_insert(&ex_base, item);
+for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item))
+	printf("%s\n", item->s);
+----
+
+Functions
+---------
+
+trp_gen(attr, foo_, node_type, link_field, pool, cmp)::
+
+	Generate a type-specific treap implementation.
++
+. The storage class for generated functions will be 'attr' (e.g., `static`).
+. Generated function names are prefixed with 'foo_' (e.g., `treap_`).
+. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`).
+  This type must be a struct with at least one `struct trp_node` field
+  to point to its children.
+. The field used to access child nodes will be 'link_field'.
+. All treap nodes must lie in the 'pool' object pool.
+. Treap nodes must be totally ordered by the 'cmp' relation, with the
+  following prototype:
++
+int (*cmp)(node_type \*a, node_type \*b)
++
+and returning a value less than, equal to, or greater than zero
+according to the result of comparison.
+
+void foo_insert(struct trp_root *treap, node_type \*node)::
+
+	Insert node into treap.  If inserted multiple times,
+	a node will appear in the treap multiple times.
+
+void foo_remove(struct trp_root *treap, node_type \*node)::
+
+	Remove node from treap.  Caller must ensure node is
+	present in treap before using this function.
+
+node_type *foo_search(struct trp_root \*treap, node_type \*key)::
+
+	Search for a node that matches key.  If no match is found,
+	return what would be key's successor, were key in treap
+	(NULL if no successor).
+
+node_type *foo_first(struct trp_root \*treap)::
+
+	Find the first item from the treap, in sorted order.
+
+node_type *foo_next(struct trp_root \*treap, node_type \*node)::
+
+	Find the next item.
-- 
1.7.1

  parent reply	other threads:[~2010-06-24 10:57 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-24 10:50 [PATCH/RFC v2 0/9] Subversion dump parsing library Jonathan Nieder
2010-06-24 10:51 ` [PATCH 1/9] Export parse_date_basic() to convert a date string to timestamp Jonathan Nieder
2010-06-24 18:32   ` Ramkumar Ramachandra
2010-06-24 10:52 ` [PATCH 2/9] Introduce vcs-svn lib Jonathan Nieder
2010-06-24 20:27   ` Ramkumar Ramachandra
2010-06-24 10:53 ` [PATCH 3/9] Add memory pool library Jonathan Nieder
2010-06-24 18:43   ` Ramkumar Ramachandra
2010-06-24 18:55     ` Jonathan Nieder
2010-06-24 19:37       ` Ramkumar Ramachandra
2010-06-24 20:06         ` Jonathan Nieder
2010-06-24 20:20           ` Ramkumar Ramachandra
2010-06-24 10:57 ` Jonathan Nieder [this message]
2010-06-24 19:08   ` [PATCH 4/9] Add treap implementation Ramkumar Ramachandra
2010-06-24 19:22     ` Jonathan Nieder
2010-06-24 10:58 ` [PATCH 5/9] Add string-specific memory pool Jonathan Nieder
2010-06-24 19:19   ` Ramkumar Ramachandra
2010-06-24 11:01 ` [PATCH 6/9] Add stream helper library Jonathan Nieder
2010-06-24 21:23   ` Ramkumar Ramachandra
2010-06-24 21:29     ` Jonathan Nieder
2010-06-24 11:02 ` [PATCH 7/9] Add infrastructure to write revisions in fast-export format Jonathan Nieder
2010-06-24 19:29   ` Ramkumar Ramachandra
2010-06-24 19:36     ` Jonathan Nieder
2010-06-24 19:49     ` Jonathan Nieder
2010-06-24 21:14       ` Ramkumar Ramachandra
2010-06-24 11:03 ` [PATCH 8/9] Add SVN dump parser Jonathan Nieder
2010-06-24 20:33   ` Ramkumar Ramachandra
2010-06-24 11:07 ` [PATCH 9/9] Add a sample user for the svndump library Jonathan Nieder
2010-06-24 20:17   ` Ramkumar Ramachandra
2010-06-24 20:30     ` Jonathan Nieder
2010-06-24 20:42       ` Ramkumar Ramachandra
2010-06-24 20:52         ` Jonathan Nieder
2010-06-30  2:09   ` Sam Vilain
2010-06-24 13:06 ` [PATCH/RFC v2 0/9] Subversion dump parsing library Ramkumar Ramachandra
2010-06-24 18:24   ` Jonathan Nieder
2010-06-24 21:26   ` Jonathan Nieder
  -- strict thread matches above, loose matches on Subject: below --
2010-07-15 16:22 [PATCH 0/8] Resurrect rr/svn-export Ramkumar Ramachandra
2010-07-16 10:13 ` Jonathan Nieder
2010-07-16 10:23   ` [PATCH 4/9] Add treap implementation Jonathan Nieder
2010-07-16 18:26     ` Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100624105706.GD12376@burratino \
    --to=jrnieder@gmail.com \
    --cc=artagnon@gmail.com \
    --cc=daniel@shahaf.name \
    --cc=david.barr@cordelta.com \
    --cc=git@vger.kernel.org \
    --cc=srabbelier@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).