From: David Barr <davidbarr@google.com>
To: GIT Mailing-list <git@vger.kernel.org>
Cc: David Barr <davidbarr@google.com>
Subject: [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
Date: Wed, 22 Jun 2011 00:33:30 -0700 [thread overview]
Message-ID: <1308728011-14136-2-git-send-email-davidbarr@google.com> (raw)
In-Reply-To: <1308728011-14136-1-git-send-email-davidbarr@google.com>
One struct to capture all types, just 4 methods:
decode_message, encode_message, sizeof_message, hash_field.
Signed-off-by: David Barr <davidbarr@google.com>
---
This is the first in a series of small patches to introduce some higher-level
constructs into the git toolbag.
The motivation is to empower a libfastimport implementation that is frugal
with memory, fast and scalable.
This version lacks any driver or tests.
Soon to come are:
* an efficient allocator that provides a mapping from an integer handle to
pointer and length
* a hash table that is time and memory effective for very numerous entries
protobuf.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
protobuf.h | 70 ++++++++++++++++++++++
varint.h | 59 ++++++++++++++++++
3 files changed, 322 insertions(+), 0 deletions(-)
create mode 100644 protobuf.c
create mode 100644 protobuf.h
create mode 100644 varint.h
diff --git a/protobuf.c b/protobuf.c
new file mode 100644
index 0000000..09223dd
--- /dev/null
+++ b/protobuf.c
@@ -0,0 +1,193 @@
+#include "git-compat-util.h"
+#include "protobuf.h"
+#include "varint.h"
+
+static int decode_binary(const char **buf, const char *end, const char **result, size_t len)
+{
+ if (end && *buf + len > end)
+ return 1;
+ *result = *buf;
+ *buf += len;
+ return 0;
+}
+
+static int encode_binary(char **buf, const char *end, const char *value, size_t len)
+{
+ if (end && *buf + len > end)
+ return 1;
+ memcpy(*buf, value, len);
+ *buf += len;
+ return 0;
+}
+
+static int decode_field(const char **buf, const char *end, struct protobuf_field *field)
+{
+ uint32_t u32;
+ uint64_t key;
+ bzero(field, sizeof(struct protobuf_field));
+ if (decode_varint(buf, end, &key))
+ return 1;
+ field->tag = key >> WT_BITS,
+ field->type = key & WT_MASK;
+ switch (field->type) {
+ case WT_VARINT:
+ if (decode_varint(buf, end, &field->val.num))
+ return 1;
+ return 0;
+ case WT_64BIT:
+ field->val.bin.len = sizeof(uint64_t);
+ break;
+ case WT_STRING:
+ if (decode_varint(buf, end, &field->val.bin.len))
+ return 1;
+ break;
+ case WT_SHA1:
+ field->val.bin.len = 20;
+ break;
+ case WT_32BIT:
+ field->val.bin.len = sizeof(uint32_t);
+ break;
+ default:
+ return 1;
+ }
+ if (decode_binary(buf, end, &field->val.bin.ptr, field->val.bin.len))
+ return 1;
+ switch (field->type) {
+ case WT_64BIT:
+ memcpy(&field->val.num, field->val.bin.ptr, field->val.bin.len);
+ break;
+ case WT_32BIT:
+ memcpy(&u32, field->val.bin.ptr, field->val.bin.len);
+ field->val.num = u32;
+ }
+ return 0;
+}
+
+int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields)
+{
+ struct protobuf_field current;
+ bzero(message, fields * sizeof(struct protobuf_field));
+ while (*buf != end) {
+ if (decode_field(buf, end, ¤t))
+ return 1;
+ if (current.tag < fields)
+ message[current.tag] = current;
+ }
+ return 0;
+}
+
+static size_t sizeof_field(const struct protobuf_field *field)
+{
+ size_t sizeof_key = sizeof_varint(field->tag << WT_BITS | field->type);
+ switch (field->type) {
+ case WT_VARINT:
+ if (field->val.num)
+ return sizeof_key + sizeof_varint(field->val.num);
+ break;
+ case WT_64BIT:
+ if (field->val.num)
+ return sizeof_key + sizeof(uint64_t);
+ break;
+ case WT_STRING:
+ if (field->val.bin.len && field->val.bin.ptr)
+ return sizeof_key + sizeof_varint(field->val.bin.len)
+ + field->val.bin.len;
+ break;
+ case WT_SHA1:
+ if (field->val.bin.ptr)
+ return sizeof_key + 20;
+ break;
+ case WT_32BIT:
+ if (field->val.num)
+ return sizeof_key + sizeof(uint32_t);
+ break;
+ }
+ return 0;
+}
+
+uint64_t sizeof_message(const struct protobuf_field *message, size_t fields)
+{
+ uint64_t size = 0;
+ while (fields--)
+ size += sizeof_field(message++);
+ return size;
+}
+
+static int encode_field(char **buf, const struct protobuf_field *field, char *end)
+{
+ uint32_t u32;
+ uint64_t len;
+ const char *ptr;
+ uint64_t key = (field->tag << WT_BITS) | field->type;
+ if (!sizeof_field(field))
+ return 0;
+ if (encode_varint(buf, end, key))
+ return 1;
+ switch (field->type) {
+ case WT_VARINT:
+ if (encode_varint(buf, end, field->val.num))
+ return 1;
+ return 0;
+ case WT_64BIT:
+ ptr = (const char *)&field->val.num;
+ len = sizeof(uint64_t);
+ break;
+ case WT_STRING:
+ if (encode_varint(buf, end, field->val.bin.len))
+ return 1;
+ ptr = field->val.bin.ptr;
+ len = field->val.bin.len;
+ break;
+ case WT_SHA1:
+ ptr = field->val.bin.ptr;
+ len = 20;
+ break;
+ case WT_32BIT:
+ u32 = field->val.num;
+ ptr = (const char *)&u32;
+ len = sizeof(uint64_t);
+ break;
+ default:
+ return 1;
+ }
+ if (encode_binary(buf, end, ptr, len))
+ return 1;
+ return 0;
+}
+
+int encode_message(char **buf, char *end, const struct protobuf_field *message, size_t fields)
+{
+ while (*buf != end && fields--)
+ if (encode_field(buf, message++, end))
+ return 1;
+ return 0;
+}
+
+static uint32_t x65599(const char *s, uint64_t len)
+{
+ uint32_t r = 0;
+ while (len--)
+ r = *s++ + (r << 6) + (r << 16) - r;
+ return r;
+}
+
+uint32_t hash_field(const struct protobuf_field *field)
+{
+ uint32_t hc = 0;
+ switch (field->type) {
+ case WT_VARINT:
+ case WT_64BIT:
+ hc = (0x9e3779b97f4a7c15ull * field->val.num) >> 32;
+ break;
+ case WT_SHA1:
+ memcpy(&hc, field->val.bin.ptr, sizeof(hc));
+ break;
+ case WT_STRING:
+ hc = x65599(field->val.bin.ptr, field->val.bin.len);
+ break;
+ case WT_32BIT:
+ hc = 0x9e3779b9ul * (uint32_t)field->val.num;
+ break;
+ }
+ return hc;
+}
diff --git a/protobuf.h b/protobuf.h
new file mode 100644
index 0000000..89b70a4
--- /dev/null
+++ b/protobuf.h
@@ -0,0 +1,70 @@
+#ifndef PROTOBUF_H_
+#define PROTOBUF_H_
+
+#define WT_BITS 3
+#define WT_MASK 0x7
+
+enum wire_type {
+ WT_VARINT = 0,
+ WT_64BIT = 1,
+ WT_STRING = 2,
+ /* Custom wire type */
+ WT_SHA1 = 3,
+ WT_32BIT = 5
+};
+
+struct protobuf_field
+{
+ uint64_t tag : 64 - WT_BITS,
+ type : WT_BITS;
+ union protobuf_value {
+ uint64_t num;
+ struct protobuf_binary {
+ uint64_t len;
+ const char *ptr;
+ } bin;
+ } val;
+};
+
+#define PROTOBUF_KEY(p, f, wt) do { \
+ (p)->f.tag = &((p)->f) - (struct protobuf_field*)(p); \
+ (p)->f.type = wt; \
+} while(0)
+
+#define WT_VARINT(p, f, v) do { \
+ PROTOBUF_KEY(p, f, WT_VARINT); \
+ (p)->f.val.num = v; \
+} while(0)
+
+#define WT_64BIT(p, f, v) do { \
+ PROTOBUF_KEY(p, f, WT_64BIT); \
+ (p)->f.val.num = v; \
+} while(0)
+
+#define WT_STRING(p, f, s, l) do { \
+ PROTOBUF_KEY(p, f, WT_STRING); \
+ (p)->f.val.bin.ptr = s; \
+ (p)->f.val.bin.len = l; \
+} while(0)
+
+#define WT_SHA1(p, f, v) do { \
+ PROTOBUF_KEY(p, f, WT_SHA1); \
+ (p)->f.val.bin.ptr = s; \
+ (p)->f.val.bin.len = 20; \
+} while(0)
+
+#define WT_32BIT(p, f, v) do { \
+ PROTOBUF_KEY(p, f, WT_32BIT); \
+ (p)->f.val.num = v; \
+} while(0)
+
+#define PROTOBUF_CAST(p) \
+ (struct protobuf_field*)(p), \
+ sizeof(*(p)) / sizeof(struct protobuf_field)
+
+int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields);
+uint64_t sizeof_message(const struct protobuf_field *message, size_t fields);
+int encode_message(char **buf, char *end, const struct protobuf_field *message, size_t fields);
+uint32_t hash_field(const struct protobuf_field *field);
+
+#endif
diff --git a/varint.h b/varint.h
new file mode 100644
index 0000000..48a3547
--- /dev/null
+++ b/varint.h
@@ -0,0 +1,59 @@
+#ifndef VARINT_H_
+#define VARINT_H_
+
+#define VLI_CONTINUE 0x80
+#define VLI_DIGIT_MASK 0x7f
+#define VLI_BITS_PER_DIGIT 7
+
+static int decode_varint(const char **buf, const char *end, uint64_t *result)
+{
+ uint64_t rv = 0;
+ int shift = 0;
+ const char *pos;
+ for (pos = *buf; pos != end && shift < 64; pos++) {
+ unsigned char ch = *pos;
+
+ rv |= (ch & VLI_DIGIT_MASK) << shift;
+ shift += VLI_BITS_PER_DIGIT;
+ if (ch & VLI_CONTINUE)
+ continue;
+
+ *result = rv;
+ *buf = pos + 1;
+ return 0;
+ }
+ return 1;
+}
+
+static int encode_varint(char **buf, const char *end, uint64_t value)
+{
+ char *pos;
+ for (pos = *buf; pos != end; pos++) {
+ unsigned char ch = value & VLI_DIGIT_MASK;
+
+ value >>= VLI_BITS_PER_DIGIT;
+ if (value)
+ ch |= VLI_CONTINUE;
+ *pos = ch;
+ if (ch & VLI_CONTINUE)
+ continue;
+
+ *buf = pos + 1;
+ return 0;
+ }
+ return 1;
+}
+
+static size_t sizeof_varint(uint64_t value) {
+ size_t size = 0;
+ int shift = VLI_BITS_PER_DIGIT;
+ while (shift < 64) {
+ size++;
+ if (value < (1ull << shift))
+ break;
+ shift += VLI_BITS_PER_DIGIT;
+ }
+ return size;
+}
+
+#endif
--
1.7.5.1
next prev parent reply other threads:[~2011-06-22 7:34 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-22 7:33 [RFC/PATCH 0/3] David Barr
2011-06-22 7:33 ` David Barr [this message]
2011-06-22 19:42 ` [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures Junio C Hamano
2011-06-24 14:39 ` David Barr
2011-06-24 16:04 ` David Barr
2011-06-24 16:48 ` Junio C Hamano
2011-06-23 17:22 ` Junio C Hamano
2011-06-22 7:33 ` [RFC/PATCH 2/3] small-alloc: add allocator for small objects David Barr
2011-06-22 20:49 ` Junio C Hamano
2011-06-24 14:38 ` David Barr
2011-06-24 17:02 ` David Barr
2011-06-23 17:17 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1308728011-14136-2-git-send-email-davidbarr@google.com \
--to=davidbarr@google.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).