git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Nicolas Pitre <nico@fluxnic.net>
To: git@vger.kernel.org
Subject: [PATCH 01/23] pack v4: initial pack dictionary structure and code
Date: Tue, 27 Aug 2013 00:25:45 -0400	[thread overview]
Message-ID: <1377577567-27655-2-git-send-email-nico@fluxnic.net> (raw)
In-Reply-To: <1377577567-27655-1-git-send-email-nico@fluxnic.net>

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 packv4-create.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 packv4-create.c

diff --git a/packv4-create.c b/packv4-create.c
new file mode 100644
index 0000000..2de6d41
--- /dev/null
+++ b/packv4-create.c
@@ -0,0 +1,137 @@
+/*
+ * packv4-create.c: management of dictionary tables used in pack v4
+ *
+ * (C) Nicolas Pitre <nico@fluxnic.net>
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cache.h"
+
+struct data_entry {
+	unsigned offset;
+	unsigned hits;
+};
+
+struct dict_table {
+	char *data;
+	unsigned ptr;
+	unsigned size;
+	struct data_entry *entry;
+	unsigned nb_entries;
+	unsigned max_entries;
+	unsigned *hash;
+	unsigned hash_size;
+};
+
+struct dict_table *create_dict_table(void)
+{
+	return xcalloc(sizeof(struct dict_table), 1);
+}
+
+void destroy_dict_table(struct dict_table *t)
+{
+	free(t->data);
+	free(t->entry);
+	free(t->hash);
+	free(t);
+}
+
+static int locate_entry(struct dict_table *t, const char *str)
+{
+	int i = 0;
+	const unsigned char *s = (const unsigned char *) str;
+
+	while (*s)
+		i = i * 111 + *s++;
+	i = (unsigned)i % t->hash_size;
+
+	while (t->hash[i]) {
+		unsigned n = t->hash[i] - 1;
+		if (!strcmp(str, t->data + t->entry[n].offset))
+			return n;
+		if (++i >= t->hash_size)
+			i = 0;
+	}
+	return -1 - i;
+}
+
+static void rehash_entries(struct dict_table *t)
+{
+	unsigned n;
+
+	t->hash_size *= 2;
+	if (t->hash_size < 1024)
+		t->hash_size = 1024;
+	t->hash = xrealloc(t->hash, t->hash_size * sizeof(*t->hash));
+	memset(t->hash, 0, t->hash_size * sizeof(*t->hash));
+
+	for (n = 0; n < t->nb_entries; n++) {
+		int i = locate_entry(t, t->data + t->entry[n].offset);
+		if (i < 0)
+			t->hash[-1 - i] = n + 1;
+	}
+}
+
+int dict_add_entry(struct dict_table *t, const char *str)
+{
+	int i, len = strlen(str) + 1;
+
+	if (t->ptr + len >= t->size) {
+		t->size = (t->size + len + 1024) * 3 / 2;
+		t->data = xrealloc(t->data, t->size);
+	}
+	memcpy(t->data + t->ptr, str, len);
+
+	i = (t->nb_entries) ? locate_entry(t, t->data + t->ptr) : -1;
+	if (i >= 0) {
+		t->entry[i].hits++;
+		return i;
+	}
+
+	if (t->nb_entries >= t->max_entries) {
+		t->max_entries = (t->max_entries + 1024) * 3 / 2;
+		t->entry = xrealloc(t->entry, t->max_entries * sizeof(*t->entry));
+	}
+	t->entry[t->nb_entries].offset = t->ptr;
+	t->entry[t->nb_entries].hits = 1;
+	t->ptr += len + 1;
+	t->nb_entries++;
+
+	if (t->hash_size * 3 <= t->nb_entries * 4)
+		rehash_entries(t);
+	else
+		t->hash[-1 - i] = t->nb_entries;
+
+	return t->nb_entries - 1;
+}
+
+static int cmp_dict_entries(const void *a_, const void *b_)
+{
+	const struct data_entry *a = a_;
+	const struct data_entry *b = b_;
+	int diff = b->hits - a->hits;
+	if (!diff)
+		diff = a->offset - b->offset;
+	return diff;
+}
+
+static void sort_dict_entries_by_hits(struct dict_table *t)
+{
+	qsort(t->entry, t->nb_entries, sizeof(*t->entry), cmp_dict_entries);
+	t->hash_size = (t->nb_entries * 4 / 3) / 2;
+	rehash_entries(t);
+}
+
+void dict_dump(struct dict_table *t)
+{
+	int i;
+
+	sort_dict_entries_by_hits(t);
+	for (i = 0; i < t->nb_entries; i++)
+		printf("%d\t%s\n",
+			t->entry[i].hits,
+			t->data + t->entry[i].offset);
+}
-- 
1.8.4.22.g54757b7

  reply	other threads:[~2013-08-27  4:27 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-27  4:25 [PATCH 00/23] Preliminary pack v4 support Nicolas Pitre
2013-08-27  4:25 ` Nicolas Pitre [this message]
2013-08-27 15:08   ` [PATCH 01/23] pack v4: initial pack dictionary structure and code Junio C Hamano
2013-08-27 16:13     ` Nicolas Pitre
2013-08-27  4:25 ` [PATCH 02/23] export packed_object_info() Nicolas Pitre
2013-08-27  4:25 ` [PATCH 03/23] pack v4: scan tree objects Nicolas Pitre
2013-08-27  4:25 ` [PATCH 04/23] pack v4: add tree entry mode support to dictionary entries Nicolas Pitre
2013-08-27  4:25 ` [PATCH 05/23] pack v4: add commit object parsing Nicolas Pitre
2013-08-27 15:26   ` Junio C Hamano
2013-08-27 16:47     ` Nicolas Pitre
2013-08-27 17:42       ` Junio C Hamano
2013-08-27  4:25 ` [PATCH 06/23] pack v4: split the object list and dictionary creation Nicolas Pitre
2013-08-27  4:25 ` [PATCH 07/23] pack v4: move to struct pack_idx_entry and get rid of our own struct idx_entry Nicolas Pitre
2013-08-27  4:25 ` [PATCH 08/23] pack v4: basic references encoding Nicolas Pitre
2013-08-27 15:29   ` Junio C Hamano
2013-08-27 15:53     ` Nicolas Pitre
2013-08-27  4:25 ` [PATCH 09/23] pack v4: commit object encoding Nicolas Pitre
2013-08-27 15:39   ` Junio C Hamano
2013-08-27 16:50     ` Nicolas Pitre
2013-08-27 19:59     ` Nicolas Pitre
2013-08-27 20:15       ` Junio C Hamano
2013-08-27 21:43         ` Nicolas Pitre
2013-09-02 20:48   ` Duy Nguyen
2013-09-03  6:30     ` Nicolas Pitre
2013-09-03  7:41       ` Duy Nguyen
2013-09-05  3:50         ` Nicolas Pitre
2013-08-27  4:25 ` [PATCH 10/23] pack v4: tree " Nicolas Pitre
2013-08-27 15:44   ` Junio C Hamano
2013-08-27 16:52     ` Nicolas Pitre
2013-08-27  4:25 ` [PATCH 11/23] pack v4: dictionary table output Nicolas Pitre
2013-08-27  4:25 ` [PATCH 12/23] pack v4: creation code Nicolas Pitre
2013-08-27 15:48   ` Junio C Hamano
2013-08-27 16:59     ` Nicolas Pitre
2013-08-27  4:25 ` [PATCH 13/23] pack v4: object headers Nicolas Pitre
2013-08-27  4:25 ` [PATCH 14/23] pack v4: object data copy Nicolas Pitre
2013-08-27 15:53   ` Junio C Hamano
2013-08-27 18:24     ` Nicolas Pitre
2013-08-27  4:25 ` [PATCH 15/23] pack v4: object writing Nicolas Pitre
2013-08-27  4:26 ` [PATCH 16/23] pack v4: tree object delta encoding Nicolas Pitre
2013-08-27  4:26 ` [PATCH 17/23] pack v4: load delta candidate for encoding tree objects Nicolas Pitre
2013-08-27  4:26 ` [PATCH 18/23] pack v4: honor pack.compression config option Nicolas Pitre
2013-08-27  4:26 ` [PATCH 19/23] pack v4: relax commit parsing a bit Nicolas Pitre
2013-08-27  4:26 ` [PATCH 20/23] pack index v3 Nicolas Pitre
2013-08-27  4:26 ` [PATCH 21/23] pack v4: normalize pack name to properly generate the pack index file name Nicolas Pitre
2013-08-27  4:26 ` [PATCH 22/23] pack v4: add progress display Nicolas Pitre
2013-08-27  4:26 ` [PATCH 23/23] initial pack index v3 support on the read side Nicolas Pitre
2013-08-31 11:45   ` Duy Nguyen
2013-09-03  6:09     ` Nicolas Pitre
2013-09-03  7:34       ` Duy Nguyen
2013-08-27 11:17 ` [PATCH] Document pack v4 format Nguyễn Thái Ngọc Duy
2013-08-27 18:25   ` Junio C Hamano
2013-08-27 18:53   ` Nicolas Pitre
2013-08-31  2:49   ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2013-09-03  6:00     ` Nicolas Pitre
2013-09-03  6:46       ` Nicolas Pitre
2013-09-03 11:49         ` Duy Nguyen
2013-09-03 14:54           ` Duy Nguyen
2013-09-05  4:12             ` Nicolas Pitre
2013-09-05  4:19               ` Duy Nguyen
2013-09-05  4:40                 ` Nicolas Pitre
2013-09-05  5:04                   ` Duy Nguyen
2013-09-05  5:39                     ` Nicolas Pitre
2013-09-05 16:52                       ` Duy Nguyen
2013-09-05 17:14                         ` Nicolas Pitre
2013-09-05 20:26                           ` Junio C Hamano
2013-09-05 21:04                             ` Nicolas Pitre
2013-09-06  4:18                         ` Duy Nguyen
2013-09-06 13:19                           ` Nicolas Pitre
2013-09-06  2:14     ` [PATCH v3] " Nguyễn Thái Ngọc Duy
2013-09-06  3:23       ` Nicolas Pitre
2013-09-06  9:48         ` Duy Nguyen
2013-09-06 13:25           ` Nicolas Pitre
2013-09-06 13:44             ` Duy Nguyen
2013-09-06 16:44               ` Nicolas Pitre
2013-09-07  4:57                 ` Nicolas Pitre
2013-09-07  4:52       ` Nicolas Pitre
2013-09-07  8:05         ` Duy Nguyen
2013-08-27 15:03 ` [PATCH 00/23] Preliminary pack v4 support Junio C Hamano
2013-08-27 15:59   ` Nicolas Pitre
2013-08-27 16:44     ` Junio C Hamano
2013-08-28  2:30       ` Duy Nguyen
2013-08-28  2:58         ` Nicolas Pitre
2013-08-28  3:06           ` Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1377577567-27655-2-git-send-email-nico@fluxnic.net \
    --to=nico@fluxnic.net \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).