From: Nicolas Pitre <nico@fluxnic.net>
To: git@vger.kernel.org
Subject: [PATCH 03/23] pack v4: scan tree objects
Date: Tue, 27 Aug 2013 00:25:47 -0400 [thread overview]
Message-ID: <1377577567-27655-4-git-send-email-nico@fluxnic.net> (raw)
In-Reply-To: <1377577567-27655-1-git-send-email-nico@fluxnic.net>
From: Nicolas Pitre <nico@lenovo.(none)>
Let's read a pack to feed our dictionary with all the path strings
contained in all the tree objects.
Dump the resulting dictionary sorted by frequency to stdout.
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
Makefile | 1 +
packv4-create.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 138 insertions(+)
diff --git a/Makefile b/Makefile
index 3588ca1..4716113 100644
--- a/Makefile
+++ b/Makefile
@@ -550,6 +550,7 @@ PROGRAM_OBJS += shell.o
PROGRAM_OBJS += show-index.o
PROGRAM_OBJS += upload-pack.o
PROGRAM_OBJS += remote-testsvn.o
+PROGRAM_OBJS += packv4-create.o
# Binary suffix, set to .exe for Windows builds
X =
diff --git a/packv4-create.c b/packv4-create.c
index 2de6d41..00762a5 100644
--- a/packv4-create.c
+++ b/packv4-create.c
@@ -9,6 +9,8 @@
*/
#include "cache.h"
+#include "object.h"
+#include "tree-walk.h"
struct data_entry {
unsigned offset;
@@ -125,6 +127,22 @@ static void sort_dict_entries_by_hits(struct dict_table *t)
rehash_entries(t);
}
+/* Dictionary collecting the entry paths seen in tree objects. */
+static struct dict_table *tree_path_table;
+
+/*
+ * Walk the tree object stored in buf (size bytes) and add the path of
+ * each of its entries to tree_path_table.  The table is created on the
+ * first call.  Always returns 0.
+ */
+static int add_tree_dict_entries(void *buf, unsigned long size)
+{
+	struct name_entry entry;
+	struct tree_desc walker;
+
+	if (tree_path_table == NULL)
+		tree_path_table = create_dict_table();
+
+	init_tree_desc(&walker, buf, size);
+	for (;;) {
+		if (!tree_entry(&walker, &entry))
+			break;
+		dict_add_entry(tree_path_table, entry.path);
+	}
+	return 0;
+}
+
void dict_dump(struct dict_table *t)
{
int i;
@@ -135,3 +153,122 @@ void dict_dump(struct dict_table *t)
t->entry[i].hits,
t->data + t->entry[i].offset);
}
+
+/* One object of a pack, as listed by the pack index. */
+struct idx_entry {
+	off_t offset;			/* from nth_packed_object_offset() */
+	const unsigned char *sha1;	/* from nth_packed_object_sha1() */
+};
+
+/*
+ * qsort() comparison callback: orders struct idx_entry elements by
+ * ascending offset.  Returns -1, 0 or 1.
+ */
+static int sort_by_offset(const void *e1, const void *e2)
+{
+	const struct idx_entry *a = e1;
+	const struct idx_entry *b = e2;
+
+	if (a->offset != b->offset)
+		return a->offset < b->offset ? -1 : 1;
+	return 0;
+}
+/*
+ * Feed the path dictionary with every tree object found in pack p.
+ *
+ * The objects listed in the pack index are visited in ascending offset
+ * order.  Anything that is not a tree is skipped; each tree is unpacked,
+ * checked against its SHA-1 and passed to add_tree_dict_entries().
+ * Every failure is fatal (die()).  Returns 0.
+ */
+static int create_pack_dictionaries(struct packed_git *p)
+{
+	uint32_t count = p->num_objects;
+	uint32_t n;
+	struct idx_entry *list;
+
+	/* One extra slot is allocated past the last object. */
+	list = xmalloc((count + 1) * sizeof(*list));
+	/* NOTE(review): nothing in this function reads this slot; confirm the intended value. */
+	list[count].offset = p->index_size - 40;
+	for (n = 0; n < count; n++) {
+		list[n].sha1 = nth_packed_object_sha1(p, n);
+		list[n].offset = nth_packed_object_offset(p, n);
+	}
+	qsort(list, count, sizeof(*list), sort_by_offset);
+
+	for (n = 0; n < count; n++) {
+		const struct idx_entry *obj = &list[n];
+		struct object_info oi = {};
+		enum object_type type;
+		unsigned long size;
+		void *data;
+
+		/* Ask only for the type and size of the object. */
+		oi.typep = &type;
+		oi.sizep = &size;
+		if (packed_object_info(p, obj->offset, &oi) < 0)
+			die("cannot get type of %s from %s",
+			    sha1_to_hex(obj->sha1), p->pack_name);
+
+		/* Only tree objects are processed here. */
+		if (type != OBJ_TREE)
+			continue;
+
+		data = unpack_entry(p, obj->offset, &type, &size);
+		if (data == NULL)
+			die("cannot unpack %s from %s",
+			    sha1_to_hex(obj->sha1), p->pack_name);
+		if (check_sha1_signature(obj->sha1, data, size, typename(type)) != 0)
+			die("packed %s from %s is corrupt",
+			    sha1_to_hex(obj->sha1), p->pack_name);
+		if (add_tree_dict_entries(data, size) < 0)
+			die("can't process %s object %s",
+			    typename(type), sha1_to_hex(obj->sha1));
+		free(data);
+	}
+	free(list);
+
+	return 0;
+}
+
+/*
+ * Open the pack designated by path and build the dictionaries from it.
+ *
+ * path may be given as "foo", "foo.pack" or "foo.idx".  Returns the
+ * result of error() when the name is too long, the pack is not found or
+ * its index cannot be opened; otherwise returns whatever
+ * create_pack_dictionaries() returns.
+ */
+static int process_one_pack(const char *path)
+{
+	char idx_name[PATH_MAX];
+	struct packed_git *pack;
+	int n;
+
+	n = strlcpy(idx_name, path, sizeof(idx_name));
+	if (n >= PATH_MAX)
+		return error("name too long: %s", path);
+
+	/*
+	 * add_packed_git() wants the "foo.idx" form, so rewrite a
+	 * "foo.pack" or bare "foo" argument accordingly.
+	 */
+	if (has_extension(idx_name, ".pack")) {
+		/* ".idx" is one character shorter than ".pack". */
+		n -= 1;
+		strcpy(idx_name + n - 4, ".idx");
+	} else if (!has_extension(idx_name, ".idx")) {
+		if (n + 4 >= PATH_MAX)
+			return error("name too long: %s.idx", idx_name);
+		strcpy(idx_name + n, ".idx");
+		n += 4;
+	}
+
+	/*
+	 * The "foo.pack" name that add_packed_git() derives from our
+	 * "foo.idx" buffer is one character longer; it has to fit too.
+	 */
+	if (n + 1 >= PATH_MAX) {
+		idx_name[n - 4] = '\0';
+		return error("name too long: %s.pack", idx_name);
+	}
+
+	pack = add_packed_git(idx_name, n, 1);
+	if (pack == NULL)
+		return error("packfile %s not found.", idx_name);
+
+	install_packed_git(pack);
+	if (open_pack_index(pack) != 0)
+		return error("packfile %s index not opened", pack->pack_name);
+	return create_pack_dictionaries(pack);
+}
+
+/*
+ * Usage: <program> <packfile>
+ *
+ * Scan the given pack and dump the resulting tree path dictionary to
+ * stdout.  Exits with status 1 on a usage error or when the pack cannot
+ * be processed.
+ */
+int main(int argc, char *argv[])
+{
+	if (argc != 2) {
+		fprintf(stderr, "Usage: %s <packfile>\n", argv[0]);
+		exit(1);
+	}
+	/* process_one_pack() has already reported the reason via error(). */
+	if (process_one_pack(argv[1]))
+		exit(1);
+	/*
+	 * The table only exists once at least one tree object has been
+	 * seen; dict_dump() dereferences its argument, so skip it for a
+	 * pack without any tree.
+	 */
+	if (tree_path_table)
+		dict_dump(tree_path_table);
+	return 0;
+}
--
1.8.4.22.g54757b7
next prev parent reply other threads:[~2013-08-27 4:27 UTC|newest]
Thread overview: 83+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-27 4:25 [PATCH 00/23] Preliminary pack v4 support Nicolas Pitre
2013-08-27 4:25 ` [PATCH 01/23] pack v4: initial pack dictionary structure and code Nicolas Pitre
2013-08-27 15:08 ` Junio C Hamano
2013-08-27 16:13 ` Nicolas Pitre
2013-08-27 4:25 ` [PATCH 02/23] export packed_object_info() Nicolas Pitre
2013-08-27 4:25 ` Nicolas Pitre [this message]
2013-08-27 4:25 ` [PATCH 04/23] pack v4: add tree entry mode support to dictionary entries Nicolas Pitre
2013-08-27 4:25 ` [PATCH 05/23] pack v4: add commit object parsing Nicolas Pitre
2013-08-27 15:26 ` Junio C Hamano
2013-08-27 16:47 ` Nicolas Pitre
2013-08-27 17:42 ` Junio C Hamano
2013-08-27 4:25 ` [PATCH 06/23] pack v4: split the object list and dictionary creation Nicolas Pitre
2013-08-27 4:25 ` [PATCH 07/23] pack v4: move to struct pack_idx_entry and get rid of our own struct idx_entry Nicolas Pitre
2013-08-27 4:25 ` [PATCH 08/23] pack v4: basic references encoding Nicolas Pitre
2013-08-27 15:29 ` Junio C Hamano
2013-08-27 15:53 ` Nicolas Pitre
2013-08-27 4:25 ` [PATCH 09/23] pack v4: commit object encoding Nicolas Pitre
2013-08-27 15:39 ` Junio C Hamano
2013-08-27 16:50 ` Nicolas Pitre
2013-08-27 19:59 ` Nicolas Pitre
2013-08-27 20:15 ` Junio C Hamano
2013-08-27 21:43 ` Nicolas Pitre
2013-09-02 20:48 ` Duy Nguyen
2013-09-03 6:30 ` Nicolas Pitre
2013-09-03 7:41 ` Duy Nguyen
2013-09-05 3:50 ` Nicolas Pitre
2013-08-27 4:25 ` [PATCH 10/23] pack v4: tree " Nicolas Pitre
2013-08-27 15:44 ` Junio C Hamano
2013-08-27 16:52 ` Nicolas Pitre
2013-08-27 4:25 ` [PATCH 11/23] pack v4: dictionary table output Nicolas Pitre
2013-08-27 4:25 ` [PATCH 12/23] pack v4: creation code Nicolas Pitre
2013-08-27 15:48 ` Junio C Hamano
2013-08-27 16:59 ` Nicolas Pitre
2013-08-27 4:25 ` [PATCH 13/23] pack v4: object headers Nicolas Pitre
2013-08-27 4:25 ` [PATCH 14/23] pack v4: object data copy Nicolas Pitre
2013-08-27 15:53 ` Junio C Hamano
2013-08-27 18:24 ` Nicolas Pitre
2013-08-27 4:25 ` [PATCH 15/23] pack v4: object writing Nicolas Pitre
2013-08-27 4:26 ` [PATCH 16/23] pack v4: tree object delta encoding Nicolas Pitre
2013-08-27 4:26 ` [PATCH 17/23] pack v4: load delta candidate for encoding tree objects Nicolas Pitre
2013-08-27 4:26 ` [PATCH 18/23] pack v4: honor pack.compression config option Nicolas Pitre
2013-08-27 4:26 ` [PATCH 19/23] pack v4: relax commit parsing a bit Nicolas Pitre
2013-08-27 4:26 ` [PATCH 20/23] pack index v3 Nicolas Pitre
2013-08-27 4:26 ` [PATCH 21/23] pack v4: normalize pack name to properly generate the pack index file name Nicolas Pitre
2013-08-27 4:26 ` [PATCH 22/23] pack v4: add progress display Nicolas Pitre
2013-08-27 4:26 ` [PATCH 23/23] initial pack index v3 support on the read side Nicolas Pitre
2013-08-31 11:45 ` Duy Nguyen
2013-09-03 6:09 ` Nicolas Pitre
2013-09-03 7:34 ` Duy Nguyen
2013-08-27 11:17 ` [PATCH] Document pack v4 format Nguyễn Thái Ngọc Duy
2013-08-27 18:25 ` Junio C Hamano
2013-08-27 18:53 ` Nicolas Pitre
2013-08-31 2:49 ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2013-09-03 6:00 ` Nicolas Pitre
2013-09-03 6:46 ` Nicolas Pitre
2013-09-03 11:49 ` Duy Nguyen
2013-09-03 14:54 ` Duy Nguyen
2013-09-05 4:12 ` Nicolas Pitre
2013-09-05 4:19 ` Duy Nguyen
2013-09-05 4:40 ` Nicolas Pitre
2013-09-05 5:04 ` Duy Nguyen
2013-09-05 5:39 ` Nicolas Pitre
2013-09-05 16:52 ` Duy Nguyen
2013-09-05 17:14 ` Nicolas Pitre
2013-09-05 20:26 ` Junio C Hamano
2013-09-05 21:04 ` Nicolas Pitre
2013-09-06 4:18 ` Duy Nguyen
2013-09-06 13:19 ` Nicolas Pitre
2013-09-06 2:14 ` [PATCH v3] " Nguyễn Thái Ngọc Duy
2013-09-06 3:23 ` Nicolas Pitre
2013-09-06 9:48 ` Duy Nguyen
2013-09-06 13:25 ` Nicolas Pitre
2013-09-06 13:44 ` Duy Nguyen
2013-09-06 16:44 ` Nicolas Pitre
2013-09-07 4:57 ` Nicolas Pitre
2013-09-07 4:52 ` Nicolas Pitre
2013-09-07 8:05 ` Duy Nguyen
2013-08-27 15:03 ` [PATCH 00/23] Preliminary pack v4 support Junio C Hamano
2013-08-27 15:59 ` Nicolas Pitre
2013-08-27 16:44 ` Junio C Hamano
2013-08-28 2:30 ` Duy Nguyen
2013-08-28 2:58 ` Nicolas Pitre
2013-08-28 3:06 ` Duy Nguyen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1377577567-27655-4-git-send-email-nico@fluxnic.net \
--to=nico@fluxnic.net \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).