All of lore.kernel.org
 help / color / mirror / Atom feed
From: Derrick Stolee <stolee@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net, git@jeffhostetler.com,
	sbeller@google.com, dstolee@microsoft.com
Subject: [PATCH 05/14] packed-graph: implement construct_graph()
Date: Thu, 25 Jan 2018 09:02:22 -0500	[thread overview]
Message-ID: <20180125140231.65604-6-dstolee@microsoft.com> (raw)
In-Reply-To: <20180125140231.65604-1-dstolee@microsoft.com>

Teach Git to write a packed graph file by checking all packed objects
to see if they are commits, then store the file in the given pack
directory.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Makefile       |   1 +
 packed-graph.c | 375 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 packed-graph.h |  20 +++
 3 files changed, 396 insertions(+)
 create mode 100644 packed-graph.c
 create mode 100644 packed-graph.h

diff --git a/Makefile b/Makefile
index d8b0d0457a..59439e13a1 100644
--- a/Makefile
+++ b/Makefile
@@ -841,6 +841,7 @@ LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
+LIB_OBJS += packed-graph.o
 LIB_OBJS += packfile.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-bitmap-write.o
diff --git a/packed-graph.c b/packed-graph.c
new file mode 100644
index 0000000000..9be9811667
--- /dev/null
+++ b/packed-graph.c
@@ -0,0 +1,375 @@
+#include "cache.h"
+#include "config.h"
+#include "git-compat-util.h"
+#include "pack.h"
+#include "packfile.h"
+#include "commit.h"
+#include "object.h"
+#include "packed-graph.h"
+
+#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
+#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
+#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
+#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
+#define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
+
+#define GRAPH_DATA_WIDTH 36
+
+#define GRAPH_VERSION_1 0x1
+#define GRAPH_VERSION GRAPH_VERSION_1
+
+#define GRAPH_OID_VERSION_SHA1 1
+#define GRAPH_OID_LEN_SHA1 20
+#define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
+#define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
+
+#define GRAPH_LARGE_EDGES_NEEDED 0x80000000
+#define GRAPH_PARENT_MISSING 0x7fffffff
+#define GRAPH_EDGE_LAST_MASK 0x7fffffff
+#define GRAPH_PARENT_NONE 0x70000000
+
+#define GRAPH_LAST_EDGE 0x80000000
+
+char* get_graph_filename_oid(const char *pack_dir,
+				  struct object_id *oid)
+{
+	size_t len;
+	struct strbuf head_path = STRBUF_INIT;
+	strbuf_addstr(&head_path, pack_dir);
+	strbuf_addstr(&head_path, "/graph-");
+	strbuf_addstr(&head_path, oid_to_hex(oid));
+	strbuf_addstr(&head_path, ".graph");
+
+	return strbuf_detach(&head_path, &len);
+}
+
+static void write_graph_chunk_fanout(
+	struct sha1file *f,
+	struct commit **commits, int nr_commits)
+{
+	uint32_t i, count = 0;
+	struct commit **list = commits;
+	struct commit **last = commits + nr_commits;
+
+	/*
+	* Write the first-level table (the list is sorted,
+	* but we use a 256-entry lookup to be able to avoid
+	* having to do eight extra binary search iterations).
+	*/
+	for (i = 0; i < 256; i++) {
+		uint32_t swap_count;
+
+		while (list < last) {
+			if ((*list)->object.oid.hash[0] != i)
+				break;
+			count++;
+			list++;
+		}
+
+		swap_count = htonl(count);
+		sha1write(f, &swap_count, 4);
+	}
+}
+
+static void write_graph_chunk_oids(
+	struct sha1file *f, int hash_len,
+	struct commit **commits, int nr_commits)
+{
+	struct commit **list = commits;
+	uint32_t i;
+	for (i = 0; i < nr_commits; i++) {
+		sha1write(f, (*list)->object.oid.hash, (int)hash_len);
+		list++;
+	}
+}
+
+static int commit_pos(struct commit **commits, int nr_commits, const struct object_id *oid, uint32_t *pos)
+{
+	uint32_t first = 0, last = nr_commits;
+
+	while (first < last) {
+		uint32_t mid = first + (last - first) / 2;
+		struct object_id *current;
+		int cmp;
+
+		current = &(commits[mid]->object.oid);
+		cmp = oidcmp(oid, current);
+		if (!cmp) {
+			*pos = mid;
+			return 1;
+		}
+		if (cmp > 0) {
+			first = mid + 1;
+			continue;
+		}
+		last = mid;
+	}
+
+	*pos = first;
+	return 0;
+}
+
+static void write_graph_chunk_data(
+	struct sha1file *f, int hash_len,
+	struct commit **commits, int nr_commits)
+{
+	struct commit **list = commits;
+	struct commit **last = commits + nr_commits;
+	uint32_t num_large_edges = 0;
+
+	while (list < last) {
+		struct commit_list *parent;
+		uint32_t intId, swapIntId;
+		uint32_t packedDate[2];
+
+		parse_commit(*list);
+		sha1write(f, (*list)->tree->object.oid.hash, hash_len);
+
+		parent = (*list)->parents;
+
+		if (!parent)
+			swapIntId = htonl(GRAPH_PARENT_NONE);
+		else if (commit_pos(commits, nr_commits, &(parent->item->object.oid), &intId))
+			swapIntId = htonl(intId);
+		else
+			swapIntId = htonl(GRAPH_PARENT_MISSING);
+
+		sha1write(f, &swapIntId, 4);
+
+		if (parent)
+			parent = parent->next;
+
+		if (!parent)
+			swapIntId = htonl(GRAPH_PARENT_NONE);
+		else if (parent->next)
+			swapIntId = htonl(GRAPH_LARGE_EDGES_NEEDED | num_large_edges);
+		else if (commit_pos(commits, nr_commits,  &(parent->item->object.oid), &intId))
+			swapIntId = htonl(intId);
+		else
+			swapIntId = htonl(GRAPH_PARENT_MISSING);
+
+		sha1write(f, &swapIntId, 4);
+
+		if (parent && parent->next) {
+			do {
+				num_large_edges++;
+				parent = parent->next;
+			} while (parent);
+		}
+
+		packedDate[0] = htonl((*list)->date >> 32);
+		packedDate[1] = htonl((*list)->date);
+		sha1write(f, packedDate, 8);
+
+		list++;
+	}
+}
+
+static void write_graph_chunk_large_edges(
+	struct sha1file *f,
+	struct commit **commits, int nr_commits)
+{
+	struct commit **list = commits;
+	struct commit **last = commits + nr_commits;
+	struct commit_list *parent;
+
+	while (list < last) {
+		int num_parents = 0;
+		for (parent = (*list)->parents; num_parents < 3 && parent; parent = parent->next)
+			num_parents++;
+
+		if (num_parents <= 2) {
+			list++;
+			continue;
+		}
+
+		for (parent = (*list)->parents; parent; parent = parent->next)
+		{
+			if (parent != (*list)->parents)
+			{
+				uint32_t intId, swapIntId;
+				uint32_t lastEdge = 0;
+
+				if (!parent->next)
+					lastEdge |= GRAPH_LAST_EDGE;
+
+				if (commit_pos(commits, nr_commits,  &(parent->item->object.oid), &intId))
+					swapIntId = htonl(intId | lastEdge);
+				else
+					swapIntId = htonl(GRAPH_PARENT_MISSING | lastEdge);
+
+				sha1write(f, &swapIntId, 4);
+			}
+		}
+
+		list++;
+	}
+}
+
+static int commit_compare(const void *_a, const void *_b)
+{
+	struct object_id *a = *(struct object_id **)_a;
+	struct object_id *b = *(struct object_id **)_b;
+	return oidcmp(a, b);
+}
+
+struct packed_commit_list {
+	struct commit **list;
+	int num;
+	int size;
+};
+
+struct packed_oid_list {
+	struct object_id **list;
+	int num;
+	int size;
+};
+
+static int if_packed_commit_add_to_list(const struct object_id *oid,
+					struct packed_git *pack,
+					uint32_t pos,
+					void *data)
+{
+	struct packed_oid_list *list = (struct packed_oid_list*)data;
+	enum object_type type;
+	unsigned long size;
+	void *inner_data;
+	off_t offset = nth_packed_object_offset(pack, pos);
+	inner_data = unpack_entry(pack, offset, &type, &size);
+
+	if (inner_data)
+		free(inner_data);
+
+	if (type != OBJ_COMMIT)
+		return 0;
+
+	ALLOC_GROW(list->list, list->num + 1, list->size);
+	list->list[list->num] = (struct object_id *)malloc(sizeof(struct object_id));
+	oidcpy(list->list[list->num], oid);
+	(list->num)++;
+
+	return 0;
+}
+
+struct object_id *construct_graph(const char *pack_dir)
+{
+	// Find a list of oids, adding the pointer to a list.
+	struct packed_oid_list oids;
+	struct packed_commit_list commits;
+	struct packed_graph_header hdr;
+	struct sha1file *f;
+	int i, count_distinct = 0;
+	struct strbuf tmp_file = STRBUF_INIT;
+	unsigned char final_hash[GIT_MAX_RAWSZ];
+	char *graph_name;
+	int fd;
+	uint32_t chunk_ids[5];
+	uint64_t chunk_offsets[5];
+	int num_long_edges;
+	struct object_id *f_oid;
+	char *fname;
+	struct commit_list *parent;
+
+	if (!core_graph)
+		return 0;
+
+	oids.num = 0;
+	oids.size = 1024;
+	ALLOC_ARRAY(oids.list, oids.size);
+	for_each_packed_object(if_packed_commit_add_to_list, &oids, 0);
+	QSORT(oids.list, oids.num, commit_compare);
+
+	count_distinct = 1;
+	for (i = 1; i < oids.num; i++)
+	{
+		if (oidcmp(oids.list[i-1], oids.list[i]))
+			count_distinct++;
+	}
+
+	commits.num = 0;
+	commits.size = count_distinct;
+	ALLOC_ARRAY(commits.list, commits.size);
+
+	num_long_edges = 0;
+	for (i = 0; i < oids.num; i++)
+	{
+		int num_parents = 0;
+		if (i > 0 && !oidcmp(oids.list[i-1], oids.list[i]))
+			continue;
+
+		commits.list[commits.num] = lookup_commit(oids.list[i]);
+		parse_commit(commits.list[commits.num]);
+
+		for (parent = commits.list[commits.num]->parents; parent; parent = parent->next) {
+			num_parents++;
+		}
+
+		if (num_parents > 2)
+			num_long_edges += num_parents - 1;
+
+		commits.num++;
+	}
+
+	// Create header (including chunk lengths?!?)
+	strbuf_addstr(&tmp_file, pack_dir);
+	strbuf_addstr(&tmp_file, "/tmp_graph_XXXXXX");
+
+	fd = git_mkstemp_mode(tmp_file.buf, 0444);
+	if (fd < 0)
+		die_errno("unable to create '%s'", tmp_file.buf);
+
+	graph_name = strbuf_detach(&tmp_file, NULL);
+	f = sha1fd(fd, graph_name);
+
+	/* fill header info */
+	hdr.graph_signature = htonl(GRAPH_SIGNATURE);
+	hdr.graph_version = GRAPH_VERSION;
+	hdr.hash_version = GRAPH_OID_VERSION;
+	hdr.hash_len = GRAPH_OID_LEN;
+	hdr.num_chunks = 4;
+
+	/* write header to file */
+	assert(sizeof(hdr) == 8);
+	sha1write(f, &hdr, sizeof(hdr));
+
+	chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
+	chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
+	chunk_ids[2] = GRAPH_CHUNKID_DATA;
+	chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES;
+	chunk_ids[4] = 0;
+
+	chunk_offsets[0] = sizeof(hdr) + 12 * 5; // Skip header and chunk list
+	chunk_offsets[1] = chunk_offsets[0] + 256 * 4; // fanout table size
+	chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.num; // lookup size
+	chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.num; // data size
+	chunk_offsets[4] = chunk_offsets[3] + 4 * num_long_edges;
+
+	for (i = 0; i <= hdr.num_chunks; i++) {
+		uint32_t chunk_write[3];
+
+		chunk_write[0] = htonl(chunk_ids[i]);
+		chunk_write[1] = htonl(chunk_offsets[i] >> 32);
+		chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
+		sha1write(f, chunk_write, 12);
+	}
+
+	write_graph_chunk_fanout(f, commits.list, commits.num);
+	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.num);
+	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.num);
+	write_graph_chunk_large_edges(f, commits.list, commits.num);
+
+	sha1close(f, final_hash, CSUM_CLOSE | CSUM_FSYNC);
+
+	f_oid = (struct object_id *)malloc(sizeof(struct object_id));
+	memcpy(f_oid->hash, final_hash, GIT_MAX_RAWSZ);
+	fname = get_graph_filename_oid(pack_dir, f_oid);
+
+	if (rename(graph_name, fname))
+		die("failed to rename %s to %s", graph_name, fname);
+
+	free(oids.list);
+	oids.size = 0;
+	oids.num = 0;
+
+	return f_oid;
+}
diff --git a/packed-graph.h b/packed-graph.h
new file mode 100644
index 0000000000..d4e10fb612
--- /dev/null
+++ b/packed-graph.h
@@ -0,0 +1,20 @@
+#ifndef PACKED_GRAPH_H
+#define PACKED_GRAPH_H
+
+#include "git-compat-util.h"
+#include "commit.h"
+
+extern char* get_graph_filename_oid(const char *pack_dir,
+				    struct object_id *oid);
+
+struct packed_graph_header {
+	uint32_t graph_signature;
+	unsigned char graph_version;
+	unsigned char hash_version;
+	unsigned char hash_len;
+	unsigned char num_chunks;
+};
+
+extern struct object_id *construct_graph(const char *pack_dir);
+
+#endif
-- 
2.16.0


  parent reply	other threads:[~2018-01-25 14:03 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-25 14:02 [PATCH 00/14] Serialized Commit Graph Derrick Stolee
2018-01-25 14:02 ` [PATCH 01/14] graph: add packed graph design document Derrick Stolee
2018-01-25 20:04   ` Stefan Beller
2018-01-26 12:49     ` Derrick Stolee
2018-01-26 18:17       ` Stefan Beller
2018-01-25 21:14   ` Junio C Hamano
2018-01-26 13:06     ` Derrick Stolee
2018-01-26 14:13   ` Duy Nguyen
2018-01-25 14:02 ` [PATCH 02/14] packed-graph: add core.graph setting Derrick Stolee
2018-01-25 20:17   ` Stefan Beller
2018-01-25 20:40     ` Derrick Stolee
2018-01-25 21:43   ` Junio C Hamano
2018-01-26 13:08     ` Derrick Stolee
2018-01-25 14:02 ` [PATCH 03/14] packed-graph: create git-graph builtin Derrick Stolee
2018-01-25 21:45   ` Stefan Beller
2018-01-26 13:13     ` Derrick Stolee
2018-01-25 23:01   ` Junio C Hamano
2018-01-26 13:14     ` Derrick Stolee
2018-01-26 14:16       ` Duy Nguyen
2018-01-25 14:02 ` [PATCH 04/14] packed-graph: add format document Derrick Stolee
2018-01-25 22:06   ` Junio C Hamano
2018-01-25 22:18     ` Stefan Beller
2018-01-25 22:29       ` Junio C Hamano
2018-01-26 13:22         ` Derrick Stolee
2018-01-25 22:07   ` Stefan Beller
2018-01-26 13:25     ` Derrick Stolee
2018-01-25 14:02 ` Derrick Stolee [this message]
2018-01-25 23:21   ` [PATCH 05/14] packed-graph: implement construct_graph() Stefan Beller
2018-01-26 20:47     ` Junio C Hamano
2018-01-26 20:55   ` Junio C Hamano
2018-01-26 21:14     ` Andreas Schwab
2018-01-26 22:04       ` Junio C Hamano
2018-01-25 14:02 ` [PATCH 06/14] packed-graph: implement git-graph --write Derrick Stolee
2018-01-25 23:28   ` Stefan Beller
2018-01-26 13:28     ` Derrick Stolee
2018-01-25 14:02 ` [PATCH 07/14] packed-graph: implement git-graph --read Derrick Stolee
2018-01-25 14:02 ` [PATCH 08/14] graph: implement git-graph --update-head Derrick Stolee
2018-01-25 14:02 ` [PATCH 09/14] packed-graph: implement git-graph --clear Derrick Stolee
2018-01-25 23:35   ` Stefan Beller
2018-01-25 14:02 ` [PATCH 10/14] packed-graph: teach git-graph --delete-expired Derrick Stolee
2018-01-25 14:02 ` [PATCH 11/14] commit: integrate packed graph with commit parsing Derrick Stolee
2018-01-26 19:38   ` Stefan Beller
2018-01-25 14:02 ` [PATCH 12/14] packed-graph: read only from specific pack-indexes Derrick Stolee
2018-01-25 14:02 ` [PATCH 13/14] packed-graph: close under reachability Derrick Stolee
2018-01-25 14:02 ` [PATCH 14/14] packed-graph: teach git-graph to read commits Derrick Stolee
2018-01-25 15:46 ` [PATCH 00/14] Serialized Commit Graph Ævar Arnfjörð Bjarmason
2018-01-25 16:09   ` Derrick Stolee
2018-01-25 23:06     ` Ævar Arnfjörð Bjarmason
2018-01-26 12:15       ` Derrick Stolee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180125140231.65604-6-dstolee@microsoft.com \
    --to=stolee@gmail.com \
    --cc=dstolee@microsoft.com \
    --cc=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.