From: "SZEDER Gábor" <szeder.dev@gmail.com>
To: Derrick Stolee <stolee@gmail.com>
Cc: "SZEDER Gábor" <szeder.dev@gmail.com>,
git@vger.kernel.org, gitster@pobox.com, peff@peff.net,
git@jeffhostetler.com, sbeller@google.com, dstolee@microsoft.com
Subject: Re: [PATCH v2 04/14] commit-graph: implement construct_commit_graph()
Date: Fri, 2 Feb 2018 00:46:43 +0100 [thread overview]
Message-ID: <20180201234643.7331-1-szeder.dev@gmail.com> (raw)
In-Reply-To: <1517348383-112294-5-git-send-email-dstolee@microsoft.com>
> Teach Git to write a commit graph file by checking all packed objects
> to see if they are commits, then store the file in the given pack
> directory.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
> Makefile | 1 +
> commit-graph.c | 376 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> commit-graph.h | 20 +++
> 3 files changed, 397 insertions(+)
> create mode 100644 commit-graph.c
> create mode 100644 commit-graph.h
> diff --git a/commit-graph.c b/commit-graph.c
> new file mode 100644
> index 0000000000..db2b7390c7
> --- /dev/null
> +++ b/commit-graph.c
> +struct packed_commit_list {
> + struct commit **list;
> + int num;
> + int size;
> +};
> +
> +struct packed_oid_list {
> + struct object_id **list;
> + int num;
> + int size;
> +};
When we manage the memory allocation of an array with the ALLOC_GROW
macro, we tend to name the helper fields 'alloc' and 'nr' (rather than
'size' and 'num').
> +static int if_packed_commit_add_to_list(const struct object_id *oid,
> + struct packed_git *pack,
> + uint32_t pos,
> + void *data)
> +{
> + struct packed_oid_list *list = (struct packed_oid_list*)data;
> + enum object_type type;
> + unsigned long size;
> + void *inner_data;
> + off_t offset = nth_packed_object_offset(pack, pos);
> + inner_data = unpack_entry(pack, offset, &type, &size);
> +
> + if (inner_data)
> + free(inner_data);
> +
> + if (type != OBJ_COMMIT)
> + return 0;
> +
> + ALLOC_GROW(list->list, list->num + 1, list->size);
> + list->list[list->num] = (struct object_id *)malloc(sizeof(struct object_id));
> + oidcpy(list->list[list->num], oid);
> + (list->num)++;
> +
> + return 0;
> +}
> +
> +struct object_id *construct_commit_graph(const char *pack_dir)
> +{
> + struct packed_oid_list oids;
> + struct packed_commit_list commits;
> + struct commit_graph_header hdr;
> + struct sha1file *f;
> + int i, count_distinct = 0;
> + struct strbuf tmp_file = STRBUF_INIT;
> + unsigned char final_hash[GIT_MAX_RAWSZ];
> + char *graph_name;
> + int fd;
> + uint32_t chunk_ids[5];
> + uint64_t chunk_offsets[5];
> + int num_long_edges;
> + struct object_id *f_hash;
> + char *fname;
> + struct commit_list *parent;
> +
> + oids.num = 0;
> + oids.size = 1024;
> + ALLOC_ARRAY(oids.list, oids.size);
> + for_each_packed_object(if_packed_commit_add_to_list, &oids, 0);
> + QSORT(oids.list, oids.num, commit_compare);
> +
> + count_distinct = 1;
> + for (i = 1; i < oids.num; i++) {
> + if (oidcmp(oids.list[i-1], oids.list[i]))
> + count_distinct++;
> + }
> +
> + commits.num = 0;
> + commits.size = count_distinct;
> + ALLOC_ARRAY(commits.list, commits.size);
> +
> + num_long_edges = 0;
> + for (i = 0; i < oids.num; i++) {
> + int num_parents = 0;
> + if (i > 0 && !oidcmp(oids.list[i-1], oids.list[i]))
> + continue;
> +
> + commits.list[commits.num] = lookup_commit(oids.list[i]);
> + parse_commit(commits.list[commits.num]);
> +
> + for (parent = commits.list[commits.num]->parents;
> + parent; parent = parent->next)
> + num_parents++;
> +
> + if (num_parents > 2)
> + num_long_edges += num_parents - 1;
> +
> + commits.num++;
> + }
> +
> + strbuf_addstr(&tmp_file, pack_dir);
> + strbuf_addstr(&tmp_file, "/tmp_graph_XXXXXX");
> +
> + fd = git_mkstemp_mode(tmp_file.buf, 0444);
> + if (fd < 0)
> + die_errno("unable to create '%s'", tmp_file.buf);
> +
> + graph_name = strbuf_detach(&tmp_file, NULL);
> + f = sha1fd(fd, graph_name);
> +
> + hdr.graph_signature = htonl(GRAPH_SIGNATURE);
> + hdr.graph_version = GRAPH_VERSION;
> + hdr.hash_version = GRAPH_OID_VERSION;
> + hdr.hash_len = GRAPH_OID_LEN;
> + hdr.num_chunks = 4;
> +
> + assert(sizeof(hdr) == 8);
> + sha1write(f, &hdr, sizeof(hdr));
> +
> + chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
> + chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
> + chunk_ids[2] = GRAPH_CHUNKID_DATA;
> + chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES;
> + chunk_ids[4] = 0;
> +
> + chunk_offsets[0] = sizeof(hdr) + GRAPH_CHUNKLOOKUP_SIZE;
> + chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
> + chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.num;
> + chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.num;
> + chunk_offsets[4] = chunk_offsets[3] + 4 * num_long_edges;
> +
> + for (i = 0; i <= hdr.num_chunks; i++) {
> + uint32_t chunk_write[3];
> +
> + chunk_write[0] = htonl(chunk_ids[i]);
> + chunk_write[1] = htonl(chunk_offsets[i] >> 32);
> + chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
> + sha1write(f, chunk_write, 12);
> + }
> +
> + write_graph_chunk_fanout(f, commits.list, commits.num);
> + write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.num);
> + write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.num);
> + write_graph_chunk_large_edges(f, commits.list, commits.num);
> +
> + sha1close(f, final_hash, CSUM_CLOSE | CSUM_FSYNC);
> +
> + f_hash = (struct object_id *)malloc(sizeof(struct object_id));
> + memcpy(f_hash->hash, final_hash, GIT_MAX_RAWSZ);
hashcpy(), perhaps?
> + fname = get_commit_graph_filename_hash(pack_dir, f_hash);
> +
> + if (rename(graph_name, fname))
> + die("failed to rename %s to %s", graph_name, fname);
> +
> + free(oids.list);
> + oids.size = 0;
> + oids.num = 0;
> +
> + return f_hash;
> +}
> +
> diff --git a/commit-graph.h b/commit-graph.h
> new file mode 100644
> index 0000000000..7b3469a7df
> --- /dev/null
> +++ b/commit-graph.h
> @@ -0,0 +1,20 @@
> +#ifndef COMMIT_GRAPH_H
> +#define COMMIT_GRAPH_H
> +
> +#include "git-compat-util.h"
> +#include "commit.h"
> +
> +extern char* get_commit_graph_filename_hash(const char *pack_dir,
> + struct object_id *hash);
> +
> +struct commit_graph_header {
> + uint32_t graph_signature;
> + unsigned char graph_version;
> + unsigned char hash_version;
> + unsigned char hash_len;
> + unsigned char num_chunks;
> +};
> +
> +extern struct object_id *construct_commit_graph(const char *pack_dir);
> +
> +#endif
> --
> 2.16.0.15.g9c3cf44.dirty
next prev parent reply other threads:[~2018-02-01 23:47 UTC|newest]
Thread overview: 146+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-30 21:39 [PATCH v2 00/14] Serialized Git Commit Graph Derrick Stolee
2018-01-30 21:39 ` [PATCH v2 01/14] commit-graph: add format document Derrick Stolee
2018-02-01 21:44 ` Jonathan Tan
2018-01-30 21:39 ` [PATCH v2 02/14] graph: add commit graph design document Derrick Stolee
2018-01-31 2:19 ` Stefan Beller
2018-01-30 21:39 ` [PATCH v2 03/14] commit-graph: create git-commit-graph builtin Derrick Stolee
2018-02-02 0:53 ` SZEDER Gábor
2018-01-30 21:39 ` [PATCH v2 04/14] commit-graph: implement construct_commit_graph() Derrick Stolee
2018-02-01 22:23 ` Jonathan Tan
2018-02-01 23:46 ` SZEDER Gábor [this message]
2018-02-02 15:32 ` SZEDER Gábor
2018-02-05 16:06 ` Derrick Stolee
2018-02-07 15:08 ` SZEDER Gábor
2018-02-07 15:10 ` Derrick Stolee
2018-01-30 21:39 ` [PATCH v2 05/14] commit-graph: implement git-commit-graph --write Derrick Stolee
2018-02-01 23:33 ` Jonathan Tan
2018-02-02 18:36 ` Stefan Beller
2018-02-02 22:48 ` Junio C Hamano
2018-02-03 1:58 ` Derrick Stolee
2018-02-03 9:28 ` Jeff King
2018-02-05 18:48 ` Junio C Hamano
2018-02-06 18:55 ` Derrick Stolee
2018-02-01 23:48 ` SZEDER Gábor
2018-02-05 18:07 ` Derrick Stolee
2018-02-02 1:47 ` SZEDER Gábor
2018-01-30 21:39 ` [PATCH v2 06/14] commit-graph: implement git-commit-graph --read Derrick Stolee
2018-01-31 2:22 ` Stefan Beller
2018-02-02 0:02 ` SZEDER Gábor
2018-02-02 0:23 ` Jonathan Tan
2018-02-05 19:29 ` Derrick Stolee
2018-01-30 21:39 ` [PATCH v2 07/14] commit-graph: implement git-commit-graph --update-head Derrick Stolee
2018-02-02 1:35 ` SZEDER Gábor
2018-02-05 21:01 ` Derrick Stolee
2018-02-02 2:45 ` SZEDER Gábor
2018-01-30 21:39 ` [PATCH v2 08/14] commit-graph: implement git-commit-graph --clear Derrick Stolee
2018-02-02 4:01 ` SZEDER Gábor
2018-01-30 21:39 ` [PATCH v2 09/14] commit-graph: teach git-commit-graph --delete-expired Derrick Stolee
2018-02-02 15:04 ` SZEDER Gábor
2018-01-30 21:39 ` [PATCH v2 10/14] commit-graph: add core.commitgraph setting Derrick Stolee
2018-01-31 22:44 ` Igor Djordjevic
2018-02-02 16:01 ` SZEDER Gábor
2018-01-30 21:39 ` [PATCH v2 11/14] commit: integrate commit graph with commit parsing Derrick Stolee
2018-02-02 1:51 ` Jonathan Tan
2018-02-06 14:53 ` Derrick Stolee
2018-01-30 21:39 ` [PATCH v2 12/14] commit-graph: read only from specific pack-indexes Derrick Stolee
2018-01-30 21:39 ` [PATCH v2 13/14] commit-graph: close under reachability Derrick Stolee
2018-01-30 21:39 ` [PATCH v2 14/14] commit-graph: build graph from starting commits Derrick Stolee
2018-01-30 21:47 ` [PATCH v2 00/14] Serialized Git Commit Graph Stefan Beller
2018-02-01 2:34 ` Stefan Beller
2018-02-08 20:37 ` [PATCH v3 " Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 01/14] commit-graph: add format document Derrick Stolee
2018-02-08 21:21 ` Junio C Hamano
2018-02-08 21:33 ` Derrick Stolee
2018-02-08 23:16 ` Junio C Hamano
2018-02-08 20:37 ` [PATCH v3 02/14] graph: add commit graph design document Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 03/14] commit-graph: create git-commit-graph builtin Derrick Stolee
2018-02-08 21:27 ` Junio C Hamano
2018-02-08 21:36 ` Derrick Stolee
2018-02-08 23:21 ` Junio C Hamano
2018-02-08 20:37 ` [PATCH v3 04/14] commit-graph: implement write_commit_graph() Derrick Stolee
2018-02-08 22:14 ` Junio C Hamano
2018-02-15 18:19 ` Junio C Hamano
2018-02-15 18:23 ` Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 05/14] commit-graph: implement 'git-commit-graph write' Derrick Stolee
2018-02-13 21:57 ` Jonathan Tan
2018-02-08 20:37 ` [PATCH v3 06/14] commit-graph: implement 'git-commit-graph read' Derrick Stolee
2018-02-08 23:38 ` Junio C Hamano
2018-02-08 20:37 ` [PATCH v3 07/14] commit-graph: update graph-head during write Derrick Stolee
2018-02-12 18:56 ` Junio C Hamano
2018-02-12 20:37 ` Junio C Hamano
2018-02-12 21:24 ` Derrick Stolee
2018-02-13 22:38 ` Jonathan Tan
2018-02-08 20:37 ` [PATCH v3 08/14] commit-graph: implement 'git-commit-graph clear' Derrick Stolee
2018-02-13 22:49 ` Jonathan Tan
2018-02-08 20:37 ` [PATCH v3 09/14] commit-graph: implement --delete-expired Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 10/14] commit-graph: add core.commitGraph setting Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 11/14] commit: integrate commit graph with commit parsing Derrick Stolee
2018-02-14 0:12 ` Jonathan Tan
2018-02-14 18:08 ` Derrick Stolee
2018-02-15 18:25 ` Junio C Hamano
2018-02-08 20:37 ` [PATCH v3 12/14] commit-graph: close under reachability Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 13/14] commit-graph: read only from specific pack-indexes Derrick Stolee
2018-02-08 20:37 ` [PATCH v3 14/14] commit-graph: build graph from starting commits Derrick Stolee
2018-02-09 13:02 ` SZEDER Gábor
2018-02-09 13:45 ` Derrick Stolee
2018-02-14 18:15 ` [PATCH v3 00/14] Serialized Git Commit Graph Derrick Stolee
2018-02-14 18:27 ` Stefan Beller
2018-02-14 19:11 ` Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 00/13] " Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 01/13] commit-graph: add format document Derrick Stolee
2018-02-20 20:49 ` Junio C Hamano
2018-02-21 19:23 ` Stefan Beller
2018-02-21 19:45 ` Derrick Stolee
2018-02-21 19:48 ` Stefan Beller
2018-03-30 13:25 ` Jakub Narebski
2018-04-02 13:09 ` Derrick Stolee
2018-04-02 14:09 ` Jakub Narebski
2018-02-19 18:53 ` [PATCH v4 02/13] graph: add commit graph design document Derrick Stolee
2018-02-20 21:42 ` Junio C Hamano
2018-02-23 15:44 ` Derrick Stolee
2018-02-21 19:34 ` Stefan Beller
2018-02-19 18:53 ` [PATCH v4 03/13] commit-graph: create git-commit-graph builtin Derrick Stolee
2018-02-20 21:51 ` Junio C Hamano
2018-02-21 18:58 ` Junio C Hamano
2018-02-23 16:07 ` Derrick Stolee
2018-02-26 16:25 ` SZEDER Gábor
2018-02-26 17:08 ` Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 04/13] commit-graph: implement write_commit_graph() Derrick Stolee
2018-02-20 22:57 ` Junio C Hamano
2018-02-23 17:23 ` Derrick Stolee
2018-02-23 19:30 ` Junio C Hamano
2018-02-23 19:48 ` Junio C Hamano
2018-02-23 20:02 ` Derrick Stolee
2018-02-26 16:10 ` SZEDER Gábor
2018-02-28 18:47 ` Junio C Hamano
2018-02-19 18:53 ` [PATCH v4 05/13] commit-graph: implement 'git-commit-graph write' Derrick Stolee
2018-02-21 19:25 ` Junio C Hamano
2018-02-19 18:53 ` [PATCH v4 06/13] commit-graph: implement git commit-graph read Derrick Stolee
2018-02-21 20:11 ` Junio C Hamano
2018-02-22 18:25 ` Junio C Hamano
2018-02-19 18:53 ` [PATCH v4 07/13] commit-graph: implement --set-latest Derrick Stolee
2018-02-22 18:31 ` Junio C Hamano
2018-02-23 17:53 ` Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 08/13] commit-graph: implement --delete-expired Derrick Stolee
2018-02-21 21:34 ` Stefan Beller
2018-02-23 17:43 ` Derrick Stolee
2018-02-22 18:48 ` Junio C Hamano
2018-02-23 17:59 ` Derrick Stolee
2018-02-23 19:33 ` Junio C Hamano
2018-02-23 19:41 ` Derrick Stolee
2018-02-23 19:51 ` Junio C Hamano
2018-02-19 18:53 ` [PATCH v4 09/13] commit-graph: add core.commitGraph setting Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 10/13] commit-graph: close under reachability Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 11/13] commit: integrate commit graph with commit parsing Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 12/13] commit-graph: read only from specific pack-indexes Derrick Stolee
2018-02-21 22:25 ` Stefan Beller
2018-02-23 19:19 ` Derrick Stolee
2018-02-19 18:53 ` [PATCH v4 13/13] commit-graph: build graph from starting commits Derrick Stolee
2018-03-30 11:10 ` [PATCH v4 00/13] Serialized Git Commit Graph Jakub Narebski
2018-04-02 13:02 ` Derrick Stolee
2018-04-02 14:46 ` Jakub Narebski
2018-04-02 15:02 ` Derrick Stolee
2018-04-02 17:35 ` Stefan Beller
2018-04-02 17:54 ` Derrick Stolee
2018-04-02 18:02 ` Stefan Beller
2018-04-07 22:37 ` Jakub Narebski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180201234643.7331-1-szeder.dev@gmail.com \
--to=szeder.dev@gmail.com \
--cc=dstolee@microsoft.com \
--cc=git@jeffhostetler.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
--cc=sbeller@google.com \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.