From: Derrick Stolee <stolee@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net, git@jeffhostetler.com,
sbeller@google.com, dstolee@microsoft.com
Subject: [PATCH 07/14] packed-graph: implement git-graph --read
Date: Thu, 25 Jan 2018 09:02:24 -0500 [thread overview]
Message-ID: <20180125140231.65604-8-dstolee@microsoft.com> (raw)
In-Reply-To: <20180125140231.65604-1-dstolee@microsoft.com>
Teach git-graph to read packed graph files and summarize their contents.
Use the --read option to verify the contents of a graph file in the
graph tests.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
Documentation/git-graph.txt | 7 +++
builtin/graph.c | 54 ++++++++++++++++
packed-graph.c | 147 +++++++++++++++++++++++++++++++++++++++++++-
packed-graph.h | 25 ++++++++
t/t5319-graph.sh | 50 +++++++++------
5 files changed, 260 insertions(+), 23 deletions(-)
diff --git a/Documentation/git-graph.txt b/Documentation/git-graph.txt
index be6bc38814..0939c3f1be 100644
--- a/Documentation/git-graph.txt
+++ b/Documentation/git-graph.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git graph' --write <options> [--pack-dir <pack_dir>]
+'git graph' --read <options> [--pack-dir <pack_dir>]
EXAMPLES
--------
@@ -20,6 +21,12 @@ EXAMPLES
$ git midx --write
------------------------------------------------
+* Read basic information from a graph file.
++
+------------------------------------------------
+$ git graph --read --graph-id=<oid>
+------------------------------------------------
+
CONFIGURATION
-------------
diff --git a/builtin/graph.c b/builtin/graph.c
index 09f5552338..bc66722924 100644
--- a/builtin/graph.c
+++ b/builtin/graph.c
@@ -10,15 +10,58 @@
static char const * const builtin_graph_usage[] ={
N_("git graph [--pack-dir <packdir>]"),
+ N_("git graph --read [--graph-id=<oid>]"),
N_("git graph --write [--pack-dir <packdir>]"),
NULL
};
+/* Option values for the graph builtin; the fields are the targets of the option table in cmd_graph(). */
static struct opts_graph {
const char *pack_dir; /* value of -p/--pack-dir */
+ int read; /* set by -r/--read */
+ const char *graph_id; /* value of -M/--graph-id; its argument is optional and defaults to "" */
int write; /* set by -w/--write */
} opts;
+/*
+ * Implement "git graph --read": load the graph file named by --graph-id
+ * from opts.pack_dir and print a summary of it to stdout (header bytes,
+ * commit count, the chunks that are present, and the pack directory).
+ *
+ * Dies if no id was given, if the id is not a full hex object id, or if
+ * the graph file could not be loaded. Returns 0 on success.
+ */
+static int graph_read(void)
+{
+	struct object_id graph_oid;
+	struct packed_graph *graph;
+	char *graph_file;
+
+	if (!opts.graph_id || !*opts.graph_id)
+		die("no graph id specified");
+
+	/*
+	 * Check the parse result as well as the length: a failed parse
+	 * would otherwise leave graph_oid uninitialized.
+	 */
+	if (strlen(opts.graph_id) != GIT_MAX_HEXSZ ||
+	    get_oid_hex(opts.graph_id, &graph_oid))
+		die("invalid graph id: %s", opts.graph_id);
+
+	graph_file = get_graph_filename_oid(opts.pack_dir, &graph_oid);
+	graph = load_packed_graph_one(graph_file, opts.pack_dir);
+
+	if (!graph)
+		die("graph file %s does not exist", graph_file);
+
+	printf("header: %08x %02x %02x %02x %02x\n",
+	       ntohl(graph->hdr->graph_signature),
+	       graph->hdr->graph_version,
+	       graph->hdr->hash_version,
+	       graph->hdr->hash_len,
+	       graph->hdr->num_chunks);
+	printf("num_commits: %u\n", graph->num_commits);
+	printf("chunks:");
+
+	/* A chunk pointer is only set when the loader found that chunk id. */
+	if (graph->chunk_oid_fanout)
+		printf(" oid_fanout");
+	if (graph->chunk_oid_lookup)
+		printf(" oid_lookup");
+	if (graph->chunk_commit_data)
+		printf(" commit_metadata");
+	if (graph->chunk_large_edges)
+		printf(" large_edges");
+	printf("\n");
+
+	printf("pack_dir: %s\n", graph->pack_dir);
+
+	/* Release the mapping, the descriptor and the heap allocations. */
+	close_graph(graph);
+	free(graph);
+	free(graph_file);
+	return 0;
+}
+
static int graph_write(void)
{
struct object_id *graph_id = construct_graph(opts.pack_dir);
@@ -36,8 +79,14 @@ int cmd_graph(int argc, const char **argv, const char *prefix)
{ OPTION_STRING, 'p', "pack-dir", &opts.pack_dir,
N_("dir"),
N_("The pack directory to store the graph") },
+ OPT_BOOL('r', "read", &opts.read,
+ N_("read graph file")),
OPT_BOOL('w', "write", &opts.write,
N_("write graph file")),
+ { OPTION_STRING, 'M', "graph-id", &opts.graph_id,
+ N_("oid"),
+ N_("An OID for a specific graph file in the pack-dir."),
+ PARSE_OPT_OPTARG, NULL, (intptr_t) "" },
OPT_END(),
};
@@ -52,6 +101,9 @@ int cmd_graph(int argc, const char **argv, const char *prefix)
builtin_graph_options,
builtin_graph_usage, 0);
+ if (opts.write + opts.read > 1)
+ usage_with_options(builtin_graph_usage, builtin_graph_options);
+
if (!opts.pack_dir) {
struct strbuf path = STRBUF_INIT;
strbuf_addstr(&path, get_object_directory());
@@ -59,6 +111,8 @@ int cmd_graph(int argc, const char **argv, const char *prefix)
opts.pack_dir = strbuf_detach(&path, NULL);
}
+ if (opts.read)
+ return graph_read();
if (opts.write)
return graph_write();
diff --git a/packed-graph.c b/packed-graph.c
index 9be9811667..eaa656becb 100644
--- a/packed-graph.c
+++ b/packed-graph.c
@@ -30,6 +30,11 @@
#define GRAPH_LAST_EDGE 0x80000000
+#define GRAPH_FANOUT_SIZE (4*256)
+#define GRAPH_CHUNKLOOKUP_SIZE (5 * 12)
+#define GRAPH_MIN_SIZE (GRAPH_CHUNKLOOKUP_SIZE + GRAPH_FANOUT_SIZE + \
+ GRAPH_OID_LEN + sizeof(struct packed_graph_header))
+
char* get_graph_filename_oid(const char *pack_dir,
struct object_id *oid)
{
@@ -43,6 +48,142 @@ char* get_graph_filename_oid(const char *pack_dir,
return strbuf_detach(&head_path, &len);
}
+/*
+ * Allocate a packed_graph with 'extra' trailing bytes for the pack_dir
+ * string. The fixed-size part is zeroed and graph_fd is set to -1; the
+ * trailing bytes are left uninitialized.
+ */
+static struct packed_graph *alloc_packed_graph(int extra)
+{
+	struct packed_graph *graph;
+
+	graph = xmalloc(st_add(sizeof(*graph), extra));
+	memset(graph, 0, sizeof(*graph));
+	graph->graph_fd = -1;
+
+	return graph;
+}
+
+/*
+ * Unmap the graph data and close the file descriptor held by 'g'.
+ * Returns 1 if the graph was open and has now been closed, or 0 if
+ * graph_fd was already negative and nothing was done.
+ */
+int close_graph(struct packed_graph *g)
+{
+	if (g->graph_fd >= 0) {
+		munmap((void *)g->data, g->data_len);
+		g->data = NULL;
+
+		close(g->graph_fd);
+		g->graph_fd = -1;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Close and free the graph that '*g' points to, then reset '*g' to NULL.
+ * Does nothing when 'g' or '*g' is NULL.
+ */
+static void free_packed_graph(struct packed_graph **g)
+{
+	if (g && *g) {
+		close_graph(*g);
+
+		free(*g);
+		*g = NULL;
+	}
+}
+
+/*
+ * Map the graph file at 'graph_file' and parse its header and chunk
+ * lookup table.
+ *
+ * Returns a newly allocated packed_graph whose chunk pointers refer into
+ * the mapping and whose pack_dir is a copy of 'pack_dir'. The mapping and
+ * descriptor are released with close_graph(). Returns NULL if the file
+ * cannot be opened or stat'ed. Dies if the file is too small, has the
+ * wrong signature or version, or has a malformed chunk table.
+ */
+struct packed_graph *load_packed_graph_one(const char *graph_file, const char *pack_dir)
+{
+	void *graph_map;
+	const unsigned char *data;
+	struct packed_graph_header *hdr;
+	size_t graph_size;
+	struct stat st;
+	uint32_t i;
+	unsigned int num_chunks;
+	struct packed_graph *graph;
+	int fd = git_open(graph_file);
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+
+	if (fd < 0)
+		return NULL;
+	if (fstat(fd, &st)) {
+		close(fd);
+		return NULL;
+	}
+	graph_size = xsize_t(st.st_size);
+
+	if (graph_size < GRAPH_MIN_SIZE) {
+		close(fd);
+		die("graph file %s is too small", graph_file);
+	}
+	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	data = (const unsigned char *)graph_map;
+
+	hdr = graph_map;
+	if (ntohl(hdr->graph_signature) != GRAPH_SIGNATURE) {
+		/* Copy the value out first: hdr points into the mapping. */
+		uint32_t signature = ntohl(hdr->graph_signature);
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph signature %X does not match signature %X",
+		    signature, GRAPH_SIGNATURE);
+	}
+	if (hdr->graph_version != GRAPH_VERSION) {
+		unsigned char version = hdr->graph_version;
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph version %X does not match version %X",
+		    version, GRAPH_VERSION);
+	}
+
+	/*
+	 * num_chunks comes from the file, and GRAPH_MIN_SIZE only accounts
+	 * for a five-entry table, so make sure every 12-byte entry read by
+	 * the loop below lies inside the mapping.
+	 */
+	num_chunks = hdr->num_chunks;
+	if (sizeof(*hdr) + 12 * (size_t)num_chunks > graph_size) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph file %s is too small to hold %u chunk entries",
+		    graph_file, num_chunks);
+	}
+
+	graph = alloc_packed_graph(strlen(pack_dir) + 1);
+
+	graph->hdr = hdr;
+	graph->graph_fd = fd;
+	graph->data = graph_map;
+	graph->data_len = graph_size;
+
+	last_chunk_id = 0;
+	last_chunk_offset = (uint64_t)sizeof(*hdr);
+	for (i = 0; i < num_chunks; i++) {
+		/* Each entry is a 4-byte chunk id followed by an 8-byte offset. */
+		const unsigned char *entry = data + sizeof(*hdr) + 12 * i;
+		uint32_t chunk_id = ntohl(*(const uint32_t *)entry);
+		uint64_t chunk_offset1 = ntohl(*(const uint32_t *)(entry + 4));
+		uint32_t chunk_offset2 = ntohl(*(const uint32_t *)(entry + 8));
+		uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2;
+
+		if (chunk_offset > graph_size - GIT_MAX_RAWSZ)
+			die("improper chunk offset %08x%08x", (uint32_t)(chunk_offset >> 32),
+			    (uint32_t)chunk_offset);
+
+		switch (chunk_id) {
+		case GRAPH_CHUNKID_OIDFANOUT:
+			graph->chunk_oid_fanout = data + chunk_offset;
+			break;
+
+		case GRAPH_CHUNKID_OIDLOOKUP:
+			graph->chunk_oid_lookup = data + chunk_offset;
+			break;
+
+		case GRAPH_CHUNKID_DATA:
+			graph->chunk_commit_data = data + chunk_offset;
+			break;
+
+		case GRAPH_CHUNKID_LARGEEDGES:
+			graph->chunk_large_edges = data + chunk_offset;
+			break;
+
+		case 0:
+			break;
+
+		default:
+			free_packed_graph(&graph);
+			die("unrecognized graph chunk id: %08x", chunk_id);
+		}
+
+		/*
+		 * The OID lookup chunk ends where the next chunk starts, so
+		 * its size, and with it the commit count, is only known once
+		 * the entry after it has been read.
+		 */
+		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP) {
+			if (!hdr->hash_len)
+				die("graph file %s has a zero hash length",
+				    graph_file);
+			if (chunk_offset < last_chunk_offset)
+				die("graph file %s has out-of-order chunk offsets",
+				    graph_file);
+			graph->num_commits = (chunk_offset - last_chunk_offset)
+					     / hdr->hash_len;
+		}
+
+		last_chunk_id = chunk_id;
+		last_chunk_offset = chunk_offset;
+	}
+
+	/* alloc_packed_graph() reserved strlen(pack_dir) + 1 bytes for this. */
+	strcpy(graph->pack_dir, pack_dir);
+	return graph;
+}
+
static void write_graph_chunk_fanout(
struct sha1file *f,
struct commit **commits, int nr_commits)
@@ -338,8 +479,8 @@ struct object_id *construct_graph(const char *pack_dir)
chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES;
chunk_ids[4] = 0;
- chunk_offsets[0] = sizeof(hdr) + 12 * 5; // Skip header and chunk list
- chunk_offsets[1] = chunk_offsets[0] + 256 * 4; // fanout table size
+ chunk_offsets[0] = sizeof(hdr) + GRAPH_CHUNKLOOKUP_SIZE;
+ chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.num; // lookup size
chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.num; // data size
chunk_offsets[4] = chunk_offsets[3] + 4 * num_long_edges;
@@ -361,7 +502,7 @@ struct object_id *construct_graph(const char *pack_dir)
sha1close(f, final_hash, CSUM_CLOSE | CSUM_FSYNC);
f_oid = (struct object_id *)malloc(sizeof(struct object_id));
- memcpy(f_oid->hash, final_hash, GIT_MAX_RAWSZ);
+ hashcpy(f_oid->hash, final_hash);
fname = get_graph_filename_oid(pack_dir, f_oid);
if (rename(graph_name, fname))
diff --git a/packed-graph.h b/packed-graph.h
index d4e10fb612..1a7eaa2a46 100644
--- a/packed-graph.h
+++ b/packed-graph.h
@@ -15,6 +15,31 @@ struct packed_graph_header {
unsigned char num_chunks;
};
+/*
+ * In-memory handle for one memory-mapped packed graph file.
+ * This declaration also declares an extern pointer variable named
+ * packed_graph.
+ */
+extern struct packed_graph {
+ /* descriptor of the open graph file; -1 when no file is open */
+ int graph_fd;
+
+ /* start and length of the mapping of the whole graph file */
+ const unsigned char *data;
+ size_t data_len;
+
+ /* file header; points at the start of the mapping */
+ const struct packed_graph_header *hdr;
+
+ /* NOTE(review): not assigned by load_packed_graph_one(); presumably the id of this graph file - confirm */
+ struct object_id oid;
+
+ /* size of the OID lookup chunk divided by hdr->hash_len, as computed by load_packed_graph_one() */
+ uint32_t num_commits;
+
+ /* each points into data at that chunk's offset; left zeroed when the chunk id is not in the file */
+ const unsigned char *chunk_oid_fanout;
+ const unsigned char *chunk_oid_lookup;
+ const unsigned char *chunk_commit_data;
+ const unsigned char *chunk_large_edges;
+
+ /* something like ".git/objects/pack" */
+ char pack_dir[FLEX_ARRAY]; /* more */
+} *packed_graph;
+
+extern int close_graph(struct packed_graph *g);
+
+extern struct packed_graph *load_packed_graph_one(const char *graph_file, const char *pack_dir);
+
extern struct object_id *construct_graph(const char *pack_dir);
#endif
diff --git a/t/t5319-graph.sh b/t/t5319-graph.sh
index 52e979dfd3..4975f65dee 100755
--- a/t/t5319-graph.sh
+++ b/t/t5319-graph.sh
@@ -3,8 +3,7 @@
test_description='packed graph'
. ./test-lib.sh
-test_expect_success \
- 'setup full repo' \
+test_expect_success 'setup full repo' \
'rm -rf .git &&
mkdir full &&
cd full &&
@@ -13,12 +12,10 @@ test_expect_success \
git config pack.threads 1 &&
packdir=".git/objects/pack"'
-test_expect_success \
- 'write graph with no packs' \
+test_expect_success 'write graph with no packs' \
'git graph --write --pack-dir .'
-test_expect_success \
- 'create commits and repack' \
+test_expect_success 'create commits and repack' \
'for i in $(test_seq 5)
do
echo $i >$i.txt &&
@@ -28,13 +25,23 @@ test_expect_success \
done &&
git repack'
-test_expect_success \
- 'write graph' \
+# Write the expected output of "git graph --read" to the file "expect".
+# $1 is the expected commit count, $2 the expected pack directory.
+_graph_read_expect() {
+ cat >expect <<- EOF
+header: 43475048 01 01 14 04
+num_commits: $1
+chunks: oid_fanout oid_lookup commit_metadata large_edges
+pack_dir: $2
+EOF
+}
+
+test_expect_success 'write graph' \
'graph1=$(git graph --write) &&
- test_path_is_file ${packdir}/graph-${graph1}.graph'
+ test_path_is_file ${packdir}/graph-${graph1}.graph &&
+ git graph --read --graph-id=${graph1} >output &&
+ _graph_read_expect "5" "${packdir}" &&
+ cmp expect output'
-test_expect_success \
- 'Add more commits' \
+test_expect_success 'Add more commits' \
'git reset --hard commits/3 &&
for i in $(test_seq 6 10)
do
@@ -61,23 +68,26 @@ test_expect_success \
git merge commits/5 commits/13 &&
git repack'
-test_expect_success \
- 'write graph with merges' \
+test_expect_success 'write graph with merges' \
'graph2=$(git graph --write) &&
- test_path_is_file ${packdir}/graph-${graph2}.graph'
+ test_path_is_file ${packdir}/graph-${graph2}.graph &&
+ git graph --read --graph-id=${graph2} >output &&
+ _graph_read_expect "18" "${packdir}" &&
+ cmp expect output'
-test_expect_success \
- 'setup bare repo' \
+test_expect_success 'setup bare repo' \
'cd .. &&
git clone --bare full bare &&
cd bare &&
git config core.graph true &&
git config pack.threads 1 &&
- baredir="objects/pack"'
+ baredir="./objects/pack"'
-test_expect_success \
- 'write graph in bare repo' \
+test_expect_success 'write graph in bare repo' \
'graphbare=$(git graph --write) &&
- test_path_is_file ${baredir}/graph-${graphbare}.graph'
+ test_path_is_file ${baredir}/graph-${graphbare}.graph &&
+ git graph --read --graph-id=${graphbare} >output &&
+ _graph_read_expect "18" "${baredir}" &&
+ cmp expect output'
test_done
--
2.16.0
next prev parent reply other threads:[~2018-01-25 14:03 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-25 14:02 [PATCH 00/14] Serialized Commit Graph Derrick Stolee
2018-01-25 14:02 ` [PATCH 01/14] graph: add packed graph design document Derrick Stolee
2018-01-25 20:04 ` Stefan Beller
2018-01-26 12:49 ` Derrick Stolee
2018-01-26 18:17 ` Stefan Beller
2018-01-25 21:14 ` Junio C Hamano
2018-01-26 13:06 ` Derrick Stolee
2018-01-26 14:13 ` Duy Nguyen
2018-01-25 14:02 ` [PATCH 02/14] packed-graph: add core.graph setting Derrick Stolee
2018-01-25 20:17 ` Stefan Beller
2018-01-25 20:40 ` Derrick Stolee
2018-01-25 21:43 ` Junio C Hamano
2018-01-26 13:08 ` Derrick Stolee
2018-01-25 14:02 ` [PATCH 03/14] packed-graph: create git-graph builtin Derrick Stolee
2018-01-25 21:45 ` Stefan Beller
2018-01-26 13:13 ` Derrick Stolee
2018-01-25 23:01 ` Junio C Hamano
2018-01-26 13:14 ` Derrick Stolee
2018-01-26 14:16 ` Duy Nguyen
2018-01-25 14:02 ` [PATCH 04/14] packed-graph: add format document Derrick Stolee
2018-01-25 22:06 ` Junio C Hamano
2018-01-25 22:18 ` Stefan Beller
2018-01-25 22:29 ` Junio C Hamano
2018-01-26 13:22 ` Derrick Stolee
2018-01-25 22:07 ` Stefan Beller
2018-01-26 13:25 ` Derrick Stolee
2018-01-25 14:02 ` [PATCH 05/14] packed-graph: implement construct_graph() Derrick Stolee
2018-01-25 23:21 ` Stefan Beller
2018-01-26 20:47 ` Junio C Hamano
2018-01-26 20:55 ` Junio C Hamano
2018-01-26 21:14 ` Andreas Schwab
2018-01-26 22:04 ` Junio C Hamano
2018-01-25 14:02 ` [PATCH 06/14] packed-graph: implement git-graph --write Derrick Stolee
2018-01-25 23:28 ` Stefan Beller
2018-01-26 13:28 ` Derrick Stolee
2018-01-25 14:02 ` Derrick Stolee [this message]
2018-01-25 14:02 ` [PATCH 08/14] graph: implement git-graph --update-head Derrick Stolee
2018-01-25 14:02 ` [PATCH 09/14] packed-graph: implement git-graph --clear Derrick Stolee
2018-01-25 23:35 ` Stefan Beller
2018-01-25 14:02 ` [PATCH 10/14] packed-graph: teach git-graph --delete-expired Derrick Stolee
2018-01-25 14:02 ` [PATCH 11/14] commit: integrate packed graph with commit parsing Derrick Stolee
2018-01-26 19:38 ` Stefan Beller
2018-01-25 14:02 ` [PATCH 12/14] packed-graph: read only from specific pack-indexes Derrick Stolee
2018-01-25 14:02 ` [PATCH 13/14] packed-graph: close under reachability Derrick Stolee
2018-01-25 14:02 ` [PATCH 14/14] packed-graph: teach git-graph to read commits Derrick Stolee
2018-01-25 15:46 ` [PATCH 00/14] Serialized Commit Graph Ævar Arnfjörð Bjarmason
2018-01-25 16:09 ` Derrick Stolee
2018-01-25 23:06 ` Ævar Arnfjörð Bjarmason
2018-01-26 12:15 ` Derrick Stolee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180125140231.65604-8-dstolee@microsoft.com \
--to=stolee@gmail.com \
--cc=dstolee@microsoft.com \
--cc=git@jeffhostetler.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
--cc=sbeller@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.