* [PATCH 3/6] Allow the transport fetch command to add additional refs
2010-10-03 11:33 [RFC] New type of remote helpers Tomas Carnecky
2010-10-03 12:21 ` [PATCH 1/6] Remote helper: accept ':<value> <name>' as a response to 'list' Tomas Carnecky
2010-10-03 12:21 ` [PATCH 2/6] Allow more than one keepfile in the transport Tomas Carnecky
@ 2010-10-03 12:21 ` Tomas Carnecky
2010-10-05 2:18 ` Jonathan Nieder
2010-10-03 12:21 ` [PATCH 4/6] Rename get_mode() to decode_tree_mode() and export it Tomas Carnecky
` (3 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Tomas Carnecky @ 2010-10-03 12:21 UTC (permalink / raw)
To: git; +Cc: Tomas Carnecky
The fetch transport command (in particular in remote helpers) may need to create
or update additional refs which are used internally by the helper, but which
it doesn't want to present to the user. Those refs are referred to as 'silent'
throughout the code because git should be silent about their presence, yet
still process them just like the other refs.
Example use case:
Remote helpers such as those for svn may choose to save the Git SHA1 -> Subversion
revision mapping as notes attached to the commits (as opposed to strings in the
commit message itself). The helper would need to update the notes on each fetch,
but the user should not be bothered by the presence of that ref. The remote
helper can update the notes tree through fast-import and then inform Git core
that it should silently update the notes ref.
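For illustration, a minimal sketch of the fetch exchange with such a helper
(the notes ref name is made up; the new 'silent <refname> <sha1>' response is
the one parsed by the transport-helper code below, and a blank line still
terminates the helper's reply):
  # git -> helper
  fetch <sha1> refs/heads/master
  # helper -> git, after the import has completed
  silent refs/notes/svn/revs <sha1 of the updated notes commit>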
Signed-off-by: Tomas Carnecky <tom@dbservice.com>
---
builtin/clone.c | 11 +++++++----
builtin/fetch.c | 15 ++++++++++++---
transport-helper.c | 32 ++++++++++++++++++++++++++++----
transport.c | 16 ++++++++++------
transport.h | 6 ++++--
5 files changed, 61 insertions(+), 19 deletions(-)
diff --git a/builtin/clone.c b/builtin/clone.c
index 19ed640..78355b6 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -348,13 +348,16 @@ static struct ref *wanted_peer_refs(const struct ref *refs,
return local_refs;
}
-static void write_remote_refs(const struct ref *local_refs)
+static void write_remote_refs(const struct ref *local_refs, struct ref *silent)
{
const struct ref *r;
for (r = local_refs; r; r = r->next)
add_extra_ref(r->peer_ref->name, r->old_sha1, 0);
+ for (r = silent; r; r = r->next)
+ add_extra_ref(r->name, r->new_sha1, 0);
+
pack_refs(PACK_REFS_ALL);
clear_extra_refs();
}
@@ -369,7 +372,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
const struct ref *refs, *remote_head;
const struct ref *remote_head_points_at;
const struct ref *our_head_points_at;
- struct ref *mapped_refs;
+ struct ref *mapped_refs, *silent = NULL;
struct strbuf key = STRBUF_INIT, value = STRBUF_INIT;
struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT;
struct transport *transport = NULL;
@@ -542,14 +545,14 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
refs = transport_get_remote_refs(transport);
if (refs) {
mapped_refs = wanted_peer_refs(refs, refspec);
- transport_fetch_refs(transport, mapped_refs);
+ transport_fetch_refs(transport, mapped_refs, &silent);
}
}
if (refs) {
clear_extra_refs();
- write_remote_refs(mapped_refs);
+ write_remote_refs(mapped_refs, silent);
remote_head = find_ref_by_name(refs, "HEAD");
remote_head_points_at =
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 6fc5047..71db090 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -313,7 +313,7 @@ static int update_local_ref(struct ref *ref,
}
static int store_updated_refs(const char *raw_url, const char *remote_name,
- struct ref *ref_map)
+ struct ref *ref_map, struct ref *silent)
{
FILE *fp;
struct commit *commit;
@@ -411,6 +411,13 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
fprintf(stderr, " %s\n", note);
}
}
+
+ /* Also update the silent refs. */
+ for (rm = silent; rm; rm = rm->next) {
+ snprintf(note, 1024, "note here"); /* TODO */
+ update_local_ref(rm, rm->name, note);
+ }
+
free(url);
fclose(fp);
if (rc & STORE_REF_ERROR_DF_CONFLICT)
@@ -496,13 +503,15 @@ static int quickfetch(struct ref *ref_map)
static int fetch_refs(struct transport *transport, struct ref *ref_map)
{
+ struct ref *silent = NULL;
+
int ret = quickfetch(ref_map);
if (ret)
- ret = transport_fetch_refs(transport, ref_map);
+ ret = transport_fetch_refs(transport, ref_map, &silent);
if (!ret)
ret |= store_updated_refs(transport->url,
transport->remote->name,
- ref_map);
+ ref_map, silent);
transport_unlock_pack(transport);
return ret;
}
diff --git a/transport-helper.c b/transport-helper.c
index dcaaa89..c0133ca 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -330,8 +330,29 @@ static void map_impure_ref(int nr_heads, struct ref **to_fetch, char *map)
}
}
+/* `buf` points to the 'silent' response from the helper. Parse it and
+ * add the ref to the `silent` list. */
+static void add_silent_ref(struct strbuf *buf, struct ref **silent)
+{
+ struct ref *ref;
+ char *eon;
+
+ if (!silent)
+ return;
+
+ eon = strchr(buf->buf + 7, ' ');
+ if (!eon) {
+ warning("Malformed helper response: %s", buf->buf);
+ return;
+ }
+
+ *eon = '\0';
+ ref = alloc_ref(buf->buf + 7);
+ get_sha1_hex(eon + 1, ref->new_sha1);
+
+ ref->next = *silent;
+ *silent = ref;
+}
+
static int fetch_with_fetch(struct transport *transport,
- int nr_heads, struct ref **to_fetch)
+ int nr_heads, struct ref **to_fetch,
+ struct ref **silent)
{
struct helper_data *data = transport->data;
int i;
@@ -365,6 +386,8 @@ static int fetch_with_fetch(struct transport *transport,
transport_keep(transport, name);
} else if (!prefixcmp(buf.buf, "map ")) {
map_impure_ref(nr_heads, to_fetch, buf.buf + 4);
+ } else if (!prefixcmp(buf.buf, "silent ")) {
+ add_silent_ref(&buf, silent);
}
else if (!buf.len)
break;
@@ -559,14 +582,15 @@ static int connect_helper(struct transport *transport, const char *name,
}
static int fetch(struct transport *transport,
- int nr_heads, struct ref **to_fetch)
+ int nr_heads, struct ref **to_fetch,
+ struct ref **silent)
{
struct helper_data *data = transport->data;
int i, count;
if (process_connect(transport, 0)) {
do_take_over(transport);
- return transport->fetch(transport, nr_heads, to_fetch);
+ return transport->fetch(transport, nr_heads, to_fetch, silent);
}
count = 0;
@@ -578,7 +602,7 @@ static int fetch(struct transport *transport,
return 0;
if (data->fetch)
- return fetch_with_fetch(transport, nr_heads, to_fetch);
+ return fetch_with_fetch(transport, nr_heads, to_fetch, silent);
if (data->import)
return fetch_with_import(transport, nr_heads, to_fetch);
diff --git a/transport.c b/transport.c
index df2baa7..eaab276 100644
--- a/transport.c
+++ b/transport.c
@@ -253,7 +253,8 @@ static struct ref *get_refs_via_rsync(struct transport *transport, int for_push)
}
static int fetch_objs_via_rsync(struct transport *transport,
- int nr_objs, struct ref **to_fetch)
+ int nr_objs, struct ref **to_fetch,
+ struct ref **silent)
{
struct strbuf buf = STRBUF_INIT;
struct child_process rsync;
@@ -428,7 +429,8 @@ static struct ref *get_refs_from_bundle(struct transport *transport, int for_pus
}
static int fetch_refs_from_bundle(struct transport *transport,
- int nr_heads, struct ref **to_fetch)
+ int nr_heads, struct ref **to_fetch,
+ struct ref **silent)
{
struct bundle_transport_data *data = transport->data;
return unbundle(&data->header, data->fd);
@@ -508,7 +510,8 @@ static struct ref *get_refs_via_connect(struct transport *transport, int for_pus
}
static int fetch_refs_via_pack(struct transport *transport,
- int nr_heads, struct ref **to_fetch)
+ int nr_heads, struct ref **to_fetch,
+ struct ref **silent)
{
struct git_transport_data *data = transport->data;
char **heads = xmalloc(nr_heads * sizeof(*heads));
@@ -1083,7 +1086,8 @@ const struct ref *transport_get_remote_refs(struct transport *transport)
return transport->remote_refs;
}
-int transport_fetch_refs(struct transport *transport, struct ref *refs)
+int transport_fetch_refs(struct transport *transport, struct ref *refs,
+ struct ref **silent)
{
int rc;
int nr_heads = 0, nr_alloc = 0, nr_refs = 0;
@@ -1113,9 +1117,9 @@ int transport_fetch_refs(struct transport *transport, struct ref *refs)
heads[nr_heads++] = rm;
}
- rc = transport->fetch(transport, nr_heads, heads);
-
+ rc = transport->fetch(transport, nr_heads, heads, silent);
free(heads);
+
return rc;
}
diff --git a/transport.h b/transport.h
index 6320d28..22daf60 100644
--- a/transport.h
+++ b/transport.h
@@ -52,7 +52,7 @@ struct transport {
* get_refs_list(), it should set the old_sha1 fields in the
* provided refs now.
**/
- int (*fetch)(struct transport *transport, int refs_nr, struct ref **refs);
+ int (*fetch)(struct transport *transport, int refs_nr, struct ref **refs, struct ref **silent);
/**
* Push the objects and refs. Send the necessary objects, and
@@ -149,7 +149,9 @@ int transport_push(struct transport *connection,
const struct ref *transport_get_remote_refs(struct transport *transport);
-int transport_fetch_refs(struct transport *transport, struct ref *refs);
+int transport_fetch_refs(struct transport *transport, struct ref *refs,
+ struct ref **silent);
+
void transport_unlock_pack(struct transport *transport);
int transport_disconnect(struct transport *transport);
char *transport_anonymize_url(const char *url);
--
1.7.3.37.gb6088b
* [PATCH 5/6] Introduce the git fast-import-helper
2010-10-03 11:33 [RFC] New type of remote helpers Tomas Carnecky
` (3 preceding siblings ...)
2010-10-03 12:21 ` [PATCH 4/6] Rename get_mode() to decode_tree_mode() and export it Tomas Carnecky
@ 2010-10-03 12:21 ` Tomas Carnecky
2010-10-03 15:31 ` Jonathan Nieder
2010-10-03 12:21 ` [PATCH 6/6] Add git-remote-svn Tomas Carnecky
2010-10-03 13:56 ` [RFC] New type of remote helpers Sverre Rabbelier
6 siblings, 1 reply; 21+ messages in thread
From: Tomas Carnecky @ 2010-10-03 12:21 UTC (permalink / raw)
To: git; +Cc: Tomas Carnecky
The g-f-i-h (git-fast-import-helper) is a heavily modified (and simplified
where possible) copy of git-fast-import. It has a few very important changes
which make it suitable for use in the new generation of remote helpers.
1) It does not update refs itself. Instead, for each 'mark' it sees, it
writes the SHA1 of the corresponding git object to stdout. The remote
helper can read this data and pass it along to core git for example.
2) It does not read/write mark files itself. Managing the marks is now
up to the application which uses g-f-i-h. To support that, a new
command was added: 'mark <name> <sha1>'. It can be used to feed
g-f-i-h with existing marks from earlier sessions. Also, marks
can now be arbitrary strings, not just numbers. This allows remote
helpers to use, for example, whole revision strings (r42 for svn, or
mercurial changeset IDs). See the example session below.
3) Memory management has been significantly simplified. There are no more
pools and custom allocators; it uses plain malloc/free, and `struct
hash_table` instead of custom data structures. This may make it a bit
slower than the original, but on the other hand it reduces the
complexity of the source code.
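To illustrate points 1) and 2), here is a minimal sketch of a session (the
mark name 'r42' and the commit contents are made up; the 'mark :<name> <sha1>'
output format is the one printed by insert_mark() in the code below, and the
commit stream syntax is the usual fast-import syntax):
  # fed to git-fast-import-helper on stdin: preload a mark from an
  # earlier session, then build a new commit on top of it
  mark r42 <sha1 of the previously imported commit>
  commit refs/svn/trunk
  mark :r43
  committer C O Mitter <committer@example.com> 1234567890 +0000
  data <<END
  import of r43
  END
  from :r42
  # reported by the helper on stdout once the marked object is written;
  # the remote helper reads this instead of a marks file
  mark :r43 <sha1 of the new commit object>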
Signed-off-by: Tomas Carnecky <tom@dbservice.com>
---
.gitignore | 1 +
Makefile | 1 +
fast-import-helper.c | 2201 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 2203 insertions(+), 0 deletions(-)
create mode 100644 fast-import-helper.c
diff --git a/.gitignore b/.gitignore
index 20560b8..c8aa8c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@
/git-describe
/git-fast-export
/git-fast-import
+/git-fast-import-helper
/git-fetch
/git-fetch--tool
/git-fetch-pack
diff --git a/Makefile b/Makefile
index 8a56b9a..f8a9c40 100644
--- a/Makefile
+++ b/Makefile
@@ -402,6 +402,7 @@ EXTRA_PROGRAMS =
PROGRAMS += $(EXTRA_PROGRAMS)
PROGRAM_OBJS += fast-import.o
+PROGRAM_OBJS += fast-import-helper.o
PROGRAM_OBJS += imap-send.o
PROGRAM_OBJS += shell.o
PROGRAM_OBJS += show-index.o
diff --git a/fast-import-helper.c b/fast-import-helper.c
new file mode 100644
index 0000000..b1f86a0
--- /dev/null
+++ b/fast-import-helper.c
@@ -0,0 +1,2201 @@
+
+#include "builtin.h"
+#include "cache.h"
+#include "object.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
+#include "delta.h"
+#include "pack.h"
+#include "refs.h"
+#include "csum-file.h"
+#include "quote.h"
+#include "exec_cmd.h"
+#include "hash.h"
+#include "tree-walk.h"
+
+#define DEPTH_BITS 13
+#define MAX_DEPTH ((1<<DEPTH_BITS)-1)
+
+struct fi_object
+{
+ struct fi_object *next;
+ struct pack_idx_entry idx;
+ uint32_t type : TYPE_BITS,
+ depth : DEPTH_BITS;
+};
+
+struct last_object
+{
+ struct strbuf data;
+ off_t offset;
+ unsigned int depth;
+ unsigned no_swap : 1;
+};
+
+/* An atom uniquely identifies an array of data (usually strings). */
+struct fi_atom
+{
+ struct fi_atom *next;
+ unsigned short len;
+ char data[FLEX_ARRAY];
+};
+
+/* The stream can use marks to mark objects (blobs, commits) with a unique
+ * string. After the object is added to the object database and we know its
+ * SHA1, we report that to stdout. */
+struct fi_mark
+{
+ struct fi_mark *next;
+ struct fi_atom *atom;
+ unsigned char sha1[20];
+};
+
+struct fi_tree;
+struct fi_tree_entry
+{
+ struct fi_tree *tree;
+
+ struct fi_atom *name;
+ struct fi_tree_entry_ms
+ {
+ unsigned int mode;
+ unsigned char sha1[20];
+ } versions[2];
+};
+
+struct fi_tree
+{
+ struct fi_tree *next;
+ unsigned int entry_capacity;
+ unsigned int entry_count;
+ unsigned int delta_depth;
+ struct fi_tree_entry *entries[FLEX_ARRAY];
+};
+
+struct fi_branch
+{
+ struct fi_branch *table_next_branch;
+ struct fi_branch *active_next_branch;
+ const char *name;
+ struct fi_tree_entry branch_tree;
+ uintmax_t num_notes;
+ unsigned active : 1;
+ unsigned char sha1[20];
+};
+
+struct fi_tag
+{
+ struct fi_tag *next_tag;
+ const char *name;
+ unsigned char sha1[20];
+};
+
+struct hash_list
+{
+ struct hash_list *next;
+ unsigned char sha1[20];
+};
+
+typedef enum {
+ WHENSPEC_RAW = 1,
+ WHENSPEC_RFC2822,
+ WHENSPEC_NOW
+} whenspec_type;
+
+/* Configured limits on output */
+static unsigned long max_depth = 10;
+static off_t max_packsize = 32 * 1024 * 1024;
+static uintmax_t big_file_threshold = 512 * 1024 * 1024;
+static int force_update;
+static int pack_compression_level = Z_DEFAULT_COMPRESSION;
+static int pack_compression_seen;
+
+/* The .pack file being generated */
+static struct sha1file *pack_file;
+static struct packed_git *pack_data;
+static off_t pack_size;
+
+/* Our last blob */
+static struct last_object last_blob = { STRBUF_INIT, 0, 0, 0 };
+
+/* Branch data */
+static struct hash_table branches;
+static unsigned long max_active_branches = 5;
+static unsigned long cur_active_branches;
+static struct fi_branch *active_branches;
+
+/* Tag data */
+static struct hash_table tags;
+
+/* Input stream parsing */
+static whenspec_type whenspec = WHENSPEC_RAW;
+static struct strbuf command_buf = STRBUF_INIT;
+
+static void end_packfile(void);
+static struct fi_atom *to_atom(const char *s, unsigned short len);
+static enum object_type fi_sha1_object_type(unsigned char sha1[20]);
+
+
+static int git_pack_config(const char *k, const char *v, void *cb)
+{
+ if (!strcmp(k, "pack.depth")) {
+ max_depth = git_config_int(k, v);
+ if (max_depth > MAX_DEPTH)
+ max_depth = MAX_DEPTH;
+ return 0;
+ }
+ if (!strcmp(k, "pack.compression")) {
+ int level = git_config_int(k, v);
+ if (level == -1)
+ level = Z_DEFAULT_COMPRESSION;
+ else if (level < 0 || level > Z_BEST_COMPRESSION)
+ die("bad pack compression level %d", level);
+ pack_compression_level = level;
+ pack_compression_seen = 1;
+ return 0;
+ }
+ if (!strcmp(k, "pack.indexversion")) {
+ pack_idx_default_version = git_config_int(k, v);
+ if (pack_idx_default_version > 2)
+ die("bad pack.indexversion=%"PRIu32,
+ pack_idx_default_version);
+ return 0;
+ }
+ if (!strcmp(k, "pack.packsizelimit")) {
+ max_packsize = git_config_ulong(k, v);
+ return 0;
+ }
+ if (!strcmp(k, "core.bigfilethreshold")) {
+ long n = git_config_int(k, v);
+ big_file_threshold = 0 < n ? n : 0;
+ }
+ return git_default_config(k, v, cb);
+}
+
+static NORETURN void die_nicely(const char *err, va_list params)
+{
+ static int zombie;
+ char message[2 * PATH_MAX];
+
+ vsnprintf(message, sizeof(message), err, params);
+ fputs("fatal: ", stderr);
+ fputs(message, stderr);
+ fputc('\n', stderr);
+
+ if (!zombie) {
+ zombie = 1;
+ end_packfile();
+ }
+ exit(128);
+}
+
+
+/**
+ * Misc methods
+ */
+
+static unsigned int hc_str(const char *s, size_t len)
+{
+ unsigned int r = 0;
+ while (len-- > 0)
+ r = r * 31 + *s++;
+ return r;
+}
+
+
+/**
+ * Command buffer
+ */
+
+static int read_next_command(void)
+{
+ static int stdin_eof = 0;
+
+ if (stdin_eof) {
+ return EOF;
+ }
+
+ do {
+ strbuf_detach(&command_buf, NULL);
+ stdin_eof = strbuf_getline(&command_buf, stdin, '\n');
+ if (stdin_eof)
+ return EOF;
+ } while (command_buf.buf[0] == '#');
+ //fprintf(stderr, "Command: %s\n", command_buf.buf);
+
+ return 0;
+}
+
+static void skip_optional_lf(void)
+{
+ if (command_buf.buf[0] == '\n')
+ read_next_command();
+}
+
+
+/**
+ * Object cache
+ */
+
+static struct hash_table objects;
+
+static struct fi_object *new_object(unsigned char *sha1)
+{
+ struct fi_object *e = malloc(sizeof(struct fi_object));
+ hashcpy(e->idx.sha1, sha1);
+ return e;
+}
+
+static struct fi_object *find_object(unsigned char *sha1)
+{
+ unsigned int hash = *(unsigned int *) sha1;
+ struct fi_object *head = lookup_hash(hash, &objects);
+
+ while (head) {
+ if (!hashcmp(sha1, head->idx.sha1))
+ return head;
+
+ head = head->next;
+ }
+
+ return NULL;
+}
+
+static struct fi_object *insert_object(unsigned char *sha1)
+{
+ unsigned int hash = *(unsigned int *) sha1;
+ struct fi_object *oe, *head = lookup_hash(hash, &objects);
+ void **ptr;
+
+ oe = head;
+ while (oe) {
+ if (!hashcmp(sha1, oe->idx.sha1))
+ return oe;
+
+ oe = oe->next;
+ }
+
+ oe = new_object(sha1);
+ oe->next = head;
+ oe->idx.offset = 0;
+
+ ptr = insert_hash(hash, oe, &objects);
+ if (ptr)
+ *ptr = oe;
+
+ return oe;
+}
+
+static int free_object(void *data)
+{
+ free(data);
+ return 0;
+}
+
+
+/**
+ * Atoms
+ */
+
+static struct hash_table atoms;
+
+static struct fi_atom *to_atom(const char *s, unsigned short len)
+{
+ unsigned int hash = hc_str(s, len);
+ struct fi_atom *atom, *head = lookup_hash(hash, &atoms);
+
+ atom = head;
+ while (atom) {
+ if (atom->len == len && !strncmp(s, atom->data, len))
+ return atom;
+ atom = atom->next;
+ }
+
+ atom = malloc(sizeof(struct fi_atom) + len + 1);
+ atom->len = len;
+ strncpy(atom->data, s, len);
+ atom->data[len] = 0;
+ atom->next = head;
+
+ insert_hash(hash, atom, &atoms);
+
+ return atom;
+}
+
+/**
+ * Mark cache
+ */
+
+static struct hash_table marks;
+
+static void insert_mark(struct fi_atom *atom, unsigned char sha1[20])
+{
+ unsigned int hash = hc_str(atom->data, atom->len);
+ struct fi_mark *mark, *head = lookup_hash(hash, &marks);
+ void **ptr;
+
+ /* If the mark already exists, overwrite its value. */
+ mark = head;
+ while (mark) {
+ if (atom == mark->atom) {
+ hashcpy(mark->sha1, sha1);
+ goto out;
+ }
+
+ mark = mark->next;
+ }
+
+ mark = malloc(sizeof(struct fi_mark));
+ mark->next = head;
+ mark->atom = atom;
+ hashcpy(mark->sha1, sha1);
+
+ ptr = insert_hash(hash, mark, &marks);
+ if (ptr)
+ *ptr = mark;
+
+out:
+ /* Dump the mark mapping to stdout. */
+ fprintf(stdout, "mark :%s %s\n", atom->data, sha1_to_hex(sha1));
+ //fprintf(stderr, "mark :%s %s\n", atom->data, sha1_to_hex(sha1));
+ fflush(stdout); fflush(stderr);
+}
+
+
+/* Parse an atom in the string, set *atom and return the end of the atom. */
+static const char *parse_mark_to_atom(const char *s, struct fi_atom **atom)
+{
+ const char *end = strchr(s, ' ');
+ unsigned int len = end ? end - s : strlen(s);
+ *atom = to_atom(s, len);
+ return s + len;
+}
+
+static const char *find_mark(const char *mark, unsigned char sha1[20])
+{
+ struct fi_atom *atom;
+ const char *end = parse_mark_to_atom(mark, &atom);
+ unsigned int hash = hc_str(mark, end - mark);
+ struct fi_mark *head = lookup_hash(hash, &marks);
+
+ while (head) {
+ if (head->atom == atom) {
+ hashcpy(sha1, head->sha1);
+ return end;
+ }
+ head = head->next;
+ }
+
+ die("Did not find mark '%s'", mark);
+}
+
+
+/**
+ * Branch cache
+ */
+
+static struct fi_branch *lookup_branch(const char *name)
+{
+ unsigned int hash = hc_str(name, strlen(name));
+ struct fi_branch *b = lookup_hash(hash, &branches);
+
+ while (b) {
+ if (!strcmp(name, b->name))
+ return b;
+ b = b->table_next_branch;
+ }
+
+ return NULL;
+}
+
+static struct fi_branch *new_branch(const char *name)
+{
+ unsigned int hash = hc_str(name, strlen(name));
+ struct fi_branch *b = lookup_branch(name);
+ void **ptr;
+
+ if (b)
+ die("Invalid attempt to create duplicate branch: %s", name);
+ switch (check_ref_format(name)) {
+ case 0: break; /* it's valid */
+ case CHECK_REF_FORMAT_ONELEVEL:
+ break; /* valid, but too few '/', allow anyway */
+ default:
+ die("Branch name doesn't conform to GIT standards: %s", name);
+ }
+
+ b = calloc(1, sizeof(struct fi_branch));
+ b->name = strdup(name);
+ b->table_next_branch = lookup_hash(hash, &branches);
+ b->branch_tree.versions[0].mode = S_IFDIR;
+ b->branch_tree.versions[1].mode = S_IFDIR;
+ b->num_notes = 0;
+ b->active = 0;
+ ptr = insert_hash(hash, b, &branches);
+ if (ptr)
+ *ptr = b;
+
+ return b;
+}
+
+
+/**
+ * Tree cache
+ */
+
+static struct fi_tree *new_tree_content(unsigned int cnt)
+{
+ struct fi_tree *t;
+
+ t = malloc(sizeof(*t) + sizeof(t->entries[0]) * cnt);
+ t->next = NULL;
+ t->entry_capacity = cnt;
+ t->entry_count = 0;
+ t->delta_depth = 0;
+
+ return t;
+}
+
+static void release_tree_entry(struct fi_tree_entry *e);
+static void release_tree_content(struct fi_tree *t)
+{
+ free(t);
+}
+
+static void release_tree_content_recursive(struct fi_tree *t)
+{
+ unsigned int i;
+ for (i = 0; i < t->entry_count; i++)
+ release_tree_entry(t->entries[i]);
+ release_tree_content(t);
+}
+
+static struct fi_tree *grow_tree_content(
+ struct fi_tree *t,
+ int amt)
+{
+ struct fi_tree *r = new_tree_content(t->entry_count + amt);
+ r->entry_count = t->entry_count;
+ r->delta_depth = t->delta_depth;
+ memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0]));
+ release_tree_content(t);
+ return r;
+}
+
+static struct fi_tree_entry *new_tree_entry(void)
+{
+ return xmalloc(sizeof(struct fi_tree_entry));
+}
+
+static void release_tree_entry(struct fi_tree_entry *e)
+{
+ free(e);
+}
+
+static struct fi_tree *dup_tree_content(struct fi_tree *s)
+{
+ struct fi_tree *d;
+ struct fi_tree_entry *a, *b;
+ unsigned int i;
+
+ if (!s)
+ return NULL;
+ d = new_tree_content(s->entry_count);
+ for (i = 0; i < s->entry_count; i++) {
+ a = s->entries[i];
+ b = new_tree_entry();
+ memcpy(b, a, sizeof(*a));
+ if (a->tree && is_null_sha1(b->versions[1].sha1))
+ b->tree = dup_tree_content(a->tree);
+ else
+ b->tree = NULL;
+ d->entries[i] = b;
+ }
+ d->entry_count = s->entry_count;
+ d->delta_depth = s->delta_depth;
+
+ return d;
+}
+
+
+/**
+ * Pack file handling
+ */
+
+static void start_packfile(void)
+{
+ static char tmpfile[PATH_MAX];
+ struct pack_header hdr;
+ int pack_fd;
+
+ pack_fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_pack_XXXXXX");
+ pack_data = xcalloc(1, sizeof(struct packed_git) + strlen(tmpfile) + 2);
+ strcpy(pack_data->pack_name, tmpfile);
+ pack_data->pack_fd = pack_fd;
+ pack_file = sha1fd(pack_fd, pack_data->pack_name);
+
+ hdr.hdr_signature = htonl(PACK_SIGNATURE);
+ hdr.hdr_version = htonl(2);
+ hdr.hdr_entries = 0;
+ sha1write(pack_file, &hdr, sizeof(hdr));
+
+ pack_size = sizeof(hdr);
+
+ for_each_hash(&objects, free_object);
+ free_hash(&objects);
+}
+
+static struct pack_idx_entry **create_index_iter;
+static int fi_add_to_index(void *data)
+{
+ struct fi_object *oe = data;
+ while (oe) {
+ *create_index_iter++ = &oe->idx;
+ oe = oe->next;
+ }
+
+ return 0;
+}
+
+static const char *create_index(void)
+{
+ const char *tmpfile;
+ struct pack_idx_entry **idx, **last;
+
+ /* Build the table of object IDs. */
+ idx = xmalloc(objects.nr * sizeof(*idx));
+ create_index_iter = idx;
+ for_each_hash(&objects, fi_add_to_index);
+ last = idx + objects.nr;
+ if (create_index_iter != last)
+ die("internal consistency error creating the index");
+
+ tmpfile = write_idx_file(NULL, idx, objects.nr, pack_data->sha1);
+ free(idx);
+ return tmpfile;
+}
+
+static char *keep_pack(const char *curr_index_name)
+{
+ static char name[PATH_MAX];
+ static const char *keep_msg = "fast-import";
+ int keep_fd;
+
+ keep_fd = odb_pack_keep(name, sizeof(name), pack_data->sha1);
+ if (keep_fd < 0)
+ die_errno("cannot create keep file");
+ write_or_die(keep_fd, keep_msg, strlen(keep_msg));
+ if (close(keep_fd))
+ die_errno("failed to write keep file");
+
+ snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
+ get_object_directory(), sha1_to_hex(pack_data->sha1));
+ if (move_temp_to_file(pack_data->pack_name, name))
+ die("cannot store pack file");
+
+ snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
+ get_object_directory(), sha1_to_hex(pack_data->sha1));
+ if (move_temp_to_file(curr_index_name, name))
+ die("cannot store index file");
+ free((void *)curr_index_name);
+ return name;
+}
+
+static void end_packfile(void)
+{
+ struct packed_git *new_p;
+
+ clear_delta_base_cache();
+ if (objects.nr) {
+ unsigned char cur_pack_sha1[20];
+ char *idx_name;
+
+ close_pack_windows(pack_data);
+ sha1close(pack_file, cur_pack_sha1, 0);
+ fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
+ pack_data->pack_name, objects.nr,
+ cur_pack_sha1, pack_size);
+ close(pack_data->pack_fd);
+ idx_name = keep_pack(create_index());
+
+ /* Register the packfile with core git's machinery. */
+ new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ if (!new_p)
+ die("core git rejected index %s", idx_name);
+ install_packed_git(new_p);
+ }
+ else {
+ close(pack_data->pack_fd);
+ unlink_or_warn(pack_data->pack_name);
+ }
+ free(pack_data);
+
+ /* We can't carry a delta across packfiles. */
+ strbuf_release(&last_blob.data);
+ last_blob.offset = 0;
+ last_blob.depth = 0;
+}
+
+static void cycle_packfile(void)
+{
+ end_packfile();
+ start_packfile();
+}
+
+
+/**
+ * Methods for storing objects
+ */
+
+static int store_object(
+ enum object_type type,
+ struct strbuf *dat,
+ struct last_object *last,
+ unsigned char *sha1out,
+ struct fi_atom *atom)
+{
+ void *out, *delta = NULL;
+ struct fi_object *e;
+ unsigned char hdr[96];
+ unsigned char sha1[20];
+ unsigned long hdrlen, deltalen;
+ git_SHA_CTX c;
+ z_stream s;
+
+
+ /* Construct the header. */
+ hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
+ (unsigned long)dat->len) + 1;
+
+ /* Compute the hash of the object. */
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, hdr, hdrlen);
+ git_SHA1_Update(&c, dat->buf, dat->len);
+ git_SHA1_Final(sha1, &c);
+
+ if (sha1out)
+ hashcpy(sha1out, sha1);
+ if (atom)
+ insert_mark(atom, sha1);
+
+ /* Determine if we should auto-checkpoint. */
+ if ((max_packsize && (pack_size + 60 + dat->len + hdrlen) > max_packsize)
+ || (pack_size + 60 + dat->len + hdrlen) < pack_size) {
+ cycle_packfile();
+ }
+
+ /* Insert the object into our cache, return if it already exists. */
+ e = insert_object(sha1);
+
+ if (e->idx.offset)
+ return 1;
+
+ e->type = type;
+ e->idx.offset = pack_size;
+
+ memset(&s, 0, sizeof(s));
+ deflateInit(&s, pack_compression_level);
+
+ /* Compress the data, try to create a delta against the last object. */
+ if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
+ delta = diff_delta(last->data.buf, last->data.len,
+ dat->buf, dat->len, &deltalen, dat->len - 20);
+ }
+
+ /* diff_delta() above can fail and return NULL! */
+ if (delta) {
+ s.next_in = delta;
+ s.avail_in = deltalen;
+ } else {
+ s.next_in = (void *)dat->buf;
+ s.avail_in = dat->len;
+ }
+
+ s.avail_out = deflateBound(&s, s.avail_in);
+ s.next_out = out = xmalloc(s.avail_out);
+ while (deflate(&s, Z_FINISH) == Z_OK)
+ ; /* nothing */
+ deflateEnd(&s);
+
+ /* Write the object to the packfile. */
+ crc32_begin(pack_file);
+ if (delta) {
+ off_t ofs = e->idx.offset - last->offset;
+ unsigned pos = sizeof(hdr) - 1;
+
+ e->depth = last->depth + 1;
+
+ hdrlen = encode_in_pack_object_header(OBJ_OFS_DELTA, deltalen, hdr);
+ sha1write(pack_file, hdr, hdrlen);
+ pack_size += hdrlen;
+
+ hdr[pos] = ofs & 127;
+ while (ofs >>= 7)
+ hdr[--pos] = 128 | (--ofs & 127);
+ sha1write(pack_file, hdr + pos, sizeof(hdr) - pos);
+ pack_size += sizeof(hdr) - pos;
+
+ free(delta);
+ } else {
+ e->depth = 0;
+ hdrlen = encode_in_pack_object_header(type, dat->len, hdr);
+ sha1write(pack_file, hdr, hdrlen);
+ pack_size += hdrlen;
+ }
+
+ sha1write(pack_file, out, s.total_out);
+ pack_size += s.total_out;
+ free(out);
+
+ /* Update the cached object. */
+ e->idx.crc32 = crc32_end(pack_file);
+
+ if (last) {
+ if (last->no_swap) {
+ last->data = *dat;
+ } else {
+ strbuf_swap(&last->data, dat);
+ }
+ last->offset = e->idx.offset;
+ last->depth = e->depth;
+ }
+
+ return 0;
+}
+
+static void truncate_pack(off_t to, git_SHA_CTX *ctx)
+{
+ if (ftruncate(pack_data->pack_fd, to)
+ || lseek(pack_data->pack_fd, to, SEEK_SET) != to)
+ die_errno("cannot truncate pack to skip duplicate");
+ pack_size = to;
+
+ /* yes this is a layering violation */
+ pack_file->total = to;
+ pack_file->offset = 0;
+ pack_file->ctx = *ctx;
+}
+
+static void stream_blob(uintmax_t len, unsigned char *sha1out, struct fi_atom *atom)
+{
+ size_t in_sz = 64 * 1024, out_sz = 64 * 1024;
+ unsigned char *in_buf = xmalloc(in_sz);
+ unsigned char *out_buf = xmalloc(out_sz);
+ struct fi_object *e;
+ unsigned char sha1[20];
+ unsigned long hdrlen;
+ off_t offset;
+ git_SHA_CTX c;
+ git_SHA_CTX pack_file_ctx;
+ z_stream s;
+ int status = Z_OK;
+
+ /* Determine if we should auto-checkpoint. */
+ if ((max_packsize && (pack_size + 60 + len) > max_packsize)
+ || (pack_size + 60 + len) < pack_size)
+ cycle_packfile();
+
+ offset = pack_size;
+
+ /* preserve the pack_file SHA1 ctx in case we have to truncate later */
+ sha1flush(pack_file);
+ pack_file_ctx = pack_file->ctx;
+
+ hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
+ if (out_sz <= hdrlen)
+ die("impossibly large object header");
+
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, out_buf, hdrlen);
+
+ crc32_begin(pack_file);
+
+ memset(&s, 0, sizeof(s));
+ deflateInit(&s, pack_compression_level);
+
+ hdrlen = encode_in_pack_object_header(OBJ_BLOB, len, out_buf);
+ if (out_sz <= hdrlen)
+ die("impossibly large object header");
+
+ s.next_out = out_buf + hdrlen;
+ s.avail_out = out_sz - hdrlen;
+
+ while (status != Z_STREAM_END) {
+ if (0 < len && !s.avail_in) {
+ size_t cnt = in_sz < len ? in_sz : (size_t)len;
+ size_t n = fread(in_buf, 1, cnt, stdin);
+ if (!n && feof(stdin))
+ die("EOF in data (%" PRIuMAX " bytes remaining)", len);
+
+ git_SHA1_Update(&c, in_buf, n);
+ s.next_in = in_buf;
+ s.avail_in = n;
+ len -= n;
+ }
+
+ status = deflate(&s, len ? 0 : Z_FINISH);
+
+ if (!s.avail_out || status == Z_STREAM_END) {
+ size_t n = s.next_out - out_buf;
+ sha1write(pack_file, out_buf, n);
+ pack_size += n;
+ s.next_out = out_buf;
+ s.avail_out = out_sz;
+ }
+
+ switch (status) {
+ case Z_OK:
+ case Z_BUF_ERROR:
+ case Z_STREAM_END:
+ continue;
+ default:
+ die("unexpected deflate failure: %d", status);
+ }
+ }
+ deflateEnd(&s);
+ git_SHA1_Final(sha1, &c);
+
+ if (sha1out)
+ hashcpy(sha1out, sha1);
+ if (atom)
+ insert_mark(atom, sha1);
+
+ e = insert_object(sha1);
+
+ if (e->idx.offset) {
+ truncate_pack(offset, &pack_file_ctx);
+ } else {
+ e->depth = 0;
+ e->type = OBJ_BLOB;
+ e->idx.offset = offset;
+ e->idx.crc32 = crc32_end(pack_file);
+ }
+
+ free(in_buf);
+ free(out_buf);
+}
+
+/* All calls must be guarded by find_object() or find_mark() to
+ * ensure the 'struct fi_object' passed was written by this
+ * process instance. We unpack the entry by the offset, avoiding
+ * the need for the corresponding .idx file. This unpacking rule
+ * works because we only use OBJ_REF_DELTA within the packfiles
+ * created by fast-import.
+ *
+ * oe must not be NULL. Such an oe usually comes from giving
+ * an unknown SHA-1 to find_object() or an undefined mark to
+ * find_mark(). Callers must test for this condition and use
+ * the standard read_sha1_file() when it happens.
+ */
+static void *fi_unpack_entry(
+ struct fi_object *oe,
+ enum object_type *type,
+ unsigned long *size)
+{
+ if (pack_data->pack_size < (pack_size + 20)) {
+ /* The object is stored in the packfile we are writing to
+ * and we have modified it since the last time we scanned
+ * back to read a previously written object. If an old
+ * window covered [p->pack_size, p->pack_size + 20) its
+ * data is stale and is not valid. Closing all windows
+ * and updating the packfile length ensures we can read
+ * the newly written data.
+ */
+ close_pack_windows(pack_data);
+ sha1flush(pack_file);
+
+ /* We have to offer 20 bytes additional on the end of
+ * the packfile as the core unpacker code assumes the
+ * footer is present at the file end and must promise
+ * at least 20 bytes within any window it maps. But
+ * we don't actually create the footer here.
+ */
+ pack_data->pack_size = pack_size + 20;
+ }
+
+ return unpack_entry(pack_data, oe->idx.offset, type, size);
+}
+
+/* Same as read_sha1_file() except that it first looks in our local cache
+ * which holds objects from our current pack. Git doesn't know anything
+ * about those objects until we finish the pack and register it with Git. */
+static void *fi_read_sha1_file(unsigned char sha1[20], enum object_type *type,
+ unsigned long *size)
+{
+ struct fi_object *obj = find_object(sha1);
+ if (obj)
+ return fi_unpack_entry(obj, type, size);
+
+ return read_sha1_file(sha1, type, size);
+}
+
+static enum object_type fi_sha1_object_type(unsigned char sha1[20])
+{
+ struct fi_object *obj = find_object(sha1);
+ if (obj) {
+ return obj->type;
+ }
+
+ return sha1_object_info(sha1, NULL);
+}
+
+
+/**
+ * Tree handling
+ */
+
+static void load_tree(struct fi_tree_entry *root)
+{
+ unsigned char *sha1 = root->versions[1].sha1;
+ struct fi_tree *t;
+ unsigned long size;
+ enum object_type type;
+ char *buf;
+ const char *c;
+
+ root->tree = t = new_tree_content(8);
+ if (is_null_sha1(sha1))
+ return;
+
+ buf = fi_read_sha1_file(sha1, &type, &size);
+ if (!buf || type != OBJ_TREE)
+ die("Can't load tree %s", sha1_to_hex(sha1));
+
+ c = buf;
+ while (c != (buf + size)) {
+ struct fi_tree_entry *e = new_tree_entry();
+
+ if (t->entry_count == t->entry_capacity)
+ root->tree = t = grow_tree_content(t, t->entry_count);
+ t->entries[t->entry_count++] = e;
+
+ e->tree = NULL;
+ c = decode_tree_mode(c, &e->versions[1].mode);
+ if (!c)
+ die("Corrupt mode in %s", sha1_to_hex(sha1));
+ e->versions[0].mode = e->versions[1].mode;
+ e->name = to_atom(c, strlen(c));
+ c += e->name->len + 1;
+ hashcpy(e->versions[0].sha1, (unsigned char *)c);
+ hashcpy(e->versions[1].sha1, (unsigned char *)c);
+ c += 20;
+ }
+ free(buf);
+}
+
+static int tecmp0 (const void *_a, const void *_b)
+{
+ struct fi_tree_entry *a = *((struct fi_tree_entry**)_a);
+ struct fi_tree_entry *b = *((struct fi_tree_entry**)_b);
+ return base_name_compare(
+ a->name->data, a->name->len, a->versions[0].mode,
+ b->name->data, b->name->len, b->versions[0].mode);
+}
+
+static int tecmp1 (const void *_a, const void *_b)
+{
+ struct fi_tree_entry *a = *((struct fi_tree_entry**)_a);
+ struct fi_tree_entry *b = *((struct fi_tree_entry**)_b);
+ return base_name_compare(
+ a->name->data, a->name->len, a->versions[1].mode,
+ b->name->data, b->name->len, b->versions[1].mode);
+}
+
+static void mktree(struct fi_tree *t, int v, struct strbuf *b)
+{
+ size_t maxlen = 0;
+ unsigned int i;
+
+ if (!v)
+ qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp0);
+ else
+ qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp1);
+
+ for (i = 0; i < t->entry_count; i++) {
+ if (t->entries[i]->versions[v].mode)
+ maxlen += t->entries[i]->name->len + 34;
+ }
+
+ strbuf_reset(b);
+ strbuf_grow(b, maxlen);
+ for (i = 0; i < t->entry_count; i++) {
+ struct fi_tree_entry *e = t->entries[i];
+ if (!e->versions[v].mode)
+ continue;
+ strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
+ e->name->data, '\0');
+ strbuf_add(b, e->versions[v].sha1, 20);
+ }
+}
+
+static void store_tree(struct fi_tree_entry *root)
+{
+ struct fi_tree *t = root->tree;
+ unsigned int i, j, del;
+ struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
+ struct fi_object *le;
+ struct strbuf new_tree = STRBUF_INIT;
+
+ if (!is_null_sha1(root->versions[1].sha1))
+ return;
+
+ for (i = 0; i < t->entry_count; i++) {
+ if (t->entries[i]->tree)
+ store_tree(t->entries[i]);
+ }
+
+ le = find_object(root->versions[0].sha1);
+ if (S_ISDIR(root->versions[0].mode) && le) {
+ mktree(t, 0, &lo.data);
+ lo.offset = le->idx.offset;
+ lo.depth = t->delta_depth;
+ }
+
+ mktree(t, 1, &new_tree);
+ store_object(OBJ_TREE, &new_tree, &lo, root->versions[1].sha1, NULL);
+
+ t->delta_depth = lo.depth;
+ for (i = 0, j = 0, del = 0; i < t->entry_count; i++) {
+ struct fi_tree_entry *e = t->entries[i];
+ if (e->versions[1].mode) {
+ e->versions[0].mode = e->versions[1].mode;
+ hashcpy(e->versions[0].sha1, e->versions[1].sha1);
+ t->entries[j++] = e;
+ } else {
+ release_tree_entry(e);
+ del++;
+ }
+ }
+ t->entry_count -= del;
+}
+
+static int tree_content_set(
+ struct fi_tree_entry *root,
+ const char *p,
+ const unsigned char *sha1,
+ const uint16_t mode,
+ struct fi_tree *subtree)
+{
+ struct fi_tree *t = root->tree;
+ const char *slash1;
+ unsigned int i, n;
+ struct fi_tree_entry *e;
+
+ slash1 = strchr(p, '/');
+ if (slash1)
+ n = slash1 - p;
+ else
+ n = strlen(p);
+ if (!n)
+ die("Empty path component found in input");
+ if (!slash1 && !S_ISDIR(mode) && subtree)
+ die("Non-directories cannot have subtrees");
+
+ for (i = 0; i < t->entry_count; i++) {
+ e = t->entries[i];
+ if (e->name->len == n && !strncmp(p, e->name->data, n)) {
+ if (!slash1) {
+ if (!S_ISDIR(mode)
+ && e->versions[1].mode == mode
+ && !hashcmp(e->versions[1].sha1, sha1))
+ return 0;
+ e->versions[1].mode = mode;
+ hashcpy(e->versions[1].sha1, sha1);
+ if (e->tree)
+ release_tree_content_recursive(e->tree);
+ e->tree = subtree;
+ hashclr(root->versions[1].sha1);
+ return 1;
+ }
+ if (!S_ISDIR(e->versions[1].mode)) {
+ e->tree = new_tree_content(8);
+ e->versions[1].mode = S_IFDIR;
+ }
+ if (!e->tree)
+ load_tree(e);
+ if (tree_content_set(e, slash1 + 1, sha1, mode, subtree)) {
+ hashclr(root->versions[1].sha1);
+ return 1;
+ }
+ return 0;
+ }
+ }
+
+ if (t->entry_count == t->entry_capacity)
+ root->tree = t = grow_tree_content(t, t->entry_count);
+ e = new_tree_entry();
+ e->name = to_atom(p, n);
+ e->versions[0].mode = 0;
+ hashclr(e->versions[0].sha1);
+ t->entries[t->entry_count++] = e;
+ if (slash1) {
+ e->tree = new_tree_content(8);
+ e->versions[1].mode = S_IFDIR;
+ tree_content_set(e, slash1 + 1, sha1, mode, subtree);
+ } else {
+ e->tree = subtree;
+ e->versions[1].mode = mode;
+ hashcpy(e->versions[1].sha1, sha1);
+ }
+ hashclr(root->versions[1].sha1);
+ return 1;
+}
+
+static int tree_content_remove(
+ struct fi_tree_entry *root,
+ const char *p,
+ struct fi_tree_entry *backup_leaf)
+{
+ struct fi_tree *t = root->tree;
+ const char *slash1;
+ unsigned int i, n;
+ struct fi_tree_entry *e;
+
+ slash1 = strchr(p, '/');
+ if (slash1)
+ n = slash1 - p;
+ else
+ n = strlen(p);
+
+ for (i = 0; i < t->entry_count; i++) {
+ e = t->entries[i];
+ if (e->name->len == n && !strncmp(p, e->name->data, n)) {
+ if (slash1 && !S_ISDIR(e->versions[1].mode))
+ /*
+ * If p names a file in some subdirectory, and a
+ * file or symlink matching the name of the
+ * parent directory of p exists, then p cannot
+ * exist and need not be deleted.
+ */
+ return 1;
+ if (!slash1 || !S_ISDIR(e->versions[1].mode))
+ goto del_entry;
+ if (!e->tree)
+ load_tree(e);
+ if (tree_content_remove(e, slash1 + 1, backup_leaf)) {
+ for (n = 0; n < e->tree->entry_count; n++) {
+ if (e->tree->entries[n]->versions[1].mode) {
+ hashclr(root->versions[1].sha1);
+ return 1;
+ }
+ }
+ backup_leaf = NULL;
+ goto del_entry;
+ }
+ return 0;
+ }
+ }
+ return 0;
+
+del_entry:
+ if (backup_leaf)
+ memcpy(backup_leaf, e, sizeof(*backup_leaf));
+ else if (e->tree)
+ release_tree_content_recursive(e->tree);
+ e->tree = NULL;
+ e->versions[1].mode = 0;
+ hashclr(e->versions[1].sha1);
+ hashclr(root->versions[1].sha1);
+ return 1;
+}
+
+static int tree_content_get(
+ struct fi_tree_entry *root,
+ const char *p,
+ struct fi_tree_entry *leaf)
+{
+ struct fi_tree *t = root->tree;
+ const char *slash1;
+ unsigned int i, n;
+ struct fi_tree_entry *e;
+
+ slash1 = strchr(p, '/');
+ if (slash1)
+ n = slash1 - p;
+ else
+ n = strlen(p);
+
+ for (i = 0; i < t->entry_count; i++) {
+ e = t->entries[i];
+ if (e->name->len == n && !strncmp(p, e->name->data, n)) {
+ if (!slash1) {
+ memcpy(leaf, e, sizeof(*leaf));
+ if (e->tree && is_null_sha1(e->versions[1].sha1))
+ leaf->tree = dup_tree_content(e->tree);
+ else
+ leaf->tree = NULL;
+ return 1;
+ }
+ if (!S_ISDIR(e->versions[1].mode))
+ return 0;
+ if (!e->tree)
+ load_tree(e);
+ return tree_content_get(e, slash1 + 1, leaf);
+ }
+ }
+ return 0;
+}
+
+/* Parse the optional mark from the stream and return the associated atom. */
+static struct fi_atom *parse_mark(void)
+{
+ struct fi_atom *atom = NULL;
+
+ if (!prefixcmp(command_buf.buf, "mark :")) {
+ atom = to_atom(command_buf.buf + 6, strlen(command_buf.buf + 6));
+ read_next_command();
+ }
+
+ return atom;
+}
+
+static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res)
+{
+ strbuf_reset(sb);
+
+ if (prefixcmp(command_buf.buf, "data "))
+ die("Expected 'data n' command, found: %s", command_buf.buf);
+
+ if (!prefixcmp(command_buf.buf + 5, "<<")) {
+ char *term = xstrdup(command_buf.buf + 5 + 2);
+ size_t term_len = command_buf.len - 5 - 2;
+
+ strbuf_detach(&command_buf, NULL);
+ for (;;) {
+ if (strbuf_getline(&command_buf, stdin, '\n') == EOF)
+ die("EOF in data (terminator '%s' not found)", term);
+ if (term_len == command_buf.len
+ && !strcmp(term, command_buf.buf))
+ break;
+ strbuf_addbuf(sb, &command_buf);
+ strbuf_addch(sb, '\n');
+ }
+ free(term);
+ }
+ else {
+ uintmax_t len = strtoumax(command_buf.buf + 5, NULL, 10);
+ size_t n = 0, length = (size_t)len;
+
+ if (limit && limit < len) {
+ *len_res = len;
+ return 0;
+ }
+ if (length < len)
+ die("data is too large to use in this context");
+
+ while (n < length) {
+ size_t s = strbuf_fread(sb, length - n, stdin);
+ if (!s && feof(stdin))
+ die("EOF in data (%lu bytes remaining)",
+ (unsigned long)(length - n));
+ n += s;
+ }
+ }
+
+ read_next_command();
+ skip_optional_lf();
+ return 1;
+}
+
+static int validate_raw_date(const char *src, char *result, int maxlen)
+{
+ const char *orig_src = src;
+ char *endp;
+ unsigned long num;
+
+ errno = 0;
+
+ num = strtoul(src, &endp, 10);
+ /* NEEDSWORK: perhaps check for reasonable values? */
+ if (errno || endp == src || *endp != ' ')
+ return -1;
+
+ src = endp + 1;
+ if (*src != '-' && *src != '+')
+ return -1;
+
+ num = strtoul(src + 1, &endp, 10);
+ if (errno || endp == src + 1 || *endp || (endp - orig_src) >= maxlen ||
+ 1400 < num)
+ return -1;
+
+ strcpy(result, orig_src);
+ return 0;
+}
+
+static char *parse_ident(const char *buf)
+{
+ const char *gt;
+ size_t name_len;
+ char *ident;
+
+ gt = strrchr(buf, '>');
+ if (!gt)
+ die("Missing > in ident string: %s", buf);
+ gt++;
+ if (*gt != ' ')
+ die("Missing space after > in ident string: %s", buf);
+ gt++;
+ name_len = gt - buf;
+ ident = xmalloc(name_len + 24);
+ strncpy(ident, buf, name_len);
+
+ switch (whenspec) {
+ case WHENSPEC_RAW:
+ if (validate_raw_date(gt, ident + name_len, 24) < 0)
+ die("Invalid raw date \"%s\" in ident: %s", gt, buf);
+ break;
+ case WHENSPEC_RFC2822:
+ if (parse_date(gt, ident + name_len, 24) < 0)
+ die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
+ break;
+ case WHENSPEC_NOW:
+ if (strcmp("now", gt))
+ die("Date in ident must be 'now': %s", buf);
+ datestamp(ident + name_len, 24);
+ break;
+ }
+
+ return ident;
+}
+
+static void parse_and_store_blob(
+ struct last_object *last,
+ unsigned char *sha1out,
+ struct fi_atom *atom)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ uintmax_t len;
+
+ if (parse_data(&buf, big_file_threshold, &len))
+ store_object(OBJ_BLOB, &buf, last, sha1out, atom);
+ else {
+ if (last) {
+ strbuf_release(&last->data);
+ last->offset = 0;
+ last->depth = 0;
+ }
+ stream_blob(len, sha1out, atom);
+ skip_optional_lf();
+ }
+}
+
+static void unload_one_branch(void)
+{
+ while (cur_active_branches
+ && cur_active_branches >= max_active_branches) {
+ struct fi_branch *e, *p = active_branches;
+
+ if (p) {
+ e = p->active_next_branch;
+ p->active_next_branch = e->active_next_branch;
+ } else {
+ e = active_branches;
+ active_branches = e->active_next_branch;
+ }
+ e->active = 0;
+ e->active_next_branch = NULL;
+ if (e->branch_tree.tree) {
+ release_tree_content_recursive(e->branch_tree.tree);
+ e->branch_tree.tree = NULL;
+ }
+ cur_active_branches--;
+ }
+}
+
+static void load_branch(struct fi_branch *b)
+{
+ load_tree(&b->branch_tree);
+ if (!b->active) {
+ b->active = 1;
+ b->active_next_branch = active_branches;
+ active_branches = b;
+ cur_active_branches++;
+ }
+}
+
+static unsigned char convert_num_notes_to_fanout(uintmax_t num_notes)
+{
+ unsigned char fanout = 0;
+ while ((num_notes >>= 8))
+ fanout++;
+ return fanout;
+}
+
+static void construct_path_with_fanout(const char *hex_sha1,
+ unsigned char fanout, char *path)
+{
+ unsigned int i = 0, j = 0;
+ if (fanout >= 20)
+ die("Too large fanout (%u)", fanout);
+ while (fanout) {
+ path[i++] = hex_sha1[j++];
+ path[i++] = hex_sha1[j++];
+ path[i++] = '/';
+ fanout--;
+ }
+ memcpy(path + i, hex_sha1 + j, 40 - j);
+ path[i + 40 - j] = '\0';
+}
+
+static uintmax_t do_change_note_fanout(
+ struct fi_tree_entry *orig_root, struct fi_tree_entry *root,
+ char *hex_sha1, unsigned int hex_sha1_len,
+ char *fullpath, unsigned int fullpath_len,
+ unsigned char fanout)
+{
+ struct fi_tree *t = root->tree;
+ struct fi_tree_entry *e, leaf;
+ unsigned int i, tmp_hex_sha1_len, tmp_fullpath_len;
+ uintmax_t num_notes = 0;
+ unsigned char sha1[20];
+ char realpath[60];
+
+ for (i = 0; t && i < t->entry_count; i++) {
+ e = t->entries[i];
+ tmp_hex_sha1_len = hex_sha1_len + e->name->len;
+ tmp_fullpath_len = fullpath_len;
+
+ /*
+ * We're interested in EITHER existing note entries (entries
+ * with exactly 40 hex chars in path, not including directory
+ * separators), OR directory entries that may contain note
+ * entries (with < 40 hex chars in path).
+ * Also, each path component in a note entry must be a multiple
+ * of 2 chars.
+ */
+ if (!e->versions[1].mode ||
+ tmp_hex_sha1_len > 40 ||
+ e->name->len % 2)
+ continue;
+
+ /* This _may_ be a note entry, or a subdir containing notes */
+ memcpy(hex_sha1 + hex_sha1_len, e->name->data,
+ e->name->len);
+ if (tmp_fullpath_len)
+ fullpath[tmp_fullpath_len++] = '/';
+ memcpy(fullpath + tmp_fullpath_len, e->name->data,
+ e->name->len);
+ tmp_fullpath_len += e->name->len;
+ fullpath[tmp_fullpath_len] = '\0';
+
+ if (tmp_hex_sha1_len == 40 && !get_sha1_hex(hex_sha1, sha1)) {
+ /* This is a note entry */
+ construct_path_with_fanout(hex_sha1, fanout, realpath);
+ if (!strcmp(fullpath, realpath)) {
+ /* Note entry is in correct location */
+ num_notes++;
+ continue;
+ }
+
+ /* Rename fullpath to realpath */
+ if (!tree_content_remove(orig_root, fullpath, &leaf))
+ die("Failed to remove path %s", fullpath);
+ tree_content_set(orig_root, realpath,
+ leaf.versions[1].sha1,
+ leaf.versions[1].mode,
+ leaf.tree);
+ } else if (S_ISDIR(e->versions[1].mode)) {
+ /* This is a subdir that may contain note entries */
+ if (!e->tree)
+ load_tree(e);
+ num_notes += do_change_note_fanout(orig_root, e,
+ hex_sha1, tmp_hex_sha1_len,
+ fullpath, tmp_fullpath_len, fanout);
+ }
+
+ /* The above may have reallocated the current tree_content */
+ t = root->tree;
+ }
+ return num_notes;
+}
+
+static uintmax_t change_note_fanout(struct fi_tree_entry *root,
+ unsigned char fanout)
+{
+ char hex_sha1[40], path[60];
+ return do_change_note_fanout(root, root, hex_sha1, 0, path, 0, fanout);
+}
+
+static void file_change_m(struct fi_branch *b)
+{
+ const char *p = command_buf.buf + 2;
+ static struct strbuf uq = STRBUF_INIT;
+ const char *endp;
+ struct fi_object *oe = NULL;
+ unsigned char sha1[20];
+ unsigned int mode, inline_data = 0;
+
+ p = decode_tree_mode(p, &mode);
+ if (!p)
+ die("Corrupt mode: %s", command_buf.buf);
+ switch (mode) {
+ case 0644:
+ case 0755:
+ mode |= S_IFREG;
+ case S_IFREG | 0644:
+ case S_IFREG | 0755:
+ case S_IFLNK:
+ case S_IFDIR:
+ case S_IFGITLINK:
+ /* ok */
+ break;
+ default:
+ die("Corrupt mode: %s", command_buf.buf);
+ }
+
+ if (*p == ':') {
+ p = find_mark(p + 1, sha1);
+ } else if (!prefixcmp(p, "inline")) {
+ inline_data = 1;
+ p += 6;
+ } else {
+ if (get_sha1_hex(p, sha1))
+ die("Invalid SHA1: %s", command_buf.buf);
+ p += 40;
+ }
+ if (*p++ != ' ')
+ die("Missing space after SHA1: %s", command_buf.buf);
+
+ strbuf_reset(&uq);
+ if (!unquote_c_style(&uq, p, &endp)) {
+ if (*endp)
+ die("Garbage after path in: %s", command_buf.buf);
+ p = uq.buf;
+ }
+
+ if (S_ISGITLINK(mode)) {
+ if (inline_data)
+ die("Git links cannot be specified 'inline': %s",
+ command_buf.buf);
+ else if (oe) {
+ if (oe->type != OBJ_COMMIT)
+ die("Not a commit (actually a %s): %s",
+ typename(oe->type), command_buf.buf);
+ }
+ /*
+ * Accept the sha1 without checking; it is expected to be in
+ * another repository.
+ */
+ } else if (inline_data) {
+ if (S_ISDIR(mode))
+ die("Directories cannot be specified 'inline': %s",
+ command_buf.buf);
+ if (p != uq.buf) {
+ strbuf_addstr(&uq, p);
+ p = uq.buf;
+ }
+ read_next_command();
+ parse_and_store_blob(&last_blob, sha1, 0);
+ } else {
+ enum object_type expected = S_ISDIR(mode) ?
+ OBJ_TREE: OBJ_BLOB;
+ enum object_type type = fi_sha1_object_type(sha1);
+ if (type < 0)
+ die("%s not found: %s",
+ S_ISDIR(mode) ? "Tree" : "Blob",
+ command_buf.buf);
+ if (type != expected)
+ die("Not a %s (actually a %s): %s %s",
+ typename(expected), typename(type),
+ command_buf.buf, sha1_to_hex(sha1));
+ }
+
+ tree_content_set(&b->branch_tree, p, sha1, mode, NULL);
+}
+
+static void file_change_d(struct fi_branch *b)
+{
+ const char *p = command_buf.buf + 2;
+ static struct strbuf uq = STRBUF_INIT;
+ const char *endp;
+
+ strbuf_reset(&uq);
+ if (!unquote_c_style(&uq, p, &endp)) {
+ if (*endp)
+ die("Garbage after path in: %s", command_buf.buf);
+ p = uq.buf;
+ }
+ tree_content_remove(&b->branch_tree, p, NULL);
+}
+
+static void file_change_cr(struct fi_branch *b, int rename)
+{
+ const char *s, *d;
+ static struct strbuf s_uq = STRBUF_INIT;
+ static struct strbuf d_uq = STRBUF_INIT;
+ const char *endp;
+ struct fi_tree_entry leaf;
+
+ s = command_buf.buf + 2;
+ strbuf_reset(&s_uq);
+ if (!unquote_c_style(&s_uq, s, &endp)) {
+ if (*endp != ' ')
+ die("Missing space after source: %s", command_buf.buf);
+ } else {
+ endp = strchr(s, ' ');
+ if (!endp)
+ die("Missing space after source: %s", command_buf.buf);
+ strbuf_add(&s_uq, s, endp - s);
+ }
+ s = s_uq.buf;
+
+ endp++;
+ if (!*endp)
+ die("Missing dest: %s", command_buf.buf);
+
+ d = endp;
+ strbuf_reset(&d_uq);
+ if (!unquote_c_style(&d_uq, d, &endp)) {
+ if (*endp)
+ die("Garbage after dest in: %s", command_buf.buf);
+ d = d_uq.buf;
+ }
+
+ memset(&leaf, 0, sizeof(leaf));
+ if (rename)
+ tree_content_remove(&b->branch_tree, s, &leaf);
+ else
+ tree_content_get(&b->branch_tree, s, &leaf);
+ if (!leaf.versions[1].mode)
+ die("Path %s not in branch", s);
+ tree_content_set(&b->branch_tree, d,
+ leaf.versions[1].sha1,
+ leaf.versions[1].mode,
+ leaf.tree);
+}
+
+static void note_change_n(struct fi_branch *b, unsigned char old_fanout)
+{
+ const char *p = command_buf.buf + 2;
+ static struct strbuf uq = STRBUF_INIT;
+ struct fi_object *oe = oe;
+ struct fi_branch *s;
+ unsigned char sha1[20], commit_sha1[20];
+ char path[60];
+ uint16_t inline_data = 0;
+ unsigned char new_fanout;
+
+ /* <dataref> or 'inline' */
+ if (*p == ':') {
+ p = find_mark(p + 1, sha1);
+ } else if (!prefixcmp(p, "inline")) {
+ inline_data = 1;
+ p += 6;
+ } else {
+ if (get_sha1_hex(p, sha1))
+ die("Invalid SHA1: %s", command_buf.buf);
+ oe = find_object(sha1);
+ p += 40;
+ }
+ if (*p++ != ' ')
+ die("Missing space after SHA1: %s", command_buf.buf);
+
+ /* <committish> */
+ s = lookup_branch(p);
+ if (s) {
+ hashcpy(commit_sha1, s->sha1);
+ } else if (*p == ':') {
+ find_mark(p + 1, commit_sha1);
+ } else if (!get_sha1(p, commit_sha1)) {
+ enum object_type type;
+ type = fi_sha1_object_type(commit_sha1);
+ if (type != OBJ_COMMIT)
+ die("Can only add notes to commits");
+ } else
+ die("Invalid ref name or SHA1 expression: %s", p);
+
+ if (inline_data) {
+ if (p != uq.buf) {
+ strbuf_addstr(&uq, p);
+ p = uq.buf;
+ }
+ read_next_command();
+ parse_and_store_blob(&last_blob, sha1, 0);
+ } else if (oe) {
+ if (oe->type != OBJ_BLOB)
+ die("Not a blob (actually a %s): %s",
+ typename(oe->type), command_buf.buf);
+ } else if (!is_null_sha1(sha1)) {
+ enum object_type type = fi_sha1_object_type(sha1);
+ if (type < 0)
+ die("Blob not found: %s", command_buf.buf);
+ if (type != OBJ_BLOB)
+ die("Not a blob (actually a %s): %s",
+ typename(type), command_buf.buf);
+ }
+
+ construct_path_with_fanout(sha1_to_hex(commit_sha1), old_fanout, path);
+ if (tree_content_remove(&b->branch_tree, path, NULL))
+ b->num_notes--;
+
+ if (is_null_sha1(sha1))
+ return; /* nothing to insert */
+
+ b->num_notes++;
+ new_fanout = convert_num_notes_to_fanout(b->num_notes);
+ construct_path_with_fanout(sha1_to_hex(commit_sha1), new_fanout, path);
+ tree_content_set(&b->branch_tree, path, sha1, S_IFREG | 0644, NULL);
+}
+
+static void file_change_deleteall(struct fi_branch *b)
+{
+ release_tree_content_recursive(b->branch_tree.tree);
+ hashclr(b->branch_tree.versions[0].sha1);
+ hashclr(b->branch_tree.versions[1].sha1);
+ load_tree(&b->branch_tree);
+ b->num_notes = 0;
+}
+
+static void parse_from_commit(struct fi_branch *b, char *buf, unsigned long size)
+{
+ if (!buf || size < 46)
+ die("Not a valid commit: %s", sha1_to_hex(b->sha1));
+ if (memcmp("tree ", buf, 5)
+ || get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
+ die("The commit %s is corrupt", sha1_to_hex(b->sha1));
+ hashcpy(b->branch_tree.versions[0].sha1,
+ b->branch_tree.versions[1].sha1);
+}
+
+static void parse_from_existing(struct fi_branch *b)
+{
+ if (is_null_sha1(b->sha1)) {
+ hashclr(b->branch_tree.versions[0].sha1);
+ hashclr(b->branch_tree.versions[1].sha1);
+ } else {
+ unsigned long size;
+ enum object_type type;
+ char *buf;
+
+ buf = fi_read_sha1_file(b->sha1, &type, &size);
+ parse_from_commit(b, buf, size);
+ free(buf);
+ }
+}
+
+static int parse_from(struct fi_branch *b)
+{
+ const char *from;
+ struct fi_branch *s;
+
+ if (prefixcmp(command_buf.buf, "from "))
+ return 0;
+
+ if (b->branch_tree.tree) {
+ release_tree_content_recursive(b->branch_tree.tree);
+ b->branch_tree.tree = NULL;
+ }
+
+ from = strchr(command_buf.buf, ' ') + 1;
+ s = lookup_branch(from);
+ if (b == s)
+ die("Can't create a branch from itself: %s", b->name);
+ else if (s) {
+ unsigned char *t = s->branch_tree.versions[1].sha1;
+ hashcpy(b->sha1, s->sha1);
+ hashcpy(b->branch_tree.versions[0].sha1, t);
+ hashcpy(b->branch_tree.versions[1].sha1, t);
+ } else if (*from == ':') {
+ find_mark(from + 1, b->sha1);
+ if (!is_null_sha1(b->sha1)) {
+ unsigned long size;
+ enum object_type type;
+ char *buf = fi_read_sha1_file(b->sha1, &type, &size);
+ parse_from_commit(b, buf, size);
+ free(buf);
+ } else
+ parse_from_existing(b);
+ } else if (!get_sha1(from, b->sha1))
+ parse_from_existing(b);
+ else
+ die("Invalid ref name or SHA1 expression: %s", from);
+
+ read_next_command();
+ return 1;
+}
+
+static struct hash_list *parse_merge(unsigned int *count)
+{
+ struct hash_list *list = NULL, *n, *e = e;
+ const char *from;
+ struct fi_branch *s;
+
+ *count = 0;
+ while (!prefixcmp(command_buf.buf, "merge ")) {
+ from = strchr(command_buf.buf, ' ') + 1;
+ n = xmalloc(sizeof(*n));
+ s = lookup_branch(from);
+ if (s)
+ hashcpy(n->sha1, s->sha1);
+ else if (*from == ':') {
+ find_mark(from + 1, n->sha1);
+ } else if (!get_sha1(from, n->sha1)) {
+ unsigned long size;
+ char *buf = read_object_with_reference(n->sha1,
+ commit_type, &size, n->sha1);
+ if (!buf || size < 46)
+ die("Not a valid commit: %s", from);
+ free(buf);
+ } else
+ die("Invalid ref name or SHA1 expression: %s", from);
+
+ n->next = NULL;
+ if (list)
+ e->next = n;
+ else
+ list = n;
+ e = n;
+ (*count)++;
+ read_next_command();
+ }
+ return list;
+}
+
+static int fi_command_commit(void)
+{
+ static struct strbuf msg = STRBUF_INIT;
+ static struct strbuf new_data = STRBUF_INIT;
+ struct fi_branch *b;
+ struct fi_atom *atom;
+ char *sp;
+ char *author = NULL;
+ char *committer = NULL;
+ struct hash_list *merge_list = NULL;
+ unsigned int merge_count;
+ unsigned char prev_fanout, new_fanout;
+
+ /* Obtain the branch name from the rest of our command */
+ sp = strchr(command_buf.buf, ' ') + 1;
+ b = lookup_branch(sp);
+ if (!b)
+ b = new_branch(sp);
+
+ read_next_command();
+ atom = parse_mark();
+ if (!prefixcmp(command_buf.buf, "author ")) {
+ author = parse_ident(command_buf.buf + 7);
+ read_next_command();
+ }
+ if (!prefixcmp(command_buf.buf, "committer ")) {
+ committer = parse_ident(command_buf.buf + 10);
+ read_next_command();
+ }
+ if (!committer)
+ die("Expected committer but didn't get one");
+ parse_data(&msg, 0, NULL);
+ parse_from(b);
+ merge_list = parse_merge(&merge_count);
+
+ /* ensure the branch is active/loaded */
+ if (!b->branch_tree.tree || !max_active_branches) {
+ unload_one_branch();
+ load_branch(b);
+ }
+
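+ /* Remember the current notes fanout so that, if N commands change
+ * the number of notes, the tree can be re-fanned afterwards. */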
+ prev_fanout = convert_num_notes_to_fanout(b->num_notes);
+
+ /* file_change* */
+ while (command_buf.len > 0) {
+ if (!prefixcmp(command_buf.buf, "M "))
+ file_change_m(b);
+ else if (!prefixcmp(command_buf.buf, "D "))
+ file_change_d(b);
+ else if (!prefixcmp(command_buf.buf, "R "))
+ file_change_cr(b, 1);
+ else if (!prefixcmp(command_buf.buf, "C "))
+ file_change_cr(b, 0);
+ else if (!prefixcmp(command_buf.buf, "N "))
+ note_change_n(b, prev_fanout);
+ else if (!strcmp("deleteall", command_buf.buf))
+ file_change_deleteall(b);
+ else
+ break;
+
+ if (read_next_command() == EOF)
+ break;
+ }
+
+ new_fanout = convert_num_notes_to_fanout(b->num_notes);
+ if (new_fanout != prev_fanout)
+ b->num_notes = change_note_fanout(&b->branch_tree, new_fanout);
+
+ /* build the tree and the commit */
+ store_tree(&b->branch_tree);
+ hashcpy(b->branch_tree.versions[0].sha1,
+ b->branch_tree.versions[1].sha1);
+
+ strbuf_reset(&new_data);
+ strbuf_addf(&new_data, "tree %s\n",
+ sha1_to_hex(b->branch_tree.versions[1].sha1));
+ if (!is_null_sha1(b->sha1))
+ strbuf_addf(&new_data, "parent %s\n", sha1_to_hex(b->sha1));
+ while (merge_list) {
+ struct hash_list *next = merge_list->next;
+ strbuf_addf(&new_data, "parent %s\n", sha1_to_hex(merge_list->sha1));
+ free(merge_list);
+ merge_list = next;
+ }
+ strbuf_addf(&new_data,
+ "author %s\n"
+ "committer %s\n"
+ "\n",
+ author ? author : committer, committer);
+ strbuf_addbuf(&new_data, &msg);
+ free(author);
+ free(committer);
+
+ store_object(OBJ_COMMIT, &new_data, NULL, b->sha1, atom);
+
+ read_next_command();
+ return 0;
+}
+
+static int fi_command_tag(void)
+{
+ static struct strbuf msg = STRBUF_INIT;
+ static struct strbuf new_data = STRBUF_INIT;
+ char *sp;
+ const char *from;
+ char *tagger;
+ struct fi_branch *s;
+ struct fi_tag *t;
+ int hash;
+ unsigned char sha1[20];
+ enum object_type type;
+
+ /* Obtain the new tag name from the rest of our command */
+ sp = strchr(command_buf.buf, ' ') + 1;
+ hash = hc_str(sp, strlen(sp));
+
+ t = xmalloc(sizeof(struct fi_tag));
+ t->next_tag = lookup_hash(hash, &tags);
+ t->name = xstrdup(sp);
+ insert_hash(hash, t, &tags);
+
+ read_next_command();
+
+ /* from ... */
+ if (prefixcmp(command_buf.buf, "from "))
+ die("Expected from command, got %s", command_buf.buf);
+ from = strchr(command_buf.buf, ' ') + 1;
+ s = lookup_branch(from);
+ if (s) {
+ hashcpy(sha1, s->sha1);
+ } else if (*from == ':') {
+ find_mark(from + 1, sha1);
+ } else if (get_sha1(from, sha1)) {
+ die("Invalid ref name or SHA1 expression: %s", from);
+ }
+
+ type = fi_sha1_object_type(sha1);
+ read_next_command();
+
+ /* tagger ... */
+ if (!prefixcmp(command_buf.buf, "tagger ")) {
+ tagger = parse_ident(command_buf.buf + 7);
+ read_next_command();
+ } else
+ tagger = NULL;
+
+ /* tag payload/message */
+ parse_data(&msg, 0, NULL);
+
+ /* build the tag object */
+ strbuf_reset(&new_data);
+
+ strbuf_addf(&new_data,
+ "object %s\n"
+ "type %s\n"
+ "tag %s\n",
+ sha1_to_hex(sha1), typename(type), t->name);
+ if (tagger)
+ strbuf_addf(&new_data,
+ "tagger %s\n", tagger);
+ strbuf_addch(&new_data, '\n');
+ strbuf_addbuf(&new_data, &msg);
+ free(tagger);
+
+ store_object(OBJ_TAG, &new_data, NULL, t->sha1, NULL);
+
+ read_next_command();
+ return 0;
+}
+
+static int fi_command_reset(void)
+{
+ struct fi_branch *b;
+ char *sp;
+
+ /* Obtain the branch name from the rest of our command */
+ sp = strchr(command_buf.buf, ' ') + 1;
+ b = lookup_branch(sp);
+ if (b) {
+ hashclr(b->sha1);
+ hashclr(b->branch_tree.versions[0].sha1);
+ hashclr(b->branch_tree.versions[1].sha1);
+ if (b->branch_tree.tree) {
+ release_tree_content_recursive(b->branch_tree.tree);
+ b->branch_tree.tree = NULL;
+ }
+ }
+ else
+ b = new_branch(sp);
+ read_next_command();
+ parse_from(b);
+
+ return 0;
+}
+
+static int fi_command_blob(void)
+{
+ struct fi_atom *atom;
+
+ read_next_command();
+ atom = parse_mark();
+ parse_and_store_blob(&last_blob, NULL, atom);
+
+ read_next_command();
+ return 0;
+}
+
+static int fi_command_checkpoint(void)
+{
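+ /* Flush the current packfile so the objects written so far
+ * become available to the repository. */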
+ if (objects.nr) {
+ cycle_packfile();
+ }
+ skip_optional_lf();
+
+ read_next_command();
+ return 0;
+}
+
+static int fi_command_progress(void)
+{
+ skip_optional_lf();
+ read_next_command();
+ return 0;
+}
+
+static int fi_command_feature(void)
+{
+ char *feature = command_buf.buf + 8;
+
+ if (!prefixcmp(feature, "date-format=")) {
+ char *fmt = feature + 12;
+ if (!strcmp(fmt, "raw")) {
+ whenspec = WHENSPEC_RAW;
+ } else if (!strcmp(fmt, "rfc2822")) {
+ whenspec = WHENSPEC_RFC2822;
+ } else if (!strcmp(fmt, "now")) {
+ whenspec = WHENSPEC_NOW;
+ } else {
+ return 1;
+ }
+ } else if (!prefixcmp(feature, "force")) {
+ force_update = 1;
+ } else {
+ return 1;
+ }
+
+ read_next_command();
+ return 0;
+}
+
+static int fi_command_option(void)
+{
+ read_next_command();
+ return 0;
+}
+
+/* mark SP :<name> SP <value> LF */
+static int fi_command_mark(void)
+{
+ struct fi_atom *atom;
+ unsigned char sha1[20];
+ char *end = strchr(command_buf.buf + 6, ' ');
+
+ if (!end || get_sha1(end + 1, sha1))
+ die("Invalid mark command: %s", command_buf.buf);
+ atom = to_atom(command_buf.buf + 6, end - command_buf.buf - 6);
+
+ insert_mark(atom, sha1);
+
+ read_next_command();
+ return 0;
+}
+
+/* List of commands we understand. */
+struct fi_command {
+ const char *cmd;
+ int (*func)(void);
+} fi_command[] = {
+ { "commit", fi_command_commit },
+ { "tag", fi_command_tag },
+ { "reset", fi_command_reset },
+ { "blob", fi_command_blob },
+ { "checkpoint", fi_command_checkpoint },
+ { "progress", fi_command_progress },
+ { "feature", fi_command_feature },
+ { "option", fi_command_option },
+ { "mark", fi_command_mark },
+};
+
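+ /* Commands are looked up by prefix match against the start of the
+ * input line. */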
+static struct fi_command *find_command(const char *cmd)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fi_command); ++i) {
+ if (!prefixcmp(command_buf.buf, fi_command[i].cmd)) {
+ return &fi_command[i];
+ }
+ }
+
+ return NULL;
+}
+
+int main(int argc, const char **argv)
+{
+ git_extract_argv0_path(argv[0]);
+
+ setup_git_directory();
+ git_config(git_pack_config, NULL);
+ if (!pack_compression_seen && core_compression_seen)
+ pack_compression_level = core_compression_level;
+
+ set_die_routine(die_nicely);
+
+ /* Initialize hash tables. */
+ init_hash(&atoms);
+ init_hash(&tags);
+ init_hash(&branches);
+ init_hash(&marks);
+ init_hash(&objects);
+
+ prepare_packed_git();
+ start_packfile();
+
+ read_next_command();
+ while (command_buf.len > 0) {
+ struct fi_command *cmd = find_command(command_buf.buf);
+ int err;
+
+ if (!cmd)
+ die("Unsupported command: %s", command_buf.buf);
+
+ err = (*cmd->func)();
+ if (err)
+ die("Command failed");
+
+ fflush(stdout);
+ }
+
+ end_packfile();
+
+ return 0;
+}
--
1.7.3.37.gb6088b
* [PATCH 6/6] Add git-remote-svn
2010-10-03 11:33 [RFC] New type of remote helpers Tomas Carnecky
` (4 preceding siblings ...)
2010-10-03 12:21 ` [PATCH 5/6] Introduce the git fast-import-helper Tomas Carnecky
@ 2010-10-03 12:21 ` Tomas Carnecky
2010-10-05 2:26 ` Jonathan Nieder
2010-10-03 13:56 ` [RFC] New type of remote helpers Sverre Rabbelier
6 siblings, 1 reply; 21+ messages in thread
From: Tomas Carnecky @ 2010-10-03 12:21 UTC (permalink / raw)
To: git; +Cc: Tomas Carnecky
This is an experimental git remote helper for svn repositories. It uses
the new git fast-import-helper and only works with local svn repositories
(not over the network). It stores the git commit -> svn revision mapping
as notes under refs/notes/svn.
This remote helper serves as a technology preview of what the new type
of remote helpers can do.
It assumes that the svn repository uses the standard layout (trunk,
branches).
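For illustration only (the interface is experimental and may change):
such a remote would typically be added with a URL like
svn::/path/to/local/repo, and each imported commit gets a note of the
form <repository-uuid>/<path>@<revision> (for example <uuid>/trunk@42)
under refs/notes/svn, which later fetches read back to pick the best
parent for an incremental import.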
---
.gitignore | 1 +
Makefile | 1 +
git-remote-svn.py | 408 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 410 insertions(+), 0 deletions(-)
create mode 100644 git-remote-svn.py
diff --git a/.gitignore b/.gitignore
index c8aa8c7..0a6011c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,6 +114,7 @@
/git-remote-ftp
/git-remote-ftps
/git-remote-testgit
+/git-remote-svn
/git-repack
/git-replace
/git-repo-config
diff --git a/Makefile b/Makefile
index f8a9c40..eb959c9 100644
--- a/Makefile
+++ b/Makefile
@@ -387,6 +387,7 @@ SCRIPT_PERL += git-send-email.perl
SCRIPT_PERL += git-svn.perl
SCRIPT_PYTHON += git-remote-testgit.py
+SCRIPT_PYTHON += git-remote-svn.py
SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
$(patsubst %.perl,%,$(SCRIPT_PERL)) \
diff --git a/git-remote-svn.py b/git-remote-svn.py
new file mode 100644
index 0000000..c617743
--- /dev/null
+++ b/git-remote-svn.py
@@ -0,0 +1,408 @@
+#!/usr/bin/env python
+
+import sys, os, re, time, subprocess
+import svn.core, svn.repos, svn.fs
+
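+# Single-letter change types, indexed by the change_kind of an svn path change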
+ct_short = ['M', 'A', 'D', 'R', 'X']
+
+############################################################################
+# Class which encapsulates the fast import helper. Provides methods to
+# read/write data to it.
+class FastImportHelper:
+
+ def start(self):
+ PIPE = subprocess.PIPE
+ args = ['git', 'fast-import-helper']
+ self.helper = subprocess.Popen(args, stdin=PIPE, stdout=PIPE)
+
+ def close(self):
+ self.helper.stdin.close()
+ self.helper.wait()
+ del self.helper
+
+ def write(self, data):
+ self.helper.stdin.write(data)
+
+ # Expect the helper to write the mark mapping to its stdout. Verify that
+ # the mark is the same one we gave and return the git object name
+ def response(self, mark):
+ line = self.helper.stdout.readline().strip().split(' ')
+ assert(str(line[1][1:]) == str(mark))
+
+ return line[2]
+
+ # Make a commit from the given arguments, return the git object name
+ # corresponding to that just created commit
+ def commit(self, mark, ref, parents, author, committer, changes, message):
+ self.write("commit %s\n" % ref)
+ self.write("mark :%s\n" % mark)
+ if author:
+ self.write("author %s %s -0000\n" % author)
+ self.write("committer %s %s -0000\n" % committer)
+ self.write("data %s\n" % len(message))
+ self.write(message)
+
+ parent = parents.pop(0)
+ if parent:
+ self.write("from %s\n" % parent)
+ for parent in parents:
+ self.write("merge %s\n", parent)
+
+ self.write(''.join(changes))
+
+ # Make it happen
+ self.write("\n")
+ return self.response(mark)
+
+ # Create a blob from the given arguments. 'read' is a callable
+ # which returns data. Return the git object name
+ def blob(self, mark, length, read):
+ self.write("blob\nmark :%s\n" % mark)
+ self.write("data %s\n" % length)
+
+ while length > 0:
+ avail = min(length, 4096)
+ data = read(avail)
+ self.write(data)
+ length -= avail
+
+ # Make it happen
+ self.write("\n")
+ return self.response(mark)
+
+
+
+############################################################################
+# Base class for python remote helpers. It handles the main command loop.
+# This class also manages the fast-import-helper and marks. If you want to
+# use the fih, call self.fih.start() first and after you're done call .close()
+class RemoteHelper(object):
+
+ def __init__(self, kind):
+ self.kind = kind
+ self.fih = FastImportHelper()
+
+ self.notes = []
+
+ # nfrom is the current notes commit; we'll need it later when adding
+ # new notes. If the notes ref already exists, store its object name
+ # in nfrom, otherwise leave nfrom as None.
+ argv = [ 'git', 'rev-parse', 'refs/notes/%s^0' % kind ]
+ PIPE = subprocess.PIPE
+ proc = subprocess.Popen(argv, stdout=PIPE, stderr=PIPE)
+ proc.wait()
+ if proc.returncode == 0:
+ self.nfrom = proc.stdout.readline().strip()
+ else:
+ self.nfrom = None
+
+ # The commands we understand
+ COMMANDS = ( 'capabilities', 'list', 'fetch', )
+
+ # Read next command. Raise an exception if the command is invalid.
+ # Return a tuple (command, args,)
+ def read_next_command(self):
+ line = sys.stdin.readline()
+ if not line:
+ return ( None, None, )
+
+ cmdline = line.strip().split()
+ if not cmdline:
+ return ( None, None, )
+
+ cmd = cmdline.pop(0)
+ if cmd not in self.COMMANDS:
+ raise Exception("Invalid command '%s'" % cmd)
+
+ return ( cmd, cmdline, )
+
+ # Run the remote helper, process commands until the end of the world. Or
+ # until we're told to finish.
+ def run(self):
+ while (True):
+ ( cmd, args, ) = self.read_next_command()
+ if cmd is None:
+ return
+
+ func = getattr(self, cmd, None)
+ if func is None or not callable(func):
+ raise Exception("Command '%s' not implemented" % cmd)
+
+ result = func(args)
+ sys.stdout.flush()
+
+
+ # Convenience method for writing data back to git
+ def reply(self, data):
+ sys.stdout.write(data)
+
+ # Return all refs and the contents of the note attached to each.
+ # The remote helper can use this to find out the latest version
+ # that has been fetched into this repo.
+ # Returns a list of tuples (sha1, typename, refname, note,)
+ def refs(self):
+ refs = []
+
+ PIPE = subprocess.PIPE
+ args = [ 'git', 'for-each-ref' ]
+ gfer = subprocess.Popen(args, stdin=PIPE, stdout=PIPE)
+
+ # Regular expression matching for-each-ref output: "<sha1> <type>\t<refname>"
+ pattern = re.compile(r"(.{40}) (\w+)\s+(.*)")
+ while (True):
+ line = gfer.stdout.readline()
+ if not line:
+ break
+
+ match = pattern.match(line)
+
+ # The sha1 and name of the ref
+ sha1 = match.group(1)
+ typename = match.group(2)
+ refname = match.group(3)
+
+ # Extract the note using `git notes show <sha>`
+ git_notes_show = [ 'git', 'notes', 'show', sha1 ]
+
+ # Set GIT_NOTES_REF to point to the notes of our kind, keeping the
+ # rest of the environment (PATH, GIT_DIR, ...) intact
+ env = dict(os.environ)
+ env["GIT_NOTES_REF"] = "refs/notes/%s" % self.kind
+
+ note = subprocess.Popen(git_notes_show, env=env, stdout=PIPE, stderr=PIPE)
+ refs.append(( sha1, typename, refname, note.stdout.readline() ))
+
+ gfer.wait()
+
+ return refs
+
+ # Attach text to an object. objects are currently limited to commits
+ def note(self, obj, text):
+ self.notes.append(( obj, text, ))
+ if len(self.notes) >= 10:
+ self.flush()
+
+ # Commit all outstanding notes. Don't forget to flush the notes before
+ # you close the fih
+ def flush(self):
+ if len(self.notes) == 0:
+ return
+
+ now = int(time.time())
+ mark = "%s-notes" % self.kind
+ ref = "refs/notes/%s" % self.kind
+ parents = [ self.nfrom ]
+ author = ( 'nobody <nobody@localhost>', now, )
+ committer = ( 'nobody <nobody@localhost>', now, )
+ message = "Update notes"
+
+ changes = []
+ for ( obj, text, ) in self.notes:
+ changes.append("N inline %s\ndata %s\n" % (obj, len(text)))
+ changes.append(text)
+ changes.append("\n")
+
+ self.nfrom = self.fih.commit(mark, ref, parents, author, committer, changes, message)
+ self.notes = []
+
+
+
+############################################################################
+# Remote helper for Subversion
+class RemoteHelperSubversion(RemoteHelper):
+
+ def __init__(self, url):
+ super(RemoteHelperSubversion, self).__init__("svn")
+
+ url = svn.core.svn_path_canonicalize(url)
+ self.repo = svn.repos.svn_repos_open(url)
+ self.fs = svn.repos.svn_repos_fs(self.repo)
+ self.uuid = svn.fs.svn_fs_get_uuid(self.fs)
+
+
+ # Here follow the commands this helper implements
+
+ # RH command 'capabilities'
+ def capabilities(self, args):
+ self.reply("list\nfetch\n\n")
+
+ # RH command 'list'
+ def list(self, args):
+ rev = svn.fs.svn_fs_youngest_rev(self.fs)
+ root = svn.fs.svn_fs_revision_root(self.fs, rev)
+
+ refs = self.discover(root)
+ for ( name, rev, ) in refs:
+ self.reply(":r%s %s\n" % ( rev, name, ))
+
+ if len(refs) > 0:
+ self.reply("@%s HEAD\n" % refs[0][0])
+ self.reply("\n")
+
+ # RH command 'fetch'
+ def fetch(self, args):
+ # Start the fast-import helper
+ self.fih.start()
+
+ # Fetches are done in batches. Process fetch lines until we see a
+ # blank line
+ while args:
+ # The revision to fetch, strip the leading 'r' from 'r42'
+ new = int(args[0][1:])
+
+ # Trailing slash to ensure that it's a directory
+ prefix = "/%s/" % args[1]
+
+ ( sha1, old, ) = self.parent(args[1])
+ sys.stderr.write("Best parent: %s %s\n" % (old, new,))
+
+ if old != new:
+ sha1 = self.fi(prefix, old, new, sha1)
+ self.reply("map r%s %s\n" % ( new, sha1 ))
+
+ # Read next line, break if it's a newline (ending this fetch batch)
+ ( cmd, args, ) = self.read_next_command()
+ if not cmd:
+ break
+
+ self.flush()
+ self.fih.close()
+
+ # Before finishing this command, make sure to emit the 'silent'
+ # command to register the notes
+ self.reply("silent refs/notes/%s %s\n" % (self.kind, self.nfrom, ))
+
+ self.reply("\n")
+
+ # Discover all refs (trunk, branches) in the repository
+ def discover(self, root):
+ refs = []
+
+ # First check /trunk
+ entries = svn.fs.svn_fs_dir_entries(root, "/")
+ names = entries.keys()
+
+ if 'trunk' in names:
+ refs.append(( 'trunk', self.rev(root, '/trunk'), ))
+
+ if 'branches' in names:
+ entries = svn.fs.svn_fs_dir_entries(root, "/branches")
+ names = entries.keys()
+ for name in names:
+ refs.append(( 'branches/'+name, self.rev(root, '/branches/%s' % name), ))
+
+ return refs
+
+ # Get the revision when `path` was last modified
+ def rev(self, root, path):
+ history = svn.fs.svn_fs_node_history(root, path)
+
+ # Yes, this is required: advance once to reach the revision in
+ # which the path was last changed.
+ history = svn.fs.svn_fs_history_prev(history, True)
+ if not history:
+ return 1
+
+ ( path, rev, ) = svn.fs.svn_fs_history_location(history)
+ return rev
+
+
+ # Find the git commit we can use as parent when importing from the
+ # repo with the given prefix. All commits imported from svn
+ # have a note attached which contains this information. But to make our
+ # job easier, we only scan ref heads and not the whole history.
+ # Go through all refs, see which one has a note that matches the given
+ # prefix and extract the svn revision number from the note.
+ # Return a tuple (sha1, rev,) which identifies the git commit and svn
+ # revision.
+ def parent(self, prefix):
+ pattern = re.compile(r"([0-9a-h-]+)/([^@]*)@(\d+)")
+ res = []
+ for ( sha1, typename, name, note, ) in self.refs():
+ if typename != "commit":
+ continue
+
+ match = pattern.match(note)
+ if not match:
+ continue
+
+ if match.group(2) == prefix and match.group(1) == self.uuid:
+ rev = int(match.group(3))
+ res.append(( sha1, rev ))
+
+ if len(res) == 0:
+ return ( None, 1, )
+
+ # Prefer the ref whose note records the highest svn revision
+ res.sort(key=lambda x: x[1], reverse=True)
+ return res[0]
+
+
+ # Run fast import of revision `old` up to `new`, only considering files
+ # under the given prefix. Use `sha1` as the parent of the first commit.
+ # Return the git commit name that corresponds to the last revision so
+ # we can report it back to git.
+ def fi(self, prefix, old, new, sha1):
+ for rev in xrange(old or 1, new + 1):
+ sha1 = self.feed(rev, prefix, sha1)
+
+ return sha1
+
+
+ # Feed the fast-import helper with the given revision
+ def feed(self, rev, prefix, sha1):
+ # Open the root at that revision and get the changes
+ root = svn.fs.svn_fs_revision_root(self.fs, rev)
+ changes = svn.fs.svn_fs_paths_changed(root)
+
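+ # Blob marks are small integers reused for every revision; the commit
+ # itself is later marked with the revision number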
+ i, file_changes = 1, []
+ for path, change_type in changes.iteritems():
+ if svn.fs.svn_fs_is_dir(root, path):
+ continue
+ if not path.startswith(prefix):
+ continue
+
+ realpath = path.replace(prefix, '')
+
+ c_t = ct_short[change_type.change_kind]
+ if c_t == 'D':
+ file_changes.append("D %s\n" % realpath)
+ else:
+ file_changes.append("M 644 :%s %s\n" % (i, realpath))
+
+ length = int(svn.fs.svn_fs_file_length(root, path))
+ stream = svn.fs.svn_fs_file_contents(root, path)
+ read = lambda x: svn.core.svn_stream_read(stream, x)
+ self.fih.blob(i, length, read)
+ svn.core.svn_stream_close(stream)
+ i += 1
+
+ if len(file_changes) == 0:
+ return sha1
+
+ props = svn.fs.svn_fs_revision_proplist(self.fs, rev)
+
+ # Collect all the needed information to create the commit
+ mark = str(rev)
+ ref = "refs/heads/master"
+ parents = [ sha1 ]
+
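+ # svn:date looks like 2010-10-03T11:33:00.123456Z; strip the last
+ # 8 characters (fractional seconds and the trailing Z) so strptime
+ # can parse it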
+ svndate = props['svn:date'][0:-8]
+ commit_time = time.mktime(time.strptime(svndate, '%Y-%m-%dT%H:%M:%S'))
+
+ if props.has_key('svn:author'):
+ author = "%s <%s@localhost>" % (props['svn:author'], props['svn:author'])
+ else:
+ author = 'nobody <nobody@localhost>'
+
+ committer = ( author, int(commit_time), )
+ message = props['svn:log']
+
+ sha1 = self.fih.commit(mark, ref, parents, None, committer, file_changes, message)
+
+ note = "%s%s@%s\n" % (svn.fs.svn_fs_get_uuid(self.fs), prefix[:-1], rev)
+ self.note(sha1, note)
+
+ return sha1
+
+
+
+if __name__ == '__main__':
+ helper = RemoteHelperSubversion(sys.argv[2])
+ helper.run()
--
1.7.3.37.gb6088b