git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Support multiple virtual repositories with a single object store and refs
@ 2011-05-24  1:02 Josh Triplett
  2011-05-24 19:59 ` Jeff King
  0 siblings, 1 reply; 4+ messages in thread
From: Josh Triplett @ 2011-05-24  1:02 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jamey Sharp

Given many repositories with copies of the same objects (such as
branches of the same source), sharing a common object store will avoid
duplication.  Alternates provide a single baseline, but don't handle
ongoing activity in the various repositories.  Git safely handles
concurrent accesses to the same object store across repositories, but
operations such as gc need to know about all of the refs.

This change adds support in upload-pack and receive-pack to simulate
multiple virtual repositories within the object store and references of
a single underlying repository.  The refs and heads of the virtual
repositories get stored in the underlying repository using prefixed
names specified by the --ref-prefix and --head options; for instance,
--ref-prefix=repo1/ will use refs/repo1/heads/* and refs/repo1/tags/*.
upload-pack and receive-pack will not expose any references that do not
match the specified prefix.

These options implement the underlying mechanism for virtual
repositories; the higher-level protocol handler (such as http-backend or
a custom server) can pass these options when invoking upload-pack or
receive-pack, providing values based on components of the repository
path.  For a simple local test, git-remote-ext works:

git clone ext::'git %s --ref-prefix=prefix/ --head=prefix-HEAD /tmp/prefixed.git'

Commit by Josh Triplett and Jamey Sharp.
Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Jamey Sharp <jamey@minilop.net>
---
 builtin/receive-pack.c |   38 +++++++++++++++++++++++++++++---------
 upload-pack.c          |   34 +++++++++++++++++++++++++++-------
 2 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index e1ba4dc..45d0b35 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -34,6 +34,8 @@ static int prefer_ofs_delta = 1;
 static int auto_update_server_info;
 static int auto_gc = 1;
 static const char *head_name;
+static const char *head_path = "HEAD";
+static const char *ref_prefix = "refs/";
 static int sent_capabilities;
 
 static enum deny_action parse_deny_action(const char *var, const char *value)
@@ -108,11 +110,12 @@ static int receive_pack_config(const char *var, const char *value, void *cb)
 
 static int show_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
+	const char *refnameprefix = cb_data;
 	if (sent_capabilities)
-		packet_write(1, "%s %s\n", sha1_to_hex(sha1), path);
+		packet_write(1, "%s %s%s\n", sha1_to_hex(sha1), refnameprefix, path);
 	else
-		packet_write(1, "%s %s%c%s%s\n",
-			     sha1_to_hex(sha1), path, 0,
+		packet_write(1, "%s %s%s%c%s%s\n",
+			     sha1_to_hex(sha1), refnameprefix, path, 0,
 			     " report-status delete-refs side-band-64k",
 			     prefer_ofs_delta ? " ofs-delta" : "");
 	sent_capabilities = 1;
@@ -121,9 +124,9 @@ static int show_ref(const char *path, const unsigned char *sha1, int flag, void
 
 static void write_head_info(void)
 {
-	for_each_ref(show_ref, NULL);
+	for_each_ref_in(ref_prefix, show_ref, "refs/");
 	if (!sent_capabilities)
-		show_ref("capabilities^{}", null_sha1, 0, NULL);
+		show_ref("capabilities^{}", null_sha1, 0, "");
 
 }
 
@@ -332,6 +335,8 @@ static void refuse_unconfigured_deny_delete_current(void)
 static const char *update(struct command *cmd)
 {
 	const char *name = cmd->ref_name;
+	struct strbuf prefixed_name_buf = STRBUF_INIT;
+	const char *prefixed_name;
 	unsigned char *old_sha1 = cmd->old_sha1;
 	unsigned char *new_sha1 = cmd->new_sha1;
 	struct ref_lock *lock;
@@ -342,7 +347,12 @@ static const char *update(struct command *cmd)
 		return "funny refname";
 	}
 
-	if (is_ref_checked_out(name)) {
+	strbuf_addf(&prefixed_name_buf, "%s%s", ref_prefix, name + 5);
+	prefixed_name = strbuf_detach(&prefixed_name_buf, NULL);
+
+	rp_warning("name \"%s\", prefixed_name \"%s\"", name, prefixed_name);
+
+	if (is_ref_checked_out(prefixed_name)) {
 		switch (deny_current_branch) {
 		case DENY_IGNORE:
 			break;
@@ -370,7 +380,7 @@ static const char *update(struct command *cmd)
 			return "deletion prohibited";
 		}
 
-		if (!strcmp(name, head_name)) {
+		if (!strcmp(prefixed_name, head_name)) {
 			switch (deny_delete_current) {
 			case DENY_IGNORE:
 				break;
@@ -426,14 +436,14 @@ static const char *update(struct command *cmd)
 			rp_warning("Allowing deletion of corrupt ref.");
 			old_sha1 = NULL;
 		}
-		if (delete_ref(name, old_sha1, 0)) {
+		if (delete_ref(prefixed_name, old_sha1, 0)) {
 			rp_error("failed to delete %s", name);
 			return "failed to delete";
 		}
 		return NULL; /* good */
 	}
 	else {
-		lock = lock_any_ref_for_update(name, old_sha1, 0);
+		lock = lock_any_ref_for_update(prefixed_name, old_sha1, 0);
 		if (!lock) {
 			rp_error("failed to lock %s", name);
 			return "failed to lock";
@@ -760,6 +770,16 @@ int cmd_receive_pack(int argc, const char **argv, const char *prefix)
 				advertise_refs = 1;
 				continue;
 			}
+			if (!prefixcmp(arg, "--head=")) {
+				head_path = arg+7;
+				continue;
+			}
+			if (!prefixcmp(arg, "--ref-prefix=")) {
+				struct strbuf prefixbuf = STRBUF_INIT;
+				strbuf_addf(&prefixbuf, "refs/%s", arg+13);
+				ref_prefix = strbuf_detach(&prefixbuf, NULL);
+				continue;
+			}
 			if (!strcmp(arg, "--stateless-rpc")) {
 				stateless_rpc = 1;
 				continue;
diff --git a/upload-pack.c b/upload-pack.c
index ce5cbbe..a1e495f 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -34,6 +34,8 @@ static int shallow_nr;
 static struct object_array have_obj;
 static struct object_array want_obj;
 static struct object_array extra_edge_obj;
+static const char *head_path = "HEAD";
+static const char *ref_prefix = "";
 static unsigned int timeout;
 /* 0 for no sideband,
  * otherwise maximum packet size (up to 65520 bytes).
@@ -640,17 +642,18 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo
 	static const char *capabilities = "multi_ack thin-pack side-band"
 		" side-band-64k ofs-delta shallow no-progress"
 		" include-tag multi_ack_detailed";
+	const char *refnameprefix = cb_data;
 	struct object *o = parse_object(sha1);
 
 	if (!o)
 		die("git upload-pack: cannot find object %s:", sha1_to_hex(sha1));
 
 	if (capabilities)
-		packet_write(1, "%s %s%c%s%s\n", sha1_to_hex(sha1), refname,
+		packet_write(1, "%s %s%s%c%s%s\n", sha1_to_hex(sha1), refnameprefix, refname,
 			     0, capabilities,
 			     stateless_rpc ? " no-done" : "");
 	else
-		packet_write(1, "%s %s\n", sha1_to_hex(sha1), refname);
+		packet_write(1, "%s %s%s\n", sha1_to_hex(sha1), refnameprefix, refname);
 	capabilities = NULL;
 	if (!(o->flags & OUR_REF)) {
 		o->flags |= OUR_REF;
@@ -659,7 +662,7 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo
 	if (o->type == OBJ_TAG) {
 		o = deref_tag(o, refname, 0);
 		if (o)
-			packet_write(1, "%s %s^{}\n", sha1_to_hex(o->sha1), refname);
+			packet_write(1, "%s %s%s^{}\n", sha1_to_hex(o->sha1), refnameprefix, refname);
 	}
 	return 0;
 }
@@ -678,15 +681,24 @@ static int mark_our_ref(const char *refname, const unsigned char *sha1, int flag
 
 static void upload_pack(void)
 {
+	struct strbuf prefix = STRBUF_INIT;
+	unsigned char sha1[20];
+	int flag;
+
+	strbuf_addf(&prefix, "refs/%s", ref_prefix);
 	if (advertise_refs || !stateless_rpc) {
 		reset_timeout();
-		head_ref(send_ref, NULL);
-		for_each_ref(send_ref, NULL);
+		if (resolve_ref(head_path, sha1, 1, &flag))
+			send_ref("HEAD", sha1, flag, "");
+		for_each_ref_in(prefix.buf, send_ref, "refs/");
 		packet_flush(1);
 	} else {
-		head_ref(mark_our_ref, NULL);
-		for_each_ref(mark_our_ref, NULL);
+		if (resolve_ref(head_path, sha1, 1, &flag))
+			mark_our_ref("HEAD", sha1, flag, NULL);
+		for_each_ref_in(prefix.buf, mark_our_ref, NULL);
 	}
+	strbuf_release(&prefix);
+
 	if (advertise_refs)
 		return;
 
@@ -716,6 +728,14 @@ int main(int argc, char **argv)
 			advertise_refs = 1;
 			continue;
 		}
+		if (!prefixcmp(arg, "--head=")) {
+			head_path = arg+7;
+			continue;
+		}
+		if (!prefixcmp(arg, "--ref-prefix=")) {
+			ref_prefix = arg+13;
+			continue;
+		}
 		if (!strcmp(arg, "--stateless-rpc")) {
 			stateless_rpc = 1;
 			continue;
-- 
1.7.5.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] Support multiple virtual repositories with a single object store and refs
  2011-05-24  1:02 [PATCH] Support multiple virtual repositories with a single object store and refs Josh Triplett
@ 2011-05-24 19:59 ` Jeff King
  2011-05-24 22:47   ` Josh Triplett
  0 siblings, 1 reply; 4+ messages in thread
From: Jeff King @ 2011-05-24 19:59 UTC (permalink / raw)
  To: Josh Triplett; +Cc: git, Junio C Hamano, Jamey Sharp

On Mon, May 23, 2011 at 06:02:52PM -0700, Josh Triplett wrote:

> Given many repositories with copies of the same objects (such as
> branches of the same source), sharing a common object store will avoid
> duplication.  Alternates provide a single baseline, but don't handle
> ongoing activity in the various repositories.  Git safely handles
> concurrent accesses to the same object store across repositories, but
> operations such as gc need to know about all of the refs.
> 
> This change adds support in upload-pack and receive-pack to simulate
> multiple virtual repositories within the object store and references of
> a single underlying repository.

Neat idea. It is important to note, though, that it is possible to leak
information between virtual repos that share the same object store. You
can't directly say "give me object ABCD" if you don't have a ref to it,
but you can do some other sneaky things like:

  1. Claiming to push ABCD, at which point the server will optimize out
     the need for you to actually send it. Now you have a ref to ABCD
     and can fetch it (claiming not to have it, of course).

  2. Requesting other refs, claiming that you have ABCD, at which point
     the server may generate deltas against ABCD.

Both are problems with alternates, too, of course. But in the case of
alternates, you can share only a subset of the objects. So every day or
so, you could pack all of the objects that _all_ repos can see into one
big alternates repo, and then each "leaf" repo contains any objects
private to itself.

Of course none of this is a concern if you are just hosting public
repositories, or everyone who gets to see one virtual repo can see
what's in other ones (e.g., everybody is sharing objects within one
organization).

But it may make sense to touch on these issues in the documentation
(which also needs to be written at all :) ).

> The refs and heads of the virtual repositories get stored in the
> underlying repository using prefixed names specified by the
> --ref-prefix and --head options; for instance, --ref-prefix=repo1/
> will use refs/repo1/heads/* and refs/repo1/tags/*.  upload-pack and
> receive-pack will not expose any references that do not match the
> specified prefix.

You have a namespace clash if a repo is named "heads" or "tags" or
"remotes". Should we give it its own namespace, like:

  refs/virtual/repo1/heads/*

?

Also, it seems conceptually simpler to me if it's a straight prefix.
IOW, "refs/heads/foo" in repo1 becomes:

  refs/virtual/repo1/refs/heads/foo

Then if we are operating in the virtual repo1 space, then:

  1. It is an easy test to know whether we are allowed to see a ref:
     "does it start with refs/virtual/$repo/ ?"

  2. Converting back and forth is simple. You just prepend or strip the
     refs/virtual/$repo prefix.

-Peff

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Support multiple virtual repositories with a single object store and refs
  2011-05-24 19:59 ` Jeff King
@ 2011-05-24 22:47   ` Josh Triplett
  2011-05-25 16:56     ` Shawn Pearce
  0 siblings, 1 reply; 4+ messages in thread
From: Josh Triplett @ 2011-05-24 22:47 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Jamey Sharp

Thanks for your feedback!  We just sent v2 of the patch (with a new patch
to http-backend) before seeing your mail, so we'll send out a v3
incorporating your feedback.

On Tue, May 24, 2011 at 03:59:37PM -0400, Jeff King wrote:
> On Mon, May 23, 2011 at 06:02:52PM -0700, Josh Triplett wrote:
> > Given many repositories with copies of the same objects (such as
> > branches of the same source), sharing a common object store will avoid
> > duplication.  Alternates provide a single baseline, but don't handle
> > ongoing activity in the various repositories.  Git safely handles
> > concurrent accesses to the same object store across repositories, but
> > operations such as gc need to know about all of the refs.
> > 
> > This change adds support in upload-pack and receive-pack to simulate
> > multiple virtual repositories within the object store and references of
> > a single underlying repository.
> 
> Neat idea. It is important to note, though, that it is possible to leak
> information between virtual repos that share the same object store. You
> can't directly say "give me object ABCD" if you don't have a ref to it,
> but you can do some other sneaky things like:
> 
>   1. Claiming to push ABCD, at which point the server will optimize out
>      the need for you to actually send it. Now you have a ref to ABCD
>      and can fetch it (claiming not to have it, of course).
> 
>   2. Requesting other refs, claiming that you have ABCD, at which point
>      the server may generate deltas against ABCD.
> 
> Both are problems with alternates, too, of course. But in the case of
> alternates, you can share only a subset of the objects. So every day or
> so, you could pack all of the objects that _all_ repos can see into one
> big alternates repo, and then each "leaf" repo contains any objects
> private to itself.
> 
> Of course none of this is a concern if you are just hosting public
> repositories, or everyone who gets to see one virtual repo can see
> what's in other ones (e.g., everybody is sharing objects within one
> organization).

We hadn't thought of those ways to access objects from another virtual
repository.  We had already planned to use separate storage repositories
for separate security domains for exactly such reasons, though.  Fixing
those issues seems possible if someone cares about security models other
than the everyone-can-read model you mentioned, but given that our use
case fits in that model we'd like to leave that as Someone Else's
Problem(tm). :)

> But it may make sense to touch on these issues in the documentation
> (which also needs to be written at all :) ).

v2 of the patch includes some documentation in the http-backend manpage,
though we probably should have a separate manpage documenting the whole
concept and reference that from any backends which implement it.  How
does gitvirtual(1) sound?

We didn't document the new upload-pack and receive-pack options, but
those programs already seem to have a pile of undocumented options. :)

v3 will include additional documentation, and we'll make sure to mention
the security implications.

> > The refs and heads of the virtual repositories get stored in the
> > underlying repository using prefixed names specified by the
> > --ref-prefix and --head options; for instance, --ref-prefix=repo1/
> > will use refs/repo1/heads/* and refs/repo1/tags/*.  upload-pack and
> > receive-pack will not expose any references that do not match the
> > specified prefix.
> 
> You have a namespace clash if a repo is named "heads" or "tags" or
> "remotes". Should we give it its own namespace, like:
> 
>   refs/virtual/repo1/heads/*
> 
> ?
> 
> Also, it seems conceptually simpler to me if it's a straight prefix.
> IOW, "refs/heads/foo" in repo1 becomes:
> 
>   refs/virtual/repo1/refs/heads/foo
>
> Then if we are operating in the virtual repo1 space, then:
> 
>   1. It is an easy test to know whether we are allowed to see a ref:
>      "does it start with refs/virtual/$repo/ ?"
>
>   2. Converting back and forth is simple. You just prepend or strip the
>      refs/virtual/$repo prefix.

Both of the namespaces you suggested work with our current patch:
--ref-prefix=virtual/repo1/, or --ref-prefix=virtual/repo1/refs/.  We'd
like to leave the exact choice of paths up to the policies of the host,
but your suggestion does seem like a good general namespacing policy.

- Josh Triplett and Jamey Sharp

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Support multiple virtual repositories with a single object store and refs
  2011-05-24 22:47   ` Josh Triplett
@ 2011-05-25 16:56     ` Shawn Pearce
  0 siblings, 0 replies; 4+ messages in thread
From: Shawn Pearce @ 2011-05-25 16:56 UTC (permalink / raw)
  To: Josh Triplett; +Cc: Jeff King, git, Junio C Hamano, Jamey Sharp

On Tue, May 24, 2011 at 15:47, Josh Triplett <josh@joshtriplett.org> wrote:
> On Tue, May 24, 2011 at 03:59:37PM -0400, Jeff King wrote:
>> You have a namespace clash if a repo is named "heads" or "tags" or
>> "remotes". Should we give it its own namespace, like:
>>
>>   refs/virtual/repo1/heads/*

Yes, I strongly agree with Peff here. We should "standardize" the
prefix of "refs/virtual/" for these things, to keep them from
interfering with the other "standard" namespaces of refs/heads,
refs/remotes, refs/tags, refs/notes, and if Gerrit Code Review is
used, refs/changes.

>> Also, it seems conceptually simpler to me if it's a straight prefix.
>> IOW, "refs/heads/foo" in repo1 becomes:
>>
>>   refs/virtual/repo1/refs/heads/foo

I also think this is a great idea. It vastly simplifies the operations
involved and allows each virtual namespace to have its own HEAD within
the virtual namespace, as a sibling of "refs", just like in a normal
repository. It may seem a little ugly to put two refs in there, but I
think this is easily understood by repository owners/administrators
and will keep the implementation much more simple.

>> Then if we are operating in the virtual repo1 space, then:
>>
>>   1. It is an easy test to know whether we are allowed to see a ref:
>>      "does it start with refs/virtual/$repo/ ?"
>>
>>   2. Converting back and forth is simple. You just prepend or strip the
>>      refs/virtual/$repo prefix.
>
> Both of the namespaces you suggested work with our current patch:
> --ref-prefix=virtual/repo1/, or --ref-prefix=virtual/repo1/refs/.  We'd
> like to leave the exact choice of paths up to the policies of the host,
> but your suggestion does seem like a good general namespacing policy.

Ideally your implementation would only add/remove the prefix and
wouldn't muck around with the "refs/" part. Then step 1 and step 2 are
trivial.

Don't forget that a lot of Git usage comes from shell scripts. Being
able to use git for-each-ref with a simple sed script to process these
virtual namespaces is really important. If the sed script just needs
to remove a prefix, this is easy. If it needs to remove part of the
prefix and replace it with something else, it's a bit more complicated for
the script writer to work with.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-05-25 16:57 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-24  1:02 [PATCH] Support multiple virtual repositories with a single object store and refs Josh Triplett
2011-05-24 19:59 ` Jeff King
2011-05-24 22:47   ` Josh Triplett
2011-05-25 16:56     ` Shawn Pearce

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).