git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC] Add support for alternates in HTTP
@ 2005-09-15  3:26 Daniel Barkalow
  2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
  0 siblings, 1 reply; 4+ messages in thread
From: Daniel Barkalow @ 2005-09-15  3:26 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

This tries .../objects/info/http-alternates and then
.../objects/info/alternates, looking for a file which specifies where
else to download objects and packs from.

It currently only supports absolute paths, and doesn't support full URLs.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>

---
This is probably pu material at this point; there's a bit more parsing 
work that ought to get done once there are suitable test cases out there 
for me to try.

It tries to find additional places it should look for things to download. 
What I'd like it to do is:

 Try .../objects/info/http-alternates, which can be a full URL, an 
 absolute path (same server), or a relative path based on the objects 
 directory. This should specify the objects directory, not the base.

 Try .../objects/info/alternates, which can be a relative path as above, 
 or, just in case it works, an absolute path.

Currently implemented are the two absolute path cases, which let me fetch 
from the aegl repository, getting Linus's pack from his repository.

 http-fetch.c |  169 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 134 insertions(+), 35 deletions(-)

c01c2fc40aafddc23a14ffdc07de3bf950e323a4
diff --git a/http-fetch.c b/http-fetch.c
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -15,7 +15,17 @@
 
 static CURL *curl;
 
-static char *base;
+static char *initial_base;
+
+struct alt_base
+{
+	char *base;
+	int got_indices;
+	struct packed_git *packs;
+	struct alt_base *next;
+};
+
+struct alt_base *alt = NULL;
 
 static SHA_CTX c;
 static z_stream stream;
@@ -71,11 +81,9 @@ void prefetch(unsigned char *sha1)
 {
 }
 
-static int got_indices = 0;
-
-static struct packed_git *packs = NULL;
+static int got_alternates = 0;
 
-static int fetch_index(unsigned char *sha1)
+static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 {
 	char *filename;
 	char *url;
@@ -89,9 +97,9 @@ static int fetch_index(unsigned char *sh
 		fprintf(stderr, "Getting index for pack %s\n",
 			sha1_to_hex(sha1));
 	
-	url = xmalloc(strlen(base) + 64);
+	url = xmalloc(strlen(repo->base) + 64);
 	sprintf(url, "%s/objects/pack/pack-%s.idx",
-		base, sha1_to_hex(sha1));
+		repo->base, sha1_to_hex(sha1));
 	
 	filename = sha1_pack_index_name(sha1);
 	indexfile = fopen(filename, "w");
@@ -112,22 +120,92 @@ static int fetch_index(unsigned char *sh
 	return 0;
 }
 
-static int setup_index(unsigned char *sha1)
+static int setup_index(struct alt_base *repo, unsigned char *sha1)
 {
 	struct packed_git *new_pack;
 	if (has_pack_file(sha1))
 		return 0; // don't list this as something we can get
 
-	if (fetch_index(sha1))
+	if (fetch_index(repo, sha1))
 		return -1;
 
 	new_pack = parse_pack_index(sha1);
-	new_pack->next = packs;
-	packs = new_pack;
+	new_pack->next = repo->packs;
+	repo->packs = new_pack;
 	return 0;
 }
 
-static int fetch_indices(void)
+static int fetch_alternates(char *base)
+{
+	int ret = 0;
+	struct buffer buffer;
+	char *url;
+	char *data;
+	int i = 0;
+	if (got_alternates)
+		return 0;
+	data = xmalloc(4096);
+	buffer.size = 4096;
+	buffer.posn = 0;
+	buffer.buffer = data;
+
+	if (get_verbosely)
+		fprintf(stderr, "Getting alternates list\n");
+	
+	url = xmalloc(strlen(base) + 31);
+	sprintf(url, "%s/objects/info/http-alternates", base);
+
+	curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
+	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+	curl_easy_setopt(curl, CURLOPT_URL, url);
+
+	if (curl_easy_perform(curl) || !buffer.posn) {
+		sprintf(url, "%s/objects/info/alternates", base);
+		
+		curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
+		curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+		curl_easy_setopt(curl, CURLOPT_URL, url);
+		
+		if (curl_easy_perform(curl)) {
+			return 0;
+		}
+	}
+
+	while (i < buffer.posn) {
+		int posn = i;
+		while (posn < buffer.posn && data[posn] != '\n')
+			posn++;
+		if (data[posn] == '\n') {
+			if (data[i] == '/') {
+				int serverlen = strchr(base + 8, '/') - base;
+				// skip 'objects' at end
+				char *target = 
+					xmalloc(serverlen + posn - i - 6);
+				struct alt_base *newalt;
+				strncpy(target, base, serverlen);
+				strncpy(target + serverlen, data + i,
+					posn - i - 7);
+				target[serverlen + posn - i - 7] = '\0';
+				if (get_verbosely)
+					fprintf(stderr, 
+						"Also look at %s\n", target);
+				newalt = xmalloc(sizeof(*newalt));
+				newalt->next = alt;
+				newalt->base = target;
+				newalt->got_indices = 0;
+				newalt->packs = NULL;
+				alt = newalt;
+				ret++;
+			}
+		}
+		i = posn + 1;
+	}
+	got_alternates = 1;
+	
+	return ret;
+}
+
+static int fetch_indices(struct alt_base *repo)
 {
 	unsigned char sha1[20];
 	char *url;
@@ -135,7 +213,7 @@ static int fetch_indices(void)
 	char *data;
 	int i = 0;
 
-	if (got_indices)
+	if (repo->got_indices)
 		return 0;
 
 	data = xmalloc(4096);
@@ -146,8 +224,8 @@ static int fetch_indices(void)
 	if (get_verbosely)
 		fprintf(stderr, "Getting pack list\n");
 	
-	url = xmalloc(strlen(base) + 21);
-	sprintf(url, "%s/objects/info/packs", base);
+	url = xmalloc(strlen(repo->base) + 21);
+	sprintf(url, "%s/objects/info/packs", repo->base);
 
 	curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
 	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
@@ -157,7 +235,7 @@ static int fetch_indices(void)
 		return error("Unable to get pack index %s", url);
 	}
 
-	do {
+	while (i < buffer.posn) {
 		switch (data[i]) {
 		case 'P':
 			i++;
@@ -165,7 +243,7 @@ static int fetch_indices(void)
 			    !strncmp(data + i, " pack-", 6) &&
 			    !strncmp(data + i + 46, ".pack\n", 6)) {
 				get_sha1_hex(data + i + 6, sha1);
-				setup_index(sha1);
+				setup_index(repo, sha1);
 				i += 51;
 				break;
 			}
@@ -174,13 +252,13 @@ static int fetch_indices(void)
 				i++;
 		}
 		i++;
-	} while (i < buffer.posn);
+	}
 
-	got_indices = 1;
+	repo->got_indices = 1;
 	return 0;
 }
 
-static int fetch_pack(unsigned char *sha1)
+static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 {
 	char *url;
 	struct packed_git *target;
@@ -188,12 +266,11 @@ static int fetch_pack(unsigned char *sha
 	FILE *packfile;
 	char *filename;
 
-	if (fetch_indices())
+	if (fetch_indices(repo))
 		return -1;
-	target = find_sha1_pack(sha1, packs);
+	target = find_sha1_pack(sha1, repo->packs);
 	if (!target)
-		return error("Couldn't get %s: not separate or in any pack",
-			     sha1_to_hex(sha1));
+		return -1;
 
 	if (get_verbosely) {
 		fprintf(stderr, "Getting pack %s\n",
@@ -202,9 +279,9 @@ static int fetch_pack(unsigned char *sha
 			sha1_to_hex(sha1));
 	}
 
-	url = xmalloc(strlen(base) + 65);
+	url = xmalloc(strlen(repo->base) + 65);
 	sprintf(url, "%s/objects/pack/pack-%s.pack",
-		base, sha1_to_hex(target->sha1));
+		repo->base, sha1_to_hex(target->sha1));
 
 	filename = sha1_pack_name(target->sha1);
 	packfile = fopen(filename, "w");
@@ -223,7 +300,7 @@ static int fetch_pack(unsigned char *sha
 
 	fclose(packfile);
 
-	lst = &packs;
+	lst = &repo->packs;
 	while (*lst != target)
 		lst = &((*lst)->next);
 	*lst = (*lst)->next;
@@ -233,7 +310,7 @@ static int fetch_pack(unsigned char *sha
 	return 0;
 }
 
-int fetch(unsigned char *sha1)
+int fetch_object(struct alt_base *repo, unsigned char *sha1)
 {
 	char *hex = sha1_to_hex(sha1);
 	char *filename = sha1_file_name(sha1);
@@ -256,9 +333,9 @@ int fetch(unsigned char *sha1)
 	curl_easy_setopt(curl, CURLOPT_FILE, NULL);
 	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 
-	url = xmalloc(strlen(base) + 50);
-	strcpy(url, base);
-	posn = url + strlen(base);
+	url = xmalloc(strlen(repo->base) + 50);
+	strcpy(url, repo->base);
+	posn = url + strlen(repo->base);
 	strcpy(posn, "objects/");
 	posn += 8;
 	memcpy(posn, hex, 2);
@@ -270,9 +347,7 @@ int fetch(unsigned char *sha1)
 
 	if (curl_easy_perform(curl)) {
 		unlink(filename);
-		if (fetch_pack(sha1))
-			return error("Tried %s", url);
-		return 0;
+		return -1;
 	}
 
 	close(local);
@@ -291,11 +366,30 @@ int fetch(unsigned char *sha1)
 	return 0;
 }
 
+int fetch(unsigned char *sha1)
+{
+	struct alt_base *altbase = alt;
+	while (altbase) {
+		if (!fetch_object(altbase, sha1))
+			return 0;
+		if (!fetch_pack(altbase, sha1))
+			return 0;
+		if (fetch_alternates(altbase->base) > 0) {
+			altbase = alt;
+			continue;
+		}
+		altbase = altbase->next;
+	}
+	return error("Unable to find %s under %s\n", sha1_to_hex(sha1), 
+		     initial_base);
+}
+
 int fetch_ref(char *ref, unsigned char *sha1)
 {
         char *url, *posn;
         char hex[42];
         struct buffer buffer;
+	char *base = initial_base;
         buffer.size = 41;
         buffer.posn = 0;
         buffer.buffer = hex;
@@ -361,7 +455,12 @@ int main(int argc, char **argv)
 	curl_easy_setopt(curl, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
 #endif
 
-	base = url;
+	alt = xmalloc(sizeof(*alt));
+	alt->base = url;
+	alt->got_indices = 0;
+	alt->packs = NULL;
+	alt->next = NULL;
+	initial_base = url;
 
 	if (pull(commit_id))
 		return 1;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Current state of GIT fetch/pull clients
  2005-09-15  3:26 [PATCH RFC] Add support for alternates in HTTP Daniel Barkalow
@ 2005-09-15 23:52 ` Junio C Hamano
  2005-09-16  1:03   ` Jeff Garzik
  2005-09-16  5:49   ` Keith Owens
  0 siblings, 2 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-09-15 23:52 UTC (permalink / raw)
  To: git; +Cc: Daniel Barkalow, Linus Torvalds, linux-kernel

After Linus suggested to use objects/info/alternates to point at
his linux-2.6 tree so that the maintainer trees can borrow from
it, there may be a bit of confusion.  Here is my attempt to
clarify the current state of affairs.

If you are a subsystem maintainer with an account $u at
master.kernel.org and have your repository $tree derived from
Linus' linux-2.6.git repository, it would be at:

      /pub/scm/linux/kernel/git/$u/$tree

Of course, you may have more than one such $tree.  The
suggestion by Linus was to do (please do not do this yet -- that
is what this message is about):

    $ cd /pub/scm/linux/kernel/git/$u/$tree
    $ echo /pub/scm/linux/kernel/git/torvalds/linux-2.6/objects \
        >objects/info/alternates
    $ GIT_DIR=. git prune

What this does is:

    * A process on master.kernel.org and its mirrors running in
      your repository (i.e. /pub/scm/linux/kernel/git/$u/$tree)
      can read objects not found in your repository from Linus'
      repository.  This means that $u/$tree/objects/?? would
      contain only your changes and practically 99% of the
      objects are coming from Linus' repository for such
      process.  You do not have to have any objects Linus'
      repository has in your repository.

Sounds very nice, doesn't it?

However, there are currently certain issues around almost all
git clients, and Cogito cg-pull shares this problem.

Neither http nor rsync transports know about the 'alternates'
mechanism yet, so if a downloader does:

    $ git pull http://kernel.org/pub/scm/linux/kernel/git/$u/$tree
    $ git pull rsync://kernel.org/pub/scm/linux/kernel/git/$u/$tree

unless the downloader has already fetched from Linus'
repository, this will not work.

  * In the case of http transport, it would start from your branch
    head, walking commits backwards, and fail as soon as it hits a
    commit (or a tree/blob) that your repository is borrowing from
    Linus.

  * In the case of rsync transport, it would slurp all objects
    your repository has, but does not get objects from Linus'
    repository.  Also, rsync will overwrite the
    objects/info/alternates file the downloader has in his
    repository with what you have in your repository, which is
    not what we want.

The only transport that works is what Linus uses himself.  If
the downloader has an account on master.kernel.org:

    $ git pull master.kernel.org:/pub/scm/linux/kernel/git/$u/$tree

would work, because this transport runs git in your repository
on the master.kernel.org side, and that knows how to use
objects/info/alternates file you set up as Linus suggested.
Another transport that _could_ work is git-daemon:

    $ git pull git://kernel.org/pub/scm/linux/kernel/git/$u/$tree

but unfortunately kernel.org servers do not run git-daemon yet.

What this means is that using objects/info/alternates mechanism
in your repository is a bit premature as things currently stand,
if you intend your repository to be used by the general public.

Daniel started working on teaching http transport to be able to
read from objects/info/alternates last night, and I am expecting
that to be proven stable and usable sometime next week at the
latest.  HPA is helping us in the discussion to whip git-daemon
into a shape usable on kernel.org to enable it there.  I'll be
fixing rsync transport sometime over this weekend.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Current state of GIT fetch/pull clients
  2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
@ 2005-09-16  1:03   ` Jeff Garzik
  2005-09-16  5:49   ` Keith Owens
  1 sibling, 0 replies; 4+ messages in thread
From: Jeff Garzik @ 2005-09-16  1:03 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Daniel Barkalow, Linus Torvalds, linux-kernel

Junio C Hamano wrote:
> Neither http nor rsync transports know about the 'alternates'
> mechanism yet, so if a downloader does:
> 
>     $ git pull http://kernel.org/pub/scm/linux/kernel/git/$u/$tree
>     $ git pull rsync://kernel.org/pub/scm/linux/kernel/git/$u/$tree
> 
> unless the downloader has already fetched from Linus'
> repository, this will not work.
> 
>   * In the case of rsync transport, it would slurp all objects
>     your repository has, but does not get objects from Linus'
>     repository.  Also, rsync will overwrite the
>     objects/info/alternates file the downloader has in his
>     repository with what you have in your repository, which is
>     not what we want.

Yes, this is why I don't bother with alternates at the present time. 
Users of my repos, at least, have been trained to use rsync://... and 
currently expect to get a working tree that way.

	Jeff

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Current state of GIT fetch/pull clients
  2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
  2005-09-16  1:03   ` Jeff Garzik
@ 2005-09-16  5:49   ` Keith Owens
  1 sibling, 0 replies; 4+ messages in thread
From: Keith Owens @ 2005-09-16  5:49 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, Daniel Barkalow, Linus Torvalds, linux-kernel, tony.luck

On Thu, 15 Sep 2005 16:52:20 -0700, 
Junio C Hamano <junkio@cox.net> wrote:
>What this means is that using objects/info/alternates mechanism
>in your repository is a bit premature as things currently stand,
>if you intend your repository to be used by the general public.

Clients using rsync can use a workaround, although it is a bit clumsy.
I do rsync for selected /pub/scm/linux/kernel/git/ trees, including
torvalds and aegl.  At one point Tony Luck (aegl) was using alternates,
with objects/info/alternates containing
/pub/scm/linux/kernel/git/torvalds/linux-2.6.git/objects

As you described in your mail, rsync with alternates overwrites the
local file, so the local alternates ended up pointing at a local
directory that did not exist.  Creating a local symlink from
/pub/scm/linux/kernel/git to the local directory that contains the
torvalds and aegl directories worked around the problem.  git checkout
on the aegl tree was quite happy to follow the symlink and pick up most
of the files from torvalds.

No point in doing that now, Tony has reverted the aegl tree to a full
copy of Linus, instead of using alternates.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-09-16  5:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-15  3:26 [PATCH RFC] Add support for alternates in HTTP Daniel Barkalow
2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
2005-09-16  1:03   ` Jeff Garzik
2005-09-16  5:49   ` Keith Owens

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).