* [PATCH RFC] Add support for alternates in HTTP
@ 2005-09-15 3:26 Daniel Barkalow
2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
0 siblings, 1 reply; 4+ messages in thread
From: Daniel Barkalow @ 2005-09-15 3:26 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano
This tries .../objects/info/http-alternates and then
.../objects/info/alternates, looking for a file which specifies where
else to download objects and packs from.
It currently only supports absolute paths, and doesn't support full URLs.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
This is probably pu material at this point; there's a bit more parsing
work that ought to get done once there are suitable test cases out there
for me to try.
It tries to find additional places it should look for things to download.
What I'd like it to do is:
Try .../objects/info/http-alternates, which can be a full URL, an
absolute path (same server), or a relative path based on the objects
directory. This should specify the objects directory, not the base.
Try .../objects/info/alternates, which can be a relative path as above,
or, just in case it works, an absolute path.
Currently implemented are the two absolute path cases, which let me fetch
from the aegl repository, getting Linus's pack from his repository.
http-fetch.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 134 insertions(+), 35 deletions(-)
c01c2fc40aafddc23a14ffdc07de3bf950e323a4
diff --git a/http-fetch.c b/http-fetch.c
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -15,7 +15,17 @@
static CURL *curl;
-static char *base;
+static char *initial_base;
+
+struct alt_base
+{
+ char *base;
+ int got_indices;
+ struct packed_git *packs;
+ struct alt_base *next;
+};
+
+struct alt_base *alt = NULL;
static SHA_CTX c;
static z_stream stream;
@@ -71,11 +81,9 @@ void prefetch(unsigned char *sha1)
{
}
-static int got_indices = 0;
-
-static struct packed_git *packs = NULL;
+static int got_alternates = 0;
-static int fetch_index(unsigned char *sha1)
+static int fetch_index(struct alt_base *repo, unsigned char *sha1)
{
char *filename;
char *url;
@@ -89,9 +97,9 @@ static int fetch_index(unsigned char *sh
fprintf(stderr, "Getting index for pack %s\n",
sha1_to_hex(sha1));
- url = xmalloc(strlen(base) + 64);
+ url = xmalloc(strlen(repo->base) + 64);
sprintf(url, "%s/objects/pack/pack-%s.idx",
- base, sha1_to_hex(sha1));
+ repo->base, sha1_to_hex(sha1));
filename = sha1_pack_index_name(sha1);
indexfile = fopen(filename, "w");
@@ -112,22 +120,92 @@ static int fetch_index(unsigned char *sh
return 0;
}
-static int setup_index(unsigned char *sha1)
+static int setup_index(struct alt_base *repo, unsigned char *sha1)
{
struct packed_git *new_pack;
if (has_pack_file(sha1))
return 0; // don't list this as something we can get
- if (fetch_index(sha1))
+ if (fetch_index(repo, sha1))
return -1;
new_pack = parse_pack_index(sha1);
- new_pack->next = packs;
- packs = new_pack;
+ new_pack->next = repo->packs;
+ repo->packs = new_pack;
return 0;
}
-static int fetch_indices(void)
+static int fetch_alternates(char *base)
+{
+ int ret = 0;
+ struct buffer buffer;
+ char *url;
+ char *data;
+ int i = 0;
+ if (got_alternates)
+ return 0;
+ data = xmalloc(4096);
+ buffer.size = 4096;
+ buffer.posn = 0;
+ buffer.buffer = data;
+
+ if (get_verbosely)
+ fprintf(stderr, "Getting alternates list\n");
+
+ url = xmalloc(strlen(base) + 31);
+ sprintf(url, "%s/objects/info/http-alternates", base);
+
+ curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+ curl_easy_setopt(curl, CURLOPT_URL, url);
+
+ if (curl_easy_perform(curl) || !buffer.posn) {
+ sprintf(url, "%s/objects/info/alternates", base);
+
+ curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+ curl_easy_setopt(curl, CURLOPT_URL, url);
+
+ if (curl_easy_perform(curl)) {
+ return 0;
+ }
+ }
+
+ while (i < buffer.posn) {
+ int posn = i;
+ while (posn < buffer.posn && data[posn] != '\n')
+ posn++;
+ if (data[posn] == '\n') {
+ if (data[i] == '/') {
+ int serverlen = strchr(base + 8, '/') - base;
+ // skip 'objects' at end
+ char *target =
+ xmalloc(serverlen + posn - i - 6);
+ struct alt_base *newalt;
+ strncpy(target, base, serverlen);
+ strncpy(target + serverlen, data + i,
+ posn - i - 7);
+ target[serverlen + posn - i - 7] = '\0';
+ if (get_verbosely)
+ fprintf(stderr,
+ "Also look at %s\n", target);
+ newalt = xmalloc(sizeof(*newalt));
+ newalt->next = alt;
+ newalt->base = target;
+ newalt->got_indices = 0;
+ newalt->packs = NULL;
+ alt = newalt;
+ ret++;
+ }
+ }
+ i = posn + 1;
+ }
+ got_alternates = 1;
+
+ return ret;
+}
+
+static int fetch_indices(struct alt_base *repo)
{
unsigned char sha1[20];
char *url;
@@ -135,7 +213,7 @@ static int fetch_indices(void)
char *data;
int i = 0;
- if (got_indices)
+ if (repo->got_indices)
return 0;
data = xmalloc(4096);
@@ -146,8 +224,8 @@ static int fetch_indices(void)
if (get_verbosely)
fprintf(stderr, "Getting pack list\n");
- url = xmalloc(strlen(base) + 21);
- sprintf(url, "%s/objects/info/packs", base);
+ url = xmalloc(strlen(repo->base) + 21);
+ sprintf(url, "%s/objects/info/packs", repo->base);
curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
@@ -157,7 +235,7 @@ static int fetch_indices(void)
return error("Unable to get pack index %s", url);
}
- do {
+ while (i < buffer.posn) {
switch (data[i]) {
case 'P':
i++;
@@ -165,7 +243,7 @@ static int fetch_indices(void)
!strncmp(data + i, " pack-", 6) &&
!strncmp(data + i + 46, ".pack\n", 6)) {
get_sha1_hex(data + i + 6, sha1);
- setup_index(sha1);
+ setup_index(repo, sha1);
i += 51;
break;
}
@@ -174,13 +252,13 @@ static int fetch_indices(void)
i++;
}
i++;
- } while (i < buffer.posn);
+ }
- got_indices = 1;
+ repo->got_indices = 1;
return 0;
}
-static int fetch_pack(unsigned char *sha1)
+static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
{
char *url;
struct packed_git *target;
@@ -188,12 +266,11 @@ static int fetch_pack(unsigned char *sha
FILE *packfile;
char *filename;
- if (fetch_indices())
+ if (fetch_indices(repo))
return -1;
- target = find_sha1_pack(sha1, packs);
+ target = find_sha1_pack(sha1, repo->packs);
if (!target)
- return error("Couldn't get %s: not separate or in any pack",
- sha1_to_hex(sha1));
+ return -1;
if (get_verbosely) {
fprintf(stderr, "Getting pack %s\n",
@@ -202,9 +279,9 @@ static int fetch_pack(unsigned char *sha
sha1_to_hex(sha1));
}
- url = xmalloc(strlen(base) + 65);
+ url = xmalloc(strlen(repo->base) + 65);
sprintf(url, "%s/objects/pack/pack-%s.pack",
- base, sha1_to_hex(target->sha1));
+ repo->base, sha1_to_hex(target->sha1));
filename = sha1_pack_name(target->sha1);
packfile = fopen(filename, "w");
@@ -223,7 +300,7 @@ static int fetch_pack(unsigned char *sha
fclose(packfile);
- lst = &packs;
+ lst = &repo->packs;
while (*lst != target)
lst = &((*lst)->next);
*lst = (*lst)->next;
@@ -233,7 +310,7 @@ static int fetch_pack(unsigned char *sha
return 0;
}
-int fetch(unsigned char *sha1)
+int fetch_object(struct alt_base *repo, unsigned char *sha1)
{
char *hex = sha1_to_hex(sha1);
char *filename = sha1_file_name(sha1);
@@ -256,9 +333,9 @@ int fetch(unsigned char *sha1)
curl_easy_setopt(curl, CURLOPT_FILE, NULL);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
- url = xmalloc(strlen(base) + 50);
- strcpy(url, base);
- posn = url + strlen(base);
+ url = xmalloc(strlen(repo->base) + 50);
+ strcpy(url, repo->base);
+ posn = url + strlen(repo->base);
strcpy(posn, "objects/");
posn += 8;
memcpy(posn, hex, 2);
@@ -270,9 +347,7 @@ int fetch(unsigned char *sha1)
if (curl_easy_perform(curl)) {
unlink(filename);
- if (fetch_pack(sha1))
- return error("Tried %s", url);
- return 0;
+ return -1;
}
close(local);
@@ -291,11 +366,30 @@ int fetch(unsigned char *sha1)
return 0;
}
+int fetch(unsigned char *sha1)
+{
+ struct alt_base *altbase = alt;
+ while (altbase) {
+ if (!fetch_object(altbase, sha1))
+ return 0;
+ if (!fetch_pack(altbase, sha1))
+ return 0;
+ if (fetch_alternates(altbase->base) > 0) {
+ altbase = alt;
+ continue;
+ }
+ altbase = altbase->next;
+ }
+ return error("Unable to find %s under %s\n", sha1_to_hex(sha1),
+ initial_base);
+}
+
int fetch_ref(char *ref, unsigned char *sha1)
{
char *url, *posn;
char hex[42];
struct buffer buffer;
+ char *base = initial_base;
buffer.size = 41;
buffer.posn = 0;
buffer.buffer = hex;
@@ -361,7 +455,12 @@ int main(int argc, char **argv)
curl_easy_setopt(curl, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
#endif
- base = url;
+ alt = xmalloc(sizeof(*alt));
+ alt->base = url;
+ alt->got_indices = 0;
+ alt->packs = NULL;
+ alt->next = NULL;
+ initial_base = url;
if (pull(commit_id))
return 1;
^ permalink raw reply [flat|nested] 4+ messages in thread
* Current state of GIT fetch/pull clients
2005-09-15 3:26 [PATCH RFC] Add support for alternates in HTTP Daniel Barkalow
@ 2005-09-15 23:52 ` Junio C Hamano
2005-09-16 1:03 ` Jeff Garzik
2005-09-16 5:49 ` Keith Owens
0 siblings, 2 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-09-15 23:52 UTC (permalink / raw)
To: git; +Cc: Daniel Barkalow, Linus Torvalds, linux-kernel
After Linus suggested using objects/info/alternates to point at
his linux-2.6 tree so that the maintainer trees can borrow from
it, there may be a bit of confusion. Here is my attempt to
clarify the current state of affairs.
If you are a subsystem maintainer with an account $u at
master.kernel.org and have your repository $tree derived from
Linus' linux-2.6.git repository, it would be at:
/pub/scm/linux/kernel/git/$u/$tree
Of course, you may have more than one such $tree. The
suggestion by Linus was to do (please do not do this yet -- that
is what this message is about):
$ cd /pub/scm/linux/kernel/git/$u/$tree
$ echo /pub/scm/linux/kernel/git/torvalds/linux-2.6/objects \
>objects/info/alternates
$ GIT_DIR=. git prune
What this does is:
* A process on master.kernel.org and its mirrors running in
your repository (i.e. /pub/scm/linux/kernel/git/$u/$tree)
can read objects not found in your repository from Linus'
repository. This means that $u/$tree/objects/?? would
contain only your changes and practically 99% of the
objects are coming from Linus' repository for such
process. You do not have to have any objects Linus'
repository has in your repository.
Sounds very nice, doesn't it?
However, there are currently certain issues around almost all
git clients, and Cogito cg-pull shares this problem.
Neither http nor rsync transports know about the 'alternates'
mechanism yet, so if a downloader does:
$ git pull http://kernel.org/pub/scm/linux/kernel/git/$u/$tree
$ git pull rsync://kernel.org/pub/scm/linux/kernel/git/$u/$tree
unless the downloader has already fetched from Linus'
repository, this will not work.
* In the case of http transport, it would start from your branch
head, walking commits backwards, and fail as soon as it hits a commit
(or a tree/blob) that your repository is borrowing from Linus.
* In the case of rsync transport, it would slurp all objects
your repository has, but does not get objects from Linus'
repository. Also, rsync will overwrite the
objects/info/alternates file the downloader has in his
repository with what you have in your repository, which is
not what we want.
The only transport that works is what Linus uses himself. If
the downloader has an account on master.kernel.org:
$ git pull master.kernel.org:/pub/scm/linux/kernel/git/$u/$tree
would work, because this transport runs git in your repository
on the master.kernel.org side, and that knows how to use
objects/info/alternates file you set up as Linus suggested.
Another transport that _could_ work is git-daemon:
$ git pull git://kernel.org/pub/scm/linux/kernel/git/$u/$tree
but unfortunately kernel.org servers do not run git-daemon yet.
What this means is that using objects/info/alternates mechanism
in your repository is a bit premature as things currently stand,
if you intend your repository to be used by the general public.
Daniel started working on teaching http transport to be able to
read from objects/info/alternates last night, and I am expecting
that would be proven stable and usable sometime next week at the
latest. HPA is helping us in the discussion to whip git-daemon
into a shape usable on kernel.org to enable it there. I'll be
fixing rsync transport sometime over this weekend.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Current state of GIT fetch/pull clients
2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
@ 2005-09-16 1:03 ` Jeff Garzik
2005-09-16 5:49 ` Keith Owens
1 sibling, 0 replies; 4+ messages in thread
From: Jeff Garzik @ 2005-09-16 1:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Daniel Barkalow, Linus Torvalds, linux-kernel
Junio C Hamano wrote:
> Neither http nor rsync transports know about the 'alternates'
> mechanism yet, so if a downloader does:
>
> $ git pull http://kernel.org/pub/scm/linux/kernel/git/$u/$tree
> $ git pull rsync://kernel.org/pub/scm/linux/kernel/git/$u/$tree
>
> unless the downloader has already fetched from Linus'
> repository, this will not work.
>
> * In the case of rsync transport, it would slurp all objects
> your repository has, but does not get objects from Linus'
> repository. Also, rsync will overwrite the
> objects/info/alternates file the downloader has in his
> repository with what you have in your repository, which is
> not what we want.
Yes, this is why I don't bother with alternates at the present time.
Users of my repos, at least, have been trained to use rsync://... and
currently expect to get a working tree that way.
Jeff
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Current state of GIT fetch/pull clients
2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
2005-09-16 1:03 ` Jeff Garzik
@ 2005-09-16 5:49 ` Keith Owens
1 sibling, 0 replies; 4+ messages in thread
From: Keith Owens @ 2005-09-16 5:49 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, Daniel Barkalow, Linus Torvalds, linux-kernel, tony.luck
On Thu, 15 Sep 2005 16:52:20 -0700,
Junio C Hamano <junkio@cox.net> wrote:
>What this means is that using objects/info/alternates mechanism
>in your repository is a bit premature as things currently stand,
>if you intend your repository to be used by the general public.
Clients using rsync can use a workaround, although it is a bit clumsy.
I do rsync for selected /pub/scm/linux/kernel/git/ trees, including
torvalds and aegl. At one point Tony Luck (aegl) was using alternates,
with objects/info/alternates containing
/pub/scm/linux/kernel/git/torvalds/linux-2.6.git/objects
As you described in your mail, rsync with alternates overwrites the
local file, so the local alternates ended up pointing at a local
directory that did not exist. Creating a local symlink from
/pub/scm/linux/kernel/git to the local directory that contains the
torvalds and aegl directories worked around the problem. git checkout
on the aegl tree was quite happy to follow the symlink and pick up most
of the files from torvalds.
No point in doing that now, Tony has reverted the aegl tree to a full
copy of Linus, instead of using alternates.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-09-16 5:49 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-09-15 3:26 [PATCH RFC] Add support for alternates in HTTP Daniel Barkalow
2005-09-15 23:52 ` Current state of GIT fetch/pull clients Junio C Hamano
2005-09-16 1:03 ` Jeff Garzik
2005-09-16 5:49 ` Keith Owens
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).