git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Get commits from remote repositories by HTTP
@ 2005-04-16 22:03 Daniel Barkalow
  2005-04-16 22:17 ` Martin Mares
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Daniel Barkalow @ 2005-04-16 22:03 UTC (permalink / raw)
  To: git

This adds a program to download a commit, the trees, and the blobs in them
from a remote repository using HTTP. It skips anything you already have.

There are a number of improvements possible, to be done if this catches
on, including, significantly, checking if the response was correct (or
even not an error).

It makes fsck-cache and rev-tree give harmless warnings, because it
includes some code that should probably be shared with them in revision.h

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>

Index: Makefile
===================================================================
--- ed4f6e454b40650b904ab72048b2f93a068dccc3/Makefile  (mode:100644 sha1:b39b4ea37586693dd707d1d0750a9b580350ec50)
+++ a65375b46154c90e7499b7e76998d430cd9cd29d/Makefile  (mode:100644 sha1:d41860aed161a14ca61e7b6c7f591f65928bd61f)
@@ -14,7 +14,7 @@
 
 PROG=   update-cache show-diff init-db write-tree read-tree commit-tree \
 	cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
-	check-files ls-tree merge-tree
+	check-files ls-tree merge-tree http-get
 
 all: $(PROG)
 
@@ -23,6 +23,9 @@
 
 LIBS= -lssl -lz
 
+http-get:%:%.o read-cache.o
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
 init-db: init-db.o
 
 update-cache: update-cache.o read-cache.o
Index: http-get.c
===================================================================
--- /dev/null  (tree:ed4f6e454b40650b904ab72048b2f93a068dccc3)
+++ a65375b46154c90e7499b7e76998d430cd9cd29d/http-get.c  (mode:100644 sha1:6a36cfa079519a7a3ad5b1618be8711c5127b531)
@@ -0,0 +1,175 @@
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include "cache.h"
+#include "revision.h"
+#include <errno.h>
+
+static struct sockaddr_in sockad;
+static char *url;
+static char *base;
+
+static int target_url(char *target)
+{
+	char *name;
+	struct hostent *entry;
+	if (memcmp(target, "http://", 7))
+		return -1;
+	url = target;
+	base = strchr(target + 7, '/');
+	name = malloc(base - (target + 7) + 1);
+	memcpy(name, target + 7, base - (target + 7));
+	name[base - (target + 7)] = '\0';
+	printf("Connect to %s\n", name);
+	entry = gethostbyname(name);
+	memcpy(&sockad.sin_addr.s_addr,
+	       &((struct in_addr *)entry->h_addr)->s_addr, 4);
+	sockad.sin_port = htons(80);
+	sockad.sin_family = AF_INET;
+}
+
+static int get_connection()
+{
+	int fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (connect(fd, (struct sockaddr*) &sockad,
+		    sizeof(struct sockaddr_in))) {
+		perror(url);
+	}
+	return fd;
+}
+
+static void release_connection(int fd) {
+	close(fd);
+}
+
+static int fetch(unsigned char *sha1)
+{
+	int header_end_posn = 0;
+	int local;
+	char *hex = sha1_to_hex(sha1);
+	char *filename = sha1_file_name(sha1);
+	char buffer[4096];
+	int fd;
+	struct stat st;
+
+	if (!stat(filename, &st)) {
+		return 0;
+	}
+
+	fd = get_connection();
+	if (fd < 0) {
+		return 1;
+	}
+
+	write(fd, "GET ", 4);
+	write(fd, base, strlen(base));
+	write(fd, "objects/", 8);
+	write(fd, hex, 2);
+	write(fd, "/", 1);
+	write(fd, hex + 2, 38);
+	write(fd, " HTTP/1.0\r\n", 11);
+	write(fd, "\r\n", 2);
+
+	local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
+
+	do {
+		int sz = read(fd, buffer, 4096);
+		if (!sz) {
+			break;
+		}
+		if (sz < 0) {
+			perror("Reading from connection");
+			unlink(filename);
+			close(local);
+			return 1;
+		}
+		if (header_end_posn < 4) {
+			int i = 0;
+			char *flag = "\r\n\r\n";
+			while (i < sz && header_end_posn < 4) {
+				if (buffer[i] == flag[header_end_posn]) {
+					header_end_posn++;
+				} else {
+					header_end_posn = 0;
+				}
+				i++;
+			}
+			if (i < sz) {
+				write(local, buffer + i, sz - i);
+			}
+			continue;
+		}
+		write(local, buffer, sz);
+	} while (1);
+
+	close(local);
+	
+	release_connection(fd);
+	return 0;
+}
+
+static int process_tree(unsigned char *sha1)
+{
+	void *buffer;
+        unsigned long size;
+        char type[20];
+
+        buffer = read_sha1_file(sha1, type, &size);
+	if (!buffer)
+		return -1;
+	if (strcmp(type, "tree"))
+		return -1;
+	while (size) {
+		int len = strlen(buffer) + 1;
+		unsigned char *sha1 = buffer + len;
+		unsigned int mode;
+		int retval;
+
+		if (size < len + 20 || sscanf(buffer, "%o", &mode) != 1)
+			return -1;
+
+		buffer = sha1 + 20;
+		size -= len + 20;
+
+		retval = fetch(sha1);
+		if (retval)
+			return -1;
+
+		if (S_ISDIR(mode)) {
+			retval = process_tree(sha1);
+			if (retval)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+static int process_commit(unsigned char *sha1)
+{
+	struct revision *rev = lookup_rev(sha1);
+	if (parse_commit_object(rev))
+		return -1;
+	
+	fetch(rev->tree);
+	process_tree(rev->tree);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	char *commit_id = argv[1];
+	char *url = argv[2];
+
+	unsigned char sha1[20];
+
+	get_sha1_hex(commit_id, sha1);
+
+	target_url(url);
+
+	fetch(sha1);
+	return process_commit(sha1);
+}
Index: revision.h
===================================================================
--- ed4f6e454b40650b904ab72048b2f93a068dccc3/revision.h  (mode:100664 sha1:28d0de3261a61f68e4e0948a25a416a515cd2e83)
+++ a65375b46154c90e7499b7e76998d430cd9cd29d/revision.h  (mode:100664 sha1:523bde6e14e18bb0ecbded8f83ad4df93fc467ab)
@@ -24,6 +24,7 @@
 	unsigned int flags;
 	unsigned char sha1[20];
 	unsigned long date;
+	unsigned char tree[20];
 	struct parent *parent;
 };
 
@@ -111,4 +112,29 @@
 	}
 }
 
+static int parse_commit_object(struct revision *rev)
+{
+	if (!(rev->flags & SEEN)) {
+		void *buffer, *bufptr;
+		unsigned long size;
+		char type[20];
+		unsigned char parent[20];
+
+		rev->flags |= SEEN;
+		buffer = bufptr = read_sha1_file(rev->sha1, type, &size);
+		if (!buffer || strcmp(type, "commit"))
+			return -1;
+		get_sha1_hex(bufptr + 5, rev->tree);
+		bufptr += 46; /* "tree " + "hex sha1" + "\n" */
+		while (!memcmp(bufptr, "parent ", 7) && 
+		       !get_sha1_hex(bufptr+7, parent)) {
+			add_relationship(rev, parent);
+			bufptr += 48;   /* "parent " + "hex sha1" + "\n" */
+		}
+		//rev->date = parse_commit_date(bufptr);
+		free(buffer);
+	}
+	return 0;
+}
+
 #endif /* REVISION_H */


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:03 [PATCH] Get commits from remote repositories by HTTP Daniel Barkalow
@ 2005-04-16 22:17 ` Martin Mares
  2005-04-16 22:43   ` Daniel Barkalow
  2005-04-16 22:24 ` Tony Luck
  2005-04-16 22:32 ` Jan-Benedict Glaw
  2 siblings, 1 reply; 14+ messages in thread
From: Martin Mares @ 2005-04-16 22:17 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git

Hello!

> This adds a program to download a commit, the trees, and the blobs in them
> from a remote repository using HTTP. It skips anything you already have.

Is it really necessary to write your own HTTP downloader? If so, is it
necessary to forget basic stuff like the "Host:" header? ;-)

If you feel that it should be optimized for speed, then at least use
persistent connections.

> +	if (memcmp(target, "http://", 7))
> +		return -1;

Can crash if the string is too short.

> +	entry = gethostbyname(name);
> +	memcpy(&sockad.sin_addr.s_addr,
> +	       &((struct in_addr *)entry->h_addr)->s_addr, 4);

Can crash if the host doesn't exist or if you feed it with a URL containing
port number.

> +static int get_connection()

(void)

> +	local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);

What if it fails?

				Have a nice fortnight
-- 
Martin `MJ' Mares   <mj@ucw.cz>   http://atrey.karlin.mff.cuni.cz/~mj/
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
A student who changes the course of history is probably taking an exam.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:03 [PATCH] Get commits from remote repositories by HTTP Daniel Barkalow
  2005-04-16 22:17 ` Martin Mares
@ 2005-04-16 22:24 ` Tony Luck
  2005-04-16 22:33   ` Daniel Barkalow
  2005-04-16 22:42   ` Adam Kropelin
  2005-04-16 22:32 ` Jan-Benedict Glaw
  2 siblings, 2 replies; 14+ messages in thread
From: Tony Luck @ 2005-04-16 22:24 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git

On 4/16/05, Daniel Barkalow <barkalow@iabervon.org> wrote:
> +        buffer = read_sha1_file(sha1, type, &size);

You never free this buffer.

It would also be nice if you saved "tree" objects in some temporary file
and did not install them until after you had fetched all the blobs and
trees that this tree references.  Then if your connection is interrupted
you can just restart it.

Otherwise this looks really nice.  I was going to script something
similar using "wget" ... but that would have made zillions of separate
connections.  Not so kind to the server.

-Tony

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:03 [PATCH] Get commits from remote repositories by HTTP Daniel Barkalow
  2005-04-16 22:17 ` Martin Mares
  2005-04-16 22:24 ` Tony Luck
@ 2005-04-16 22:32 ` Jan-Benedict Glaw
  2005-04-16 22:37   ` Daniel Barkalow
  2 siblings, 1 reply; 14+ messages in thread
From: Jan-Benedict Glaw @ 2005-04-16 22:32 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git

On Sat, 2005-04-16 18:03:51 -0400, Daniel Barkalow <barkalow@iabervon.org>
wrote in message <Pine.LNX.4.21.0504161750020.30848-100000@iabervon.org>:
> --- /dev/null  (tree:ed4f6e454b40650b904ab72048b2f93a068dccc3)
> +++ a65375b46154c90e7499b7e76998d430cd9cd29d/http-get.c  (mode:100644 sha1:6a36cfa079519a7a3ad5b1618be8711c5127b531)

> +	local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);

0666 is a bit too lazy. I'd suggest 0664 or 0644.

MfG, JBG

-- 
Jan-Benedict Glaw       jbglaw@lug-owl.de    . +49-172-7608481             _ O _
"Eine Freie Meinung in  einem Freien Kopf    | Gegen Zensur | Gegen Krieg  _ _ O
 fuer einen Freien Staat voll Freier Bürger" | im Internet! |   im Irak!   O O O
ret = do_actions((curr | FREE_SPEECH) & ~(NEW_COPYRIGHT_LAW | DRM | TCPA));

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:24 ` Tony Luck
@ 2005-04-16 22:33   ` Daniel Barkalow
  2005-04-16 22:42   ` Adam Kropelin
  1 sibling, 0 replies; 14+ messages in thread
From: Daniel Barkalow @ 2005-04-16 22:33 UTC (permalink / raw)
  To: Tony Luck; +Cc: git

On Sat, 16 Apr 2005, Tony Luck wrote:

> On 4/16/05, Daniel Barkalow <barkalow@iabervon.org> wrote:
> > +        buffer = read_sha1_file(sha1, type, &size);
> 
> You never free this buffer.

Ideally, this should all be rearranged to share the code with
read-tree, and it should be fixed in common.

> It would also be nice if you saved "tree" objects in some temporary file
> and did not install them until after you had fetched all the blobs and
> trees that this tree references.  Then if your connection is interrupted
> you can just restart it.

It looks over everything relevant, even if it doesn't need to download
anything, so it should work to continue if it stops in between.

	-Daniel
*This .sig left intentionally blank*


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:32 ` Jan-Benedict Glaw
@ 2005-04-16 22:37   ` Daniel Barkalow
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Barkalow @ 2005-04-16 22:37 UTC (permalink / raw)
  To: Jan-Benedict Glaw; +Cc: git

On Sun, 17 Apr 2005, Jan-Benedict Glaw wrote:

> On Sat, 2005-04-16 18:03:51 -0400, Daniel Barkalow <barkalow@iabervon.org>
> wrote in message <Pine.LNX.4.21.0504161750020.30848-100000@iabervon.org>:
> > --- /dev/null  (tree:ed4f6e454b40650b904ab72048b2f93a068dccc3)
> > +++ a65375b46154c90e7499b7e76998d430cd9cd29d/http-get.c  (mode:100644 sha1:6a36cfa079519a7a3ad5b1618be8711c5127b531)
> 
> > +	local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
> 
> 0666 is a bit too lazy. I'd suggest 0664 or 0644.

Actually, 0444 would make most sense, since these shouldn't get modified
at all. But umask is applied to them anyway, so 0664 or 0644 (or 0660 or 
0600) is up to the local system policy. This just matches
write_sha1_buffer().

	-Daniel
*This .sig left intentionally blank*


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:24 ` Tony Luck
  2005-04-16 22:33   ` Daniel Barkalow
@ 2005-04-16 22:42   ` Adam Kropelin
  2005-04-16 22:45     ` Daniel Barkalow
  2005-04-17  3:16     ` tony.luck
  1 sibling, 2 replies; 14+ messages in thread
From: Adam Kropelin @ 2005-04-16 22:42 UTC (permalink / raw)
  To: Tony Luck, Daniel Barkalow; +Cc: git

Tony Luck wrote:
> Otherwise this looks really nice.  I was going to script something
> similar using "wget" ... but that would have made zillions of separate
> connections.  Not so kind to the server.

How about building a file list and doing a batch download via 'wget -i 
/tmp/foo'? A quick test (on my ancient wget-1.7) indicates that it reuses 
connections when successive URLs point to the same server.

Writing yet another http client does seem a bit pointless, what with wget 
and curl available. The real win lies in creating the smarts to get the 
minimum number of files.

--Adam


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:17 ` Martin Mares
@ 2005-04-16 22:43   ` Daniel Barkalow
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Barkalow @ 2005-04-16 22:43 UTC (permalink / raw)
  To: Martin Mares; +Cc: git

On Sun, 17 Apr 2005, Martin Mares wrote:

> Hello!
> 
> > This adds a program to download a commit, the trees, and the blobs in them
> > from a remote repository using HTTP. It skips anything you already have.
> 
> Is it really necessary to write your own HTTP downloader? If so, is it
> necessary to forget basic stuff like the "Host:" header? ;-)

I wanted to get something hacked quickly; can you suggest a good one to
use?

> If you feel that it should be optimized for speed, then at least use
> persistent connections.

That's the next step.

	-Daniel
*This .sig left intentionally blank*


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:42   ` Adam Kropelin
@ 2005-04-16 22:45     ` Daniel Barkalow
  2005-04-16 22:52       ` Adam Kropelin
  2005-04-17  3:16     ` tony.luck
  1 sibling, 1 reply; 14+ messages in thread
From: Daniel Barkalow @ 2005-04-16 22:45 UTC (permalink / raw)
  To: Adam Kropelin; +Cc: Tony Luck, git

On Sat, 16 Apr 2005, Adam Kropelin wrote:

> Tony Luck wrote:
> > Otherwise this looks really nice.  I was going to script something
> > similar using "wget" ... but that would have made zillions of separate
> > connections.  Not so kind to the server.
> 
> How about building a file list and doing a batch download via 'wget -i 
> /tmp/foo'? A quick test (on my ancient wget-1.7) indicates that it reuses 
> connections when successive URLs point to the same server.

You need to look at some of the files before you know what other files to
get. You could do it in waves, but that would be excessively complicated
to code and not the most efficient anyway.

	-Daniel
*This .sig left intentionally blank*


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:45     ` Daniel Barkalow
@ 2005-04-16 22:52       ` Adam Kropelin
  0 siblings, 0 replies; 14+ messages in thread
From: Adam Kropelin @ 2005-04-16 22:52 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: Tony Luck, git

Daniel Barkalow wrote:
> On Sat, 16 Apr 2005, Adam Kropelin wrote:
>> How about building a file list and doing a batch download via 'wget
>> -i /tmp/foo'? A quick test (on my ancient wget-1.7) indicates that
>> it reuses connections when successive URLs point to the same server.
>
> You need to look at some of the files before you know what other
> files to get. You could do it in waves, but that would be excessively
> complicated to code and not the most efficient anyway.

Ah, yes. Makes sense. How about libcurl or another http client library, 
then? Minimizing dependencies on external libraries is good, but writing a 
really robust http client is a tricky business. (Not that you aren't up to 
it; I just wonder if it's the best way to spend your time.)

--Adam


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-16 22:42   ` Adam Kropelin
  2005-04-16 22:45     ` Daniel Barkalow
@ 2005-04-17  3:16     ` tony.luck
  2005-04-18 18:41       ` tony.luck
  1 sibling, 1 reply; 14+ messages in thread
From: tony.luck @ 2005-04-17  3:16 UTC (permalink / raw)
  To: Adam Kropelin; +Cc: Daniel Barkalow, git

>How about building a file list and doing a batch download via 'wget -i 
>/tmp/foo'? A quick test (on my ancient wget-1.7) indicates that it reuses 
>connections when successive URLs point to the same server.

Here's a script that does just that.  So there is a burst of individual
wget commands to get HEAD, the top commit object, and all the tree
objects.  Then just one to get all the missing blobs.

Subsequent runs will do far less work as many of the tree objects will
not have changed, so we don't descend into any tree that we already have.

-Tony

Not a patch ... it is a whole file.  I called it "git-wget", but it might
also want to be called "git-pulltop".

Signed-off-by: Tony Luck <tony.luck@intel.com>

------ script starts here -----
#!/bin/sh
# NOTE(review): the ${var:offset:length} expansions used below are not POSIX
# sh syntax; this presumably needs /bin/sh to be bash -- confirm.

# Copyright (C) 2005 Tony Luck

# Base URL of the remote repository that objects are downloaded from.
REMOTE=http://www.kernel.org/pub/linux/kernel/people/torvalds/linux-2.6.git/

# Start from a clean scratch area on every run.
rm -rf .gittmp
# set up a temp git repository so that we can use cat-file and ls-tree on the
# objects we pull without installing them into our tree. This allows us to
# restart if the download is interrupted
mkdir .gittmp
cd .gittmp
init-db

# Download the remote HEAD file into the scratch directory (.gittmp/HEAD).
wget -q $REMOTE/HEAD

# If the remote HEAD is identical to the local one there is nothing to do:
# report it, remove the scratch area and exit successfully.
if cmp -s ../.git/HEAD HEAD
then
	echo Already have HEAD = `cat ../.git/HEAD`
	cd ..
	rm -rf .gittmp
	exit 0
fi

# Object files live at objects/<first two hex digits>/<remaining hex digits>.
sha1=`cat HEAD`
sha1file=${sha1:0:2}/${sha1:2}

# The commit object named by the remote HEAD is already in the local object
# store. This branch only prints a message and cleans up; it does not write
# the local HEAD file itself.
if [ -f ../.git/objects/$sha1file ]
then
	echo Already have most recent commit. Update HEAD to $sha1
	cd ..
	rm -rf .gittmp
	exit 0
fi

# Fetch the commit object into the scratch repository's object store.
wget -q $REMOTE/objects/$sha1file -O .git/objects/$sha1file

# Read the first line of the commit ("tree <sha1>") and keep what follows the
# first word as the id of the top-level tree.
treesha1=`cat-file commit $sha1 | (read tag tree ; echo $tree)`

# get_tree <tree-sha1> [<n>]
#
# Download tree object <tree-sha1> into the scratch repository unless the real
# repository (../.git) already has it, then walk its entries as printed by
# ls-tree: recurse into entries whose mode is 40000 and append the object path
# of every other missing entry to the file "needbloblist" in the current
# directory.
#
# NOTE(review): the second argument is only ever passed on as `expr $2 + 1`
# and is never read otherwise; presumably a leftover depth counter -- confirm.
get_tree()
{
	treesha1file=${1:0:2}/${1:2}
	# Already installed locally: nothing below this tree is examined.
	if [ -f ../.git/objects/$treesha1file ]
	then
		return
	fi
	wget -q $REMOTE/objects/$treesha1file -O .git/objects/$treesha1file
	ls-tree $1 | while read mode tag sha1 name
	do
		subsha1file=${sha1:0:2}/${sha1:2}
		# Skip any entry whose object is already in the real repository.
		if [  -f ../.git/objects/$subsha1file ]
		then
			continue
		fi
		# Mode 40000 is compared as a string, so it only matches an
		# ls-tree that prints directory modes without a leading zero.
		if [ $mode = 40000 ]
		then
			get_tree $sha1 `expr $2 + 1`
		else
			echo objects/$subsha1file >> needbloblist
		fi
	done
}

# get all the tree objects to our .gittmp area, and create list of needed blobs
get_tree $treesha1

# now get the blobs
# They are downloaded straight into the real repository, in one wget run, and
# only when the list file exists and is non-empty (-s).
# --cut-dirs=6 corresponds to the six directory components in REMOTE's path,
# so the files should land under objects/ relative to .git -- confirm against
# the wget manual if REMOTE changes.
cd ../.git
if [ -s ../.gittmp/needbloblist ]
then
	wget -q -r -nH  --cut-dirs=6 --base=$REMOTE -i ../.gittmp/needbloblist
fi

# Now we have the blobs, move the trees and commit from .gittmp
# (every regular file under the two-character object directories).
cd ../.gittmp/.git/objects
find ?? -type f -print | while read f
do
	mv $f ../../../.git/objects/$f
done

# update HEAD
# This is done last, after all objects have been installed.
cd ../..
mv HEAD ../.git

# Remove the scratch area.
cd ..
rm -rf .gittmp
------ script ends here -----

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-17  3:16     ` tony.luck
@ 2005-04-18 18:41       ` tony.luck
  2005-04-18 18:47         ` Petr Baudis
  0 siblings, 1 reply; 14+ messages in thread
From: tony.luck @ 2005-04-18 18:41 UTC (permalink / raw)
  To: git

>Not a patch ... it is a whole file.  I called it "git-wget", but it might
>also want to be called "git-pulltop".

It's been pointed out to me that I based this script on a pre-historic version
of ls-tree from sometime last week.  Modern versions print the mode with %06o
so there is a leading 0 on the mode for a directory.  Just change

		if [ $mode = 40000 ]

to

		if [ $mode = 040000 ]

to fix it.

The script might also be useful for anyone behind a firewall that blocks
rsync transfers.

-Tony

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-18 18:41       ` tony.luck
@ 2005-04-18 18:47         ` Petr Baudis
  2005-04-18 20:48           ` tony.luck
  0 siblings, 1 reply; 14+ messages in thread
From: Petr Baudis @ 2005-04-18 18:47 UTC (permalink / raw)
  To: tony.luck; +Cc: git

Dear diary, on Mon, Apr 18, 2005 at 08:41:42PM CEST, I got a letter
where tony.luck@intel.com told me that...
> >Not a patch ... it is a whole file.  I called it "git-wget", but it might
> >also want to be called "git-pulltop".
> 
> It's been pointed out to me that I based this script on a pre-historic version
> of ls-tree from sometime last week.  Modern versions print the mode with %06o
> so there is a leading 0 on the mode for a directory.  Just change
> 
> 		if [ $mode = 40000 ]
> 
> to
> 
> 		if [ $mode = 040000 ]
> 
> to fix it.

...and this is precisely why ls-tree actually outputs those "blob" and
"tree" tags. ;-)

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Get commits from remote repositories by HTTP
  2005-04-18 18:47         ` Petr Baudis
@ 2005-04-18 20:48           ` tony.luck
  0 siblings, 0 replies; 14+ messages in thread
From: tony.luck @ 2005-04-18 20:48 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git

> ...and this is precisely why ls-tree actually outputs those "blob" and
> "tree" tags. ;-)

Doh!

Here's a fresh copy with "if [ $tag = tree ]".  I just used it to pull
from Linus into an "empty" directory (just ran init-db to make the .git
.git/objects and .git/objects/xx directories).

-Tony


#!/bin/bash

# Copyright (C) 2005 Tony Luck

# Base URL of the remote repository that objects are downloaded from.
REMOTE=http://www.kernel.org/pub/linux/kernel/people/torvalds/linux-2.6.git/

# Start from a clean scratch area on every run.
rm -rf .gittmp
# set up a temp git repository so that we can use cat-file and ls-tree on the
# objects we pull without installing them into our tree. This allows us to
# restart if the download is interrupted
mkdir .gittmp
cd .gittmp
init-db

# Download the remote HEAD file into the scratch directory (.gittmp/HEAD).
wget -q $REMOTE/HEAD

# If the remote HEAD is identical to the local one there is nothing to do:
# report it, remove the scratch area and exit successfully.
if cmp -s ../.git/HEAD HEAD
then
	echo Already have HEAD = `cat ../.git/HEAD`
	cd ..
	rm -rf .gittmp
	exit 0
fi

# Object files live at objects/<first two hex digits>/<remaining hex digits>.
sha1=`cat HEAD`
sha1file=${sha1:0:2}/${sha1:2}

# The commit object named by the remote HEAD is already in the local object
# store. This branch only prints a message and cleans up; it does not write
# the local HEAD file itself.
if [ -f ../.git/objects/$sha1file ]
then
	echo Already have most recent commit. Update HEAD to $sha1
	cd ..
	rm -rf .gittmp
	exit 0
fi

# Fetch the commit object into the scratch repository's object store.
wget -q $REMOTE/objects/$sha1file -O .git/objects/$sha1file

# Read the first line of the commit ("tree <sha1>") and keep what follows the
# first word as the id of the top-level tree.
treesha1=`cat-file commit $sha1 | (read tag tree ; echo $tree)`

# get_tree <tree-sha1> [<n>]
#
# Download tree object <tree-sha1> into the scratch repository unless the real
# repository (../.git) already has it, then walk its entries as printed by
# ls-tree: recurse into entries tagged "tree" and append the object path of
# every other missing entry to the file "needbloblist" in the current
# directory.
#
# NOTE(review): the second argument is only ever passed on as `expr $2 + 1`
# and is never read otherwise; presumably a leftover depth counter -- confirm.
get_tree()
{
	treesha1file=${1:0:2}/${1:2}
	# Already installed locally: nothing below this tree is examined.
	if [ -f ../.git/objects/$treesha1file ]
	then
		return
	fi
	wget -q $REMOTE/objects/$treesha1file -O .git/objects/$treesha1file
	ls-tree $1 | while read mode tag sha1 name
	do
		subsha1file=${sha1:0:2}/${sha1:2}
		# Skip any entry whose object is already in the real repository.
		if [  -f ../.git/objects/$subsha1file ]
		then
			continue
		fi
		# Decide on the type tag rather than the mode, so the test does
		# not depend on how ls-tree formats the mode.
		if [ $tag = tree ]
		then
			get_tree $sha1 `expr $2 + 1`
		else
			echo objects/$subsha1file >> needbloblist
		fi
	done
}

# get all the tree objects to our .gittmp area, and create list of needed blobs
get_tree $treesha1

# now get the blobs
# They are downloaded straight into the real repository, in one wget run, and
# only when the list file exists and is non-empty (-s).
# --cut-dirs=6 corresponds to the six directory components in REMOTE's path,
# so the files should land under objects/ relative to .git -- confirm against
# the wget manual if REMOTE changes.
cd ../.git
if [ -s ../.gittmp/needbloblist ]
then
	wget -q -r -nH  --cut-dirs=6 --base=$REMOTE -i ../.gittmp/needbloblist
fi

# Now we have the blobs, move the trees and commit from .gittmp
# (every regular file under the two-character object directories).
cd ../.gittmp/.git/objects
find ?? -type f -print | while read f
do
	mv $f ../../../.git/objects/$f
done

# update HEAD
# This is done last, after all objects have been installed.
cd ../..
mv HEAD ../.git

# Remove the scratch area.
cd ..
rm -rf .gittmp

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2005-04-18 20:45 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-16 22:03 [PATCH] Get commits from remote repositories by HTTP Daniel Barkalow
2005-04-16 22:17 ` Martin Mares
2005-04-16 22:43   ` Daniel Barkalow
2005-04-16 22:24 ` Tony Luck
2005-04-16 22:33   ` Daniel Barkalow
2005-04-16 22:42   ` Adam Kropelin
2005-04-16 22:45     ` Daniel Barkalow
2005-04-16 22:52       ` Adam Kropelin
2005-04-17  3:16     ` tony.luck
2005-04-18 18:41       ` tony.luck
2005-04-18 18:47         ` Petr Baudis
2005-04-18 20:48           ` tony.luck
2005-04-16 22:32 ` Jan-Benedict Glaw
2005-04-16 22:37   ` Daniel Barkalow

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).