git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] HTTP partial transfer support for object, pack, and index transfers
@ 2005-09-28 17:14 Nick Hengeveld
  2005-09-30  7:47 ` Junio C Hamano
  0 siblings, 1 reply; 4+ messages in thread
From: Nick Hengeveld @ 2005-09-28 17:14 UTC (permalink / raw)
  To: git

HTTP partial transfer support for object, pack, and index transfers

Signed-off-by: Nick Hengeveld <nickh@reactrix.com>


---

 http-fetch.c |  185 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 161 insertions(+), 24 deletions(-)

492f6dfd5f0e7030fe96b3646a07a9adb1a7a2d0
diff --git a/http-fetch.c b/http-fetch.c
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -13,8 +13,12 @@
 #define curl_global_init(a) do { /* nothing */ } while(0)
 #endif
 
+#define PREV_BUF_SIZE 4096
+#define RANGE_HEADER_SIZE 30
+
 static CURL *curl;
 static struct curl_slist *no_pragma_header;
+static struct curl_slist *no_range_header;
 
 static char *initial_base;
 
@@ -82,12 +86,37 @@ void prefetch(unsigned char *sha1)
 {
 }
 
+int relink_or_rename(char *old, char *new) {
+	int ret;
+
+	ret = link(old, new);
+	if (ret < 0) {
+		/* Same Coda hack as in write_sha1_file(sha1_file.c) */
+		ret = errno;
+		if (ret == EXDEV && !rename(old, new))
+			return 0;
+	}
+	unlink(old);
+	if (ret) {
+		if (ret != EEXIST)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int got_alternates = 0;
 
 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 {
 	char *filename;
 	char *url;
+	char tmpfile[PATH_MAX];
+	int ret;
+	long prev_posn = 0;
+	char range[RANGE_HEADER_SIZE];
+	struct curl_slist *range_header = NULL;
+	CURLcode curl_result;
 
 	FILE *indexfile;
 
@@ -103,7 +132,8 @@ static int fetch_index(struct alt_base *
 		repo->base, sha1_to_hex(sha1));
 	
 	filename = sha1_pack_index_name(sha1);
-	indexfile = fopen(filename, "w");
+	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+	indexfile = fopen(tmpfile, "a");
 	if (!indexfile)
 		return error("Unable to open local file %s for pack index",
 			     filename);
@@ -113,12 +143,35 @@ static int fetch_index(struct alt_base *
 	curl_easy_setopt(curl, CURLOPT_URL, url);
 	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
 	
-	if (curl_easy_perform(curl)) {
+	/* If there is data present from a previous transfer attempt,
+	   resume where it left off */
+	prev_posn = ftell(indexfile);
+	if (prev_posn>0) {
+		if (get_verbosely)
+			fprintf(stderr,
+				"Resuming fetch of index for pack %s at byte %ld\n",
+				sha1_to_hex(sha1), prev_posn);
+		sprintf(range, "Range: bytes=%ld-", prev_posn);
+		range_header = curl_slist_append(range_header, range);
+		curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+	}
+
+	/* Clear out the Range: header after performing the request, so
+	   other curl requests don't inherit inappropriate header data */
+	curl_result = curl_easy_perform(curl);
+	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+	if (curl_result != 0) {
 		fclose(indexfile);
 		return error("Unable to get pack index %s", url);
 	}
 
 	fclose(indexfile);
+
+	ret = relink_or_rename(tmpfile, filename);
+	if (ret)
+		return error("unable to write index filename %s: %s",
+			     filename, strerror(ret));
+
 	return 0;
 }
 
@@ -299,6 +352,12 @@ static int fetch_pack(struct alt_base *r
 	struct packed_git **lst;
 	FILE *packfile;
 	char *filename;
+	char tmpfile[PATH_MAX];
+	int ret;
+	long prev_posn = 0;
+	char range[RANGE_HEADER_SIZE];
+	struct curl_slist *range_header = NULL;
+	CURLcode curl_result;
 
 	if (fetch_indices(repo))
 		return -1;
@@ -318,7 +377,8 @@ static int fetch_pack(struct alt_base *r
 		repo->base, sha1_to_hex(target->sha1));
 
 	filename = sha1_pack_name(target->sha1);
-	packfile = fopen(filename, "w");
+	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+	packfile = fopen(tmpfile, "a");
 	if (!packfile)
 		return error("Unable to open local file %s for pack",
 			     filename);
@@ -327,14 +387,36 @@ static int fetch_pack(struct alt_base *r
 	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite);
 	curl_easy_setopt(curl, CURLOPT_URL, url);
 	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
-	
-	if (curl_easy_perform(curl)) {
+
+	/* If there is data present from a previous transfer attempt,
+	   resume where it left off */
+	prev_posn = ftell(packfile);
+	if (prev_posn>0) {
+		if (get_verbosely)
+			fprintf(stderr,
+				"Resuming fetch of pack %s at byte %ld\n",
+				sha1_to_hex(target->sha1), prev_posn);
+		sprintf(range, "Range: bytes=%ld-", prev_posn);
+		range_header = curl_slist_append(range_header, range);
+		curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+	}
+
+	/* Clear out the Range: header after performing the request, so
+	   other curl requests don't inherit inappropriate header data */
+	curl_result = curl_easy_perform(curl);
+	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+	if (curl_result != 0) {
 		fclose(packfile);
 		return error("Unable to get pack file %s", url);
 	}
 
 	fclose(packfile);
 
+	ret = relink_or_rename(tmpfile, filename);
+	if (ret)
+		return error("unable to write pack filename %s: %s",
+			     filename, strerror(ret));
+
 	lst = &repo->packs;
 	while (*lst != target)
 		lst = &((*lst)->next);
@@ -351,14 +433,29 @@ int fetch_object(struct alt_base *repo, 
 	char *filename = sha1_file_name(sha1);
 	unsigned char real_sha1[20];
 	char tmpfile[PATH_MAX];
+	char prevfile[PATH_MAX];
 	int ret;
 	char *url;
 	char *posn;
+	int prevlocal;
+	unsigned char prev_buf[PREV_BUF_SIZE];
+	ssize_t prev_read = 0;
+	long prev_posn = 0;
+	char range[RANGE_HEADER_SIZE];
+	struct curl_slist *range_header = NULL;
+	CURLcode curl_result;
+
+	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+	snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+	unlink(prevfile);
+	rename(tmpfile, prevfile);
+	unlink(tmpfile);
+
+	local = open(tmpfile, O_WRONLY | O_CREAT | O_EXCL, 0666);
 
-	snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX",
-		 get_object_directory());
+	/* Note: if another instance starts now, it will turn our new
+	   tmpfile into its prevfile. */
 
-	local = mkstemp(tmpfile);
 	if (local < 0)
 		return error("Couldn't create temporary file %s for %s: %s\n",
 			     tmpfile, filename, strerror(errno));
@@ -386,8 +483,56 @@ int fetch_object(struct alt_base *repo, 
 
 	curl_easy_setopt(curl, CURLOPT_URL, url);
 
-	if (curl_easy_perform(curl)) {
-		unlink(filename);
+	/* If a previous temp file is present, process what was already
+	   fetched. */
+	prevlocal = open(prevfile, O_RDONLY);
+	if (prevlocal != -1) {
+		do {
+			prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
+			if (prev_read>0) {
+				if (fwrite_sha1_file(prev_buf,
+						     1,
+						     prev_read,
+						     NULL) == prev_read) {
+					prev_posn += prev_read;
+				} else {
+					prev_read = -1;
+				}
+			}
+		} while (prev_read > 0);
+		close(prevlocal);
+	}
+	unlink(prevfile);
+
+	/* Reset inflate/SHA1 if there was an error reading the previous temp
+	   file; also rewind to the beginning of the local file. */
+	if (prev_read == -1) {
+		memset(&stream, 0, sizeof(stream));
+		inflateInit(&stream);
+		SHA1_Init(&c);
+		if (prev_posn>0) {
+			prev_posn = 0;
+			lseek(local, SEEK_SET, 0);
+		}
+	}
+
+	/* If we have successfully processed data from a previous fetch
+	   attempt, only fetch the data we don't already have. */
+	if (prev_posn>0) {
+		if (get_verbosely)
+			fprintf(stderr,
+				"Resuming fetch of object %s at byte %ld\n",
+				hex, prev_posn);
+		sprintf(range, "Range: bytes=%ld-", prev_posn);
+		range_header = curl_slist_append(range_header, range);
+		curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+	}
+
+	/* Clear out the Range: header after performing the request, so
+	   other curl requests don't inherit inappropriate header data */
+	curl_result = curl_easy_perform(curl);
+	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+	if (curl_result != 0) {
 		return -1;
 	}
 
@@ -403,20 +548,11 @@ int fetch_object(struct alt_base *repo, 
 		unlink(tmpfile);
 		return error("File %s has bad hash\n", hex);
 	}
-	ret = link(tmpfile, filename);
-	if (ret < 0) {
-		/* Same Coda hack as in write_sha1_file(sha1_file.c) */
-		ret = errno;
-		if (ret == EXDEV && !rename(tmpfile, filename))
-			goto out;
-	}
-	unlink(tmpfile);
-	if (ret) {
-		if (ret != EEXIST)
-			return error("unable to write sha1 filename %s: %s",
-				     filename, strerror(ret));
-	}
- out:
+	ret = relink_or_rename(tmpfile, filename);
+	if (ret)
+		return error("unable to write sha1 filename %s: %s",
+			     filename, strerror(ret));
+
 	pull_say("got %s\n", hex);
 	return 0;
 }
@@ -505,6 +641,7 @@ int main(int argc, char **argv)
 
 	curl = curl_easy_init();
 	no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
+	no_range_header = curl_slist_append(no_range_header, "Range:");
 
 	curl_ssl_verify = getenv("GIT_SSL_NO_VERIFY") ? 0 : 1;
 	curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, curl_ssl_verify);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] HTTP partial transfer support for object, pack, and index transfers
  2005-09-28 17:14 [PATCH] HTTP partial transfer support for object, pack, and index transfers Nick Hengeveld
@ 2005-09-30  7:47 ` Junio C Hamano
  2005-09-30 23:27   ` Nick Hengeveld
  0 siblings, 1 reply; 4+ messages in thread
From: Junio C Hamano @ 2005-09-30  7:47 UTC (permalink / raw)
  To: Nick Hengeveld; +Cc: git

I took a look at this patch.  It did not cleanly apply anymore,
but I merged it anyway, and then took the liberty of updating it
further, according to your response to my earlier comments.

They will appear near the tip of the proposed updates branch.
I'd appreciate it if you could check it out and see I did not
break things by mistake.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] HTTP partial transfer support for object, pack, and index transfers
  2005-09-30  7:47 ` Junio C Hamano
@ 2005-09-30 23:27   ` Nick Hengeveld
  2005-10-01  7:17     ` Junio C Hamano
  0 siblings, 1 reply; 4+ messages in thread
From: Nick Hengeveld @ 2005-09-30 23:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Fri, Sep 30, 2005 at 12:47:30AM -0700, Junio C Hamano wrote:

> I took a look at this patch.  It did not cleanly apply anymore,
> but I merged it anyway, and then took the liberty of updating it
> further, according to your response to my earlier comments.

Sorry about that - we're actually using git as a back end to a content
distribution system, so I'm still learning the finer points of using it
for source control...

> They will appear near the tip of the proposed updates branch.
> I'd appreciate it if you could check it out and see I did not
> break things by mistake.

I've included a patch that fixes one small problem; I've tested partial
transfers of packs and objects with this applied and it looks good.




Don't unlink the temp file when an object transfer fails, so next attempt
will pick up where the failed transfer left off

Signed-off-by: Nick Hengeveld <nickh@reactrix.com>


---

 http-fetch.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

22226e509b916958a8b7aae76945c08d15ec686a
diff --git a/http-fetch.c b/http-fetch.c
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -550,7 +550,6 @@ static int fetch_object(struct alt_base 
 	curl_result = curl_easy_perform(curl);
 	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
 	if (curl_result != 0) {
-		unlink(tmpfile);
 		return error("%s", curl_errorstr);
 	}
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] HTTP partial transfer support for object, pack, and index transfers
  2005-09-30 23:27   ` Nick Hengeveld
@ 2005-10-01  7:17     ` Junio C Hamano
  0 siblings, 0 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-10-01  7:17 UTC (permalink / raw)
  To: Nick Hengeveld; +Cc: git

Thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-10-01  7:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-28 17:14 [PATCH] HTTP partial transfer support for object, pack, and index transfers Nick Hengeveld
2005-09-30  7:47 ` Junio C Hamano
2005-09-30 23:27   ` Nick Hengeveld
2005-10-01  7:17     ` Junio C Hamano

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).