Git development
 help / color / mirror / Atom feed
* [WIP Patch 11/12] Use the new http API in http-push.c:fetch_index()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-11-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-push.c |   76 ++++++----------------------------------------------------
 1 files changed, 8 insertions(+), 68 deletions(-)

diff --git a/http-push.c b/http-push.c
index b32def4..0812f58 100644
--- a/http-push.c
+++ b/http-push.c
@@ -898,32 +898,15 @@ static int fetch_index(unsigned char *sha1)
 	char *hex = sha1_to_hex(sha1);
 	char *filename;
 	char *url;
-	char tmpfile[PATH_MAX];
-	long prev_posn = 0;
-	char range[RANGE_HEADER_SIZE];
-	struct curl_slist *range_header = NULL;
-
-	FILE *indexfile;
-	struct active_request_slot *slot;
-	struct slot_results results;
+	int ret = 0;
 
 	/* Don't use the index if the pack isn't there */
 	url = xmalloc(strlen(remote->url) + 64);
 	sprintf(url, "%sobjects/pack/pack-%s.pack", remote->url, hex);
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			free(url);
-			return error("Unable to verify pack %s is available",
-				     hex);
-		}
-	} else {
+	if (http_get_strbuf(url, NULL, 0)) {
 		free(url);
-		return error("Unable to start request");
+		return error("Unable to verify pack %s is available",
+			     hex);
 	}
 
 	if (has_pack_index(sha1)) {
@@ -937,55 +920,12 @@ static int fetch_index(unsigned char *sha1)
 	sprintf(url, "%sobjects/pack/pack-%s.idx", remote->url, hex);
 
 	filename = sha1_pack_index_name(sha1);
-	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
-	indexfile = fopen(tmpfile, "a");
-	if (!indexfile) {
-		free(url);
-		return error("Unable to open local file %s for pack index",
-			     tmpfile);
-	}
-
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
-	slot->local = indexfile;
-
-	/* If there is data present from a previous transfer attempt,
-	   resume where it left off */
-	prev_posn = ftell(indexfile);
-	if (prev_posn>0) {
-		if (push_verbosely)
-			fprintf(stderr,
-				"Resuming fetch of index for pack %s at byte %ld\n",
-				hex, prev_posn);
-		sprintf(range, "Range: bytes=%ld-", prev_posn);
-		range_header = curl_slist_append(range_header, range);
-		curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
-	}
-
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			free(url);
-			fclose(indexfile);
-			return error("Unable to get pack index %s\n%s", url,
-				     curl_errorstr);
-		}
-	} else {
-		free(url);
-		fclose(indexfile);
-		return error("Unable to start request");
-	}
 
+	if (http_get_file(url, filename, 0) != HTTP_OK)
+		ret = error("Unable to get pack index %s\n", url);
+	free(filename);
 	free(url);
-	fclose(indexfile);
-
-	return move_temp_to_file(tmpfile, filename);
+	return ret;
 }
 
 static int setup_index(unsigned char *sha1)
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 10/12] Use the new http API in http-walker.c:fetch_index()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-10-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-walker.c |   55 +++++--------------------------------------------------
 1 files changed, 5 insertions(+), 50 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index edcb779..bc1db2b 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -368,15 +368,7 @@ static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned ch
 	char *hex = sha1_to_hex(sha1);
 	char *filename;
 	char *url;
-	char tmpfile[PATH_MAX];
-	long prev_posn = 0;
-	char range[RANGE_HEADER_SIZE];
-	struct curl_slist *range_header = NULL;
-	struct walker_data *data = walker->data;
-
-	FILE *indexfile;
-	struct active_request_slot *slot;
-	struct slot_results results;
+	int ret = 0;
 
 	if (has_pack_index(sha1))
 		return 0;
@@ -388,48 +380,11 @@ static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned ch
 	sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 
 	filename = sha1_pack_index_name(sha1);
-	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
-	indexfile = fopen(tmpfile, "a");
-	if (!indexfile)
-		return error("Unable to open local file %s for pack index",
-			     tmpfile);
-
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, data->no_pragma_header);
-	slot->local = indexfile;
-
-	/* If there is data present from a previous transfer attempt,
-	   resume where it left off */
-	prev_posn = ftell(indexfile);
-	if (prev_posn>0) {
-		if (walker->get_verbosely)
-			fprintf(stderr,
-				"Resuming fetch of index for pack %s at byte %ld\n",
-				hex, prev_posn);
-		sprintf(range, "Range: bytes=%ld-", prev_posn);
-		range_header = curl_slist_append(range_header, range);
-		curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
-	}
-
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			fclose(indexfile);
-			return error("Unable to get pack index %s\n%s", url,
-				     curl_errorstr);
-		}
-	} else {
-		fclose(indexfile);
-		return error("Unable to start request");
-	}
-
-	fclose(indexfile);
 
-	return move_temp_to_file(tmpfile, filename);
+	if (http_get_file(url, filename, 0) != HTTP_OK)
+		ret = error("Unable to get pack index %s\n", url);
+	free(url);
+	return ret;
 }
 
 static int setup_index(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 08/12] Use the new http API in update_remote_info_refs()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-8-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-push.c |   29 ++++++++++-------------------
 1 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/http-push.c b/http-push.c
index e0b4f5a..7627860 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1960,29 +1960,20 @@ static void update_remote_info_refs(struct remote_lock *lock)
 static int remote_exists(const char *path)
 {
 	char *url = xmalloc(strlen(remote->url) + strlen(path) + 1);
-	struct active_request_slot *slot;
-	struct slot_results results;
-	int ret = -1;
+	int ret;
 
 	sprintf(url, "%s%s", remote->url, path);
 
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
-
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.http_code == 404)
-			ret = 0;
-		else if (results.curl_result == CURLE_OK)
-			ret = 1;
-		else
-			fprintf(stderr, "HEAD HTTP error %ld\n", results.http_code);
-	} else {
-		fprintf(stderr, "Unable to start HEAD request\n");
+	switch (http_get_strbuf(url, NULL, 0)) {
+	case HTTP_OK:
+		ret = 1;
+		break;
+	case HTTP_MISSING_TARGET:
+		ret = 0;
+		break;
+	default:
+		ret = -1;
 	}
-
 	free(url);
 	return ret;
 }
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 09/12] Use the new http API in fetch_symref()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-9-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-push.c |   19 ++-----------------
 1 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/http-push.c b/http-push.c
index 7627860..b32def4 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1982,27 +1982,12 @@ static void fetch_symref(const char *path, char **symref, unsigned char *sha1)
 {
 	char *url;
 	struct strbuf buffer = STRBUF_INIT;
-	struct active_request_slot *slot;
-	struct slot_results results;
 
 	url = xmalloc(strlen(remote->url) + strlen(path) + 1);
 	sprintf(url, "%s%s", remote->url, path);
 
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			die("Couldn't get %s for remote symref\n%s",
-			    url, curl_errorstr);
-		}
-	} else {
-		die("Unable to start remote symref request");
-	}
+	if (http_get_strbuf(url, &buffer, 0) != HTTP_OK)
+		die("Couldn't get %s for remote symref\n", url);
 	free(url);
 
 	free(*symref);
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 07/12] Use the new http API in http-push.c:fetch_indices()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-7-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-push.c |   32 +++++++-------------------------
 1 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/http-push.c b/http-push.c
index e69179b..e0b4f5a 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1008,9 +1008,7 @@ static int fetch_indices(void)
 	struct strbuf buffer = STRBUF_INIT;
 	char *data;
 	int i = 0;
-
-	struct active_request_slot *slot;
-	struct slot_results results;
+	int ret = 0;
 
 	if (push_verbosely)
 		fprintf(stderr, "Getting pack list\n");
@@ -1018,28 +1016,10 @@ static int fetch_indices(void)
 	url = xmalloc(strlen(remote->url) + 20);
 	sprintf(url, "%sobjects/info/packs", remote->url);
 
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			strbuf_release(&buffer);
-			free(url);
-			if (results.http_code == 404)
-				return 0;
-			else
-				return error("%s", curl_errorstr);
-		}
-	} else {
-		strbuf_release(&buffer);
-		free(url);
-		return error("Unable to start request");
+	if (http_get_strbuf(url, &buffer, 0) != HTTP_OK) {
+		ret = -1;
+		goto cleanup;
 	}
-	free(url);
 
 	data = buffer.buf;
 	while (i < buffer.len) {
@@ -1061,8 +1041,10 @@ static int fetch_indices(void)
 		i++;
 	}
 
+cleanup:
 	strbuf_release(&buffer);
-	return 0;
+	free(url);
+	return ret;
 }
 
 static void one_remote_object(const char *hex)
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 05/12] Use the new http API in get_refs_via_curl()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-5-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 transport.c |   27 ++++-----------------------
 1 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/transport.c b/transport.c
index 6919ff1..55bf274 100644
--- a/transport.c
+++ b/transport.c
@@ -432,9 +432,6 @@ static struct ref *get_refs_via_curl(struct transport *transport)
 	char *refs_url;
 	int i = 0;
 
-	struct active_request_slot *slot;
-	struct slot_results results;
-
 	struct ref *refs = NULL;
 	struct ref *ref = NULL;
 	struct ref *last_ref = NULL;
@@ -450,26 +447,8 @@ static struct ref *get_refs_via_curl(struct transport *transport)
 	refs_url = xmalloc(strlen(transport->url) + 11);
 	sprintf(refs_url, "%s/info/refs", transport->url);
 
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, refs_url);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
-
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			strbuf_release(&buffer);
-			if (missing_target(&results))
-				die("%s not found: did you run git update-server-info on the server?", refs_url);
-			else
-				die("%s download error - %s", refs_url, curl_errorstr);
-		}
-	} else {
-		strbuf_release(&buffer);
-		die("Unable to start HTTP request");
-	}
+	if (http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE) != HTTP_OK)
+		goto cleanup;
 
 	data = buffer.buf;
 	start = NULL;
@@ -508,6 +487,8 @@ static struct ref *get_refs_via_curl(struct transport *transport)
 		free(ref);
 	}
 
+cleanup:
+	strbuf_release(&buffer);
 	http_cleanup();
 	free(refs_url);
 	return refs;
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 06/12] Use the new http API in http-walker.c:fetch_indices()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-6-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-walker.c |   31 ++++++++-----------------------
 1 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index 0139d1e..edcb779 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -648,9 +648,6 @@ static int fetch_indices(struct walker *walker, struct alt_base *repo)
 	int i = 0;
 	int ret = 0;
 
-	struct active_request_slot *slot;
-	struct slot_results results;
-
 	if (repo->got_indices)
 		return 0;
 
@@ -660,27 +657,15 @@ static int fetch_indices(struct walker *walker, struct alt_base *repo)
 	url = xmalloc(strlen(repo->base) + 21);
 	sprintf(url, "%s/objects/info/packs", repo->base);
 
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result != CURLE_OK) {
-			if (missing_target(&results)) {
-				repo->got_indices = 1;
-				goto cleanup;
-			} else {
-				repo->got_indices = 0;
-				ret = error("%s", curl_errorstr);
-				goto cleanup;
-			}
-		}
-	} else {
+	switch (http_get_strbuf(url, &buffer, HTTP_NO_CACHE)) {
+	case HTTP_OK:
+		break;
+	case HTTP_MISSING_TARGET:
+		repo->got_indices = 1;
+		goto cleanup;
+	default:
 		repo->got_indices = 0;
-		ret = error("Unable to start request");
+		ret = -1;
 		goto cleanup;
 	}
 
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 03/12] Two new functions for the http API
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-3-git-send-email-mh@glandium.org>

http_get_strbuf and http_get_file allow respectively to retrieve contents of
an URL to a strbuf or an opened file handle.

Both these functions are the beginning of the http code refactoring.

Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http.c |   85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 http.h |   17 +++++++++++++
 2 files changed, 102 insertions(+), 0 deletions(-)

diff --git a/http.c b/http.c
index ee58799..82534cf 100644
--- a/http.c
+++ b/http.c
@@ -638,3 +638,88 @@ int http_fetch_ref(const char *base, struct ref *ref)
 	free(url);
 	return ret;
 }
+
+/* http_request() targets */
+#define HTTP_REQUEST_STRBUF	0
+#define HTTP_REQUEST_FILE	1
+
+static int http_request(const char *url, void *result, int target, int options)
+{
+	struct active_request_slot *slot;
+	struct slot_results results;
+	struct curl_slist *headers = NULL;
+	struct strbuf buf = STRBUF_INIT;
+
+	slot = get_active_slot();
+	slot->results = &results;
+	curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
+
+	if (result == NULL) {
+		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
+	} else {
+		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
+		curl_easy_setopt(slot->curl, CURLOPT_FILE, result);
+
+		if (target == HTTP_REQUEST_FILE) {
+			long posn = ftell(result);
+			curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION,
+					 fwrite);
+			if (posn > 0) {
+				strbuf_addf(&buf, "Range: bytes=%ld-", posn);
+				headers = curl_slist_append(headers, buf.buf);
+				strbuf_reset(&buf);
+			}
+			slot->local = result;
+		} else
+			curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION,
+					 fwrite_buffer);
+	}
+
+	strbuf_addstr(&buf, "Pragma:");
+	if (options & HTTP_NO_CACHE)
+		strbuf_addstr(&buf, " no-cache");
+
+	headers = curl_slist_append(headers, buf.buf);
+	strbuf_release(&buf);
+
+	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
+	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
+
+	if (start_active_slot(slot)) {
+		run_active_slot(slot);
+		if (results.curl_result == CURLE_OK)
+			return HTTP_OK;
+		else if (missing_target(&results))
+			return HTTP_MISSING_TARGET;
+	}
+	return HTTP_ERROR;
+}
+
+int http_get_strbuf(const char *url, struct strbuf *result, int options)
+{
+	return http_request(url, result, HTTP_REQUEST_STRBUF, options);
+}
+
+int http_get_file(const char *url, const char *filename, int options)
+{
+	int ret;
+	struct strbuf tmpfile = STRBUF_INIT;
+	FILE *result;
+
+	strbuf_addf(&tmpfile, "%s.temp", filename);
+	result = fopen(tmpfile.buf, "a");
+	if (! result) {
+		error("Unable to open local file %s", tmpfile.buf);
+		ret = HTTP_ERROR;
+		goto cleanup;
+	}
+
+	ret = http_request(url, result, HTTP_REQUEST_FILE, options);
+	fclose(result);
+
+	if ((ret == HTTP_OK) && move_temp_to_file(tmpfile.buf, filename))
+		ret = HTTP_ERROR;
+cleanup:
+	strbuf_release(&tmpfile);
+	return ret;
+}
diff --git a/http.h b/http.h
index 905b462..323c780 100644
--- a/http.h
+++ b/http.h
@@ -104,4 +104,21 @@ static inline int missing__target(int code, int result)
 
 extern int http_fetch_ref(const char *base, struct ref *ref);
 
+/* Options for http_request_*() */
+#define HTTP_NO_CACHE		1
+
+/* Return values for http_request_*() */
+#define HTTP_OK			0
+#define HTTP_MISSING_TARGET	1
+#define HTTP_ERROR		2
+
+/* Requests an url and stores the result in a strbuf.
+ * If the result pointer is NULL, a HTTP HEAD request is made instead of GET. */
+int http_get_strbuf(const char *url, struct strbuf *result, int options);
+
+/* Downloads an url and stores the result in the given file.
+ * If a previous interrupted download is detected (i.e. a previous temporary
+ * file is still around) the download is resumed. */
+int http_get_file(const char *url, const char *filename, int options);
+
 #endif /* HTTP_H */
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 01/12] Don't expect verify_pack() callers to set pack_size
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-1-git-send-email-mh@glandium.org>

Since use_pack() will end up populating pack_size if it is not already set,
we can just adapt the code in verify_packfile() such that it doesn't require
pack_size to be set beforehand.

This allows callers not to have to set pack_size themselves, and we can thus
revert changes from 1c23d794 (Don't die in git-http-fetch when fetching packs).

Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http-push.c   |    3 ---
 http-walker.c |    1 -
 pack-check.c  |    8 +++++---
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/http-push.c b/http-push.c
index a4b7d08..e69179b 100644
--- a/http-push.c
+++ b/http-push.c
@@ -771,14 +771,11 @@ static void finish_request(struct transfer_request *request)
 				request->url, curl_errorstr);
 			remote->can_update_info_refs = 0;
 		} else {
-			off_t pack_size = ftell(request->local_stream);
-
 			fclose(request->local_stream);
 			request->local_stream = NULL;
 			if (!move_temp_to_file(request->tmpfile,
 					       request->filename)) {
 				target = (struct packed_git *)request->userData;
-				target->pack_size = pack_size;
 				lst = &remote->packs;
 				while (*lst != target)
 					lst = &((*lst)->next);
diff --git a/http-walker.c b/http-walker.c
index 7271c7d..0139d1e 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -785,7 +785,6 @@ static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned cha
 		return error("Unable to start request");
 	}
 
-	target->pack_size = ftell(packfile);
 	fclose(packfile);
 
 	ret = move_temp_to_file(tmpfile, filename);
diff --git a/pack-check.c b/pack-check.c
index 90c33b1..166ca70 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -49,7 +49,7 @@ static int verify_packfile(struct packed_git *p,
 	const unsigned char *index_base = p->index_data;
 	git_SHA_CTX ctx;
 	unsigned char sha1[20], *pack_sig;
-	off_t offset = 0, pack_sig_ofs = p->pack_size - 20;
+	off_t offset = 0, pack_sig_ofs = 0;
 	uint32_t nr_objects, i;
 	int err = 0;
 	struct idx_entry *entries;
@@ -61,14 +61,16 @@ static int verify_packfile(struct packed_git *p,
 	 */
 
 	git_SHA1_Init(&ctx);
-	while (offset < pack_sig_ofs) {
+	do {
 		unsigned int remaining;
 		unsigned char *in = use_pack(p, w_curs, offset, &remaining);
 		offset += remaining;
+		if (!pack_sig_ofs)
+			pack_sig_ofs = p->pack_size - 20;
 		if (offset > pack_sig_ofs)
 			remaining -= (unsigned int)(offset - pack_sig_ofs);
 		git_SHA1_Update(&ctx, in, remaining);
-	}
+	} while (offset < pack_sig_ofs);
 	git_SHA1_Final(sha1, &ctx);
 	pack_sig = use_pack(p, w_curs, pack_sig_ofs, NULL);
 	if (hashcmp(sha1, pack_sig))
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 00/12] Refactoring the http API
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <20090118074911.GB30228@glandium.org>

> And I realize I have had a partial improvement on that sitting on my
> harddrive, without me having time (nor motivation) to go further.
>
> Maybe it's time I let it go and post the work in progress for someone
> else to take over.

Here it is. 

Note I already sent the first patch a year ago:
 http://markmail.org/message/s4w4pmla4tzjkpsf

This set of patches only deals with HTTP GET requests, so all of http-push
is not taken care of.

As it is work in progress, some error handling might have regressions, but
the original error handling is not necessarily much better.

Also note I only rebased my one-year-old work on current master, and haven't
actually tested it, though, as the code hasn't changed much, I guess it
should be fine.

I hope someone will have an itch to scratch to improve the whole thing.

Mike Hommey (12):
  Don't expect verify_pack() callers to set pack_size
  Some cleanup in get_refs_via_curl()
  Two new functions for the http API
  Use the new http API in http_fetch_ref()
  Use the new http API in get_refs_via_curl()
  Use the new http API in http-walker.c:fetch_indices()
  Use the new http API in http-push.c:fetch_indices()
  Use the new http API in update_remote_info_refs()
  Use the new http API in fetch_symref()
  Use the new http API in http-walker.c:fetch_index()
  Use the new http API in http-push.c:fetch_index()
  Use the new http API in http-walker.c:fetch_pack()

 http-push.c   |  159 ++++++++++-----------------------------------------------
 http-walker.c |  141 ++++++--------------------------------------------
 http.c        |  112 ++++++++++++++++++++++++++++++++--------
 http.h        |   17 ++++++
 pack-check.c  |    8 ++-
 transport.c   |   29 ++--------
 6 files changed, 162 insertions(+), 304 deletions(-)

^ permalink raw reply

* [WIP Patch 04/12] Use the new http API in http_fetch_ref()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-4-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 http.c |   33 ++++++++-------------------------
 1 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/http.c b/http.c
index 82534cf..0c9504b 100644
--- a/http.c
+++ b/http.c
@@ -604,34 +604,17 @@ int http_fetch_ref(const char *base, struct ref *ref)
 {
 	char *url;
 	struct strbuf buffer = STRBUF_INIT;
-	struct active_request_slot *slot;
-	struct slot_results results;
-	int ret;
+	int ret = -1;
 
 	url = quote_ref_url(base, ref->name);
-	slot = get_active_slot();
-	slot->results = &results;
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
-	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
-	if (start_active_slot(slot)) {
-		run_active_slot(slot);
-		if (results.curl_result == CURLE_OK) {
-			strbuf_rtrim(&buffer);
-			if (buffer.len == 40)
-				ret = get_sha1_hex(buffer.buf, ref->old_sha1);
-			else if (!prefixcmp(buffer.buf, "ref: ")) {
-				ref->symref = xstrdup(buffer.buf + 5);
-				ret = 0;
-			} else
-				ret = 1;
-		} else {
-			ret = error("Couldn't get %s for %s\n%s",
-				    url, ref->name, curl_errorstr);
+	if (http_get_strbuf(url, &buffer, HTTP_NO_CACHE) == HTTP_OK) {
+		strbuf_rtrim(&buffer);
+		if (buffer.len == 40)
+			ret = get_sha1_hex(buffer.buf, ref->old_sha1);
+		else if (!prefixcmp(buffer.buf, "ref: ")) {
+			ref->symref = xstrdup(buffer.buf + 5);
+			ret = 0;
 		}
-	} else {
-		ret = error("Unable to start request");
 	}
 
 	strbuf_release(&buffer);
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* [WIP Patch 02/12] Some cleanup in get_refs_via_curl()
From: Mike Hommey @ 2009-01-18  8:04 UTC (permalink / raw)
  To: git, gitster; +Cc: johannes.schindelin
In-Reply-To: <1232265877-3649-2-git-send-email-mh@glandium.org>


Signed-off-by: Mike Hommey <mh@glandium.org>
---
 transport.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/transport.c b/transport.c
index 56831c5..6919ff1 100644
--- a/transport.c
+++ b/transport.c
@@ -508,6 +508,8 @@ static struct ref *get_refs_via_curl(struct transport *transport)
 		free(ref);
 	}
 
+	http_cleanup();
+	free(refs_url);
 	return refs;
 }
 
-- 
1.6.1.141.gb32a

^ permalink raw reply related

* Re: [PATCH] http-push: fix off-by-path_len
From: Mike Hommey @ 2009-01-18  7:49 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Kirill A. Korinskiy, git, gitster, Ray Chuan, Nick Hengeveld
In-Reply-To: <alpine.DEB.1.00.0901171632330.3586@pacific.mpi-cbg.de>

On Sat, Jan 17, 2009 at 04:36:26PM +0100, Johannes Schindelin wrote:
> 
> When getting the result of remote_ls(), we were advancing the variable
> "path" to the relative path inside the repository.
> 
> However, then we went on to malloc a bogus amount of memory: we were
> subtracting the prefix length _again_, quite possibly getting something
> negative, which xmalloc() interprets as really, really much.
> 
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
> 
> 	Note that the push in t5540 is still broken, as http-push does
> 	not handle packed-refs (when looking what branches are on the 
> 	remote side).
> 
> 	It should not even try to access the directory structure under
> 	refs/ to begin with, but read info/refs instead.

It would actually need to do both, because nothing guarantees info/refs
is up-to-date.

> 	However, that is just one example of the ugliness that is 
> 	http-push.c; it also seems to be a perfect example of a copy-pasting 
> 	hell; just look at the output of "git grep
> 	curl_easy_setopt http-push.c".

Likewise for http.c.

> 	There _has_ to be lot of room for improvement.

And I realize I have had a partial improvement on that sitting on my
harddrive, without me having time (nor motivation) to go further.

Maybe it's time I let it go and post the work in progress for someone
else to take over.

Mike

^ permalink raw reply

* Re: [PATCH/RFC v1 1/1] +5 cases (4 fail), diff whitespace tests
From: Junio C Hamano @ 2009-01-18  7:47 UTC (permalink / raw)
  To: Keith Cascio; +Cc: git
In-Reply-To: <alpine.GSO.2.00.0901141633030.9831@kiwi.cs.ucla.edu>

Keith Cascio <keith@CS.UCLA.EDU> writes:

>  +5 cases (4 fail), diff whitespace tests
>  There are 2^3 = eight possible combinations of the three flags:
>  -w -b --ignore-space-at-eol
>  Three of those combinations were already being tested:
>  [none]
>  -w
>  -b
>  Add tests of the other five combinations,

Hmm.  Are these three supposed to be orthogonal?

^ permalink raw reply

* Re: [PATCH/RFC v1 1/1] +5 cases (4 fail), diff whitespace tests
From: Junio C Hamano @ 2009-01-18  7:45 UTC (permalink / raw)
  To: Keith Cascio; +Cc: git
In-Reply-To: <alpine.GSO.2.00.0901141633030.9831@kiwi.cs.ucla.edu>

Thanks, applied.

^ permalink raw reply

* Re: [PATCH] interpret_nth_last_branch(): avoid traversing the reflogs twice
From: Junio C Hamano @ 2009-01-18  7:25 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Thomas Rast, git, Johannes Sixt, Johan Herland
In-Reply-To: <alpine.DEB.1.00.0901180201070.3586@pacific.mpi-cbg.de>

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Note: these are the memory requirements after some really unrealistically 
> high activity, and the memory is free()d during parameter parsing.
>
> A much more realistical expectation would be to switch branches maybe 20 
> times a day, which would amount to something like 36 kilobyte.  And again, 
> they are free()d before the action really starts.

My HEAD reflog is 7MB long with 39000 entries, and among them, 13100
entries have "checkout: moving ".

I know I will never want to switch back to the 10000th from the last
branch.  I am quite sure that I would forget which branch I was on after
switching branches three or four times (hence my original hardcoded
limitation of 10 which "should be plenty").  When I know I only have to
keep track of 10 entries, having to keep track of 13100 entries, even if
it is 36kB (it would actually be 260kB in my case) feels there is
something wrong in the design.

^ permalink raw reply

* Re: [PATCH next resend] bash completion: refactor diff options
From: Junio C Hamano @ 2009-01-18  7:18 UTC (permalink / raw)
  To: Thomas Rast; +Cc: git, Shawn O. Pearce
In-Reply-To: <1232240603-11729-1-git-send-email-trast@student.ethz.ch>

Thomas Rast <trast@student.ethz.ch> writes:

> diff, log and show all take the same diff options.  Refactor them from
> __git_diff and __git_log into a variable, and complete them in
> __git_show too.
>
> Signed-off-by: Thomas Rast <trast@student.ethz.ch>
>
> ---
>
> Any news on this?

> +__git_diff_common_options="--stat --numstat --shortstat --summary
>  			--patch-with-stat --name-only --name-status --color
>  			--no-color --color-words --no-renames --check
>  			--full-index --binary --abbrev --diff-filter=
> -			--find-copies-harder --pickaxe-all --pickaxe-regex
> +			--find-copies-harder

The changes around pickaxe made me "Huh?".  For log pickaxe makes very
good sense but for a single diff it doesn't, yet the original seems to
have had them only on "git diff" and not on "git log", which feels wrong.

Other than that, I think the patch tries to achieve a great thing in the
longer term---we do not have to worry about common parts going out of sync
between diff and log family of commands.

^ permalink raw reply

* Re: [PATCH] revision walker: include a detached HEAD in --all
From: Junio C Hamano @ 2009-01-18  6:36 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Thomas Rast, Johannes Sixt, git
In-Reply-To: <7v8wp917c3.fsf@gitster.siamese.dyndns.org>

Junio C Hamano <gitster@pobox.com> writes:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> ...
>> Note that this affects creating bundles with --all; I contend that it
>> is a good change to add the HEAD, so that cloning from such a bundle
>> will give you a current branch.  However, I had to fix t5701 as it
>> assumed that --all does not imply HEAD.
>
> Sorry, but I do not understand.
>
>> diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh
>> index 8dfaaa4..14413f8 100755
>> --- a/t/t5701-clone-local.sh
>> +++ b/t/t5701-clone-local.sh
>> @@ -11,8 +11,8 @@ test_expect_success 'preparing origin repository' '
>>  	git clone --bare . x &&
>>  	test "$(GIT_CONFIG=a.git/config git config --bool core.bare)" = true &&
>>  	test "$(GIT_CONFIG=x/config git config --bool core.bare)" = true
>> -	git bundle create b1.bundle --all HEAD &&
>> -	git bundle create b2.bundle --all &&
>> +	git bundle create b1.bundle master HEAD &&
>> +	git bundle create b2.bundle master &&
>
> Because --all did not imply HEAD, "--all HEAD" used to be the way to say
> "everything and HEAD".  Now --all does imply HEAD, but it should still be
> a valid way to say "everything, by the way, do not forget HEAD".
>
> Does the first one need to be changed to "master HEAD"?  If "--all HEAD"
> makes the rest of the test unhappy because HEAD is listed twice, perhaps
> that is an independent bug that needs to be fixed?
>
> For that matter, what does "git bundle create x HEAD HEAD" do?  Does it
> list HEAD twice?

With a patch like this, I think b1.bundle can be created with "--all HEAD"
as before.

Of course, to advertise that --all now includes HEAD and it is a _good_
thing, we may want to even say "git bundle create b1.bundle --all" in the
above test sequence.

Creation of b2.bundle should say "master" explicitly as in your patch,
because the point of that bundle is to test a use of such HEAD-less bundle
in the later parts of the script.

-- >8 --
Subject: [PATCH] bundle: allow the same ref to be given more than once

"git bundle create x master master" used to create a bundle that lists
the same branch (master) twice.  Cloning from such a bundle resulted in
a needless warning "warning: Duplicated ref: refs/remotes/origin/master".

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle.c |    2 ++
 object.c |   19 +++++++++++++++++++
 object.h |    1 +
 3 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/bundle.c b/bundle.c
index daecd8e..b20f210 100644
--- a/bundle.c
+++ b/bundle.c
@@ -240,6 +240,8 @@ int create_bundle(struct bundle_header *header, const char *path,
 		return error("unrecognized argument: %s'", argv[i]);
 	}
 
+	object_array_remove_duplicates(&revs.pending);
+
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object_array_entry *e = revs.pending.objects + i;
 		unsigned char sha1[20];
diff --git a/object.c b/object.c
index 50b6528..7e6a92c 100644
--- a/object.c
+++ b/object.c
@@ -268,3 +268,22 @@ void add_object_array_with_mode(struct object *obj, const char *name, struct obj
 	objects[nr].mode = mode;
 	array->nr = ++nr;
 }
+
+void object_array_remove_duplicates(struct object_array *array)
+{
+	int ref, src, dst;
+	struct object_array_entry *objects = array->objects;
+
+	for (ref = 0; ref < array->nr - 1; ref++) {
+		for (src = ref + 1, dst = src;
+		     src < array->nr;
+		     src++) {
+			if (!strcmp(objects[ref].name, objects[src].name))
+				continue;
+			if (src != dst)
+				objects[dst] = objects[src];
+			dst++;
+		}
+		array->nr = dst;
+	}
+}
diff --git a/object.h b/object.h
index 036bd66..3193916 100644
--- a/object.h
+++ b/object.h
@@ -71,5 +71,6 @@ int object_list_contains(struct object_list *list, struct object *obj);
 /* Object array handling .. */
 void add_object_array(struct object *obj, const char *name, struct object_array *array);
 void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode);
+void object_array_remove_duplicates(struct object_array *);
 
 #endif /* OBJECT_H */
-- 
1.6.1.208.g58df

^ permalink raw reply related

* Re: [PATCH] revision walker: include a detached HEAD in --all
From: Junio C Hamano @ 2009-01-18  6:01 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Thomas Rast, Johannes Sixt, git
In-Reply-To: <alpine.DEB.1.00.0901161351460.3586@pacific.mpi-cbg.de>

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> When HEAD is detached, --all should list it, too, logically, as a
> detached HEAD is by definition a temporary, unnamed branch.
>
> It is especially necessary to list it when garbage collecting, as
> the detached HEAD would be trashed.
>
> Noticed by Thomas Rast.
>
> Note that this affects creating bundles with --all; I contend that it
> is a good change to add the HEAD, so that cloning from such a bundle
> will give you a current branch.  However, I had to fix t5701 as it
> assumed that --all does not imply HEAD.

Sorry, but I do not understand.

> diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh
> index 8dfaaa4..14413f8 100755
> --- a/t/t5701-clone-local.sh
> +++ b/t/t5701-clone-local.sh
> @@ -11,8 +11,8 @@ test_expect_success 'preparing origin repository' '
>  	git clone --bare . x &&
>  	test "$(GIT_CONFIG=a.git/config git config --bool core.bare)" = true &&
>  	test "$(GIT_CONFIG=x/config git config --bool core.bare)" = true
> -	git bundle create b1.bundle --all HEAD &&
> -	git bundle create b2.bundle --all &&
> +	git bundle create b1.bundle master HEAD &&
> +	git bundle create b2.bundle master &&

Because --all did not imply HEAD, "--all HEAD" used to be the way to say
"everything and HEAD".  Now --all does imply HEAD, but it should still be
a valid way to say "everything, by the way, do not forget HEAD".

Does the first one need to be changed to "master HEAD"?  If "--all HEAD"
makes the rest of the test unhappy because HEAD is listed twice, perhaps
that is an independent bug that needs to be fixed?

For that matter, what does "git bundle create x HEAD HEAD" do?  Does it
list HEAD twice?

^ permalink raw reply

* Re: [PATCH/RFC] git-am: Make it easier to see which patch failed
From: Junio C Hamano @ 2009-01-18  5:42 UTC (permalink / raw)
  To: Jonas Flodén; +Cc: git, Johannes Schindelin
In-Reply-To: <636ecac0901160634r586c72a0r9bb63c6f019f5bff@mail.gmail.com>

"Jonas Flodén" <jonas@floden.nu> writes:

> When git-am fails it's not always easy to see which patch failed,
> since it's often hidden by a lot of error messages.
> Add an extra line which prints the name of the failed patch just
> before the resolve message to make it easier to find.
>
> Signed-off-by: Jonas Flodén <jonas@floden.nu>
> ---
> Johannes Schindelin wrote:
>> Maybe
>>
>> -               echo Patch failed at $msgnum.
>> +               echo Patch failed at $msgnum($FIRSTLINE).
>
> How about this instead. Though the line could get very long.
> This makes the line stand out a little more.
>
>  git-am.sh |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/git-am.sh b/git-am.sh
> index 4b157fe..09c2f9c 100755
> --- a/git-am.sh
> +++ b/git-am.sh
> @@ -501,7 +501,7 @@ do
>  	fi
>  	if test $apply_status != 0
>  	then
> -		echo Patch failed at $msgnum.
> +		printf '\nPatch failed at %s (%s)\n' "$msgnum" "$FIRSTLINE"
>  		stop_here_user_resolve $this
>  	fi

Looks sane except that I do not think you need printf nor the leading
blank line, i.e.

	echo "Patch failed at $msgnum ($FIRSTLINE)"

^ permalink raw reply

* [PATCH v2] git-svn: Add --localtime option to "fetch"
From: Pete Harlan @ 2009-01-18  4:10 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git mailing list
In-Reply-To: <4972A896.5050205@pcharlan.com>

By default git-svn stores timestamps of fetched commits in
Subversion's UTC format.  Passing --localtime to fetch will convert
them to the timezone of the server on which git-svn is run.

This makes the timestamps of a resulting "git log" agree with what
"svn log" shows for the same repository.

Signed-off-by: Pete Harlan <pgit@pcharlan.com>
---

Changes to v2 after feedback from Eric Wong:

1. "--convert-timezones" renamed to "--localtime".

2. Removed warnings about breaking interoperability with Subversion,
   because the option doesn't do that.  Instead warn about
   interoperability with other git-svn users cloning from the same
   repository if they don't all use or not use --localtime.

3. Move config variable into Git::SVN namespace.

4. Better conformance to coding guidelines.

 Documentation/git-svn.txt              |   11 ++++++
 contrib/completion/git-completion.bash |    2 +-
 git-svn.perl                           |   54 ++++++++++++++++++++++++++++++-
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt
index 8d0c421..63d2f5e 100644
--- a/Documentation/git-svn.txt
+++ b/Documentation/git-svn.txt
@@ -92,6 +92,17 @@ COMMANDS
 	.git/config file may be specified as an optional command-line
 	argument.

+--localtime;;
+	Store Git commit times in the local timezone instead of UTC.  This
+	makes 'git-log' (even without --date=local) show the same times
+	that `svn log` would in the local timezone.
+
+This doesn't interfere with interoperating with the Subversion
+repository you cloned from, but if you wish for your local Git
+repository to be able to interoperate with someone else's local Git
+repository, either don't use this option or you should both use it in
+the same local timezone.
+
 'clone'::
 	Runs 'init' and 'fetch'.  It will automatically create a
 	directory based on the basename of the URL passed to it;
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index f8b845a..c9d2c02 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1575,7 +1575,7 @@ _git_svn ()
 			--follow-parent --authors-file= --repack=
 			--no-metadata --use-svm-props --use-svnsync-props
 			--log-window-size= --no-checkout --quiet
-			--repack-flags --user-log-author $remote_opts
+			--repack-flags --user-log-author --localtime $remote_opts
 			"
 		local init_opts="
 			--template= --shared= --trunk= --tags=
diff --git a/git-svn.perl b/git-svn.perl
index ad01e18..0adc8db 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -84,6 +84,7 @@ my %fc_opts = ( 'follow-parent|follow!' => \$Git::SVN::_follow_parent,
 		   \$Git::SVN::_repack_flags,
 		'use-log-author' => \$Git::SVN::_use_log_author,
 		'add-author-from' => \$Git::SVN::_add_author_from,
+		'localtime' => \$Git::SVN::_localtime,
 		%remote_opts );

 my ($_trunk, $_tags, $_branches, $_stdlayout);
@@ -1364,7 +1365,7 @@ use constant rev_map_fmt => 'NH40';
 use vars qw/$default_repo_id $default_ref_id $_no_metadata $_follow_parent
             $_repack $_repack_flags $_use_svm_props $_head
             $_use_svnsync_props $no_reuse_existing $_minimize_url
-	    $_use_log_author $_add_author_from/;
+	    $_use_log_author $_add_author_from $_localtime/;
 use Carp qw/croak/;
 use File::Path qw/mkpath/;
 use File::Copy qw/copy/;
@@ -2526,12 +2527,61 @@ sub get_untracked {
 	\@out;
 }

+# parse_svn_date(DATE)
+# --------------------
+# Given a date (in UTC) from Subversion, return a string in the format
+# "<TZ Offset> <local date/time>" that Git will use.
+#
+# By default the parsed date will be in UTC; if $Git::SVN::_localtime
+# is true we'll convert it to the local timezone instead.
 sub parse_svn_date {
 	my $date = shift || return '+0000 1970-01-01 00:00:00';
 	my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
 	                                    (\d\d)\:(\d\d)\:(\d\d).\d+Z$/x) or
 	                                 croak "Unable to parse date: $date\n";
-	"+0000 $Y-$m-$d $H:$M:$S";
+	my $parsed_date;    # Set next.
+
+	if ($Git::SVN::_localtime) {
+		# Translate the Subversion datetime to an epoch time.
+		# Begin by switching ourselves to $date's timezone, UTC.
+		my $old_env_TZ = $ENV{TZ};
+		$ENV{TZ} = 'UTC';
+
+		my $epoch_in_UTC =
+		    POSIX::strftime('%s', $S, $M, $H, $d, $m - 1, $Y - 1900);
+
+		# Determine our local timezone (including DST) at the
+		# time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
+		# value of TZ, if any, at the time we were run.
+		if (defined $Git::SVN::Log::TZ) {
+			$ENV{TZ} = $Git::SVN::Log::TZ;
+		} else {
+			delete $ENV{TZ};
+		}
+
+		my $our_TZ =
+		    POSIX::strftime('%Z', $S, $M, $H, $d, $m - 1, $Y - 1900);
+
+		# This converts $epoch_in_UTC into our local timezone.
+		my ($sec, $min, $hour, $mday, $mon, $year,
+		    $wday, $yday, $isdst) = localtime($epoch_in_UTC);
+
+		$parsed_date = sprintf('%s %04d-%02d-%02d %02d:%02d:%02d',
+				       $our_TZ, $year + 1900, $mon + 1,
+				       $mday, $hour, $min, $sec);
+
+		# Reset us to the timezone in effect when we entered
+		# this routine.
+		if (defined $old_env_TZ) {
+			$ENV{TZ} = $old_env_TZ;
+		} else {
+			delete $ENV{TZ};
+		}
+	} else {
+		$parsed_date = "+0000 $Y-$m-$d $H:$M:$S";
+	}
+
+	return $parsed_date;
 }

 sub check_author {
-- 
1.6.1.77.g953e7

^ permalink raw reply related

* Re: [PATCH] rebase -p: seed first commit in case it's before the merge bases.
From: Stephen Haberman @ 2009-01-18  4:02 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: git, Michael J Gruber, Stephan Beyer, Sitaram Chamarty
In-Reply-To: <20090117215751.60ade90a.stephen@exigencecorp.com>


> Perhaps it is not "good", but the explanation a blank REWRITTEN/sha1 is
> used a marker during the probe phase that this commit will be rewritten.
> So when looking at any of its children commits, they should be rewritten
> if a REWRITTEN/parentSha1 exists.

Ugh, fixing several typos:

Perhaps it is not "good", but the explanation /is that/ a blank
REWRITTEN/sha1 is used /as/ a marker during the probe phase that this
commit will be rewritten. So when looking at any of its children
commits, /the children/ should be rewritten if a REWRITTEN/parentSha1
exists.

Sorry about that.

- Stephen

^ permalink raw reply

* Re: [PATCH/RFC] git-svn: Add --convert-timezone option
From: Pete Harlan @ 2009-01-18  3:57 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git mailing list
In-Reply-To: <20090117103711.GB29598@dcvr.yhbt.net>

Eric Wong wrote:
> Pete Harlan <pgit@pcharlan.com> wrote:
>> By default git svn stores timestamps of fetched commits in
>> Subversion's UTC format, to facilitate interoperating with a
>> Subversion repository.
>>
>> If you're using git svn to convert a repository to Git and aren't
>> interested in pushing Git commits back to Subversion, you can use this
>> option to store timestamps of fetched commits as though they were made
>> in the local timezone of the host on which git svn is run.  This makes
>> the times and timezones of a resulting "git log" agree with what "svn
>> log" shows for the same repository.
>>
>> Signed-off-by: Pete Harlan <pgit@pcharlan.com>
>> ---
>>
>> This is a patch I've had floating around for a while.  I haven't
>> submitted it before because I find the solution ungainly.  There has
>> to be a better way to convert from one timezone to the other, but I
>> didn't run across it and now that I've converted away from Subversion
>> I'm sort of done thinking about it.  I'm submitting it now because
>> even in its current state it would have saved me some headache.
>>
>> Also, I'm not sure I'm correct when asserting that converting
>> timezones like this will break Subversion interoperability.  Eric, if
>> that isn't true then I can remove that claim and resubmit.  If
>> converting timezones breaks nothing, then maybe it could even be the
>> default.
> 
> Hi,
> 
> It'll break interoperability between multiple users of git-svn
> tracking the same repo.  But several options already allow for
> this (authors file, noMetdata, ...), so I'm fine with it as long
> as it's optional.

Thank you for your review.  I'll comment here and then followup with a
replacement patch.

>> One improvement that I didn't bother to make would be to convert to
>> different local timezones based on author.  This change uses the
>> timezone of the machine running git-svn, which in my case was fine.
>> Using per-author timezones would be nice, but since parse_svn_date()
>> doesn't already know which author the date is associated with it would
>> be a more intrusive change.
> 
> Could be an interesting idea, but on the other hand I doubt many people
> would bother configuring the authors-file for it.
> 
> On a side note, for the total conversions I've done, I've found it
> easier/faster/more bandwidth efficient to just forgo authors-file
> entirely and use git-filter-branch after-the-fact.

Interesting.  When I was using git-svn I was new enough to Git that I
wasn't aware of filter-branch.

>> My primary motivation in this was to reduce transition shock among our
>> development team.  The fewer ways "git log" looks unhelpfully
>> different than the old "svn log" the better; converting all commit
>> times into GMT wasn't going to look friendly.
>>
>> Comments welcome.
> 
> My usual coding style nits apply (pretty much git (and Linux kernel)
> standard coding style things, too).
> 
> No space between "function(arguments)".
> 
> lower_snake_case, especially for local variables.  mixedCase requires
> more effort to read IMO.

Thanks; I'm sorry you had to point these things out.  Hopefully fixed
in the followup patch.

> More comments inline...
> 
>> --Pete
>>
>>  Documentation/git-svn.txt              |    8 ++++
>>  contrib/completion/git-completion.bash |    2 +-
>>  git-svn.perl                           |   56 ++++++++++++++++++++++++++++++-
>>  3 files changed, 63 insertions(+), 3 deletions(-)
>>
>> diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt
>> index 8d0c421..8811bf0 100644
>> --- a/Documentation/git-svn.txt
>> +++ b/Documentation/git-svn.txt
>> @@ -92,6 +92,14 @@ COMMANDS
>>  	.git/config file may be specified as an optional command-line
>>  	argument.
>>
>> +--convert-timezones;;
> 
> Is it "timezone" or "timezones" here?  I think "convert-timezone" is
> more correct since we only use the local timezone, not different
> ones for each author.  On the other hand, maybe "--localtime" is
> an even better name for this option...
> 
> Any other opinions out there?

I liked your "--localtime" suggestion, and used that in v2.  I
originally chose plural for "timzeones" because it converts between
two timezones, and there are two timestamps associated with each
commit (even if they're the same, in git-svn).  But singular would
have made just as much sense, and at one point it was singular.

>> +	Store Git commit times in the local timezone instead of UTC.  This
>> +	makes 'git-log' (even without --date=local) show the same times
>> +	that `svn log` would in the local timezone.
>> +
>> +This breaks interoperability with SVN, but may be cosmetically
>> +desirable when converting a repository from SVN to Git.
> 
> Again, this only breaks interoperability with other users of git-svn
> using the default configuration on the same repo.

Thanks, fixed in v2.

>>  'clone'::
>>  	Runs 'init' and 'fetch'.  It will automatically create a
>>  	directory based on the basename of the URL passed to it;
>> diff --git a/git-svn.perl b/git-svn.perl
>> index ad01e18..c2f600d 100755
>> --- a/git-svn.perl
>> +++ b/git-svn.perl
>> @@ -66,7 +66,7 @@ my ($_stdin, $_help, $_edit,
>>  	$_version, $_fetch_all, $_no_rebase,
>>  	$_merge, $_strategy, $_dry_run, $_local,
>>  	$_prefix, $_no_checkout, $_url, $_verbose,
>> -	$_git_format, $_commit_url, $_tag);
>> +	$_git_format, $_commit_url, $_tag, $_convert_timezones);
> 
> Not easy to tell (and apologies for that) but this new variable probably
> belongs in the Git::SVN namespace.  I really need to find some time to
> reorganize and split out the source to git-svn.

I moved it into Git::SVN for v2, though I admit to not having a good
grasp of the big picture about what belongs where.

>>  $Git::SVN::_follow_parent = 1;
>>  my %remote_opts = ( 'username=s' => \$Git::SVN::Prompt::_username,
>>                      'config-dir=s' => \$Git::SVN::Ra::config_dir,
>> @@ -84,6 +84,7 @@ my %fc_opts = ( 'follow-parent|follow!' => \$Git::SVN::_follow_parent,
>>  		   \$Git::SVN::_repack_flags,
>>  		'use-log-author' => \$Git::SVN::_use_log_author,
>>  		'add-author-from' => \$Git::SVN::_add_author_from,
>> +		'convert-timezones' => \$_convert_timezones,
>>  		%remote_opts );
>>
>>  my ($_trunk, $_tags, $_branches, $_stdlayout);
>> @@ -2526,12 +2527,63 @@ sub get_untracked {
>>  	\@out;
>>  }
>>
>> +# parse_svn_date(DATE)
>> +# --------------------
>> +# Given a date (in UTC) from Subversion, return a string in the format
>> +# "<TZ Offset> <local date/time>" that Git will use.
>> +#
>> +# By default the parsed date will be in UTC for interoperating with
>> +# Subversion, but if $_convert_timezones is true we'll convert it to
>> +# the local timezone instead.
>>  sub parse_svn_date {
>>  	my $date = shift || return '+0000 1970-01-01 00:00:00';
>>  	my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
>>  	                                    (\d\d)\:(\d\d)\:(\d\d).\d+Z$/x) or
>>  	                                 croak "Unable to parse date: $date\n";
>> -	"+0000 $Y-$m-$d $H:$M:$S";
>> +	my $parsed_date;    # Set next.
>> +
>> +	if ($_convert_timezones) {
>> +		# Translate the Subversion datetime to an epoch time.
>> +		# We need to switch ourselves to $date's timezone,
>> +		# UTC, for this.
>> +		my $oldEnvTZ = $ENV{TZ};
>> +		$ENV{TZ} = 'UTC';
>> +
>> +		my $epochUTC =
>> +		    POSIX::strftime ('%s', $S, $M, $H, $d, $m - 1, $Y - 1900);
>> +
>> +		# Determine our local timezone (including DST) at the
>> +		# time of $epochUTC.  $Git::SVN::Log::TZ stored the
>> +		# value of TZ, if any, at the time we were run.
>> +		if (defined $Git::SVN::Log::TZ) {
>> +			$ENV{TZ} = $Git::SVN::Log::TZ;
>> +		} else {
>> +			delete $ENV{TZ};
>> +		}
>> +
>> +		my $ourTZ =
>> +		    POSIX::strftime ('%Z', $S, $M, $H, $d, $m - 1, $Y - 1900);
>> +
>> +		# This converts $epochUTC into our local timezone.
>> +		my ($sec, $min, $hour, $mday, $mon, $year,
>> +		    $wday, $yday, $isdst) = localtime ($epochUTC);
>> +
>> +		$parsed_date = sprintf ('%s %04d-%02d-%02d %02d:%02d:%02d',
>> +					$ourTZ, $year + 1900, $mon + 1,
>> +					$mday, $hour, $min, $sec);
> 
> There's probably a reason you didn't use strftime here, or is there?

If you mean why couldn't I have saved an sprintf by using the previous
strftime call to format this data at the same time I got the timzeone,
it's because strftime doesn't convert the time into the local
timezone.  It just passes back the same values that you pass in.

> The stock Perl time/date handling functions have always frightened me,
> so I'll just trust the (+|-) (1|1900) things are correct :)

:) I tested it pretty well...

--
Pete Harlan
pgit@pcharlan.com

^ permalink raw reply

* Re: [PATCH] rebase -p: seed first commit in case it's before the merge bases.
From: Stephen Haberman @ 2009-01-18  3:57 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: git, Michael J Gruber, Stephan Beyer, Sitaram Chamarty
In-Reply-To: <alpine.DEB.1.00.0901180108480.3586@pacific.mpi-cbg.de>


> > However, I have a strong feeling that just piling onto the current
> > code will not fix the underlying issues.
> 
> BTW just to clarify what I mean by "underlying issues": if you say
> "git rebase -i" in Sitaram's test case, you will see the two commits
> -- as expected.
> 
> However, if you add "-p", all of a sudden you will only see "noop".
> IMO there is no excuse that the code can hide them at all.  If the
> commits are reachable from HEAD but not from $UPSTREAM, they have to
> be in the list.  As simple as that.

Agreed--the rewritten-parent probing being rooted at the merge bases
was not good enough.

> Another thing that I find horribly wrong: there is a "touch
> $REWRITTEN/sha1".  There was a simple design in the beginning: the
> files in $REWRITTEN are actually a mapping from old SHA-1 (file name)
> to new SHA-1 (content).  This was broken, without any good
> explanation.

Perhaps it is not "good", but the explanation a blank REWRITTEN/sha1 is
used a marker during the probe phase that this commit will be rewritten.
So when looking at any of its children commits, they should be rewritten
if a REWRITTEN/parentSha1 exists. Then as the rewriting actually happens,
they get filled in with the new sha1. I cribbed this approach from
Stephan's sequencer rewrite of rebase-i-p.

If you want a different data structure, be it file based, or bash/list
based, or whatever, to track "this commit will eventually be rewritten
but we haven't gotten there yet" during the probe, then we could go back
to leaving REWRITTEN/sha1 alone until after the sha1 commit has been
rebased.

I'm open to suggestions.

Also, as you seem to realize, the current bug stems from not knowing how
to initialize the rewritten data structure. For Sitaram's case, the
first commit is behind any of the merge bases, so marking its parents
(if they exist) as rewritten to ONTO seems reasonable.

If there are no parents, as you point out, I added a "-o sha1 = FIRST"
that should also get the ball rolling. It's another hack, but does this
address your concern until a large refactoring happens?

-------------------------- git-rebase--interactive.sh --------------------------
index c8b0861..8740d9f 100755
@@ -604,11 +604,18 @@ first and then run 'git rebase --continue' again."
 				echo $ONTO > "$REWRITTEN"/$c ||
 					die "Could not init rewritten commits"
 			done
+			# Along with the merge bases, look at the first commit's
+			# parent (which may be before the merge base) and mark it
+			# as rewritten to ONTO
+			FIRST="$(git rev-list --reverse --first-parent $UPSTREAM..$HEAD | head -n 1)"
+			for p in $(git rev-list --parents -1 $FIRST | cut -d' ' -f2)
+			do
+				echo $ONTO > "$REWRITTEN/$p"
+			done
 			# No cherry-pick because our first pass is to determine
 			# parents to rewrite and skipping dropped commits would
 			# prematurely end our probe
 			MERGES_OPTION=
-			first_after_upstream="$(git rev-list --reverse --first-parent $UPSTREAM..$HEAD | head -n 1)"
 		else
 			MERGES_OPTION="--no-merges --cherry-pick"
 		fi
@@ -629,12 +636,12 @@ first and then run 'git rebase --continue' again."
 				preserve=t
 				for p in $(git rev-list --parents -1 $sha1 | cut -d' ' -f2-)
 				do
-					if test -f "$REWRITTEN"/$p -a \( $p != $UPSTREAM -o $sha1 = $first_after_upstream \)
+					if test -f "$REWRITTEN"/$p -a $p != $UPSTREAM
 					then
 						preserve=f
 					fi
 				done
-				if test f = "$preserve"
+				if test f = "$preserve" -o $sha1 = $FIRST
 				then
 					touch "$REWRITTEN"/$sha1
 					echo "pick $shortsha1 $rest" >> "$TODO"

(I'm adding the other 3 cc's back after my failed patch attempt
stripped them out--sorry, guys.)

- Stephen

^ permalink raw reply

* Re: [PATCH 2/2] http-push: remove MOVE step after PUT when sending objects to server
From: Ray Chuan @ 2009-01-18  3:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7viqod5thi.fsf@gitster.siamese.dyndns.org>

Hi,

On Sun, Jan 18, 2009 at 12:48 AM, Junio C Hamano <gitster@pobox.com> wrote:
> "Ray Chuan" <rctay89@gmail.com> writes:
>
>> Currently, git PUTs to
>>
>>  /repo.git/objects/1a/1a2b...9z_opaquelocktoken:1234-....
>>
>> then MOVEs to
>>
>>  /repo.git/objects/1a/1a2b...9z
>>
>> This is needless. In fact, the only time MOVE requests are sent is for
>> this sole purpose (ie. of renaming an object).

this would be my "true" reason. i do believe PUTting to a "temporary"
file (ie with '__opaquelocktoken:1234-....' appended) to be a needless
step.

>> A concern raised was repository corruption in the event of failure
>> during PUT. "put && move" won't afford any more protection than using
>> simply "put", since info/refs is not updated if a PUT fails, so there
>> is no cause for concern.
>
> That's a completely bogus reasoning.  Normal operation inside the
> repository that was pushed into won't look at info/refs at all.
>
> The true reason you want to avoid put-then-move is...?

i mentioned this "repository corruption" issue as it was raised
previously by Johannes (towards the bottom):

  http://article.gmane.org/gmane.comp.version-control.git/106031

-- 
Cheers,
Ray Chuan

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox