git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] builtin-bundle.c - use stream buffered input for rev-list
@ 2007-08-10 22:29 Mark Levedahl
  2007-08-10 22:58 ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: Mark Levedahl @ 2007-08-10 22:29 UTC (permalink / raw)
  To: Junio C Hamano, Johannes Schindelin; +Cc: Git Mailing List, Mark Levedahl

git-bundle create on cygwin was nearly unusable due to 1 character
at a time (unbuffered) reading from an exec'ed process. Fix by using
fdopen to get a buffered stream.

Results for "time git bundle create test.bdl v1.0.3..v1.5.2" are:

before this patch:
         cygwin         linux
real    1m38.828s      0m3.578s
user    0m12.122s      0m2.896s
sys     1m28.215s      0m0.692s

after this patch:
real    0m3.688s       0m2.835s
user    0m3.075s       0m2.731s
sys     0m1.075s       0m0.149s

Signed-off-by: Mark Levedahl <mdl123@verizon.net>
---
 builtin-bundle.c |    7 +++++--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin-bundle.c b/builtin-bundle.c
index 2d0e106..b954213 100644
--- a/builtin-bundle.c
+++ b/builtin-bundle.c
@@ -207,6 +207,7 @@ static int create_bundle(struct bundle_header *header, const char *path,
 	char buffer[1024];
 	struct rev_info revs;
 	struct child_process rls;
+	FILE *rls_fout;
 
 	/*
 	 * NEEDSWORK: this should use something like lock-file
@@ -236,10 +237,11 @@ static int create_bundle(struct bundle_header *header, const char *path,
 	rls.git_cmd = 1;
 	if (start_command(&rls))
 		return -1;
-	while ((i = read_string(rls.out, buffer, sizeof(buffer))) > 0) {
+	rls_fout = fdopen(rls.out, "r");
+	while (fgets(buffer, sizeof(buffer), rls_fout)) {
 		unsigned char sha1[20];
 		if (buffer[0] == '-') {
-			write_or_die(bundle_fd, buffer, i);
+			write_or_die(bundle_fd, buffer, strlen(buffer));
 			if (!get_sha1_hex(buffer + 1, sha1)) {
 				struct object *object = parse_object(sha1);
 				object->flags |= UNINTERESTING;
@@ -250,6 +252,7 @@ static int create_bundle(struct bundle_header *header, const char *path,
 			object->flags |= SHOWN;
 		}
 	}
+	fclose(rls_fout);
 	if (finish_command(&rls))
 		return error("rev-list died");
 
-- 
1.5.3.rc4.53.g9fc90

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] builtin-bundle.c - use stream buffered input for rev-list
  2007-08-10 22:29 [PATCH] builtin-bundle.c - use stream buffered input for rev-list Mark Levedahl
@ 2007-08-10 22:58 ` Junio C Hamano
  2007-08-11  0:27   ` [PATCH] builtin-bundle - use buffered reads for bundle header Mark Levedahl
  2007-08-11  0:39   ` Mark Levedahl
  0 siblings, 2 replies; 5+ messages in thread
From: Junio C Hamano @ 2007-08-10 22:58 UTC (permalink / raw)
  To: Mark Levedahl; +Cc: Johannes Schindelin, Git Mailing List

Mark Levedahl <mdl123@verizon.net> writes:

> git-bundle create on cygwin was nearly unusable due to 1 character
> at a time (unbuffered) reading from an exec'ed process.

Gaah, well spotted.  I was wondering what that 20x difference
was, but did not notice that read_string() thing.

That function is probably sometimes necessary if you are reading
from a non-seekable fd and need to stop immediately after
reading the header (so that you can hand the rest to pack
handling code), but it really should be the last-ditch thing to
read one-byte-at-a-time.

Also, I even suspect that we are always reading from a seekable
bundle file in this program and it should be able to use
buffered input with seeking back as needed after you read
enough.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] builtin-bundle - use buffered reads for bundle header
  2007-08-10 22:58 ` Junio C Hamano
@ 2007-08-11  0:27   ` Mark Levedahl
  2007-08-11  0:31     ` Junio C Hamano
  2007-08-11  0:39   ` Mark Levedahl
  1 sibling, 1 reply; 5+ messages in thread
From: Mark Levedahl @ 2007-08-11  0:27 UTC (permalink / raw)
  To: Junio C Hamano, Johannes Schindelin; +Cc: Git Mailing List, Mark Levedahl

This eliminates all use of byte-at-a-time reading of data in this
function: as Junio noted, a bundle file is seekable so we can
reset the file position to the first part of the pack-file using lseek
after reading the header.

Signed-off-by: Mark Levedahl <mdl123@verizon.net>
---
 builtin-bundle.c |   37 +++++++++++++------------------------
 1 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/builtin-bundle.c b/builtin-bundle.c
index b954213..6651c51 100644
--- a/builtin-bundle.c
+++ b/builtin-bundle.c
@@ -44,38 +44,21 @@ struct bundle_header {
 	struct ref_list references;
 };
 
-/* this function returns the length of the string */
-static int read_string(int fd, char *buffer, int size)
-{
-	int i;
-	for (i = 0; i < size - 1; i++) {
-		ssize_t count = xread(fd, buffer + i, 1);
-		if (count < 0)
-			return error("Read error: %s", strerror(errno));
-		if (count == 0) {
-			i--;
-			break;
-		}
-		if (buffer[i] == '\n')
-			break;
-	}
-	buffer[i + 1] = '\0';
-	return i + 1;
-}
-
 /* returns an fd */
 static int read_header(const char *path, struct bundle_header *header) {
 	char buffer[1024];
-	int fd = open(path, O_RDONLY);
+	int fd;
+    long fpos;
+    FILE *ffd = fopen(path, "r");
 
-	if (fd < 0)
+	if (!ffd)
 		return error("could not open '%s'", path);
-	if (read_string(fd, buffer, sizeof(buffer)) < 0 ||
+    if (!fgets(buffer, sizeof(buffer), ffd) ||
 			strcmp(buffer, bundle_signature)) {
-		close(fd);
+        fclose(ffd);
 		return error("'%s' does not look like a v2 bundle file", path);
 	}
-	while (read_string(fd, buffer, sizeof(buffer)) > 0
+    while (fgets(buffer, sizeof(buffer), ffd)
 			&& buffer[0] != '\n') {
 		int is_prereq = buffer[0] == '-';
 		int offset = is_prereq ? 1 : 0;
@@ -97,6 +80,12 @@ static int read_header(const char *path, struct bundle_header *header) {
 		add_to_ref_list(sha1, isspace(delim) ?
 				buffer + 41 + offset : "", list);
 	}
+    fpos = ftell(ffd);
+    fclose(ffd);
+    fd = open(path, O_RDONLY);
+    if (fd < 0)
+        return error("could not open '%s'", path);
+    lseek(fd, fpos, SEEK_SET);
 	return fd;
 }
 
-- 
1.5.3.rc4.54.gbe00

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] builtin-bundle - use buffered reads for bundle header
  2007-08-11  0:27   ` [PATCH] builtin-bundle - use buffered reads for bundle header Mark Levedahl
@ 2007-08-11  0:31     ` Junio C Hamano
  0 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2007-08-11  0:31 UTC (permalink / raw)
  To: Mark Levedahl; +Cc: Johannes Schindelin, Git Mailing List

Is this just me (I am writing this from an unfamiliar terminal),
or is your patch somehow breaks all indentation?

 /* returns an fd */
 static int read_header(const char *path, struct bundle_header *header) {
 	char buffer[1024];
-	int fd = open(path, O_RDONLY);
+	int fd;
+    long fpos;
+    FILE *ffd = fopen(path, "r");

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] builtin-bundle - use buffered reads for bundle header
  2007-08-10 22:58 ` Junio C Hamano
  2007-08-11  0:27   ` [PATCH] builtin-bundle - use buffered reads for bundle header Mark Levedahl
@ 2007-08-11  0:39   ` Mark Levedahl
  1 sibling, 0 replies; 5+ messages in thread
From: Mark Levedahl @ 2007-08-11  0:39 UTC (permalink / raw)
  To: Junio C Hamano, Johannes Schindelin; +Cc: Git Mailing List, Mark Levedahl

This eliminates all use of byte-at-a-time reading of data in this
function: as Junio noted, a bundle file is seekable so we can
reset the file position to the first part of the pack-file using lseek
after reading the header.

Signed-off-by: Mark Levedahl <mdl123@verizon.net>
---

 Try again - editor had its tab settings mangled..grrrr

 builtin-bundle.c |   37 +++++++++++++------------------------
 1 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/builtin-bundle.c b/builtin-bundle.c
index b954213..b8f8e78 100644
--- a/builtin-bundle.c
+++ b/builtin-bundle.c
@@ -44,38 +44,21 @@ struct bundle_header {
 	struct ref_list references;
 };

-/* this function returns the length of the string */
-static int read_string(int fd, char *buffer, int size)
-{
-	int i;
-	for (i = 0; i < size - 1; i++) {
-		ssize_t count = xread(fd, buffer + i, 1);
-		if (count < 0)
-			return error("Read error: %s", strerror(errno));
-		if (count == 0) {
-			i--;
-			break;
-		}
-		if (buffer[i] == '\n')
-			break;
-	}
-	buffer[i + 1] = '\0';
-	return i + 1;
-}
-
 /* returns an fd */
 static int read_header(const char *path, struct bundle_header *header) {
 	char buffer[1024];
-	int fd = open(path, O_RDONLY);
+	int fd;
+	long fpos;
+	FILE *ffd = fopen(path, "r");

-	if (fd < 0)
+	if (!ffd)
 		return error("could not open '%s'", path);
-	if (read_string(fd, buffer, sizeof(buffer)) < 0 ||
+	if (!fgets(buffer, sizeof(buffer), ffd) ||
 			strcmp(buffer, bundle_signature)) {
-		close(fd);
+        fclose(ffd);
 		return error("'%s' does not look like a v2 bundle file", path);
 	}
-	while (read_string(fd, buffer, sizeof(buffer)) > 0
+	while (fgets(buffer, sizeof(buffer), ffd)
 			&& buffer[0] != '\n') {
 		int is_prereq = buffer[0] == '-';
 		int offset = is_prereq ? 1 : 0;
@@ -97,6 +80,12 @@ static int read_header(const char *path, struct bundle_header *header) {
 		add_to_ref_list(sha1, isspace(delim) ?
 				buffer + 41 + offset : "", list);
 	}
+	fpos = ftell(ffd);
+	fclose(ffd);
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return error("could not open '%s'", path);
+	lseek(fd, fpos, SEEK_SET);
 	return fd;
 }

--
1.5.3.rc4.54.gbe00

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-08-11  0:39 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-10 22:29 [PATCH] builtin-bundle.c - use stream buffered input for rev-list Mark Levedahl
2007-08-10 22:58 ` Junio C Hamano
2007-08-11  0:27   ` [PATCH] builtin-bundle - use buffered reads for bundle header Mark Levedahl
2007-08-11  0:31     ` Junio C Hamano
2007-08-11  0:39   ` Mark Levedahl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).