git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
@ 2007-04-08 23:26 Dana How
  0 siblings, 0 replies; 4+ messages in thread
From: Dana How @ 2007-04-08 23:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, danahow


Rewrite write_pack_file() to break to a new packfile
whenever write_object/write_one request it,  and
correct the header's object count in the previous packfile.
Change write_index_file() to write an index
for just the objects in the most recent packfile.

Signed-off-by: Dana How <how@deathvalley.cswitch.com>
---
 builtin-pack-objects.c |  126 ++++++++++++++++++++++++++++++++++--------------
 1 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index a088f2e..d750c4b 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -572,6 +572,7 @@ static off_t write_one(struct sha1file *f,
 	result = write_object(f, e, offset_limit && nr_written ? offset_limit - offset : 0);
 	if (!result)
 		return result;
+	written_list[nr_written++] = e;
 	e->offset = offset;
 	return offset + result;
 }
@@ -628,52 +629,103 @@ static void write_index_file(void);
 
 static void write_pack_file(void)
 {
-	uint32_t i;
+	uint32_t i, j;
 	struct sha1file *f;
 	off_t offset;
 	struct pack_header hdr;
 	unsigned last_percent = 999;
-	int do_progress = progress;
+	int do_progress = progress >> !base_name;
+	char oldname[PATH_MAX];
+	int pack_fd;
+	SHA_CTX ctx;
 
-	if (!base_name) {
-		f = sha1fd(1, "<stdout>");
-		do_progress >>= 1;
-	}
-	else
-		f = sha1create("%s-%s.%s", base_name,
-			       sha1_to_hex(object_list_sha1), "pack");
 	if (do_progress)
 		fprintf(stderr, "Writing %u objects.\n", nr_result);
+	written_list = xmalloc(nr_objects * sizeof(struct object_entry *));
 
-	hdr.hdr_signature = htonl(PACK_SIGNATURE);
-	hdr.hdr_version = htonl(PACK_VERSION);
-	hdr.hdr_entries = htonl(nr_result);
-	sha1write(f, &hdr, sizeof(hdr));
-	offset = sizeof(hdr);
-	if (!nr_result)
-		goto done;
-	for (i = 0; i < nr_objects; i++) {
-		offset = write_one(f, objects + i, offset);
-		if (do_progress) {
-			unsigned percent = written * 100 / nr_result;
-			if (progress_update || percent != last_percent) {
-				fprintf(stderr, "%4u%% (%u/%u) done\r",
-					percent, written, nr_result);
-				progress_update = 0;
-				last_percent = percent;
+	for (i = 0; i < nr_objects;) {
+		if (!base_name) {
+			f = sha1fd(pack_fd = 1, "<stdout>");
+		}
+		else {
+			int len = snprintf(oldname, sizeof oldname, "%s-XXXXXX", base_name);
+			if (len >= PATH_MAX)
+				die("excessive pathname length for initial packfile name");
+			pack_fd = mkstemp(oldname);
+			if (pack_fd < 0)
+				die("can't create %s: %s", oldname, strerror(errno));
+			f = sha1fd(pack_fd, oldname);
+		}
+
+		hdr.hdr_signature = htonl(PACK_SIGNATURE);
+		hdr.hdr_version = htonl(PACK_VERSION);
+		hdr.hdr_entries = htonl(nr_result);
+		sha1write(f, &hdr, sizeof(hdr));
+		offset = sizeof(hdr);
+		nr_written = 0;
+		for (; i < nr_objects; i++) {
+			off_t offset_one = write_one(f, objects + i, offset);
+			if (!offset_one)
+				break;
+			offset = offset_one;
+			if (do_progress) {
+				unsigned percent = written * 100 / nr_result;
+				if (progress_update || percent != last_percent) {
+					fprintf(stderr, "%4u%% (%u/%u) done\r",
+						percent, written, nr_result);
+					progress_update = 0;
+					last_percent = percent;
+				}
 			}
 		}
+
+		/*
+		 * Did we write the wrong # entries in the header?
+		 * If so, rewrite it like in fast-import (gackk).
+		 */
+		if ( !base_name || nr_written == nr_result ) {
+			sha1close(f, pack_file_sha1, 1);
+		} else {
+			sha1close(f, pack_file_sha1, -1);
+			fixup_header_footer(pack_fd, pack_file_sha1, oldname, nr_written);
+		}
+
+		/*
+		 * compute object_list_sha1 of sorted sha's we just wrote out;
+		 * we also mark these objects as written
+		 */
+		current_sort = sha1_sort;
+		qsort(written_list, nr_written, sizeof(struct object_entry *), sort_comparator);
+		SHA1_Init(&ctx);
+		for (j = 0; j < nr_written; j++) {
+			struct object_entry *entry = written_list[j];
+			entry->prev_pack = 1;
+			SHA1_Update(&ctx, entry->sha1, 20);
+		}
+		SHA1_Final(object_list_sha1, &ctx);
+		/*
+		 * now we can rename the pack correctly and write the index file
+		 */
+		if (base_name) {
+			char newname[PATH_MAX];
+			int len = snprintf(newname, sizeof newname, "%s-%s.%s",
+						base_name, sha1_to_hex(object_list_sha1), "pack");
+			if (len >= PATH_MAX)
+				die("excessive pathname length for final packfile name");
+			if (rename(oldname, newname) < 0)
+				die("could not rename the pack file");
+		}
+		if (!pack_to_stdout) {
+			write_index_file();
+			puts(sha1_to_hex(object_list_sha1));
+		}
 	}
-	if (do_progress)
+
+	free(written_list);
+	if (nr_result && do_progress)
 		fputc('\n', stderr);
- done:
 	if (written != nr_result)
 		die("wrote %u objects while expecting %u", written, nr_result);
-	sha1close(f, pack_file_sha1, 1);
-	if (!pack_to_stdout) {
-		write_index_file();
-		puts(sha1_to_hex(object_list_sha1));
-	}
 }
 
 static void write_index_file(void)
@@ -681,8 +733,8 @@ static void write_index_file(void)
 	uint32_t i;
 	struct sha1file *f = sha1create("%s-%s.%s", base_name,
 					sha1_to_hex(object_list_sha1), "idx");
-	struct object_entry **list = sorted_by_sha;
-	struct object_entry **last = list + nr_result;
+	struct object_entry **list = written_list;
+	struct object_entry **last = list + nr_written;
 	uint32_t array[256];
 
 	/*
@@ -698,7 +750,7 @@ static void write_index_file(void)
 				break;
 			next++;
 		}
-		array[i] = htonl(next - sorted_by_sha);
+		array[i] = htonl(next - written_list);
 		list = next;
 	}
 	sha1write(f, array, 256 * 4);
@@ -706,8 +758,8 @@ static void write_index_file(void)
 	/*
 	 * Write the actual SHA1 entries..
 	 */
-	list = sorted_by_sha;
-	for (i = 0; i < nr_result; i++) {
+	list = written_list;
+	for (i = 0; i < nr_written; i++) {
 		struct object_entry *entry = *list++;
 		uint32_t offset = htonl(entry->offset);
 		sha1write(f, &offset, 4);
-- 
1.5.1.89.g8abf0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
@ 2007-04-30 23:24 Dana How
  2007-05-01  5:40 ` Shawn O. Pearce
  0 siblings, 1 reply; 4+ messages in thread
From: Dana How @ 2007-04-30 23:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, danahow


Rewrite write_pack_file() to break to a new packfile
whenever write_object/write_one request it,  and
correct the header's object count in the previous packfile.
Change write_index_file() to write an index
for just the objects in the most recent packfile.

Signed-off-by: Dana L. How <danahow@gmail.com>
---
 builtin-pack-objects.c |  161 ++++++++++++++++++++++++++----------------------
 1 files changed, 87 insertions(+), 74 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index b50de05..328b3cb 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -623,6 +623,7 @@ static off_t write_one(struct sha1file *f,
 	size = write_object(f, e, pack_size_limit && nr_written ? pack_size_limit - offset : 0);
 	if (!size)
 		return e->offset = 0;
+	written_list[nr_written++] = e;
 
 	/* make sure off_t is sufficiently large not to wrap */
 	if (offset > offset + size)
@@ -631,7 +632,7 @@ static off_t write_one(struct sha1file *f,
 }
 
 static void fixup_header_footer(int pack_fd, unsigned char *pack_file_sha1,
-				char *pack_name, uint32_t object_count)
+				const char *pack_name, uint32_t object_count)
 {
 	static const int buf_sz = 128 * 1024;
 	SHA_CTX c;
@@ -672,74 +673,94 @@ static int adjust_perm(const char *path, mode_t mode);
 
 static void write_pack_file(void)
 {
-	uint32_t i;
+	uint32_t i = 0, j;
 	struct sha1file *f;
-	off_t offset, last_obj_offset = 0;
+	off_t offset, offset_one, last_obj_offset = 0;
 	struct pack_header hdr;
-	int do_progress = progress;
-
-	if (pack_to_stdout) {
-		f = sha1fd(1, "<stdout>");
-		do_progress >>= 1;
-	} else {
-		int fd;
-		snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
-		fd = mkstemp(tmpname);
-		if (fd < 0)
-			die("unable to create %s: %s\n", tmpname, strerror(errno));
-		pack_tmp_name = xstrdup(tmpname);
-		f = sha1fd(fd, pack_tmp_name);
-	}
+	int do_progress = progress >> pack_to_stdout;
 
 	if (do_progress)
 		start_progress(&progress_state, "Writing %u objects...", "", nr_result);
+	written_list = xmalloc(nr_objects * sizeof(struct object_entry *));
 
-	hdr.hdr_signature = htonl(PACK_SIGNATURE);
-	hdr.hdr_version = htonl(PACK_VERSION);
-	hdr.hdr_entries = htonl(nr_result);
-	sha1write(f, &hdr, sizeof(hdr));
-	offset = sizeof(hdr);
-	if (!nr_result)
-		goto done;
-	for (i = 0; i < nr_objects; i++) {
-		last_obj_offset = offset;
-		offset = write_one(f, objects + i, offset);
-		if (do_progress)
-			display_progress(&progress_state, written);
-	}
+	do {
+		if (pack_to_stdout) {
+			f = sha1fd(1, "<stdout>");
+		} else {
+			int fd;
+			snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
+			fd = mkstemp(tmpname);
+			if (fd < 0)
+				die("unable to create %s: %s\n", tmpname, strerror(errno));
+			pack_tmp_name = xstrdup(tmpname);
+			f = sha1fd(fd, pack_tmp_name);
+		}
+
+		hdr.hdr_signature = htonl(PACK_SIGNATURE);
+		hdr.hdr_version = htonl(PACK_VERSION);
+		hdr.hdr_entries = htonl(nr_result);
+		sha1write(f, &hdr, sizeof(hdr));
+		offset = sizeof(hdr);
+		nr_written = 0;
+		for (; i < nr_objects; i++) {
+			last_obj_offset = offset;
+			offset_one = write_one(f, objects + i, offset);
+			if (!offset_one)
+				break;
+			offset = offset_one;
+			if (do_progress)
+				display_progress(&progress_state, written);
+		}
+
+		/*
+		 * Did we write the wrong # entries in the header?
+		 * If so, rewrite it like in fast-import
+		 */
+		if (pack_to_stdout || nr_written == nr_result) {
+			sha1close(f, pack_file_sha1, 1);
+		} else {
+			sha1close(f, pack_file_sha1, -1);
+			fixup_header_footer(f->fd, pack_file_sha1, pack_tmp_name, nr_written);
+		}
+
+		if (!pack_to_stdout) {
+			unsigned char object_list_sha1[20];
+			mode_t mode = umask(0);
+
+			umask(mode);
+			mode = 0444 & ~mode;
+
+			write_index_file(last_obj_offset, object_list_sha1);
+			snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
+				 base_name, sha1_to_hex(object_list_sha1));
+			if (adjust_perm(pack_tmp_name, mode))
+				die("unable to make temporary pack file readable: %s",
+				    strerror(errno));
+			if (rename(pack_tmp_name, tmpname))
+				die("unable to rename temporary pack file: %s",
+				    strerror(errno));
+			snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
+				 base_name, sha1_to_hex(object_list_sha1));
+			if (adjust_perm(idx_tmp_name, mode))
+				die("unable to make temporary index file readable: %s",
+				    strerror(errno));
+			if (rename(idx_tmp_name, tmpname))
+				die("unable to rename temporary index file: %s",
+				    strerror(errno));
+			puts(sha1_to_hex(object_list_sha1));
+		}
+
+		/* mark written objects as written to previous pack */
+		for (j = 0; j < nr_written; j++) {
+			written_list[j]->offset = (off_t)-1;
+		}
+	} while (i < nr_objects);
+
+	free(written_list);
 	if (do_progress)
 		stop_progress(&progress_state);
- done:
 	if (written != nr_result)
 		die("wrote %u objects while expecting %u", written, nr_result);
-	sha1close(f, pack_file_sha1, 1);
-
-	if (!pack_to_stdout) {
-		unsigned char object_list_sha1[20];
-		mode_t mode = umask(0);
-
-		umask(mode);
-		mode = 0444 & ~mode;
-
-		write_index_file(last_obj_offset, object_list_sha1);
-		snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
-			 base_name, sha1_to_hex(object_list_sha1));
-		if (adjust_perm(pack_tmp_name, mode))
-			die("unable to make temporary pack file readable: %s",
-			    strerror(errno));
-		if (rename(pack_tmp_name, tmpname))
-			die("unable to rename temporary pack file: %s",
-			    strerror(errno));
-		snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
-			 base_name, sha1_to_hex(object_list_sha1));
-		if (adjust_perm(idx_tmp_name, mode))
-			die("unable to make temporary index file readable: %s",
-			    strerror(errno));
-		if (rename(idx_tmp_name, tmpname))
-			die("unable to rename temporary index file: %s",
-			    strerror(errno));
-		puts(sha1_to_hex(object_list_sha1));
-	}
 }
 
 static int sha1_sort(const void *_a, const void *_b)
@@ -768,18 +789,11 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 	idx_tmp_name = xstrdup(tmpname);
 	f = sha1fd(fd, idx_tmp_name);
 
-	if (nr_result) {
-		uint32_t j = 0;
-		sorted_by_sha =
-			xcalloc(nr_result, sizeof(struct object_entry *));
-		for (i = 0; i < nr_objects; i++)
-			if (!objects[i].preferred_base)
-				sorted_by_sha[j++] = objects + i;
-		if (j != nr_result)
-			die("listed %u objects while expecting %u", j, nr_result);
-		qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
+	if (nr_written) {
+		sorted_by_sha = written_list;
+		qsort(sorted_by_sha, nr_written, sizeof(*sorted_by_sha), sha1_sort);
 		list = sorted_by_sha;
-		last = sorted_by_sha + nr_result;
+		last = sorted_by_sha + nr_written;
 	} else
 		sorted_by_sha = list = last = NULL;
 
@@ -817,7 +831,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 
 	/* Write the actual SHA1 entries. */
 	list = sorted_by_sha;
-	for (i = 0; i < nr_result; i++) {
+	for (i = 0; i < nr_written; i++) {
 		struct object_entry *entry = *list++;
 		if (index_version < 2) {
 			uint32_t offset = htonl(entry->offset);
@@ -832,7 +846,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 
 		/* write the crc32 table */
 		list = sorted_by_sha;
-		for (i = 0; i < nr_objects; i++) {
+		for (i = 0; i < nr_written; i++) {
 			struct object_entry *entry = *list++;
 			uint32_t crc32_val = htonl(entry->crc32);
 			sha1write(f, &crc32_val, 4);
@@ -840,7 +854,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 
 		/* write the 32-bit offset table */
 		list = sorted_by_sha;
-		for (i = 0; i < nr_objects; i++) {
+		for (i = 0; i < nr_written; i++) {
 			struct object_entry *entry = *list++;
 			uint32_t offset = (entry->offset <= index_off32_limit) ?
 				entry->offset : (0x80000000 | nr_large_offset++);
@@ -865,7 +879,6 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 
 	sha1write(f, pack_file_sha1, 20);
 	sha1close(f, NULL, 1);
-	free(sorted_by_sha);
 	SHA1_Final(sha1, &ctx);
 }
 
-- 
1.5.2.rc0.766.gba60-dirty

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
  2007-04-30 23:24 Dana How
@ 2007-05-01  5:40 ` Shawn O. Pearce
  2007-05-01  6:05   ` Dana How
  0 siblings, 1 reply; 4+ messages in thread
From: Shawn O. Pearce @ 2007-05-01  5:40 UTC (permalink / raw)
  To: Dana How; +Cc: Junio C Hamano, Git Mailing List

Dana How <danahow@gmail.com> wrote:
> Rewrite write_pack_file() to break to a new packfile
> whenever write_object/write_one request it,  and
> correct the header's object count in the previous packfile.
> Change write_index_file() to write an index
> for just the objects in the most recent packfile.
...
> diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
...
> @@ -672,74 +673,94 @@ static int adjust_perm(const char *path, mode_t mode);
...
> +		hdr.hdr_signature = htonl(PACK_SIGNATURE);
> +		hdr.hdr_version = htonl(PACK_VERSION);
> +		hdr.hdr_entries = htonl(nr_result);

What about keeping track of how many objects in nr_result that
have been written already in the prior iteration of this do{}
while loop and using that to set hdr_entries?  This way if you are
splitting into multiple packfiles the last packfile won't need to
do a header/footer fixup.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
  2007-05-01  5:40 ` Shawn O. Pearce
@ 2007-05-01  6:05   ` Dana How
  0 siblings, 0 replies; 4+ messages in thread
From: Dana How @ 2007-05-01  6:05 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Junio C Hamano, Git Mailing List, danahow

On 4/30/07, Shawn O. Pearce <spearce@spearce.org> wrote:
> Dana How <danahow@gmail.com> wrote:
> > Rewrite write_pack_file() to break to a new packfile
> > whenever write_object/write_one request it,  and
> > correct the header's object count in the previous packfile.
> > Change write_index_file() to write an index
> > for just the objects in the most recent packfile.
> ...
> > diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
> ...
> > +             hdr.hdr_signature = htonl(PACK_SIGNATURE);
> > +             hdr.hdr_version = htonl(PACK_VERSION);
> > +             hdr.hdr_entries = htonl(nr_result);
>
> What about keeping track of how many objects in nr_result that
> have been written already in the prior iteration of this do{}
> while loop and using that to set hdr_entries?  This way if you are
> splitting into multiple packfiles the last packfile won't need to
> do a header/footer fixup.
Cool --
I had the same thought (late),
but figured I would address it in a follow-on.
I was thinking of adding nr_left, which would be initialized
from nr_result, and have nr_written repeatedly subtracted.
nr_result in your quote would change to nr_left
(also later, where we decide whether or not to fix up the header).

Thanks,
-- 
Dana L. How  danahow@gmail.com  +1 650 804 5991 cell

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-05-01  6:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-08 23:26 [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one Dana How
  -- strict thread matches above, loose matches on Subject: below --
2007-04-30 23:24 Dana How
2007-05-01  5:40 ` Shawn O. Pearce
2007-05-01  6:05   ` Dana How

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).