* [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
@ 2007-04-08 23:26 Dana How
0 siblings, 0 replies; 4+ messages in thread
From: Dana How @ 2007-04-08 23:26 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Git Mailing List, danahow
Rewrite write_pack_file() to break to a new packfile
whenever write_object/write_one request it, and
correct the header's object count in the previous packfile.
Change write_index_file() to write an index
for just the objects in the most recent packfile.
Signed-off-by: Dana How <how@deathvalley.cswitch.com>
---
builtin-pack-objects.c | 126 ++++++++++++++++++++++++++++++++++--------------
1 files changed, 89 insertions(+), 37 deletions(-)
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index a088f2e..d750c4b 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -572,6 +572,7 @@ static off_t write_one(struct sha1file *f,
result = write_object(f, e, offset_limit && nr_written ? offset_limit - offset : 0);
if (!result)
return result;
+ written_list[nr_written++] = e;
e->offset = offset;
return offset + result;
}
@@ -628,52 +629,103 @@ static void write_index_file(void);
static void write_pack_file(void)
{
- uint32_t i;
+ uint32_t i, j;
struct sha1file *f;
off_t offset;
struct pack_header hdr;
unsigned last_percent = 999;
- int do_progress = progress;
+ int do_progress = progress >> !base_name;
+ char oldname[PATH_MAX];
+ int pack_fd;
+ SHA_CTX ctx;
- if (!base_name) {
- f = sha1fd(1, "<stdout>");
- do_progress >>= 1;
- }
- else
- f = sha1create("%s-%s.%s", base_name,
- sha1_to_hex(object_list_sha1), "pack");
if (do_progress)
fprintf(stderr, "Writing %u objects.\n", nr_result);
+ written_list = xmalloc(nr_objects * sizeof(struct object_entry *));
- hdr.hdr_signature = htonl(PACK_SIGNATURE);
- hdr.hdr_version = htonl(PACK_VERSION);
- hdr.hdr_entries = htonl(nr_result);
- sha1write(f, &hdr, sizeof(hdr));
- offset = sizeof(hdr);
- if (!nr_result)
- goto done;
- for (i = 0; i < nr_objects; i++) {
- offset = write_one(f, objects + i, offset);
- if (do_progress) {
- unsigned percent = written * 100 / nr_result;
- if (progress_update || percent != last_percent) {
- fprintf(stderr, "%4u%% (%u/%u) done\r",
- percent, written, nr_result);
- progress_update = 0;
- last_percent = percent;
+ for (i = 0; i < nr_objects;) {
+ if (!base_name) {
+ f = sha1fd(pack_fd = 1, "<stdout>");
+ }
+ else {
+ int len = snprintf(oldname, sizeof oldname, "%s-XXXXXX", base_name);
+ if (len >= PATH_MAX)
+ die("excessive pathname length for initial packfile name");
+ pack_fd = mkstemp(oldname);
+ if (pack_fd < 0)
+ die("can't create %s: %s", oldname, strerror(errno));
+ f = sha1fd(pack_fd, oldname);
+ }
+
+ hdr.hdr_signature = htonl(PACK_SIGNATURE);
+ hdr.hdr_version = htonl(PACK_VERSION);
+ hdr.hdr_entries = htonl(nr_result);
+ sha1write(f, &hdr, sizeof(hdr));
+ offset = sizeof(hdr);
+ nr_written = 0;
+ for (; i < nr_objects; i++) {
+ off_t offset_one = write_one(f, objects + i, offset);
+ if (!offset_one)
+ break;
+ offset = offset_one;
+ if (do_progress) {
+ unsigned percent = written * 100 / nr_result;
+ if (progress_update || percent != last_percent) {
+ fprintf(stderr, "%4u%% (%u/%u) done\r",
+ percent, written, nr_result);
+ progress_update = 0;
+ last_percent = percent;
+ }
}
}
+
+ /*
+ * Did we write the wrong # entries in the header?
+ * If so, rewrite it like in fast-import (gackk).
+ */
+ if ( !base_name || nr_written == nr_result ) {
+ sha1close(f, pack_file_sha1, 1);
+ } else {
+ sha1close(f, pack_file_sha1, -1);
+ fixup_header_footer(pack_fd, pack_file_sha1, oldname, nr_written);
+ }
+
+ /*
+ * compute object_list_sha1 of sorted sha's we just wrote out;
+ * we also mark these objects as written
+ */
+ current_sort = sha1_sort;
+ qsort(written_list, nr_written, sizeof(struct object_entry *), sort_comparator);
+ SHA1_Init(&ctx);
+ for (j = 0; j < nr_written; j++) {
+ struct object_entry *entry = written_list[j];
+ entry->prev_pack = 1;
+ SHA1_Update(&ctx, entry->sha1, 20);
+ }
+ SHA1_Final(object_list_sha1, &ctx);
+ /*
+ * now we can rename the pack correctly and write the index file
+ */
+ if (base_name) {
+ char newname[PATH_MAX];
+ int len = snprintf(newname, sizeof newname, "%s-%s.%s",
+ base_name, sha1_to_hex(object_list_sha1), "pack");
+ if (len >= PATH_MAX)
+ die("excessive pathname length for final packfile name");
+ if (rename(oldname, newname) < 0)
+ die("could not rename the pack file");
+ }
+ if (!pack_to_stdout) {
+ write_index_file();
+ puts(sha1_to_hex(object_list_sha1));
+ }
}
- if (do_progress)
+
+ free(written_list);
+ if (nr_result && do_progress)
fputc('\n', stderr);
- done:
if (written != nr_result)
die("wrote %u objects while expecting %u", written, nr_result);
- sha1close(f, pack_file_sha1, 1);
- if (!pack_to_stdout) {
- write_index_file();
- puts(sha1_to_hex(object_list_sha1));
- }
}
static void write_index_file(void)
@@ -681,8 +733,8 @@ static void write_index_file(void)
uint32_t i;
struct sha1file *f = sha1create("%s-%s.%s", base_name,
sha1_to_hex(object_list_sha1), "idx");
- struct object_entry **list = sorted_by_sha;
- struct object_entry **last = list + nr_result;
+ struct object_entry **list = written_list;
+ struct object_entry **last = list + nr_written;
uint32_t array[256];
/*
@@ -698,7 +750,7 @@ static void write_index_file(void)
break;
next++;
}
- array[i] = htonl(next - sorted_by_sha);
+ array[i] = htonl(next - written_list);
list = next;
}
sha1write(f, array, 256 * 4);
@@ -706,8 +758,8 @@ static void write_index_file(void)
/*
* Write the actual SHA1 entries..
*/
- list = sorted_by_sha;
- for (i = 0; i < nr_result; i++) {
+ list = written_list;
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t offset = htonl(entry->offset);
sha1write(f, &offset, 4);
--
1.5.1.89.g8abf0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
@ 2007-04-30 23:24 Dana How
2007-05-01 5:40 ` Shawn O. Pearce
0 siblings, 1 reply; 4+ messages in thread
From: Dana How @ 2007-04-30 23:24 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Git Mailing List, danahow
Rewrite write_pack_file() to break to a new packfile
whenever write_object/write_one request it, and
correct the header's object count in the previous packfile.
Change write_index_file() to write an index
for just the objects in the most recent packfile.
Signed-off-by: Dana L. How <danahow@gmail.com>
---
builtin-pack-objects.c | 161 ++++++++++++++++++++++++++----------------------
1 files changed, 87 insertions(+), 74 deletions(-)
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index b50de05..328b3cb 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -623,6 +623,7 @@ static off_t write_one(struct sha1file *f,
size = write_object(f, e, pack_size_limit && nr_written ? pack_size_limit - offset : 0);
if (!size)
return e->offset = 0;
+ written_list[nr_written++] = e;
/* make sure off_t is sufficiently large not to wrap */
if (offset > offset + size)
@@ -631,7 +632,7 @@ static off_t write_one(struct sha1file *f,
}
static void fixup_header_footer(int pack_fd, unsigned char *pack_file_sha1,
- char *pack_name, uint32_t object_count)
+ const char *pack_name, uint32_t object_count)
{
static const int buf_sz = 128 * 1024;
SHA_CTX c;
@@ -672,74 +673,94 @@ static int adjust_perm(const char *path, mode_t mode);
static void write_pack_file(void)
{
- uint32_t i;
+ uint32_t i = 0, j;
struct sha1file *f;
- off_t offset, last_obj_offset = 0;
+ off_t offset, offset_one, last_obj_offset = 0;
struct pack_header hdr;
- int do_progress = progress;
-
- if (pack_to_stdout) {
- f = sha1fd(1, "<stdout>");
- do_progress >>= 1;
- } else {
- int fd;
- snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
- fd = mkstemp(tmpname);
- if (fd < 0)
- die("unable to create %s: %s\n", tmpname, strerror(errno));
- pack_tmp_name = xstrdup(tmpname);
- f = sha1fd(fd, pack_tmp_name);
- }
+ int do_progress = progress >> pack_to_stdout;
if (do_progress)
start_progress(&progress_state, "Writing %u objects...", "", nr_result);
+ written_list = xmalloc(nr_objects * sizeof(struct object_entry *));
- hdr.hdr_signature = htonl(PACK_SIGNATURE);
- hdr.hdr_version = htonl(PACK_VERSION);
- hdr.hdr_entries = htonl(nr_result);
- sha1write(f, &hdr, sizeof(hdr));
- offset = sizeof(hdr);
- if (!nr_result)
- goto done;
- for (i = 0; i < nr_objects; i++) {
- last_obj_offset = offset;
- offset = write_one(f, objects + i, offset);
- if (do_progress)
- display_progress(&progress_state, written);
- }
+ do {
+ if (pack_to_stdout) {
+ f = sha1fd(1, "<stdout>");
+ } else {
+ int fd;
+ snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
+ fd = mkstemp(tmpname);
+ if (fd < 0)
+ die("unable to create %s: %s\n", tmpname, strerror(errno));
+ pack_tmp_name = xstrdup(tmpname);
+ f = sha1fd(fd, pack_tmp_name);
+ }
+
+ hdr.hdr_signature = htonl(PACK_SIGNATURE);
+ hdr.hdr_version = htonl(PACK_VERSION);
+ hdr.hdr_entries = htonl(nr_result);
+ sha1write(f, &hdr, sizeof(hdr));
+ offset = sizeof(hdr);
+ nr_written = 0;
+ for (; i < nr_objects; i++) {
+ last_obj_offset = offset;
+ offset_one = write_one(f, objects + i, offset);
+ if (!offset_one)
+ break;
+ offset = offset_one;
+ if (do_progress)
+ display_progress(&progress_state, written);
+ }
+
+ /*
+ * Did we write the wrong # entries in the header?
+ * If so, rewrite it like in fast-import
+ */
+ if (pack_to_stdout || nr_written == nr_result) {
+ sha1close(f, pack_file_sha1, 1);
+ } else {
+ sha1close(f, pack_file_sha1, -1);
+ fixup_header_footer(f->fd, pack_file_sha1, pack_tmp_name, nr_written);
+ }
+
+ if (!pack_to_stdout) {
+ unsigned char object_list_sha1[20];
+ mode_t mode = umask(0);
+
+ umask(mode);
+ mode = 0444 & ~mode;
+
+ write_index_file(last_obj_offset, object_list_sha1);
+ snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
+ base_name, sha1_to_hex(object_list_sha1));
+ if (adjust_perm(pack_tmp_name, mode))
+ die("unable to make temporary pack file readable: %s",
+ strerror(errno));
+ if (rename(pack_tmp_name, tmpname))
+ die("unable to rename temporary pack file: %s",
+ strerror(errno));
+ snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
+ base_name, sha1_to_hex(object_list_sha1));
+ if (adjust_perm(idx_tmp_name, mode))
+ die("unable to make temporary index file readable: %s",
+ strerror(errno));
+ if (rename(idx_tmp_name, tmpname))
+ die("unable to rename temporary index file: %s",
+ strerror(errno));
+ puts(sha1_to_hex(object_list_sha1));
+ }
+
+ /* mark written objects as written to previous pack */
+ for (j = 0; j < nr_written; j++) {
+ written_list[j]->offset = (off_t)-1;
+ }
+ } while (i < nr_objects);
+
+ free(written_list);
if (do_progress)
stop_progress(&progress_state);
- done:
if (written != nr_result)
die("wrote %u objects while expecting %u", written, nr_result);
- sha1close(f, pack_file_sha1, 1);
-
- if (!pack_to_stdout) {
- unsigned char object_list_sha1[20];
- mode_t mode = umask(0);
-
- umask(mode);
- mode = 0444 & ~mode;
-
- write_index_file(last_obj_offset, object_list_sha1);
- snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
- base_name, sha1_to_hex(object_list_sha1));
- if (adjust_perm(pack_tmp_name, mode))
- die("unable to make temporary pack file readable: %s",
- strerror(errno));
- if (rename(pack_tmp_name, tmpname))
- die("unable to rename temporary pack file: %s",
- strerror(errno));
- snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
- base_name, sha1_to_hex(object_list_sha1));
- if (adjust_perm(idx_tmp_name, mode))
- die("unable to make temporary index file readable: %s",
- strerror(errno));
- if (rename(idx_tmp_name, tmpname))
- die("unable to rename temporary index file: %s",
- strerror(errno));
- puts(sha1_to_hex(object_list_sha1));
- }
}
static int sha1_sort(const void *_a, const void *_b)
@@ -768,18 +789,11 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
idx_tmp_name = xstrdup(tmpname);
f = sha1fd(fd, idx_tmp_name);
- if (nr_result) {
- uint32_t j = 0;
- sorted_by_sha =
- xcalloc(nr_result, sizeof(struct object_entry *));
- for (i = 0; i < nr_objects; i++)
- if (!objects[i].preferred_base)
- sorted_by_sha[j++] = objects + i;
- if (j != nr_result)
- die("listed %u objects while expecting %u", j, nr_result);
- qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
+ if (nr_written) {
+ sorted_by_sha = written_list;
+ qsort(sorted_by_sha, nr_written, sizeof(*sorted_by_sha), sha1_sort);
list = sorted_by_sha;
- last = sorted_by_sha + nr_result;
+ last = sorted_by_sha + nr_written;
} else
sorted_by_sha = list = last = NULL;
@@ -817,7 +831,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* Write the actual SHA1 entries. */
list = sorted_by_sha;
- for (i = 0; i < nr_result; i++) {
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
if (index_version < 2) {
uint32_t offset = htonl(entry->offset);
@@ -832,7 +846,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* write the crc32 table */
list = sorted_by_sha;
- for (i = 0; i < nr_objects; i++) {
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t crc32_val = htonl(entry->crc32);
sha1write(f, &crc32_val, 4);
@@ -840,7 +854,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* write the 32-bit offset table */
list = sorted_by_sha;
- for (i = 0; i < nr_objects; i++) {
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t offset = (entry->offset <= index_off32_limit) ?
entry->offset : (0x80000000 | nr_large_offset++);
@@ -865,7 +879,6 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
sha1write(f, pack_file_sha1, 20);
sha1close(f, NULL, 1);
- free(sorted_by_sha);
SHA1_Final(sha1, &ctx);
}
--
1.5.2.rc0.766.gba60-dirty
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
2007-04-30 23:24 Dana How
@ 2007-05-01 5:40 ` Shawn O. Pearce
2007-05-01 6:05 ` Dana How
0 siblings, 1 reply; 4+ messages in thread
From: Shawn O. Pearce @ 2007-05-01 5:40 UTC (permalink / raw)
To: Dana How; +Cc: Junio C Hamano, Git Mailing List
Dana How <danahow@gmail.com> wrote:
> Rewrite write_pack_file() to break to a new packfile
> whenever write_object/write_one request it, and
> correct the header's object count in the previous packfile.
> Change write_index_file() to write an index
> for just the objects in the most recent packfile.
...
> diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
...
> @@ -672,74 +673,94 @@ static int adjust_perm(const char *path, mode_t mode);
...
> + hdr.hdr_signature = htonl(PACK_SIGNATURE);
> + hdr.hdr_version = htonl(PACK_VERSION);
> + hdr.hdr_entries = htonl(nr_result);
What about keeping track of how many of the nr_result objects have
already been written in prior iterations of this do {} while loop,
and using that to set hdr_entries? That way, if you are splitting
into multiple packfiles, the last packfile won't need a
header/footer fixup.
--
Shawn.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one
2007-05-01 5:40 ` Shawn O. Pearce
@ 2007-05-01 6:05 ` Dana How
0 siblings, 0 replies; 4+ messages in thread
From: Dana How @ 2007-05-01 6:05 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: Junio C Hamano, Git Mailing List, danahow
On 4/30/07, Shawn O. Pearce <spearce@spearce.org> wrote:
> Dana How <danahow@gmail.com> wrote:
> > Rewrite write_pack_file() to break to a new packfile
> > whenever write_object/write_one request it, and
> > correct the header's object count in the previous packfile.
> > Change write_index_file() to write an index
> > for just the objects in the most recent packfile.
> ...
> > diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
> ...
> > + hdr.hdr_signature = htonl(PACK_SIGNATURE);
> > + hdr.hdr_version = htonl(PACK_VERSION);
> > + hdr.hdr_entries = htonl(nr_result);
>
> What about keeping track of how many objects in nr_result that
> have been written already in the prior iteration of this do{}
> while loop and using that to set hdr_entries? This way if you are
> splitting into multiple packfiles the last packfile won't need to
> do a header/footer fixup.
Cool --
I had the same thought (late),
but figured I would address it in a follow-on patch.
I was thinking of adding nr_left, which would be initialized
from nr_result and have nr_written subtracted from it after each packfile.
nr_result in your quote would change to nr_left
(and also later, where we decide whether or not to fix up the header).
Thanks,
--
Dana L. How danahow@gmail.com +1 650 804 5991 cell
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-05-01 6:05 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-08 23:26 [PATCH 7/8] git-repack --max-pack-size: split packs as asked by write_object/write_one Dana How
-- strict thread matches above, loose matches on Subject: below --
2007-04-30 23:24 Dana How
2007-05-01 5:40 ` Shawn O. Pearce
2007-05-01 6:05 ` Dana How
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).