* [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
@ 2007-04-08 23:24 Dana How
2007-04-09 0:03 ` Junio C Hamano
2007-04-09 0:40 ` Nicolas Pitre
0 siblings, 2 replies; 10+ messages in thread
From: Dana How @ 2007-04-08 23:24 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Git Mailing List, danahow
Accept new 'limit' argument and check against it
before each group of writes. Update delta usability rules
for possibility of delta base being in a previously-
written pack. Inline sha1write_compressed() so we know
the exact size of the written data when it needs to be compressed.
Signed-off-by: Dana How <how@deathvalley.cswitch.com>
---
builtin-pack-objects.c | 66 ++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 58 insertions(+), 8 deletions(-)
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 7ab0712..9530008 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -373,7 +373,8 @@ static int revalidate_loose_object(struct object_entry *entry,
}
static off_t write_object(struct sha1file *f,
- struct object_entry *entry)
+ struct object_entry *entry,
+ unsigned long limit)
{
unsigned long size;
enum object_type type;
@@ -407,6 +408,10 @@ static off_t write_object(struct sha1file *f,
if (revalidate_loose_object(entry, map, mapsize))
die("corrupt loose object %s",
sha1_to_hex(entry->sha1));
+ if ( limit && mapsize + 20 >= limit ) {
+ munmap(map, mapsize);
+ return 0;
+ }
sha1write(f, map, mapsize);
munmap(map, mapsize);
written++;
@@ -418,24 +423,51 @@ static off_t write_object(struct sha1file *f,
}
if (!to_reuse) {
+ z_stream stream;
+ unsigned long maxsize;
+ void *out;
+ /* no if no delta */
+ int usable_delta = !entry->delta ? 0 :
+ /* yes if unlimited packfile */
+ !offset_limit ? 1 :
+ /* no if base written to previous pack */
+ entry->delta->prev_pack ? 0 :
+ /* otherwise double-check written to this
+ * pack, like we do below
+ */
+ entry->delta->offset ? 1 : 0;
buf = read_sha1_file(entry->sha1, &type, &size);
if (!buf)
die("unable to read %s", sha1_to_hex(entry->sha1));
if (size != entry->size)
die("object %s size inconsistency (%lu vs %lu)",
sha1_to_hex(entry->sha1), size, entry->size);
- if (entry->delta) {
+ if (usable_delta) {
buf = delta_against(buf, size, entry);
size = entry->delta_size;
obj_type = (allow_ofs_delta && entry->delta->offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
}
+ /* compress the data to store and put compressed length in datalen */
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, zlib_compression_level);
+ maxsize = deflateBound(&stream, size);
+ out = xmalloc(maxsize);
+ /* Compress it */
+ stream.next_in = buf;
+ stream.avail_in = size;
+ stream.next_out = out;
+ stream.avail_out = maxsize;
+ while (deflate(&stream, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&stream);
+ datalen = stream.total_out;
/*
* The object header is a byte of 'type' followed by zero or
* more bytes of length.
*/
hdrlen = encode_header(obj_type, size, header);
- sha1write(f, header, hdrlen);
if (obj_type == OBJ_OFS_DELTA) {
/*
@@ -448,6 +480,12 @@ static off_t write_object(struct sha1file *f,
header[pos] = ofs & 127;
while (ofs >>= 7)
header[--pos] = 128 | (--ofs & 127);
+ if ( limit && hdrlen + sizeof(header) - pos + datalen + 20 >= limit ) {
+ free(out);
+ free(buf);
+ return 0;
+ }
+ sha1write(f, header, hdrlen);
sha1write(f, header + pos, sizeof(header) - pos);
hdrlen += sizeof(header) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
@@ -455,10 +493,17 @@ static off_t write_object(struct sha1file *f,
* Deltas with a base reference contain
* an additional 20 bytes for the base sha1.
*/
+ if ( limit && hdrlen + 20 + datalen + 20 >= limit ) {
+ free(out);
+ free(buf);
+ return 0;
+ }
+ sha1write(f, header, hdrlen);
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
}
- datalen = sha1write_compressed(f, buf, size);
+ sha1write(f, out, datalen);
+ free(out);
free(buf);
}
else {
@@ -472,23 +517,28 @@ static off_t write_object(struct sha1file *f,
reused_delta++;
}
hdrlen = encode_header(obj_type, entry->size, header);
- sha1write(f, header, hdrlen);
+ datalen = find_packed_object_size(p, entry->in_pack_offset)
+ - entry->in_pack_header_size;
if (obj_type == OBJ_OFS_DELTA) {
off_t ofs = entry->offset - entry->delta->offset;
unsigned pos = sizeof(header) - 1;
header[pos] = ofs & 127;
while (ofs >>= 7)
header[--pos] = 128 | (--ofs & 127);
+ if ( limit && hdrlen + sizeof(header) - pos + datalen + 20 >= limit )
+ return 0;
+ sha1write(f, header, hdrlen);
sha1write(f, header + pos, sizeof(header) - pos);
hdrlen += sizeof(header) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
+ if ( limit && hdrlen + 20 + datalen + 20 >= limit )
+ return 0;
+ sha1write(f, header, hdrlen);
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
}
offset = entry->in_pack_offset + entry->in_pack_header_size;
- datalen = find_packed_object_size(p, entry->in_pack_offset)
- - entry->in_pack_header_size;
if (!pack_to_stdout && check_pack_inflate(p, &w_curs,
offset, datalen, entry->size))
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
@@ -515,7 +565,7 @@ static off_t write_one(struct sha1file *f,
if (e->delta)
offset = write_one(f, e->delta, offset);
e->offset = offset;
- return offset + write_object(f, e);
+ return offset + write_object(f, e, 0);
}
/*
--
1.5.1.89.g8abf0
^ permalink raw reply related [flat|nested] 10+ messages in thread

* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-08 23:24 [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg Dana How
@ 2007-04-09 0:03 ` Junio C Hamano
2007-04-09 0:15 ` Junio C Hamano
2007-04-09 0:40 ` Nicolas Pitre
1 sibling, 1 reply; 10+ messages in thread
From: Junio C Hamano @ 2007-04-09 0:03 UTC (permalink / raw)
To: Dana How; +Cc: Git Mailing List
Dana How <danahow@gmail.com> writes:
> Accept new 'limit' argument and check against it
> before each group of writes. Update delta usability rules
> for possibility of delta base being in a previously-
> written pack. Inline sha1write_compressed() so we know
> the exact size of the written data when it needs to be compressed.
>
> Signed-off-by: Dana How <how@deathvalley.cswitch.com>
My first reaction of open-coding sha1write_compressed() was
"Ugh", but as you are removing the only user of that function,
maybe this is not as bad as it looks.
> @@ -407,6 +408,10 @@ static off_t write_object(struct sha1file *f,
> if (revalidate_loose_object(entry, map, mapsize))
> die("corrupt loose object %s",
> sha1_to_hex(entry->sha1));
> + if ( limit && mapsize + 20 >= limit ) {
> + munmap(map, mapsize);
> + return 0;
> + }
Our coding style guideline is similar to that of the kernel. No
SP after '(' or before ')' in conditionals.
^ permalink raw reply [flat|nested] 10+ messages in thread

* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 0:03 ` Junio C Hamano
@ 2007-04-09 0:15 ` Junio C Hamano
2007-04-09 2:28 ` Nicolas Pitre
0 siblings, 1 reply; 10+ messages in thread
From: Junio C Hamano @ 2007-04-09 0:15 UTC (permalink / raw)
To: Dana How; +Cc: Git Mailing List
Junio C Hamano <junkio@cox.net> writes:
> Dana How <danahow@gmail.com> writes:
>
>> Accept new 'limit' argument and check against it
>> before each group of writes. Update delta usability rules
>> for possibility of delta base being in a previously-
>> written pack. Inline sha1write_compressed() so we know
>> the exact size of the written data when it needs to be compressed.
>>
>> Signed-off-by: Dana How <how@deathvalley.cswitch.com>
>
> My first reaction of open-coding sha1write_compressed() was
> "Ugh", but as you are removing the only user of that function,
> maybe this is not as bad as it looks.
Having said that, I suspect that for other possible users of
that function we might have later, it would be a better
interface to add an optional 'limit' and 'prelude' to
sha1write_compressed(). The function would write prelude
followed by the compressed payload, only if they fit the limit.
Then your write_object() would prepare the header (depending on
the type, the object header, ofs-delta header or ref-delta
header) in header[] but would not call sha1write() itself.
Instead it would send header[] in as prelude, *buf as the
payload, with an appropriate limit.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 0:15 ` Junio C Hamano
@ 2007-04-09 2:28 ` Nicolas Pitre
0 siblings, 0 replies; 10+ messages in thread
From: Nicolas Pitre @ 2007-04-09 2:28 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Dana How, Git Mailing List
On Sun, 8 Apr 2007, Junio C Hamano wrote:
> Junio C Hamano <junkio@cox.net> writes:
>
> > Dana How <danahow@gmail.com> writes:
> >
> >> Accept new 'limit' argument and check against it
> >> before each group of writes. Update delta usability rules
> >> for possibility of delta base being in a previously-
> >> written pack. Inline sha1write_compressed() so we know
> >> the exact size of the written data when it needs to be compressed.
> >>
> >> Signed-off-by: Dana How <how@deathvalley.cswitch.com>
> >
> > My first reaction of open-coding sha1write_compressed() was
> > "Ugh", but as you are removing the only user of that function,
> > maybe this is not as bad as it looks.
>
> Having said that, I suspect that for other possible users of
> that function we might have later, it would be a better
> interface to add an optional 'limit' and 'prelude' to
> sha1write_compressed(). The function would write prelude
> followed by the compressed payload, only if they fit the limit.
I'd wait for those possible future users to show up before doing such
thing though.
Nicolas
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-08 23:24 [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg Dana How
2007-04-09 0:03 ` Junio C Hamano
@ 2007-04-09 0:40 ` Nicolas Pitre
2007-04-09 18:51 ` Dana How
1 sibling, 1 reply; 10+ messages in thread
From: Nicolas Pitre @ 2007-04-09 0:40 UTC (permalink / raw)
To: Dana How; +Cc: Junio C Hamano, Git Mailing List
On Sun, 8 Apr 2007, Dana How wrote:
>
> Accept new 'limit' argument and check against it
> before each group of writes. Update delta usability rules
> for possibility of delta base being in a previously-
> written pack. Inline sha1write_compressed() so we know
> the exact size of the written data when it needs to be compressed.
It would be cleaner if the compression code was made into a function of
its own I think.
> @@ -448,6 +480,12 @@ static off_t write_object(struct sha1file *f,
> header[pos] = ofs & 127;
> while (ofs >>= 7)
> header[--pos] = 128 | (--ofs & 127);
> + if ( limit && hdrlen + sizeof(header) - pos + datalen + 20 >= limit ) {
> + free(out);
> + free(buf);
> + return 0;
> + }
> + sha1write(f, header, hdrlen);
> sha1write(f, header + pos, sizeof(header) - pos);
The above looks rather buggy to me.
Nicolas
^ permalink raw reply [flat|nested] 10+ messages in thread

* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 0:40 ` Nicolas Pitre
@ 2007-04-09 18:51 ` Dana How
2007-04-09 18:59 ` Nicolas Pitre
0 siblings, 1 reply; 10+ messages in thread
From: Dana How @ 2007-04-09 18:51 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow
On 4/8/07, Nicolas Pitre <nico@cam.org> wrote:
> > @@ -448,6 +480,12 @@ static off_t write_object(struct sha1file *f,
> > header[pos] = ofs & 127;
> > while (ofs >>= 7)
> > header[--pos] = 128 | (--ofs & 127);
> > + if ( limit && hdrlen + sizeof(header) - pos + datalen + 20 >= limit ) {
> > + free(out);
> > + free(buf);
> > + return 0;
> > + }
> > + sha1write(f, header, hdrlen);
> > sha1write(f, header + pos, sizeof(header) - pos);
>
> The above looks rather buggy to me.
OK, can you be more specific?
So far from my re-inspection I don't see any error in the snippet above.
Two lines before it, however, we have the line
off_t ofs = entry->offset - entry->delta->offset;
and this only works if entry->offset is set in write_one() *before*
calling write_object(). I will make that change.
Thanks!
--
Dana L. How danahow@gmail.com +1 650 804 5991 cell
^ permalink raw reply [flat|nested] 10+ messages in thread

* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 18:51 ` Dana How
@ 2007-04-09 18:59 ` Nicolas Pitre
2007-04-09 19:20 ` Dana How
0 siblings, 1 reply; 10+ messages in thread
From: Nicolas Pitre @ 2007-04-09 18:59 UTC (permalink / raw)
To: Dana How; +Cc: Junio C Hamano, Git Mailing List
On Mon, 9 Apr 2007, Dana How wrote:
> On 4/8/07, Nicolas Pitre <nico@cam.org> wrote:
> > > @@ -448,6 +480,12 @@ static off_t write_object(struct sha1file *f,
> > > header[pos] = ofs & 127;
> > > while (ofs >>= 7)
> > > header[--pos] = 128 | (--ofs & 127);
> > > + if ( limit && hdrlen + sizeof(header) - pos +
> > datalen + 20 >= limit ) {
> > > + free(out);
> > > + free(buf);
> > > + return 0;
> > > + }
> > > + sha1write(f, header, hdrlen);
> > > sha1write(f, header + pos, sizeof(header) - pos);
> >
> > The above looks rather buggy to me.
>
> OK, can you be more specific?
You're writing the content of the array 'header' twice in a row. Sure
the second time it is header + pos but it is still the result of an
operation that used to put data into 'header' after the first content
was already written out. Right now it looks like the first write might
contain clobbered data.
Nicolas
^ permalink raw reply [flat|nested] 10+ messages in thread

* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 18:59 ` Nicolas Pitre
@ 2007-04-09 19:20 ` Dana How
2007-04-09 19:25 ` Nicolas Pitre
2007-04-09 20:09 ` Junio C Hamano
0 siblings, 2 replies; 10+ messages in thread
From: Dana How @ 2007-04-09 19:20 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow
On 4/9/07, Nicolas Pitre <nico@cam.org> wrote:
> On Mon, 9 Apr 2007, Dana How wrote:
> > On 4/8/07, Nicolas Pitre <nico@cam.org> wrote:
> > > > + sha1write(f, header, hdrlen);
> > > > sha1write(f, header + pos, sizeof(header) - pos);
> > >
> > > The above looks rather buggy to me.
> >
> > OK, can you be more specific?
>
> You're writing the content of the array 'header' twice in a row. Sure
> the second time it is header + pos but it is still the result of an
> operation that used to put data into 'header' after the first content
> was already written out. Right now it looks like the first write might
> contain clobbered data.
Thanks, I'll take care of that.
For testing, I've been using git-fsck/git-verify-pack/git-unpack-objects .
The only bugs they ever caught were (1) offset wraparound in .idx
before I started this patchset (with a very bad error message) and
(2) I failed to flush out the buffer before changing packs.
Everything else has been detected manually (not always by me). Hmm.
Thanks,
--
Dana L. How danahow@gmail.com +1 650 804 5991 cell
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 19:20 ` Dana How
@ 2007-04-09 19:25 ` Nicolas Pitre
2007-04-09 20:09 ` Junio C Hamano
1 sibling, 0 replies; 10+ messages in thread
From: Nicolas Pitre @ 2007-04-09 19:25 UTC (permalink / raw)
To: Dana How; +Cc: Junio C Hamano, Git Mailing List
On Mon, 9 Apr 2007, Dana How wrote:
> For testing, I've been using git-fsck/git-verify-pack/git-unpack-objects .
> The only bugs they ever caught were (1) offset wraparound in .idx
> before I started this patchset (with a very bad error message) and
> (2) I failed to flush out the buffer before changing packs.
> Everything else has been detected manually (not always by me). Hmm.
Make sure you always run 'make test' as well when you're done.
Nicolas
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg
2007-04-09 19:20 ` Dana How
2007-04-09 19:25 ` Nicolas Pitre
@ 2007-04-09 20:09 ` Junio C Hamano
1 sibling, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2007-04-09 20:09 UTC (permalink / raw)
To: Dana How; +Cc: Nicolas Pitre, Junio C Hamano, Git Mailing List
"Dana How" <danahow@gmail.com> writes:
> For testing, I've been using git-fsck/git-verify-pack/git-unpack-objects .
Among the above, verify-pack does not do much more than what the
normal object reading path does already these days. Also
unpack-objects (especially with -n) omits a lot of object
integrity checks, and not very appropriate test.
We have some tests for packfiles in t/ (make test) but I would
not be surprised if we do not have enough. Most of the existing
tests are done on loose objects the tests themselves produce
without repacking, because their focus is not about packs and
they test whatever they are interested in, assuming that the
lowest level object layer is correctly functioning.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2007-04-09 20:10 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-08 23:24 [PATCH 5/8] get-repack --max-pack-size: write_object() takes 'limit' arg Dana How
2007-04-09 0:03 ` Junio C Hamano
2007-04-09 0:15 ` Junio C Hamano
2007-04-09 2:28 ` Nicolas Pitre
2007-04-09 0:40 ` Nicolas Pitre
2007-04-09 18:51 ` Dana How
2007-04-09 18:59 ` Nicolas Pitre
2007-04-09 19:20 ` Dana How
2007-04-09 19:25 ` Nicolas Pitre
2007-04-09 20:09 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).