From: Junio C Hamano <junkio@cox.net>
To: Nicolas Pitre <nico@cam.org>
Cc: git@vger.kernel.org
Subject: Re: [PATCH] binary patch.
Date: Fri, 05 May 2006 03:15:53 -0700 [thread overview]
Message-ID: <7vy7xgzsiu.fsf@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: 7vwtd10xyd.fsf@assigned-by-dhcp.cox.net
Junio C Hamano <junkio@cox.net> writes:
> Nicolas Pitre <nico@cam.org> writes:
>
>> This is nice.
>>
>> However I'd deflate the delta data before encoding it with base85.
>
> Yeah, things still to be done are deflating both delta and
> optionally perhaps use just deflate without delta for "new file"
> codepath.
>
> And testsuite.
-- >8 --
[PATCH] binary diff: further updates.
This updates the user interface and generated diff data format.
* "diff --binary" is used to signal that we want an e-mailable
binary patch. It implies --full-index and -p.
* "apply --allow-binary-replacement" acquired a short synonym
"apply --binary".
* After the "GIT binary patch\n" header line there is a token
to record which binary patch mechanism was used, so that we
can extend it later. Currently there are two mechanisms
defined: "literal" and "delta". The former records the
deflated postimage and the latter records the deflated delta
from the preimage to postimage.
For purely implementation convenience, I added the deflated
length after these "literal/delta" tokens (otherwise the
decoding side needs to guess and reallocate the buffer while
inflating). Improvement patches are very welcomed.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
* Have done only very minimum testing, and unlike somebody else ;-)
my code is not always perfect, so this will still stay out of
"next" for a while.
apply.c | 130 +++++++++++++++++++++++++++++++++++++++++++++----------------
cache.h | 1
diff.c | 135 +++++++++++++++++++++++++++++++++------------------------------
diff.h | 1
4 files changed, 169 insertions(+), 98 deletions(-)
diff --git a/apply.c b/apply.c
index e37c4eb..1b93aab 100644
--- a/apply.c
+++ b/apply.c
@@ -114,6 +114,9 @@ struct patch {
char *new_name, *old_name, *def_name;
unsigned int old_mode, new_mode;
int is_rename, is_copy, is_new, is_delete, is_binary;
+#define BINARY_DELTA_DEFLATED 1
+#define BINARY_LITERAL_DEFLATED 2
+ unsigned long deflate_origlen;
int lines_added, lines_deleted;
int score;
struct fragment *fragments;
@@ -969,9 +972,11 @@ static inline int metadata_changes(struc
static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
{
- /* We have read "GIT binary patch\n"; what follows is a
- * sequence of 'length-byte' followed by base-85 encoded
- * delta data.
+ /* We have read "GIT binary patch\n"; what follows is a line
+ * that says the patch method (currently, either "deflated
+ * literal" or "deflated delta") and the length of data before
+ * deflating; a sequence of 'length-byte' followed by base-85
+ * encoded data follows.
*
* Each 5-byte sequence of base-85 encodes up to 4 bytes,
* and we would limit the patch line to 66 characters,
@@ -982,11 +987,27 @@ static int parse_binary(char *buffer, un
*/
int llen, used;
struct fragment *fragment;
- char *delta = NULL;
+ char *data = NULL;
- patch->is_binary = 1;
patch->fragments = fragment = xcalloc(1, sizeof(*fragment));
- used = 0;
+
+ /* Grab the type of patch */
+ llen = linelen(buffer, size);
+ used = llen;
+ linenr++;
+
+ if (!strncmp(buffer, "delta ", 6)) {
+ patch->is_binary = BINARY_DELTA_DEFLATED;
+ patch->deflate_origlen = strtoul(buffer + 6, NULL, 10);
+ }
+ else if (!strncmp(buffer, "literal ", 8)) {
+ patch->is_binary = BINARY_LITERAL_DEFLATED;
+ patch->deflate_origlen = strtoul(buffer + 8, NULL, 10);
+ }
+ else
+ return error("unrecognized binary patch at line %d: %.*s",
+ linenr-1, llen-1, buffer);
+ buffer += llen;
while (1) {
int byte_length, max_byte_length, newsize;
llen = linelen(buffer, size);
@@ -1015,8 +1036,8 @@ static int parse_binary(char *buffer, un
byte_length <= max_byte_length - 4)
goto corrupt;
newsize = fragment->size + byte_length;
- delta = xrealloc(delta, newsize);
- if (decode_85(delta + fragment->size,
+ data = xrealloc(data, newsize);
+ if (decode_85(data + fragment->size,
buffer + 1,
byte_length))
goto corrupt;
@@ -1024,7 +1045,7 @@ static int parse_binary(char *buffer, un
buffer += llen;
size -= llen;
}
- fragment->patch = delta;
+ fragment->patch = data;
return used;
corrupt:
return error("corrupt binary patch at line %d: %.*s",
@@ -1425,6 +1446,61 @@ #endif
return offset;
}
+static char *inflate_it(const void *data, unsigned long size,
+ unsigned long inflated_size)
+{
+ z_stream stream;
+ void *out;
+ int st;
+
+ memset(&stream, 0, sizeof(stream));
+
+ stream.next_in = (unsigned char *)data;
+ stream.avail_in = size;
+ stream.next_out = out = xmalloc(inflated_size);
+ stream.avail_out = inflated_size;
+ inflateInit(&stream);
+ st = inflate(&stream, Z_FINISH);
+ if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
+ free(out);
+ return NULL;
+ }
+ return out;
+}
+
+static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
+{
+ unsigned long dst_size;
+ struct fragment *fragment = patch->fragments;
+ void *data;
+ void *result;
+
+ data = inflate_it(fragment->patch, fragment->size,
+ patch->deflate_origlen);
+ if (!data)
+ return error("corrupt patch data");
+ switch (patch->is_binary) {
+ case BINARY_DELTA_DEFLATED:
+ result = patch_delta(desc->buffer, desc->size,
+ data,
+ patch->deflate_origlen,
+ &dst_size);
+ free(desc->buffer);
+ desc->buffer = result;
+ free(data);
+ break;
+ case BINARY_LITERAL_DEFLATED:
+ free(desc->buffer);
+ desc->buffer = data;
+ dst_size = patch->deflate_origlen;
+ break;
+ }
+ if (!desc->buffer)
+ return -1;
+ desc->size = desc->alloc = dst_size;
+ return 0;
+}
+
static int apply_binary(struct buffer_desc *desc, struct patch *patch)
{
const char *name = patch->old_name ? patch->old_name : patch->new_name;
@@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_de
"'%s' but it is not empty", name);
}
- if (desc->buffer) {
+ get_sha1_hex(patch->new_sha1_prefix, sha1);
+ if (!memcmp(sha1, null_sha1, 20)) {
free(desc->buffer);
desc->alloc = desc->size = 0;
- }
- get_sha1_hex(patch->new_sha1_prefix, sha1);
- if (!memcmp(sha1, null_sha1, 20))
+ desc->buffer = NULL;
return 0; /* deletion patch */
+ }
if (has_sha1_file(sha1)) {
+ /* We already have the postimage */
char type[10];
unsigned long size;
+ free(desc->buffer);
desc->buffer = read_sha1_file(sha1, type, &size);
if (!desc->buffer)
return error("the necessary postimage %s for "
@@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_de
desc->alloc = desc->size = size;
}
else {
- char type[10];
- unsigned long src_size, dst_size;
- void *src;
-
- get_sha1_hex(patch->old_sha1_prefix, sha1);
- src = read_sha1_file(sha1, type, &src_size);
- if (!src)
- return error("the necessary preimage %s for "
- "'%s' cannot be read",
- patch->old_sha1_prefix, name);
-
- /* patch->fragment->patch has the delta data and
- * we should apply it to the preimage.
+ /* We have verified desc matches the preimage;
+ * apply the patch data to it, which is stored
+ * in the patch->fragments->{patch,size}.
*/
- desc->buffer = patch_delta(src, src_size,
- (void*) patch->fragments->patch,
- patch->fragments->size,
- &dst_size);
- if (!desc->buffer)
+ if (apply_binary_fragment(desc, patch))
return error("binary patch does not apply to '%s'",
name);
- desc->size = desc->alloc = dst_size;
/* verify that the result matches */
write_sha1_file_prepare(desc->buffer, desc->size, blob_type,
@@ -2102,7 +2165,8 @@ int main(int argc, char **argv)
diffstat = 1;
continue;
}
- if (!strcmp(arg, "--allow-binary-replacement")) {
+ if (!strcmp(arg, "--allow-binary-replacement") ||
+ !strcmp(arg, "--binary")) {
allow_binary_replacement = 1;
continue;
}
diff --git a/cache.h b/cache.h
index 2f32f3d..4b7a439 100644
--- a/cache.h
+++ b/cache.h
@@ -365,5 +365,6 @@ extern void setup_pager(void);
/* base85 */
int decode_85(char *dst, char *line, int linelen);
+void encode_85(char *buf, unsigned char *data, int bytes);
#endif /* CACHE_H */
diff --git a/diff.c b/diff.c
index b14d897..94bd94a 100644
--- a/diff.c
+++ b/diff.c
@@ -392,78 +392,75 @@ static void show_stats(struct diffstat_t
total_files, adds, dels);
}
-static void *encode_delta_size(void *data, unsigned long size)
+static unsigned char *deflate_it(char *data,
+ unsigned long size,
+ unsigned long *result_size)
{
- unsigned char *cp = data;
- *cp++ = size;
- size >>= 7;
- while (size) {
- cp[-1] |= 0x80;
- *cp++ = size;
- size >>= 7;
- }
- return cp;
-}
-
-static void *safe_diff_delta(const unsigned char *src, unsigned long src_size,
- const unsigned char *dst, unsigned long dst_size,
- unsigned long *delta_size)
-{
- unsigned long bufsize;
- unsigned char *data;
- unsigned char *cp;
-
- if (src_size && dst_size)
- return diff_delta(src, src_size, dst, dst_size, delta_size, 0);
-
- /* diff-delta does not like to do delta with empty, so
- * we do that by hand here. Sigh...
- */
-
- if (!src_size)
- /* literal copy can be done only 127-byte at a time.
- */
- bufsize = dst_size + (dst_size / 127) + 40;
- else
- bufsize = 40;
- data = xmalloc(bufsize);
- cp = encode_delta_size(data, src_size);
- cp = encode_delta_size(cp, dst_size);
-
- if (dst_size) {
- /* copy out literally */
- while (dst_size) {
- int sz = (127 < dst_size) ? 127 : dst_size;
- *cp++ = sz;
- dst_size -= sz;
- while (sz) {
- *cp++ = *dst++;
- sz--;
- }
- }
- }
- *delta_size = (cp - data);
- return data;
+ int bound;
+ unsigned char *deflated;
+ z_stream stream;
+
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, Z_BEST_COMPRESSION);
+ bound = deflateBound(&stream, size);
+ deflated = xmalloc(bound);
+ stream.next_out = deflated;
+ stream.avail_out = bound;
+
+ stream.next_in = (unsigned char *)data;
+ stream.avail_in = size;
+ while (deflate(&stream, Z_FINISH) == Z_OK)
+ ; /* nothing */
+ deflateEnd(&stream);
+ *result_size = stream.total_out;
+ return deflated;
}
static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
{
- void *delta, *cp;
+ void *cp;
+ void *delta;
+ void *deflated;
+ void *data;
+ unsigned long orig_size;
unsigned long delta_size;
+ unsigned long deflate_size;
+ unsigned long data_size;
printf("GIT binary patch\n");
- delta = safe_diff_delta(one->ptr, one->size,
- two->ptr, two->size,
- &delta_size);
- if (!delta)
- die("unable to generate binary diff");
-
- /* emit delta encoded in base85 */
- cp = delta;
- while (delta_size) {
- int bytes = (52 < delta_size) ? 52 : delta_size;
+ /* We could do deflated delta, or we could do just deflated two,
+ * whichever is smaller.
+ */
+ delta = NULL;
+ deflated = deflate_it(two->ptr, two->size, &deflate_size);
+ if (one->size && two->size) {
+ delta = diff_delta(one->ptr, one->size,
+ two->ptr, two->size,
+ &delta_size, deflate_size);
+ orig_size = delta_size;
+ if (delta)
+ delta = deflate_it(delta, delta_size, &delta_size);
+ }
+
+ if (delta && delta_size < deflate_size) {
+ printf("delta %lu\n", orig_size);
+ free(deflated);
+ data = delta;
+ data_size = delta_size;
+ }
+ else {
+ printf("literal %lu\n", two->size);
+ free(delta);
+ data = deflated;
+ data_size = deflate_size;
+ }
+
+ /* emit data encoded in base85 */
+ cp = data;
+ while (data_size) {
+ int bytes = (52 < data_size) ? 52 : data_size;
char line[70];
- delta_size -= bytes;
+ data_size -= bytes;
if (bytes <= 26)
line[0] = bytes + 'A' - 1;
else
@@ -473,7 +470,7 @@ static void emit_binary_diff(mmfile_t *o
puts(line);
}
printf("\n");
- free(delta);
+ free(data);
}
#define FIRST_FEW_BYTES 8000
@@ -538,7 +535,11 @@ static void builtin_diff(const char *nam
die("unable to read files to diff");
if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) {
- if (o->full_index)
+ /* Quite common confusing case */
+ if (mf1.size == mf2.size &&
+ !memcmp(mf1.ptr, mf2.ptr, mf1.size))
+ goto free_ab_and_return;
+ if (o->binary)
emit_binary_diff(&mf1, &mf2);
else
printf("Binary files %s and %s differ\n",
@@ -1239,6 +1240,10 @@ int diff_opt_parse(struct diff_options *
options->rename_limit = strtoul(arg+2, NULL, 10);
else if (!strcmp(arg, "--full-index"))
options->full_index = 1;
+ else if (!strcmp(arg, "--binary")) {
+ options->output_format = DIFF_FORMAT_PATCH;
+ options->full_index = options->binary = 1;
+ }
else if (!strcmp(arg, "--name-only"))
options->output_format = DIFF_FORMAT_NAME;
else if (!strcmp(arg, "--name-status"))
diff --git a/diff.h b/diff.h
index b3b2c4d..d052608 100644
--- a/diff.h
+++ b/diff.h
@@ -28,6 +28,7 @@ struct diff_options {
with_raw:1,
with_stat:1,
tree_in_recursive:1,
+ binary:1,
full_index:1,
silent_on_remove:1,
find_copies_harder:1;
--
1.3.1.g25a9
next prev parent reply other threads:[~2006-05-05 10:16 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-05-04 23:52 [PATCH] binary patch Junio C Hamano
2006-05-05 2:47 ` Nicolas Pitre
2006-05-05 6:47 ` Junio C Hamano
2006-05-05 10:15 ` Junio C Hamano [this message]
2006-05-05 15:41 ` Nicolas Pitre
2006-05-05 17:38 ` Junio C Hamano
2006-05-05 18:33 ` Nicolas Pitre
2006-05-05 19:23 ` Junio C Hamano
2006-05-05 20:07 ` Nicolas Pitre
2006-05-05 20:33 ` Daniel Barkalow
2006-05-05 20:50 ` Junio C Hamano
2006-05-06 7:40 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7vy7xgzsiu.fsf@assigned-by-dhcp.cox.net \
--to=junkio@cox.net \
--cc=git@vger.kernel.org \
--cc=nico@cam.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).