diff --git a/diff.c b/diff.c
index f644947..0901cdc 100644
--- a/diff.c
+++ b/diff.c
@@ -2447,6 +2447,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		options->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		options->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL;
+	else if (!strcmp(arg, "--patience"))
+		options->xdl_opts |= XDF_USE_PATIENCE;
 
 	/* flags options */
 	else if (!strcmp(arg, "--binary")) {
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 84fff58..bba915c 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -32,6 +32,7 @@ extern "C" {
 #define XDF_IGNORE_WHITESPACE (1 << 2)
 #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3)
 #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4)
+#define XDF_USE_PATIENCE (1 << 5)
 #define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
 
 #define XDL_PATCH_NORMAL '-'
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 9d0324a..1a5b13a 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -37,8 +37,285 @@ typedef struct s_xdpsplit {
 	int min_lo, min_hi;
 } xdpsplit_t;
 
+/*
+ * One run of matching records: len records starting at i1 in file 1
+ * and at i2 in file 2.  Nodes are chained in increasing i1/i2 order.
+ */
+typedef struct s_xduniqmatch {
+	struct s_xduniqmatch *next;
+	long i1, i2, len;
+} xduniqmatch_t;
+
+/*
+ * Work record for the patience pass.  While collecting, a record has
+ * off1 or off2 set to -1 depending on which file it came from; once
+ * paired, both hold the offsets of a line unique to both files.
+ * "left" is the top of the previous stack when the record was placed.
+ */
+typedef struct s_xdp_ha {
+	struct s_xdp_ha *left;
+	long ha;
+	long off1;
+	long off2;
+} xdp_ha_t;
+
+/* Allocate an unlinked match node; returns NULL on allocation failure. */
+static xduniqmatch_t *xdl_uniq_match_alloc(long i1, long i2, long len)
+{
+	xduniqmatch_t *match = xdl_malloc(sizeof(*match));
+
+	if (match) {
+		match->next = NULL;
+		match->i1 = i1;
+		match->i2 = i2;
+		match->len = len;
+	}
+	return match;
+}
+
+/* Free the head of a match list and return its successor. */
+static xduniqmatch_t *xdl_uniq_popfree(xduniqmatch_t *lst)
+{
+	xduniqmatch_t *next = lst->next;
+
+	xdl_free(lst);
+	return next;
+}
+
+/* Three-way compare that cannot overflow or truncate like "a - b". */
+static int xdp_long_cmp(long a, long b)
+{
+	return (a > b) - (a < b);
+}
+
+/* qsort order: by hash, then by off2 so file 1 records (off2 == -1) lead. */
+static int xdp_ha_cmp_by_ha(const void *a1, const void *a2)
+{
+	const xdp_ha_t *ha1 = a1;
+	const xdp_ha_t *ha2 = a2;
+
+	if (ha1->ha != ha2->ha)
+		return xdp_long_cmp(ha1->ha, ha2->ha);
+	return xdp_long_cmp(ha1->off2, ha2->off2);
+}
+
+/* qsort order: decreasing off2. */
+static int xdp_ha_cmp_by_off2(const void *a1, const void *a2)
+{
+	const xdp_ha_t *ha1 = a1;
+	const xdp_ha_t *ha2 = a2;
+
+	return xdp_long_cmp(ha2->off2, ha1->off2);
+}
+
+/*
+ * Stack tops hold strictly decreasing off1 values.  Return the index of
+ * the first of the len stacks whose top is not greater than off1, or
+ * len if there is none.
+ */
+static long patience_bisect_stack(xdp_ha_t **stacks, long len, long off1)
+{
+	long l = 0, r = len;
+
+	while (l < r) {
+		long i = l + (r - l) / 2;
+
+		if (off1 < stacks[i]->off1)
+			l = i + 1;
+		else
+			r = i;
+	}
+	return l;
+}
+
+/*
+ * Link the chain that ends at ha into the list right after lcs.  The
+ * chain is walked through "left", which yields increasing offsets in
+ * both files; adjacent matches are merged into one run.
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int xdl_patience_split_aux(xduniqmatch_t *lcs, xdp_ha_t *ha)
+{
+	xduniqmatch_t *tmp;
+
+	while (ha) {
+		tmp = xdl_uniq_match_alloc(ha->off1, ha->off2, 1);
+		if (!tmp)
+			return -1;
+		tmp->next = lcs->next;
+		lcs->next = tmp;
+		lcs = tmp;
+
+		/* Only "left" leads to a record with larger offsets. */
+		for (ha = ha->left; ha; ha = ha->left) {
+			if (lcs->i1 + lcs->len != ha->off1 ||
+			    lcs->i2 + lcs->len != ha->off2)
+				break;
+			lcs->len++;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Find the lines that occur exactly once in each file between begin and
+ * end, keep a longest set of them that appears in the same order on both
+ * sides, and insert the resulting runs between begin and end.
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int xdl_patience_split(unsigned long const *ha1, unsigned long const *ha2,
+			      xduniqmatch_t *begin, xduniqmatch_t *end)
+{
+	xdp_ha_t *recs, **stacks = NULL;
+	long off1 = begin->i1 + begin->len, lim1 = end->i1;
+	long off2 = begin->i2 + begin->len, lim2 = end->i2;
+	long len, i, j, uniq, alloc, nstacks;
+	int ret = -1;
+
+	len = (lim1 - off1) + (lim2 - off2);
+	recs = xdl_malloc(sizeof(*recs) * len);
+	if (recs == NULL)
+		return -1;
+
+	/* Every slot of recs must be filled: j runs up to lim1, not lim1 - off1. */
+	for (i = 0, j = off1; j < lim1; j++, i++) {
+		recs[i].ha = ha1[j];
+		recs[i].off1 = j;
+		recs[i].off2 = -1;
+		recs[i].left = NULL;
+	}
+	for (j = off2; j < lim2; j++, i++) {
+		recs[i].ha = ha2[j];
+		recs[i].off1 = -1;
+		recs[i].off2 = j;
+		recs[i].left = NULL;
+	}
+
+	qsort(recs, len, sizeof(*recs), xdp_ha_cmp_by_ha);
+
+	/* Compact the pairs "once in file 1, once in file 2" to the front. */
+	uniq = 0;
+	for (i = 0; i < len - 1; ) {
+		long ha = recs[i].ha;
+
+		if (ha != recs[i + 1].ha) {
+			i++;
+			continue;
+		}
+
+		if (i < len - 2 && ha == recs[i + 2].ha) {
+			/* three or more occurrences: skip the whole run */
+			i += 3;
+			while (i < len - 1 && recs[i].ha == ha)
+				i++;
+			continue;
+		}
+
+		if (recs[i].off2 < 0 && recs[i + 1].off1 < 0) {
+			recs[uniq].ha = ha;
+			recs[uniq].off1 = recs[i].off1;
+			recs[uniq].off2 = recs[i + 1].off2;
+			uniq++;
+		}
+		i += 2;
+	}
+
+	if (uniq) {
+		qsort(recs, uniq, sizeof(*recs), xdp_ha_cmp_by_off2);
+
+		alloc = xdl_bogosqrt(uniq);
+		stacks = xdl_malloc(sizeof(*stacks) * alloc);
+		if (stacks == NULL)
+			goto out;
+		nstacks = 1;
+		stacks[0] = recs;
+
+		for (i = 1; i < uniq; i++) {
+			long k;
+
+			if (recs[i].off1 < stacks[nstacks - 1]->off1) {
+				if (nstacks >= alloc) {
+					/* keep the old array if growing fails */
+					xdp_ha_t **grown;
+
+					grown = xdl_realloc(stacks,
+							    sizeof(*stacks) * alloc * 2);
+					if (!grown)
+						goto out;
+					stacks = grown;
+					alloc *= 2;
+				}
+				k = nstacks++;
+			} else {
+				k = patience_bisect_stack(stacks, nstacks - 1,
+							  recs[i].off1);
+			}
+
+			if (k > 0)
+				recs[i].left = stacks[k - 1];
+			stacks[k] = &recs[i];
+		}
+
+		if (xdl_patience_split_aux(begin, stacks[nstacks - 1]) < 0)
+			goto out;
+	}
+	ret = 0;
+
+out:
+	xdl_free(stacks);
+	xdl_free(recs);
+	return ret;
+}
+
+/*
+ * Refine the gap between two adjacent nodes of the match list.  Common
+ * leading and trailing records are absorbed into begin and end, then the
+ * rest is split on unique common lines and every sub-gap is refined in
+ * turn.  Returns 0 on success, -1 on allocation failure.
+ */
+static int xdl_patience_lcs(xdfenv_t *xe, xduniqmatch_t *begin, xduniqmatch_t *end)
+{
+	unsigned long const *ha1 = xe->xdf1.ha, *ha2 = xe->xdf2.ha;
+	long off1 = begin->i1 + begin->len, lim1 = end->i1;
+	long off2 = begin->i2 + begin->len, lim2 = end->i2;
+	xduniqmatch_t *cur, *next;
+
+	while (off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]) {
+		off1++;
+		off2++;
+	}
+	while (off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]) {
+		lim1--;
+		lim2--;
+	}
+
+	/* off1 already includes the old begin->len, so assign, do not add. */
+	begin->len = off1 - begin->i1;
+	end->len += end->i1 - lim1;
+	end->i1 = lim1;
+	end->i2 = lim2;
+
+	if (off1 == lim1 || off2 == lim2)
+		return 0;
+
+	if (xdl_patience_split(ha1, ha2, begin, end))
+		return -1;
+
+	/* Nothing unique in this gap: leave it to the regular diff. */
+	if (begin->next == end)
+		return 0;
+
+	/* Refine every sub-gap, including the one that ends at "end". */
+	for (cur = begin; cur != end; cur = next) {
+		next = cur->next;
+		if (xdl_patience_lcs(xe, cur, next) < 0)
+			return -1;
+	}
+
+	return 0;
+}
 
 
 
 static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 		      unsigned long const *ha2, long off2, long lim2,
@@ -321,13 +598,13 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
 	return 0;
 }
 
-
 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		xdfenv_t *xe) {
 	long ndiags;
 	long *kvd, *kvdf, *kvdb;
 	xdalgoenv_t xenv;
 	diffdata_t dd1, dd2;
+	int need_min = (xpp->flags & XDF_NEED_MINIMAL) != 0;
 
 	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
 
@@ -364,12 +641,45 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	dd2.rchg = xe->xdf2.rchg;
 	dd2.rindex = xe->xdf2.rindex;
 
-	if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
-			 kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
+	if (xpp->flags & XDF_USE_PATIENCE) {
+		xduniqmatch_t *lcs;
+		long i1, i2;
+
+		/* Sentinels: empty runs at the start and the end of both files. */
+		lcs = xdl_uniq_match_alloc(0, 0, 0);
+		if (!lcs)
+			goto error;
+		lcs->next = xdl_uniq_match_alloc(xe->xdf1.nreff, xe->xdf2.nreff, 0);
+		if (!lcs->next || xdl_patience_lcs(xe, lcs, lcs->next) < 0) {
+			while ((lcs = xdl_uniq_popfree(lcs)))
+				; /* free the whole list */
+			goto error;
+		}
 
-		xdl_free(kvd);
-		xdl_free_env(xe);
-		return -1;
+		/*
+		 * Records inside a run are common to both files; only the
+		 * gaps between consecutive runs go through the regular diff.
+		 */
+		i1 = i2 = lcs->len;
+		while ((lcs = xdl_uniq_popfree(lcs))) {
+			if (xdl_recs_cmp(&dd1, i1, lcs->i1, &dd2, i2, lcs->i2,
+					 kvdf, kvdb, need_min, &xenv) < 0) {
+				while ((lcs = xdl_uniq_popfree(lcs)))
+					; /* free the rest of the list */
+				goto error;
+			}
+			i1 = lcs->i1 + lcs->len;
+			i2 = lcs->i2 + lcs->len;
+		}
+	} else {
+		if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
+				 kvdf, kvdb, need_min, &xenv) < 0) {
+			/* the patience branch jumps here on failure too */
+error:
+			xdl_free(kvd);
+			xdl_free_env(xe);
+			return -1;
+		}
 	}
 
 	xdl_free(kvd);