From: Davide Libenzi <davidel@xmailserver.org>
To: Junio C Hamano <junkio@cox.net>
Cc: git@vger.kernel.org, Linus Torvalds <torvalds@osdl.org>
Subject: Re: [RFH] xdiff shows trivially redundant diff.
Date: Mon, 3 Apr 2006 16:29:16 -0700 (PDT) [thread overview]
Message-ID: <Pine.LNX.4.64.0604031625330.30048@alien.or.mcafeemobile.com> (raw)
In-Reply-To: <7vzmj3k7x9.fsf@assigned-by-dhcp.cox.net>
On Sun, 2 Apr 2006, Junio C Hamano wrote:
> Davide Libenzi <davidel@xmailserver.org> writes:
>
>> On Sun, 2 Apr 2006, Junio C Hamano wrote:
>>
>>> $ git diff-tree -p 52e8a6^2 52d8a6 -- git-fetch.sh
>>>
>>> shows a change that trivially is redundant, like this:
>>>
>>> diff --git a/git-fetch.sh b/git-fetch.sh
>>> index b4325d9..de4f011 100755
>>> --- a/git-fetch.sh
>>> +++ b/git-fetch.sh
>>> @@ -320,7 +320,7 @@ fetch_main () {
>>> ..
>>> Notice the first '-' and '+' lines of second hunk are identical?
>>>
>>> There is another interesting thing. This is running diff
>>> between 52e8a6^2 and 52d8a6 blobs, but if I change them slightly
>>> so that the first hunk is not different, then this anomaly
>>> disappears.
>>
>> Could you send me the two files that creates the above diff?
>
> I should have tried your pristine xdiff code myself before
> bothering you, but I haven't (sorry).
>
> The problem is from the "stripped down" version we use in git,
> so you may or may not see the problem in your version. Attached
> are the files.
This is the change I made to libxdiff. Xregression already made a few
thousands on iterations w/out problems.
- Davide
--- xdiff/xdiffi.c
+++ xdiff/xdiffi.c
@@ -349,12 +349,7 @@
kvdf += xe->xdf2.nreff + 1;
kvdb += xe->xdf2.nreff + 1;
- /*
- * Classical integer square root approximation using shifts.
- */
- xenv.mxcost = 1;
- for (; ndiags; ndiags >>= 2)
- xenv.mxcost <<= 1;
+ xenv.mxcost = xdl_bogosqrt(ndiags);
if (xenv.mxcost < XDL_MAX_COST_MIN)
xenv.mxcost = XDL_MAX_COST_MIN;
xenv.snake_cnt = XDL_SNAKE_CNT;
--- xdiff/xprepare.c
+++ xdiff/xprepare.c
@@ -25,6 +25,7 @@
#define XDL_KPDIS_RUN 4
+#define XDL_MAX_EQLIMIT 1024
@@ -305,26 +306,48 @@
static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
- long r, rdis, rpdis;
+ long r, rdis0, rpdis0, rdis1, rpdis1;
- for (r = 1, rdis = 0, rpdis = 1; (i - r) >= s; r++) {
+ /*
+ * Scans the lines before 'i' to find a run of lines that either
+ * have no match (dis[j] == 0) or have multiple matches (dis[j] > 1).
+ * Note that we always call this function with dis[i] > 1, so the
+ * current line (i) is already a multimatch line.
+ */
+ for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) {
if (!dis[i - r])
- rdis++;
+ rdis0++;
else if (dis[i - r] == 2)
- rpdis++;
+ rpdis0++;
else
break;
}
- for (r = 1; (i + r) <= e; r++) {
+ /*
+ * If the run before the line 'i' found only multimatch lines, we
+ * return 0 and hence we don't make the current line (i) discarded.
+ * We want to discard multimatch lines only when they appear in the
+ * middle of runs with nomatch lines (dis[j] == 0).
+ */
+ if (rdis0 == 0)
+ return 0;
+ for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) {
if (!dis[i + r])
- rdis++;
+ rdis1++;
else if (dis[i + r] == 2)
- rpdis++;
+ rpdis1++;
else
break;
}
+ /*
+ * If the run after the line 'i' found only multimatch lines, we
+ * return 0 and hence we don't make the current line (i) discarded.
+ */
+ if (rdis1 == 0)
+ return 0;
+ rdis1 += rdis0;
+ rpdis1 += rpdis0;
- return rpdis * XDL_KPDIS_RUN < (rpdis + rdis);
+ return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1);
}
@@ -334,34 +357,40 @@
* might be potentially discarded if they happear in a run of discardable.
*/
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
- long i, rhi, nreff;
+ long i, nm, rhi, nreff, mlim;
unsigned long hav;
xrecord_t **recs;
xrecord_t *rec;
char *dis, *dis1, *dis2;
- if (!(dis = (char *) xdl_malloc((xdf1->nrec + xdf2->nrec + 2) * sizeof(char)))) {
+ if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) {
return -1;
}
- memset(dis, 0, (xdf1->nrec + xdf2->nrec + 2) * sizeof(char));
+ memset(dis, 0, xdf1->nrec + xdf2->nrec + 2);
dis1 = dis;
dis2 = dis1 + xdf1->nrec + 1;
+ if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
+ mlim = XDL_MAX_EQLIMIT;
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
hav = (*recs)->ha;
rhi = (long) XDL_HASHLONG(hav, xdf2->hbits);
- for (rec = xdf2->rhash[rhi]; rec; rec = rec->next)
- if (rec->ha == hav && ++dis1[i] == 2)
+ for (nm = 0, rec = xdf2->rhash[rhi]; rec; rec = rec->next)
+ if (rec->ha == hav && ++nm == mlim)
break;
+ dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
}
+ if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
+ mlim = XDL_MAX_EQLIMIT;
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
hav = (*recs)->ha;
rhi = (long) XDL_HASHLONG(hav, xdf1->hbits);
- for (rec = xdf1->rhash[rhi]; rec; rec = rec->next)
- if (rec->ha == hav && ++dis2[i] == 2)
+ for (nm = 0, rec = xdf1->rhash[rhi]; rec; rec = rec->next)
+ if (rec->ha == hav && ++nm == mlim)
break;
+ dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
}
for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
--- xdiff/xutils.c
+++ xdiff/xutils.c
@@ -29,6 +29,19 @@
+long xdl_bogosqrt(long n) {
+ long i;
+
+ /*
+ * Classical integer square root approximation using shifts.
+ */
+ for (i = 1; n > 0; n >>= 2)
+ i <<= 1;
+
+ return i;
+}
+
+
int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
xdemitcb_t *ecb) {
int i = 2;
--- xdiff/xutils.h
+++ xdiff/xutils.h
@@ -25,6 +25,7 @@
+long xdl_bogosqrt(long n);
int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
xdemitcb_t *ecb);
int xdl_mmfile_outf(void *priv, mmbuffer_t *mb, int nbuf);
next prev parent reply other threads:[~2006-04-03 23:29 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-02 9:15 [RFH] xdiff shows trivially redundant diff Junio C Hamano
2006-04-02 17:35 ` Davide Libenzi
2006-04-02 21:02 ` Junio C Hamano
2006-04-02 21:16 ` Linus Torvalds
2006-04-02 22:18 ` Davide Libenzi
2006-04-02 22:14 ` Davide Libenzi
2006-04-03 0:52 ` Linus Torvalds
2006-04-03 3:26 ` Davide Libenzi
2006-04-03 4:19 ` Linus Torvalds
2006-04-03 4:30 ` Davide Libenzi
2006-04-03 7:33 ` Junio C Hamano
2006-04-03 19:33 ` Davide Libenzi
2006-04-03 19:42 ` Davide Libenzi
2006-04-03 20:01 ` Junio C Hamano
2006-04-03 22:50 ` Davide Libenzi
2006-04-03 23:29 ` Davide Libenzi [this message]
2006-04-04 1:55 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Pine.LNX.4.64.0604031625330.30048@alien.or.mcafeemobile.com \
--to=davidel@xmailserver.org \
--cc=git@vger.kernel.org \
--cc=junkio@cox.net \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).