From: Junio C Hamano <gitster@pobox.com>
To: git@vger.kernel.org
Subject: [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE
Date: Sun, 19 Feb 2012 18:16:27 -0800 [thread overview]
Message-ID: <1329704188-9955-5-git-send-email-gitster@pobox.com> (raw)
In-Reply-To: <1329704188-9955-1-git-send-email-gitster@pobox.com>
Teach the hash function and per-line comparison logic to compare lines
while ignoring the differences in case. It is not an ignore-whitespace
option but still needs to trigger the inexact match logic, and that is
why the previous step introduced the XDF_INEXACT_MATCH mask.
Assign bit 7 (1 << 7) to this option, and move the bits that select diff
algorithms out of the way in order to leave room for a few bits to add
more variants of ignore-whitespace, such as --ignore-tab-expansion, if
somebody else is inclined to do so later.
We still need to teach the front-end to flip this bit for this
change to be of any use.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
xdiff/xdiff.h | 7 ++++---
xdiff/xutils.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 48793f9..5a01a0e 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -36,10 +36,11 @@ extern "C" {
(XDF_IGNORE_WHITESPACE | \
XDF_IGNORE_WHITESPACE_CHANGE | \
XDF_IGNORE_WHITESPACE_AT_EOL)
-#define XDF_INEXACT_MATCH XDF_WHITESPACE_FLAGS
+#define XDF_IGNORE_CASE (1 << 7)
+#define XDF_INEXACT_MATCH (XDF_WHITESPACE_FLAGS | XDF_IGNORE_CASE)
-#define XDF_PATIENCE_DIFF (1 << 5)
-#define XDF_HISTOGRAM_DIFF (1 << 6)
+#define XDF_PATIENCE_DIFF (1 << 8)
+#define XDF_HISTOGRAM_DIFF (1 << 9)
#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
#define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index aa6de74..925f1d0 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -170,6 +170,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
return nl + 1;
}
+static inline int match_a_byte(char ch1, char ch2, long flags)
+{
+	/* Return 1 if the bytes match; XDF_IGNORE_CASE folds 7-bit case. */
+	if (ch1 == ch2)
+		return 1;
+	/* Differing bytes can match only when folding two 7-bit bytes. */
+	if (!(flags & XDF_IGNORE_CASE) || ((ch1 | ch2) & 0x80))
+		return 0;
+	ch1 = isupper(ch1) ? tolower(ch1) : ch1;
+	ch2 = isupper(ch2) ? tolower(ch2) : ch2;
+	return (ch1 == ch2);
+}
+
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
{
int i1, i2;
@@ -192,7 +205,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
if (flags & XDF_IGNORE_WHITESPACE) {
goto skip_ws;
while (i1 < s1 && i2 < s2) {
- if (l1[i1++] != l2[i2++])
+ if (!match_a_byte(l1[i1++], l2[i2++], flags))
return 0;
skip_ws:
while (i1 < s1 && XDL_ISSPACE(l1[i1]))
@@ -210,15 +223,28 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
i2++;
continue;
}
- if (l1[i1++] != l2[i2++])
+ if (!match_a_byte(l1[i1++], l2[i2++], flags))
return 0;
}
- } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
- while (i1 < s1 && i2 < s2 && l1[i1++] == l2[i2++])
+ } else {
+ /*
+ * Either XDF_IGNORE_WHITESPACE_AT_EOL is set, or
+ * no whitespace is ignored, but INEXACT_MATCH is
+ * in effect for other reasons.
+ */
+ while (i1 < s1 && i2 < s2 &&
+ match_a_byte(l1[i1++], l2[i2++], flags))
; /* keep going */
}
/*
+ * If we are not ignoring any whitespace, we must have consumed
+ * everything for the lines to match.
+ */
+ if (!(flags & XDF_WHITESPACE_FLAGS))
+ return i1 == s1 && i2 == s2;
+
+ /*
* After running out of one side, the remaining side must have
* nothing but whitespace for the lines to match. Note that
* ignore-whitespace-at-eol case may break out of the loop
@@ -238,6 +264,14 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
return 1;
}
+static inline unsigned long hash_a_byte(const char ch_, long flags)
+{
+	/* Value hashed for one byte; XDF_IGNORE_CASE folds 7-bit uppercase. */
+	unsigned long byte = ch_ & 0xFF;
+	int fold = (flags & XDF_IGNORE_CASE) && byte < 0x80 && isupper(byte);
+	return fold ? (unsigned long)tolower(byte) : byte;
+}
+
static unsigned long xdl_hash_record_with_whitespace(char const **data,
char const *top, long flags) {
unsigned long ha = 5381;
@@ -256,20 +290,20 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
&& !at_eol) {
ha += (ha << 5);
- ha ^= (unsigned long) ' ';
+ ha ^= hash_a_byte(' ', flags);
}
else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
&& !at_eol) {
while (ptr2 != ptr + 1) {
ha += (ha << 5);
- ha ^= (unsigned long) *ptr2;
+ ha ^= hash_a_byte(*ptr2, flags);
ptr2++;
}
}
continue;
}
ha += (ha << 5);
- ha ^= (unsigned long) *ptr;
+ ha ^= hash_a_byte(*ptr, flags);
}
*data = ptr < top ? ptr + 1: ptr;
@@ -286,7 +320,7 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
for (; ptr < top && *ptr != '\n'; ptr++) {
ha += (ha << 5);
- ha ^= (unsigned long) *ptr;
+ ha ^= hash_a_byte(*ptr, flags);
}
*data = ptr < top ? ptr + 1: ptr;
--
1.7.9.1.265.g25f75
next prev parent reply other threads:[~2012-02-20 2:16 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-20 2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
2012-02-20 2:16 ` [PATCH 1/5] xdiff: remove XDL_PATCH_* macros Junio C Hamano
2012-02-20 2:16 ` [PATCH 2/5] xdiff: PATIENCE/HISTOGRAM are not independent option bits Junio C Hamano
2012-02-20 2:16 ` [PATCH 3/5] xdiff: introduce XDF_INEXACT_MATCH Junio C Hamano
2012-02-20 2:16 ` Junio C Hamano [this message]
2012-02-22 18:07 ` [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE Jakub Narebski
2012-02-20 2:16 ` [PATCH 5/5] diff: --ignore-case Junio C Hamano
2012-02-20 7:36 ` [PATCH 6/5] diff -i Junio C Hamano
2012-02-20 8:41 ` [PATCH 0/5] diff --ignore-case Johannes Sixt
2012-02-20 8:52 ` Junio C Hamano
2012-02-20 14:06 ` Thomas Rast
2012-02-20 19:47 ` Junio C Hamano
2012-02-20 22:10 ` Chris Leong
2012-02-21 9:02 ` Re* " Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1329704188-9955-5-git-send-email-gitster@pobox.com \
--to=gitster@pobox.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).