git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] diff --ignore-case
@ 2012-02-20  2:16 Junio C Hamano
  2012-02-20  2:16 ` [PATCH 1/5] xdiff: remove XDL_PATCH_* macros Junio C Hamano
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  2:16 UTC (permalink / raw)
  To: git

This teaches our "diff" machinery to optionally consider lines that are
different only in upper/lower-cases to be equivalent.

Junio C Hamano (5):
  xdiff: remove XDL_PATCH_* macros
  xdiff: PATIENCE/HISTOGRAM are not independent option bits
  xdiff: introduce XDF_INEXACT_MATCH
  xdiff: introduce XDF_IGNORE_CASE
  diff: --ignore-case

 Documentation/diff-options.txt |    4 +++
 diff.c                         |    8 +++---
 diff.h                         |    2 ++
 merge-recursive.c              |    4 +--
 t/lib-diff-alternative.sh      |   45 +++++++++++++++++++++++++++++++--
 t/t4033-diff-patience.sh       |    6 +++++
 t/t4050-diff-histogram.sh      |    2 ++
 xdiff/xdiff.h                  |   19 ++++++++------
 xdiff/xdiffi.c                 |    4 +--
 xdiff/xhistogram.c             |    2 +-
 xdiff/xpatience.c              |    2 +-
 xdiff/xprepare.c               |   21 ++++++++--------
 xdiff/xutils.c                 |   54 ++++++++++++++++++++++++++++++++--------
 13 files changed, 133 insertions(+), 40 deletions(-)

-- 
1.7.9.1.265.g25f75

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/5] xdiff: remove XDL_PATCH_* macros
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
@ 2012-02-20  2:16 ` Junio C Hamano
  2012-02-20  2:16 ` [PATCH 2/5] xdiff: PATIENCE/HISTOGRAM are not independent option bits Junio C Hamano
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  2:16 UTC (permalink / raw)
  To: git

These are not used anywhere in our codebase, and the bit assignment
definition is merely confusing.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiff.h |    5 -----
 1 file changed, 5 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 00d36c3..70c8b87 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -36,11 +36,6 @@ extern "C" {
 #define XDF_HISTOGRAM_DIFF (1 << 6)
 #define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
 
-#define XDL_PATCH_NORMAL '-'
-#define XDL_PATCH_REVERSE '+'
-#define XDL_PATCH_MODEMASK ((1 << 8) - 1)
-#define XDL_PATCH_IGNOREBSPACE (1 << 8)
-
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
-- 
1.7.9.1.265.g25f75

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/5] xdiff: PATIENCE/HISTOGRAM are not independent option bits
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
  2012-02-20  2:16 ` [PATCH 1/5] xdiff: remove XDL_PATCH_* macros Junio C Hamano
@ 2012-02-20  2:16 ` Junio C Hamano
  2012-02-20  2:16 ` [PATCH 3/5] xdiff: introduce XDF_INEXACT_MATCH Junio C Hamano
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  2:16 UTC (permalink / raw)
  To: git

Because the default Myers, patience and histogram algorithms cannot be in
effect at the same time, XDF_PATIENCE_DIFF and XDF_HISTOGRAM_DIFF are not
independent bits.  Instead of wasting one bit per algorithm, define a few
macros to access the few bits they occupy and update the code that accesses
them.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c             |    4 ++--
 diff.h             |    2 ++
 merge-recursive.c  |    4 ++--
 xdiff/xdiff.h      |    5 ++++-
 xdiff/xdiffi.c     |    4 ++--
 xdiff/xhistogram.c |    2 +-
 xdiff/xpatience.c  |    2 +-
 xdiff/xprepare.c   |   21 ++++++++++-----------
 8 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/diff.c b/diff.c
index 374ecf3..52cda7a 100644
--- a/diff.c
+++ b/diff.c
@@ -3400,9 +3400,9 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
 	else if (!strcmp(arg, "--patience"))
-		DIFF_XDL_SET(options, PATIENCE_DIFF);
+		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
-		DIFF_XDL_SET(options, HISTOGRAM_DIFF);
+		options->xdl_opts = DIFF_WITH_ALG(options, HISTOGRAM_DIFF);
 
 	/* flags options */
 	else if (!strcmp(arg, "--binary")) {
diff --git a/diff.h b/diff.h
index 0c51724..e688a48 100644
--- a/diff.h
+++ b/diff.h
@@ -88,6 +88,8 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
 #define DIFF_XDL_SET(opts, flag)    ((opts)->xdl_opts |= XDF_##flag)
 #define DIFF_XDL_CLR(opts, flag)    ((opts)->xdl_opts &= ~XDF_##flag)
 
+#define DIFF_WITH_ALG(opts, flag)   (((opts)->xdl_opts & ~XDF_DIFF_ALGORITHM_MASK) | XDF_##flag)
+
 enum diff_words_type {
 	DIFF_WORDS_NONE = 0,
 	DIFF_WORDS_PORCELAIN,
diff --git a/merge-recursive.c b/merge-recursive.c
index cc664c3..1d574fe 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -2069,9 +2069,9 @@ int parse_merge_opt(struct merge_options *o, const char *s)
 	else if (!prefixcmp(s, "subtree="))
 		o->subtree_shift = s + strlen("subtree=");
 	else if (!strcmp(s, "patience"))
-		o->xdl_opts |= XDF_PATIENCE_DIFF;
+		o->xdl_opts = DIFF_WITH_ALG(o, PATIENCE_DIFF);
 	else if (!strcmp(s, "histogram"))
-		o->xdl_opts |= XDF_HISTOGRAM_DIFF;
+		o->xdl_opts = DIFF_WITH_ALG(o, HISTOGRAM_DIFF);
 	else if (!strcmp(s, "ignore-space-change"))
 		o->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
 	else if (!strcmp(s, "ignore-all-space"))
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 70c8b87..09215af 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -32,9 +32,12 @@ extern "C" {
 #define XDF_IGNORE_WHITESPACE (1 << 2)
 #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3)
 #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4)
+#define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
+
 #define XDF_PATIENCE_DIFF (1 << 5)
 #define XDF_HISTOGRAM_DIFF (1 << 6)
-#define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
+#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
+#define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)
 
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 75a3922..bc889e8 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -328,10 +328,10 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	xdalgoenv_t xenv;
 	diffdata_t dd1, dd2;
 
-	if (xpp->flags & XDF_PATIENCE_DIFF)
+	if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF)
 		return xdl_do_patience_diff(mf1, mf2, xpp, xe);
 
-	if (xpp->flags & XDF_HISTOGRAM_DIFF)
+	if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
 		return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
 
 	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 18f6f99..bf99787 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -252,7 +252,7 @@ static int fall_back_to_classic_diff(struct histindex *index,
 		int line1, int count1, int line2, int count2)
 {
 	xpparam_t xpp;
-	xpp.flags = index->xpp->flags & ~XDF_HISTOGRAM_DIFF;
+	xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
 
 	return xdl_fall_back_diff(index->env, &xpp,
 				  line1, count1, line2, count2);
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index fdd7d02..04e1a1a 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -288,7 +288,7 @@ static int fall_back_to_classic_diff(struct hashmap *map,
 		int line1, int count1, int line2, int count2)
 {
 	xpparam_t xpp;
-	xpp.flags = map->xpp->flags & ~XDF_PATIENCE_DIFF;
+	xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
 
 	return xdl_fall_back_diff(map->env, &xpp,
 				  line1, count1, line2, count2);
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index e419f4f..63a22c6 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -181,7 +181,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 	if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
 		goto abort;
 
-	if (xpp->flags & XDF_HISTOGRAM_DIFF)
+	if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
 		hbits = hsize = 0;
 	else {
 		hbits = xdl_hashbits((unsigned int) narec);
@@ -209,8 +209,8 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 			crec->ha = hav;
 			recs[nrec++] = crec;
 
-			if (!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
-				xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
+			if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
+			    xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
 				goto abort;
 		}
 	}
@@ -273,16 +273,15 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	 * (nrecs) will be updated correctly anyway by
 	 * xdl_prepare_ctx().
 	 */
-	sample = xpp->flags & XDF_HISTOGRAM_DIFF ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1;
+	sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF
+		  ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1);
 
 	enl1 = xdl_guess_lines(mf1, sample) + 1;
 	enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-	if (!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
-		xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
-
+	if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
+	    xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
 		return -1;
-	}
 
 	if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
 
@@ -296,9 +295,9 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		return -1;
 	}
 
-	if (!(xpp->flags & XDF_PATIENCE_DIFF) &&
-			!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
-			xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
+	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
+	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
+	    xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
 
 		xdl_free_ctx(&xe->xdf2);
 		xdl_free_ctx(&xe->xdf1);
-- 
1.7.9.1.265.g25f75

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/5] xdiff: introduce XDF_INEXACT_MATCH
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
  2012-02-20  2:16 ` [PATCH 1/5] xdiff: remove XDL_PATCH_* macros Junio C Hamano
  2012-02-20  2:16 ` [PATCH 2/5] xdiff: PATIENCE/HISTOGRAM are not independent option bits Junio C Hamano
@ 2012-02-20  2:16 ` Junio C Hamano
  2012-02-20  2:16 ` [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE Junio C Hamano
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  2:16 UTC (permalink / raw)
  To: git

The XDF_WHITESPACE_FLAGS mask is used to decide if a simple memcmp() is
sufficient to tell if two lines are the same for the purpose of producing
diff. In the current code, only options in the ignore-whitespace family
happen to call for a matching logic that is more complex than memcmp(),
but when we add new options, e.g. ignore-case, it will become apparent
that the mask is misnamed.

Introduce XDF_INEXACT_MATCH mask to express its true meaning: when any of
these bits are set, we may need to inspect the contents of the two lines
that are not exactly the same, as we may still consider them "matching".

The XDF_WHITESPACE_FLAGS will become useful again in a later patch in the
series and its definition is kept.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c         |    2 +-
 xdiff/xdiff.h  |    6 +++++-
 xdiff/xutils.c |    4 ++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/diff.c b/diff.c
index 52cda7a..87b2ec1 100644
--- a/diff.c
+++ b/diff.c
@@ -2142,7 +2142,7 @@ static void builtin_diff(const char *name_a,
 		struct emit_callback ecbdata;
 		const struct userdiff_funcname *pe;
 
-		if (!DIFF_XDL_TST(o, WHITESPACE_FLAGS) || must_show_header) {
+		if (!DIFF_XDL_TST(o, INEXACT_MATCH) || must_show_header) {
 			fprintf(o->file, "%s", header.buf);
 			strbuf_reset(&header);
 		}
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 09215af..48793f9 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -32,7 +32,11 @@ extern "C" {
 #define XDF_IGNORE_WHITESPACE (1 << 2)
 #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3)
 #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4)
-#define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
+#define XDF_WHITESPACE_FLAGS \
+	(XDF_IGNORE_WHITESPACE | \
+	 XDF_IGNORE_WHITESPACE_CHANGE | \
+	 XDF_IGNORE_WHITESPACE_AT_EOL)
+#define XDF_INEXACT_MATCH XDF_WHITESPACE_FLAGS
 
 #define XDF_PATIENCE_DIFF (1 << 5)
 #define XDF_HISTOGRAM_DIFF (1 << 6)
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 0de084e..aa6de74 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -176,7 +176,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 
 	if (s1 == s2 && !memcmp(l1, l2, s1))
 		return 1;
-	if (!(flags & XDF_WHITESPACE_FLAGS))
+	if (!(flags & XDF_INEXACT_MATCH))
 		return 0;
 
 	i1 = 0;
@@ -281,7 +281,7 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	unsigned long ha = 5381;
 	char const *ptr = *data;
 
-	if (flags & XDF_WHITESPACE_FLAGS)
+	if (flags & XDF_INEXACT_MATCH)
 		return xdl_hash_record_with_whitespace(data, top, flags);
 
 	for (; ptr < top && *ptr != '\n'; ptr++) {
-- 
1.7.9.1.265.g25f75

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
                   ` (2 preceding siblings ...)
  2012-02-20  2:16 ` [PATCH 3/5] xdiff: introduce XDF_INEXACT_MATCH Junio C Hamano
@ 2012-02-20  2:16 ` Junio C Hamano
  2012-02-22 18:07   ` Jakub Narebski
  2012-02-20  2:16 ` [PATCH 5/5] diff: --ignore-case Junio C Hamano
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  2:16 UTC (permalink / raw)
  To: git

Teach the hash function and per-line comparison logic to compare lines
while ignoring the differences in case.  It is not an ignore-whitespace
option but still needs to trigger the inexact match logic, and that is
why the previous step introduced XDF_INEXACT_MATCH mask.

Assign the 7th bit for this option, and move the bits to select diff
algorithms out of the way in order to leave room for a few bits to add
more variants of ignore-whitespace, such as --ignore-tab-expansion, if
somebody else is inclined to do so later.

We would still need to teach the front-end to flip this bit, for this
change to be of any use.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiff.h  |    7 ++++---
 xdiff/xutils.c |   50 ++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 48793f9..5a01a0e 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -36,10 +36,11 @@ extern "C" {
 	(XDF_IGNORE_WHITESPACE | \
 	 XDF_IGNORE_WHITESPACE_CHANGE | \
 	 XDF_IGNORE_WHITESPACE_AT_EOL)
-#define XDF_INEXACT_MATCH XDF_WHITESPACE_FLAGS
+#define XDF_IGNORE_CASE (1 << 7)
+#define XDF_INEXACT_MATCH (XDF_WHITESPACE_FLAGS | XDF_IGNORE_CASE)
 
-#define XDF_PATIENCE_DIFF (1 << 5)
-#define XDF_HISTOGRAM_DIFF (1 << 6)
+#define XDF_PATIENCE_DIFF (1 << 8)
+#define XDF_HISTOGRAM_DIFF (1 << 9)
 #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
 #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)
 
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index aa6de74..925f1d0 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -170,6 +170,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
 	return nl + 1;
 }
 
+static inline int match_a_byte(char ch1, char ch2, long flags)
+{
+	if (ch1 == ch2)
+		return 1;
+	if (!(flags & XDF_IGNORE_CASE) || ((ch1 | ch2) & 0x80))
+		return 0;
+	if (isupper(ch1))
+		ch1 = tolower(ch1);
+	if (isupper(ch2))
+		ch2 = tolower(ch2);
+	return (ch1 == ch2);
+}
+
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
 	int i1, i2;
@@ -192,7 +205,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 	if (flags & XDF_IGNORE_WHITESPACE) {
 		goto skip_ws;
 		while (i1 < s1 && i2 < s2) {
-			if (l1[i1++] != l2[i2++])
+			if (!match_a_byte(l1[i1++], l2[i2++], flags))
 				return 0;
 		skip_ws:
 			while (i1 < s1 && XDL_ISSPACE(l1[i1]))
@@ -210,15 +223,28 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 					i2++;
 				continue;
 			}
-			if (l1[i1++] != l2[i2++])
+			if (!match_a_byte(l1[i1++], l2[i2++], flags))
 				return 0;
 		}
-	} else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
-		while (i1 < s1 && i2 < s2 && l1[i1++] == l2[i2++])
+	} else {
+		/*
+		 * Either XDF_IGNORE_WHITESPACE_AT_EOL is set, or
+		 * no whitespace is ignored, but INEXACT_MATCH is
+		 * in effect for other reasons.
+		 */
+		while (i1 < s1 && i2 < s2 &&
+		       match_a_byte(l1[i1++], l2[i2++], flags))
 			; /* keep going */
 	}
 
 	/*
+	 * If we are not ignoring any whitespace, we must have consumed
+	 * everything for the lines to match.
+	 */
+	if (!(flags & XDF_WHITESPACE_FLAGS))
+		return i1 == s1 && i2 == s2;
+
+	/*
 	 * After running out of one side, the remaining side must have
 	 * nothing but whitespace for the lines to match.  Note that
 	 * ignore-whitespace-at-eol case may break out of the loop
@@ -238,6 +264,14 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 	return 1;
 }
 
+static inline unsigned long hash_a_byte(const char ch_, long flags)
+{
+	unsigned long ch = ch_ & 0xFF;
+	if ((flags & XDF_IGNORE_CASE) && !(ch & 0x80) && isupper(ch))
+		ch = tolower(ch);
+	return ch;
+}
+
 static unsigned long xdl_hash_record_with_whitespace(char const **data,
 		char const *top, long flags) {
 	unsigned long ha = 5381;
@@ -256,20 +290,20 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
 			else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
 				 && !at_eol) {
 				ha += (ha << 5);
-				ha ^= (unsigned long) ' ';
+				ha ^= hash_a_byte(' ', flags);
 			}
 			else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
 				 && !at_eol) {
 				while (ptr2 != ptr + 1) {
 					ha += (ha << 5);
-					ha ^= (unsigned long) *ptr2;
+					ha ^= hash_a_byte(*ptr2, flags);
 					ptr2++;
 				}
 			}
 			continue;
 		}
 		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
+		ha ^= hash_a_byte(*ptr, flags);
 	}
 	*data = ptr < top ? ptr + 1: ptr;
 
@@ -286,7 +320,7 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 
 	for (; ptr < top && *ptr != '\n'; ptr++) {
 		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
+		ha ^= hash_a_byte(*ptr, flags);
 	}
 	*data = ptr < top ? ptr + 1: ptr;
 
-- 
1.7.9.1.265.g25f75

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/5] diff: --ignore-case
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
                   ` (3 preceding siblings ...)
  2012-02-20  2:16 ` [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE Junio C Hamano
@ 2012-02-20  2:16 ` Junio C Hamano
  2012-02-20  7:36 ` [PATCH 6/5] diff -i Junio C Hamano
  2012-02-20  8:41 ` [PATCH 0/5] diff --ignore-case Johannes Sixt
  6 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  2:16 UTC (permalink / raw)
  To: git

Teach the front-end to flip XDF_IGNORE_CASE bit with the options GNU diff
uses.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt |    4 ++++
 diff.c                         |    2 ++
 t/lib-diff-alternative.sh      |   45 ++++++++++++++++++++++++++++++++++++++--
 t/t4033-diff-patience.sh       |    6 ++++++
 t/t4050-diff-histogram.sh      |    2 ++
 5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 9f7cba2..791e07f 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -404,6 +404,10 @@ endif::git-format-patch[]
 	differences even if one line has whitespace where the other
 	line has none.
 
+--ignore-case::
+	Ignore changes in case only; only ASCII alphabet is supported for
+	now.
+
 --inter-hunk-context=<lines>::
 	Show the context between diff hunks, up to the specified number
 	of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index 87b2ec1..d7604b7 100644
--- a/diff.c
+++ b/diff.c
@@ -3399,6 +3399,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
+	else if (!strcmp(arg, "--ignore-case"))
+		DIFF_XDL_SET(options, IGNORE_CASE);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/t/lib-diff-alternative.sh b/t/lib-diff-alternative.sh
index 75ffd91..45c665e 100644
--- a/t/lib-diff-alternative.sh
+++ b/t/lib-diff-alternative.sh
@@ -104,8 +104,9 @@ EOF
 
 	STRATEGY=$1
 
+	cmd='git diff --no-index'
 	test_expect_success "$STRATEGY diff" '
-		test_must_fail git diff --no-index "--$STRATEGY" file1 file2 > output &&
+		test_must_fail $cmd ${STRATEGY:+"--$STRATEGY"} file1 file2 >output &&
 		test_cmp expect output
 	'
 
@@ -157,9 +158,49 @@ EOF
 
 	STRATEGY=$1
 
+	cmd='git diff --no-index'
 	test_expect_success 'completely different files' '
-		test_must_fail git diff --no-index "--$STRATEGY" uniq1 uniq2 > output &&
+
+		test_must_fail $cmd  ${STRATEGY:+"--$STRATEGY"} uniq1 uniq2 >output &&
 		test_cmp expect output
 	'
 }
 
+test_diff_ignore () {
+
+	STRATEGY=$1
+
+	echo "A quick brown fox" >test.0
+	echo "A  quick brown fox" >test-b
+	echo " A quick brownfox" >test-w
+	echo "A quick brown fox " >test--ignore-space-at-eol
+	echo "A Quick Brown Fox" >test--ignore-case
+	echo "A Quick  Brown Fox" >test--ignore-case-b
+	echo "A quick brown fox jumps" >test
+	cases="-b -w --ignore-space-at-eol --ignore-case"
+
+	if test -z "$STRATEGY"
+	then
+		label=baseline
+	else
+		label=$STRATEGY
+	fi
+
+	cmd="git diff --no-index ${STRATEGY:+--$STRATEGY}"
+
+	test_expect_success "$label diff" '
+		test_must_fail $cmd test.0 test
+	'
+	for case in $cases
+	do
+		test_expect_success "$label diff $case" '
+			$cmd $case test.0 test$case &&
+			test_must_fail $cmd test.0 test
+		'
+	done
+
+	test_expect_success "$label diff -b --ignore-case" '
+		$cmd -b --ignore-case test.0 test--ignore-case-b
+	'
+
+}
diff --git a/t/t4033-diff-patience.sh b/t/t4033-diff-patience.sh
index 3c9932e..c7f8c6c 100755
--- a/t/t4033-diff-patience.sh
+++ b/t/t4033-diff-patience.sh
@@ -5,8 +5,14 @@ test_description='patience diff algorithm'
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-diff-alternative.sh
 
+# baseline
+test_diff_unique ""
+test_diff_ignore ""
+
 test_diff_frobnitz "patience"
 
 test_diff_unique "patience"
 
+test_diff_ignore "patience"
+
 test_done
diff --git a/t/t4050-diff-histogram.sh b/t/t4050-diff-histogram.sh
index fd3e86a..98c6686 100755
--- a/t/t4050-diff-histogram.sh
+++ b/t/t4050-diff-histogram.sh
@@ -9,4 +9,6 @@ test_diff_frobnitz "histogram"
 
 test_diff_unique "histogram"
 
+test_diff_ignore "histogram"
+
 test_done
-- 
1.7.9.1.265.g25f75

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/5] diff -i
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
                   ` (4 preceding siblings ...)
  2012-02-20  2:16 ` [PATCH 5/5] diff: --ignore-case Junio C Hamano
@ 2012-02-20  7:36 ` Junio C Hamano
  2012-02-20  8:41 ` [PATCH 0/5] diff --ignore-case Johannes Sixt
  6 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  7:36 UTC (permalink / raw)
  To: git

The previous patch to teach "--ignore-case" option to our "diff" machinery
deliberately omitted a short-and-sweet "-i" that GNU diff uses to ask for
"--ignore-case".  This is because our diff machinery is often used by and
shares the command line options with the commands in the "git log" family,
where the short option already means something entirely different. Namely,
it instructs to use case-insensitive match when looking for commits that
match information that appears in the commit object itself, e.g. --author
and --grep.

Tweak the option parser so that "-i" means both, so that

	$ git log --grep=frotz -i -p

first picks commits that have string "frotz" in any combination of cases,
and then shows patches that ignore case-only changes for the chosen
commits, while "--ignore-case" and "--regexp-ignore-case" can be used to
mean only one, i.e.

	$ git log --grep=frotz --regexp-ignore-case -p

would pick the same set of commits, but the patches shown by it do not
ignore case-only changes while

	$ git log --grep=frotz --ignore-case -p

would pick commits that have "frotz" in all lowercase, but shows patches
that ignore case-only changes for the chosen commits.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt |    1 +
 diff.c                         |    2 +-
 revision.c                     |    6 +++++-
 t/lib-diff-alternative.sh      |    3 ++-
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 791e07f..9ed78c9 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -404,6 +404,7 @@ endif::git-format-patch[]
 	differences even if one line has whitespace where the other
 	line has none.
 
+-i::
 --ignore-case::
 	Ignore changes in case only; only ASCII alphabet is supported for
 	now.
diff --git a/diff.c b/diff.c
index d7604b7..9d1584e 100644
--- a/diff.c
+++ b/diff.c
@@ -3399,7 +3399,7 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
-	else if (!strcmp(arg, "--ignore-case"))
+	else if (!strcmp(arg, "--ignore-case") || !strcmp(arg, "-i"))
 		DIFF_XDL_SET(options, IGNORE_CASE);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
diff --git a/revision.c b/revision.c
index 8764dde..f1a1354 100644
--- a/revision.c
+++ b/revision.c
@@ -13,6 +13,7 @@
 #include "decorate.h"
 #include "log-tree.h"
 #include "string-list.h"
+#include "xdiff-interface.h"
 
 volatile show_early_output_fn_t show_early_output;
 
@@ -1557,7 +1558,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
 		return argcount;
 	} else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
 		revs->grep_filter.regflags |= REG_EXTENDED;
-	} else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
+	} else if (!strcmp(arg, "-i")) {
+		DIFF_XDL_SET(&revs->diffopt, IGNORE_CASE);
+		revs->grep_filter.regflags |= REG_ICASE;
+	} else if (!strcmp(arg, "--regexp-ignore-case")) {
 		revs->grep_filter.regflags |= REG_ICASE;
 	} else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
 		revs->grep_filter.fixed = 1;
diff --git a/t/lib-diff-alternative.sh b/t/lib-diff-alternative.sh
index 45c665e..81609f8 100644
--- a/t/lib-diff-alternative.sh
+++ b/t/lib-diff-alternative.sh
@@ -175,9 +175,10 @@ test_diff_ignore () {
 	echo " A quick brownfox" >test-w
 	echo "A quick brown fox " >test--ignore-space-at-eol
 	echo "A Quick Brown Fox" >test--ignore-case
+	echo "A Quick Brown FoX" >test-i
 	echo "A Quick  Brown Fox" >test--ignore-case-b
 	echo "A quick brown fox jumps" >test
-	cases="-b -w --ignore-space-at-eol --ignore-case"
+	cases="-b -w --ignore-space-at-eol --ignore-case -i"
 
 	if test -z "$STRATEGY"
 	then

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/5] diff --ignore-case
  2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
                   ` (5 preceding siblings ...)
  2012-02-20  7:36 ` [PATCH 6/5] diff -i Junio C Hamano
@ 2012-02-20  8:41 ` Johannes Sixt
  2012-02-20  8:52   ` Junio C Hamano
  6 siblings, 1 reply; 14+ messages in thread
From: Johannes Sixt @ 2012-02-20  8:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Am 20.02.2012 03:16, schrieb Junio C Hamano:
> This teaches our "diff" machinery to optionally consider lines that are
> different only in upper/lower-cases to be equivalent.

When would I want to use --ignore-case? And how useful is it that it
does not obey the current locale, as in your implementation?

-- Hannes

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/5] diff --ignore-case
  2012-02-20  8:41 ` [PATCH 0/5] diff --ignore-case Johannes Sixt
@ 2012-02-20  8:52   ` Junio C Hamano
  2012-02-20 14:06     ` Thomas Rast
  0 siblings, 1 reply; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20  8:52 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git

Johannes Sixt <j6t@kdbg.org> writes:

> Am 20.02.2012 03:16, schrieb Junio C Hamano:
>> This teaches our "diff" machinery to optionally consider lines that are
>> different only in upper/lower-cases to be equivalent.
>
> When would I want to use --ignore-case?

I wouldn't myself; it was just I saw somebody asked if -G can be used to
do case insensitive and then I realized we do not even support such in the
diff machinery to begin with, but now this opens a way to do so by copying
the xdl options when the pickaxe-grep codepath runs a subdiff.

You can update the hash and recmatch to be locale-aware, which would be an
internal update to xdiff/ directory that is independent of the rest of the
series, and/or the change to plug the machinery to -G.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/5] diff --ignore-case
  2012-02-20  8:52   ` Junio C Hamano
@ 2012-02-20 14:06     ` Thomas Rast
  2012-02-20 19:47       ` Junio C Hamano
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Rast @ 2012-02-20 14:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, git, Chris Leong

Junio C Hamano <gitster@pobox.com> writes:

> Johannes Sixt <j6t@kdbg.org> writes:
>
>> Am 20.02.2012 03:16, schrieb Junio C Hamano:
>>> This teaches our "diff" machinery to optionally consider lines that are
>>> different only in upper/lower-cases to be equivalent.
>>
>> When would I want to use --ignore-case?
>
> I wouldn't myself; it was just I saw somebody asked if -G can be used to
> do case insensitive and then I realized we do not even support such in the
> diff machinery to begin with, but now this opens a way to do so by copying
> the xdl options when the pickaxe-grep codepath runs a subdiff.

I wonder which one of us misunderstood the original request ;-)

It was

} Is there any way to run diff -G with a case insensitivity flag?

and I took that to mean "I want to find addition/removal of a string
like -G does, but I don't know how it was capitalized".  OTOH you
interpreted it as "I want to run -G but ignore changes in case while
diffing".

So maybe Chris can comment on what was intended?

-- 
Thomas Rast
trast@{inf,student}.ethz.ch

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/5] diff --ignore-case
  2012-02-20 14:06     ` Thomas Rast
@ 2012-02-20 19:47       ` Junio C Hamano
  2012-02-20 22:10         ` Chris Leong
  2012-02-21  9:02         ` Re* " Junio C Hamano
  0 siblings, 2 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-20 19:47 UTC (permalink / raw)
  To: Thomas Rast; +Cc: Johannes Sixt, git, Chris Leong

Thomas Rast <trast@inf.ethz.ch> writes:

> I wonder which one of us misunderstood the original request ;-)

Heh, I did ;-)

> It was
>
> } Is there any way to run diff -G with a case insensitivity flag?
>
> and I took that to mean "I want to find addition/removal of a string
> like -G does, but I don't know how it was capitalized".

I think it is just the matter of checking REG_ICASE that may be set in
revs->grep_filter.regflags, and propagating it down to the regcomp at the
beginning of diffcore_pickaxe_grep().

Want to try and see how well it works?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/5] diff --ignore-case
  2012-02-20 19:47       ` Junio C Hamano
@ 2012-02-20 22:10         ` Chris Leong
  2012-02-21  9:02         ` Re* " Junio C Hamano
  1 sibling, 0 replies; 14+ messages in thread
From: Chris Leong @ 2012-02-20 22:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Thomas Rast, Johannes Sixt, git

It was about not worrying about the exact case of matches.

On Tue, Feb 21, 2012 at 6:47 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Want to try and see how well it works?
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re* [PATCH 0/5] diff --ignore-case
  2012-02-20 19:47       ` Junio C Hamano
  2012-02-20 22:10         ` Chris Leong
@ 2012-02-21  9:02         ` Junio C Hamano
  1 sibling, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-02-21  9:02 UTC (permalink / raw)
  To: Thomas Rast; +Cc: Johannes Sixt, git, Chris Leong

Junio C Hamano <gitster@pobox.com> writes:

> Thomas Rast <trast@inf.ethz.ch> writes:
>
>> I wonder which one of us misunderstood the original request ;-)
>
> Heh, I did ;-)
>
>> It was
>>
>> } Is there any way to run diff -G with a case insensitivity flag?
>>
>> and I took that to mean "I want to find addition/removal of a string
>> like -G does, but I don't know how it was capitalized".
>
> I think it is just the matter of checking REG_ICASE that may be set in
> revs->grep_filter.regflags, and propagating it down to the regcomp at the
> beginning of diffcore_pickaxe_grep().
>
> Want to try and see how well it works?

The gist of the patch should look like this.

Even though I haven't done anything more than just to compile and run "git
log -p -i -G search 233054d", it looks obviously correct ;-)

Note that this does not depend on any of the "diff --ignore-case" topic,
and it may conflict with [PATCH 6/5].  Among the two "else if" that flips
the REG_ICASE in revision.c, it needs to be decided if only one of them,
or both of them should set the new option. I haven't thought things through
to decide which.

 diff.h             |    1 +
 diffcore-pickaxe.c |    6 +++++-
 revision.c         |    1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/diff.h b/diff.h
index 7af5f1e..182cb0e 100644
--- a/diff.h
+++ b/diff.h
@@ -82,6 +82,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
 #define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27)
 #define DIFF_OPT_DIRSTAT_BY_LINE     (1 << 28)
 #define DIFF_OPT_FUNCCONTEXT         (1 << 29)
+#define DIFF_OPT_PICKAXE_IGNORE_CASE (1 << 30)
 
 #define DIFF_OPT_TST(opts, flag)    ((opts)->flags & DIFF_OPT_##flag)
 #define DIFF_OPT_SET(opts, flag)    ((opts)->flags |= DIFF_OPT_##flag)
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 380a837..8ffb741 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -138,8 +138,12 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 {
 	int err;
 	regex_t regex;
+	int cflags = REG_EXTENDED | REG_NEWLINE;
 
-	err = regcomp(&regex, o->pickaxe, REG_EXTENDED | REG_NEWLINE);
+	if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
+		cflags |= REG_ICASE;
+
+	err = regcomp(&regex, o->pickaxe, cflags);
 	if (err) {
 		char errbuf[1024];
 		regerror(err, &regex, errbuf, 1024);
diff --git a/revision.c b/revision.c
index 819ff01..b3554ed 100644
--- a/revision.c
+++ b/revision.c
@@ -1582,6 +1582,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
 		revs->grep_filter.regflags |= REG_EXTENDED;
 	} else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
 		revs->grep_filter.regflags |= REG_ICASE;
+		DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
 	} else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
 		revs->grep_filter.fixed = 1;
 	} else if (!strcmp(arg, "--all-match")) {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE
  2012-02-20  2:16 ` [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE Junio C Hamano
@ 2012-02-22 18:07   ` Jakub Narebski
  0 siblings, 0 replies; 14+ messages in thread
From: Jakub Narebski @ 2012-02-22 18:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> Teach the hash function and per-line comparison logic to compare lines
> while ignoring the differences in case.  It is not an ignore-whitespace
> option but still needs to trigger the inexact match logic, and that is
> why the previous step introduced XDF_INEXACT_MATCH mask.

NB: how does this compare with ignoring case in filesystem paths?
 
> Assign the 7th bit for this option, and move the bits to select diff
> algorithms out of the way in order to leave room for a few bits to add
> more variants of ignore-whitespace, such as --ignore-tab-expansion, if
> somebody else is inclined to do so later.

Or do a proper Unicode sorting / collation algorithm, with different
levels 

  (4.3 Form a sort key for each string, UTS #10.):

     Level 1: alphabetic ordering
     Level 2: diacritic ordering
     Level 3: case ordering
     Level 4: tie-breaking (e.g. in the case when variable is 'shifted')

> We would still need to teach the front-end to flip this bit, for this
> change to be any useful.
> 
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---

> +static inline int match_a_byte(char ch1, char ch2, long flags)
> +{
> +	if (ch1 == ch2)
> +		return 1;
> +	if (!(flags & XDF_IGNORE_CASE) || ((ch1 | ch2) & 0x80))
> +		return 0;
> +	if (isupper(ch1))
> +		ch1 = tolower(ch1);
> +	if (isupper(ch2))
> +		ch2 = tolower(ch2);
> +	return (ch1 == ch2);
> +}

<del>
Wouldn't a better solution be a collate algorithm rather than changing
a sorting function?  Or is it a performance hack on typical body of
text under version control (mainly lowercase)?
</del>

"(libc.info)Collation Functions" says:

     The functions `strcoll' and `wcscoll' perform this translation
  implicitly, in order to do one comparison.  By contrast, `strxfrm' and
  `wcsxfrm' perform the mapping explicitly.  If you are making multiple
  comparisons using the same string or set of strings, it is likely to be
  more efficient to use `strxfrm' or `wcsxfrm' to transform all the
  strings just once, and subsequently compare the transformed strings
  with `strcmp' or `wcscmp'.

The function match_a_byte (memcoll?) defined here is similar to strcoll;
do we compare a single line with more than one other line?

> +static inline unsigned long hash_a_byte(const char ch_, long flags)
> +{
> +	unsigned long ch = ch_ & 0xFF;
> +	if ((flags & XDF_IGNORE_CASE) && !(ch & 0x80) && isupper(ch))
> +		ch = tolower(ch);
> +	return ch;
> +}
> +

Hmmm... hash_a_byte (memxfrm?) is similar to strxfrm, so you do use
one or the other...

-- 
Jakub Narebski

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-02-22 18:08 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2012-02-20  2:16 [PATCH 0/5] diff --ignore-case Junio C Hamano
2012-02-20  2:16 ` [PATCH 1/5] xdiff: remove XDL_PATCH_* macros Junio C Hamano
2012-02-20  2:16 ` [PATCH 2/5] xdiff: PATIENCE/HISTOGRAM are not independent option bits Junio C Hamano
2012-02-20  2:16 ` [PATCH 3/5] xdiff: introduce XDF_INEXACT_MATCH Junio C Hamano
2012-02-20  2:16 ` [PATCH 4/5] xdiff: introduce XDF_IGNORE_CASE Junio C Hamano
2012-02-22 18:07   ` Jakub Narebski
2012-02-20  2:16 ` [PATCH 5/5] diff: --ignore-case Junio C Hamano
2012-02-20  7:36 ` [PATCH 6/5] diff -i Junio C Hamano
2012-02-20  8:41 ` [PATCH 0/5] diff --ignore-case Johannes Sixt
2012-02-20  8:52   ` Junio C Hamano
2012-02-20 14:06     ` Thomas Rast
2012-02-20 19:47       ` Junio C Hamano
2012-02-20 22:10         ` Chris Leong
2012-02-21  9:02         ` Re* " Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).