Git development
 help / color / mirror / Atom feed
* [PATCH] xdiff: Show function names in hunk headers.
@ 2006-03-28  2:23 Mark Wooding
  2006-03-28  5:54 ` Junio C Hamano
  2006-03-28 11:07 ` Andreas Ericsson
  0 siblings, 2 replies; 8+ messages in thread
From: Mark Wooding @ 2006-03-28  2:23 UTC (permalink / raw)
  To: git; +Cc: Mark Wooding

The speed of the built-in diff generator is nice; but the function names
shown by `diff -p' are /really/ nice.  And I hate having to choose.  So,
we hack xdiff to find the function names and print them.

xdiff has grown a flag to say whether to dig up the function names.  The
builtin_diff function passes this flag unconditionally.  I suppose it
could parse GIT_DIFF_OPTS, but it doesn't at the moment.  I've also
reintroduced the `function name' into the test suite, from which it was
removed in commit 3ce8f089.

The function names are parsed by a particularly stupid algorithm at the
moment: it just tries to find a line in the `old' file, from before the
start of the hunk, whose first character looks plausible.  Still, it's
most definitely a start.

Signed-off-by: Mark Wooding <mdw@distorted.org.uk>

---

 diff.c                 |    1 +
 t/t4001-diff-rename.sh |    2 +-
 xdiff/xdiff.h          |    3 +++
 xdiff/xemit.c          |   41 ++++++++++++++++++++++++++++++++++++++++-
 xdiff/xinclude.h       |    1 +
 xdiff/xutils.c         |   15 ++++++++++++---
 xdiff/xutils.h         |    3 ++-
 7 files changed, 60 insertions(+), 6 deletions(-)

746418a20769c003886d7f4bbec6563af7aabd4b
diff --git a/diff.c b/diff.c
index 5eae094..8b37477 100644
--- a/diff.c
+++ b/diff.c
@@ -267,6 +267,7 @@ static void builtin_diff(const char *nam
 		ecbdata.label_path = lbl;
 		xpp.flags = XDF_NEED_MINIMAL;
 		xecfg.ctxlen = 3;
+		xecfg.flags = XDL_EMIT_FUNCNAMES;
 		if (!diffopts)
 			;
 		else if (!strncmp(diffopts, "--unified=", 10))
diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh
index 08c1131..2e3c20d 100755
--- a/t/t4001-diff-rename.sh
+++ b/t/t4001-diff-rename.sh
@@ -49,7 +49,7 @@ rename from path0
 rename to path1
 --- a/path0
 +++ b/path1
-@@ -8,7 +8,7 @@
+@@ -8,7 +8,7 @@ Line 7
  Line 8
  Line 9
  Line 10
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 71cb939..2540e8a 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -35,6 +35,8 @@ #define XDL_PATCH_REVERSE '+'
 #define XDL_PATCH_MODEMASK ((1 << 8) - 1)
 #define XDL_PATCH_IGNOREBSPACE (1 << 8)
 
+#define XDL_EMIT_FUNCNAMES (1 << 0)
+
 #define XDL_MMB_READONLY (1 << 0)
 
 #define XDL_MMF_ATOMIC (1 << 0)
@@ -65,6 +67,7 @@ typedef struct s_xdemitcb {
 
 typedef struct s_xdemitconf {
 	long ctxlen;
+	unsigned long flags;
 } xdemitconf_t;
 
 typedef struct s_bdiffparam {
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 2e5d54c..ad5bfb1 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -69,10 +69,43 @@ static xdchange_t *xdl_get_hunk(xdchange
 }
 
 
+static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll) {
+
+	/*
+	 * Be quite stupid about this for now.  Find a line in the old file
+	 * before the start of the hunk (and context) which starts with a
+	 * plausible character.
+	 */
+
+	const char *rec;
+	long len;
+
+	*ll = 0;
+	while (i-- > 0) {
+		len = xdl_get_rec(xf, i, &rec);
+		if (len > 0 &&
+		    (isalpha((unsigned char)*rec) || /* identifier? */
+		     *rec == '_' ||	/* also identifier? */
+		     *rec == '(' ||	/* lisp defun? */
+		     *rec == '#')) {	/* #define? */
+			if (len > sz)
+				len = sz;
+			if (len && rec[len - 1] == '\n')
+				len--;
+			memcpy(buf, rec, len);
+			*ll = len;
+			return;
+		}
+	}
+}
+
+
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg) {
 	long s1, s2, e1, e2, lctx;
 	xdchange_t *xch, *xche;
+	char funcbuf[40];
+	long funclen = 0;
 
 	for (xch = xche = xscr; xch; xch = xche->next) {
 		xche = xdl_get_hunk(xch, xecfg);
@@ -90,7 +123,13 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange
 		/*
 		 * Emit current hunk header.
 		 */
-		if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, ecb) < 0)
+
+		if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
+			xdl_find_func(&xe->xdf1, s1, funcbuf,
+				      sizeof(funcbuf), &funclen);
+		}
+		if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
+				      funcbuf, funclen, ecb) < 0)
 			return -1;
 
 		/*
diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h
index 9490fc5..04a9da8 100644
--- a/xdiff/xinclude.h
+++ b/xdiff/xinclude.h
@@ -23,6 +23,7 @@
 #if !defined(XINCLUDE_H)
 #define XINCLUDE_H
 
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 8221806..afaada1 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -235,7 +235,8 @@ long xdl_atol(char const *str, char cons
 }
 
 
-int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb) {
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
+		      const char *func, long funclen, xdemitcb_t *ecb) {
 	int nb = 0;
 	mmbuffer_t mb;
 	char buf[128];
@@ -264,8 +265,16 @@ int xdl_emit_hunk_hdr(long s1, long c1, 
 		nb += xdl_num_out(buf + nb, c2);
 	}
 
-	memcpy(buf + nb, " @@\n", 4);
-	nb += 4;
+	memcpy(buf + nb, " @@", 3);
+	nb += 3;
+	if (func && funclen) {
+		buf[nb++] = ' ';
+		if (funclen > sizeof(buf) - nb - 1)
+			funclen = sizeof(buf) - nb - 1;
+		memcpy(buf + nb, func, funclen);
+		nb += funclen;
+	}
+	buf[nb++] = '\n';
 
 	mb.ptr = buf;
 	mb.size = nb;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index 428a4bb..55b0d39 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -36,7 +36,8 @@ unsigned long xdl_hash_record(char const
 unsigned int xdl_hashbits(unsigned int size);
 int xdl_num_out(char *out, long val);
 long xdl_atol(char const *str, char const **next);
-int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb);
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
+		      const char *func, long funclen, xdemitcb_t *ecb);
 
 
 
-- 
1.3.0.rc1.g7464

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-28  2:23 [PATCH] xdiff: Show function names in hunk headers Mark Wooding
@ 2006-03-28  5:54 ` Junio C Hamano
  2006-03-28  9:50   ` Junio C Hamano
  2006-03-28 15:03   ` Mark Wooding
  2006-03-28 11:07 ` Andreas Ericsson
  1 sibling, 2 replies; 8+ messages in thread
From: Junio C Hamano @ 2006-03-28  5:54 UTC (permalink / raw)
  To: Mark Wooding; +Cc: git

Mark Wooding <mdw@distorted.org.uk> writes:

> The function names are parsed by a particularly stupid algorithm at the
> moment: it just tries to find a line in the `old' file, from before the
> start of the hunk, whose first character looks plausible.  Still, it's
> most definitely a start.

> +		    (isalpha((unsigned char)*rec) || /* identifier? */
> +		     *rec == '_' ||	/* also identifier? */
> +		     *rec == '(' ||	/* lisp defun? */
> +		     *rec == '#')) {	/* #define? */

GNU diff -p does "^[[:alpha:]$_]"; personally I think any line
that does not begin with a whitespace is good enough.  In either
way, your patch is good.  Thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-28  5:54 ` Junio C Hamano
@ 2006-03-28  9:50   ` Junio C Hamano
  2006-03-28 10:13     ` Mark Wooding
  2006-03-28 15:03   ` Mark Wooding
  1 sibling, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2006-03-28  9:50 UTC (permalink / raw)
  To: Mark Wooding; +Cc: git

Junio C Hamano <junkio@cox.net> writes:

> Mark Wooding <mdw@distorted.org.uk> writes:
>...
>> +		    (isalpha((unsigned char)*rec) || /* identifier? */
>> +		     *rec == '_' ||	/* also identifier? */
>> +		     *rec == '(' ||	/* lisp defun? */
>> +		     *rec == '#')) {	/* #define? */
>
> GNU diff -p does "^[[:alpha:]$_]"; personally I think any line
> that does not begin with a whitespace is good enough.

Obviously I was not thinking.  That should at least be "any line
that begins with a non-whitespace and has a few characters", to
omit "{\n" and catch "int main()\n" in:

	int main()
        {
        	printf("Hello, world.\n");
        }

;-).

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-28  9:50   ` Junio C Hamano
@ 2006-03-28 10:13     ` Mark Wooding
  0 siblings, 0 replies; 8+ messages in thread
From: Mark Wooding @ 2006-03-28 10:13 UTC (permalink / raw)
  To: git

Junio C Hamano <junkio@cox.net> wrote:

> Obviously I was not thinking.  That should at least be "any line
> that begins with a non-whitespace and has a few characters", to
> omit "{\n" and catch "int main()\n" in:

Heh!  I already got that one wrong last night.  Hence my more
complicated version. ;-)

-- [mdw]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-28  2:23 [PATCH] xdiff: Show function names in hunk headers Mark Wooding
  2006-03-28  5:54 ` Junio C Hamano
@ 2006-03-28 11:07 ` Andreas Ericsson
  1 sibling, 0 replies; 8+ messages in thread
From: Andreas Ericsson @ 2006-03-28 11:07 UTC (permalink / raw)
  To: Mark Wooding; +Cc: git

Mark Wooding wrote:
> 
> The function names are parsed by a particularly stupid algorithm at the
> moment: it just tries to find a line in the `old' file, from before the
> start of the hunk, whose first character looks plausible.  Still, it's
> most definitely a start.
> 

Stupid is sometimes good. I've noticed that the gnu diff algorithm 
sometimes won't notice changes in small structs, though they are large 
enough for the surrounding context in the unified diff not to show it 
either.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-28  5:54 ` Junio C Hamano
  2006-03-28  9:50   ` Junio C Hamano
@ 2006-03-28 15:03   ` Mark Wooding
  2006-03-29  0:21     ` Junio C Hamano
  1 sibling, 1 reply; 8+ messages in thread
From: Mark Wooding @ 2006-03-28 15:03 UTC (permalink / raw)
  To: git

Junio C Hamano <junkio@cox.net> wrote:

> GNU diff -p does "^[[:alpha:]$_]"; personally I think any line
> that does not begin with a whitespace is good enough.

Hmm.  I think my approach is wrong.  I've noticed that targets of the
form `$(FOO): ...' in Makefiles would make nice hunk headers, but my
current hack won't notice them.  Without a shift of approach, I think
I run the risk of deluging the list with little fixes to this bit of
code, which sounds like a pile of no fun.

So, I have two main suggestions.  The first is /very/ stupid, and just
asks for two non-whitespace characters at the start of a line.

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index ad5bfb1..822f991 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -83,11 +83,9 @@ static void xdl_find_func(xdfile_t *xf, 
        *ll = 0;
        while (i-- > 0) {
                len = xdl_get_rec(xf, i, &rec);
-               if (len > 0 &&
-                   (isalpha((unsigned char)*rec) || /* identifier? */
-                    *rec == '_' ||     /* also identifier? */
-                    *rec == '(' ||     /* lisp defun? */
-                    *rec == '#')) {    /* #define? */
+               if (len >= 2 &&
+                   !isspace((unsigned char)rec[0]) &&
+                   !isspace((unsigned char)rec[1])) {
                        if (len > sz)
                                len = sz;
                        if (len && rec[len - 1] == '\n')

The second suggestion is slightly refined, but a little more
complicated.  We ask for a line which starts /either/ with two
non-whitespace characters, or with an alphanumeric.  Why?  Because text
documents have a tendency to have headings of the form `7 Heading!' and
I want to catch them.

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index ad5bfb1..bcb3e47 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -83,11 +83,10 @@ static void xdl_find_func(xdfile_t *xf, 
        *ll = 0;
        while (i-- > 0) {
                len = xdl_get_rec(xf, i, &rec);
-               if (len > 0 &&
-                   (isalpha((unsigned char)*rec) || /* identifier? */
-                    *rec == '_' ||     /* also identifier? */
-                    *rec == '(' ||     /* lisp defun? */
-                    *rec == '#')) {    /* #define? */
+               if (len && !isspace((unsigned char)*rec) &&
+                   ((len >= 2 && !isspace((unsigned char)rec[1])) ||
+                    isalnum((unsigned char)*rec) ||
+                    *rec == '_')) {
                        if (len > sz)
                                len = sz;
                        if (len && rec[len - 1] == '\n')

Another possibility I just thought of: insist that the line starts with
a non-space, and contains another non-space somewhere.  This will get
caught out by `{       /* ... rest of comment */', which I've seen a few
places, though.

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index ad5bfb1..81b38ce 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -79,15 +79,18 @@ static void xdl_find_func(xdfile_t *xf, 
 
        const char *rec;
        long len;
+       long j;
 
        *ll = 0;
        while (i-- > 0) {
                len = xdl_get_rec(xf, i, &rec);
-               if (len > 0 &&
-                   (isalpha((unsigned char)*rec) || /* identifier? */
-                    *rec == '_' ||     /* also identifier? */
-                    *rec == '(' ||     /* lisp defun? */
-                    *rec == '#')) {    /* #define? */
+               if (len && !isspace((unsigned char)*rec)) {
+                       for (j = 1; j < len; j++) {
+                               if (!isspace((unsigned char)rec[j]))
+                                       goto good;
+                       }
+                       continue;
+               good:
                        if (len > sz)
                                len = sz;
                        if (len && rec[len - 1] == '\n')

I think I like option 2 best, as a nice compromise between stupidity and
actually working.  Opinions, anyone?

-- [mdw]

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-28 15:03   ` Mark Wooding
@ 2006-03-29  0:21     ` Junio C Hamano
  2006-03-29  9:57       ` Mark Wooding
  0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2006-03-29  0:21 UTC (permalink / raw)
  To: Mark Wooding; +Cc: git

Mark Wooding <mdw@distorted.org.uk> writes:

> Junio C Hamano <junkio@cox.net> wrote:
>
>> GNU diff -p does "^[[:alpha:]$_]"; personally I think any line
>> that does not begin with a whitespace is good enough.
>...
> The second suggestion is slightly refined, but a little more
> complicated.  We ask for a line which starts /either/ with two
> non-whitespace characters, or with an alphanumeric.  Why?  Because text
> documents have a tendency to have headings of the form `7 Heading!' and
> I want to catch them.

Asciidoc?

        . enumerated one
          this is one item

        . enumerated two
          this is another item

> I think I like option 2 best, as a nice compromise between stupidity and
> actually working.  Opinions, anyone?

It's just a heuristic, so there are only two things we could
sensibly do.  Either we keep it absolutely stupid to save our
code and sanity, or we give full configurability via -F regexp
to the end users.

I suspect feeping creaturism would eventually push us to go the
latter route, but for now I'd vote for doing exactly the same as
what default GNU does, by looking at the first letter without
using regexps.  When we add regexps later, the users can
customize the pattern to match the languages they use, and we
might end up having to have a set of (file-suffix -> default
regexp) mappings, with full end user configurability via
.git/config -- gaaah but true X-<.

	[diff]
        	functionline = "^\w" for .c
                functionline = "^(?i)\s*(?:function|procedure)" for .f77
                functionline = "^\(defun " for .el
                ...

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] xdiff: Show function names in hunk headers.
  2006-03-29  0:21     ` Junio C Hamano
@ 2006-03-29  9:57       ` Mark Wooding
  0 siblings, 0 replies; 8+ messages in thread
From: Mark Wooding @ 2006-03-29  9:57 UTC (permalink / raw)
  To: git

Junio C Hamano <junkio@cox.net> wrote:

> Asciidoc?
>
>         . enumerated one
>           this is one item

I thought about this, and decided that missing these was a good thing,
because item heads are probably too low-level.  I'd rather go for
section or subsection headings.

> It's just a heuristic, so there are only two things we could
> sensibly do.  Either we keep it absolutely stupid to save our
> code and sanity, or we give full configurability via -F regexp
> to the end users.

I'm already thinking about that...

-- [mdw]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-03-29  9:57 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-03-28  2:23 [PATCH] xdiff: Show function names in hunk headers Mark Wooding
2006-03-28  5:54 ` Junio C Hamano
2006-03-28  9:50   ` Junio C Hamano
2006-03-28 10:13     ` Mark Wooding
2006-03-28 15:03   ` Mark Wooding
2006-03-29  0:21     ` Junio C Hamano
2006-03-29  9:57       ` Mark Wooding
2006-03-28 11:07 ` Andreas Ericsson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox