From: Junio C Hamano <junio@pobox.com>
To: Tsugikazu Shibata <tshibata@ab.jp.nec.com>
Cc: git@vger.kernel.org
Subject: [PATCH 3/2] attribute "coding": specify blob encoding
Date: Wed, 02 Jan 2008 01:50:21 -0800 [thread overview]
Message-ID: <7vsl1gy2si.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: 7v1w904x29.fsf@gitster.siamese.dyndns.org
This teaches the "diff hunk header" function about custom character
encoding per path via gitattributes(5) so that it can sensibly
chomp a line without truncating a character in the middle.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
* This is not intended for serious inclusion, but was done more
as a demonstration of the concept, hence [3/2].
Makefile | 5 ++-
coding.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
coding.h | 10 +++++++++
diff.c | 48 ++++++++++++++++++++++++++++++++++++++++-----
diffcore.h | 1 +
5 files changed, 118 insertions(+), 8 deletions(-)
create mode 100644 coding.c
create mode 100644 coding.h
diff --git a/Makefile b/Makefile
index 21c80e6..de205e6 100644
--- a/Makefile
+++ b/Makefile
@@ -293,7 +293,8 @@ LIB_H = \
run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \
- mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h
+ mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h \
+ coding.h
DIFF_OBJS = \
diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -316,7 +317,7 @@ LIB_OBJS = \
alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
convert.o attr.o decorate.o progress.o mailmap.o symlinks.o remote.o \
- transport.o bundle.o walker.o parse-options.o ws.o
+ transport.o bundle.o walker.o parse-options.o ws.o coding.o
BUILTIN_OBJS = \
builtin-add.o \
diff --git a/coding.c b/coding.c
new file mode 100644
index 0000000..63c5dc7
--- /dev/null
+++ b/coding.c
@@ -0,0 +1,62 @@
+#include "coding.h"
+
+/*
+ * Return how many leading bytes of the len-byte buffer 'line' form
+ * complete, valid EUC-JP characters.
+ *
+ * The result is smaller than 'len' when the buffer ends in the
+ * middle of a multi-byte character or an invalid byte sequence is
+ * found, so a caller can chomp the line at the returned length
+ * without leaving a broken character at the end.
+ */
+unsigned long truncate_euc_jp(char *line, unsigned long len)
+{
+	char *ep = line + len;
+	unsigned long sofar = 0;
+
+	while (line < ep) {
+		int ch = (*line++ & 0xFF);
+
+		if (ch < 0x80) {
+			/* JIS X 0201 lower half */
+			sofar++;
+		} else if (0xA1 <= ch && ch <= 0xFE) {
+			/* JIS X 0208 */
+			if (ep <= line)
+				goto invalid;
+			ch = (*line++ & 0xFF);
+			if (ch < 0xA1 || 0xFE < ch)
+				goto invalid;
+			sofar += 2;
+		} else if (ch == 0x8E) {
+			/* JIS X 0201 upper half: trail byte 0xA1..0xDF */
+			if (ep <= line)
+				goto invalid;
+			ch = (*line++ & 0xFF);
+			if (ch < 0xA1 || 0xDF < ch)
+				goto invalid;
+			sofar += 2;
+		} else if (ch == 0x8F) {
+			/* JIS X 0212 */
+			if (ep <= line)
+				goto invalid;
+			ch = (*line++ & 0xFF);
+			if (ch < 0xA1 || 0xFE < ch)
+				goto invalid;
+			if (ep <= line)
+				goto invalid;
+			ch = (*line++ & 0xFF);
+			if (ch < 0xA1 || 0xFE < ch)
+				goto invalid;
+			sofar += 3;
+		} else {
+invalid:
+			/*
+			 * Incomplete or invalid sequence: sofar covers
+			 * only the fully validated characters.
+			 */
+			return sofar;
+		}
+	}
+	return sofar;
+}
diff --git a/coding.h b/coding.h
new file mode 100644
index 0000000..0e02701
--- /dev/null
+++ b/coding.h
@@ -0,0 +1,10 @@
+#ifndef CODING_H
+#define CODING_H
+
+#include "cache.h"
+
+/* Encoding-aware truncation: takes a len-byte line, returns a byte count. */
+typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
+unsigned long truncate_euc_jp(char *line, unsigned long len);
+
+#endif /* CODING_H */
diff --git a/diff.c b/diff.c
index b9159bc..40c8d2b 100644
--- a/diff.c
+++ b/diff.c
@@ -11,6 +11,7 @@
#include "attr.h"
#include "run-command.h"
#include "utf8.h"
+#include "coding.h"
#ifdef NO_FAST_WORKING_DIRECTORY
#define FAST_WORKING_DIRECTORY 0
@@ -466,8 +467,6 @@ static void diff_words_show(struct diff_words_data *diff_words)
}
}
-typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
-
struct emit_callback {
struct xdiff_emit_state xm;
int nparents, color_diff;
@@ -1125,30 +1124,34 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
static void setup_diff_attr_check(struct git_attr_check *check)
{
static struct git_attr *attr_diff;
+ static struct git_attr *attr_coding;
if (!attr_diff) {
attr_diff = git_attr("diff", 4);
+ attr_coding = git_attr("coding", 6);
}
check[0].attr = attr_diff;
+ check[1].attr = attr_coding;
}
static void diff_filespec_check_attr(struct diff_filespec *one)
{
- struct git_attr_check attr_diff_check;
+ struct git_attr_check attr_diff_check[2];
int check_from_data = 0;
if (one->checked_attr)
return;
- setup_diff_attr_check(&attr_diff_check);
+ setup_diff_attr_check(attr_diff_check);
one->is_binary = 0;
one->funcname_pattern_ident = NULL;
+ one->coding = NULL;
- if (!git_checkattr(one->path, 1, &attr_diff_check)) {
+ if (!git_checkattr(one->path, 2, attr_diff_check)) {
const char *value;
/* binaryness */
- value = attr_diff_check.value;
+ value = attr_diff_check[0].value;
if (ATTR_TRUE(value))
;
else if (ATTR_FALSE(value))
@@ -1161,6 +1164,15 @@ static void diff_filespec_check_attr(struct diff_filespec *one)
;
else
one->funcname_pattern_ident = value;
+
+ /* coding */
+ value = attr_diff_check[1].value;
+ if (ATTR_TRUE(value) ||
+ ATTR_FALSE(value) ||
+ ATTR_UNSET(value))
+ ;
+ else
+ one->coding = value;
}
if (check_from_data) {
@@ -1233,6 +1245,27 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
return NULL;
}
+/* Truncation helpers selected by the value of the "coding" attribute. */
+static struct {
+	const char *coding;
+	sane_truncate_fn fn;
+} builtin_truncate_fn[] = {
+	{ "euc-jp", truncate_euc_jp },
+	{ "utf-8", NULL },
+};
+
+/* Return the helper registered for one->coding, or NULL if there is none. */
+static sane_truncate_fn diff_truncate_line_fn(struct diff_filespec *one)
+{
+	int i;
+
+	diff_filespec_check_attr(one);
+	for (i = 0; one->coding && i < ARRAY_SIZE(builtin_truncate_fn); i++)
+		if (!strcmp(one->coding, builtin_truncate_fn[i].coding))
+			return builtin_truncate_fn[i].fn;
+	return NULL;
+}
+
static void builtin_diff(const char *name_a,
const char *name_b,
struct diff_filespec *one,
@@ -1318,6 +1351,9 @@ static void builtin_diff(const char *name_a,
ecbdata.color_diff = DIFF_OPT_TST(o, COLOR_DIFF);
ecbdata.found_changesp = &o->found_changes;
ecbdata.ws_rule = whitespace_rule(name_b ? name_b : name_a);
+ ecbdata.truncate = diff_truncate_line_fn(one);
+ if (!ecbdata.truncate)
+ ecbdata.truncate = diff_truncate_line_fn(two);
xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts;
xecfg.ctxlen = o->context;
xecfg.flags = XDL_EMIT_FUNCNAMES;
diff --git a/diffcore.h b/diffcore.h
index cc96c20..711a4df 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -28,6 +28,7 @@ struct diff_filespec {
void *data;
void *cnt_data;
const char *funcname_pattern_ident;
+ const char *coding; /* blob text coding */
unsigned long size;
int count; /* Reference count */
int xfrm_flags; /* for use by the xfrm */
--
1.5.4.rc2.13.g690bd
next prev parent reply other threads:[~2008-01-02 9:52 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-01 23:20 [RFC/PATCH] fix "git diff" to create wrong UTF-8 text Tsugikazu Shibata
2008-01-02 5:26 ` Junio C Hamano
2008-01-02 9:49 ` [PATCH 1/2] utf8_width(): allow non NUL-terminated input Junio C Hamano
2008-01-02 9:50 ` [PATCH 2/2] diff: do not chomp hunk-header in the middle of a character Junio C Hamano
2008-01-02 9:50 ` Junio C Hamano [this message]
2008-01-03 21:23 ` [PATCH 3/2] attribute "coding": specify blob encoding しらいしななこ
2008-01-03 21:54 ` Junio C Hamano
2008-01-04 16:16 ` Tsugikazu Shibata
2008-01-04 20:53 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7vsl1gy2si.fsf@gitster.siamese.dyndns.org \
--to=junio@pobox.com \
--cc=git@vger.kernel.org \
--cc=tshibata@ab.jp.nec.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).