From: "Torsten Bögershausen" <tboegi@web.de>
To: git@vger.kernel.org
Cc: tboegi@web.de
Subject: [RFC] i18n.pathencoding
Date: Sat, 1 Sep 2012 08:11:33 +0200 [thread overview]
Message-ID: <201209010811.33994.tboegi@web.de> (raw)
Allow path names to be encoded in UTF-8 in the repository
and checked out as e.g. ISO-8859-1 in the working tree.
Introduce a config variable i18n.pathEncoding.
If empty, no re-encoding of path names is done.
Add t3911 to test encoding back and forth
The re-encoding is done in compat/reencode_pathname.c,
where all file system functions like open(), stat(),
readdir() are re-defined.
reencode_pathname.c includes all functionality from
precompose_utf8.c, which should be removed
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
Please read this as an RFC; there are several limitations:
compat/reencode_pathname.h defines struct dirent_psx with d_name[2].
This is done to test renc_pn_readdir() in compat/reencode_pathname.c
test case t1450 fails even on one of my Linux machines. At first glance
it looks like the same failure which has sometimes been observed on Mac OS X.
compat/precompose_utf8.[ch] had been integrated into reencode_pathname.[ch],
and should be removed.
The patch should work on v1.7.12; it has not been tested against the latest master.
Comments are welcome.
Documentation/config.txt | 10 +
Makefile | 11 +-
builtin/init-db.c | 3 +
cache.h | 1 +
compat/reencode_pathname.c | 441 ++++++++++++++++++++++++++++++++++++++++++
compat/reencode_pathname.h | 72 +++++++
config.c | 3 +
environment.c | 1 +
git-compat-util.h | 20 +-
parse-options.c | 2 +-
t/t3911-i18n-filename-8859.sh | 251 ++++++++++++++++++++++++
wt-status.c | 21 +-
12 files changed, 827 insertions(+), 9 deletions(-)
create mode 100644 compat/reencode_pathname.c
create mode 100644 compat/reencode_pathname.h
create mode 100755 t/t3911-i18n-filename-8859.sh
diff --git a/Documentation/config.txt b/Documentation/config.txt
index a95e5a4..d633d54 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1442,6 +1442,16 @@ i18n.logOutputEncoding::
Character encoding the commit messages are converted to when
running 'git log' and friends.
+i18n.pathEncoding::
+ This option is only used by some implementations of git.
+ When "git init" sets core.supportspathencoding to true,
+ i18n.pathEncoding can be set to re-encode path names when
+ a working tree is checked out.
+ Path names may be e.g. encoded in ISO-8859-1 and are stored as
+ UTF-8 encoded in the repository.
+ When not set, the encoding of path names is the same in working tree
+ and the repository.
+
imap::
The configuration variables in the 'imap' section are described
in linkgit:git-imap-send[1].
diff --git a/Makefile b/Makefile
index 6b0c961..141562e 100644
--- a/Makefile
+++ b/Makefile
@@ -143,6 +143,9 @@ all::
#
# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
#
+# Define PATH_ENCODING if the encoding of file names
+# differs from the encoding in the git repo
+#
# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
# Patrick Mauritz).
#
@@ -595,6 +598,7 @@ LIB_H += compat/bswap.h
LIB_H += compat/cygwin.h
LIB_H += compat/mingw.h
LIB_H += compat/obstack.h
+LIB_H += compat/reencode_pathname.h
LIB_H += compat/precompose_utf8.h
LIB_H += compat/terminal.h
LIB_H += compat/win32/dirent.h
@@ -932,6 +936,7 @@ ifeq ($(uname_S),OSF1)
NO_NSEC = YesPlease
endif
ifeq ($(uname_S),Linux)
+ PATH_ENCODING = YesPlease
NO_STRLCPY = YesPlease
NO_MKSTEMPS = YesPlease
HAVE_PATHS_H = YesPlease
@@ -999,7 +1004,7 @@ ifeq ($(uname_S),Darwin)
NO_MEMMEM = YesPlease
USE_ST_TIMESPEC = YesPlease
HAVE_DEV_TTY = YesPlease
- COMPAT_OBJS += compat/precompose_utf8.o
+ COMPAT_OBJS += compat/reencode_pathname.o
BASIC_CFLAGS += -DPRECOMPOSE_UNICODE
endif
ifeq ($(uname_S),SunOS)
@@ -1591,6 +1596,10 @@ ifdef FREAD_READS_DIRECTORIES
COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
COMPAT_OBJS += compat/fopen.o
endif
+ifdef PATH_ENCODING
+ COMPAT_CFLAGS += -DPATH_ENCODING
+ COMPAT_OBJS += compat/reencode_pathname.o
+endif
ifdef NO_SYMLINK_HEAD
BASIC_CFLAGS += -DNO_SYMLINK_HEAD
endif
diff --git a/builtin/init-db.c b/builtin/init-db.c
index 244fb7f..f159d43 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -291,6 +291,9 @@ static int create_default_files(const char *template_path)
if (!access(path, F_OK))
git_config_set("core.ignorecase", "true");
probe_utf8_pathname_composition(path, len);
+#ifdef PATH_ENCODING
+ git_config_set("core.supportspathencoding", "true");
+#endif
}
return reinit;
diff --git a/cache.h b/cache.h
index 67f28b4..8023767 100644
--- a/cache.h
+++ b/cache.h
@@ -1160,6 +1160,7 @@ extern int user_ident_sufficiently_given(void);
extern const char *git_commit_encoding;
extern const char *git_log_output_encoding;
extern const char *git_mailmap_file;
+extern const char *wt_path_encoding;
/* IO helper functions */
extern void maybe_flush_or_die(FILE *, const char *);
diff --git a/compat/reencode_pathname.c b/compat/reencode_pathname.c
new file mode 100644
index 0000000..3bdc776
--- /dev/null
+++ b/compat/reencode_pathname.c
@@ -0,0 +1,441 @@
+/*
+ * Converts pathnames from one encoding into another.
+ * The pathnames are stored as UTF-8 in the repository,
+ * and might be checked out as e.g. ISO-8859-1 in the working tree
+ *
+ * On MacOS X decomposed unicode is converted into precomposed unicode.
+ */
+
+#define REENCODE_PATHNAME_C
+#include "cache.h"
+#include "utf8.h"
+#include "reencode_pathname.h"
+
+#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6))
+ typedef const char *iconv_ibp;
+#else
+ typedef char *iconv_ibp;
+#endif
+
+/* Encoding used for path names stored in the repository. */
+static const char *repo_path_encoding = "UTF-8";
+
+/*
+ * Open an iconv conversion descriptor that converts from `fromcode`
+ * to `tocode`.  When iconv_open() fails, die_errno() is called with
+ * both encoding names (it is expected not to return).
+ */
+static iconv_t iconv_open_or_die(const char *tocode, const char *fromcode)
+{
+	iconv_t cd = iconv_open(tocode, fromcode);
+
+	if (cd != (iconv_t) -1)
+		return cd;
+	die_errno(_("iconv_open(%s,%s) failed"), tocode, fromcode);
+	return cd;
+}
+
+/*
+ * Count the bytes of `s` that have the high bit set (i.e. are not
+ * 7-bit ASCII).  Scanning stops at the first NUL byte or after
+ * `maxlen` bytes, whichever comes first.
+ *
+ * When `strlen_c` is not NULL it always receives the number of bytes
+ * scanned, including 0 for a NULL or empty `s`, so callers never read
+ * an uninitialized length.
+ *
+ * Returns the number of non-ASCII bytes found (0 means plain ASCII).
+ */
+static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
+{
+	const uint8_t *ptr = (const uint8_t *)s;
+	size_t len = 0;
+	size_t non_ascii = 0;
+
+	if (ptr) {
+		for (; len < maxlen && ptr[len]; len++) {
+			if (ptr[len] & 0x80)
+				non_ascii++;
+		}
+	}
+	if (strlen_c)
+		*strlen_c = len;
+
+	return non_ascii;
+}
+
+#ifdef PRECOMPOSE_UNICODE
+/*
+ * Probe whether a file created under the precomposed (NFC) spelling of
+ * "a-umlaut" is also reachable under its decomposed (NFD) spelling.
+ *
+ * path: writable buffer; the probe file name is written at path + len
+ *       with strcpy(), so the buffer must have room for it.
+ * len:  offset in `path` at which the probe file name is appended.
+ *
+ * Does nothing when precomposed_unicode has already been set (!= -1).
+ * Otherwise, if the probe file could be created, precomposed_unicode is
+ * set to 0 and "core.precomposeunicode = false" is written to the
+ * config when the NFD name is accessible.  The probe file is removed
+ * again; failing to remove it is fatal.
+ */
+void probe_utf8_pathname_composition(char *path, int len)
+{
+ static const char *auml_nfc = "\xc3\xa4";
+ static const char *auml_nfd = "\x61\xcc\x88";
+ int output_fd;
+ if (precomposed_unicode != -1)
+ return; /* We found it defined in the global config, respect it */
+ /* Create the probe file under its precomposed name */
+ strcpy(path + len, auml_nfc);
+ output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
+ if (output_fd >= 0) {
+ close(output_fd);
+ /* Look the same file up under its decomposed name */
+ strcpy(path + len, auml_nfd);
+ /* Indicate to the user, that we can configure it to true */
+ if (!access(path, R_OK))
+ git_config_set("core.precomposeunicode", "false");
+ /* To be backward compatible, set precomposed_unicode to 0 */
+ precomposed_unicode = 0;
+ /* Remove the probe file using the name it was created with */
+ strcpy(path + len, auml_nfc);
+ if (unlink(path))
+ die_errno(_("failed to unlink '%s'"), path);
+ }
+}
+#endif
+
+/*
+ * Re-encode every command line argument that contains non-ASCII bytes
+ * from the working tree encoding to the repository encoding.
+ *
+ * Converted arguments replace the corresponding argv[] entries; the
+ * replacement strings are not freed here.  Arguments that fail to
+ * convert are left untouched.  Nothing happens when no working tree
+ * path encoding is configured.
+ */
+void reencode_argv(int argc, const char **argv)
+{
+	iconv_t conv;
+	int idx;
+
+#ifdef PRECOMPOSE_UNICODE
+	if (precomposed_unicode == 1)
+		wt_path_encoding = "UTF-8-MAC";
+#endif
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return;
+
+	conv = iconv_open_or_die(repo_path_encoding, wt_path_encoding);
+
+	for (idx = 0; idx < argc; idx++) {
+		size_t arglen;
+		char *converted;
+
+		if (!has_non_ascii(argv[idx], (size_t)-1, &arglen))
+			continue;
+		converted = reencode_string_iconv(argv[idx], arglen, conv);
+		if (converted)
+			argv[idx] = converted;
+	}
+	iconv_close(conv);
+}
+
+#ifdef PATH_ENCODING
+/*
+ * Re-encode the NUL-terminated path `in` from the repository encoding
+ * to the working tree encoding.
+ *
+ * Returns a newly allocated string, or NULL when no path encoding is
+ * configured, when `in` is plain ASCII, or when the conversion failed.
+ * errno is restored to its value on entry unless the conversion
+ * failed, so callers can clear errno beforehand to tell the NULL
+ * cases apart.
+ */
+char *str_repo2worktree(const char *in)
+{
+	int saved_errno = errno;
+	char *converted;
+	size_t inlen;
+	iconv_t conv;
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return NULL;
+
+	if (!in)
+		die("str_repo2worktree in == NULL\n");
+
+	if (!has_non_ascii(in, (size_t)-1, &inlen)) {
+		/* nothing to convert */
+		errno = saved_errno;
+		return NULL;
+	}
+
+	conv = iconv_open_or_die(wt_path_encoding, repo_path_encoding);
+	converted = reencode_string_iconv(in, inlen, conv);
+	iconv_close(conv);
+	if (converted)
+		errno = saved_errno;
+
+	return converted;
+}
+
+/*
+ * Re-encode the first `insz` bytes of `in` (fewer if a NUL byte comes
+ * first) from the working tree encoding to the repository encoding.
+ *
+ * Returns a newly allocated string that the caller must free(), or
+ * NULL when no path encoding is configured, when the input is plain
+ * ASCII, or when the conversion failed.  errno is restored to its
+ * previous value only when a converted string is returned.
+ */
+char *str_worktree2repolen(const char *in, size_t insz)
+{
+	char *retvalue = NULL;
+	size_t inlen;
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return NULL;
+
+	if (has_non_ascii(in, insz, &inlen)) {
+		int olderrno = errno;
+		iconv_t my_iconv_worktree2repo;
+
+		my_iconv_worktree2repo = iconv_open_or_die(repo_path_encoding,
+							   wt_path_encoding);
+		/*
+		 * Use the scanned length: it never exceeds insz and stops
+		 * at the first NUL, like str_repo2worktree() does.
+		 */
+		retvalue = reencode_string_iconv(in, inlen, my_iconv_worktree2repo);
+		iconv_close(my_iconv_worktree2repo);
+		if (retvalue)
+			errno = olderrno;
+	}
+	return retvalue;
+}
+
+/*
+ * Convenience wrapper around str_worktree2repolen() for a
+ * NUL-terminated string.  `in` must not be NULL because strlen()
+ * is applied to it.
+ */
+char *str_worktree2repo(const char *in)
+{
+ return str_worktree2repolen(in, strlen(in));
+}
+#endif
+
+/*
+ * Helper macros shared by the renc_pn_*() wrappers.
+ *
+ * RENC_PN_DECL_SAVERRNO_PATH1 / RENC_PN_DECL_PATH2 declare the locals
+ * used by the conversion macros; they must appear in the declaration
+ * part of the function and be followed by a semicolon.
+ *
+ * RENC_PN_CONV_PATH1 / RENC_PN_CONV_PATH2 convert a repository path
+ * into the working tree encoding.  pathN_enc then points either at
+ * the original path (no conversion needed) or at the allocated copy
+ * in pathN_malloc_wt_encoded, which the caller must free().  When the
+ * conversion fails, the macros return from the *calling* function
+ * with errno describing the conversion error.
+ */
+#define RENC_PN_DECL_SAVERRNO_PATH1(path) \
+	int olderrno = errno; \
+	const char *path1_enc = path; \
+	char *path1_malloc_wt_encoded = NULL
+
+#define RENC_PN_DECL_PATH2(path) \
+	const char *path2_enc = path; \
+	char *path2_malloc_wt_encoded = NULL
+
+
+#define RENC_PN_CONV_PATH1(path, erroret) \
+	do { \
+		errno = 0; \
+		path1_malloc_wt_encoded = str_repo2worktree(path); \
+		if (!path1_malloc_wt_encoded && errno) \
+			return erroret; \
+		if (path1_malloc_wt_encoded) \
+			path1_enc = path1_malloc_wt_encoded; \
+		errno = olderrno; \
+	} while (0)
+
+/* Must be used after RENC_PN_CONV_PATH1: frees path 1 on failure. */
+#define RENC_PN_CONV_PATH2(path) \
+	do { \
+		errno = 0; \
+		path2_malloc_wt_encoded = str_repo2worktree(path); \
+		if (!path2_malloc_wt_encoded && errno) { \
+			int conv_errno = errno; \
+			free(path1_malloc_wt_encoded); \
+			errno = conv_errno; \
+			return -1; \
+		} \
+		if (path2_malloc_wt_encoded) \
+			path2_enc = path2_malloc_wt_encoded; \
+		errno = olderrno; \
+	} while (0)
+
+
+/*
+ * opendir() replacement: `dirname` is given in the repository encoding
+ * and converted to the working tree encoding before the directory is
+ * opened.
+ *
+ * Returns a handle for renc_pn_readdir()/renc_pn_closedir(), or NULL
+ * with errno set when the path conversion or opendir() failed.
+ */
+RENC_FN_DIR *renc_pn_opendir(const char *dirname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(dirname);
+	RENC_FN_DIR *renc_pn_dir;
+	DIR *dirp;
+
+#ifdef PRECOMPOSE_UNICODE
+	if (precomposed_unicode == 1)
+		wt_path_encoding = "UTF-8-MAC";
+#endif
+
+	/*
+	 * Convert before allocating anything: the macro returns NULL
+	 * straight from this function when the conversion fails.
+	 */
+	RENC_PN_CONV_PATH1(dirname, NULL);
+
+	dirp = opendir(path1_enc);
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	if (!dirp) {
+		errno = olderrno;
+		return NULL;
+	}
+
+	renc_pn_dir = xmalloc(sizeof(*renc_pn_dir));
+	renc_pn_dir->dirent_utf8 = xmalloc(sizeof(*renc_pn_dir->dirent_utf8));
+	renc_pn_dir->dirent_utf8->max_name_len =
+		sizeof(renc_pn_dir->dirent_utf8->d_name);
+	renc_pn_dir->dirp = dirp;
+	/* the iconv descriptor is opened lazily by renc_pn_readdir() */
+	renc_pn_dir->ic_wt_to_repo = (iconv_t)-1;
+
+	errno = olderrno;
+	return renc_pn_dir;
+}
+
+/*
+ * readdir() replacement: returns the next directory entry with d_name
+ * converted from the working tree encoding to the repository encoding.
+ *
+ * Names that are plain ASCII, or that cannot be converted (e.g. an
+ * illegal byte sequence on a mounted drive), are returned unchanged
+ * and in full rather than treated as an error.  No conversion is done
+ * when no working tree path encoding is configured.
+ *
+ * The returned entry is owned by `renc_pn_dir`: it is overwritten by
+ * the next call and released by renc_pn_closedir().  Returns NULL when
+ * the underlying readdir() returns NULL.  On success errno keeps the
+ * value it had right after readdir().
+ */
+struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *renc_pn_dir)
+{
+	struct dirent *res = readdir(renc_pn_dir->dirp);
+	struct dirent_psx *out;
+	const char *name;
+	char *converted = NULL;
+	size_t namelen;
+	size_t needed;
+	int ret_errno;
+
+	if (!res)
+		return NULL;
+	ret_errno = errno;
+	name = res->d_name;
+
+	if (wt_path_encoding && *wt_path_encoding &&
+	    has_non_ascii(name, (size_t)-1, &namelen)) {
+		/* open the conversion descriptor on first use */
+		if (renc_pn_dir->ic_wt_to_repo == (iconv_t)-1)
+			renc_pn_dir->ic_wt_to_repo =
+				iconv_open_or_die(repo_path_encoding,
+						  wt_path_encoding);
+		converted = reencode_string_iconv(name, namelen,
+						  renc_pn_dir->ic_wt_to_repo);
+		/* on failure let the user see the original name */
+		if (converted)
+			name = converted;
+	}
+
+	/* grow the entry so that the complete name (plus NUL) fits */
+	needed = strlen(name) + 1;
+	out = renc_pn_dir->dirent_utf8;
+	if (needed > out->max_name_len) {
+		out = xrealloc(out, sizeof(*out) - sizeof(out->d_name) + needed);
+		out->max_name_len = needed;
+		renc_pn_dir->dirent_utf8 = out;
+	}
+
+	out->d_ino = res->d_ino;
+	out->d_type = res->d_type;
+	memcpy(out->d_name, name, needed);
+
+	free(converted);
+	errno = ret_errno;
+	return out;
+}
+
+/*
+ * closedir() replacement: closes the directory stream and releases
+ * everything renc_pn_opendir()/renc_pn_readdir() attached to the
+ * handle.  Returns the result of closedir(); errno is the value left
+ * by closedir(), unaffected by the cleanup.
+ */
+int renc_pn_closedir(RENC_FN_DIR *renc_pn_dir)
+{
+	int rc = closedir(renc_pn_dir->dirp);
+	int saved_errno = errno;
+
+	if (renc_pn_dir->ic_wt_to_repo != (iconv_t)-1)
+		iconv_close(renc_pn_dir->ic_wt_to_repo);
+	free(renc_pn_dir->dirent_utf8);
+	free(renc_pn_dir);
+
+	errno = saved_errno;
+	return rc;
+}
+
+/*
+ * mkdir() replacement taking `path` in the repository encoding.
+ * Returns -1 with errno set when the path cannot be converted,
+ * otherwise the result and errno of mkdir().
+ */
+int renc_pn_mkdir(const char *path, mode_t mode)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = mkdir(path1_enc, mode);
+	/* keep the errno of mkdir(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * lstat() replacement taking `path` in the repository encoding.
+ * Returns -1 with errno set when the path cannot be converted,
+ * otherwise the result and errno of lstat().
+ */
+int renc_pn_lstat(const char *path, struct stat *buf)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = lstat(path1_enc, buf);
+	/* keep the errno of lstat(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * stat() replacement taking `path` in the repository encoding.
+ * Returns -1 with errno set when the path cannot be converted,
+ * otherwise the result and errno of stat().
+ */
+int renc_pn_stat(const char *path, struct stat *buf)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = stat(path1_enc, buf);
+	/* keep the errno of stat(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * open() replacement taking `path` in the repository encoding.
+ *
+ * The optional third argument (the creation mode) is only read when
+ * O_CREAT is set in `oflag`; reading a variadic argument the caller
+ * did not pass is undefined behaviour.
+ *
+ * Returns -1 with errno set when the path cannot be converted,
+ * otherwise the result and errno of open().
+ */
+int renc_pn_open(const char *path, int oflag, ... )
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int mode = 0;
+	int ret;
+
+	if (oflag & O_CREAT) {
+		va_list params;
+
+		va_start(params, oflag);
+		mode = va_arg(params, int);
+		va_end(params);
+	}
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = open(path1_enc, oflag, mode);
+	/* keep the errno of open(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * unlink() replacement taking `path` in the repository encoding.
+ * Returns -1 with errno set when the path cannot be converted,
+ * otherwise the result and errno of unlink().
+ */
+int renc_pn_unlink(const char *path)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = unlink(path1_enc);
+	/* keep the errno of unlink(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * fopen() replacement taking `path` in the repository encoding.
+ * Returns NULL with errno set when the path cannot be converted,
+ * otherwise the result and errno of fopen().
+ */
+FILE *renc_pn_fopen(const char *path, const char *mode)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	FILE *ret;
+
+	RENC_PN_CONV_PATH1(path,NULL);
+
+	ret = fopen(path1_enc, mode);
+	/* keep the errno of fopen(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+
+/*
+ * readlink() replacement: `path` is given in the repository encoding,
+ * and the link target stored in `buf` is converted back from the
+ * working tree encoding to the repository encoding.
+ *
+ * As with readlink(), `buf` is not NUL-terminated and a converted
+ * target longer than `bufsiz` is truncated.  When the target cannot
+ * be converted, the bytes delivered by readlink() are left in `buf`.
+ *
+ * Returns the number of bytes placed in `buf`, or -1 with errno set.
+ * errno always carries the value left by readlink() itself, never one
+ * from the conversion or the cleanup.
+ */
+ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	ssize_t ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = readlink(path1_enc, buf, bufsiz);
+	olderrno = errno;
+
+	if (ret > 0) {
+		char *new_buf = str_worktree2repolen(buf, ret);
+
+		if (new_buf) {
+			size_t newlen = strlen(new_buf);
+
+			if (newlen > bufsiz)
+				newlen = bufsiz;
+			memcpy(buf, new_buf, newlen);
+			ret = newlen;
+			free(new_buf);
+		}
+	}
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * symlink() replacement: both the link target `oldname` and the link
+ * name `newname` are given in the repository encoding and converted
+ * to the working tree encoding.
+ * Returns -1 with errno set when either path cannot be converted,
+ * otherwise the result and errno of symlink().
+ */
+int renc_pn_symlink(const char *oldname, const char *newname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(oldname);
+	RENC_PN_DECL_PATH2(newname);
+	int ret;
+
+	RENC_PN_CONV_PATH1(oldname, -1);
+	RENC_PN_CONV_PATH2(newname);
+
+	ret = symlink(path1_enc, path2_enc);
+	/* keep the errno of symlink(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	free(path2_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * rename() replacement: `oldname` and `newname` are given in the
+ * repository encoding and converted to the working tree encoding.
+ * Returns -1 with errno set when either path cannot be converted,
+ * otherwise the result and errno of rename().
+ */
+int renc_pn_rename(const char *oldname, const char *newname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(oldname);
+	RENC_PN_DECL_PATH2(newname);
+	int ret;
+
+	RENC_PN_CONV_PATH1(oldname, -1);
+	RENC_PN_CONV_PATH2(newname);
+
+	ret = rename(path1_enc, path2_enc);
+	/* keep the errno of rename(); free() is allowed to change it */
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	free(path2_malloc_wt_encoded);
+	errno = olderrno;
+
+	return ret;
+}
diff --git a/compat/reencode_pathname.h b/compat/reencode_pathname.h
new file mode 100644
index 0000000..9300ba4
--- /dev/null
+++ b/compat/reencode_pathname.h
@@ -0,0 +1,70 @@
+#ifndef REENCODE_PATHNAME_H
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <iconv.h>
+
+/*
+ * Directory entry handed out by renc_pn_readdir() in place of
+ * struct dirent.  It is allocated on the heap and re-allocated when a
+ * name does not fit, so d_name may be longer than its declared size;
+ * max_name_len records the number of bytes currently available in
+ * d_name.
+ */
+typedef struct dirent_psx {
+ ino_t d_ino; /* Posix */
+ size_t max_name_len; /* See below */
+ unsigned char d_type; /* available on all systems git runs on */
+
+ /*
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html
+ * NAME_MAX + 1 should be enough, but some systems have
+ * NAME_MAX=255 and strlen(d_name) may return 508 or 510
+ * Solution: allocate more when needed, see renc_pn_readdir()
+ */
+ /* deliberately tiny (2 bytes) to exercise the re-allocation path */
+ char d_name[/* NAME_MAX */ 1+1];
+} dirent_psx;
+
+/*
+ * Directory handle returned by renc_pn_opendir(); it stands in for
+ * DIR in all files other than reencode_pathname.c.
+ */
+typedef struct {
+ iconv_t ic_wt_to_repo; /* (iconv_t)-1 until renc_pn_readdir() opens it */
+ DIR *dirp; /* the underlying directory stream */
+ struct dirent_psx *dirent_utf8; /* entry returned by renc_pn_readdir() */
+} RENC_FN_DIR;
+
+void reencode_argv(int argc, const char **argv);
+void probe_utf8_pathname_composition(char *, int);
+
+RENC_FN_DIR *renc_pn_opendir(const char *dirname);
+struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *dirp);
+int renc_pn_closedir(RENC_FN_DIR *dirp);
+
+#ifdef PATH_ENCODING
+/*
+ * String converters between the repository and the working tree
+ * encoding.  They return a newly allocated string, or NULL when
+ * nothing was converted.
+ */
+char *str_repo2worktree(const char *in);
+char *str_worktree2repolen(const char *in, size_t insz);
+char *str_worktree2repo(const char *in);
+
+/* File system wrappers taking path names in the repository encoding. */
+int renc_pn_mkdir(const char *path, mode_t mode);
+int renc_pn_lstat(const char *path, struct stat *buf);
+int renc_pn_stat(const char *path, struct stat *buf);
+int renc_pn_open(const char *path, int oflag, ... );
+int renc_pn_unlink(const char *path);
+FILE *renc_pn_fopen(const char *path, const char *mode);
+ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz);
+int renc_pn_symlink(const char *oldname, const char *newname);
+int renc_pn_rename(const char *oldname, const char *newname);
+#endif
+
+/*
+ * Redirect the file system API to the renc_pn_*() wrappers in every
+ * file that includes this header, except reencode_pathname.c itself
+ * (which defines REENCODE_PATHNAME_C so that it can call the real
+ * functions).
+ */
+#ifndef REENCODE_PATHNAME_C
+#define opendir(n) renc_pn_opendir(n)
+#define readdir(d) renc_pn_readdir(d)
+#define closedir(d) renc_pn_closedir(d)
+#define dirent dirent_psx
+#define DIR RENC_FN_DIR
+
+#ifdef PATH_ENCODING
+#define mkdir(a,b) renc_pn_mkdir((a),(b))
+#define lstat(a,b) renc_pn_lstat((a),(b))
+#define stat(a,b) renc_pn_stat((a),(b))
+/*
+ * NOTE(review): open and unlink are object-like macros, so every use
+ * of these identifiers is renamed, not only function calls.
+ */
+#define open renc_pn_open
+#define unlink renc_pn_unlink
+#define fopen(a,b) renc_pn_fopen((a),(b))
+#define readlink(a,b,c) renc_pn_readlink(a,b,c)
+#define symlink(a,b) renc_pn_symlink(a,b)
+#define rename(a,b) renc_pn_rename(a,b)
+#endif
+
+#endif /* REENCODE_PATHNAME_C */
+#define REENCODE_PATHNAME_H
+#endif /* REENCODE_PATHNAME_H */
diff --git a/config.c b/config.c
index 2b706ea..d591c09 100644
--- a/config.c
+++ b/config.c
@@ -775,6 +775,9 @@ static int git_default_i18n_config(const char *var, const char *value)
if (!strcmp(var, "i18n.logoutputencoding"))
return git_config_string(&git_log_output_encoding, var, value);
+ if (!strcmp(var, "i18n.pathencoding"))
+ return git_config_string(&wt_path_encoding, var, value);
+
/* Add other config variables here and to Documentation/config.txt. */
return 0;
}
diff --git a/environment.c b/environment.c
index 85edd7f..ba81575 100644
--- a/environment.c
+++ b/environment.c
@@ -59,6 +59,7 @@ int grafts_replace_parents = 1;
int core_apply_sparse_checkout;
int merge_log_config = -1;
int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */
+const char *wt_path_encoding = NULL;
struct startup_info *startup_info;
unsigned long pack_size_limit_cfg;
diff --git a/git-compat-util.h b/git-compat-util.h
index 35b095e..877b060 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -153,13 +153,21 @@
#endif
#endif
-/* used on Mac OS X */
-#ifdef PRECOMPOSE_UNICODE
-#include "compat/precompose_utf8.h"
+#if defined(PATH_ENCODING) || defined(PRECOMPOSE_UNICODE)
+#include "compat/reencode_pathname.h"
#else
-#define precompose_str(in,i_nfd2nfc)
-#define precompose_argv(c,v)
-#define probe_utf8_pathname_composition(a,b)
+#define reencode_argv(c,v)
+#endif
+
+/*
+ * Only Mac OS X needs to probe for unicode composition; everywhere
+ * else the call expands to nothing.  The definition must not end in
+ * a semicolon: the call site supplies its own.
+ */
+#ifndef PRECOMPOSE_UNICODE
+#define probe_utf8_pathname_composition(a,b)
+#endif
+
+#ifndef PATH_ENCODING
+#define str_worktree2repolen(in, insz) (NULL)
+#define str_repo2worktree(in) (NULL)
+#define str_worktree2repo(in) (NULL)
#endif
#ifndef NO_LIBGEN_H
diff --git a/parse-options.c b/parse-options.c
index c1c66bd..5840c18 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -476,7 +476,7 @@ int parse_options(int argc, const char **argv, const char *prefix,
usage_with_options(usagestr, options);
}
- precompose_argv(argc, argv);
+ reencode_argv(argc, argv);
return parse_options_end(&ctx);
}
diff --git a/t/t3911-i18n-filename-8859.sh b/t/t3911-i18n-filename-8859.sh
new file mode 100755
index 0000000..aa2be57
--- /dev/null
+++ b/t/t3911-i18n-filename-8859.sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+#
+# Copyright (c) 2010 Torsten Bögershausen
+#
+
+test_description='file system encodings UTF-8 ISO8859-1'
+
+. ./test-lib.sh
+
+fname_UTF_8=`printf '\303\206\302\242'`
+fname_ISO8859_1=`printf '\306\242'`
+Euro_utf8=`printf '\342\202\254'`
+supportspathencoding=`git config core.supportspathencoding` || :
+
+
+add_file_dir_link() {
+ local bname=$1
+ local fname=$2
+ test_expect_success "add file $fname.f $bname" '
+ git checkout master &&
+ git checkout -b add_f_$bname &&
+ >$fname.f &&
+ git add $fname.f &&
+ git commit -m "add fname"
+ '
+
+ test_expect_success "add dir $fname.d $bname" '
+ git checkout master &&
+ git checkout -b add_d_$bname &&
+ mkdir $fname.d &&
+ touch $fname.d/$fname.f &&
+ git add $fname.d/$fname.f &&
+ git commit -m "add fname.d/fname"
+ '
+
+ i=0
+ for src in x $fname; do
+ for dst in x $fname; do
+ test_expect_success "add link $dst.l->$src.f on branch add_l_${i}_$bname" '
+ git checkout master &&
+ git checkout -b add_l_${i}_$bname &&
+ ln -s $src.f $dst.l &&
+ git add $dst.l &&
+ git commit -m "add fname.l $i"
+ '
+ i=$(($i+1))
+ done
+ done
+}
+
+test_expect_success "setup add rm x" '
+ >x &&
+ git add x &&
+ git commit -m "1st commit" &&
+ git rm x &&
+ git commit -m "rm x"
+'
+
+#combinations to be tested:
+# UTF-8 -> ISO8859-1
+# ISO8859-1 -> UTF-8
+
+if test "$supportspathencoding"
+then
+ srcencodings="ISO8859-1 UTF-8"
+ for srcenc in $srcencodings
+ do
+ case $srcenc in
+ ISO8859-1)
+ dstenc=UTF-8
+ ;;
+ UTF-8)
+ dstenc=ISO8859-1
+ ;;
+ UTF-8-MAC)
+ dstenc=UTF-8
+ ;;
+ *)
+ echo >&2 "Wrong encoding $srcenc"
+ exit 1
+ ;;
+ esac
+ eval fname_src=\$fname_$(echo $srcenc | sed -e 's/-/_/g' -e 's/_MAC//')
+ eval fname_dst=\$fname_$(echo $dstenc | sed -e 's/-/_/g')
+ test_expect_success "setup $srcenc" '
+ git checkout master &&
+ git config i18n.pathencoding $srcenc
+ '
+ add_file_dir_link $srcenc $fname_src
+
+		# Keep the && chain intact so that a failing step fails the test
+		test_expect_success "setup $dstenc" '
+			git checkout master &&
+			echo "git checkout Master" >&2 &&
+			ls -l >&2 &&
+			git config i18n.pathencoding $dstenc
+		'
+
+ test_expect_success "checkout file $dstenc (was $srcenc)" '
+ git checkout add_f_$srcenc
+ '
+
+ test_expect_success "exists file $dstenc (was $srcenc)" '
+ test -f $fname_dst.f
+ '
+
+ test_expect_success "log file $dstenc (was $srcenc)" '
+ git log $fname_dst.f
+ '
+
+ test_expect_success "git mv" '
+ git checkout -b mv_file_$srcenc &&
+ git mv $fname_dst.f XX.f &&
+ git commit -m "git mv fname_dst.f XX.f"
+ '
+
+ test_expect_success "checkout dir $dstenc (was $srcenc)" '
+ git checkout add_d_$srcenc
+ '
+
+ test_expect_success "exist dir $dstenc (was $srcenc)" '
+ test -d $fname_dst.d
+ '
+
+ test_expect_success "log dir $dstenc (was $srcenc)" '
+ git log $fname_dst.d
+ '
+
+ i=0
+ for src in x $fname_dst; do
+ for dst in x $fname_dst; do
+ test_expect_success "checkout link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ git checkout add_l_${i}_$srcenc
+ '
+ test_expect_success "exist link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ test -L $dst.l
+ '
+ test_expect_success "log link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ git log $dst.l
+ '
+ test_expect_success "readlink $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ echo "$src.f" >expect &&
+ readlink "$dst.l" > actual &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+ i=$(($i+1))
+ done
+ done
+ done
+ # Make sure that Euro sign can NOT be checked out in 8859
+ #fname_src=Euro
+ test_expect_success "setup UTF-8" '
+ git checkout master &&
+ git config i18n.pathencoding UTF-8
+ '
+ add_file_dir_link Euro $Euro_utf8
+
+ test_expect_success "setup ISO8859-1" '
+ git checkout master &&
+ rm -rf * &&
+ git config i18n.pathencoding ISO8859-1
+ '
+ test_expect_success "checkout file Euro branch add_f_Euro" '
+ git checkout add_f_Euro
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+ test_expect_success "checkout dir Euro branch add_d_Euro" '
+ rm -rf * &&
+ test_must_fail git checkout add_d_Euro
+ '
+
+ test_expect_success "Cleanup" '
+ git config i18n.pathencoding UTF-8 &&
+ git checkout master &&
+ rm -rf * &&
+ git reset --hard &&
+ git config i18n.pathencoding ISO8859-1
+ '
+
+ test_expect_success "checkout link Euro.l->x.f branch add_l_1_Euro" '
+ ! git checkout add_l_1_Euro
+ '
+
+ test_expect_success "No link Euro.l->x.f" '
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+ test_expect_success "Cleanup after Euro.l->x.f" '
+ git config i18n.pathencoding UTF-8 &&
+ git checkout master &&
+ rm -rf * &&
+ git reset --hard &&
+ git config i18n.pathencoding ISO8859-1
+ '
+
+ # Checking out a soft link pointing to a filename outside
+ # 8859-1 should fail
+ test_expect_failure "checkout link x.l->Euro.f branch add_l_2_Euro" '
+ ! git checkout add_l_2_Euro
+ '
+
+ test_expect_success "No link x.f->Euro.l" '
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+ test_expect_success "Cleanup after link x.l->Euro.f branch" '
+ git config i18n.pathencoding UTF-8 &&
+ git checkout master &&
+ rm -rf * &&
+ git reset --hard &&
+ git config i18n.pathencoding ISO8859-1
+ '
+
+ test_expect_success "checkout link Euro.l->Euro.f branch add_l_3_Euro" '
+ ! git checkout add_l_3_Euro
+ '
+
+ test_expect_success "No link Euro.l->Euro.f" '
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+else
+	# fname_src is only set in the branch above; use the ISO8859-1
+	# file name directly so that the file is not simply called ".f"
+	test_expect_success "setup 8859" '
+		git config i18n.pathencoding ISO8859-1 &&
+		git checkout -b add_file_8859 &&
+		> $fname_ISO8859_1.f &&
+		git add $fname_ISO8859_1.f &&
+		git commit -m "add fname_src" &&
+		git config i18n.pathencoding UTF-8 &&
+		rm -rf * &&
+		git reset --hard
+	'
+ test_expect_success "Silent support of pathencoding" '
+ test_must_fail test -f $fname_UTF_8.f
+ '
+fi
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index c110cbc..1590caa 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -233,7 +233,26 @@ static void wt_status_print_trailer(struct wt_status *s)
status_printf_ln(s, color(WT_STATUS_HEADER, s), "");
}
-#define quote_path quote_path_relative
+#ifdef PATH_ENCODING
+/*
+ * Like quote_path_relative(), but `in` is first converted from the
+ * repository encoding to the working tree encoding.  `len` is ignored:
+ * `in` is always treated as a NUL-terminated string.
+ */
+char *quote_path_repo2worktree(const char *in, int len,
+			       struct strbuf *out, const char *prefix)
+{
+	char *converted = str_repo2worktree(in);
+	char *quoted;
+
+	(void)len;
+
+	/* fall back to the original path when nothing was converted */
+	quoted = quote_path_relative(converted ? converted : in, -1, out, prefix);
+	free(converted);
+	return quoted;
+}
+ #define quote_path quote_path_repo2worktree
+#else
+ #define quote_path quote_path_relative
+#endif
static void wt_status_print_unmerged_data(struct wt_status *s,
struct string_list_item *it)
--
1.7.12
next reply other threads:[~2012-09-01 6:12 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-01 6:11 Torsten Bögershausen [this message]
2012-09-02 22:59 ` [RFC] i18n.pathencoding Robin Rosenberg
2012-09-08 10:09 ` Torsten Bögershausen
2012-09-04 12:23 ` Nguyen Thai Ngoc Duy
2012-09-04 17:19 ` Junio C Hamano
2012-09-04 19:51 ` Torsten Bögershausen
2012-09-04 20:12 ` Junio C Hamano
2012-09-05 19:52 ` Torsten Bögershausen
2012-09-05 11:11 ` Nguyen Thai Ngoc Duy
2012-09-05 19:49 ` Torsten Bögershausen
2012-09-06 3:24 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201209010811.33994.tboegi@web.de \
--to=tboegi@web.de \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.