From: "Torsten Bögershausen" <tboegi@web.de>
To: git@vger.kernel.org
Cc: tboegi@web.de
Subject: [RFC] i18n.pathencoding
Date: Sat, 1 Sep 2012 08:11:33 +0200 [thread overview]
Message-ID: <201209010811.33994.tboegi@web.de> (raw)
Allow path names to be encoded in UTF-8 in the repository
and checked out as e.g. ISO-8859-1 in the working tree.
Introduce a config variable i18n.pathEncoding.
If empty, no re-encoding of path names is done.
Add t3911 to test encoding back and forth
The re-encoding is done in compat/reencode_pathname.c,
where all file system functions like open(), stat(),
readdir() are re-defined.
reencode_pathname.c includes all functionality from
precompose_utf8.c, which should be removed
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
Please read this as an RFC, so there are several limitations:
compat/reencode_pathname.h defines struct dirent_psx with d_name[2].
This is done to test renc_pn_readdir() in compat/reencode_pathname.c
test case t1450 fails even on one of my linux machines. At first glance
it looks as the same failure which has been sometimes observed on Mac OS X.
compat/precompose_utf8.[ch] had been integrated into reencode_pathname.[ch],
and should be removed.
The patch should work on v1.7.12, it's not tested against latest master
Comments are welcome.
Documentation/config.txt | 10 +
Makefile | 11 +-
builtin/init-db.c | 3 +
cache.h | 1 +
compat/reencode_pathname.c | 441 ++++++++++++++++++++++++++++++++++++++++++
compat/reencode_pathname.h | 72 +++++++
config.c | 3 +
environment.c | 1 +
git-compat-util.h | 20 +-
parse-options.c | 2 +-
t/t3911-i18n-filename-8859.sh | 251 ++++++++++++++++++++++++
wt-status.c | 21 +-
12 files changed, 827 insertions(+), 9 deletions(-)
create mode 100644 compat/reencode_pathname.c
create mode 100644 compat/reencode_pathname.h
create mode 100755 t/t3911-i18n-filename-8859.sh
diff --git a/Documentation/config.txt b/Documentation/config.txt
index a95e5a4..d633d54 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1442,6 +1442,16 @@ i18n.logOutputEncoding::
Character encoding the commit messages are converted to when
running 'git log' and friends.
+i18n.pathEncoding::
+ This option is only used by some implementations of git.
+ When "git init" sets core.supportspathencoding to true,
+ i18n.pathEncoding can be set to re-encode path names when
+ a working tree is checked out.
+ Path names may be e.g. encoded in ISO-8859-1 and are stored as
+ UTF-8 encoded in the repository.
+ When not set, the encoding of path names is the same in working tree
+ and the repository.
+
imap::
The configuration variables in the 'imap' section are described
in linkgit:git-imap-send[1].
diff --git a/Makefile b/Makefile
index 6b0c961..141562e 100644
--- a/Makefile
+++ b/Makefile
@@ -143,6 +143,9 @@ all::
#
# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
#
+# Define PATH_ENCODING if the encoding of file names
+# differs from the encoding in the git repo
+#
# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
# Patrick Mauritz).
#
@@ -595,6 +598,7 @@ LIB_H += compat/bswap.h
LIB_H += compat/cygwin.h
LIB_H += compat/mingw.h
LIB_H += compat/obstack.h
+LIB_H += compat/reencode_pathname.h
LIB_H += compat/precompose_utf8.h
LIB_H += compat/terminal.h
LIB_H += compat/win32/dirent.h
@@ -932,6 +936,7 @@ ifeq ($(uname_S),OSF1)
NO_NSEC = YesPlease
endif
ifeq ($(uname_S),Linux)
+ PATH_ENCODING = YesPlease
NO_STRLCPY = YesPlease
NO_MKSTEMPS = YesPlease
HAVE_PATHS_H = YesPlease
@@ -999,7 +1004,7 @@ ifeq ($(uname_S),Darwin)
NO_MEMMEM = YesPlease
USE_ST_TIMESPEC = YesPlease
HAVE_DEV_TTY = YesPlease
- COMPAT_OBJS += compat/precompose_utf8.o
+ COMPAT_OBJS += compat/reencode_pathname.o
BASIC_CFLAGS += -DPRECOMPOSE_UNICODE
endif
ifeq ($(uname_S),SunOS)
@@ -1591,6 +1596,10 @@ ifdef FREAD_READS_DIRECTORIES
COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
COMPAT_OBJS += compat/fopen.o
endif
+ifdef PATH_ENCODING
+ COMPAT_CFLAGS += -DPATH_ENCODING
+ COMPAT_OBJS += compat/reencode_pathname.o
+endif
ifdef NO_SYMLINK_HEAD
BASIC_CFLAGS += -DNO_SYMLINK_HEAD
endif
diff --git a/builtin/init-db.c b/builtin/init-db.c
index 244fb7f..f159d43 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -291,6 +291,9 @@ static int create_default_files(const char *template_path)
if (!access(path, F_OK))
git_config_set("core.ignorecase", "true");
probe_utf8_pathname_composition(path, len);
+#ifdef PATH_ENCODING
+ git_config_set("core.supportspathencoding", "true");
+#endif
}
return reinit;
diff --git a/cache.h b/cache.h
index 67f28b4..8023767 100644
--- a/cache.h
+++ b/cache.h
@@ -1160,6 +1160,7 @@ extern int user_ident_sufficiently_given(void);
extern const char *git_commit_encoding;
extern const char *git_log_output_encoding;
extern const char *git_mailmap_file;
+extern const char *wt_path_encoding;
/* IO helper functions */
extern void maybe_flush_or_die(FILE *, const char *);
diff --git a/compat/reencode_pathname.c b/compat/reencode_pathname.c
new file mode 100644
index 0000000..3bdc776
--- /dev/null
+++ b/compat/reencode_pathname.c
@@ -0,0 +1,441 @@
+/*
+ * Converts pathnames from one encoding into another.
+ * The pathnames are stored as UTF-8 in the repository,
+ * and might be checked out as e.g. ISO-8859-1 in the working tree
+ *
+ * On MacOS X decomposed unicode is converted into precomposed unicode.
+ */
+
+#define REENCODE_PATHNAME_C
+#include "cache.h"
+#include "utf8.h"
+#include "reencode_pathname.h"
+
+#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6))
+ typedef const char *iconv_ibp;
+#else
+ typedef char *iconv_ibp;
+#endif
+
+/* Encoding of the path names as they are stored in the repository. */
+static const char *repo_path_encoding = "UTF-8";
+
+/*
+ * Open a conversion descriptor that converts from "fromcode" to "tocode".
+ * The returned descriptor is always usable: when iconv_open() fails the
+ * process dies with a message that includes the errno text.
+ */
+static iconv_t iconv_open_or_die(const char *tocode, const char *fromcode)
+{
+	iconv_t cd = iconv_open(tocode, fromcode);
+
+	if (cd == (iconv_t) -1)
+		die_errno(_("iconv_open(%s,%s) failed"), tocode, fromcode);
+	return cd;
+}
+
+/*
+ * Count the bytes in "s" that have the high bit set, i.e. that are not
+ * 7-bit ASCII.  At most "maxlen" bytes are examined and the scan stops at
+ * the first NUL; pass (size_t)-1 for a plain NUL-terminated string.
+ *
+ * If "strlen_c" is not NULL it always receives the number of bytes
+ * examined (0 for a NULL or empty "s").
+ *
+ * Returns the number of non-ASCII bytes found; 0 means the string can be
+ * used without re-encoding.
+ */
+static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
+{
+	const uint8_t *ptr = (const uint8_t *)s;
+	size_t strlen_chars = 0;
+	size_t ret = 0;
+
+	if (ptr) {
+		/*
+		 * Test maxlen first: the buffer may be exactly maxlen bytes
+		 * long and not NUL-terminated (e.g. the result of readlink()),
+		 * so ptr[maxlen] must never be read.
+		 */
+		while (maxlen && *ptr) {
+			if (*ptr & 0x80)
+				ret++;
+			strlen_chars++;
+			ptr++;
+			maxlen--;
+		}
+	}
+	if (strlen_c)
+		*strlen_c = strlen_chars;
+
+	return ret;
+}
+
+#ifdef PRECOMPOSE_UNICODE
+/*
+ * Probe whether the file system treats the precomposed and the decomposed
+ * spelling of the same character as the same file name.
+ *
+ * "path" is used as scratch space: the probe name is written at offset
+ * "len", so the caller must provide room for it behind the first "len"
+ * bytes.  Nothing is done when precomposed_unicode has already been set
+ * to something other than -1.
+ */
+void probe_utf8_pathname_composition(char *path, int len)
+{
+ /* "a with diaeresis" as one precomposed code point (NFC) */
+ static const char *auml_nfc = "\xc3\xa4";
+ /* the same character as 'a' followed by a combining diaeresis (NFD) */
+ static const char *auml_nfd = "\x61\xcc\x88";
+ int output_fd;
+ if (precomposed_unicode != -1)
+ return; /* We found it defined in the global config, respect it */
+ /* create a file under the precomposed name ... */
+ strcpy(path + len, auml_nfc);
+ output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
+ if (output_fd >= 0) {
+ close(output_fd);
+ /* ... and check whether it is reachable under the decomposed name */
+ strcpy(path + len, auml_nfd);
+ /* Indicate to the user, that we can configure it to true */
+ if (!access(path, R_OK))
+ git_config_set("core.precomposeunicode", "false");
+ /* To be backward compatible, set precomposed_unicode to 0 */
+ precomposed_unicode = 0;
+ /* remove the probe file again, using the name it was created with */
+ strcpy(path + len, auml_nfc);
+ if (unlink(path))
+ die_errno(_("failed to unlink '%s'"), path);
+ }
+}
+#endif
+
+/*
+ * Re-encode, in place, every element of argv that contains non-ASCII bytes
+ * from the working tree encoding into the repository encoding.
+ *
+ * Nothing is done when no working tree encoding is configured.  An argument
+ * that cannot be converted is left as it is.  The converted strings replace
+ * the original argv entries and are not freed here.
+ */
+void reencode_argv(int argc, const char **argv)
+{
+	iconv_t conv;
+	int idx;
+
+#ifdef PRECOMPOSE_UNICODE
+	if (precomposed_unicode == 1)
+		wt_path_encoding = "UTF-8-MAC";
+#endif
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return;
+
+	conv = iconv_open_or_die(repo_path_encoding, wt_path_encoding);
+
+	for (idx = 0; idx < argc; idx++) {
+		size_t arglen;
+		char *converted;
+
+		if (!has_non_ascii(argv[idx], (size_t)-1, &arglen))
+			continue;
+		converted = reencode_string_iconv(argv[idx], arglen, conv);
+		if (converted)
+			argv[idx] = converted;
+	}
+	iconv_close(conv);
+}
+
+#ifdef PATH_ENCODING
+/*
+ * Convert the NUL-terminated path "in" from the repository encoding to
+ * the working tree encoding.
+ *
+ * Returns a newly allocated string (the callers in this file free() it),
+ * or NULL when no working tree encoding is configured, when "in" is pure
+ * ASCII, or when the conversion failed.  errno is restored to its value on
+ * entry unless the conversion failed, so callers that clear errno first
+ * can tell "nothing to do" (errno == 0) from a failure.
+ * Dies when "in" is NULL while an encoding is configured.
+ */
+char *str_repo2worktree(const char *in)
+{
+	const int saved_errno = errno;
+	iconv_t conv;
+	char *converted;
+	size_t inlen;
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return NULL;
+
+	if (!in)
+		die("str_repo2worktree in == NULL\n");
+
+	if (!has_non_ascii(in, (size_t)-1, &inlen)) {
+		/* pure ASCII: identical in both encodings */
+		errno = saved_errno;
+		return NULL;
+	}
+
+	conv = iconv_open_or_die(wt_path_encoding, repo_path_encoding);
+	converted = reencode_string_iconv(in, inlen, conv);
+	iconv_close(conv);
+	if (converted)
+		errno = saved_errno;
+
+	return converted;
+}
+
+/*
+ * Convert at most "insz" bytes of "in" from the working tree encoding to
+ * the repository encoding.  "in" does not need to be NUL-terminated as
+ * long as "insz" bytes are readable; a NUL inside that range ends the
+ * input.
+ *
+ * Returns a newly allocated string, or NULL when no working tree encoding
+ * is configured, when the input is NULL or pure ASCII, or when the
+ * conversion failed.  errno is left untouched unless the conversion
+ * failed.
+ */
+char *str_worktree2repolen(const char *in, size_t insz)
+{
+	char *retvalue = NULL;
+	size_t inlen;
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return NULL;
+
+	if (has_non_ascii(in, insz, &inlen)) {
+		int olderrno = errno;
+		iconv_t my_iconv_worktree2repo;
+
+		my_iconv_worktree2repo = iconv_open_or_die(repo_path_encoding,
+							   wt_path_encoding);
+		/*
+		 * Convert the measured length, not insz: insz is only an
+		 * upper bound and may reach beyond the terminating NUL.
+		 */
+		retvalue = reencode_string_iconv(in, inlen, my_iconv_worktree2repo);
+		iconv_close(my_iconv_worktree2repo);
+		if (retvalue)
+			errno = olderrno;
+	}
+	return retvalue;
+}
+
+/*
+ * Convert the NUL-terminated string "in" from the working tree encoding
+ * to the repository encoding; see str_worktree2repolen() for the return
+ * value.  A NULL "in" yields NULL, like str_worktree2repolen() does.
+ */
+char *str_worktree2repo(const char *in)
+{
+	if (!in)
+		return NULL;	/* strlen(NULL) is undefined */
+	return str_worktree2repolen(in, strlen(in));
+}
+#endif
+
+/*
+ * Helper macros for the renc_pn_*() wrappers.
+ *
+ * RENC_PN_DECL_SAVERRNO_PATH1 / RENC_PN_DECL_PATH2 declare the locals
+ * (olderrno, pathN_enc, pathN_malloc_wt_encoded) and must be placed among
+ * the declarations of the wrapper.
+ *
+ * RENC_PN_CONV_PATH1 / RENC_PN_CONV_PATH2 convert the path into the
+ * working tree encoding.  Afterwards pathN_enc points to the name to hand
+ * to the system call and pathN_malloc_wt_encoded must be free()d by the
+ * wrapper (it is NULL when no conversion was needed).  When the conversion
+ * fails the macros RETURN from the enclosing function with errno set by
+ * the conversion.
+ */
+#define RENC_PN_DECL_SAVERRNO_PATH1(path) \
+	int olderrno = errno; \
+	const char *path1_enc = path; \
+	char *path1_malloc_wt_encoded = NULL
+
+#define RENC_PN_DECL_PATH2(path) \
+	const char *path2_enc = path; \
+	char *path2_malloc_wt_encoded = NULL
+
+
+#define RENC_PN_CONV_PATH1(path, erroret) \
+	errno=0; \
+	path1_malloc_wt_encoded = str_repo2worktree(path); \
+	if (!path1_malloc_wt_encoded && errno) { \
+		return erroret; \
+	} \
+	if (path1_malloc_wt_encoded) \
+		path1_enc = path1_malloc_wt_encoded; \
+	errno = olderrno;
+
+/*
+ * Must be used after RENC_PN_CONV_PATH1: on failure the already converted
+ * first path is released before returning, keeping the conversion errno.
+ */
+#define RENC_PN_CONV_PATH2(path) \
+	errno=0; \
+	path2_malloc_wt_encoded = str_repo2worktree(path); \
+	if (!path2_malloc_wt_encoded && errno) { \
+		int renc_pn_conv_errno = errno; \
+		free(path1_malloc_wt_encoded); \
+		errno = renc_pn_conv_errno; \
+		return -1; \
+	} \
+	if (path2_malloc_wt_encoded) \
+		path2_enc = path2_malloc_wt_encoded; \
+	errno = olderrno;
+
+
+
+
+/*
+ * opendir() replacement.  "dirname" is given in the repository encoding
+ * and converted to the working tree encoding before the directory is
+ * opened.
+ *
+ * Returns a handle for renc_pn_readdir()/renc_pn_closedir(), or NULL with
+ * errno set when the name cannot be converted or opendir() fails.
+ */
+RENC_FN_DIR *renc_pn_opendir(const char *dirname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(dirname);
+	RENC_FN_DIR *renc_pn_dir;
+
+#ifdef PRECOMPOSE_UNICODE
+	if (precomposed_unicode == 1)
+		wt_path_encoding = "UTF-8-MAC";
+#endif
+
+	/*
+	 * Convert first: the macro returns from this function on failure,
+	 * so nothing may have been allocated yet.
+	 */
+	RENC_PN_CONV_PATH1(dirname, NULL);
+
+	renc_pn_dir = xmalloc(sizeof(RENC_FN_DIR));
+	renc_pn_dir->dirent_utf8 = xmalloc(sizeof(dirent_psx));
+	renc_pn_dir->dirent_utf8->max_name_len = sizeof(renc_pn_dir->dirent_utf8->d_name);
+	/* the conversion descriptor is opened lazily by renc_pn_readdir() */
+	renc_pn_dir->ic_wt_to_repo = (iconv_t)-1;
+
+	renc_pn_dir->dirp = opendir(path1_enc);
+	olderrno = errno;
+	free(path1_malloc_wt_encoded);
+	if (!renc_pn_dir->dirp) {
+		free(renc_pn_dir->dirent_utf8);
+		free(renc_pn_dir);
+		renc_pn_dir = NULL;
+	}
+	/* report opendir()'s errno, not whatever free() left behind */
+	errno = olderrno;
+	return renc_pn_dir;
+}
+
+/*
+ * readdir() replacement.  Returns the next directory entry with d_name
+ * converted from the working tree encoding to the repository encoding,
+ * or NULL when readdir() returns NULL.
+ *
+ * The returned entry is owned by the handle: it is overwritten (and may
+ * be reallocated) by the next call and released by renc_pn_closedir().
+ *
+ * A name that cannot be converted is returned unconverted rather than
+ * treated as an error; illegal byte sequences may show up e.g. on mounted
+ * drives and are not worth die()ing for.
+ */
+struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *renc_pn_dir)
+{
+	struct dirent_psx *ent;
+	struct dirent *res;
+	size_t namelenz;
+	size_t new_len_needed = 0;
+	int copy_raw = 0;	/* 1: hand out res->d_name unconverted */
+	int ret_errno;
+
+	res = readdir(renc_pn_dir->dirp);
+	if (!res)
+		return NULL;
+
+	namelenz = strlen(res->d_name) + 1; /* \0 */
+	ret_errno = errno;
+
+	ent = renc_pn_dir->dirent_utf8;
+	ent->d_ino = res->d_ino;
+	ent->d_type = res->d_type;
+	do {
+		if (new_len_needed > ent->max_name_len) {
+			/* grow the entry so that d_name holds new_len_needed bytes */
+			size_t new_len = sizeof(dirent_psx) + new_len_needed -
+					 sizeof(ent->d_name);
+
+			ent = xrealloc(ent, new_len);
+			ent->max_name_len = new_len_needed;
+			renc_pn_dir->dirent_utf8 = ent;
+		}
+
+		if (!copy_raw && wt_path_encoding && *wt_path_encoding &&
+		    has_non_ascii(res->d_name, (size_t)-1, NULL)) {
+			iconv_ibp cp = (iconv_ibp)res->d_name;
+			size_t inleft = namelenz;
+			char *outpos = &ent->d_name[0];
+			size_t outsz = ent->max_name_len;
+
+			errno = 0;
+			if (renc_pn_dir->ic_wt_to_repo == (iconv_t)-1)
+				renc_pn_dir->ic_wt_to_repo = iconv_open_or_die(repo_path_encoding,
+									       wt_path_encoding);
+			if ((size_t)-1 != iconv(renc_pn_dir->ic_wt_to_repo,
+						&cp, &inleft, &outpos, &outsz))
+				break; /* Conversion OK, we are done */
+			if (errno == E2BIG) {
+				/* d_name is too small: measure the converted name */
+				char *tmp = reencode_string_iconv(res->d_name, namelenz,
+								  renc_pn_dir->ic_wt_to_repo);
+				if (!tmp) {
+					copy_raw = 1;
+				} else {
+					new_len_needed = strlen(tmp) + 1; /* \0 */
+					free(tmp);
+					/* no progress possible: do not loop forever */
+					if (new_len_needed <= ent->max_name_len)
+						copy_raw = 1;
+				}
+			} else {
+				/*
+				 * iconv() failed and errno could be EILSEQ, EINVAL, EBADF.
+				 * Let the user see the original name.
+				 */
+				copy_raw = 1;
+			}
+		} else {
+			copy_raw = 1;
+		}
+
+		/* the unconverted name must fit completely, too */
+		if (copy_raw && namelenz > ent->max_name_len)
+			new_len_needed = namelenz;
+	} while (new_len_needed > ent->max_name_len);
+
+	if (copy_raw)
+		strlcpy(ent->d_name, res->d_name, ent->max_name_len);
+
+	errno = ret_errno;
+	return ent;
+}
+
+/*
+ * closedir() replacement: closes the directory stream and releases
+ * everything that belongs to the handle (conversion descriptor, entry
+ * buffer and the handle itself).
+ * Returns the result of closedir(), with errno as closedir() left it.
+ */
+int renc_pn_closedir(RENC_FN_DIR *renc_pn_dir)
+{
+	const int rc = closedir(renc_pn_dir->dirp);
+	const int saved_errno = errno;
+
+	if (renc_pn_dir->ic_wt_to_repo != (iconv_t)-1)
+		iconv_close(renc_pn_dir->ic_wt_to_repo);
+	free(renc_pn_dir->dirent_utf8);
+	free(renc_pn_dir);
+
+	errno = saved_errno;
+	return rc;
+}
+
+/*
+ * mkdir() replacement: "path" is given in the repository encoding and is
+ * converted to the working tree encoding before the directory is created.
+ * Returns what mkdir() returns, or -1 when the path cannot be converted.
+ */
+int renc_pn_mkdir(const char *path, mode_t mode)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = mkdir(path1_enc, mode);
+	olderrno = errno;	/* free() must not disturb mkdir()'s errno */
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * lstat() replacement: "path" is given in the repository encoding and is
+ * converted to the working tree encoding before lstat() is called.
+ * Returns what lstat() returns, or -1 when the path cannot be converted.
+ */
+int renc_pn_lstat(const char *path, struct stat *buf)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = lstat(path1_enc, buf);
+	olderrno = errno;	/* free() must not disturb lstat()'s errno */
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * stat() replacement: "path" is given in the repository encoding and is
+ * converted to the working tree encoding before stat() is called.
+ * Returns what stat() returns, or -1 when the path cannot be converted.
+ */
+int renc_pn_stat(const char *path, struct stat *buf)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = stat(path1_enc, buf);
+	olderrno = errno;	/* free() must not disturb stat()'s errno */
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * open() replacement: "path" is given in the repository encoding and is
+ * converted to the working tree encoding before the file is opened.
+ * Like open(), the optional third argument is the creation mode.
+ * Returns what open() returns, or -1 when the path cannot be converted.
+ */
+int renc_pn_open(const char *path, int oflag, ... )
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	va_list params;
+	int needs_mode = (oflag & O_CREAT) != 0;
+	int mode = 0;
+	int ret;
+
+#ifdef O_TMPFILE
+	if ((oflag & O_TMPFILE) == O_TMPFILE)
+		needs_mode = 1;
+#endif
+	/*
+	 * The mode argument only exists when the flags ask for file
+	 * creation; reading a variadic argument that was not passed is
+	 * undefined behaviour.
+	 */
+	if (needs_mode) {
+		va_start(params, oflag);
+		mode = va_arg(params, int);
+		va_end(params);
+	}
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = open(path1_enc, oflag, mode);
+	olderrno = errno;	/* free() must not disturb open()'s errno */
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * unlink() replacement: "path" is given in the repository encoding and is
+ * converted to the working tree encoding before the file is removed.
+ * Returns what unlink() returns, or -1 when the path cannot be converted.
+ */
+int renc_pn_unlink(const char *path)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = unlink(path1_enc);
+	olderrno = errno;	/* free() must not disturb unlink()'s errno */
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * fopen() replacement: "path" is given in the repository encoding and is
+ * converted to the working tree encoding before the file is opened.
+ * Returns what fopen() returns, or NULL when the path cannot be converted.
+ */
+FILE *renc_pn_fopen(const char *path, const char *mode)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	FILE *ret;
+
+	RENC_PN_CONV_PATH1(path, NULL);
+
+	ret = fopen(path1_enc, mode);
+	olderrno = errno;	/* free() must not disturb fopen()'s errno */
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+
+/*
+ * readlink() replacement.  "path" is given in the repository encoding and
+ * converted to the working tree encoding; the link target read from the
+ * file system is converted back to the repository encoding.
+ *
+ * Like readlink(), the result in "buf" is not NUL-terminated and is
+ * silently truncated to "bufsiz" bytes.  A target that cannot be
+ * converted is returned as read.  Returns the number of bytes placed in
+ * "buf", or -1 with errno set when the path cannot be converted or
+ * readlink() fails.
+ */
+ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	ssize_t ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = readlink(path1_enc, buf, bufsiz);
+	/*
+	 * Remember readlink()'s errno; neither the best-effort conversion
+	 * of the target nor free() may change what the caller sees.
+	 */
+	olderrno = errno;
+
+	if (ret > 0) {
+		char *new_buf = str_worktree2repolen(buf, ret);
+		if (new_buf) {
+			size_t newlen = strlen(new_buf);
+			if (newlen > bufsiz)
+				newlen = bufsiz;
+			memcpy(buf, new_buf, newlen);
+			ret = newlen;
+			free(new_buf);
+		}
+	}
+	free(path1_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * symlink() replacement: both the link target ("oldname") and the name of
+ * the link ("newname") are given in the repository encoding and converted
+ * to the working tree encoding before the link is created.
+ * Returns what symlink() returns, or -1 when a name cannot be converted.
+ */
+int renc_pn_symlink(const char *oldname, const char *newname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(oldname);
+	RENC_PN_DECL_PATH2(newname);
+	int ret;
+
+	RENC_PN_CONV_PATH1(oldname, -1);
+	RENC_PN_CONV_PATH2(newname);
+
+	ret = symlink(path1_enc, path2_enc);
+	olderrno = errno;	/* free() must not disturb symlink()'s errno */
+	free(path1_malloc_wt_encoded);
+	free(path2_malloc_wt_encoded);
+	errno = olderrno;
+	return ret;
+}
+
+/*
+ * rename() replacement: both names are given in the repository encoding
+ * and converted to the working tree encoding before rename() is called.
+ * Returns what rename() returns, or -1 when a name cannot be converted.
+ */
+int renc_pn_rename(const char *oldname, const char *newname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(oldname);
+	RENC_PN_DECL_PATH2(newname);
+	int ret;
+
+	RENC_PN_CONV_PATH1(oldname, -1);
+	RENC_PN_CONV_PATH2(newname);
+
+	ret = rename(path1_enc, path2_enc);
+	olderrno = errno;	/* free() must not disturb rename()'s errno */
+	free(path1_malloc_wt_encoded);
+	free(path2_malloc_wt_encoded);
+	errno = olderrno;
+
+	return ret;
+}
diff --git a/compat/reencode_pathname.h b/compat/reencode_pathname.h
new file mode 100644
index 0000000..9300ba4
--- /dev/null
+++ b/compat/reencode_pathname.h
@@ -0,0 +1,70 @@
+#ifndef REENCODE_PATHNAME_H
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <iconv.h>
+
+typedef struct dirent_psx {
+ ino_t d_ino; /* Posix */
+ size_t max_name_len; /* See below */
+ unsigned char d_type; /* available on all systems git runs on */
+
+ /*
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html
+ * NAME_MAX + 1 should be enough, but some systems have
+ * NAME_MAX=255 and strlen(d_name) may return 508 or 510
+ * Solution: allocate more when needed, see renc_pn_readdir()
+ */
+ char d_name[/* NAME_MAX */ 1+1];
+} dirent_psx;
+
+typedef struct {
+ iconv_t ic_wt_to_repo;
+ DIR *dirp;
+ struct dirent_psx *dirent_utf8;
+} RENC_FN_DIR;
+
+void reencode_argv(int argc, const char **argv);
+void probe_utf8_pathname_composition(char *, int);
+
+RENC_FN_DIR *renc_pn_opendir(const char *dirname);
+struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *dirp);
+int renc_pn_closedir(RENC_FN_DIR *dirp);
+
+#ifdef PATH_ENCODING
+/*
+ * Path name conversion between repository and working tree encoding.
+ * Each function returns a newly allocated string, or NULL when nothing
+ * was converted.
+ */
+char *str_repo2worktree(const char *in);
+char *str_worktree2repolen(const char *in, size_t insz);
+char *str_worktree2repo(const char *in);
+
+/* File system functions taking path names in the repository encoding */
+int renc_pn_mkdir(const char *path, mode_t mode);
+int renc_pn_lstat(const char *path, struct stat *buf);
+int renc_pn_stat(const char *path, struct stat *buf);
+int renc_pn_open(const char *path, int oflag, ... );
+int renc_pn_unlink(const char *path);
+FILE *renc_pn_fopen(const char *path, const char *mode);
+ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz);
+int renc_pn_symlink(const char *oldname, const char *newname);
+int renc_pn_rename(const char *oldname, const char *newname);
+#endif
+
+#ifndef REENCODE_PATHNAME_C
+#define opendir(n) renc_pn_opendir(n)
+#define readdir(d) renc_pn_readdir(d)
+#define closedir(d) renc_pn_closedir(d)
+#define dirent dirent_psx
+#define DIR RENC_FN_DIR
+
+#ifdef PATH_ENCODING
+#define mkdir(a,b) renc_pn_mkdir((a),(b))
+#define lstat(a,b) renc_pn_lstat((a),(b))
+#define stat(a,b) renc_pn_stat((a),(b))
+#define open renc_pn_open
+#define unlink renc_pn_unlink
+#define fopen(a,b) renc_pn_fopen((a),(b))
+#define readlink(a,b,c) renc_pn_readlink(a,b,c)
+#define symlink(a,b) renc_pn_symlink(a,b)
+#define rename(a,b) renc_pn_rename(a,b)
+#endif
+
+#endif /* REENCODE_PATHNAME_C */
+#define REENCODE_PATHNAME_H
+#endif /* REENCODE_PATHNAME_H */
diff --git a/config.c b/config.c
index 2b706ea..d591c09 100644
--- a/config.c
+++ b/config.c
@@ -775,6 +775,9 @@ static int git_default_i18n_config(const char *var, const char *value)
if (!strcmp(var, "i18n.logoutputencoding"))
return git_config_string(&git_log_output_encoding, var, value);
+ if (!strcmp(var, "i18n.pathencoding"))
+ return git_config_string(&wt_path_encoding, var, value);
+
/* Add other config variables here and to Documentation/config.txt. */
return 0;
}
diff --git a/environment.c b/environment.c
index 85edd7f..ba81575 100644
--- a/environment.c
+++ b/environment.c
@@ -59,6 +59,7 @@ int grafts_replace_parents = 1;
int core_apply_sparse_checkout;
int merge_log_config = -1;
int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */
+const char *wt_path_encoding = NULL;
struct startup_info *startup_info;
unsigned long pack_size_limit_cfg;
diff --git a/git-compat-util.h b/git-compat-util.h
index 35b095e..877b060 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -153,13 +153,21 @@
#endif
#endif
-/* used on Mac OS X */
-#ifdef PRECOMPOSE_UNICODE
-#include "compat/precompose_utf8.h"
+#if defined(PATH_ENCODING) || defined(PRECOMPOSE_UNICODE)
+#include "compat/reencode_pathname.h"
#else
-#define precompose_str(in,i_nfd2nfc)
-#define precompose_argv(c,v)
-#define probe_utf8_pathname_composition(a,b)
+#define reencode_argv(c,v)
+#endif
+
+/*
+ * Only needed for Mac OS X; expands to nothing elsewhere.  No trailing
+ * semicolon here: the call site supplies its own.
+ */
+#ifndef PRECOMPOSE_UNICODE
+#define probe_utf8_pathname_composition(a,b)
+#endif
+
+#ifndef PATH_ENCODING
+#define str_worktree2repolen(in, insz) (NULL)
+#define str_repo2worktree(in) (NULL)
+#define str_worktree2repo(in) (NULL)
#endif
#ifndef NO_LIBGEN_H
diff --git a/parse-options.c b/parse-options.c
index c1c66bd..5840c18 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -476,7 +476,7 @@ int parse_options(int argc, const char **argv, const char *prefix,
usage_with_options(usagestr, options);
}
- precompose_argv(argc, argv);
+ reencode_argv(argc, argv);
return parse_options_end(&ctx);
}
diff --git a/t/t3911-i18n-filename-8859.sh b/t/t3911-i18n-filename-8859.sh
new file mode 100755
index 0000000..aa2be57
--- /dev/null
+++ b/t/t3911-i18n-filename-8859.sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+#
+# Copyright (c) 2010 Torsten Bögershausen
+#
+
+test_description='file system encodings UTF-8 ISO8859-1'
+
+. ./test-lib.sh
+
+fname_UTF_8=`printf '\303\206\302\242'`
+fname_ISO8859_1=`printf '\306\242'`
+Euro_utf8=`printf '\342\202\254'`
+supportspathencoding=`git config core.supportspathencoding` || :
+
+
+# add_file_dir_link <branch-suffix> <name>
+#
+# Starting from master, create one branch per case and commit
+#  - an empty file <name>.f              (branch add_f_<branch-suffix>)
+#  - a directory <name>.d with a file    (branch add_d_<branch-suffix>)
+#  - the four symlinks {x,<name>}.l -> {x,<name>}.f
+#                                        (branches add_l_<0..3>_<branch-suffix>)
+#
+# "local" is not POSIX sh, so bname, fname and i are plain (global)
+# variables.
+add_file_dir_link() {
+	bname=$1
+	fname=$2
+	test_expect_success "add file $fname.f $bname" '
+		git checkout master &&
+		git checkout -b add_f_$bname &&
+		>$fname.f &&
+		git add $fname.f &&
+		git commit -m "add fname"
+	'
+
+	test_expect_success "add dir $fname.d $bname" '
+		git checkout master &&
+		git checkout -b add_d_$bname &&
+		mkdir $fname.d &&
+		touch $fname.d/$fname.f &&
+		git add $fname.d/$fname.f &&
+		git commit -m "add fname.d/fname"
+	'
+
+	i=0
+	for src in x $fname; do
+		for dst in x $fname; do
+			test_expect_success "add link $dst.l->$src.f on branch add_l_${i}_$bname" '
+				git checkout master &&
+				git checkout -b add_l_${i}_$bname &&
+				ln -s $src.f $dst.l &&
+				git add $dst.l &&
+				git commit -m "add fname.l $i"
+			'
+			i=$(($i+1))
+		done
+	done
+}
+
+test_expect_success "setup add rm x" '
+ >x &&
+ git add x &&
+ git commit -m "1st commit" &&
+ git rm x &&
+ git commit -m "rm x"
+'
+
+#combinations to be tested:
+# UTF-8 -> ISO8859-1
+# ISO8859-1 -> UTF-8
+
+if test "$supportspathencoding"
+then
+ srcencodings="ISO8859-1 UTF-8"
+ for srcenc in $srcencodings
+ do
+ case $srcenc in
+ ISO8859-1)
+ dstenc=UTF-8
+ ;;
+ UTF-8)
+ dstenc=ISO8859-1
+ ;;
+ UTF-8-MAC)
+ dstenc=UTF-8
+ ;;
+ *)
+ echo >&2 "Wrong encoding $srcenc"
+ exit 1
+ ;;
+ esac
+ eval fname_src=\$fname_$(echo $srcenc | sed -e 's/-/_/g' -e 's/_MAC//')
+ eval fname_dst=\$fname_$(echo $dstenc | sed -e 's/-/_/g')
+ test_expect_success "setup $srcenc" '
+ git checkout master &&
+ git config i18n.pathencoding $srcenc
+ '
+ add_file_dir_link $srcenc $fname_src
+
+		# Keep the &&-chain intact so that a failing step fails the test
+		test_expect_success "setup $dstenc" '
+			git checkout master &&
+			echo "git checkout Master" >&2 &&
+			ls -l >&2 &&
+			git config i18n.pathencoding $dstenc
+		'
+
+ test_expect_success "checkout file $dstenc (was $srcenc)" '
+ git checkout add_f_$srcenc
+ '
+
+ test_expect_success "exists file $dstenc (was $srcenc)" '
+ test -f $fname_dst.f
+ '
+
+ test_expect_success "log file $dstenc (was $srcenc)" '
+ git log $fname_dst.f
+ '
+
+ test_expect_success "git mv" '
+ git checkout -b mv_file_$srcenc &&
+ git mv $fname_dst.f XX.f &&
+ git commit -m "git mv fname_dst.f XX.f"
+ '
+
+ test_expect_success "checkout dir $dstenc (was $srcenc)" '
+ git checkout add_d_$srcenc
+ '
+
+ test_expect_success "exist dir $dstenc (was $srcenc)" '
+ test -d $fname_dst.d
+ '
+
+ test_expect_success "log dir $dstenc (was $srcenc)" '
+ git log $fname_dst.d
+ '
+
+ i=0
+ for src in x $fname_dst; do
+ for dst in x $fname_dst; do
+ test_expect_success "checkout link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ git checkout add_l_${i}_$srcenc
+ '
+ test_expect_success "exist link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ test -L $dst.l
+ '
+ test_expect_success "log link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ git log $dst.l
+ '
+ test_expect_success "readlink $dst.l->$src.f branch add_l_${i}_$srcenc" '
+ echo "$src.f" >expect &&
+ readlink "$dst.l" > actual &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+ i=$(($i+1))
+ done
+ done
+ done
+ # Make sure that Euro sign can NOT be checked out in 8859
+ #fname_src=Euro
+ test_expect_success "setup UTF-8" '
+ git checkout master &&
+ git config i18n.pathencoding UTF-8
+ '
+ add_file_dir_link Euro $Euro_utf8
+
+ test_expect_success "setup ISO8859-1" '
+ git checkout master &&
+ rm -rf * &&
+ git config i18n.pathencoding ISO8859-1
+ '
+ test_expect_success "checkout file Euro branch add_f_Euro" '
+ git checkout add_f_Euro
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+ test_expect_success "checkout dir Euro branch add_d_Euro" '
+ rm -rf * &&
+ test_must_fail git checkout add_d_Euro
+ '
+
+ test_expect_success "Cleanup" '
+ git config i18n.pathencoding UTF-8 &&
+ git checkout master &&
+ rm -rf * &&
+ git reset --hard &&
+ git config i18n.pathencoding ISO8859-1
+ '
+
+ test_expect_success "checkout link Euro.l->x.f branch add_l_1_Euro" '
+ ! git checkout add_l_1_Euro
+ '
+
+ test_expect_success "No link Euro.l->x.f" '
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+ test_expect_success "Cleanup after Euro.l->x.f" '
+ git config i18n.pathencoding UTF-8 &&
+ git checkout master &&
+ rm -rf * &&
+ git reset --hard &&
+ git config i18n.pathencoding ISO8859-1
+ '
+
+ # Checking out a soft link pointing to a filename outside
+ # 8859-1 should fail
+ test_expect_failure "checkout link x.l->Euro.f branch add_l_2_Euro" '
+ ! git checkout add_l_2_Euro
+ '
+
+ test_expect_success "No link x.f->Euro.l" '
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+ test_expect_success "Cleanup after link x.l->Euro.f branch" '
+ git config i18n.pathencoding UTF-8 &&
+ git checkout master &&
+ rm -rf * &&
+ git reset --hard &&
+ git config i18n.pathencoding ISO8859-1
+ '
+
+ test_expect_success "checkout link Euro.l->Euro.f branch add_l_3_Euro" '
+ ! git checkout add_l_3_Euro
+ '
+
+ test_expect_success "No link Euro.l->Euro.f" '
+ echo * >actual &&
+ echo "*" >expect &&
+ test_cmp expect actual &&
+ rm expect actual
+ '
+
+else
+	# fname_src is only set in the branch above; use the ISO-8859-1
+	# encoded name directly.
+	test_expect_success "setup 8859" '
+		git config i18n.pathencoding ISO8859-1 &&
+		git checkout -b add_file_8859 &&
+		>$fname_ISO8859_1.f &&
+		git add $fname_ISO8859_1.f &&
+		git commit -m "add fname_src" &&
+		git config i18n.pathencoding UTF-8 &&
+		rm -rf * &&
+		git reset --hard
+	'
+ test_expect_success "Silent support of pathencoding" '
+ test_must_fail test -f $fname_UTF_8.f
+ '
+fi
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index c110cbc..1590caa 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -233,7 +233,26 @@ static void wt_status_print_trailer(struct wt_status *s)
status_printf_ln(s, color(WT_STATUS_HEADER, s), "");
}
-#define quote_path quote_path_relative
+#ifdef PATH_ENCODING
+/*
+ * Like quote_path_relative(), but "in" (a path in the repository
+ * encoding) is converted to the working tree encoding first, so that the
+ * user sees the name as it appears in the file system.
+ *
+ * A negative "len" means that "in" is NUL-terminated; otherwise only the
+ * first "len" bytes of "in" are used.
+ */
+char *quote_path_repo2worktree(const char *in, int len,
+			       struct strbuf *out, const char *prefix)
+{
+	char *in_copy = NULL;
+	char *in_worktree_encoded;
+	char *ret;
+
+	if (len >= 0) {
+		/* the conversion needs a NUL-terminated string */
+		in_copy = xmemdupz(in, len);
+		in = in_copy;
+	}
+	in_worktree_encoded = str_repo2worktree(in);
+	ret = quote_path_relative(in_worktree_encoded ? in_worktree_encoded : in,
+				  -1, out, prefix);
+	free(in_worktree_encoded);
+	free(in_copy);
+	return ret;
+}
+ #define quote_path quote_path_repo2worktree
+#else
+ #define quote_path quote_path_relative
+#endif
static void wt_status_print_unmerged_data(struct wt_status *s,
struct string_list_item *it)
--
1.7.12
next reply other threads:[~2012-09-01 6:12 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-01 6:11 Torsten Bögershausen [this message]
2012-09-02 22:59 ` [RFC] i18n.pathencoding Robin Rosenberg
2012-09-08 10:09 ` Torsten Bögershausen
2012-09-04 12:23 ` Nguyen Thai Ngoc Duy
2012-09-04 17:19 ` Junio C Hamano
2012-09-04 19:51 ` Torsten Bögershausen
2012-09-04 20:12 ` Junio C Hamano
2012-09-05 19:52 ` Torsten Bögershausen
2012-09-05 11:11 ` Nguyen Thai Ngoc Duy
2012-09-05 19:49 ` Torsten Bögershausen
2012-09-06 3:24 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201209010811.33994.tboegi@web.de \
--to=tboegi@web.de \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).