From: Michael J Gruber <git@drmicha.warpmail.net>
To: "Torsten Bögershausen" <totte.enea@gmail.com>
Cc: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
matthias.moeller@math.tu-dortmund.de, git@vger.kernel.org
Subject: Re: Git, Mac OS X and German special characters
Date: Thu, 20 May 2010 11:15:32 +0200 [thread overview]
Message-ID: <4BF4FDB4.2010409@drmicha.warpmail.net> (raw)
In-Reply-To: <4BF4FA89.2040904@gmail.com>
Torsten Bögershausen venit, vidit, dixit 20.05.2010 11:02:
> Hej,
> I have the same problem here.
> Below there is a patch, which may solve the problem.
> (Yes, the whitespace is broken. I'm still fighting with
> git format-patch -s --cover-letter -M --stdout origin/master | git
> imap-send)
> But this patch may be a starting point for improvements.
> Comments welcome
> BR
> /Torsten
>
>
>
> Improved interwork between Mac OS X and linux when umlauts are used
> When a git repository containing utf-8 coded umlaut characters
> is cloned onto a Mac OS X machine, the Mac OS system will convert
> all filenames returned by readdir() into decomposed (NFD) utf-8.
> As a result of this conversion, git will not find them on disk.
> This helps by treating the NFC and NFD versions of filenames as
> identical on Mac OS.
>
>
>
>
>
>
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>
You signed off, but is Markus Kuhn's code from UCS GPL2-licensed?
Also, a few tests would be nice.
I remember we had threads on this issue in the past. I haven't checked
yet (Thunderbird pruned my nntp history), but it is worth checking that
you addressed any issues mentioned there.
I have no Mac so I can't test, sorry. Would be happy to run Mac OS in a
vm, but you know...
Thanks for looking into this!
Michael
> ---
> name-hash.c | 40 ++++++++++++++++++++++++++++++++++++++++
> utf8.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-------
> utf8.h | 11 +++++++++++
> 3 files changed, 99 insertions(+), 7 deletions(-)
>
> diff --git a/name-hash.c b/name-hash.c
> index 0031d78..e6494e8 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -7,6 +7,7 @@
> */
> #define NO_THE_INDEX_COMPATIBILITY_MACROS
> #include "cache.h"
> +#include "utf8.h"
>
> /*
> * This removes bit 5 if bit 6 is set.
> @@ -100,6 +101,25 @@ static int same_name(const struct cache_entry *ce,
> const char *name, int namelen
> return icase && slow_same_name(name, namelen, ce->name, len);
> }
>
> +#ifdef __APPLE__
> +struct cache_entry *index_name_exists2(struct index_state *istate,
> const char *name, int icase)
> +{
> + int namelen = (int)strlen(name);
> + unsigned int hash = hash_name(name, namelen);
> + struct cache_entry *ce;
> +
> + ce = lookup_hash(hash, &istate->name_hash);
> + while (ce) {
> + if (!(ce->ce_flags & CE_UNHASHED)) {
> + if (same_name(ce, name, namelen, icase))
> + return ce;
> + }
> + ce = ce->next;
> + }
> + return NULL;
> +}
> +#endif
> +
> struct cache_entry *index_name_exists(struct index_state *istate, const
> char *name, int namelen, int icase)
> {
> unsigned int hash = hash_name(name, namelen);
> @@ -115,5 +135,25 @@ struct cache_entry *index_name_exists(struct
> index_state *istate, const char *na
> }
> ce = ce->next;
> }
> +#ifdef __APPLE__
> + {
> + char *name_nfc_nfd;
> + name_nfc_nfd = str_nfc2nfd(name);
> + if (name_nfc_nfd) {
> + ce = index_name_exists2(istate, name_nfc_nfd, icase);
> + free(name_nfc_nfd);
> + if (ce)
> + return ce;
> + }
> + name_nfc_nfd = str_nfd2nfc(name);
> + if (name_nfc_nfd) {
> + ce = index_name_exists2(istate, name_nfc_nfd, icase);
> + free(name_nfc_nfd);
> + if (ce)
> + return ce;
> + }
> + }
> +#endif
> +
> return NULL;
> }
> diff --git a/utf8.c b/utf8.c
> index 84cfc72..8e794dc 100644
> --- a/utf8.c
> +++ b/utf8.c
> @@ -2,6 +2,11 @@
> #include "strbuf.h"
> #include "utf8.h"
>
> +#ifdef __APPLE__
> +static iconv_t my_iconv_nfd2nfc = (iconv_t) -1;
> +static iconv_t my_iconv_nfc2nfd = (iconv_t) -1;
> +#endif
> +
> /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */
>
> struct interval {
> @@ -424,18 +429,13 @@ int is_encoding_utf8(const char *name)
> #else
> typedef char * iconv_ibp;
> #endif
> -char *reencode_string(const char *in, const char *out_encoding, const
> char *in_encoding)
> +
> +char *reencode_string_iconv(const char *in, iconv_t conv)
> {
> - iconv_t conv;
> size_t insz, outsz, outalloc;
> char *out, *outpos;
> iconv_ibp cp;
>
> - if (!in_encoding)
> - return NULL;
> - conv = iconv_open(out_encoding, in_encoding);
> - if (conv == (iconv_t) -1)
> - return NULL;
> insz = strlen(in);
> outsz = insz;
> outalloc = outsz + 1; /* for terminating NUL */
> @@ -469,7 +469,48 @@ char *reencode_string(const char *in, const char
> *out_encoding, const char *in_e
> break;
> }
> }
> + return out;
> +}
> +
> +char *reencode_string(const char *in, const char *out_encoding, const
> char *in_encoding)
> +{
> + iconv_t conv;
> + char *out;
> +
> + if (!in_encoding)
> + return NULL;
> + conv = iconv_open(out_encoding, in_encoding);
> + if (conv == (iconv_t) -1)
> + return NULL;
> + out = reencode_string_iconv(in, conv);
> iconv_close(conv);
> return out;
> }
> +
> +#ifdef __APPLE__
> +char*
> +str_nfc2nfd(const char *in)
> +{
> + if (my_iconv_nfc2nfd == (iconv_t) -1) {
> + my_iconv_nfc2nfd = iconv_open("utf-8-mac", "utf-8");
> + if (my_iconv_nfc2nfd == (iconv_t) -1) {
> + return NULL;
> + }
> + }
> + return reencode_string_iconv(in, my_iconv_nfc2nfd);
> +}
> +
> +char*
> +str_nfd2nfc(const char *in)
> +{
> + if (my_iconv_nfd2nfc == (iconv_t) -1){
> + my_iconv_nfd2nfc = iconv_open("utf-8", "utf-8-mac");
> + if (my_iconv_nfd2nfc == (iconv_t) -1) {
> + return NULL;
> + }
> + }
> + return reencode_string_iconv(in, my_iconv_nfd2nfc);
> +}
> +#endif /* APPLE */
> +
> #endif
> diff --git a/utf8.h b/utf8.h
> index ebc4d2f..db29c8a 100644
> --- a/utf8.h
> +++ b/utf8.h
> @@ -13,8 +13,19 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
>
> #ifndef NO_ICONV
> char *reencode_string(const char *in, const char *out_encoding, const
> char *in_encoding);
> +char *reencode_string_iconv(const char *in, iconv_t conv);
> +#ifdef __APPLE__
> +char *str_nfc2nfd(const char *in);
> +char *str_nfd2nfc(const char *in);
> +#else
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> +#endif
> #else
> #define reencode_string(a,b,c) NULL
> +#define reencode_string2(a,b) NULL
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> #endif
>
> #endif
next prev parent reply other threads:[~2010-05-20 9:15 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-20 7:26 Git, Mac OS X and German special characters Matthias Moeller
2010-05-20 8:34 ` Ævar Arnfjörð Bjarmason
2010-05-20 8:50 ` Michael J Gruber
2010-05-20 8:57 ` demerphq
2010-05-20 9:02 ` Torsten Bögershausen
2010-05-20 9:15 ` Michael J Gruber [this message]
[not found] ` <4BF5294E.7060206@web.de>
2010-05-20 14:29 ` Michael J Gruber
2010-05-20 15:30 ` Jay Soffian
2010-05-20 15:50 ` Jay Soffian
2010-05-20 18:22 ` Jay Soffian
2010-05-20 9:16 ` Matthias Moeller
2010-05-20 10:38 ` Thomas Singer
2010-05-20 8:55 ` demerphq
-- strict thread matches above, loose matches on Subject: below --
2011-10-01 12:44 Albert Zeyer
2011-10-01 13:39 ` Andreas Ericsson
[not found] ` <CAO1Q+jeLEp2ReNc9eOFoJxdGq6oRE3b+O=JvMNU0Kqx_eAX=7w@mail.gmail.com>
2011-10-01 14:24 ` Andreas Ericsson
2011-10-01 19:47 ` Andreas Krey
2011-10-01 22:02 ` Michael Witten
2011-10-01 23:14 ` Jakub Narebski
2011-10-01 23:26 ` Michael Witten
2011-10-01 23:48 ` Albert Zeyer
2011-10-03 19:48 ` Torsten Bögershausen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BF4FDB4.2010409@drmicha.warpmail.net \
--to=git@drmicha.warpmail.net \
--cc=avarab@gmail.com \
--cc=git@vger.kernel.org \
--cc=matthias.moeller@math.tu-dortmund.de \
--cc=totte.enea@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.