From: Michael J Gruber <git@drmicha.warpmail.net>
To: "Torsten Bögershausen" <totte.enea@gmail.com>
Cc: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
matthias.moeller@math.tu-dortmund.de, git@vger.kernel.org
Subject: Re: Git, Mac OS X and German special characters
Date: Thu, 20 May 2010 11:15:32 +0200 [thread overview]
Message-ID: <4BF4FDB4.2010409@drmicha.warpmail.net> (raw)
In-Reply-To: <4BF4FA89.2040904@gmail.com>
Torsten Bögershausen venit, vidit, dixit 20.05.2010 11:02:
> Hej,
> I have the same problem here.
> Below there is a patch, which may solve the problem.
> (Yes, whitespaces are broken. I'm still fighting with
> git format-patch -s --cover-letter -M --stdout origin/master | git
> imap-send)
> But this patch may be a starting point for improvements.
> Comments welcome
> BR
> /Torsten
>
>
>
> Improved interwork between Mac OS X and linux when umlauts are used
> When a git repository containing utf-8 coded umlaut characters
> is cloned onto a Mac OS X machine, the Mac OS system will convert
> all filenames returned by readdir() into decomposed (NFD) utf-8.
> As a result of this conversion, git will not find them on disk.
> This helps by treating the NFC and NFD versions of filenames as
> identical on Mac OS.
>
>
>
>
>
>
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>
You signed off, but is Markus Kuhn's code from UCS GPL2-licensed?
Also, a few tests would be nice.
I remember we had threads on this issue in the past. I haven't checked
yet (Thunderbird pruned my nntp history), but it is worth checking that
you addressed any issues mentioned there.
I have no Mac so I can't test, sorry. Would be happy to run Mac OS in a
vm, but you know...
Thanks for looking into this!
Michael
> ---
> name-hash.c | 40 ++++++++++++++++++++++++++++++++++++++++
> utf8.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-------
> utf8.h | 11 +++++++++++
> 3 files changed, 99 insertions(+), 7 deletions(-)
>
> diff --git a/name-hash.c b/name-hash.c
> index 0031d78..e6494e8 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -7,6 +7,7 @@
> */
> #define NO_THE_INDEX_COMPATIBILITY_MACROS
> #include "cache.h"
> +#include "utf8.h"
>
> /*
> * This removes bit 5 if bit 6 is set.
> @@ -100,6 +101,25 @@ static int same_name(const struct cache_entry *ce,
> const char *name, int namelen
> return icase && slow_same_name(name, namelen, ce->name, len);
> }
>
> +#ifdef __APPLE__
> +struct cache_entry *index_name_exists2(struct index_state *istate,
> const char *name, int icase)
> +{
> + int namelen = (int)strlen(name);
> + unsigned int hash = hash_name(name, namelen);
> + struct cache_entry *ce;
> +
> + ce = lookup_hash(hash, &istate->name_hash);
> + while (ce) {
> + if (!(ce->ce_flags & CE_UNHASHED)) {
> + if (same_name(ce, name, namelen, icase))
> + return ce;
> + }
> + ce = ce->next;
> + }
> + return NULL;
> +}
> +#endif
> +
> struct cache_entry *index_name_exists(struct index_state *istate, const
> char *name, int namelen, int icase)
> {
> unsigned int hash = hash_name(name, namelen);
> @@ -115,5 +135,25 @@ struct cache_entry *index_name_exists(struct
> index_state *istate, const char *na
> }
> ce = ce->next;
> }
> +#ifdef __APPLE__
> + {
> + char *name_nfc_nfd;
> + name_nfc_nfd = str_nfc2nfd(name);
> + if (name_nfc_nfd) {
> + ce = index_name_exists2(istate, name_nfc_nfd, icase);
> + free(name_nfc_nfd);
> + if (ce)
> + return ce;
> + }
> + name_nfc_nfd = str_nfd2nfc(name);
> + if (name_nfc_nfd) {
> + ce = index_name_exists2(istate, name_nfc_nfd, icase);
> + free(name_nfc_nfd);
> + if (ce)
> + return ce;
> + }
> + }
> +#endif
> +
> return NULL;
> }
> diff --git a/utf8.c b/utf8.c
> index 84cfc72..8e794dc 100644
> --- a/utf8.c
> +++ b/utf8.c
> @@ -2,6 +2,11 @@
> #include "strbuf.h"
> #include "utf8.h"
>
> +#ifdef __APPLE__
> +static iconv_t my_iconv_nfd2nfc = (iconv_t) -1;
> +static iconv_t my_iconv_nfc2nfd = (iconv_t) -1;
> +#endif
> +
> /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */
>
> struct interval {
> @@ -424,18 +429,13 @@ int is_encoding_utf8(const char *name)
> #else
> typedef char * iconv_ibp;
> #endif
> -char *reencode_string(const char *in, const char *out_encoding, const
> char *in_encoding)
> +
> +char *reencode_string_iconv(const char *in, iconv_t conv)
> {
> - iconv_t conv;
> size_t insz, outsz, outalloc;
> char *out, *outpos;
> iconv_ibp cp;
>
> - if (!in_encoding)
> - return NULL;
> - conv = iconv_open(out_encoding, in_encoding);
> - if (conv == (iconv_t) -1)
> - return NULL;
> insz = strlen(in);
> outsz = insz;
> outalloc = outsz + 1; /* for terminating NUL */
> @@ -469,7 +469,48 @@ char *reencode_string(const char *in, const char
> *out_encoding, const char *in_e
> break;
> }
> }
> + return out;
> +}
> +
> +char *reencode_string(const char *in, const char *out_encoding, const
> char *in_encoding)
> +{
> + iconv_t conv;
> + char *out;
> +
> + if (!in_encoding)
> + return NULL;
> + conv = iconv_open(out_encoding, in_encoding);
> + if (conv == (iconv_t) -1)
> + return NULL;
> + out = reencode_string_iconv(in, conv);
> iconv_close(conv);
> return out;
> }
> +
> +#ifdef __APPLE__
> +char*
> +str_nfc2nfd(const char *in)
> +{
> + if (my_iconv_nfc2nfd == (iconv_t) -1) {
> + my_iconv_nfc2nfd = iconv_open("utf-8-mac", "utf-8");
> + if (my_iconv_nfc2nfd == (iconv_t) -1) {
> + return NULL;
> + }
> + }
> + return reencode_string_iconv(in, my_iconv_nfc2nfd);
> +}
> +
> +char*
> +str_nfd2nfc(const char *in)
> +{
> + if (my_iconv_nfd2nfc == (iconv_t) -1){
> + my_iconv_nfd2nfc = iconv_open("utf-8", "utf-8-mac");
> + if (my_iconv_nfd2nfc == (iconv_t) -1) {
> + return NULL;
> + }
> + }
> + return reencode_string_iconv(in, my_iconv_nfd2nfc);
> +}
> +#endif /* APPLE */
> +
> #endif
> diff --git a/utf8.h b/utf8.h
> index ebc4d2f..db29c8a 100644
> --- a/utf8.h
> +++ b/utf8.h
> @@ -13,8 +13,19 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
>
> #ifndef NO_ICONV
> char *reencode_string(const char *in, const char *out_encoding, const
> char *in_encoding);
> +char *reencode_string_iconv(const char *in, iconv_t conv);
> +#ifdef __APPLE__
> +char *str_nfc2nfd(const char *in);
> +char *str_nfd2nfc(const char *in);
> +#else
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> +#endif
> #else
> #define reencode_string(a,b,c) NULL
> +#define reencode_string2(a,b) NULL
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> #endif
>
> #endif
next prev parent reply other threads:[~2010-05-20 9:15 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-20 7:26 Git, Mac OS X and German special characters Matthias Moeller
2010-05-20 8:34 ` Ævar Arnfjörð Bjarmason
2010-05-20 8:50 ` Michael J Gruber
2010-05-20 8:57 ` demerphq
2010-05-20 9:02 ` Torsten Bögershausen
2010-05-20 9:15 ` Michael J Gruber [this message]
[not found] ` <4BF5294E.7060206@web.de>
2010-05-20 14:29 ` Michael J Gruber
2010-05-20 15:30 ` Jay Soffian
2010-05-20 15:50 ` Jay Soffian
2010-05-20 18:22 ` Jay Soffian
2010-05-20 9:16 ` Matthias Moeller
2010-05-20 10:38 ` Thomas Singer
2010-05-20 8:55 ` demerphq
-- strict thread matches above, loose matches on Subject: below --
2011-10-01 12:44 Albert Zeyer
2011-10-01 13:39 ` Andreas Ericsson
[not found] ` <CAO1Q+jeLEp2ReNc9eOFoJxdGq6oRE3b+O=JvMNU0Kqx_eAX=7w@mail.gmail.com>
2011-10-01 14:24 ` Andreas Ericsson
2011-10-01 19:47 ` Andreas Krey
2011-10-01 22:02 ` Michael Witten
2011-10-01 23:14 ` Jakub Narebski
2011-10-01 23:26 ` Michael Witten
2011-10-01 23:48 ` Albert Zeyer
2011-10-03 19:48 ` Torsten Bögershausen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BF4FDB4.2010409@drmicha.warpmail.net \
--to=git@drmicha.warpmail.net \
--cc=avarab@gmail.com \
--cc=git@vger.kernel.org \
--cc=matthias.moeller@math.tu-dortmund.de \
--cc=totte.enea@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).