git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michael J Gruber <git@drmicha.warpmail.net>
To: "Torsten Bögershausen" <totte.enea@gmail.com>
Cc: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	matthias.moeller@math.tu-dortmund.de, git@vger.kernel.org
Subject: Re: Git, Mac OS X and German special characters
Date: Thu, 20 May 2010 11:15:32 +0200	[thread overview]
Message-ID: <4BF4FDB4.2010409@drmicha.warpmail.net> (raw)
In-Reply-To: <4BF4FA89.2040904@gmail.com>

Torsten Bögershausen venit, vidit, dixit 20.05.2010 11:02:
> Hej,
> I have the same problem here.
> Below there is a patch, which may solve the problem.
> (Yes, whitespace is broken. I'm still fighting with
> git format-patch -s --cover-letter -M --stdout origin/master | git 
> imap-send)
> But this patch may be a starting point for improvements.
> Comments welcome
> BR
> /Torsten
> 
> 
> 
> Improved interwork between Mac OS X and linux when umlauts are used
> When a git repository containing utf-8 coded umlaut characters
> is cloned onto a Mac OS X machine, the Mac OS system will convert
> all filenames returned by readdir() into decomposed (NFD) utf-8.
> As a result of this conversion, git will not find them on disk.
> This helps by treating the NFC and NFD versions of filenames as
> identical on Mac OS.
> 
> 
> 
> 
> 
> 
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>

You signed off, but is Markus Kuhn's code from UCS GPL2-licensed?
Also, a few tests would be nice.

I remember we had threads on this issue in the past. I haven't checked
yet (Thunderbird pruned my nntp history), but it is worth checking that
you addressed any issues mentioned there.

I have no Mac so I can't test, sorry. Would be happy to run Mac OS in a
vm, but you know...

Thanks for looking into this!

Michael

> ---
> name-hash.c |   40 ++++++++++++++++++++++++++++++++++++++++
> utf8.c      |   55 ++++++++++++++++++++++++++++++++++++++++++++++++-------
> utf8.h      |   11 +++++++++++
> 3 files changed, 99 insertions(+), 7 deletions(-)
> 
> diff --git a/name-hash.c b/name-hash.c
> index 0031d78..e6494e8 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -7,6 +7,7 @@
>   */
> #define NO_THE_INDEX_COMPATIBILITY_MACROS
> #include "cache.h"
> +#include "utf8.h"
> 
> /*
>   * This removes bit 5 if bit 6 is set.
> @@ -100,6 +101,25 @@ static int same_name(const struct cache_entry *ce, 
> const char *name, int namelen
>      return icase && slow_same_name(name, namelen, ce->name, len);
> }
> 
> +#ifdef __APPLE__
> +struct cache_entry *index_name_exists2(struct index_state *istate, 
> const char *name, int icase)
> +{
> +    int namelen = (int)strlen(name);
> +    unsigned int hash = hash_name(name, namelen);
> +    struct cache_entry *ce;
> +
> +    ce = lookup_hash(hash, &istate->name_hash);
> +    while (ce) {
> +        if (!(ce->ce_flags & CE_UNHASHED)) {
> +            if (same_name(ce, name, namelen, icase))
> +                return ce;
> +        }
> +        ce = ce->next;
> +    }
> +    return NULL;
> +}
> +#endif
> +
> struct cache_entry *index_name_exists(struct index_state *istate, const 
> char *name, int namelen, int icase)
> {
>      unsigned int hash = hash_name(name, namelen);
> @@ -115,5 +135,25 @@ struct cache_entry *index_name_exists(struct 
> index_state *istate, const char *na
>          }
>          ce = ce->next;
>      }
> +#ifdef __APPLE__
> +    {
> +        char *name_nfc_nfd;
> +        name_nfc_nfd = str_nfc2nfd(name);
> +        if (name_nfc_nfd) {
> +            ce = index_name_exists2(istate, name_nfc_nfd, icase);
> +            free(name_nfc_nfd);
> +            if (ce)
> +                return ce;
> +        }
> +        name_nfc_nfd = str_nfd2nfc(name);
> +        if (name_nfc_nfd) {
> +            ce = index_name_exists2(istate, name_nfc_nfd, icase);
> +            free(name_nfc_nfd);
> +            if (ce)
> +                return ce;
> +        }
> +    }
> +#endif
> +
>      return NULL;
> }
> diff --git a/utf8.c b/utf8.c
> index 84cfc72..8e794dc 100644
> --- a/utf8.c
> +++ b/utf8.c
> @@ -2,6 +2,11 @@
> #include "strbuf.h"
> #include "utf8.h"
> 
> +#ifdef __APPLE__
> +static iconv_t my_iconv_nfd2nfc = (iconv_t) -1;
> +static iconv_t my_iconv_nfc2nfd = (iconv_t) -1;
> +#endif
> +
> /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */
> 
> struct interval {
> @@ -424,18 +429,13 @@ int is_encoding_utf8(const char *name)
> #else
>      typedef char * iconv_ibp;
> #endif
> -char *reencode_string(const char *in, const char *out_encoding, const 
> char *in_encoding)
> +
> +char *reencode_string_iconv(const char *in, iconv_t conv)
> {
> -    iconv_t conv;
>      size_t insz, outsz, outalloc;
>      char *out, *outpos;
>      iconv_ibp cp;
> 
> -    if (!in_encoding)
> -        return NULL;
> -    conv = iconv_open(out_encoding, in_encoding);
> -    if (conv == (iconv_t) -1)
> -        return NULL;
>      insz = strlen(in);
>      outsz = insz;
>      outalloc = outsz + 1; /* for terminating NUL */
> @@ -469,7 +469,48 @@ char *reencode_string(const char *in, const char 
> *out_encoding, const char *in_e
>              break;
>          }
>      }
> +    return out;
> +}
> +
> +char *reencode_string(const char *in, const char *out_encoding, const 
> char *in_encoding)
> +{
> +    iconv_t conv;
> +    char *out;
> +
> +    if (!in_encoding)
> +        return NULL;
> +    conv = iconv_open(out_encoding, in_encoding);
> +    if (conv == (iconv_t) -1)
> +        return NULL;
> +    out = reencode_string_iconv(in, conv);
>      iconv_close(conv);
>      return out;
> }
> +
> +#ifdef __APPLE__
> +char*
> +str_nfc2nfd(const char *in)
> +{
> +    if (my_iconv_nfc2nfd == (iconv_t) -1) {
> +        my_iconv_nfc2nfd = iconv_open("utf-8-mac", "utf-8");
> +        if (my_iconv_nfc2nfd == (iconv_t) -1) {
> +            return NULL;
> +        }
> +    }
> +    return reencode_string_iconv(in, my_iconv_nfc2nfd);
> +}
> +
> +char*
> +str_nfd2nfc(const char *in)
> +{
> +    if (my_iconv_nfd2nfc == (iconv_t) -1){
> +        my_iconv_nfd2nfc = iconv_open("utf-8", "utf-8-mac");
> +        if (my_iconv_nfd2nfc == (iconv_t) -1) {
> +            return NULL;
> +        }
> +    }
> +    return reencode_string_iconv(in, my_iconv_nfd2nfc);
> +}
> +#endif /* APPLE */
> +
> #endif
> diff --git a/utf8.h b/utf8.h
> index ebc4d2f..db29c8a 100644
> --- a/utf8.h
> +++ b/utf8.h
> @@ -13,8 +13,19 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
> 
> #ifndef NO_ICONV
> char *reencode_string(const char *in, const char *out_encoding, const 
> char *in_encoding);
> +char *reencode_string_iconv(const char *in, iconv_t conv);
> +#ifdef __APPLE__
> +char *str_nfc2nfd(const char *in);
> +char *str_nfd2nfc(const char *in);
> +#else
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> +#endif
> #else
> #define reencode_string(a,b,c) NULL
> +#define reencode_string2(a,b) NULL
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> #endif
> 
> #endif

  reply	other threads:[~2010-05-20  9:15 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-20  7:26 Git, Mac OS X and German special characters Matthias Moeller
2010-05-20  8:34 ` Ævar Arnfjörð Bjarmason
2010-05-20  8:50   ` Michael J Gruber
2010-05-20  8:57     ` demerphq
2010-05-20  9:02     ` Torsten Bögershausen
2010-05-20  9:15       ` Michael J Gruber [this message]
     [not found]         ` <4BF5294E.7060206@web.de>
2010-05-20 14:29           ` Michael J Gruber
2010-05-20 15:30         ` Jay Soffian
2010-05-20 15:50       ` Jay Soffian
2010-05-20 18:22         ` Jay Soffian
2010-05-20  9:16     ` Matthias Moeller
2010-05-20 10:38     ` Thomas Singer
2010-05-20  8:55   ` demerphq
  -- strict thread matches above, loose matches on Subject: below --
2011-10-01 12:44 Albert Zeyer
2011-10-01 13:39 ` Andreas Ericsson
     [not found]   ` <CAO1Q+jeLEp2ReNc9eOFoJxdGq6oRE3b+O=JvMNU0Kqx_eAX=7w@mail.gmail.com>
2011-10-01 14:24     ` Andreas Ericsson
2011-10-01 19:47       ` Andreas Krey
2011-10-01 22:02         ` Michael Witten
2011-10-01 23:14           ` Jakub Narebski
2011-10-01 23:26             ` Michael Witten
2011-10-01 23:48           ` Albert Zeyer
2011-10-03 19:48 ` Torsten Bögershausen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BF4FDB4.2010409@drmicha.warpmail.net \
    --to=git@drmicha.warpmail.net \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=matthias.moeller@math.tu-dortmund.de \
    --cc=totte.enea@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).