git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* builtin conversion between tabs and spaces
       [not found] ` <d4bc1a2a0810141842q1e50c85au7d813f2e5e37a84c@mail.gmail.com>
@ 2008-10-15  1:44   ` Stefan Karpinski
  2008-10-15  1:47     ` Stefan Karpinski
                       ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-15  1:44 UTC (permalink / raw)
  To: Git Mailing List

I find myself really wanting to enforce standards in the use of spaces
versus tabs. I deal with some unruly programmers who refuse to set
their editors to use soft tabs, but I *hate* tabs in the repo. And of
course others feel equally strongly about keeping only tabs in the
repo (e.g. the git repo).

This led me to wonder if it wouldn't make sense to have this
conversion ability built into git. The following patch implements this
functionality. It still needs work—it's not meant to be final, just to
give an idea—but I just wanted to see if people on the git list
thought this sort of thing would be worthwhile at all.

If people think it's worth having in git, then how should it be
configured? I feel like a project should be able to define the
expected tab size for binary file types. Moreover, the project should
be able to define the default cannonicalization with resepect to
whitespace for different files types. Then, if they so desire, each
git user should be able to override the output format on a
per-repository basis.

Does this make any sense? Comments?

---
diff --git a/convert.c b/convert.c
index 1816e97..280f45b 100644
--- a/convert.c
+++ b/convert.c
@@ -18,7 +18,7 @@

 struct text_stat {
       /* NUL, CR, LF and CRLF counts */
-       unsigned nul, cr, lf, crlf;
+       unsigned nul, cr, lf, crlf, tab;

       /* These are just approximations! */
       unsigned printable, nonprintable;
@@ -48,7 +48,10 @@
 static void gather_stats(const char *buf, unsigned long size, struct
text_stat *
               else if (c < 32) {
                       switch (c) {
                               /* BS, HT, ESC and FF */
-                       case '\b': case '\t': case '\033': case '\014':
+                       case '\t':
+                               stats->tab++;
+                               /* fall through */
+                       case '\b': case '\033': case '\014':
                               stats->printable++;
                               break;
                       case 0:
@@ -235,6 +238,105 @@
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
       return 1;
 }

+static int tabs_to_spaces(const char *path, const char *src, size_t len,
+
                  struct strbuf *buf, int untabify)
+{
+       char *to_free = NULL;
+       struct text_stat stats;
+  static const unsigned tab_size = 4;
+       char *spaces;
+
+       if (!untabify)
+               return 0;
+
+  /* instead of calling twice, should cache these stats across calls */
+       gather_stats(src, len, &stats);
+
+       if (!stats.tab)
+    return 0;
+
+       /* are we "faking" in place editing ? */
+       if (src == buf->buf)
+               to_free = strbuf_detach(buf, NULL);
+
+  /* this growth may be excessive: not all tabs get tab_size spaces */
+       strbuf_grow(buf, len + tab_size * stats.tab);
+  spaces = (char *) xmalloc(tab_size);
+  memset(spaces, ' ', tab_size);
+       for (;;) {
+               const char *line = src;
+               const char *nl = memchr(src, '\n', len);
+               char *tab;
+    if (!nl)
+      nl = src + len;
+    while (src < nl && (tab = memchr(src, '\t', nl - src))) {
+      strbuf_add(buf, src, tab - src);
+      strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size));
+      src = tab + 1;
+    }
+    if (src < nl)
+      strbuf_add(buf, src, nl - src);
+               if (nl < src + len)
+                       strbuf_addch(buf, '\n');
+               else
+                       break;
+               src = nl + 1;
+               len -= src - line;
+       }
+
+       free(to_free);
+       free(spaces);
+       return 1;
+}
+
+static int spaces_to_tabs(const char *path, const char *src, size_t len,
+
                  struct strbuf *buf, int tabify)
+{
+  static const unsigned tab_size = 4;
+
+       if (!tabify)
+               return 0;
+
+       /* only grow if not in place */
+       if (strbuf_avail(buf) + buf->len < len)
+               strbuf_grow(buf, len - buf->len);
+
+       for (;;) {
+               int tabs = 0, spaces = 0;
+               const char *line = src;
+               const char *nl = memchr(src, '\n', len);
+    if (!nl)
+      nl = src + len;
+               for (;; src++) {
+                       if (*src == ' ') {
+                               spaces++;
+                               if (spaces == tab_size) {
+                                       tabs++;
+                                       spaces = 0;
+                               }
+                       } else if (*src == '\t') {
+                               tabs++;
+                               spaces = 0;
+                       } else break;
+               }
+               if (line < src) {
+                       memset(buf->buf + buf->len, '\t', tabs);
+                       memset(buf->buf + buf->len + tabs, ' ', spaces);
+                       strbuf_setlen(buf, buf->len + tabs + spaces);
+               }
+    if (src < nl)
+      strbuf_add(buf, src, nl - src);
+               if (nl < src + len)
+                       strbuf_addch(buf, '\n');
+               else
+                       break;
+               src = nl + 1;
+               len -= src - line;
+       }
+
+       return 1;
+}
+
 struct filter_params {
       const char *src;
       unsigned long size;
@@ -370,22 +472,29 @@
 static int read_convert_config(const char *var, const char *value, void *cb)
       return 0;
 }

-static void setup_convert_check(struct git_attr_check *check)
+struct convert_checks {
+  struct git_attr_check crlf, tabs, ident, filter;
+};
+
+static void setup_convert_check(struct convert_checks *checks)
 {
       static struct git_attr *attr_crlf;
+       static struct git_attr *attr_tabs;
       static struct git_attr *attr_ident;
       static struct git_attr *attr_filter;

       if (!attr_crlf) {
               attr_crlf = git_attr("crlf", 4);
+               attr_tabs = git_attr("tabs", 4);
               attr_ident = git_attr("ident", 5);
               attr_filter = git_attr("filter", 6);
               user_convert_tail = &user_convert;
               git_config(read_convert_config, NULL);
       }
-       check[0].attr = attr_crlf;
-       check[1].attr = attr_ident;
-       check[2].attr = attr_filter;
+       checks->crlf.attr = attr_crlf;
+       checks->tabs.attr = attr_tabs;
+       checks->ident.attr = attr_ident;
+       checks->filter.attr = attr_filter;
 }

 static int count_ident(const char *cp, unsigned long size)
@@ -566,20 +675,22 @@
 static int git_path_check_ident(const char *path, struct git_attr_check *check)
       return !!ATTR_TRUE(value);
 }

+#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct
git_attr_check))
+
 int convert_to_git(const char *path, const char *src, size_t len,
                   struct strbuf *dst, enum safe_crlf checksafe)
 {
-       struct git_attr_check check[3];
+       struct convert_checks checks;
       int crlf = CRLF_GUESS;
       int ident = 0, ret = 0;
       const char *filter = NULL;

-       setup_convert_check(check);
-       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+       setup_convert_check(&checks);
+       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
&checks)) {
               struct convert_driver *drv;
-               crlf = git_path_check_crlf(path, check + 0);
-               ident = git_path_check_ident(path, check + 1);
-               drv = git_path_check_convert(path, check + 2);
+               crlf = git_path_check_crlf(path, &(checks.crlf));
+               ident = git_path_check_ident(path, &(checks.ident));
+               drv = git_path_check_convert(path, &(checks.filter));
               if (drv && drv->clean)
                       filter = drv->clean;
       }
@@ -589,6 +700,11 @@
 int convert_to_git(const char *path, const char *src, size_t len,
               src = dst->buf;
               len = dst->len;
       }
+       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
+       if (ret) {
+               src = dst->buf;
+               len = dst->len;
+       }
       ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
       if (ret) {
               src = dst->buf;
@@ -599,17 +715,17 @@
 int convert_to_git(const char *path, const char *src, size_t len,

 int convert_to_working_tree(const char *path, const char *src, size_t
len, struct strbuf *dst)
 {
-       struct git_attr_check check[3];
+       struct convert_checks checks;
       int crlf = CRLF_GUESS;
       int ident = 0, ret = 0;
       const char *filter = NULL;

-       setup_convert_check(check);
-       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+       setup_convert_check(&checks);
+       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
&checks)) {
               struct convert_driver *drv;
-               crlf = git_path_check_crlf(path, check + 0);
-               ident = git_path_check_ident(path, check + 1);
-               drv = git_path_check_convert(path, check + 2);
+               crlf = git_path_check_crlf(path, &(checks.crlf));
+               ident = git_path_check_ident(path, &(checks.ident));
+               drv = git_path_check_convert(path, &(checks.filter));
               if (drv && drv->smudge)
                       filter = drv->smudge;
       }
@@ -624,5 +740,10 @@
 int convert_to_working_tree(const char *path, const char *src, size_t
len, struc
               src = dst->buf;
               len = dst->len;
       }
+       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
+       if (ret) {
+               src = dst->buf;
+               len = dst->len;
+       }
       return ret | apply_filter(path, src, len, dst, filter);
 }

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15  1:44   ` builtin conversion between tabs and spaces Stefan Karpinski
@ 2008-10-15  1:47     ` Stefan Karpinski
  2008-10-15  6:25     ` Alex Riesen
  2008-10-15  6:26     ` Johannes Sixt
  2 siblings, 0 replies; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-15  1:47 UTC (permalink / raw)
  To: Git Mailing List

Appologies for gmail mangling. I will use git send-email for real patches.

On Tue, Oct 14, 2008 at 6:44 PM, Stefan Karpinski
<stefan.karpinski@gmail.com> wrote:
> I find myself really wanting to enforce standards in the use of spaces
> versus tabs. I deal with some unruly programmers who refuse to set
> their editors to use soft tabs, but I *hate* tabs in the repo. And of
> course others feel equally strongly about keeping only tabs in the
> repo (e.g. the git repo).
>
> This led me to wonder if it wouldn't make sense to have this
> conversion ability built into git. The following patch implements this
> functionality. It still needs work—it's not meant to be final, just to
> give an idea—but I just wanted to see if people on the git list
> thought this sort of thing would be worthwhile at all.
>
> If people think it's worth having in git, then how should it be
> configured? I feel like a project should be able to define the
> expected tab size for binary file types. Moreover, the project should
> be able to define the default cannonicalization with resepect to
> whitespace for different files types. Then, if they so desire, each
> git user should be able to override the output format on a
> per-repository basis.
>
> Does this make any sense? Comments?
>
> ---
> diff --git a/convert.c b/convert.c
> index 1816e97..280f45b 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -18,7 +18,7 @@
>
>  struct text_stat {
>       /* NUL, CR, LF and CRLF counts */
> -       unsigned nul, cr, lf, crlf;
> +       unsigned nul, cr, lf, crlf, tab;
>
>       /* These are just approximations! */
>       unsigned printable, nonprintable;
> @@ -48,7 +48,10 @@
>  static void gather_stats(const char *buf, unsigned long size, struct
> text_stat *
>               else if (c < 32) {
>                       switch (c) {
>                               /* BS, HT, ESC and FF */
> -                       case '\b': case '\t': case '\033': case '\014':
> +                       case '\t':
> +                               stats->tab++;
> +                               /* fall through */
> +                       case '\b': case '\033': case '\014':
>                               stats->printable++;
>                               break;
>                       case 0:
> @@ -235,6 +238,105 @@
>  static int crlf_to_worktree(const char *path, const char *src, size_t len,
>       return 1;
>  }
>
> +static int tabs_to_spaces(const char *path, const char *src, size_t len,
> +
>                  struct strbuf *buf, int untabify)
> +{
> +       char *to_free = NULL;
> +       struct text_stat stats;
> +  static const unsigned tab_size = 4;
> +       char *spaces;
> +
> +       if (!untabify)
> +               return 0;
> +
> +  /* instead of calling twice, should cache these stats across calls */
> +       gather_stats(src, len, &stats);
> +
> +       if (!stats.tab)
> +    return 0;
> +
> +       /* are we "faking" in place editing ? */
> +       if (src == buf->buf)
> +               to_free = strbuf_detach(buf, NULL);
> +
> +  /* this growth may be excessive: not all tabs get tab_size spaces */
> +       strbuf_grow(buf, len + tab_size * stats.tab);
> +  spaces = (char *) xmalloc(tab_size);
> +  memset(spaces, ' ', tab_size);
> +       for (;;) {
> +               const char *line = src;
> +               const char *nl = memchr(src, '\n', len);
> +               char *tab;
> +    if (!nl)
> +      nl = src + len;
> +    while (src < nl && (tab = memchr(src, '\t', nl - src))) {
> +      strbuf_add(buf, src, tab - src);
> +      strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size));
> +      src = tab + 1;
> +    }
> +    if (src < nl)
> +      strbuf_add(buf, src, nl - src);
> +               if (nl < src + len)
> +                       strbuf_addch(buf, '\n');
> +               else
> +                       break;
> +               src = nl + 1;
> +               len -= src - line;
> +       }
> +
> +       free(to_free);
> +       free(spaces);
> +       return 1;
> +}
> +
> +static int spaces_to_tabs(const char *path, const char *src, size_t len,
> +
>                  struct strbuf *buf, int tabify)
> +{
> +  static const unsigned tab_size = 4;
> +
> +       if (!tabify)
> +               return 0;
> +
> +       /* only grow if not in place */
> +       if (strbuf_avail(buf) + buf->len < len)
> +               strbuf_grow(buf, len - buf->len);
> +
> +       for (;;) {
> +               int tabs = 0, spaces = 0;
> +               const char *line = src;
> +               const char *nl = memchr(src, '\n', len);
> +    if (!nl)
> +      nl = src + len;
> +               for (;; src++) {
> +                       if (*src == ' ') {
> +                               spaces++;
> +                               if (spaces == tab_size) {
> +                                       tabs++;
> +                                       spaces = 0;
> +                               }
> +                       } else if (*src == '\t') {
> +                               tabs++;
> +                               spaces = 0;
> +                       } else break;
> +               }
> +               if (line < src) {
> +                       memset(buf->buf + buf->len, '\t', tabs);
> +                       memset(buf->buf + buf->len + tabs, ' ', spaces);
> +                       strbuf_setlen(buf, buf->len + tabs + spaces);
> +               }
> +    if (src < nl)
> +      strbuf_add(buf, src, nl - src);
> +               if (nl < src + len)
> +                       strbuf_addch(buf, '\n');
> +               else
> +                       break;
> +               src = nl + 1;
> +               len -= src - line;
> +       }
> +
> +       return 1;
> +}
> +
>  struct filter_params {
>       const char *src;
>       unsigned long size;
> @@ -370,22 +472,29 @@
>  static int read_convert_config(const char *var, const char *value, void *cb)
>       return 0;
>  }
>
> -static void setup_convert_check(struct git_attr_check *check)
> +struct convert_checks {
> +  struct git_attr_check crlf, tabs, ident, filter;
> +};
> +
> +static void setup_convert_check(struct convert_checks *checks)
>  {
>       static struct git_attr *attr_crlf;
> +       static struct git_attr *attr_tabs;
>       static struct git_attr *attr_ident;
>       static struct git_attr *attr_filter;
>
>       if (!attr_crlf) {
>               attr_crlf = git_attr("crlf", 4);
> +               attr_tabs = git_attr("tabs", 4);
>               attr_ident = git_attr("ident", 5);
>               attr_filter = git_attr("filter", 6);
>               user_convert_tail = &user_convert;
>               git_config(read_convert_config, NULL);
>       }
> -       check[0].attr = attr_crlf;
> -       check[1].attr = attr_ident;
> -       check[2].attr = attr_filter;
> +       checks->crlf.attr = attr_crlf;
> +       checks->tabs.attr = attr_tabs;
> +       checks->ident.attr = attr_ident;
> +       checks->filter.attr = attr_filter;
>  }
>
>  static int count_ident(const char *cp, unsigned long size)
> @@ -566,20 +675,22 @@
>  static int git_path_check_ident(const char *path, struct git_attr_check *check)
>       return !!ATTR_TRUE(value);
>  }
>
> +#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct
> git_attr_check))
> +
>  int convert_to_git(const char *path, const char *src, size_t len,
>                   struct strbuf *dst, enum safe_crlf checksafe)
>  {
> -       struct git_attr_check check[3];
> +       struct convert_checks checks;
>       int crlf = CRLF_GUESS;
>       int ident = 0, ret = 0;
>       const char *filter = NULL;
>
> -       setup_convert_check(check);
> -       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
> +       setup_convert_check(&checks);
> +       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
> &checks)) {
>               struct convert_driver *drv;
> -               crlf = git_path_check_crlf(path, check + 0);
> -               ident = git_path_check_ident(path, check + 1);
> -               drv = git_path_check_convert(path, check + 2);
> +               crlf = git_path_check_crlf(path, &(checks.crlf));
> +               ident = git_path_check_ident(path, &(checks.ident));
> +               drv = git_path_check_convert(path, &(checks.filter));
>               if (drv && drv->clean)
>                       filter = drv->clean;
>       }
> @@ -589,6 +700,11 @@
>  int convert_to_git(const char *path, const char *src, size_t len,
>               src = dst->buf;
>               len = dst->len;
>       }
> +       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
> +       if (ret) {
> +               src = dst->buf;
> +               len = dst->len;
> +       }
>       ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
>       if (ret) {
>               src = dst->buf;
> @@ -599,17 +715,17 @@
>  int convert_to_git(const char *path, const char *src, size_t len,
>
>  int convert_to_working_tree(const char *path, const char *src, size_t
> len, struct strbuf *dst)
>  {
> -       struct git_attr_check check[3];
> +       struct convert_checks checks;
>       int crlf = CRLF_GUESS;
>       int ident = 0, ret = 0;
>       const char *filter = NULL;
>
> -       setup_convert_check(check);
> -       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
> +       setup_convert_check(&checks);
> +       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
> &checks)) {
>               struct convert_driver *drv;
> -               crlf = git_path_check_crlf(path, check + 0);
> -               ident = git_path_check_ident(path, check + 1);
> -               drv = git_path_check_convert(path, check + 2);
> +               crlf = git_path_check_crlf(path, &(checks.crlf));
> +               ident = git_path_check_ident(path, &(checks.ident));
> +               drv = git_path_check_convert(path, &(checks.filter));
>               if (drv && drv->smudge)
>                       filter = drv->smudge;
>       }
> @@ -624,5 +740,10 @@
>  int convert_to_working_tree(const char *path, const char *src, size_t
> len, struc
>               src = dst->buf;
>               len = dst->len;
>       }
> +       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
> +       if (ret) {
> +               src = dst->buf;
> +               len = dst->len;
> +       }
>       return ret | apply_filter(path, src, len, dst, filter);
>  }
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15  1:44   ` builtin conversion between tabs and spaces Stefan Karpinski
  2008-10-15  1:47     ` Stefan Karpinski
@ 2008-10-15  6:25     ` Alex Riesen
  2008-10-15 20:52       ` Stefan Karpinski
  2008-10-15  6:26     ` Johannes Sixt
  2 siblings, 1 reply; 11+ messages in thread
From: Alex Riesen @ 2008-10-15  6:25 UTC (permalink / raw)
  To: Stefan Karpinski; +Cc: Git Mailing List

Stefan Karpinski, Wed, Oct 15, 2008 03:44:10 +0200:
> I find myself really wanting to enforce standards in the use of spaces
> versus tabs. I deal with some unruly programmers who refuse to set
> their editors to use soft tabs, but I *hate* tabs in the repo. And of
> course others feel equally strongly about keeping only tabs in the
> repo (e.g. the git repo).
> 
> This led me to wonder if it wouldn't make sense to have this
> conversion ability built into git. The following patch implements this
> functionality. It still needs work—it's not meant to be final, just to
> give an idea—but I just wanted to see if people on the git list
> thought this sort of thing would be worthwhile at all.

Is your conversion two-way? IOW, is it possible to convert the
converted file and get the original? (Because all the existing
conversions are reversible)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15  1:44   ` builtin conversion between tabs and spaces Stefan Karpinski
  2008-10-15  1:47     ` Stefan Karpinski
  2008-10-15  6:25     ` Alex Riesen
@ 2008-10-15  6:26     ` Johannes Sixt
  2008-10-15 20:55       ` Stefan Karpinski
  2 siblings, 1 reply; 11+ messages in thread
From: Johannes Sixt @ 2008-10-15  6:26 UTC (permalink / raw)
  To: Stefan Karpinski; +Cc: Git Mailing List

Stefan Karpinski schrieb:
> This led me to wonder if it wouldn't make sense to have this
> conversion ability built into git.

This wouldn't help your case a lot. It is still at the discretion of each
individual repository owner to enable the conversion. (You didn't mean to
make this conversion mandatory, did you?)

BTW, you don't need to change git code to achieve this. It's sufficient to
install a suitable "clean" filter:

echo "*.c filter=c-code" > .git/info/attributes
git config filter.c-code.clean tabs2spaces

where tabs2spaces is your utility that does the conversion.

-- Hannes

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15  6:25     ` Alex Riesen
@ 2008-10-15 20:52       ` Stefan Karpinski
  2008-10-15 21:02         ` Jonathan del Strother
  0 siblings, 1 reply; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-15 20:52 UTC (permalink / raw)
  To: Alex Riesen; +Cc: Git Mailing List

> Is your conversion two-way? IOW, is it possible to convert the
> converted file and get the original? (Because all the existing
> conversions are reversible)

Yes and no. The CRLF conversion isn't always invertable—it is so long
as your use of CRLF/LF is consistent. The tab/space conversion is
similar: if you consistently use spaces, then tabs_to_spaces will
always give you back your original version; if you consistently use
tabs, then spaces_to_tabs will give you back your original version. If
you use some crazy mix of the two, you cannot reconstruct your
original without remembering where there were tabs versus spaces,
which information either filter destroys. But that's the same as the
CRLF conversion. You could enable a warning when worktree file has an
inconsistent mixture of tabs and spaces, like there is for
inconsistent CRLF files.

On Tue, Oct 14, 2008 at 11:25 PM, Alex Riesen <raa.lkml@gmail.com> wrote:
> Stefan Karpinski, Wed, Oct 15, 2008 03:44:10 +0200:
>> I find myself really wanting to enforce standards in the use of spaces
>> versus tabs. I deal with some unruly programmers who refuse to set
>> their editors to use soft tabs, but I *hate* tabs in the repo. And of
>> course others feel equally strongly about keeping only tabs in the
>> repo (e.g. the git repo).
>>
>> This led me to wonder if it wouldn't make sense to have this
>> conversion ability built into git. The following patch implements this
>> functionality. It still needs work—it's not meant to be final, just to
>> give an idea—but I just wanted to see if people on the git list
>> thought this sort of thing would be worthwhile at all.
>
> Is your conversion two-way? IOW, is it possible to convert the
> converted file and get the original? (Because all the existing
> conversions are reversible)
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15  6:26     ` Johannes Sixt
@ 2008-10-15 20:55       ` Stefan Karpinski
  0 siblings, 0 replies; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-15 20:55 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Git Mailing List

>> This led me to wonder if it wouldn't make sense to have this
>> conversion ability built into git.
>
> This wouldn't help your case a lot. It is still at the discretion of each
> individual repository owner to enable the conversion. (You didn't mean to
> make this conversion mandatory, did you?)

This would all be completely discretionary, of course. I just thought
that the issue was complicated enough and the problem of mixed spaces
and tabs in source code ubiquitous enough that it might be worth
"solving" correctly in git itself.

> BTW, you don't need to change git code to achieve this. It's sufficient to
> install a suitable "clean" filter:
>
> echo "*.c filter=c-code" > .git/info/attributes
> git config filter.c-code.clean tabs2spaces
>
> where tabs2spaces is your utility that does the conversion.

That was the first thing I did—in ruby first, then in C. The ruby
version is *way* too slow to use on any number of files—and the
typical use case is that most of your files are source code and so
will have this applied to them. On the other hand, the ruby script (or
perl or whatever) is portable, so I can send it to my Windows-based
developers and have them use it too.

But the speed issue is pretty crucial, so I rewrote the thing as an
external C program, which is obviously much faster. But it's still
noticably slower than native git checkout though. Worse still, now the
filter script isn't portable, so I have to cross-compile it and give
every developer the version that works on their platform and make sure
that they have their path setup correctly, etc.

Having the conversion built into git would
  1) have native git checkout speed (I've tried it and it's not
noticable slower than normal git checkouts);
  2) be naturally portable to anywhere git works.

Now, if I thought this was a thing that would only benefit me, I would
never bring it up. However, it seems like something that a lot of
projects might want to use to ensure standard spacing in their source
files. Moreover, since developers feel strongly about using tabs or
spaces, it would be beneficial for individuals too. I think there's
enough complex issues that solving it once-and-for-all might be worth
a shot.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15 20:52       ` Stefan Karpinski
@ 2008-10-15 21:02         ` Jonathan del Strother
  2008-10-15 21:18           ` Stefan Karpinski
  0 siblings, 1 reply; 11+ messages in thread
From: Jonathan del Strother @ 2008-10-15 21:02 UTC (permalink / raw)
  To: Stefan Karpinski; +Cc: Alex Riesen, Git Mailing List

On Wed, Oct 15, 2008 at 9:52 PM, Stefan Karpinski
<stefan.karpinski@gmail.com> wrote:
> if you consistently use spaces, then tabs_to_spaces will
> always give you back your original version; if you consistently use
> tabs, then spaces_to_tabs will give you back your original version. If
> you use some crazy mix of the two, you cannot reconstruct your
> original without remembering where there were tabs versus spaces,

Just IMO, a crazy mix of tabs and spaces is the only _sane_ thing to
do.  Using tabs for the initial indentation, plus spaces for alignment
of function arguments / comments / whatever, is the only way of
getting a layout that will both look right regardless of the tab size,
and allow a viewer to alter the indentation size.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15 21:02         ` Jonathan del Strother
@ 2008-10-15 21:18           ` Stefan Karpinski
  2008-10-15 23:02             ` Stefan Karpinski
  0 siblings, 1 reply; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-15 21:18 UTC (permalink / raw)
  To: Jonathan del Strother; +Cc: Alex Riesen, Git Mailing List, Johannes Sixt

>> if you consistently use spaces, then tabs_to_spaces will
>> always give you back your original version; if you consistently use
>> tabs, then spaces_to_tabs will give you back your original version. If
>> you use some crazy mix of the two, you cannot reconstruct your
>> original without remembering where there were tabs versus spaces,
>
> Just IMO, a crazy mix of tabs and spaces is the only _sane_ thing to
> do.  Using tabs for the initial indentation, plus spaces for alignment
> of function arguments / comments / whatever, is the only way of
> getting a layout that will both look right regardless of the tab size,
> and allow a viewer to alter the indentation size.

That's not what I would call a "crazy" mix of tabs and spaces, but
rather a *sane* mix of tabs and spaces. That can consistently be
reproduced, and is in fact what the spaces_to_tabs function included
above produces. The sane consistent formats as I see it are:

  1) use spaces for everything
  2) use tabs for indentation, spaces for everything else
  3) use tabs for indentation and alignment

If you know the tab size, you can reproduce any of these from the
others, except that #3 is a little tricky since there's places where
the tab/space issue can be ambiguous. I actually think that keeping
the repo version with tab-based indentation is a very sane thing to
do. However, I'd still like to be able to edit the files using soft
tabs, largely because any program that doesn't know what my tab size
should be applies its own interpretation and makes the code look
terrible (think terminal output for diff, cat, less, etc.)

On the other hand, a *crazy* mix of tabs and spaces is where some
indentation is done with spaces while other indentation is done with
tabs. Even crazier is a single line where the indentation is a mixture
of tabs and spaces. I think that just about everyone can agree that
this is not only crazy, but evil and is the kind of thing one really
wants to avoid in a code base. Unfortunately, when developers disagree
on their standard settings, it's very, very hard to avoid precisely
this kind of mess. My idea is to enable git to prevent this sort of
insanity if configured to do so.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15 21:18           ` Stefan Karpinski
@ 2008-10-15 23:02             ` Stefan Karpinski
  2008-10-15 23:34               ` A Large Angry SCM
  0 siblings, 1 reply; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-15 23:02 UTC (permalink / raw)
  To: Jonathan del Strother; +Cc: Alex Riesen, Git Mailing List, Johannes Sixt

Any further comments? I'm more than willing to implement this, but I
won't bother if there's no chance of getting it accepted as a patch.
Does no one else feel like at least having the option to enforce
whitespace consistency in git is a good thing? If not, I guess I'll
just muddle along without this feature instead of implementing it.

On Wed, Oct 15, 2008 at 2:18 PM, Stefan Karpinski
<stefan.karpinski@gmail.com> wrote:
> That's not what I would call a "crazy" mix of tabs and spaces, but
> rather a *sane* mix of tabs and spaces. That can consistently be
> reproduced, and is in fact what the spaces_to_tabs function included
> above produces. The sane consistent formats as I see it are:
>
>  1) use spaces for everything
>  2) use tabs for indentation, spaces for everything else
>  3) use tabs for indentation and alignment
>
> If you know the tab size, you can reproduce any of these from the
> others, except that #3 is a little tricky since there's places where
> the tab/space issue can be ambiguous. I actually think that keeping
> the repo version with tab-based indentation is a very sane thing to
> do. However, I'd still like to be able to edit the files using soft
> tabs, largely because any program that doesn't know what my tab size
> should be applies its own interpretation and makes the code look
> terrible (think terminal output for diff, cat, less, etc.)
>
> On the other hand, a *crazy* mix of tabs and spaces is where some
> indentation is done with spaces while other indentation is done with
> tabs. Even crazier is a single line where the indentation is a mixture
> of tabs and spaces. I think that just about everyone can agree that
> this is not only crazy, but evil and is the kind of thing one really
> wants to avoid in a code base. Unfortunately, when developers disagree
> on their standard settings, it's very, very hard to avoid precisely
> this kind of mess. My idea is to enable git to prevent this sort of
> insanity if configured to do so.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15 23:02             ` Stefan Karpinski
@ 2008-10-15 23:34               ` A Large Angry SCM
  2008-10-16  2:00                 ` Stefan Karpinski
  0 siblings, 1 reply; 11+ messages in thread
From: A Large Angry SCM @ 2008-10-15 23:34 UTC (permalink / raw)
  To: Stefan Karpinski
  Cc: Jonathan del Strother, Alex Riesen, Git Mailing List,
	Johannes Sixt

Stefan Karpinski wrote:
> Any further comments? I'm more than willing to implement this, but I
> won't bother if there's no chance of getting it accepted as a patch.
> Does no one else feel like at least having the option to enforce
> whitespace consistency in git is a good thing? If not, I guess I'll
> just muddle along without this feature instead of implementing it.

I'm against including this in except as a sample smudge/clean script. 
Git is a content tracker; not the enforcement mechanism for individual 
project policies.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: builtin conversion between tabs and spaces
  2008-10-15 23:34               ` A Large Angry SCM
@ 2008-10-16  2:00                 ` Stefan Karpinski
  0 siblings, 0 replies; 11+ messages in thread
From: Stefan Karpinski @ 2008-10-16  2:00 UTC (permalink / raw)
  To: gitzilla
  Cc: Jonathan del Strother, Alex Riesen, Git Mailing List,
	Johannes Sixt

>> Any further comments? I'm more than willing to implement this, but I
>> won't bother if there's no chance of getting it accepted as a patch.
>> Does no one else feel like at least having the option to enforce
>> whitespace consistency in git is a good thing? If not, I guess I'll
>> just muddle along without this feature instead of implementing it.
>
> I'm against including this in except as a sample smudge/clean script. Git is
> a content tracker; not the enforcement mechanism for individual project
> policies.

Alright. Fair enough.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-10-16  2:02 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <d4bc1a2a0810141839r547a770j3a8e56312afa6a53@mail.gmail.com>
     [not found] ` <d4bc1a2a0810141842q1e50c85au7d813f2e5e37a84c@mail.gmail.com>
2008-10-15  1:44   ` builtin conversion between tabs and spaces Stefan Karpinski
2008-10-15  1:47     ` Stefan Karpinski
2008-10-15  6:25     ` Alex Riesen
2008-10-15 20:52       ` Stefan Karpinski
2008-10-15 21:02         ` Jonathan del Strother
2008-10-15 21:18           ` Stefan Karpinski
2008-10-15 23:02             ` Stefan Karpinski
2008-10-15 23:34               ` A Large Angry SCM
2008-10-16  2:00                 ` Stefan Karpinski
2008-10-15  6:26     ` Johannes Sixt
2008-10-15 20:55       ` Stefan Karpinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).