* [PATCH 1/3] Add basic infrastructure to assign attributes to paths
@ 2007-04-13 9:01 Junio C Hamano
2007-04-13 9:33 ` Andy Parkins
` (2 more replies)
0 siblings, 3 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-13 9:01 UTC (permalink / raw)
To: git
This adds the basic infrastructure to assign attributes to
paths, in a way similar to what the exclusion mechanism does
based on $GIT_DIR/info/exclude and .gitignore files.
An attribute is just a simple string that does not contain any
whitespace. They can be specified in $GIT_DIR/info/attributes
file, and .gitattributes file in each directory.
Each line in these files defines a pattern matching rule.
Similar to the exclusion mechanism, a later match overrides an
earlier match in the same file, and entries from .gitattributes
file in the same directory takes precedence over the ones from
parent directories. Lines in $GIT_DIR/info/attributes file are
used as the lowest precedence default rules.
A line is either a comment (an empty line, or a line that begins
with a '#'), or a rule, which is a whitespace separated list of
tokens. The first token on the line is a shell glob pattern.
The rest are names of attributes, each of which can optionally
be prefixed with '!'. Such a line means "if a path matches this
glob, this attribute is set (or unset -- if the attribute name
is prefixed with '!'). For glob matching, the same "if the
pattern does not have a slash in it, the basename of the path is
matched with fnmatch(3) against the pattern, otherwise, the path
is matched with the pattern with FNM_PATHNAME" rule as the
exclusion mechanism is used.
This does not define what an attribute means. Tying an
attribute to various effects it has on git operation for paths
that have it (or doesn't) will be specified separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
.gitignore | 1 +
Makefile | 5 +-
attr.c | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++
attr.h | 16 +++
builtin-check-attr.c | 31 +++++
builtin.h | 1 +
cache.h | 1 +
git.c | 1 +
8 files changed, 355 insertions(+), 2 deletions(-)
create mode 100644 attr.c
create mode 100644 attr.h
create mode 100644 builtin-check-attr.c
diff --git a/.gitignore b/.gitignore
index 9229e91..d96f4f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ git-blame
git-branch
git-bundle
git-cat-file
+git-check-attr
git-check-ref-format
git-checkout
git-checkout-index
diff --git a/Makefile b/Makefile
index b8e6030..ac89d1b 100644
--- a/Makefile
+++ b/Makefile
@@ -283,7 +283,7 @@ LIB_H = \
diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \
run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
- utf8.h reflog-walk.h patch-ids.h
+ utf8.h reflog-walk.h attr.h
DIFF_OBJS = \
diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -305,7 +305,7 @@ LIB_OBJS = \
write_or_die.o trace.o list-objects.o grep.o match-trees.o \
alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
- convert.o
+ convert.o attr.o
BUILTIN_OBJS = \
builtin-add.o \
@@ -316,6 +316,7 @@ BUILTIN_OBJS = \
builtin-branch.o \
builtin-bundle.o \
builtin-cat-file.o \
+ builtin-check-attr.o \
builtin-checkout-index.o \
builtin-check-ref-format.o \
builtin-commit-tree.o \
diff --git a/attr.c b/attr.c
new file mode 100644
index 0000000..bdbc4a3
--- /dev/null
+++ b/attr.c
@@ -0,0 +1,301 @@
+#include "cache.h"
+#include "attr.h"
+
+/*
+ * The basic design decision here is that we are not going to have
+ * insanely large number of attributes.
+ *
+ * This is a randomly chosen prime.
+ */
+#define HASHSIZE 257
+
+struct git_attr {
+ struct git_attr *next;
+ unsigned h;
+ char name[FLEX_ARRAY];
+};
+
+static struct git_attr *(git_attr_hash[HASHSIZE]);
+
+static unsigned hash_name(const char *name, int namelen)
+{
+ unsigned val = 0;
+ unsigned char c;
+
+ while (namelen--) {
+ c = *name++;
+ val = ((val << 7) | (val >> 22)) ^ c;
+ }
+ return val;
+}
+
+struct git_attr *git_attr(const char *name, int len)
+{
+ unsigned hval = hash_name(name, len);
+ unsigned pos = hval % HASHSIZE;
+ struct git_attr *a;
+
+ for (a = git_attr_hash[pos]; a; a = a->next) {
+ if (a->h == hval &&
+ !memcmp(a->name, name, len) && !a->name[len])
+ return a;
+ }
+
+ a = xmalloc(sizeof(*a) + len + 1);
+ memcpy(a->name, name, len);
+ a->name[len] = 0;
+ a->h = hval;
+ a->next = git_attr_hash[pos];
+ git_attr_hash[pos] = a;
+ return a;
+}
+
+/*
+ * .gitattributes file is one line per record, each of which is
+ *
+ * (1) glob pattern.
+ * (2) whitespace
+ * (3) whitespace separated list of attribute names, each of which
+ * could be prefixed with '!' to mean "not set".
+ */
+
+struct attr_state {
+ int unset;
+ struct git_attr *attr;
+};
+
+struct match_attr {
+ char *pattern;
+ unsigned num_attr;
+ struct attr_state state[FLEX_ARRAY];
+};
+
+static const char blank[] = " \t\r\n";
+
+static struct match_attr *parse_attr_line(char *line)
+{
+ int namelen;
+ int num_attr;
+ char *cp, *name;
+ struct match_attr *res = res;
+ int pass;
+
+ cp = line + strspn(line, blank);
+ if (!*cp || *cp == '#')
+ return NULL;
+ name = cp;
+ namelen = strcspn(name, blank);
+
+ for (pass = 0; pass < 2; pass++) {
+ /* pass 0 counts and allocates, pass 1 fills */
+ num_attr = 0;
+ cp = name + namelen;
+ cp = cp + strspn(cp, blank);
+ while (*cp) {
+ char *ep;
+ ep = cp + strcspn(cp, blank);
+ if (pass) {
+ struct attr_state *e;
+
+ e = &(res->state[num_attr]);
+ if (*cp == '!') {
+ e->unset = 1;
+ cp++;
+ }
+ e->attr = git_attr(cp, ep - cp);
+ }
+ num_attr++;
+ cp = ep + strspn(ep, blank);
+ }
+ if (pass)
+ break;
+ res = xcalloc(1,
+ sizeof(*res) +
+ sizeof(struct attr_state) * num_attr +
+ namelen + 1);
+ res->pattern = (char*)&(res->state[num_attr]);
+ memcpy(res->pattern, name, namelen);
+ res->pattern[namelen] = 0;
+ res->num_attr = num_attr;
+ }
+ return res;
+}
+
+/*
+ * Like info/exclude and .gitignore, the attribute information can
+ * come from many places.
+ *
+ * (1) .gitattribute file of the same directory;
+ * (2) .gitattribute file of the parent directory if (1) does not have any match;
+ * this goes recursively upwards, just like .gitignore
+ * (3) perhaps $GIT_DIR/info/attributes, as the final fallback.
+ *
+ * In the same file, later entries override the earlier match, so in the
+ * global list, we would have entries from info/attributes the earliest
+ * (reading the file from top to bottom), .gitattribute of the root
+ * directory (again, reading the file from top to bottom) down to the
+ * current directory, and then scan the list backwards to find the first match.
+ * This is exactly the same as what excluded() does in dir.c to deal with
+ * .gitignore
+ */
+
+static struct attr_stack {
+ struct attr_stack *prev;
+ char *origin;
+ unsigned num_matches;
+ struct match_attr **attrs;
+} *attr_stack;
+
+static void free_attr_elem(struct attr_stack *e)
+{
+ int i;
+ free(e->origin);
+ for (i = 0; i < e->num_matches; i++)
+ free(e->attrs[i]);
+ free(e);
+}
+
+static struct attr_stack *read_attr_from_file(const char *path)
+{
+ FILE *fp;
+ struct attr_stack *res;
+ char buf[2048];
+
+ res = xcalloc(1, sizeof(*res));
+ fp = fopen(path, "r");
+ if (!fp)
+ return res;
+
+ while (fgets(buf, sizeof(buf), fp)) {
+ struct match_attr *a = parse_attr_line(buf);
+ if (!a)
+ continue;
+ res->attrs = xrealloc(res->attrs, res->num_matches + 1);
+ res->attrs[res->num_matches++] = a;
+ }
+ fclose(fp);
+ return res;
+}
+
+static void prepare_attr_stack(const char *path, int dirlen)
+{
+ struct attr_stack *elem;
+ int len;
+ char pathbuf[PATH_MAX];
+
+ if (!attr_stack) {
+ elem = read_attr_from_file(git_path("info/attributes"));
+ elem->origin = NULL;
+ attr_stack = elem;
+
+ elem = read_attr_from_file(GITATTRIBUTES_FILE);
+ elem->origin = strdup("");
+ elem->prev = attr_stack;
+ attr_stack = elem;
+ }
+
+ while (attr_stack && attr_stack->origin) {
+ int namelen = strlen(attr_stack->origin);
+ int len = (dirlen < namelen) ? dirlen : namelen;
+ if (!strncmp(attr_stack->origin, path, len))
+ break;
+ elem = attr_stack;
+ attr_stack = elem->prev;
+ free_attr_elem(elem);
+ }
+
+ while (1) {
+ char *cp;
+
+ len = strlen(attr_stack->origin);
+ if (dirlen <= len)
+ break;
+ memcpy(pathbuf, path, dirlen);
+ memcpy(pathbuf + dirlen, "/", 2);
+ cp = strchr(pathbuf + len + 1, '/');
+ strcpy(cp + 1, GITATTRIBUTES_FILE);
+ elem = read_attr_from_file(pathbuf);
+ *cp = '\0';
+ elem->origin = strdup(pathbuf);
+ elem->prev = attr_stack;
+ attr_stack = elem;
+ }
+}
+
+static int path_matches(const char *pathname, int pathlen,
+ const char *pattern,
+ const char *base, int baselen)
+{
+ if (!strchr(pattern, '/')) {
+ /* match basename */
+ const char *basename = strrchr(pathname, '/');
+ basename = basename ? basename + 1 : pathname;
+ return (fnmatch(pattern, basename, 0) == 0);
+ }
+ /*
+ * match with FNM_PATHNAME; the pattern has base implicitly
+ * in front of it.
+ */
+ if (*pattern == '/')
+ pattern++;
+ if (pathlen < baselen ||
+ (baselen && pathname[baselen - 1] != '/') ||
+ strncmp(pathname, base, baselen))
+ return 0;
+ return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+}
+
+/*
+ * I do not like this at all. Only because we allow individual
+ * attribute to be set or unset incrementally by individual
+ * lines in .gitattribute files, we need to do this triple
+ * loop which looks quite wasteful.
+ */
+static int fill(const char *path, int pathlen,
+ struct attr_stack *stk, struct git_attr_check *check,
+ int num, int rem)
+{
+ int i, j, k;
+ const char *base = stk->origin ? stk->origin : "";
+
+ for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
+ struct match_attr *a = stk->attrs[i];
+ if (path_matches(path, pathlen,
+ a->pattern, base, strlen(base))) {
+ for (j = 0; j < a->num_attr; j++) {
+ struct git_attr *attr = a->state[j].attr;
+ int set = !a->state[j].unset;
+ for (k = 0; k < num; k++) {
+ if (0 <= check[k].isset ||
+ check[k].attr != attr)
+ continue;
+ check[k].isset = set;
+ rem--;
+ }
+ }
+ }
+ }
+ return rem;
+}
+
+int git_checkattr(const char *path, int num, struct git_attr_check *check)
+{
+ struct attr_stack *stk;
+ const char *cp;
+ int dirlen, pathlen, i, rem;
+
+ for (i = 0; i < num; i++)
+ check[i].isset = -1;
+
+ pathlen = strlen(path);
+ cp = strrchr(path, '/');
+ if (!cp)
+ dirlen = 0;
+ else
+ dirlen = cp - path;
+ prepare_attr_stack(path, dirlen);
+ rem = num;
+ for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
+ rem = fill(path, pathlen, stk, check, num, rem);
+ return 0;
+}
diff --git a/attr.h b/attr.h
new file mode 100644
index 0000000..1e5ab40
--- /dev/null
+++ b/attr.h
@@ -0,0 +1,16 @@
+#ifndef ATTR_H
+#define ATTR_H
+
+/* An attribute is a pointer to this opaque structure */
+struct git_attr;
+
+struct git_attr *git_attr(const char *, int);
+
+struct git_attr_check {
+ struct git_attr *attr;
+ int isset;
+};
+
+int git_checkattr(const char *path, int, struct git_attr_check *);
+
+#endif /* ATTR_H */
diff --git a/builtin-check-attr.c b/builtin-check-attr.c
new file mode 100644
index 0000000..643f3ba
--- /dev/null
+++ b/builtin-check-attr.c
@@ -0,0 +1,31 @@
+#include "builtin.h"
+#include "attr.h"
+
+static const char check_attr_usage[] =
+"git-check-attr pathname attr...";
+
+int cmd_check_attr(int argc, const char **argv, const char *prefix)
+{
+
+ struct git_attr_check *check;
+ int cnt, i;
+
+ if (argc < 3)
+ usage(check_attr_usage);
+
+ cnt = argc - 2;
+ check = xcalloc(cnt, sizeof(*check));
+ for (i = 0; i < cnt; i++) {
+ const char *name;
+ name = argv[i+2];
+ check[i].attr = git_attr(name, strlen(name));
+ }
+ if (git_checkattr(argv[1], cnt, check))
+ die("git_checkattr died");
+ for (i = 0; i < cnt; i++)
+ printf("%s: %s\n", argv[i+2],
+ (check[i].isset < 0) ? "unspecified" :
+ (check[i].isset == 0) ? "unset" :
+ "set");
+ return 0;
+}
diff --git a/builtin.h b/builtin.h
index af203e9..d3f3a74 100644
--- a/builtin.h
+++ b/builtin.h
@@ -22,6 +22,7 @@ extern int cmd_branch(int argc, const char **argv, const char *prefix);
extern int cmd_bundle(int argc, const char **argv, const char *prefix);
extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
extern int cmd_checkout_index(int argc, const char **argv, const char *prefix);
+extern int cmd_check_attr(int argc, const char **argv, const char *prefix);
extern int cmd_check_ref_format(int argc, const char **argv, const char *prefix);
extern int cmd_cherry(int argc, const char **argv, const char *prefix);
extern int cmd_cherry_pick(int argc, const char **argv, const char *prefix);
diff --git a/cache.h b/cache.h
index b1bd9e4..bec1938 100644
--- a/cache.h
+++ b/cache.h
@@ -151,6 +151,7 @@ enum object_type {
#define CONFIG_ENVIRONMENT "GIT_CONFIG"
#define CONFIG_LOCAL_ENVIRONMENT "GIT_CONFIG_LOCAL"
#define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
+#define GITATTRIBUTES_FILE ".gitattributes"
extern int is_bare_repository_cfg;
extern int is_bare_repository(void);
diff --git a/git.c b/git.c
index 7def319..f200907 100644
--- a/git.c
+++ b/git.c
@@ -234,6 +234,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp)
{ "cat-file", cmd_cat_file, RUN_SETUP },
{ "checkout-index", cmd_checkout_index, RUN_SETUP },
{ "check-ref-format", cmd_check_ref_format },
+ { "check-attr", cmd_check_attr, RUN_SETUP | NOT_BARE },
{ "cherry", cmd_cherry, RUN_SETUP },
{ "cherry-pick", cmd_cherry_pick, RUN_SETUP | NOT_BARE },
{ "commit-tree", cmd_commit_tree, RUN_SETUP },
--
1.5.1.1.784.g95e2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] Add basic infrastructure to assign attributes to paths
2007-04-13 9:01 [PATCH 1/3] Add basic infrastructure to assign attributes to paths Junio C Hamano
@ 2007-04-13 9:33 ` Andy Parkins
2007-04-15 0:59 ` Junio C Hamano
2007-04-13 15:04 ` [PATCH 1/3] Add basic infrastructure to assign attributes to paths Linus Torvalds
2007-04-15 15:54 ` Johannes Schindelin
2 siblings, 1 reply; 19+ messages in thread
From: Andy Parkins @ 2007-04-13 9:33 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano
On Friday 2007, April 13, Junio C Hamano wrote:
> parent directories. Lines in $GIT_DIR/info/attributes file are
> used as the lowest precedence default rules.
Shouldn't this be the highest precedence? This would be important for
those cases where I (as a fringe developer) disagree with an attribute
that's been assigned in-tree. I don't want to force my will on every
other developer, but would want my repository to work how I like it.
For example: what if I /can/ read postscript :-)
Incidentally - already I love gitattributes - nicely done Junio.
Andy
--
Dr Andy Parkins, M Eng (hons), MIET
andyparkins@gmail.com
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] Add basic infrastructure to assign attributes to paths
2007-04-13 9:01 [PATCH 1/3] Add basic infrastructure to assign attributes to paths Junio C Hamano
2007-04-13 9:33 ` Andy Parkins
@ 2007-04-13 15:04 ` Linus Torvalds
2007-04-15 15:54 ` Johannes Schindelin
2 siblings, 0 replies; 19+ messages in thread
From: Linus Torvalds @ 2007-04-13 15:04 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Fri, 13 Apr 2007, Junio C Hamano wrote:
>
> This adds the basic infrastructure to assign attributes to
> paths, in a way similar to what the exclusion mechanism does
> based on $GIT_DIR/info/exclude and .gitignore files.
Me likee. The patches look much simpler/cleaner than I expected.
Linus
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] Add basic infrastructure to assign attributes to paths
2007-04-13 9:33 ` Andy Parkins
@ 2007-04-15 0:59 ` Junio C Hamano
2007-04-15 1:01 ` [PATCH 1/2] attribute macro support Junio C Hamano
2007-04-15 1:03 ` [PATCH 2/2] Define a few built-in attribute rules Junio C Hamano
0 siblings, 2 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 0:59 UTC (permalink / raw)
To: git; +Cc: Andy Parkins, Johannes Sixt
Andy Parkins <andyparkins@gmail.com> writes:
>> parent directories. Lines in $GIT_DIR/info/attributes file are
>> used as the lowest precedence default rules.
>
> Shouldn't this be the highest precedence? This would be important for
> those cases where I (as a fringe developer) disagree with an attribute
> that's been assigned in-tree. I don't want to force my will on every
> other developer, but would want my repository to work how I like it.
Johannes Sixt <J.Sixt@eudaptics.com> writes:
>> This makes paths with 'nodiff' attribute not to produce
>> "textual" diffs from 'git-diff' family.
>
> If saying "nodiff" can be made equivalent to "!diff", then I'd strongly
> prefer an attribute "diff" over "nodiff". I'm a strong disbeliever in
> double negation.
Both of these are good points.
The only reason I initially made it 'nodiff' was to have a pair
of examples to demonstrate positive and negative setting of
attributes, and I agree it makes more sense to say 'diff' in
positive.
I reshuffled the code to make $GIT_DIR/info/attributes the
highest precedence, and unsetting 'diff' attribute to disable
diff; the result is in 'next'.
I'll follow this message up with a few more patches in the
series.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 1/2] attribute macro support
2007-04-15 0:59 ` Junio C Hamano
@ 2007-04-15 1:01 ` Junio C Hamano
2007-04-15 1:03 ` [PATCH 2/2] Define a few built-in attribute rules Junio C Hamano
1 sibling, 0 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 1:01 UTC (permalink / raw)
To: git
This adds "attribute macros" (for lack of better name). So far,
we have low-level attributes such as crlf and diff, which are
defined in operational terms --- setting or unsetting them on a
particular path directly affects what is done to the path. For
example, in order to decline diffs or crlf conversions on a
binary blob, no diffs on PostScript files, and treat all other
files normally, you would have something like these:
* diff crlf
*.ps !diff
proprietary.o !diff !crlf
That is fine as the operation goes, but gets unwieldy rather
rapidly, when we start adding more low-level attributes that are
defined in operational terms. A near-term example of such an
attribute would be 'merge-3way' which would control if git
should attempt the usual 3-way file-level merge internally, or
leave merging to a specialized external program of user's
choice. When it is added, we do _not_ want to force the users
to update the above to:
* diff crlf merge-3way
*.ps !diff
proprietary.o !diff !crlf !merge-3way
The way this patch solves this issue is to realize that the
attributes the user is assigning to paths are not defined in
terms of operations but in terms of what they are.
All of the three low-level attributes usually make sense for
most of the files that sane SCM users have git operate on (these
files are typically called "text'). Only a few cases, such as
binary blob, need exception to decline the "usual treatment
given to text files" -- and people mark them as "binary".
So this allows the $GIT_DIR/info/alternates and .gitattributes
at the toplevel of the project to also specify attributes that
assigns other attributes. The syntax is '[attr]' followed by an
attribute name followed by a list of attribute names:
[attr] binary !diff !crlf !merge-3way
When "binary" attribute is set to a path, if the path has not
got diff/crlf/merge-3way attribute set or unset by other rules,
this rule unsets the three low-level attributes.
It is expected that the user level .gitattributes will be
expressed mostly in terms of attributes based on what the files
are, and the above sample would become like this:
(built-in attribute configuration)
[attr] binary !diff !crlf !merge-3way
* diff crlf merge-3way
(project specific .gitattributes)
proprietary.o binary
(user preference $GIT_DIR/info/attributes)
*.ps !diff
There are a few caveats.
* As described above, you can define these macros only in
$GIT_DIR/info/attributes and toplevel .gitattributes.
* There is no attempt to detect circular definition of macro
attributes, and definitions are evaluated from bottom to top
as usual to fill in other attributes that have not yet got
values. The following would work as expected:
[attr] text diff crlf
[attr] ps text !diff
*.ps ps
while this would most likely not (I haven't tried):
[attr] ps text !diff
[attr] text diff crlf
*.ps ps
* When a macro says "[attr] A B !C", saying that a path does
not have attribute A does not let you tell anything about
attributes B or C. That is, given this:
[attr] text diff crlf
[attr] ps text !diff
*.txt !ps
path hello.txt, which would match "*.txt" pattern, would have
"ps" attribute set to zero, but that does not make text
attribute of hello.txt set to false (nor diff attribute set to
true).
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
attr.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++---------------
cache.h | 1 +
2 files changed, 108 insertions(+), 33 deletions(-)
diff --git a/attr.c b/attr.c
index 7435d92..3a14df1 100644
--- a/attr.c
+++ b/attr.c
@@ -16,9 +16,12 @@
struct git_attr {
struct git_attr *next;
unsigned h;
+ int attr_nr;
char name[FLEX_ARRAY];
};
+static int attr_nr;
+static struct git_attr_check *check_all_attr;
static struct git_attr *(git_attr_hash[HASHSIZE]);
static unsigned hash_name(const char *name, int namelen)
@@ -50,7 +53,12 @@ struct git_attr *git_attr(const char *name, int len)
a->name[len] = 0;
a->h = hval;
a->next = git_attr_hash[pos];
+ a->attr_nr = attr_nr++;
git_attr_hash[pos] = a;
+
+ check_all_attr = xrealloc(check_all_attr,
+ sizeof(*check_all_attr) * attr_nr);
+ check_all_attr[a->attr_nr].attr = a;
return a;
}
@@ -69,26 +77,46 @@ struct attr_state {
};
struct match_attr {
- char *pattern;
+ union {
+ char *pattern;
+ struct git_attr *attr;
+ } u;
+ char is_macro;
unsigned num_attr;
struct attr_state state[FLEX_ARRAY];
};
static const char blank[] = " \t\r\n";
-static struct match_attr *parse_attr_line(const char *line)
+static struct match_attr *parse_attr_line(const char *line, const char *src,
+ int lineno, int macro_ok)
{
int namelen;
int num_attr;
const char *cp, *name;
struct match_attr *res = res;
int pass;
+ int is_macro;
cp = line + strspn(line, blank);
if (!*cp || *cp == '#')
return NULL;
name = cp;
namelen = strcspn(name, blank);
+ if (strlen(ATTRIBUTE_MACRO_PREFIX) < namelen &&
+ !prefixcmp(name, ATTRIBUTE_MACRO_PREFIX)) {
+ if (!macro_ok) {
+ fprintf(stderr, "%s not allowed: %s:%d\n",
+ name, src, lineno);
+ return NULL;
+ }
+ is_macro = 1;
+ name += strlen(ATTRIBUTE_MACRO_PREFIX);
+ name += strspn(name, blank);
+ namelen = strcspn(name, blank);
+ }
+ else
+ is_macro = 0;
for (pass = 0; pass < 2; pass++) {
/* pass 0 counts and allocates, pass 1 fills */
@@ -113,13 +141,19 @@ static struct match_attr *parse_attr_line(const char *line)
}
if (pass)
break;
+
res = xcalloc(1,
sizeof(*res) +
sizeof(struct attr_state) * num_attr +
- namelen + 1);
- res->pattern = (char*)&(res->state[num_attr]);
- memcpy(res->pattern, name, namelen);
- res->pattern[namelen] = 0;
+ (is_macro ? 0 : namelen + 1));
+ if (is_macro)
+ res->u.attr = git_attr(name, namelen);
+ else {
+ res->u.pattern = (char*)&(res->state[num_attr]);
+ memcpy(res->u.pattern, name, namelen);
+ res->u.pattern[namelen] = 0;
+ }
+ res->is_macro = is_macro;
res->num_attr = num_attr;
}
return res;
@@ -167,10 +201,13 @@ static struct attr_stack *read_attr_from_array(const char **list)
{
struct attr_stack *res;
const char *line;
+ int lineno = 0;
res = xcalloc(1, sizeof(*res));
while ((line = *(list++)) != NULL) {
- struct match_attr *a = parse_attr_line(line);
+ struct match_attr *a;
+
+ a = parse_attr_line(line, "[builtin]", ++lineno, 1);
if (!a)
continue;
res->attrs = xrealloc(res->attrs, res->num_matches + 1);
@@ -179,11 +216,12 @@ static struct attr_stack *read_attr_from_array(const char **list)
return res;
}
-static struct attr_stack *read_attr_from_file(const char *path)
+static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
{
FILE *fp;
struct attr_stack *res;
char buf[2048];
+ int lineno = 0;
res = xcalloc(1, sizeof(*res));
fp = fopen(path, "r");
@@ -191,7 +229,9 @@ static struct attr_stack *read_attr_from_file(const char *path)
return res;
while (fgets(buf, sizeof(buf), fp)) {
- struct match_attr *a = parse_attr_line(buf);
+ struct match_attr *a;
+
+ a = parse_attr_line(buf, path, ++lineno, macro_ok);
if (!a)
continue;
res->attrs = xrealloc(res->attrs, res->num_matches + 1);
@@ -206,11 +246,17 @@ static void debug_info(const char *what, struct attr_stack *elem)
{
fprintf(stderr, "%s: %s\n", what, elem->origin ? elem->origin : "()");
}
+static void debug_set(const char *what, const char *match, struct git_attr *attr, int set)
+{
+ fprintf(stderr, "%s: %s => %d (%s)\n",
+ what, attr->name, set, match);
+}
#define debug_push(a) debug_info("push", (a))
#define debug_pop(a) debug_info("pop", (a))
#else
#define debug_push(a) do { ; } while (0)
#define debug_pop(a) do { ; } while (0)
+#define debug_set(a,b,c,d) do { ; } while (0)
#endif
static void prepare_attr_stack(const char *path, int dirlen)
@@ -238,13 +284,13 @@ static void prepare_attr_stack(const char *path, int dirlen)
elem->prev = attr_stack;
attr_stack = elem;
- elem = read_attr_from_file(GITATTRIBUTES_FILE);
+ elem = read_attr_from_file(GITATTRIBUTES_FILE, 1);
elem->origin = strdup("");
elem->prev = attr_stack;
attr_stack = elem;
debug_push(elem);
- elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE));
+ elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE), 1);
elem->origin = NULL;
elem->prev = attr_stack;
attr_stack = elem;
@@ -286,7 +332,7 @@ static void prepare_attr_stack(const char *path, int dirlen)
memcpy(pathbuf + dirlen, "/", 2);
cp = strchr(pathbuf + len + 1, '/');
strcpy(cp + 1, GITATTRIBUTES_FILE);
- elem = read_attr_from_file(pathbuf);
+ elem = read_attr_from_file(pathbuf, 0);
*cp = '\0';
elem->origin = strdup(pathbuf);
elem->prev = attr_stack;
@@ -324,31 +370,26 @@ static int path_matches(const char *pathname, int pathlen,
return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
}
-/*
- * I do not like this at all. Only because we allow individual
- * attribute to be set or unset incrementally by individual
- * lines in .gitattribute files, we need to do this triple
- * loop which looks quite wasteful.
- */
-static int fill(const char *path, int pathlen,
- struct attr_stack *stk, struct git_attr_check *check,
- int num, int rem)
+static int fill(const char *path, int pathlen, struct attr_stack *stk, int rem)
{
- int i, j, k;
const char *base = stk->origin ? stk->origin : "";
+ int i, j;
+ struct git_attr_check *check = check_all_attr;
for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
struct match_attr *a = stk->attrs[i];
+ if (a->is_macro)
+ continue;
if (path_matches(path, pathlen,
- a->pattern, base, strlen(base))) {
- for (j = 0; j < a->num_attr; j++) {
+ a->u.pattern, base, strlen(base))) {
+ for (j = 0; 0 < rem && j < a->num_attr; j++) {
struct git_attr *attr = a->state[j].attr;
int set = !a->state[j].unset;
- for (k = 0; k < num; k++) {
- if (0 <= check[k].isset ||
- check[k].attr != attr)
- continue;
- check[k].isset = set;
+ int *n = &(check[attr->attr_nr].isset);
+
+ if (*n < 0) {
+ debug_set("fill", a->u.pattern, attr, set);
+ *n = set;
rem--;
}
}
@@ -357,14 +398,40 @@ static int fill(const char *path, int pathlen,
return rem;
}
+static int macroexpand(struct attr_stack *stk, int rem)
+{
+ int i, j;
+ struct git_attr_check *check = check_all_attr;
+
+ for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
+ struct match_attr *a = stk->attrs[i];
+ if (!a->is_macro)
+ continue;
+ if (check[a->u.attr->attr_nr].isset < 0)
+ continue;
+ for (j = 0; 0 < rem && j < a->num_attr; j++) {
+ struct git_attr *attr = a->state[j].attr;
+ int set = !a->state[j].unset;
+ int *n = &(check[attr->attr_nr].isset);
+
+ if (*n < 0) {
+ debug_set("expand", a->u.attr->name, attr, set);
+ *n = set;
+ rem--;
+ }
+ }
+ }
+ return rem;
+}
+
int git_checkattr(const char *path, int num, struct git_attr_check *check)
{
struct attr_stack *stk;
const char *cp;
int dirlen, pathlen, i, rem;
- for (i = 0; i < num; i++)
- check[i].isset = -1;
+ for (i = 0; i < attr_nr; i++)
+ check_all_attr[i].isset = -1;
pathlen = strlen(path);
cp = strrchr(path, '/');
@@ -373,8 +440,15 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
else
dirlen = cp - path;
prepare_attr_stack(path, dirlen);
- rem = num;
+ rem = attr_nr;
+ for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
+ rem = fill(path, pathlen, stk, rem);
+
for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
- rem = fill(path, pathlen, stk, check, num, rem);
+ rem = macroexpand(stk, rem);
+
+ for (i = 0; i < num; i++)
+ check[i].isset = check_all_attr[check[i].attr->attr_nr].isset;
+
return 0;
}
diff --git a/cache.h b/cache.h
index 63af43f..38ad006 100644
--- a/cache.h
+++ b/cache.h
@@ -153,6 +153,7 @@ enum object_type {
#define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
#define GITATTRIBUTES_FILE ".gitattributes"
#define INFOATTRIBUTES_FILE "info/attributes"
+#define ATTRIBUTE_MACRO_PREFIX "[attr]"
extern int is_bare_repository_cfg;
extern int is_bare_repository(void);
--
1.5.1.1.810.gac3a
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 0:59 ` Junio C Hamano
2007-04-15 1:01 ` [PATCH 1/2] attribute macro support Junio C Hamano
@ 2007-04-15 1:03 ` Junio C Hamano
2007-04-15 1:41 ` Linus Torvalds
1 sibling, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 1:03 UTC (permalink / raw)
To: git
This adds an obviously sane pair of default attribute rules as built-ins.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
attr.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/attr.c b/attr.c
index 3a14df1..9068c2e 100644
--- a/attr.c
+++ b/attr.c
@@ -194,6 +194,8 @@ static void free_attr_elem(struct attr_stack *e)
}
static const char *builtin_attr[] = {
+ "[attr]binary !diff !crlf",
+ "* diff crlf",
NULL,
};
--
1.5.1.1.810.gac3a
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 1:03 ` [PATCH 2/2] Define a few built-in attribute rules Junio C Hamano
@ 2007-04-15 1:41 ` Linus Torvalds
2007-04-15 1:48 ` Brian Gernhardt
2007-04-15 2:04 ` Junio C Hamano
0 siblings, 2 replies; 19+ messages in thread
From: Linus Torvalds @ 2007-04-15 1:41 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sat, 14 Apr 2007, Junio C Hamano wrote:
>
> This adds an obviously sane pair of default attribute rules as built-ins.
I'm not sure.
> + "[attr]binary !diff !crlf",
> + "* diff crlf",
Why would
* diff crlf
be "obviously sane"?
In fact, I'd call it obviously insane.
We do *not* want to default crlf to all files. We want the default to be
"automatic crlf depending on content".
Then, on top of that, you can *explicitly* specify crlf or !crlf on some
particular filename pattern bases.
(Side thought - I have to concur with whoever suggested "-" instead of
"!". It just reads better, I think)
Linus
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 1:41 ` Linus Torvalds
@ 2007-04-15 1:48 ` Brian Gernhardt
2007-04-15 2:04 ` Junio C Hamano
1 sibling, 0 replies; 19+ messages in thread
From: Brian Gernhardt @ 2007-04-15 1:48 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Junio C Hamano, git
On Apr 14, 2007, at 9:41 PM, Linus Torvalds wrote:
> (Side thought - I have to concur with whoever suggested "-" instead of
> "!". It just reads better, I think)
The "diff" vs "nodiff" argument reminds me of Vim option setting.
Marking a path "diff" means a diff is useful. Marking it "nodiff"
marks it non-useful, same with "crlf" and "nocrlf". Just an idea to
throw out there. It's fairly unambiguous.
~~ Brian G.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 1:41 ` Linus Torvalds
2007-04-15 1:48 ` Brian Gernhardt
@ 2007-04-15 2:04 ` Junio C Hamano
2007-04-15 2:34 ` Junio C Hamano
2007-04-15 4:30 ` Linus Torvalds
1 sibling, 2 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 2:04 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Sat, 14 Apr 2007, Junio C Hamano wrote:
>>
>> This adds an obviously sane pair of default attribute rules as built-ins.
>
> I'm not sure.
>
>> + "[attr]binary !diff !crlf",
>> + "* diff crlf",
>
> Why would
>
> * diff crlf
>
> be "obviously sane"?
>
> In fact, I'd call it obviously insane.
>
> We do *not* want to default crlf to all files. We want the default to be
> "automatic crlf depending on content".
You do not have to worry.
That's how "crlf" is defined. Paths you explicitly say !crlf
will _not_ go through the existing core.autocrlf mechanism.
"* crlf" just says, by default everybody is subject to core.autocrlf,
and on sane platforms, core.autocrlf is by default off, hence you will
not get LF <-> CRLF applied.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 2:04 ` Junio C Hamano
@ 2007-04-15 2:34 ` Junio C Hamano
2007-04-15 16:21 ` Johannes Schindelin
2007-04-15 4:30 ` Linus Torvalds
1 sibling, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 2:34 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Junio C Hamano <junkio@cox.net> writes:
> Linus Torvalds <torvalds@linux-foundation.org> writes:
>
>> Why would
>>
>> * diff crlf
>>
>> be "obviously sane"?
>>
>> In fact, I'd call it obviously insane.
>>
>> We do *not* want to default crlf to all files. We want the default to be
>> "automatic crlf depending on content".
>
> You do not have to worry.
>
> That's how "crlf" is defined. Paths you explicitly say !crlf
> will _not_ go through the existing core.autocrlf mechanism.
>
> "* crlf" just says, by default everybody is subject to core.autocrlf,
> and on sane platforms, core.autocrlf is by default off, hence you will
> not get LF <-> CRLF applied.
Having said that, if we really wanted to, we could introduce a
way to explicitly say "Even if the contents do not look like
text, apply line ending conversion, always", by redefining the
meaning of 'crlf' attribute.
But I do not know if that makes much sense. Being able to turn
_off_ would be a good thing because a particular file that looks
like CRLF terminated text might not be text. But the other way
around? IOW, I do not think of a case where a file that does
not even look like a text wants CRLF conversion.
---
diff --git a/convert.c b/convert.c
index 20c744a..f9e5d63 100644
--- a/convert.c
+++ b/convert.c
@@ -191,7 +191,7 @@ static void setup_crlf_check(struct git_attr_check *check)
check->attr = attr_crlf;
}
-static int git_path_is_binary(const char *path)
+static int git_path_check_crlf(const char *path)
{
struct git_attr_check attr_crlf_check;
@@ -202,20 +202,31 @@ static int git_path_is_binary(const char *path)
* disable autocrlf only when crlf attribute is explicitly
* unset.
*/
- return (!git_checkattr(path, 1, &attr_crlf_check) &&
- (0 == attr_crlf_check.isset));
+ if (!git_checkattr(path, 1, &attr_crlf_check))
+ return -1;
+ return attr_crlf_check.isset;
}
int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
{
- if (git_path_is_binary(path))
+ switch (git_path_check_crlf(path)) {
+ case 0:
return 0;
- return autocrlf_to_git(path, bufp, sizep);
+ case 1:
+ return forcecrlf_to_git(path, bufp, sizep);
+ default:
+ return autocrlf_to_git(path, bufp, sizep);
+ }
}
int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
{
- if (git_path_is_binary(path))
+ switch (git_path_check_crlf(path)) {
+ case 0:
return 0;
- return autocrlf_to_working_tree(path, bufp, sizep);
+ case 1:
+ return forcecrlf_to_working_tree(path, bufp, sizep);
+ default:
+ return autocrlf_to_working_tree(path, bufp, sizep);
+ }
}
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 2:04 ` Junio C Hamano
2007-04-15 2:34 ` Junio C Hamano
@ 2007-04-15 4:30 ` Linus Torvalds
2007-04-15 23:10 ` [PATCH] Fix 'crlf' attribute semantics Junio C Hamano
1 sibling, 1 reply; 19+ messages in thread
From: Linus Torvalds @ 2007-04-15 4:30 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sat, 14 Apr 2007, Junio C Hamano wrote:
>
> You do not have to worry.
I do.
> That's how "crlf" is defined. Paths you explicitly say !crlf
> will _not_ go through the existing core.autocrlf mechanism.
That's broken.
It should be:
- "crlf": always do crlf.
- "!crlf": never do crlf.
- no attrbute: guess.
Why? Because quite frankly, it's quite possible that some file really *is*
text, even if the content-based guessing doesn't catch it.
It boils down to a simple truth: if our content-based guessing is so
perfect that it never makes mistakes, there's no *point* to having a
'crlf' attribute in the first place!
Here's a simple example:
echo -e '\007Bell!' > bell
and just because we consider the BEL character to be binary, we'll think
the file is binary.
Could we add the BEL character? Sure. But that's not the point. The
*point* is that the whole and only reason for attributes in the first
place is to _override_ guessing.
The guesses should be good enough that hopefully nobody really will ever
need attributes. But people do strange things.
Linus
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] Add basic infrastructure to assign attributes to paths
2007-04-13 9:01 [PATCH 1/3] Add basic infrastructure to assign attributes to paths Junio C Hamano
2007-04-13 9:33 ` Andy Parkins
2007-04-13 15:04 ` [PATCH 1/3] Add basic infrastructure to assign attributes to paths Linus Torvalds
@ 2007-04-15 15:54 ` Johannes Schindelin
2 siblings, 0 replies; 19+ messages in thread
From: Johannes Schindelin @ 2007-04-15 15:54 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Hi,
On Fri, 13 Apr 2007, Junio C Hamano wrote:
> --- a/Makefile
> +++ b/Makefile
> @@ -283,7 +283,7 @@ LIB_H = \
> diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \
> run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
> tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
> - utf8.h reflog-walk.h patch-ids.h
> + utf8.h reflog-walk.h attr.h
Did you really want to remove "patch-ids.h" from the list?
Ciao,
Dscho
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 2:34 ` Junio C Hamano
@ 2007-04-15 16:21 ` Johannes Schindelin
2007-04-15 19:58 ` Junio C Hamano
0 siblings, 1 reply; 19+ messages in thread
From: Johannes Schindelin @ 2007-04-15 16:21 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Linus Torvalds, git
Hi,
On Sat, 14 Apr 2007, Junio C Hamano wrote:
> [...] if we really wanted to, we could introduce a way to explicitly say
> "Even if the contents do not look like text, apply line ending
> conversion, always", by redefining the meaning of 'crlf' attribute.
I think it might make more sense to introduce a way to say "do not even
check; I _know_ that I want crlf on these".
It might show performance improvements on large repos, for example.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/2] Define a few built-in attribute rules.
2007-04-15 16:21 ` Johannes Schindelin
@ 2007-04-15 19:58 ` Junio C Hamano
0 siblings, 0 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 19:58 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Linus Torvalds, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> On Sat, 14 Apr 2007, Junio C Hamano wrote:
>
>> [...] if we really wanted to, we could introduce a way to explicitly say
>> "Even if the contents do not look like text, apply line ending
>> conversion, always", by redefining the meaning of 'crlf' attribute.
>
> I think it might make more sense to introduce a way to say "do not even
> check; I _know_ that I want crlf on these".
You said I wanted to say in the message in clearer words. We
are in agreement.
Also the comments to the two patches in this series you sent are
valid; I reworked them before merging to 'next' and pushing them
out yesterday, but I think the Makefile thing still remains.
Will fix up.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] Fix 'crlf' attribute semantics.
2007-04-15 4:30 ` Linus Torvalds
@ 2007-04-15 23:10 ` Junio C Hamano
2007-04-15 23:12 ` [PATCH] Fix 'diff' " Junio C Hamano
2007-04-15 23:37 ` [PATCH] Fix 'crlf' " Tom Prince
0 siblings, 2 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 23:10 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Earlier we said 'crlf lets the path go through core.autocrlf
process while !crlf disables it altogether'. This fixes the
semantics to:
- Lack of 'crlf' attribute makes core.autocrlf to apply
(i.e. we guess based on the contents and if platform
expresses its desire to have CRLF line endings via
core.autocrlf, we do so).
- Setting 'crlf' attribute to true forces CRLF line endings in
working tree files, even if blob does not look like text
(e.g. contains NUL or other bytes we consider binary).
- Setting 'crlf' attribute to false disables conversion.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Linus Torvalds <torvalds@linux-foundation.org> writes:
> Here's a simple example:
>
> echo -e '\007Bell!' > bell
>
> and just because we consider the BEL character to be binary, we'll think
> the file is binary.
You are right. This replaces my earlier "we could do..." patch.
convert.c | 122 +++++++++++++++++++++++++++++++++++++++----------------------
1 files changed, 78 insertions(+), 44 deletions(-)
diff --git a/convert.c b/convert.c
index 20c744a..d0d4b81 100644
--- a/convert.c
+++ b/convert.c
@@ -74,13 +74,13 @@ static int is_binary(unsigned long size, struct text_stat *stats)
return 0;
}
-static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
+static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int guess)
{
char *buffer, *nbuf;
unsigned long size, nsize;
struct text_stat stats;
- if (!auto_crlf)
+ if (guess && !auto_crlf)
return 0;
size = *sizep;
@@ -94,19 +94,21 @@ static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
if (!stats.cr)
return 0;
- /*
- * We're currently not going to even try to convert stuff
- * that has bare CR characters. Does anybody do that crazy
- * stuff?
- */
- if (stats.cr != stats.crlf)
- return 0;
-
- /*
- * And add some heuristics for binary vs text, of course...
- */
- if (is_binary(size, &stats))
- return 0;
+ if (guess) {
+ /*
+ * We're currently not going to even try to convert stuff
+ * that has bare CR characters. Does anybody do that crazy
+ * stuff?
+ */
+ if (stats.cr != stats.crlf)
+ return 0;
+
+ /*
+ * And add some heuristics for binary vs text, of course...
+ */
+ if (is_binary(size, &stats))
+ return 0;
+ }
/*
* Ok, allocate a new buffer, fill it in, and return true
@@ -116,28 +118,42 @@ static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
nbuf = xmalloc(nsize);
*bufp = nbuf;
*sizep = nsize;
- do {
- unsigned char c = *buffer++;
- if (c != '\r')
- *nbuf++ = c;
- } while (--size);
+
+ if (guess) {
+ do {
+ unsigned char c = *buffer++;
+ if (c != '\r')
+ *nbuf++ = c;
+ } while (--size);
+ } else {
+ do {
+ unsigned char c = *buffer++;
+ if (! (c == '\r' && (1 < size && *buffer == '\n')))
+ *nbuf++ = c;
+ } while (--size);
+ }
return 1;
}
-static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
+{
+ return crlf_to_git(path, bufp, sizep, 1);
+}
+
+static int forcecrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
+{
+ return crlf_to_git(path, bufp, sizep, 0);
+}
+
+static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep, int guess)
{
char *buffer, *nbuf;
unsigned long size, nsize;
struct text_stat stats;
unsigned char last;
- /*
- * FIXME! Other pluggable conversions should go here,
- * based on filename patterns. Right now we just do the
- * stupid auto-CRLF one.
- */
- if (auto_crlf <= 0)
+ if (guess && auto_crlf <= 0)
return 0;
size = *sizep;
@@ -155,12 +171,14 @@ static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long
if (stats.lf == stats.crlf)
return 0;
- /* If we have any bare CR characters, we're not going to touch it */
- if (stats.cr != stats.crlf)
- return 0;
+ if (guess) {
+ /* If we have any bare CR characters, we're not going to touch it */
+ if (stats.cr != stats.crlf)
+ return 0;
- if (is_binary(size, &stats))
- return 0;
+ if (is_binary(size, &stats))
+ return 0;
+ }
/*
* Ok, allocate a new buffer, fill it in, and return true
@@ -182,6 +200,16 @@ static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long
return 1;
}
+static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{
+ return crlf_to_working_tree(path, bufp, sizep, 1);
+}
+
+static int forcecrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{
+ return crlf_to_working_tree(path, bufp, sizep, 0);
+}
+
static void setup_crlf_check(struct git_attr_check *check)
{
static struct git_attr *attr_crlf;
@@ -191,31 +219,37 @@ static void setup_crlf_check(struct git_attr_check *check)
check->attr = attr_crlf;
}
-static int git_path_is_binary(const char *path)
+static int git_path_check_crlf(const char *path)
{
struct git_attr_check attr_crlf_check;
setup_crlf_check(&attr_crlf_check);
- /*
- * If crlf is not mentioned, default to autocrlf;
- * disable autocrlf only when crlf attribute is explicitly
- * unset.
- */
- return (!git_checkattr(path, 1, &attr_crlf_check) &&
- (0 == attr_crlf_check.isset));
+ if (git_checkattr(path, 1, &attr_crlf_check))
+ return -1;
+ return attr_crlf_check.isset;
}
int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
{
- if (git_path_is_binary(path))
+ switch (git_path_check_crlf(path)) {
+ case 0:
return 0;
- return autocrlf_to_git(path, bufp, sizep);
+ case 1:
+ return forcecrlf_to_git(path, bufp, sizep);
+ default:
+ return autocrlf_to_git(path, bufp, sizep);
+ }
}
int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
{
- if (git_path_is_binary(path))
+ switch (git_path_check_crlf(path)) {
+ case 0:
return 0;
- return autocrlf_to_working_tree(path, bufp, sizep);
+ case 1:
+ return forcecrlf_to_working_tree(path, bufp, sizep);
+ default:
+ return autocrlf_to_working_tree(path, bufp, sizep);
+ }
}
--
1.5.1.1.815.g3e763
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH] Fix 'diff' attribute semantics.
2007-04-15 23:10 ` [PATCH] Fix 'crlf' attribute semantics Junio C Hamano
@ 2007-04-15 23:12 ` Junio C Hamano
2007-04-15 23:37 ` [PATCH] Fix 'crlf' " Tom Prince
1 sibling, 0 replies; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 23:12 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
This is in the same spirit as the previous one. Earlier 'diff'
meant 'do the built-in binary heuristics and disable patch text
generation based on it' while '!diff' meant 'do not guess, do
not generate patch text'. There was no way to say 'do generate
patch text even when the heuristics says it has NUL in it'.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
* And this is a companion patch to 'crlf' one.
diff.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/diff.c b/diff.c
index e4efb65..dcea405 100644
--- a/diff.c
+++ b/diff.c
@@ -1069,8 +1069,9 @@ static int file_is_binary(struct diff_filespec *one)
setup_diff_attr_check(&attr_diff_check);
if (!git_checkattr(one->path, 1, &attr_diff_check) &&
- (0 == attr_diff_check.isset))
- return 1;
+ (0 <= attr_diff_check.isset))
+ return !attr_diff_check.isset;
+
if (!one->data) {
if (!DIFF_FILE_VALID(one))
return 0;
--
1.5.1.1.815.g3e763
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH] Fix 'crlf' attribute semantics.
2007-04-15 23:10 ` [PATCH] Fix 'crlf' attribute semantics Junio C Hamano
2007-04-15 23:12 ` [PATCH] Fix 'diff' " Junio C Hamano
@ 2007-04-15 23:37 ` Tom Prince
2007-04-15 23:44 ` Junio C Hamano
1 sibling, 1 reply; 19+ messages in thread
From: Tom Prince @ 2007-04-15 23:37 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Linus Torvalds, git
On Sun, Apr 15, 2007 at 04:10:56PM -0700, Junio C Hamano wrote:
> Earlier we said 'crlf lets the path go through core.autocrlf
> process while !crlf disables it altogether'. This fixes the
> semantics to:
This change means there is no way to enable the automatic heuristics for a
specific pattern once it has been disable for a more generic pattern. Would it
make sense to make the attributes more than simply boolean?
Tom
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] Fix 'crlf' attribute semantics.
2007-04-15 23:37 ` [PATCH] Fix 'crlf' " Tom Prince
@ 2007-04-15 23:44 ` Junio C Hamano
2007-04-16 6:21 ` Raimund Bauer
0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2007-04-15 23:44 UTC (permalink / raw)
To: Tom Prince; +Cc: git, Linus Torvalds
Tom Prince <tom.prince@ualberta.net> writes:
<offtopic>Please do not rudely point other people with
mail-followup-to; I did not want to address this message to
Linus but wanted to talk to YOU specifically, and you stole a
few seconds of my time, forcing me to rewrite my To: line
</offtopic>
> On Sun, Apr 15, 2007 at 04:10:56PM -0700, Junio C Hamano wrote:
>> Earlier we said 'crlf lets the path go through core.autocrlf
>> process while !crlf disables it altogether'. This fixes the
>> semantics to:
>
> This change means there is no way to enable the automatic heuristics for a
> specific pattern once it has been disable for a more generic pattern. Would it
> make sense to make the attributes more than simply boolean?
I do not think that is a problem in practice. Do not set
something to "false" explicitly with a generic pattern, if you
might want to override it.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] Fix 'crlf' attribute semantics.
2007-04-15 23:44 ` Junio C Hamano
@ 2007-04-16 6:21 ` Raimund Bauer
0 siblings, 0 replies; 19+ messages in thread
From: Raimund Bauer @ 2007-04-16 6:21 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Tom Prince, git, Linus Torvalds
On Sun, 2007-04-15 at 16:44 -0700, Junio C Hamano wrote:
> > This change means there is no way to enable the automatic heuristics for a
> > specific pattern once it has been disable for a more generic pattern. Would it
> > make sense to make the attributes more than simply boolean?
>
> I do not think that is a problem in practice. Do not set
> something to "false" explicitly with a generic pattern, if you
> might want to override it.
I also don't think it's a problem, but I think it would generally be a
good idea to have values for attributes. So you can say
crlf=yes|no|auto
diff=yes|no|my-xml-diff|...
merge=3way|...
In the yes/no case we could keep the existing syntax on just add the
attribute=othervalue for those that need more than a boolean decision.
--
best regards
Ray
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2007-04-16 6:21 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-13 9:01 [PATCH 1/3] Add basic infrastructure to assign attributes to paths Junio C Hamano
2007-04-13 9:33 ` Andy Parkins
2007-04-15 0:59 ` Junio C Hamano
2007-04-15 1:01 ` [PATCH 1/2] attribute macro support Junio C Hamano
2007-04-15 1:03 ` [PATCH 2/2] Define a few built-in attribute rules Junio C Hamano
2007-04-15 1:41 ` Linus Torvalds
2007-04-15 1:48 ` Brian Gernhardt
2007-04-15 2:04 ` Junio C Hamano
2007-04-15 2:34 ` Junio C Hamano
2007-04-15 16:21 ` Johannes Schindelin
2007-04-15 19:58 ` Junio C Hamano
2007-04-15 4:30 ` Linus Torvalds
2007-04-15 23:10 ` [PATCH] Fix 'crlf' attribute semantics Junio C Hamano
2007-04-15 23:12 ` [PATCH] Fix 'diff' " Junio C Hamano
2007-04-15 23:37 ` [PATCH] Fix 'crlf' " Tom Prince
2007-04-15 23:44 ` Junio C Hamano
2007-04-16 6:21 ` Raimund Bauer
2007-04-13 15:04 ` [PATCH 1/3] Add basic infrastructure to assign attributes to paths Linus Torvalds
2007-04-15 15:54 ` Johannes Schindelin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).