[parent not found: <11771520591703-gi t-send-email-junkio@cox.net>]
* [PATCH 1/4] Simplify calling of CR/LF conversion routines
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
[not found] ` <11771520591703-gi t-send-email-junkio@cox.net>
@ 2007-04-21 10:40 ` Junio C Hamano
2007-04-21 10:40 ` [PATCH 2/4] convert.c: restructure the attribute checking part Junio C Hamano
` (5 subsequent siblings)
7 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2007-04-21 10:40 UTC (permalink / raw)
To: git
From: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
* This is rebased on 'next'
builtin-apply.c | 18 +++++--------
cache.h | 4 +-
convert.c | 71 +++++++++++++++++++++++++++----------------------------
diff.c | 4 +-
entry.c | 7 +----
sha1_file.c | 7 ++---
6 files changed, 51 insertions(+), 60 deletions(-)
diff --git a/builtin-apply.c b/builtin-apply.c
index fd92ef7..ccd342c 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1475,8 +1475,8 @@ static int read_old_data(struct stat *st, const char *path, char **buf_p, unsign
}
close(fd);
nsize = got;
- nbuf = buf;
- if (convert_to_git(path, &nbuf, &nsize)) {
+ nbuf = convert_to_git(path, buf, &nsize);
+ if (nbuf) {
free(buf);
*buf_p = nbuf;
*alloc_p = nsize;
@@ -2355,9 +2355,8 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
static int try_create_file(const char *path, unsigned int mode, const char *buf, unsigned long size)
{
- int fd, converted;
+ int fd;
char *nbuf;
- unsigned long nsize;
if (has_symlinks && S_ISLNK(mode))
/* Although buf:size is counted string, it also is NUL
@@ -2369,13 +2368,10 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
if (fd < 0)
return -1;
- nsize = size;
- nbuf = (char *) buf;
- converted = convert_to_working_tree(path, &nbuf, &nsize);
- if (converted) {
+ nbuf = convert_to_working_tree(path, buf, &size);
+ if (nbuf)
buf = nbuf;
- size = nsize;
- }
+
while (size) {
int written = xwrite(fd, buf, size);
if (written < 0)
@@ -2387,7 +2383,7 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
}
if (close(fd) < 0)
die("closing file %s: %s", path, strerror(errno));
- if (converted)
+ if (nbuf)
free(nbuf);
return 0;
}
diff --git a/cache.h b/cache.h
index 38ad006..8c804cb 100644
--- a/cache.h
+++ b/cache.h
@@ -496,8 +496,8 @@ extern void trace_printf(const char *format, ...);
extern void trace_argv_printf(const char **argv, int count, const char *format, ...);
/* convert.c */
-extern int convert_to_git(const char *path, char **bufp, unsigned long *sizep);
-extern int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep);
+extern char *convert_to_git(const char *path, const char *src, unsigned long *sizep);
+extern char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep);
/* match-trees.c */
void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
diff --git a/convert.c b/convert.c
index da64253..742b895 100644
--- a/convert.c
+++ b/convert.c
@@ -79,25 +79,24 @@ static int is_binary(unsigned long size, struct text_stat *stats)
return 0;
}
-static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int action)
+static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
{
- char *buffer, *nbuf;
+ char *buffer, *dst;
unsigned long size, nsize;
struct text_stat stats;
if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
- return 0;
+ return NULL;
size = *sizep;
if (!size)
- return 0;
- buffer = *bufp;
+ return NULL;
- gather_stats(buffer, size, &stats);
+ gather_stats(src, size, &stats);
/* No CR? Nothing to convert, regardless. */
if (!stats.cr)
- return 0;
+ return NULL;
if (action == CRLF_GUESS) {
/*
@@ -106,13 +105,13 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
* stuff?
*/
if (stats.cr != stats.crlf)
- return 0;
+ return NULL;
/*
* And add some heuristics for binary vs text, of course...
*/
if (is_binary(size, &stats))
- return 0;
+ return NULL;
}
/*
@@ -120,10 +119,10 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
* to let the caller know that we switched buffers on it.
*/
nsize = size - stats.crlf;
- nbuf = xmalloc(nsize);
- *bufp = nbuf;
+ buffer = xmalloc(nsize);
*sizep = nsize;
+ dst = buffer;
if (action == CRLF_GUESS) {
/*
* If we guessed, we already know we rejected a file with
@@ -131,54 +130,53 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
* follow it.
*/
do {
- unsigned char c = *buffer++;
+ unsigned char c = *src++;
if (c != '\r')
- *nbuf++ = c;
+ *dst++ = c;
} while (--size);
} else {
do {
- unsigned char c = *buffer++;
+ unsigned char c = *src++;
if (! (c == '\r' && (1 < size && *buffer == '\n')))
- *nbuf++ = c;
+ *dst++ = c;
} while (--size);
}
- return 1;
+ return buffer;
}
-static int crlf_to_worktree(const char *path, char **bufp, unsigned long *sizep, int action)
+static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
{
- char *buffer, *nbuf;
+ char *buffer, *dst;
unsigned long size, nsize;
struct text_stat stats;
unsigned char last;
if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
(action == CRLF_GUESS && auto_crlf <= 0))
- return 0;
+ return NULL;
size = *sizep;
if (!size)
- return 0;
- buffer = *bufp;
+ return NULL;
- gather_stats(buffer, size, &stats);
+ gather_stats(src, size, &stats);
/* No LF? Nothing to convert, regardless. */
if (!stats.lf)
- return 0;
+ return NULL;
/* Was it already in CRLF format? */
if (stats.lf == stats.crlf)
- return 0;
+ return NULL;
if (action == CRLF_GUESS) {
/* If we have any bare CR characters, we're not going to touch it */
if (stats.cr != stats.crlf)
- return 0;
+ return NULL;
if (is_binary(size, &stats))
- return 0;
+ return NULL;
}
/*
@@ -186,19 +184,20 @@ static int crlf_to_worktree(const char *path, char **bufp, unsigned long *sizep,
* to let the caller know that we switched buffers on it.
*/
nsize = size + stats.lf - stats.crlf;
- nbuf = xmalloc(nsize);
- *bufp = nbuf;
+ buffer = xmalloc(nsize);
*sizep = nsize;
last = 0;
+
+ dst = buffer;
do {
- unsigned char c = *buffer++;
+ unsigned char c = *src++;
if (c == '\n' && last != '\r')
- *nbuf++ = '\r';
- *nbuf++ = c;
+ *dst++ = '\r';
+ *dst++ = c;
last = c;
} while (--size);
- return 1;
+ return buffer;
}
static void setup_crlf_check(struct git_attr_check *check)
@@ -231,12 +230,12 @@ static int git_path_check_crlf(const char *path)
return CRLF_GUESS;
}
-int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
{
- return crlf_to_git(path, bufp, sizep, git_path_check_crlf(path));
+ return crlf_to_git(path, src, sizep, git_path_check_crlf(path));
}
-int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
{
- return crlf_to_worktree(path, bufp, sizep, git_path_check_crlf(path));
+ return crlf_to_worktree(path, src, sizep, git_path_check_crlf(path));
}
diff --git a/diff.c b/diff.c
index 5f50186..1cb1230 100644
--- a/diff.c
+++ b/diff.c
@@ -1493,9 +1493,9 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
/*
* Convert from working tree format to canonical git format
*/
- buf = s->data;
size = s->size;
- if (convert_to_git(s->path, &buf, &size)) {
+ buf = convert_to_git(s->path, s->data, &size);
+ if (buf) {
munmap(s->data, s->size);
s->should_munmap = 0;
s->data = buf;
diff --git a/entry.c b/entry.c
index d72f811..3771209 100644
--- a/entry.c
+++ b/entry.c
@@ -79,7 +79,6 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
}
switch (ntohl(ce->ce_mode) & S_IFMT) {
char *buf;
- unsigned long nsize;
case S_IFREG:
if (to_tempfile) {
@@ -96,12 +95,10 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
/*
* Convert from git internal format to working tree format
*/
- buf = new;
- nsize = size;
- if (convert_to_working_tree(ce->name, &buf, &nsize)) {
+ buf = convert_to_working_tree(ce->name, new, &size);
+ if (buf) {
free(new);
new = buf;
- size = nsize;
}
wrote = write_in_full(fd, new, size);
diff --git a/sha1_file.c b/sha1_file.c
index 4304fe9..1978d5f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2277,10 +2277,9 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
*/
if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
unsigned long nsize = size;
- char *nbuf = buf;
- if (convert_to_git(path, &nbuf, &nsize)) {
- if (size)
- munmap(buf, size);
+ char *nbuf = convert_to_git(path, buf, &nsize);
+ if (nbuf) {
+ munmap(buf, size);
size = nsize;
buf = nbuf;
re_allocated = 1;
--
1.5.1.1.948.g9f6f
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 2/4] convert.c: restructure the attribute checking part.
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
[not found] ` <11771520591703-gi t-send-email-junkio@cox.net>
2007-04-21 10:40 ` [PATCH 1/4] Simplify calling of CR/LF conversion routines Junio C Hamano
@ 2007-04-21 10:40 ` Junio C Hamano
2007-04-21 10:40 ` [PATCH 3/4] lockfile: record the primary process Junio C Hamano
` (4 subsequent siblings)
7 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2007-04-21 10:40 UTC (permalink / raw)
To: git
This separates the checkattr() call and interpretation of the
returned value specific to the 'crlf' attribute into separate
routines, so that we can run a single call to checkattr() to
check for more than one attributes, and then interprete what
the returned settings mean separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
convert.c | 48 ++++++++++++++++++++++++++++--------------------
1 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/convert.c b/convert.c
index 742b895..37239ac 100644
--- a/convert.c
+++ b/convert.c
@@ -200,7 +200,7 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long *
return buffer;
}
-static void setup_crlf_check(struct git_attr_check *check)
+static void setup_convert_check(struct git_attr_check *check)
{
static struct git_attr *attr_crlf;
@@ -209,33 +209,41 @@ static void setup_crlf_check(struct git_attr_check *check)
check->attr = attr_crlf;
}
-static int git_path_check_crlf(const char *path)
+static int git_path_check_crlf(const char *path, struct git_attr_check *check)
{
- struct git_attr_check attr_crlf_check;
-
- setup_crlf_check(&attr_crlf_check);
-
- if (!git_checkattr(path, 1, &attr_crlf_check)) {
- const char *value = attr_crlf_check.value;
- if (ATTR_TRUE(value))
- return CRLF_TEXT;
- else if (ATTR_FALSE(value))
- return CRLF_BINARY;
- else if (ATTR_UNSET(value))
- ;
- else if (!strcmp(value, "input"))
- return CRLF_INPUT;
- /* fallthru */
- }
+ const char *value = check->value;
+
+ if (ATTR_TRUE(value))
+ return CRLF_TEXT;
+ else if (ATTR_FALSE(value))
+ return CRLF_BINARY;
+ else if (ATTR_UNSET(value))
+ ;
+ else if (!strcmp(value, "input"))
+ return CRLF_INPUT;
return CRLF_GUESS;
}
char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
{
- return crlf_to_git(path, src, sizep, git_path_check_crlf(path));
+ struct git_attr_check check[1];
+ int crlf = CRLF_GUESS;
+
+ setup_convert_check(check);
+ if (!git_checkattr(path, 1, check)) {
+ crlf = git_path_check_crlf(path, check);
+ }
+ return crlf_to_git(path, src, sizep, crlf);
}
char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
{
- return crlf_to_worktree(path, src, sizep, git_path_check_crlf(path));
+ struct git_attr_check check[1];
+ int crlf = CRLF_GUESS;
+
+ setup_convert_check(check);
+ if (!git_checkattr(path, 1, check)) {
+ crlf = git_path_check_crlf(path, check);
+ }
+ return crlf_to_worktree(path, src, sizep, crlf);
}
--
1.5.1.1.948.g9f6f
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 3/4] lockfile: record the primary process.
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
` (2 preceding siblings ...)
2007-04-21 10:40 ` [PATCH 2/4] convert.c: restructure the attribute checking part Junio C Hamano
@ 2007-04-21 10:40 ` Junio C Hamano
2007-04-21 10:40 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Junio C Hamano
` (3 subsequent siblings)
7 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2007-04-21 10:40 UTC (permalink / raw)
To: git
The usual process flow is the main process opens and holds the lock to
the index, does its thing, perhaps spawning children during the course,
and then writes the resulting index out by releaseing the lock.
However, the lockfile interface uses atexit(3) to clean it up, without
regard to who actually created the lock. This typically leads to a
confusing behaviour of lock being released too early when the child
exits, and then the parent process when it calls commit_lockfile()
finds that it cannot unlock it.
This fixes the problem by recording who created and holds the lock, and
upon atexit(3) handler, child simply ignores the lockfile the parent
created.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
cache.h | 1 +
lockfile.c | 6 +++++-
2 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/cache.h b/cache.h
index 8c804cb..faddaf6 100644
--- a/cache.h
+++ b/cache.h
@@ -209,6 +209,7 @@ extern int refresh_cache(unsigned int flags);
struct lock_file {
struct lock_file *next;
+ pid_t owner;
char on_list;
char filename[PATH_MAX];
};
diff --git a/lockfile.c b/lockfile.c
index bed6b21..23db35a 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -8,8 +8,11 @@ static const char *alternate_index_output;
static void remove_lock_file(void)
{
+ pid_t me = getpid();
+
while (lock_file_list) {
- if (lock_file_list->filename[0])
+ if (lock_file_list->owner == me &&
+ lock_file_list->filename[0])
unlink(lock_file_list->filename);
lock_file_list = lock_file_list->next;
}
@@ -28,6 +31,7 @@ static int lock_file(struct lock_file *lk, const char *path)
sprintf(lk->filename, "%s.lock", path);
fd = open(lk->filename, O_RDWR | O_CREAT | O_EXCL, 0666);
if (0 <= fd) {
+ lk->owner = getpid();
if (!lk->on_list) {
lk->next = lock_file_list;
lock_file_list = lk;
--
1.5.1.1.948.g9f6f
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
` (3 preceding siblings ...)
2007-04-21 10:40 ` [PATCH 3/4] lockfile: record the primary process Junio C Hamano
@ 2007-04-21 10:40 ` Junio C Hamano
2007-04-22 0:39 ` Shawn O. Pearce
` (2 more replies)
2007-04-21 20:03 ` [PATCH 0/4] External 'filter' attributes and drivers Alex Riesen
` (2 subsequent siblings)
7 siblings, 3 replies; 22+ messages in thread
From: Junio C Hamano @ 2007-04-21 10:40 UTC (permalink / raw)
To: git
The interface is similar to the custom low-level merge drivers.
First you configure your filter driver by defining 'filter.<name>.*'
variables in the configuration.
filter.<name>.clean filter command to run upon checkin
filter.<name>.smudge filter command to run upon checkout
Then you assign filter attribute to each path, whose name
matches the custom filter driver's name.
Example:
(in .gitattributes)
*.c filter=indent
(in config)
[filter "indent"]
clean = indent
smudge = cat
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
convert.c | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++--
t/t0021-filter.sh | 35 ++++++++
2 files changed, 279 insertions(+), 9 deletions(-)
create mode 100755 t/t0021-filter.sh
diff --git a/convert.c b/convert.c
index 37239ac..62d8cee 100644
--- a/convert.c
+++ b/convert.c
@@ -1,5 +1,6 @@
#include "cache.h"
#include "attr.h"
+#include "run-command.h"
/*
* convert.c - convert a file when checking it out and checking it in.
@@ -200,18 +201,208 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long *
return buffer;
}
+static int filter_buffer(const char *path, const char *src,
+ unsigned long size, const char *cmd)
+{
+ /*
+ * Spawn cmd and feed the buffer contents through its stdin.
+ */
+ struct child_process child_process;
+ int pipe_feed[2];
+ int write_err, status;
+
+ memset(&child_process, 0, sizeof(child_process));
+
+ if (pipe(pipe_feed) < 0) {
+ error("cannot create pipe to run external filter %s", cmd);
+ return 1;
+ }
+
+ child_process.pid = fork();
+ if (child_process.pid < 0) {
+ error("cannot fork to run external filter %s", cmd);
+ close(pipe_feed[0]);
+ close(pipe_feed[1]);
+ return 1;
+ }
+ if (!child_process.pid) {
+ dup2(pipe_feed[0], 0);
+ close(pipe_feed[0]);
+ close(pipe_feed[1]);
+ execlp(cmd, cmd, NULL);
+ return 1;
+ }
+ close(pipe_feed[0]);
+
+ write_err = (write_in_full(pipe_feed[1], src, size) < 0);
+ if (close(pipe_feed[1]))
+ write_err = 1;
+ if (write_err)
+ error("cannot feed the input to external filter %s", cmd);
+
+ status = finish_command(&child_process);
+ if (status)
+ error("external filter %s failed %d", cmd, -status);
+ return (write_err || status);
+}
+
+static char *apply_filter(const char *path, const char *src,
+ unsigned long *sizep, const char *cmd)
+{
+ /*
+ * Create a pipeline to have the command filter the buffer's
+ * contents.
+ *
+ * (child --> cmd) --> us
+ */
+ const int SLOP = 4096;
+ int pipe_feed[2];
+ int status;
+ char *dst;
+ unsigned long dstsize, dstalloc;
+ struct child_process child_process;
+
+ if (!cmd)
+ return NULL;
+
+ memset(&child_process, 0, sizeof(child_process));
+
+ if (pipe(pipe_feed) < 0) {
+ error("cannot create pipe to run external filter %s", cmd);
+ return NULL;
+ }
+
+ child_process.pid = fork();
+ if (child_process.pid < 0) {
+ error("cannot fork to run external filter %s", cmd);
+ close(pipe_feed[0]);
+ close(pipe_feed[1]);
+ return NULL;
+ }
+ if (!child_process.pid) {
+ dup2(pipe_feed[1], 1);
+ close(pipe_feed[0]);
+ close(pipe_feed[1]);
+ exit(filter_buffer(path, src, *sizep, cmd));
+ }
+ close(pipe_feed[1]);
+
+ dstalloc = *sizep;
+ dst = xmalloc(dstalloc);
+ dstsize = 0;
+
+ while (1) {
+ ssize_t numread = xread(pipe_feed[0], dst + dstsize,
+ dstalloc - dstsize);
+
+ if (numread <= 0) {
+ if (!numread)
+ break;
+ error("read from external filter %s failed", cmd);
+ free(dst);
+ dst = NULL;
+ break;
+ }
+ dstsize += numread;
+ if (dstalloc <= dstsize + SLOP) {
+ dstalloc = dstsize + SLOP;
+ dst = xrealloc(dst, dstalloc);
+ }
+ }
+
+ status = finish_command(&child_process);
+ if (status) {
+ error("external filter %s failed %d", cmd, -status);
+ free(dst);
+ dst = NULL;
+ }
+
+ if (dst)
+ *sizep = dstsize;
+ return dst;
+}
+
+static struct convert_driver {
+ const char *name;
+ struct convert_driver *next;
+ char *smudge;
+ char *clean;
+} *user_convert, **user_convert_tail;
+
+static int read_convert_config(const char *var, const char *value)
+{
+ const char *ep, *name;
+ int namelen;
+ struct convert_driver *drv;
+
+ /*
+ * External conversion drivers are configured using
+ * "filter.<name>.variable".
+ */
+ if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
+ return 0;
+ name = var + 7;
+ namelen = ep - name;
+ for (drv = user_convert; drv; drv = drv->next)
+ if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
+ break;
+ if (!drv) {
+ char *namebuf;
+ drv = xcalloc(1, sizeof(struct convert_driver));
+ namebuf = xmalloc(namelen + 1);
+ memcpy(namebuf, name, namelen);
+ namebuf[namelen] = 0;
+ drv->name = namebuf;
+ drv->next = NULL;
+ *user_convert_tail = drv;
+ user_convert_tail = &(drv->next);
+ }
+
+ ep++;
+
+ /*
+ * filter.<name>.smudge and filter.<name>.clean specifies
+ * the command line:
+ *
+ * command-line
+ *
+ * The command-line will not be interpolated in any way.
+ */
+
+ if (!strcmp("smudge", ep)) {
+ if (!value)
+ return error("%s: lacks value", var);
+ drv->smudge = strdup(value);
+ return 0;
+ }
+
+ if (!strcmp("clean", ep)) {
+ if (!value)
+ return error("%s: lacks value", var);
+ drv->clean = strdup(value);
+ return 0;
+ }
+ return 0;
+}
+
static void setup_convert_check(struct git_attr_check *check)
{
static struct git_attr *attr_crlf;
+ static struct git_attr *attr_filter;
- if (!attr_crlf)
+ if (!attr_crlf) {
attr_crlf = git_attr("crlf", 4);
- check->attr = attr_crlf;
+ attr_filter = git_attr("filter", 6);
+ user_convert_tail = &user_convert;
+ git_config(read_convert_config);
+ }
+ check[0].attr = attr_crlf;
+ check[1].attr = attr_filter;
}
static int git_path_check_crlf(const char *path, struct git_attr_check *check)
{
- const char *value = check->value;
+ const char *value = check[0].value;
if (ATTR_TRUE(value))
return CRLF_TEXT;
@@ -224,26 +415,70 @@ static int git_path_check_crlf(const char *path, struct git_attr_check *check)
return CRLF_GUESS;
}
+static struct convert_driver *git_path_check_convert(const char *path,
+ struct git_attr_check *check)
+{
+ const char *value = check[1].value;
+ struct convert_driver *drv;
+
+ if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
+ return NULL;
+ for (drv = user_convert; drv; drv = drv->next)
+ if (!strcmp(value, drv->name))
+ return drv;
+ return NULL;
+}
+
char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
{
- struct git_attr_check check[1];
+ struct git_attr_check check[2];
int crlf = CRLF_GUESS;
+ char *filter = NULL;
+ char *buf, *buf2;
setup_convert_check(check);
- if (!git_checkattr(path, 1, check)) {
+ if (!git_checkattr(path, 2, check)) {
+ struct convert_driver *drv;
crlf = git_path_check_crlf(path, check);
+ drv = git_path_check_convert(path, check);
+ if (drv && drv->clean)
+ filter = drv->clean;
}
- return crlf_to_git(path, src, sizep, crlf);
+
+ buf = apply_filter(path, src, sizep, filter);
+
+ buf2 = crlf_to_git(path, buf ? buf : src, sizep, crlf);
+ if (buf2) {
+ free(buf);
+ buf = buf2;
+ }
+
+ return buf;
}
char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
{
- struct git_attr_check check[1];
+ struct git_attr_check check[2];
int crlf = CRLF_GUESS;
+ char *filter = NULL;
+ char *buf, *buf2;
setup_convert_check(check);
- if (!git_checkattr(path, 1, check)) {
+ if (!git_checkattr(path, 2, check)) {
+ struct convert_driver *drv;
crlf = git_path_check_crlf(path, check);
+ drv = git_path_check_convert(path, check);
+ if (drv && drv->smudge)
+ filter = drv->smudge;
}
- return crlf_to_worktree(path, src, sizep, crlf);
+
+ buf = crlf_to_worktree(path, src, sizep, crlf);
+
+ buf2 = apply_filter(path, buf ? buf : src, sizep, filter);
+ if (buf2) {
+ free(buf);
+ buf = buf2;
+ }
+
+ return buf;
}
diff --git a/t/t0021-filter.sh b/t/t0021-filter.sh
new file mode 100755
index 0000000..0f4cd05
--- /dev/null
+++ b/t/t0021-filter.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+test_description='external filter conversion'
+
+. ./test-lib.sh
+
+cat <<\EOF >rot13.sh
+tr '[a-zA-Z]' '[n-za-mN-ZA-M]'
+EOF
+chmod +x rot13.sh
+
+test_expect_success setup '
+ git config filter.rot13.smudge ./rot13.sh &&
+ git config filter.rot13.clean ./rot13.sh &&
+
+ echo "*.t filter=rot13" >.gitattributes &&
+
+ {
+ echo a b c d e f g h i j k l m
+ echo n o p q r s t u v w x y z
+ } >test &&
+ cat test >test.t &&
+ cat test >test.o &&
+ git add test test.t &&
+ git checkout -- test test.t
+'
+
+test_expect_success check '
+
+ cmp test.o test &&
+ cmp test.o test.t
+
+'
+
+test_done
--
1.5.1.1.948.g9f6f
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-21 10:40 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Junio C Hamano
@ 2007-04-22 0:39 ` Shawn O. Pearce
2007-04-22 2:15 ` Junio C Hamano
2007-04-22 1:33 ` David Lang
2007-04-22 5:47 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Linus Torvalds
2 siblings, 1 reply; 22+ messages in thread
From: Shawn O. Pearce @ 2007-04-22 0:39 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <junkio@cox.net> wrote:
> The interface is similar to the custom low-level merge drivers.
...
> +static int filter_buffer(const char *path, const char *src,
> + unsigned long size, const char *cmd)
> +{
> + /*
> + * Spawn cmd and feed the buffer contents through its stdin.
> + */
...
ick. What about something like this on top? I moved the extra child
process for the input pipe down into the start_command routine,
where we can do something a little smarter on some systems, like
using a thread rather than a full process. Its also a shorter
patch and uses more of the run-command API.
Its not documented very well, but if you set child_process.{in,out}
to <0 we open the pipe for you, and close your side of the pipe
for you. That really simplifies the logic in the callers.
I did consider rewriting this as a select() based loop to feed the
input and read the output, but that might not port well onto a more
native Win32 based set of APIs.
---
diff --git a/convert.c b/convert.c
index 62d8cee..2ba7ea3 100644
--- a/convert.c
+++ b/convert.c
@@ -201,99 +201,37 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long *
return buffer;
}
-static int filter_buffer(const char *path, const char *src,
- unsigned long size, const char *cmd)
-{
- /*
- * Spawn cmd and feed the buffer contents through its stdin.
- */
- struct child_process child_process;
- int pipe_feed[2];
- int write_err, status;
-
- memset(&child_process, 0, sizeof(child_process));
-
- if (pipe(pipe_feed) < 0) {
- error("cannot create pipe to run external filter %s", cmd);
- return 1;
- }
-
- child_process.pid = fork();
- if (child_process.pid < 0) {
- error("cannot fork to run external filter %s", cmd);
- close(pipe_feed[0]);
- close(pipe_feed[1]);
- return 1;
- }
- if (!child_process.pid) {
- dup2(pipe_feed[0], 0);
- close(pipe_feed[0]);
- close(pipe_feed[1]);
- execlp(cmd, cmd, NULL);
- return 1;
- }
- close(pipe_feed[0]);
-
- write_err = (write_in_full(pipe_feed[1], src, size) < 0);
- if (close(pipe_feed[1]))
- write_err = 1;
- if (write_err)
- error("cannot feed the input to external filter %s", cmd);
-
- status = finish_command(&child_process);
- if (status)
- error("external filter %s failed %d", cmd, -status);
- return (write_err || status);
-}
-
static char *apply_filter(const char *path, const char *src,
unsigned long *sizep, const char *cmd)
{
- /*
- * Create a pipeline to have the command filter the buffer's
- * contents.
- *
- * (child --> cmd) --> us
- */
const int SLOP = 4096;
- int pipe_feed[2];
+ struct child_process filter_process;
+ const char *filter_argv[] = {cmd, NULL};
int status;
char *dst;
unsigned long dstsize, dstalloc;
- struct child_process child_process;
if (!cmd)
return NULL;
- memset(&child_process, 0, sizeof(child_process));
-
- if (pipe(pipe_feed) < 0) {
- error("cannot create pipe to run external filter %s", cmd);
- return NULL;
- }
-
- child_process.pid = fork();
- if (child_process.pid < 0) {
- error("cannot fork to run external filter %s", cmd);
- close(pipe_feed[0]);
- close(pipe_feed[1]);
+ memset(&filter_process, 0, sizeof(filter_process));
+ filter_process.in_bufptr = src;
+ filter_process.in_buflen = *sizep;
+ filter_process.out = -1;
+ filter_process.argv = filter_argv;
+ status = start_command(&filter_process);
+ if (status) {
+ error("external filter %s failed %d", cmd, -status);
return NULL;
}
- if (!child_process.pid) {
- dup2(pipe_feed[1], 1);
- close(pipe_feed[0]);
- close(pipe_feed[1]);
- exit(filter_buffer(path, src, *sizep, cmd));
- }
- close(pipe_feed[1]);
dstalloc = *sizep;
dst = xmalloc(dstalloc);
dstsize = 0;
while (1) {
- ssize_t numread = xread(pipe_feed[0], dst + dstsize,
- dstalloc - dstsize);
+ ssize_t numread = xread(filter_process.out,
+ dst + dstsize, dstalloc - dstsize);
if (numread <= 0) {
if (!numread)
@@ -310,7 +248,7 @@ static char *apply_filter(const char *path, const char *src,
}
}
- status = finish_command(&child_process);
+ status = finish_command(&filter_process);
if (status) {
error("external filter %s failed %d", cmd, -status);
free(dst);
diff --git a/run-command.c b/run-command.c
index eff523e..72887f8 100644
--- a/run-command.c
+++ b/run-command.c
@@ -20,12 +20,29 @@ int start_command(struct child_process *cmd)
int need_in, need_out;
int fdin[2], fdout[2];
- need_in = !cmd->no_stdin && cmd->in < 0;
+ need_in = !cmd->no_stdin && (cmd->in_bufptr || cmd->in < 0);
if (need_in) {
if (pipe(fdin) < 0)
return -ERR_RUN_COMMAND_PIPE;
- cmd->in = fdin[1];
- cmd->close_in = 1;
+ if (cmd->in_bufptr) {
+ pid_t in_feeder = fork();
+ if (in_feeder < 0) {
+ close_pair(fdin);
+ return -ERR_RUN_COMMAND_PIPE;
+ }
+ if (!in_feeder) {
+ close(fdin[0]);
+ if (write_in_full(fdin[1],
+ cmd->in_bufptr,
+ cmd->in_buflen) != cmd->in_buflen)
+ die("'%s' did not read all input", cmd->argv[0]);
+ exit(0);
+ }
+ close(fdin[1]);
+ } else {
+ cmd->in = fdin[1];
+ cmd->close_in = 1;
+ }
}
need_out = !cmd->no_stdout
diff --git a/run-command.h b/run-command.h
index 3680ef9..7632843 100644
--- a/run-command.h
+++ b/run-command.h
@@ -13,6 +13,8 @@ enum {
struct child_process {
const char **argv;
+ const char *in_bufptr;
+ size_t in_buflen;
pid_t pid;
int in;
int out;
--
1.5.1.1.135.gf948
--
Shawn.
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 0:39 ` Shawn O. Pearce
@ 2007-04-22 2:15 ` Junio C Hamano
2007-04-22 3:00 ` Shawn O. Pearce
0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2007-04-22 2:15 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: git
"Shawn O. Pearce" <spearce@spearce.org> writes:
> ick. What about something like this on top? I moved the extra child
> process for the input pipe down into the start_command routine,
> where we can do something a little smarter on some systems, like
> using a thread rather than a full process. Its also a shorter
> patch and uses more of the run-command API.
Well, I did not like start_command() that wanted to always
perform the full exec of something else for its inflexibility,
and this piles a specific hack on top of it... Why not a
callback with void * pointer?
Or are you trying to make this interface as inflexible and
feature-limited as possible, perhaps to make it easier to
porting to Windows?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 2:15 ` Junio C Hamano
@ 2007-04-22 3:00 ` Shawn O. Pearce
0 siblings, 0 replies; 22+ messages in thread
From: Shawn O. Pearce @ 2007-04-22 3:00 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <junkio@cox.net> wrote:
> Well, I did not like start_command() that wanted to always
> perform the full exec of something else for its inflexibility,
> and this piles a specific hack on top of it... Why not a
> callback with void * pointer?
Well, that's because its always used to execute some external
program. And some operating system designers once upon a time
thought that was the only way anyone would ever need to start a
new parallel thread of execution. ;-)
But why do you want a callback here in start_command() given
that all you are doing is running a filter command anyway?
Is that so you could start a "thread" to handle the stdin
pipe?
--
Shawn.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-21 10:40 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Junio C Hamano
2007-04-22 0:39 ` Shawn O. Pearce
@ 2007-04-22 1:33 ` David Lang
2007-04-22 6:33 ` Junio C Hamano
2007-04-22 5:47 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Linus Torvalds
2 siblings, 1 reply; 22+ messages in thread
From: David Lang @ 2007-04-22 1:33 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sat, 21 Apr 2007, Junio C Hamano wrote:
> The interface is similar to the custom low-level merge drivers.
>
> First you configure your filter driver by defining 'filter.<name>.*'
> variables in the configuration.
>
> filter.<name>.clean filter command to run upon checkin
> filter.<name>.smudge filter command to run upon checkout
>
> Then you assign filter attribute to each path, whose name
> matches the custom filter driver's name.
>
> Example:
>
> (in .gitattributes)
> *.c filter=indent
>
> (in config)
> [filter "indent"]
> clean = indent
> smudge = cat
hmm, three things come to mind here
1. it would be useful in many cases for the filter program to know what file
it's working on (and probably some other things), so there are probably some
command-line arguments that should be able to be passed to the filter.
2. should this be done as a modification of the in-memory buffer (s this patch
does it?) or should it be done at the time of the read/write, makeing the filter
be responsible for actually doing the disk I/O, which would give it the benifit
of being able to do things like set permissions and other things that can't be
done until the file is actually on the filesystem (for something managing config
files, this could include restarting the daemon related to the config file for
example)
3. why specify seperate clean/smudge programs instead of just one script with a
read/write parameter? I suspect that in most cases the external filter program
that cleans files will be the same one that smudges them. the clean/smudge
version does let you specify vastly different things without requireing a
wrapper script around them, but it would mean duplicating the line when they are
the same.
the first two items seem fairly important to me, but the third is a niceity that
I could live with as-is.
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 1:33 ` David Lang
@ 2007-04-22 6:33 ` Junio C Hamano
2007-04-22 9:09 ` David Lang
0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2007-04-22 6:33 UTC (permalink / raw)
To: David Lang; +Cc: git
David Lang <david.lang@digitalinsight.com> writes:
> 1. it would be useful in many cases for the filter program to know
> what file it's working on (and probably some other things), so there
> are probably some command-line arguments that should be able to be
> passed to the filter.
I can see that you missed the class when Linus talked about how
messy things would get once you allow the conversion to be
stateful. I was in the class and remembered it ;-)
Although I initially considered interpolating "%P" with
pathname, I ended up deciding against it, to discourage people
from abusing the filter for stateful conversion that changes the
results depending on time, pathname, commit, branch and stuff.
> 2. should this be done as a modification of the in-memory buffer (s
> this patch does it?) or should it be done at the time of the
> read/write, makeing the filter be responsible for actually doing the
> disk I/O, which would give it the benifit of being able to do things
> like set permissions and other things ...
The conversion is not about overriding the mode bits recorded in
tree objects, nor making git as a replacement for build procedure.
> 3. why specify seperate clean/smudge programs instead of just one
> script with a read/write parameter?
I think the most common two ways have clean as a cleaner and
smudge as a no-op (similar to crlf=input conversion), or clean
and smudge are inverse operations (similar to crlf=true
conversion. I do not see a sane case where clean and smudge are
the same, unless you are thinking about the toy demonstration
test piece I added to t0021 which uses rot13 as both clean and
smudge filters.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 6:33 ` Junio C Hamano
@ 2007-04-22 9:09 ` David Lang
2007-04-22 9:20 ` David Lang
` (2 more replies)
0 siblings, 3 replies; 22+ messages in thread
From: David Lang @ 2007-04-22 9:09 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sat, 21 Apr 2007, Junio C Hamano wrote:
> David Lang <david.lang@digitalinsight.com> writes:
>
>> 1. it would be useful in many cases for the filter program to know
>> what file it's working on (and probably some other things), so there
>> are probably some command-line arguments that should be able to be
>> passed to the filter.
>
> I can see that you missed the class when Linus talked about how
> messy things would get once you allow the conversion to be
> stateful. I was in the class and remembered it ;-)
>
> Although I initially considered interpolating "%P" with
> pathname, I ended up deciding against it, to discourage people
> from abusing the filter for stateful conversion that changes the
> results depending on time, pathname, commit, branch and stuff.
I didn't miss it, I just don't think that the path in the repository is
nessasarily as dangerous as the other things (time, branch, etc)
one thing that was listed as a possibilty was to use the sha1 of the file, but
you would force the filter to calculate that itself. it's already available when
extracting and recalcuating it is a waste
>> 2. should this be done as a modification of the in-memory buffer (s
>> this patch does it?) or should it be done at the time of the
>> read/write, makeing the filter be responsible for actually doing the
>> disk I/O, which would give it the benifit of being able to do things
>> like set permissions and other things ...
>
> The conversion is not about overriding the mode bits recorded in
> tree objects, nor making git as a replacement for build procedure.
what build procedures?
I'm talking about doing things like managing files in /etc
git doesn't have all the hooks to be able to set the permissions when you
extract a file, but if the filters were actual readers/writers instead of
in-memory operators, this becomes trivial to implement with no further changes
to git itself
>> 3. why specify seperate clean/smudge programs instead of just one
>> script with a read/write parameter?
>
> I think the most common two ways have clean as a cleaner and
> smudge as a no-op (similar to crlf=input conversion), or clean
> and smudge are inverse operations (similar to crlf=true
> conversion. I do not see a sane case where clean and smudge are
> the same, unless you are thinking about the toy demonstration
> test piece I added to t0021 which uses rot13 as both clean and
> smudge filters.
actually, I'm thinking of much more complicated filters, where it's easier to
have one program do both functions then it is to have two seperate programs
(like tar -c /tar -x)
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 9:09 ` David Lang
@ 2007-04-22 9:20 ` David Lang
2007-04-22 17:42 ` Junio C Hamano
2007-04-22 18:11 ` Nicolas Pitre
2 siblings, 0 replies; 22+ messages in thread
From: David Lang @ 2007-04-22 9:20 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sun, 22 Apr 2007, David Lang wrote:
> On Sat, 21 Apr 2007, Junio C Hamano wrote:
>
>> David Lang <david.lang@digitalinsight.com> writes:
>>
>>> 1. it would be useful in many cases for the filter program to know
>>> what file it's working on (and probably some other things), so there
>>> are probably some command-line arguments that should be able to be
>>> passed to the filter.
>>
>> I can see that you missed the class when Linus talked about how
>> messy things would get once you allow the conversion to be
>> stateful. I was in the class and remembered it ;-)
>>
>> Although I initially considered interpolating "%P" with
>> pathname, I ended up deciding against it, to discourage people
>> from abusing the filter for stateful conversion that changes the
>> results depending on time, pathname, commit, branch and stuff.
>
> I didn't miss it, I just don't think that the path in the repository is
> nessasarily as dangerous as the other things (time, branch, etc)
to clarify a bit more. I have a perl program that I can point at the
'interesting' files on my systems and have it create a 'generic' version of that
file. I can then take that generic version of the file to any machine in the
cluster and with the same program create a version of that generic file that's
correct for the other system. however to know which substatutions are
appropriate to do for the file, it needs to know the filename (well, I guess I
could create a whole bunch of seperate config files, and then define all the
files with different filters, each filter including the config file to use fo
rthat specific file, but this seems like a really ugly way to do it)
> one thing that was listed as a possibilty was to use the sha1 of the file,
> but you would force the filter to calculate that itself. it's already
> available when extracting and recalcuating it is a waste
ignore this comment, I see you posted an example of this.
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 9:09 ` David Lang
2007-04-22 9:20 ` David Lang
@ 2007-04-22 17:42 ` Junio C Hamano
2007-04-22 21:05 ` David Lang
2007-04-22 18:11 ` Nicolas Pitre
2 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2007-04-22 17:42 UTC (permalink / raw)
To: David Lang; +Cc: git
David Lang <david.lang@digitalinsight.com> writes:
>> The conversion is not about overriding the mode bits recorded in
>> tree objects, nor making git as a replacement for build procedure.
>
> what build procedures?
>
> I'm talking about doing things like managing files in /etc
That's exactly what "build procedure" is about. Nobody sane
should be managing /etc files *directly* under any SCM. A saner
practice is to have a separate source area with Makefile to
regenerate /etc files, so that (1) "make" manages the target
host specific customizations, (2) "make diff" can be used to
compare and sanity check the result of "make" against what is
currently installed under /etc, and (3) "make install" manages
the permission bits. The same applies to dotfiles under $HOME/.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 17:42 ` Junio C Hamano
@ 2007-04-22 21:05 ` David Lang
0 siblings, 0 replies; 22+ messages in thread
From: David Lang @ 2007-04-22 21:05 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sun, 22 Apr 2007, Junio C Hamano wrote:
> David Lang <david.lang@digitalinsight.com> writes:
>
>>> The conversion is not about overriding the mode bits recorded in
>>> tree objects, nor making git as a replacement for build procedure.
>>
>> what build procedures?
>>
>> I'm talking about doing things like managing files in /etc
>
> That's exactly what "build procedure" is about. Nobody sane
> should be managing /etc files *directly* under any SCM. A saner
> practice is to have a separate source area with Makefile to
> regenerate /etc files, so that (1) "make" manages the target
> host specific customizations, (2) "make diff" can be used to
> compare and sanity check the result of "make" against what is
> currently installed under /etc, and (3) "make install" manages
> the permission bits. The same applies to dotfiles under $HOME/.
this I disagree with this, simply becouse it discourages people from useing a
SCM for these things due to all the other work that needs to be done
doing it this way you need to
create a new work area
create a build system
find all the tools that change the files and change them to work on the files in
the work area (and note that some of these tools are distro provided that you
will have to fork, replace, or live without)
change all sysadmin work habits to use the new work area
not to mention, pick up the pieces when someone/something makes changes to the
files directly.
by comparison, working in /etc directly all you need to do is to train the
sysadmins to do a commit after they make changes, and how to revert to the
prior version if things break (and then train the senior sysadmins to do the
checkouts of specific versions, resolve merges, and other 'advanced' things to
pickup the pices when things get messed up)
in fact, you can even get a lot of utility out of a daily cron job to do a
commit and teaching a few people about git while the remainder don't even know
that it's running.
everything above is just dealing with the simple case of managing the config
files on a single system
now, I agree that there are advantages to not working directly in /etc and
instead working in a seperate area with a build system to actually modify the
files, but getting to that point is a lot of work, and getting everyone
(including management) convinced that it's worth that much up-front effort for
the long-term benifits is hard. and if the sysadmins are under water currently,
it won't happen becouse they can't let other things fall apart while they go and
build this new system.
I think this is a case of perfect being the enemy of better.
the simple case of handling files in /etc/directly is so non-intrusive that
distros could ship it with a nightly cron job enabled by default (ideally the
cron job would detect that no changes were made and not do a commit) and once
you have a SCM in place managing the files by default it's much more likely that
sysadmin tools will start to be changed to work with it directly.
This should not be a huge change for git to suppor this either (although I admit
it is a change)
with filters enabled at all you need to be able to disable any optimizations
that use the working tree instead of opeing up an object/pack
so the remaining changes are
1. define a dependancy that allows for filter config files to be extracted
before the filter is run on checkout
2. allow the filter to do git-add of a file while it is running on git-add (or
otherwise tell git that a file needs to be added before a commit is done). I
don't know if this would work today, or if there are locks that would cause a
deadlock (I suspect a deadlock from comments about git-lib needing to be made
re-entrent)
3. move the filter from a pipe/pipe model to a pipe/file model (it's fair to
require the filters to support - as a filename for stdin/stdout git needs this,
it's fairly standard for unix utilities to do this)
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 9:09 ` David Lang
2007-04-22 9:20 ` David Lang
2007-04-22 17:42 ` Junio C Hamano
@ 2007-04-22 18:11 ` Nicolas Pitre
2007-04-22 20:27 ` [PATCH 4/4] Add 'filter' attribute and external filter driverdefinition David Lang
2 siblings, 1 reply; 22+ messages in thread
From: Nicolas Pitre @ 2007-04-22 18:11 UTC (permalink / raw)
To: David Lang; +Cc: Junio C Hamano, git
On Sun, 22 Apr 2007, David Lang wrote:
> On Sat, 21 Apr 2007, Junio C Hamano wrote:
>
> > > 3. why specify seperate clean/smudge programs instead of just one
> > > script with a read/write parameter?
> >
> > I think the most common two ways have clean as a cleaner and
> > smudge as a no-op (similar to crlf=input conversion), or clean
> > and smudge are inverse operations (similar to crlf=true
> > conversion. I do not see a sane case where clean and smudge are
> > the same, unless you are thinking about the toy demonstration
> > test piece I added to t0021 which uses rot13 as both clean and
> > smudge filters.
>
> actually, I'm thinking of much more complicated filters, where it's easier to
> have one program do both functions then it is to have two seperate programs
> (like tar -c /tar -x)
Just specify the same program in both entries with the appropriate
parameter and be happy.
It is much easier to have two entries with the same program than having
only one entry when you actually have two separate programs.
Nicolas
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driverdefinition.
2007-04-22 18:11 ` Nicolas Pitre
@ 2007-04-22 20:27 ` David Lang
0 siblings, 0 replies; 22+ messages in thread
From: David Lang @ 2007-04-22 20:27 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Junio C Hamano, git
On Sun, 22 Apr 2007, Nicolas Pitre wrote:
> On Sun, 22 Apr 2007, David Lang wrote:
>
>> On Sat, 21 Apr 2007, Junio C Hamano wrote:
>>
>>>> 3. why specify seperate clean/smudge programs instead of just one
>>>> script with a read/write parameter?
>>>
>>> I think the most common two ways have clean as a cleaner and
>>> smudge as a no-op (similar to crlf=input conversion), or clean
>>> and smudge are inverse operations (similar to crlf=true
>>> conversion. I do not see a sane case where clean and smudge are
>>> the same, unless you are thinking about the toy demonstration
>>> test piece I added to t0021 which uses rot13 as both clean and
>>> smudge filters.
>>
>> actually, I'm thinking of much more complicated filters, where it's easier to
>> have one program do both functions then it is to have two seperate programs
>> (like tar -c /tar -x)
>
> Just specify the same program in both entries with the appropriate
> parameter and be happy.
>
> It is much easier to have two entries with the same program than having
> only one entry when you actually have two separate programs.
agreed, this is why I said that this was a fairly minor thing.
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-21 10:40 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Junio C Hamano
2007-04-22 0:39 ` Shawn O. Pearce
2007-04-22 1:33 ` David Lang
@ 2007-04-22 5:47 ` Linus Torvalds
2007-04-22 6:12 ` Junio C Hamano
2 siblings, 1 reply; 22+ messages in thread
From: Linus Torvalds @ 2007-04-22 5:47 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sat, 21 Apr 2007, Junio C Hamano wrote:
>
> The interface is similar to the custom low-level merge drivers.
>
> First you configure your filter driver by defining 'filter.<name>.*'
> variables in the configuration.
>
> filter.<name>.clean filter command to run upon checkin
> filter.<name>.smudge filter command to run upon checkout
I have to say, I'm obviously not a huge fan of playing games, but the
diffs are very clean.
Are they actually *useful?* I dunno. I'm a bit nervous about what this
means for any actual user of the feature, but I have to admit to being
charmed by a clean implementation.
I suspect that this gets some complaining off our back, but I *also*
suspect that people will actually end up really screwing themselves with
something like this and then blaming us and causing a huge pain down the
line when we've supported this and people want "extended semantics" that
are no longer clean.
But I'm not sure how valid an argument that really is. I do happen to
believe in the "give them rope" philosophy. I think you can probably screw
yourself royally with this, but hey, anybody who does that only has
himself to blame ...
Linus
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
2007-04-22 5:47 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Linus Torvalds
@ 2007-04-22 6:12 ` Junio C Hamano
0 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2007-04-22 6:12 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Linus Torvalds <torvalds@linux-foundation.org> writes:
> I suspect that this gets some complaining off our back, but I *also*
> suspect that people will actually end up really screwing themselves with
> something like this and then blaming us and causing a huge pain down the
> line when we've supported this and people want "extended semantics" that
> are no longer clean.
>
> But I'm not sure how valid an argument that really is. I do happen to
> believe in the "give them rope" philosophy. I think you can probably screw
> yourself royally with this, but hey, anybody who does that only has
> himself to blame ...
That's exactly my argument when I had an exchange with Nico on
this thread. I haven't decided if I want to make them (I sent
out a replacement patch for the tip one, and added another one)
merged in 'next' yet for that exact reason.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 0/4] External 'filter' attributes and drivers
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
` (4 preceding siblings ...)
2007-04-21 10:40 ` [PATCH 4/4] Add 'filter' attribute and external filter driver definition Junio C Hamano
@ 2007-04-21 20:03 ` Alex Riesen
2007-04-22 1:19 ` David Lang
2007-04-22 5:20 ` Shawn O. Pearce
7 siblings, 0 replies; 22+ messages in thread
From: Alex Riesen @ 2007-04-21 20:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano, Sat, Apr 21, 2007 12:40:55 +0200:
>
> [1/4] is Alex's earlier patch, rebased on top of 'next'.
>
Oh, thanks. I was almost about to resend it.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 0/4] External 'filter' attributes and drivers
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
` (5 preceding siblings ...)
2007-04-21 20:03 ` [PATCH 0/4] External 'filter' attributes and drivers Alex Riesen
@ 2007-04-22 1:19 ` David Lang
2007-04-22 5:20 ` Shawn O. Pearce
7 siblings, 0 replies; 22+ messages in thread
From: David Lang @ 2007-04-22 1:19 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sat, 21 Apr 2007, Junio C Hamano wrote:
> I know this is controversial, but here is a small four patch
> series to let you insert arbitrary external filter in checkin
> and checkout codepath.
>
> [PATCH 1/4] Simplify calling of CR/LF conversion routines
> [PATCH 2/4] convert.c: restructure the attribute checking part.
> [PATCH 3/4] lockfile: record the primary process.
> [PATCH 4/4] Add 'filter' attribute and external filter driver definition.
>
>
> [1/4] is Alex's earlier patch, rebased on top of 'next'.
>
> [3/4] is necessary for the series because otherwise 'git add'
> would not work with any external filter, but the change is
> applicable to 'master'.
>
> [4/4] is the body of the change. I wanted to like run-command.h
> process spawning infrastructure, but I suspect I did not use it
> optimally. People who were more involved in its evolution
> hopefully have suggestions for better use of it.
thanks for doing this.
from other discussions on the subject I was under the impression that in some
cases git commands would look at the file in the working directory instead of
extracing it from the object blob if the index and timestamp indicated that the
file had not been changed.
with an external filter in use this is not a safe thing to do and that needs to
be disabled if gitattributes specifies an external filter (and possibly if
gitattributes specifies crlf)
this brings up a potential performance problem. if you have to look for
.gitattributes in all parent directories for each file manipulation tha may want
to use the working tree optimization, you can end up with a lot fo time wasted
in just looking up non-existant files (or in finding files and parseing them).
Apache has this problem with their .htaccess files wher eyou can get a pretty
substantial performance boost by disabling them. it may be worth it to be able
to not use .gitattribute files, but only use the repository-wide gitattributes
file.
Finally, a how-to question
in my case I have two uses for this
1. a permission preserving filter.
This records the permissions on checking, sets them on checkout
2. host specific file munging for config files
I have a XML file that specifies what munging should be done for each host and
a perl script that does this munging
in both of these cases I have a config for the filter (the list of permissions
or the xml file defining the munging) that should be checked into the repository
as part of the commit
the problems I see are
1. the config file needs to be checked out before any files that go through the
filter so that the filter has it available to use
2. when checking in files there is a potential of having the config file change
between one addition to the index and another. Part of this is in the "well,
don't do that then catagory" of things, but for things like the permission
filter this is harder to do
if there is one master permission file then each file checkin could potentially
add/modify a line in it. as long as lines related to other files aren't changed
you could re-add it to the index, overriding the prior version.
if you do one permission file per file you are tracking the permissions of (say
.permissions.$file) then you don't have to worry about different versions of the
file, but you still need to have the filter (which is running as part of a
checking) do a checkin of another file. This approach would also make it harder
to know which files need to be checked out before the filter can run.
any thoughts on how to deal with these issues?
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 0/4] External 'filter' attributes and drivers
2007-04-21 10:40 [PATCH 0/4] External 'filter' attributes and drivers Junio C Hamano
` (6 preceding siblings ...)
2007-04-22 1:19 ` David Lang
@ 2007-04-22 5:20 ` Shawn O. Pearce
2007-04-22 9:01 ` David Lang
7 siblings, 1 reply; 22+ messages in thread
From: Shawn O. Pearce @ 2007-04-22 5:20 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <junkio@cox.net> wrote:
> I know this is controversial, but here is a small four patch
> series to let you insert arbitrary external filter in checkin
> and checkout codepath.
This series is some pretty nice work.
But I really don't think we want filters. Actually, I'm very
against them, and I'm actually very against the CRLF work that
has already been added. Since the CRLF ship has sailed I won't
try to call it back to port. But I don't want to see the filter
stuff raise anchor...
I've only really seen a few arguments for the filters:
1) Better compress structured content (e.g. ODF) by storing the
ZIP as a tree, allowing normal deltification within packfiles
to apply to the contained files.
2) Use a custom diff function on special files (e.g. ODF) as they
are otherwise unreadable with the internal xdiff based engine.
3) Mutate content prior to extracting from the tree, e.g. printing.
Let me try to address these points.
#1: There are a limited number of content formats that we could
reasonably filter into the repository such that the standard
deltification routines will have good space/performance benefits.
Most of them today are ZIP archives (e.g. ODF, JAR).
Why don't we just teach the packfile format how to better compress
these types of streams? Let read_sha1_file() and pack-objects do all
of the heavy translation work, just as they do today for text files.
Explode them into a "tree-like" thing that allows deltification
against any other content (even cross ZIP streams) just like we
do with trees, but always expose them to the working directory
level of the system as blobs.
This way we never get into the mess that David Lang pointed out
where we have many optimizations that reuse working tree files when
stat data matches; nor do we have to worry about major structural
differences between the working tree (1 file) and the repository
format (exploded ZIP as 10,000 files).
#2: We already support using any diff tool you want: set the
GIT_EXTERNAL_DIFF environment variable before running a program that
generates a diff. As Junio pointed out on #git tonight, that could
be any shell script that decides how to produce the diff based on
its own logic. Though we could also use the new attribute stuff
to select diff programs, much like we do now for merge conflict
resolution in merge-recursive.
#3: This has already been discussed at length on the list.
Letting the build system perform this sort of work is better than
making the VCS do it; especially when you want the VCS to do its
sole job well (track the state of the working directory) and the
build system do its sole job well (produce files suitable for use
outside of the repository).
So despite the fact that I tried to make 4/4 shorter, I really
don't think we should be doing this...
--
Shawn.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 0/4] External 'filter' attributes and drivers
2007-04-22 5:20 ` Shawn O. Pearce
@ 2007-04-22 9:01 ` David Lang
0 siblings, 0 replies; 22+ messages in thread
From: David Lang @ 2007-04-22 9:01 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: Junio C Hamano, git
On Sun, 22 Apr 2007, Shawn O. Pearce wrote:
> Junio C Hamano <junkio@cox.net> wrote:
>> I know this is controversial, but here is a small four patch
>> series to let you insert arbitrary external filter in checkin
>> and checkout codepath.
>
> This series is some pretty nice work.
>
> But I really don't think we want filters. Actually, I'm very
> against them, and I'm actually very against the CRLF work that
> has already been added. Since the CRLF ship has sailed I won't
> try to call it back to port. But I don't want to see the filter
> stuff raise anchor...
>
> I've only really seen a few arguments for the filters:
>
> 1) Better compress structured content (e.g. ODF) by storing the
> ZIP as a tree, allowing normal deltification within packfiles
> to apply to the contained files.
>
> 2) Use a custom diff function on special files (e.g. ODF) as they
> are otherwise unreadable with the internal xdiff based engine.
>
> 3) Mutate content prior to extracting from the tree, e.g. printing.
>
>
> Let me try to address these points.
>
> #1: There are a limited number of content formats that we could
> reasonably filter into the repository such that the standard
> deltification routines will have good space/performance benefits.
> Most of them today are ZIP archives (e.g. ODF, JAR).
I think that there are a lot more that could benifit from application specific
knowledge. git today just deals with line-based formats. anything that has more
structure to it could benifit from a diff/merge/delta/etc specific to it
> Why don't we just teach the packfile format how to better compress
> these types of streams? Let read_sha1_file() and pack-objects do all
> of the heavy translation work, just as they do today for text files.
> Explode them into a "tree-like" thing that allows deltification
> against any other content (even cross ZIP streams) just like we
> do with trees, but always expose them to the working directory
> level of the system as blobs.
>
> This way we never get into the mess that David Lang pointed out
> where we have many optimizations that reuse working tree files when
> stat data matches; nor do we have to worry about major structural
> differences between the working tree (1 file) and the repository
> format (exploded ZIP as 10,000 files).
>
nobody is suggesting that the working tree format be one file and the repository
format be 1000 files. but if you think that skipping borrowing from a working
tree is expensive, consider how expsndive it is to uncompress and recompress
files.
>
> #2: We already support using any diff tool you want: set the
> GIT_EXTERNAL_DIFF environment variable before running a program that
> generates a diff. As Junio pointed out on #git tonight, that could
> be any shell script that decides how to produce the diff based on
> its own logic. Though we could also use the new attribute stuff
> to select diff programs, much like we do now for merge conflict
> resolution in merge-recursive.
but not diff tools per file, only per repository. not all files in a repository
should be handled the same way.
>
> #3: This has already been discussed at length on the list.
> Letting the build system perform this sort of work is better than
> making the VCS do it; especially when you want the VCS to do its
> sole job well (track the state of the working directory) and the
> build system do its sole job well (produce files suitable for use
> outside of the repository).
sometimes there isn't a build system to shove this work off to.
I'll add a few more
managing permissions on files
managing system config files (where you want many systems to share what's
logicly the same config, but each system needs it's own specifics)
David Lang
^ permalink raw reply [flat|nested] 22+ messages in thread