Git development

Git development
 help / color / mirror / Atom feed

* [PATCH 1/2] Allow cloning to an existing empty directory
From: Alexander Potashev @ 2009-01-08 23:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Alexander Potashev
In-Reply-To: <1231457063-29186-1-git-send-email-aspotashev@gmail.com>

The die() message changed accordingly.

The previous behaviour was to only allow cloning when the destination
directory doesn't exist.

A new inline function is_pseudo_dir_name is used to check if the
directory name is either "." or "..". It returns a non-zero value if
the given string is "." or "..". It's applicable to a lot of other Git
source code.

Signed-off-by: Alexander Potashev <aspotashev@gmail.com>
---
 builtin-clone.c |    8 +++++---
 dir.c           |   19 +++++++++++++++++++
 dir.h           |    8 ++++++++
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index f1a1a0c..e732f15 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -357,6 +357,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 	struct stat buf;
 	const char *repo_name, *repo, *work_tree, *git_dir;
 	char *path, *dir;
+	int dest_exists;
 	const struct ref *refs, *head_points_at, *remote_head, *mapped_refs;
 	struct strbuf key = STRBUF_INIT, value = STRBUF_INIT;
 	struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT;
@@ -406,8 +407,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		dir = guess_dir_name(repo_name, is_bundle, option_bare);
 	strip_trailing_slashes(dir);
 
-	if (!stat(dir, &buf))
-		die("destination directory '%s' already exists.", dir);
+	if ((dest_exists = !stat(dir, &buf)) && !is_empty_dir(dir))
+		die("destination path '%s' already exists and is not "
+			"an empty directory.", dir);
 
 	strbuf_addf(&reflog_msg, "clone: from %s", repo);
 
@@ -431,7 +433,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		if (safe_create_leading_directories_const(work_tree) < 0)
 			die("could not create leading directories of '%s': %s",
 					work_tree, strerror(errno));
-		if (mkdir(work_tree, 0755))
+		if (!dest_exists && mkdir(work_tree, 0755))
 			die("could not create work tree dir '%s': %s.",
 					work_tree, strerror(errno));
 		set_git_work_tree(work_tree);
diff --git a/dir.c b/dir.c
index 0131983..bd97e50 100644
--- a/dir.c
+++ b/dir.c
@@ -779,6 +779,25 @@ int is_inside_dir(const char *dir)
 	return get_relative_cwd(buffer, sizeof(buffer), dir) != NULL;
 }
 
+int is_empty_dir(const char *path)
+{
+	DIR *dir = opendir(path);
+	struct dirent *e;
+	int ret = 1;
+
+	if (!dir)
+		return 0;
+
+	while ((e = readdir(dir)) != NULL)
+		if (!is_pseudo_dir_name(e->d_name)) {
+			ret = 0;
+			break;
+		}
+
+	closedir(dir);
+	return ret;
+}
+
 int remove_dir_recursively(struct strbuf *path, int only_empty)
 {
 	DIR *dir = opendir(path->buf);
diff --git a/dir.h b/dir.h
index 768425a..940e057 100644
--- a/dir.h
+++ b/dir.h
@@ -77,6 +77,14 @@ extern int file_exists(const char *);
 extern char *get_relative_cwd(char *buffer, int size, const char *dir);
 extern int is_inside_dir(const char *dir);
 
+static inline int is_pseudo_dir_name(const char *name)
+{
+	return name[0] == '.' && (name[1] == '\0' ||
+		(name[1] == '.' && name[2] == '\0')); /* "." and ".." */
+}
+
+extern int is_empty_dir(const char *dir);
+
 extern void setup_standard_excludes(struct dir_struct *dir);
 extern int remove_dir_recursively(struct strbuf *path, int only_empty);
 
-- 
1.6.1.77.g84c9

^ permalink raw reply related

* [PATCH 2/2] Use is_pseudo_dir_name everywhere
From: Alexander Potashev @ 2009-01-08 23:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Alexander Potashev
In-Reply-To: <1231457063-29186-2-git-send-email-aspotashev@gmail.com>

Signed-off-by: Alexander Potashev <aspotashev@gmail.com>
---
 builtin-count-objects.c |    5 ++---
 builtin-fsck.c          |   14 ++++----------
 builtin-prune.c         |   14 ++++----------
 builtin-rerere.c        |   11 +++++------
 dir.c                   |   12 ++++--------
 entry.c                 |    5 ++---
 remote.c                |    6 ++----
 transport.c             |    4 +---
 8 files changed, 24 insertions(+), 47 deletions(-)

diff --git a/builtin-count-objects.c b/builtin-count-objects.c
index ab35b65..492a173 100644
--- a/builtin-count-objects.c
+++ b/builtin-count-objects.c
@@ -5,6 +5,7 @@
  */
 
 #include "cache.h"
+#include "dir.h"
 #include "builtin.h"
 #include "parse-options.h"
 
@@ -21,9 +22,7 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
 		const char *cp;
 		int bad = 0;
 
-		if ((ent->d_name[0] == '.') &&
-		    (ent->d_name[1] == 0 ||
-		     ((ent->d_name[1] == '.') && (ent->d_name[2] == 0))))
+		if (is_pseudo_dir_name(ent->d_name))
 			continue;
 		for (cp = ent->d_name; *cp; cp++) {
 			int ch = *cp;
diff --git a/builtin-fsck.c b/builtin-fsck.c
index 297b2c4..291ca8e 100644
--- a/builtin-fsck.c
+++ b/builtin-fsck.c
@@ -10,6 +10,7 @@
 #include "tree-walk.h"
 #include "fsck.h"
 #include "parse-options.h"
+#include "dir.h"
 
 #define REACHABLE 0x0001
 #define SEEN      0x0002
@@ -395,19 +396,12 @@ static void fsck_dir(int i, char *path)
 	while ((de = readdir(dir)) != NULL) {
 		char name[100];
 		unsigned char sha1[20];
-		int len = strlen(de->d_name);
 
-		switch (len) {
-		case 2:
-			if (de->d_name[1] != '.')
-				break;
-		case 1:
-			if (de->d_name[0] != '.')
-				break;
+		if (is_pseudo_dir_name(de->d_name))
 			continue;
-		case 38:
+		if (strlen(de->d_name) == 38) {
 			sprintf(name, "%02x", i);
-			memcpy(name+2, de->d_name, len+1);
+			memcpy(name+2, de->d_name, 39);
 			if (get_sha1_hex(name, sha1) < 0)
 				break;
 			add_sha1_list(sha1, DIRENT_SORT_HINT(de));
diff --git a/builtin-prune.c b/builtin-prune.c
index 7b4ec80..06b61ea 100644
--- a/builtin-prune.c
+++ b/builtin-prune.c
@@ -5,6 +5,7 @@
 #include "builtin.h"
 #include "reachable.h"
 #include "parse-options.h"
+#include "dir.h"
 
 static const char * const prune_usage[] = {
 	"git prune [-n] [-v] [--expire <time>] [--] [<head>...]",
@@ -61,19 +62,12 @@ static int prune_dir(int i, char *path)
 	while ((de = readdir(dir)) != NULL) {
 		char name[100];
 		unsigned char sha1[20];
-		int len = strlen(de->d_name);
 
-		switch (len) {
-		case 2:
-			if (de->d_name[1] != '.')
-				break;
-		case 1:
-			if (de->d_name[0] != '.')
-				break;
+		if (is_pseudo_dir_name(de->d_name))
 			continue;
-		case 38:
+		if (strlen(de->d_name) == 38) {
 			sprintf(name, "%02x", i);
-			memcpy(name+2, de->d_name, len+1);
+			memcpy(name+2, de->d_name, 39);
 			if (get_sha1_hex(name, sha1) < 0)
 				break;
 
diff --git a/builtin-rerere.c b/builtin-rerere.c
index d4dec6b..1ac5225 100644
--- a/builtin-rerere.c
+++ b/builtin-rerere.c
@@ -1,5 +1,6 @@
 #include "builtin.h"
 #include "cache.h"
+#include "dir.h"
 #include "string-list.h"
 #include "rerere.h"
 #include "xdiff/xdiff.h"
@@ -59,17 +60,15 @@ static void garbage_collect(struct string_list *rr)
 	git_config(git_rerere_gc_config, NULL);
 	dir = opendir(git_path("rr-cache"));
 	while ((e = readdir(dir))) {
-		const char *name = e->d_name;
-		if (name[0] == '.' &&
-		    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
+		if (is_pseudo_dir_name (e->d_name))
 			continue;
-		then = rerere_created_at(name);
+		then = rerere_created_at(e->d_name);
 		if (!then)
 			continue;
-		cutoff = (has_resolution(name)
+		cutoff = (has_resolution(e->d_name)
 			  ? cutoff_resolve : cutoff_noresolve);
 		if (then < now - cutoff * 86400)
-			string_list_append(name, &to_remove);
+			string_list_append(e->d_name, &to_remove);
 	}
 	for (i = 0; i < to_remove.nr; i++)
 		unlink_rr_item(to_remove.items[i].string);
diff --git a/dir.c b/dir.c
index bd97e50..cdd3beb 100644
--- a/dir.c
+++ b/dir.c
@@ -585,10 +585,8 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
 			int len, dtype;
 			int exclude;
 
-			if ((de->d_name[0] == '.') &&
-			    (de->d_name[1] == 0 ||
-			     !strcmp(de->d_name + 1, ".") ||
-			     !strcmp(de->d_name + 1, "git")))
+			if (is_pseudo_dir_name(de->d_name) ||
+			     !strcmp(de->d_name, ".git"))
 				continue;
 			len = strlen(de->d_name);
 			/* Ignore overly long pathnames! */
@@ -812,10 +810,8 @@ int remove_dir_recursively(struct strbuf *path, int only_empty)
 	len = path->len;
 	while ((e = readdir(dir)) != NULL) {
 		struct stat st;
-		if ((e->d_name[0] == '.') &&
-		    ((e->d_name[1] == 0) ||
-		     ((e->d_name[1] == '.') && e->d_name[2] == 0)))
-			continue; /* "." and ".." */
+		if (is_pseudo_dir_name(e->d_name))
+			continue;
 
 		strbuf_setlen(path, len);
 		strbuf_addstr(path, e->d_name);
diff --git a/entry.c b/entry.c
index aa2ee46..9c6a9cf 100644
--- a/entry.c
+++ b/entry.c
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "blob.h"
+#include "dir.h"
 
 static void create_directories(const char *path, const struct checkout *state)
 {
@@ -62,9 +63,7 @@ static void remove_subtree(const char *path)
 	*name++ = '/';
 	while ((de = readdir(dir)) != NULL) {
 		struct stat st;
-		if ((de->d_name[0] == '.') &&
-		    ((de->d_name[1] == 0) ||
-		     ((de->d_name[1] == '.') && de->d_name[2] == 0)))
+		if (is_pseudo_dir_name(de->d_name))
 			continue;
 		strcpy(name, de->d_name);
 		if (lstat(pathbuf, &st))
diff --git a/remote.c b/remote.c
index 570e112..2fb5143 100644
--- a/remote.c
+++ b/remote.c
@@ -4,6 +4,7 @@
 #include "commit.h"
 #include "diff.h"
 #include "revision.h"
+#include "dir.h"
 
 static struct refspec s_tag_refspec = {
 	0,
@@ -634,10 +635,7 @@ static struct refspec *parse_push_refspec(int nr_refspec, const char **refspec)
 
 static int valid_remote_nick(const char *name)
 {
-	if (!name[0] || /* not empty */
-	    (name[0] == '.' && /* not "." */
-	     (!name[1] || /* not ".." */
-	      (name[1] == '.' && !name[2]))))
+	if (!name[0] || is_pseudo_dir_name(name))
 		return 0;
 	return !strchr(name, '/'); /* no slash */
 }
diff --git a/transport.c b/transport.c
index 56831c5..d4e3c25 100644
--- a/transport.c
+++ b/transport.c
@@ -50,9 +50,7 @@ static int read_loose_refs(struct strbuf *path, int name_offset,
 	memset (&list, 0, sizeof(list));
 
 	while ((de = readdir(dir))) {
-		if (de->d_name[0] == '.' && (de->d_name[1] == '\0' ||
-				(de->d_name[1] == '.' &&
-				 de->d_name[2] == '\0')))
+		if (is_pseudo_dir_name(de->d_name))
 			continue;
 		ALLOC_GROW(list.entries, list.nr + 1, list.alloc);
 		list.entries[list.nr++] = xstrdup(de->d_name);
-- 
1.6.1.77.g84c9

^ permalink raw reply related

* Re: gitweb index performance (Re: [PATCH] gitweb: support the rel=vcs-* microformat)
From: J.H. @ 2009-01-08 23:53 UTC (permalink / raw)
  To: Joey Hess; +Cc: git
In-Reply-To: <20090108195446.GB18025@gnu.kitenet.net>

Joey Hess wrote:
> Giuseppe Bilotta wrote:
>   
>>> There is a small overhead in including the microformat on project list
>>> and forks list pages, but getting the project descriptions for those pages
>>> already incurs a similar overhead, and the ability to get every repo url
>>> in one place seems worthwhile.
>>>       
>> I agree with this, although people with very large project lists may
>> differ ... do we have timings on these?
>>     
>
> AFAICS, when displaying the project list, gitweb reads each project's
> description file, falling back to reading its config file if there is no
> description file.
>
> If performance was a problem here, the thing to do would be to add
> project descriptions to the $project_list file, and use those in
> preference to the description files. If a large site has done that,
> they've not sent in the patch. :-)
>   

No because all the large sites have pain points and issues elsewhere in 
the app.  Most of the large sites (which I can at least speak for 
Kernel.org) went and have built in full caching layers into gitweb 
itself to deal with the problem.  This means that we don't have to worry 
about nickle and dime performance improvements that are specific to one 
section, but can do a very broad sweep and get dramatically better 
performance across all of gitweb.  Those patches have all made it back 
out onto the mailing list, but for a number of different reasons none 
have been accepted into the mainline branch.

> With my patch, it will read each cloneurl file too. The best way to
> optimise that for large sites seems to be to add an option that would
> ignore the cloneurl files and config file and always use
> @git_base_url_list.
>
> I checked the only large site I have access to (git.debian.org) and they
> use a $project_list file, but I see no other performance tuning. That's
> a 2 ghz machine; it takes gitweb 28 (!) seconds to generate the nearly 1
> MB index web page for 1671 repositories:
>   

Look at either Lea's or my caching engines, it will help dramatically on 
something of that size.

> /srv/git.debian.org/http/cgi-bin/gitweb.cgi  3.04s user 9.24s system 43% cpu 28.515 total
>
> Notice that most of the time is spent by child processes. For each
> repository, gitweb runs git-for-each-ref to determine the time of the
> last commit.
>
> If that is removed (say if there were a way to get the info w/o
> forking), performance improves nicely:
>
> ./gitweb.cgi > /dev/null  1.29s user 1.08s system 69% cpu 3.389 total
>
> Making it not read description files for each project, as I suggest above,
> is the next best optimisation:
>
> ./gitweb.cgi > /dev/null  1.08s user 0.05s system 96% cpu 1.170 total
>
> So, I think it makes sense to optimise gitweb and offer knobs for performance
> tuning at the expense of the flexability of description and cloneurl files.
> But, git-for-each-ref is swamping everything else
The problem is the knobs are going to be very fine grained, you really 
are better off looking at one of the caching engines that's available 
now.  Performance options are hard, because it's difficult to relay to 
anyone the complex tradeoffs, thus keeping knobs like that to a minimum 
are really a necessity.

- John 'Warthog9' Hawley

^ permalink raw reply

* Re: [RFC/PATCH 2/3] replace_object: add mechanism to replace objects found in "refs/replace/"
From: Junio C Hamano @ 2009-01-08 23:55 UTC (permalink / raw)
  To: Christian Couder; +Cc: git
In-Reply-To: <200901081831.22616.chriscool@tuxfamily.org>

Christian Couder <chriscool@tuxfamily.org> writes:

> Yeah, but read_sha1_file is called to read all object files, not just 
> commits. So putting the hook there will:
>
> 	1) add a lookup overhead when reading any object,
> 	2) make it possible to replace any object,

I actually see (2) as an improvement, and (1) as an associated cost.

> And there is also the following problem:
>
> 	3) this function is often called like this:
>
> 	buffer = read_sha1_file(sha1, &type, &size);
> 	if (!buffer)
> 		die("Cannot read %s", sha1_to_hex(sha1));
>
> 	so in case of error, it will give an error message with a bad sha1
> 	in it because the sha1 of the file that we cannot read is the sha1
> 	in the replace ref not the one passed to read_sha1_file.

You have refs/replace/$A that records $B, to tell git that the real object
$A in the history is replaced by another object $B.  The caller feeds $A
in the above snippet to read_sha1_file(), and your read_sha1_file()
notices that it needs to read $B instead, returns the buffer from the
object $B, and reports its type and size.  If there is no $B available, it
may return NULL and the caller says "I asked for $A but in this repository
I cannot get to it".  That sounds consistent to me, but I agree it would
be more helpful to report "and the reason why I cannot get to it is
because you have replacement defined as $B which you do not have."

> To avoid the above problems, maybe we can try to also improve what 
> read_sha1_file does:
>
> 1) allow callers to pass a type in the "type" argument and only lookup in 
> the replace refs if we say we want a commit, but this makes calling this 
> function more error prone

This is debatable, but can go either way.

> 2) when we say we want an object with a given type, check if the object we 
> read has this type (and die if not)

That we already do anyway, don't we?  parse_commit() gets data from
read_sha1_file() and would complain if it gets a blob, etc.

> 3) die in read_sha1_file when there is an error and we are replacing so that 
> callers don't need to die themself and so that we can always report an 
> accurate sha1 in the error message

I expect the use of graft and object replacement (or if you insist,
"commit replacement") rather rare, and I think it is probably Ok to
declare it a grave repository misconfiguration if somebody claims that $A
is replaced by $B without actually having $B:

	void *read_sha1_file(sha1, type, size)
        {
		void *data;
        	unsigned char *replacement = lookup_replace_object(sha1);
                if (replacement) {
                	data = read_sha1_file(replacement, type, size);
                        if (!data)
                        	die("replacement %s not found for %s",
                                    get_sha1_hex(replacement),
                                    get_sha1_hex(sha1));
		} else {
			data = read_object(sha1, type, size);
		}
                ... existing code ...
                return data;                
        }

To disable replacement for connectivity walkers, lookup_replace_object()
can look at a some global defined in environment.c, perhaps.

^ permalink raw reply

* [RFC PATCH] make diff --color-words customizable
From: Thomas Rast @ 2009-01-09  0:05 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin

Allows for user-configurable word splits when using --color-words.
This can make the diff more readable if the regex is configured
according to the language of the file.

For now the (POSIX extended) regex must be set via the environment
GIT_DIFF_WORDS_REGEX.  Each (non-overlapping) match of the regex is
considered a word.  Anything characters not matched are considered
whitespace.  For example, for C try

  GIT_DIFF_WORDS_REGEX='[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*|(\+|-|&|\|){1,2}|\S'

and for TeX try

  GIT_DIFF_WORDS_REGEX='\\[a-zA-Z@]+ *|\{|\}|\\.|[^\{} [:space:]]+'

Signed-off-by: Thomas Rast <trast@student.ethz.ch>

---

Word diff becomes much more useful especially with TeX, where it is
common to run together \sequences\of\commands\like\this that the
current --color-words treats as a single word.

Apart from possible bugs, the main issue is: where should I put the
configuration for this?


 diff.c |  142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 127 insertions(+), 15 deletions(-)

diff --git a/diff.c b/diff.c
index d235482..c1e24de 100644
--- a/diff.c
+++ b/diff.c
@@ -321,6 +321,7 @@ struct diff_words_buffer {
 	long alloc;
 	long current; /* output pointer */
 	int suppressed_newline;
+	enum diff_word_boundaries *boundaries;
 };
 
 static void diff_words_append(char *line, unsigned long len,
@@ -336,21 +337,35 @@ static void diff_words_append(char *line, unsigned long len,
 	buffer->text.size += len;
 }
 
+enum diff_word_boundaries {
+	DIFF_WORD_CONT,
+	DIFF_WORD_START,
+	DIFF_WORD_SPACE
+};
+
+
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
 	FILE *file;
+	enum diff_word_boundaries *minus_boundaries, *plus_boundaries;
 };
 
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
+static int print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
 		int suppress_newline)
 {
 	const char *ptr;
 	int eol = 0;
 
 	if (len == 0)
-		return;
+		return len;
 
 	ptr  = buffer->text.ptr + buffer->current;
+
+	if (buffer->boundaries[buffer->current+len-1] == DIFF_WORD_START) {
+		buffer->boundaries[buffer->current+len-1] = DIFF_WORD_CONT;
+		len--;
+	}
+
 	buffer->current += len;
 
 	if (ptr[len - 1] == '\n') {
@@ -368,6 +383,8 @@ static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, in
 		else
 			putc('\n', file);
 	}
+
+	return len;
 }
 
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
@@ -391,13 +408,79 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 				   &diff_words->plus, len, DIFF_FILE_NEW, 0);
 			break;
 		case ' ':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_PLAIN, 0);
+			len = print_word(diff_words->file,
+					 &diff_words->plus, len, DIFF_PLAIN, 0);
 			diff_words->minus.current += len;
 			break;
 	}
 }
 
+static char *worddiff_default = "\\S+";
+static regex_t worddiff_regex;
+static int worddiff_regex_compiled = 0;
+
+static int scan_word_boundaries(struct diff_words_buffer *buf)
+{
+	enum diff_word_boundaries *boundaries = buf->boundaries;
+	char *text = buf->text.ptr;
+	int len = buf->text.size;
+
+	int i = 0;
+	int count = 0;
+	int ret;
+	regmatch_t matches[1];
+	int offset, wordlen;
+	char *strz;
+
+	if (!text)
+		return 0;
+
+	if (!worddiff_regex_compiled) {
+		char *wd_pat = getenv("GIT_DIFF_WORDS_REGEX");
+		if (!wd_pat)
+			wd_pat = worddiff_default;
+		ret = regcomp(&worddiff_regex, wd_pat, REG_EXTENDED);
+		if (ret) {
+			char errbuf[1024];
+			regerror(ret, &worddiff_regex, errbuf, 1024);
+			die("word diff regex failed to compile: '%s': %s",
+			    wd_pat, errbuf);
+		}
+		worddiff_regex_compiled = 1;
+	}
+
+	strz = xmalloc(len+1);
+	memcpy(strz, text, len);
+	strz[len] = '\0';
+
+	while (i < len) {
+		ret = regexec(&worddiff_regex, strz+i, 1, matches, 0);
+		if (ret == REG_NOMATCH) {
+			/* the rest is whitespace */
+			while (i < len)
+				boundaries[i++] = DIFF_WORD_SPACE;
+			break;
+		}
+
+		offset = matches[0].rm_so;
+		while (offset-- > 0 && i < len)
+			boundaries[i++] = DIFF_WORD_SPACE;
+
+		wordlen = matches[0].rm_eo - matches[0].rm_so;
+		if (wordlen-- > 0 && i < len) {
+			boundaries[i++] = DIFF_WORD_START;
+			count++;
+		}
+		while (wordlen-- > 0 && i < len)
+			boundaries[i++] = DIFF_WORD_CONT;
+	}
+
+	free(strz);
+
+	return count;
+}
+
+
 /* this executes the word diff on the accumulated buffers */
 static void diff_words_show(struct diff_words_data *diff_words)
 {
@@ -406,23 +489,50 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	xdemitcb_t ecb;
 	mmfile_t minus, plus;
 	int i;
+	char *p;
+	int bcount;
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	minus.size = diff_words->minus.text.size;
-	minus.ptr = xmalloc(minus.size);
-	memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
-	for (i = 0; i < minus.size; i++)
-		if (isspace(minus.ptr[i]))
-			minus.ptr[i] = '\n';
+
+	diff_words->minus.boundaries = xmalloc(diff_words->minus.text.size * sizeof(enum diff_word_boundaries));
+	bcount = scan_word_boundaries(&diff_words->minus);
+	minus.size = diff_words->minus.text.size + bcount;
+	minus.ptr = xmalloc(minus.size + bcount);
+	p = minus.ptr;
+	for (i = 0; i < diff_words->minus.text.size; i++) {
+		switch (diff_words->minus.boundaries[i]) {
+		case DIFF_WORD_START:
+			*p++ = '\n';
+			/* fall through */
+		case DIFF_WORD_CONT:
+			*p++ = diff_words->minus.text.ptr[i];
+			break;
+		case DIFF_WORD_SPACE:
+			*p++ = '\n';
+			break;
+		}
+	}
 	diff_words->minus.current = 0;
 
-	plus.size = diff_words->plus.text.size;
+	diff_words->plus.boundaries = xmalloc(diff_words->plus.text.size * sizeof(enum diff_word_boundaries));
+	bcount = scan_word_boundaries(&diff_words->plus);
+	plus.size = diff_words->plus.text.size + bcount;
 	plus.ptr = xmalloc(plus.size);
-	memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
-	for (i = 0; i < plus.size; i++)
-		if (isspace(plus.ptr[i]))
-			plus.ptr[i] = '\n';
+	p = plus.ptr;
+	for (i = 0; i < diff_words->plus.text.size; i++) {
+		switch (diff_words->plus.boundaries[i]) {
+		case DIFF_WORD_START:
+			*p++ = '\n';
+			/* fall through */
+		case DIFF_WORD_CONT:
+			*p++ = diff_words->plus.text.ptr[i];
+			break;
+		case DIFF_WORD_SPACE:
+			*p++ = '\n';
+			break;
+		}
+	}
 	diff_words->plus.current = 0;
 
 	xpp.flags = XDF_NEED_MINIMAL;
@@ -432,6 +542,8 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	free(minus.ptr);
 	free(plus.ptr);
 	diff_words->minus.text.size = diff_words->plus.text.size = 0;
+	free(diff_words->minus.boundaries);
+	free(diff_words->plus.boundaries);
 
 	if (diff_words->minus.suppressed_newline) {
 		putc('\n', diff_words->file);
-- 
tg: (c123b7c..) t/word-diff-regex (depends on: origin/master)

^ permalink raw reply related

* git-cache-meta -- simple file meta data caching and applying
From: jidanni @ 2009-01-09  0:13 UTC (permalink / raw)
  To: git

Gentlemen, I have whipped up this:

#!/bin/sh -e
#git-cache-meta -- simple file meta data caching and applying.
#Simpler than etckeeper, metastore, setgitperms, etc.
: ${GIT_CACHE_META_FILE=.git_cache_meta}
case $@ in
    --store|--stdout)
	case $1 in --store) exec > $GIT_CACHE_META_FILE; esac
	find $(git ls-files) \
	    \( -user ${USER?} -o -printf 'chowm %u %p\n' \) \
	    \( -group $USER -o -printf 'chgrp %g %p\n' \) \
	    \( \( -type l -o -perm 755 -o -perm 644 \) -o -printf 'chmod %#m %p\n' \);;
    --apply) sh -e $GIT_CACHE_META_FILE;;
    *) 1>&2 echo "Usage: $0 --store|--stdout|--apply"; exit 1;;
esac

^ permalink raw reply

* Re: gitweb index performance (Re: [PATCH] gitweb: support the rel=vcs-* microformat)
From: Miklos Vajna @ 2009-01-09  0:16 UTC (permalink / raw)
  To: J.H., git; +Cc: Joey Hess
In-Reply-To: <496691EC.1070805@eaglescrag.net>

[-- Attachment #1: Type: text/plain, Size: 433 bytes --]

On Thu, Jan 08, 2009 at 03:53:16PM -0800, "J.H." <warthog19@eaglescrag.net> wrote:
> Look at either Lea's or my caching engines, it will help dramatically on 
> something of that size.

repo.or.cz uses a single patch for caching the project list only:

http://repo.or.cz/w/git/repo.git?a=commit;h=152fb0b22d36c6981ac3c4403b69ad91b27a1bc6

you are probably better off with such a small patch instead of using a
gitweb fork.

[-- Attachment #2: Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply

* Re: gitweb index performance (Re: [PATCH] gitweb: support the rel=vcs-* microformat)
From: Johannes Schindelin @ 2009-01-09  0:19 UTC (permalink / raw)
  To: J.H.; +Cc: Joey Hess, git
In-Reply-To: <496691EC.1070805@eaglescrag.net>

Hi,

On Thu, 8 Jan 2009, J.H. wrote:

> Look at either Lea's or my caching engines, it will help dramatically on 
> something of that size.

Speaking of which, do you have any performance comparisons between the 
two?

Ciao,
Dscho

^ permalink raw reply

* Re: git-cache-meta -- simple file meta data caching and applying
From: Jay Soffian @ 2009-01-09  0:22 UTC (permalink / raw)
  To: jidanni; +Cc: git
In-Reply-To: <87hc49jq04.fsf@jidanni.org>

On Thu, Jan 8, 2009 at 7:13 PM,  <jidanni@jidanni.org> wrote:
> Gentlemen, I have whipped up this:
>
> #!/bin/sh -e
> #git-cache-meta -- simple file meta data caching and applying.
> #Simpler than etckeeper, metastore, setgitperms, etc.
> : ${GIT_CACHE_META_FILE=.git_cache_meta}
> case $@ in
>    --store|--stdout)
>        case $1 in --store) exec > $GIT_CACHE_META_FILE; esac
>        find $(git ls-files) \
>            \( -user ${USER?} -o -printf 'chowm %u %p\n' \) \
>            \( -group $USER -o -printf 'chgrp %g %p\n' \) \
>            \( \( -type l -o -perm 755 -o -perm 644 \) -o -printf 'chmod %#m %p\n' \);;
>    --apply) sh -e $GIT_CACHE_META_FILE;;
>    *) 1>&2 echo "Usage: $0 --store|--stdout|--apply"; exit 1;;
> esac

It doesn't handle paths which contain white-space. "chown" is typo'd
as "chowm". To be useful, the contribution might also include
instructions on how it should be used with git, and perhaps also
reasoning for why someone would want to use it in place of etckeeper,
metastore, setgitperms, etc.

j.

^ permalink raw reply

* Re: [RFC PATCH] make diff --color-words customizable
From: Johannes Schindelin @ 2009-01-09  0:25 UTC (permalink / raw)
  To: Thomas Rast; +Cc: git
In-Reply-To: <1231459505-14395-1-git-send-email-trast@student.ethz.ch>

Hi,

On Fri, 9 Jan 2009, Thomas Rast wrote:

> Allows for user-configurable word splits when using --color-words. This 
> can make the diff more readable if the regex is configured according to 
> the language of the file.
> 
> For now the (POSIX extended) regex must be set via the environment
> GIT_DIFF_WORDS_REGEX.  Each (non-overlapping) match of the regex is
> considered a word.  Anything characters not matched are considered
> whitespace.  For example, for C try
> 
>   GIT_DIFF_WORDS_REGEX='[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*|(\+|-|&|\|){1,2}|\S'
> 
> and for TeX try
> 
>   GIT_DIFF_WORDS_REGEX='\\[a-zA-Z@]+ *|\{|\}|\\.|[^\{} [:space:]]+'

Interesting idea.  However, I think it would be better to do the opposite, 
have _word_ patterns.  And even better to have _one_ pattern.

Then we could have a --color-words-regex=<regex> option.

BTW I think you could do what you intended to do with a _way_ smaller 
and more intuitive patch.

Ciao,
Dscho

^ permalink raw reply

* Re: gitweb index performance (Re: [PATCH] gitweb: support the rel=vcs-* microformat)
From: J.H. @ 2009-01-09  0:26 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Joey Hess, git
In-Reply-To: <alpine.DEB.1.00.0901090118431.30769@pacific.mpi-cbg.de>

Johannes Schindelin wrote:
> Hi,
>
> On Thu, 8 Jan 2009, J.H. wrote:
>
>   
>> Look at either Lea's or my caching engines, it will help dramatically on 
>> something of that size.
>>     
>
> Speaking of which, do you have any performance comparisons between the 
> two?
>   
Lea's got some - I can see if I can dig up my copy (or if she's paying 
attention maybe she can publish them), though either one is orders of 
magnitude faster than the normal code.  Beyond that it waffles back and 
forth which one is faster & why mainly because of the approaches we each 
took on the caching.  Generally speaking I would push people more 
towards Lea's than my work, if nothing else hers is more in line with 
current gitweb, though I have had some thoughts about undoing my file 
breakout and getting my code base back up to speed.

- John 'Warthog9' Hawley

^ permalink raw reply

* Re: git-cache-meta -- simple file meta data caching and applying
From: Jay Soffian @ 2009-01-09  0:28 UTC (permalink / raw)
  To: jidanni; +Cc: git
In-Reply-To: <76718490901081622q618c43d0t333882cbe44f6b30@mail.gmail.com>

On Thu, Jan 8, 2009 at 7:22 PM, Jay Soffian <jaysoffian@gmail.com> wrote:
> It doesn't handle paths which contain white-space. "chown" is typo'd
> as "chowm". To be useful, the contribution might also include
> instructions on how it should be used with git, and perhaps also
> reasoning for why someone would want to use it in place of etckeeper,
> metastore, setgitperms, etc.

It will also blow-up if the output of "git ls-files" exceeds
limitations on number of arguments. Also, might be worth mentioning it
requires GNU find.

j.

^ permalink raw reply

* Re: [RFC PATCH] make diff --color-words customizable
From: Thomas Rast @ 2009-01-09  0:50 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git
In-Reply-To: <alpine.DEB.1.00.0901090121432.30769@pacific.mpi-cbg.de>

[-- Attachment #1: Type: text/plain, Size: 1604 bytes --]

Johannes Schindelin wrote:
> On Fri, 9 Jan 2009, Thomas Rast wrote:
> 
> > Allows for user-configurable word splits when using --color-words. This 
> > can make the diff more readable if the regex is configured according to 
> > the language of the file.
> > 
> > For now the (POSIX extended) regex must be set via the environment
> > GIT_DIFF_WORDS_REGEX.  Each (non-overlapping) match of the regex is
> > considered a word.  Anything characters not matched are considered
> > whitespace.  For example, for C try
> > 
> >   GIT_DIFF_WORDS_REGEX='[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*|(\+|-|&|\|){1,2}|\S'
[...]
> Interesting idea.  However, I think it would be better to do the opposite, 
> have _word_ patterns.  And even better to have _one_ pattern.

I'm not sure I understand.  It _is_ a single pattern.  The examples
just have several cases to distinguish various semantic groups that
can occur, as a sort of "half tokenizer".  (The C example isn't very
complete however.)

> BTW I think you could do what you intended to do with a _way_ smaller 
> and more intuitive patch.

How?

I don't think the existing mechanism, which just replaces all
whitespace with newlines and does a line diff to find out which words
changed, can "just" be adapted.  We will have to insert extra newlines
at points where the regex said to split a word, but where there was no
whitespace in the original content.  If there's a significantly easier
way to do that than I hacked up, please share.

Or maybe I got your original code all wrong?

-- 
Thomas Rast
trast@{inf,student}.ethz.ch

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply

* Re: [PATCH] Wrap inflateInit to retry allocation after releasing pack memory
From: Junio C Hamano @ 2009-01-09  1:43 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: R. Tyler Ballance, Shawn O. Pearce, Nicolas Pitre,
	Jan Krüger, Git ML, kb
In-Reply-To: <alpine.LFD.2.00.0901081216060.3283@localhost.localdomain>

Linus Torvalds <torvalds@linux-foundation.org> writes:

> Junio - I think we should apply this, and likely to the stable branch too. 

Yeah, I didn't lose the patch.

> Add the re-trying the inflateInit() after shrinking pack windows on top of 
> it.

That too.

^ permalink raw reply

* Re: git-cache-meta -- simple file meta data caching and applying
From: jidanni @ 2009-01-09  2:50 UTC (permalink / raw)
  To: jaysoffian; +Cc: git
In-Reply-To: <76718490901081628x62da43bcia5cdbb160b0ff24a@mail.gmail.com>

Fixed. Manly the program aims to be tiny. Perhaps I should include a
full example of using it with git-bundle.

#!/bin/sh -e
#git-cache-meta -- file meta data caching for possible use with
#git-bundle, git-fast-export, git-archive, hooks, as a simple
#alternative to etckeeper, metastore, setgitperms. Requires GNU Find.
: ${GIT_CACHE_META_FILE=.git_cache_meta}
case $@ in
    --store|--stdout)
	case $1 in --store) exec > $GIT_CACHE_META_FILE; esac
	git ls-files|xargs -I{} find {} \
	    \( -user ${USER?} -o -printf 'chown %u "%p"\n' \) \
	    \( -group $USER   -o -printf 'chgrp %g "%p"\n' \) \
	    \( \( -type l -o -perm 755 -o -perm 644 \) \
			      -o -printf 'chmod %#m "%p"\n' \);;
    --apply) sh -e $GIT_CACHE_META_FILE;;
    *) 1>&2 echo "Usage: $0 --store|--stdout|--apply"; exit 1;;
esac

^ permalink raw reply

* Re: git-cache-meta -- simple file meta data caching and applying
From: Boyd Stephen Smith Jr. @ 2009-01-09  4:29 UTC (permalink / raw)
  To: jidanni; +Cc: git
In-Reply-To: <87hc49jq04.fsf@jidanni.org>

[-- Attachment #1: Type: text/plain, Size: 865 bytes --]

On Thursday 08 January 2009, jidanni@jidanni.org wrote 
about 'git-cache-meta -- simple file meta data caching and applying':
>Gentlemen, I have whipped up this:
>
>#!/bin/sh -e
>#git-cache-meta -- simple file meta data caching and applying.
>#Simpler than etckeeper, metastore, setgitperms, etc.

You *might* look at pristine-tar from Debian.  It's specifically designed 
to be able to generate the .orig.tar.gz that Debian packages are based on 
from a VCS checkout, but it's code might be useful.

I do like your script though.  It's simple and the output is plain text so 
it is easy to have your VCS maintain it.
-- 
Boyd Stephen Smith Jr.                     ,= ,-_-. =. 
bss@iguanasuicide.net                     ((_/)o o(\_))
ICQ: 514984 YM/AIM: DaTwinkDaddy           `-'(. .)`-' 
http://iguanasuicide.net/                      \_/     

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply

* 1.5.6.5 fails to clone git.kernel.org/[...]/rostedt/linux-2.6-rt
From: Tim Shepard @ 2009-01-09  6:24 UTC (permalink / raw)
  To: git

I have git 1.5.6.5 installed from the Debian/lenny package.

Poking around in http://git.kernel.org/ looking for a git repository
that might have the latest -rt development happening, I found

  http://git.kernel.org/?p=linux/kernel/git/rostedt/linux-2.6-rt.git

which looked promising.

But when I tried cloning it using:

    git clone rsync://rsync.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-rt.git linux-2.6-rt

it pulled several hundred megabytes through my (rather slow) DSL line
and then printed out

  error: Trying to write ref refs/tags/v2.6.11 with nonexistant object 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c
  fatal: Cannot update the ref 'refs/tags/v2.6.11'.

and then removed everything it had just pulled.

Looking at http://git.kernel.org/?p=linux/kernel/git/rostedt/linux-2.6-rt.git;a=tags
I see that apparently v2.6.11 and v.2.6.11-tree both point at a tree
object and not a commit.

Is this a bug in git?    (Even if there is something wrong with the
git repository I was trying to clone, the fact that it removed all the
work that took a long time to pull is annoying.)

Is this problem already fixed in some newer version of git?

			-Tim Shepard
			 shep@alum.mit.edu

^ permalink raw reply

* Re: [PATCH 0/3] Teach Git about the patience diff algorithm
From: Junio C Hamano @ 2009-01-09  6:54 UTC (permalink / raw)
  To: Adeodato Simó
  Cc: Linus Torvalds, Clemens Buchacher, Johannes Schindelin,
	Pierre Habouzit, davidel, Francis Galiegue, Git ML
In-Reply-To: <20090108195511.GA8734@chistera.yi.org>

Adeodato Simó <dato@net.com.org.es> writes:

> * Linus Torvalds [Fri, 02 Jan 2009 08:42:04 -0800]:
>
>> Yes, this one is a real patience diff change, but it's also the same one 
>> that I've seen in the google fanboi findings. What google did _not_ show 
>> was any real-life examples, or anybody doing any critical analysis.
>
> This comes a bit late and maybe it's redundant, but somebody just sent
> to a Debian mailing list a patch that was hard to read, and patience
> improved it. (I realize it's quite similar in spirit to the "toy
> patience example" that google returns, but this at list is a *real*
> example where patience helped me read a patch.)
>
> I'm also attaching bzr diff output, because it's still more readable
> IMHO. (I realize that's independent of patience, as you explained, but
> I'm making a point that it'd be nice to have this addressed by somebody
> knowledgeable.)

I found thd difference between the Bzr diff and Dscho diff somewhat
interesting.  It shows that sometimes it makes the results easier to read
to consider blank lines (and lines with only punctuation marks) that match
to be non-matching when they appear inside a block of other consecutive
non-matching lines, even though the result may become larger.

The part Bzr gives this result:

> +/****************************************************************************
>   Write data to a fd.
>  ****************************************************************************/
>  
>  ssize_t write_data(int fd, const char *buffer, size_t N)
>  {
> -	size_t total=0;
>  	ssize_t ret;
> -	char addr[INET6_ADDRSTRLEN];
> ... all "removed"
> -	while (total < N) {
> -		total += ret;
> -	}
> -	return (ssize_t)total;
> +	struct iovec iov;
> +
> +	iov.iov_base = CONST_DISCARD(char *, buffer);
> ... all "added"
> +
> +
> +	return -1;
>  }
>  
>  /****************************************************************************

is shown by the Dscho git-diff like this:

>   Write data to a fd.
>  ****************************************************************************/
>  
>  ssize_t write_data(int fd, const char *buffer, size_t N)
>  {
> -	size_t total=0;
>  	ssize_t ret;
> -	char addr[INET6_ADDRSTRLEN];
> +	struct iovec iov;
>  
> -	while (total < N) {
> -		ret = sys_write(fd,buffer + total,N - total);
> +	iov.iov_base = CONST_DISCARD(char *, buffer);
> +	iov.iov_len = N;
>  
> -		if (ret == -1) {
> -			if (fd == get_client_fd()) {
> ... all "removed"
> -
> -		if (ret == 0) {
> -			return total;
> -		}
> +	ret = write_data_iov(fd, &iov, 1);
> +	if (ret >= 0) {
> +		return ret;
> +	}
>  
> -		total += ret;
> +	if (fd == get_client_fd()) {
> +		char addr[INET6_ADDRSTRLEN];
> ... all "added"
> +		DEBUG(0,("write_data: write failure. Error = %s\n",
> +			 strerror(errno) ));
>  	}
> -	return (ssize_t)total;
> +
> +	return -1;
>  }

If we find the "common" context lines that have only blank and punctuation
letters in Dscho output, turn each of them into "-" and "+", and rearrange
them so that all "-" are together followed by "+", it will match Bzr
output.

^ permalink raw reply

* Re: [PATCH 2/2] Use is_pseudo_dir_name everywhere
From: Johannes Sixt @ 2009-01-09  7:03 UTC (permalink / raw)
  To: Alexander Potashev; +Cc: Junio C Hamano, Git Mailing List
In-Reply-To: <1231457063-29186-3-git-send-email-aspotashev@gmail.com>

Alexander Potashev schrieb:
> -		if ((ent->d_name[0] == '.') &&
> -		    (ent->d_name[1] == 0 ||
> -		     ((ent->d_name[1] == '.') && (ent->d_name[2] == 0))))
> +		if (is_pseudo_dir_name(ent->d_name))

Nit-pick: When I read the resulting code, then I will have to look up that
  is_pseudo_dir_name() indeed only checks for "." and "..". But if it were
named is_dot_or_dotdot(), then I would have to do that.

-- Hannes

^ permalink raw reply

* Re: [PATCH 2/2] Use is_pseudo_dir_name everywhere
From: Johannes Sixt @ 2009-01-09  7:22 UTC (permalink / raw)
  Cc: Alexander Potashev, Junio C Hamano, Git Mailing List
In-Reply-To: <4966F6BB.90408@viscovery.net>

Johannes Sixt schrieb:
> Alexander Potashev schrieb:
>> -		if ((ent->d_name[0] == '.') &&
>> -		    (ent->d_name[1] == 0 ||
>> -		     ((ent->d_name[1] == '.') && (ent->d_name[2] == 0))))
>> +		if (is_pseudo_dir_name(ent->d_name))
> 
> Nit-pick: When I read the resulting code, then I will have to look up that
>   is_pseudo_dir_name() indeed only checks for "." and "..". But if it were
> named is_dot_or_dotdot(), then I would have to do that.

... then I would *not* have to do that, of course.

-- Hannes

^ permalink raw reply

* Re: [PATCH] allow 8bit data in email body sent by send-email
From: Jeff King @ 2009-01-09  7:28 UTC (permalink / raw)
  To: Andre Przywara; +Cc: git
In-Reply-To: <1231422657-15305-1-git-send-email-andre.przywara@amd.com>

On Thu, Jan 08, 2009 at 02:50:57PM +0100, Andre Przywara wrote:

> when sending patch files via git send-email, the perl script assumes
> 7bit characters only. If there are other bytes in the body (foreign language
> characters in names or translations), some servers (like vger.kernel.org)
> reject the mail because of th?t. This patch always adds an 8bit header line
> to each mail.

This should be done already by git-format-patch when you generate the
patch to feed to send-email. What exactly is the workflow you use to
generate this problem? Does it matter where the non-ascii characters are
(commit versus patch, etc)? What version of git are you using?

> diff --git a/git-send-email.perl b/git-send-email.perl
> index 77ca8fe..68a462c 100755
> --- a/git-send-email.perl
> +++ b/git-send-email.perl
> @@ -793,6 +793,7 @@ To: $to${ccline}
>  Subject: $subject
>  Date: $date
>  Message-Id: $message_id
> +Content-Transfer-Encoding: 8bit
>  X-Mailer: git-send-email $gitversion
>  ";

This fix isn't right anyway. For one thing, if you're going to include
C-T-E, you should also include a MIME-Version header. But more
importantly, we are already handling encoding elsewhere. So
unconditionally adding this means that you may conflict with existing
MIME headers in the @xh variable.

-Peff

^ permalink raw reply

* Re: [PATCH 2/2] Use is_pseudo_dir_name everywhere
From: Junio C Hamano @ 2009-01-09  8:33 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Alexander Potashev, Git Mailing List
In-Reply-To: <4966FB36.2030409@viscovery.net>

Johannes Sixt <j.sixt@viscovery.net> writes:

> Johannes Sixt schrieb:
>> Alexander Potashev schrieb:
>>> -		if ((ent->d_name[0] == '.') &&
>>> -		    (ent->d_name[1] == 0 ||
>>> -		     ((ent->d_name[1] == '.') && (ent->d_name[2] == 0))))
>>> +		if (is_pseudo_dir_name(ent->d_name))
>> 
>> Nit-pick: When I read the resulting code, then I will have to look up that
>>   is_pseudo_dir_name() indeed only checks for "." and "..". But if it were
>> named is_dot_or_dotdot(), then I would have to do that.
>
> ... then I would *not* have to do that, of course.

I think the unstated motivation of this choice of the name is to keep the
door open to include lost+found and friends to the repertoire, and perhaps
to have an isolated place for customization for non-POSIX platforms and
for local conventions.  It is more like is_uninteresting_dirent_name().

As long as this function is used only to detect and skip "uninteresting"
dirent, I think that is not a bad direction.

On the other hand, I am a bit worried about is_empty_dir() abused outside
its intended purpose to say "this directory does not have anything
interesting".  E.g. "Oh, it's empty so we can nuke it":

	if (is_empty_dir(dir))
        	rmdir(dir);

even though the current callers do not do something crazy like this (the
usual order we do things is rmdir() and then check for errors).

^ permalink raw reply

* Re: Funny: git -p submodule summary
From: Jeff King @ 2009-01-09  8:38 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git
In-Reply-To: <alpine.DEB.1.00.0901081601240.30769@pacific.mpi-cbg.de>

On Thu, Jan 08, 2009 at 04:07:08PM +0100, Johannes Schindelin wrote:

> Just try this with a submodule that has more changes than fit on a screen:
> 
> 	$ git -p submodule summary
> 
> In my tests, it consistently fscks up my console.  I wonder if this is 
> related to ea27a18(spawn pager via run_command interface).
> 
> *reverts that commit* Yep, that fixes it.

Hmm. I can reproduce your problem here, like this:

  $ git -p submodule summary

but when I tried to dig deeper using strace, the problem goes away:

  $ strace -f -o foo.out git -p submodule summary

However, I was able to get some data by stracing _just_ less:

  $ GIT_PAGER='strace -f -o foo.out less' git -p submodule summary

and that reproduces the problem. And here's the interesting bit:

  open("/dev/tty", O_RDONLY|O_LARGEFILE)  = 3
  ...
  read(0, $SOME_SUBMODULE_OUTPUT, ...)
  write(1, $SOME_SUBMODULE_OUTPUT, ...)
  read(3, 0xbfd3442f, 1)                  = -1 EIO (Input/output error)

That is, after displaying the actual output, the next thing less does is
try to get input from the user on /dev/tty. But it returns EIO. At which
point less just dies, leaving your terminal in a funny state.

Hrm. Here's a theory. The new pager behavior goes something like this
for a builtin:

  1. fork, child becomes pager
  2. register atexit handler to wait for pager to finish
  3. run builtin
  4. exit, triggering handler

but for an external command (like a shell script), we exec the command,
and presumably forget about out atexit handler entirely. Which means
that our shell script writes all of its output and exits before less has
a chance to prompt and get a response from the tty.

And I'll admit to being very hazy on the details of process groups and
controlling terminals, but from what I recall, perhaps the EIO is
related to the process group leader being dead. Which means we could
paper over it by putting less in its own pgrp.

So here's a little test (in bash):

  $ set +m ;# disable job control, leaving bash as the pgrp leader
  $ git -p submodule summary

A-ha. That "works" in the sense that less runs fine and show the output.
So it is a pgrp issue.  _But_ we still have a problem, which is that the
original process has exited, and bash thinks the command is finished. So
now annoyingly we have both bash and less trying to read from the
terminal.

So the _real_ problem is that we are not always triggering the "wait for
pager to finish" code because we exec and forget about it. Which means
this strategy of "git runs child pager" will never work properly.
Instead, we have to use three processes: git and the pager become child
processes, while the original process waits for both to exit and returns
the proper exit code from git.

Let me try to work up a patch.

-Peff

PS I believe this is related to a similar bug which I have been hunting
   fruitlessly for a few weeks: if you use ^C to kill git, the pager
   will sometimes do funny things. I also traced that to an EIO on
   reading from the terminal, which makes sense: we are killing git
   before it gets a chance to do wait_for_pager.

^ permalink raw reply

* Re: 1.5.6.5 fails to clone git.kernel.org/[...]/rostedt/linux-2.6-rt
From: Junio C Hamano @ 2009-01-09  8:58 UTC (permalink / raw)
  To: Tim Shepard; +Cc: git, Johannes Schindelin, Daniel Barkalow
In-Reply-To: <E1LLAn5-0001JM-00@alva.home>

Tim Shepard <shep@alum.mit.edu> writes:

> I have git 1.5.6.5 installed from the Debian/lenny package.
>
> Poking around in http://git.kernel.org/ looking for a git repository
> that might have the latest -rt development happening, I found
>
>   http://git.kernel.org/?p=linux/kernel/git/rostedt/linux-2.6-rt.git
>
> which looked promising.
>
> But when I tried cloning it using:
>
>     git clone rsync://rsync.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-rt.git linux-2.6-rt
>
> it pulled several hundred megabytes through my (rather slow) DSL line
> and then printed out
>
>   error: Trying to write ref refs/tags/v2.6.11 with nonexistant object 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c
>   fatal: Cannot update the ref 'refs/tags/v2.6.11'.
>
> and then removed everything it had just pulled.
>
> Looking at http://git.kernel.org/?p=linux/kernel/git/rostedt/linux-2.6-rt.git;a=tags
> I see that apparently v2.6.11 and v.2.6.11-tree both point at a tree
> object and not a commit.
>
> Is this a bug in git?

It is not a problem for the tag pointing to a tree, but linux-2.6-rt.git
tree uses (like everybody else) objects/alternates to borrow objects from
the repository of Linus.

I think we lost the alternate object store support when git-fetch was
rewritten from the original shell script (that did support fetching from
such a repository over rsync:// transport) to a reimplementation in C,
with commit b888d61 (Make fetch a builtin, 2007-09-10).  

Later, cd547b4 (fetch/push: readd rsync support, 2007-10-01) attempted to
resurrect some rsync support (b888d61 lost rsync support completely for
git-fetch), but introduced these lines in transport.c:

	/* NEEDSWORK: handle one level of alternates */
	result = run_command(&rsync);

These have not been touched ever since, and then finally commit 8434c2f
(Build in clone, 2008-04-27) killed support of cloning from a repository
that uses alternates for rsync transport for git-clone (before that, the
shell script version had a special case to still allow such operation over
rsync).

It appears practically nobody uses rsync:// transport anymore these days;
you are unfortunately the first one who noticed it.

^ permalink raw reply

* Re: [PATCH, resend] git-commit: colored status when color.ui is set
From: Junio C Hamano @ 2009-01-09  9:00 UTC (permalink / raw)
  To: markus.heidelberg; +Cc: git
In-Reply-To: <200901081953.01418.markus.heidelberg@web.de>

Markus Heidelberg <markus.heidelberg@web.de> writes:

> When using "git commit" and there was nothing to commit (the editor
> wasn't launched), the status output wasn't colored, even though color.ui
> was set. Only when setting color.status it worked.
>
> Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
> ---
>  builtin-commit.c |    3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
>
> diff --git a/builtin-commit.c b/builtin-commit.c
> index e88b78f..2d90f74 100644
> --- a/builtin-commit.c
> +++ b/builtin-commit.c
> @@ -945,6 +945,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
>  
>  	git_config(git_commit_config, NULL);
>  
> +	if (wt_status_use_color == -1)
> +		wt_status_use_color = git_use_color_default;
> +
>  	argc = parse_and_validate_options(argc, argv, builtin_commit_usage, prefix);
>  
>  	index_file = prepare_index(argc, argv, prefix);

My first reaction was:

	When the editor does get launched, what would the new code do with
	your patch?  Would we see bunch of escape codes in the editor now?

But we do disable color explicitly when we generate contents to feed the
editor in that case since bc5d248 (builtin-commit: do not color status
output shown in the message template, 2007-11-18), so that fear is
unfounded.

Thanks for a reminder, will queue.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox