Git development
 help / color / mirror / Atom feed
* [PATCH 1/4] Add color_fwrite(), a function coloring each line individually
From: Johannes Schindelin @ 2009-01-11 19:59 UTC (permalink / raw)
  To: git, Thomas Rast
In-Reply-To: <alpine.DEB.1.00.0901112057300.3586@pacific.mpi-cbg.de>


We have to set the color before every line and reset it before every
newline.  Add a function color_fwrite() which does that for us.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 color.c |   24 ++++++++++++++++++++++++
 color.h |    1 +
 2 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/color.c b/color.c
index fc0b72a..bff24ac 100644
--- a/color.c
+++ b/color.c
@@ -191,3 +191,27 @@ int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
 	va_end(args);
 	return r;
 }
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ */
+void color_fwrite(FILE *f, const char *color, size_t count, const char *buf)
+{
+	if (!*color) {
+		fwrite(buf, count, 1, f);
+		return;
+	}
+	while (count) {
+		char *p = memchr(buf, '\n', count);
+		fputs(color, f);
+		fwrite(buf, p ? p - buf : count, 1, f);
+		fputs(COLOR_RESET, f);
+		if (!p)
+			return;
+		fputc('\n', f);
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+}
+
+
diff --git a/color.h b/color.h
index 6cf5c88..9fb58f5 100644
--- a/color.h
+++ b/color.h
@@ -19,5 +19,6 @@ int git_config_colorbool(const char *var, const char *value, int stdout_is_tty);
 void color_parse(const char *var, const char *value, char *dst);
 int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
 int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
+void color_fwrite(FILE *f, const char *color, size_t count, const char *buf);
 
 #endif /* COLOR_H */
-- 
1.6.1.186.g48f3bc4

^ permalink raw reply related

* [PATCH 2/4] color-words: refactor word splitting and use ALLOC_GROW()
From: Johannes Schindelin @ 2009-01-11 19:59 UTC (permalink / raw)
  To: git, Thomas Rast
In-Reply-To: <alpine.DEB.1.00.0901112057300.3586@pacific.mpi-cbg.de>


Word splitting is now performed by the function diff_words_fill(),
avoiding having the same code twice.

In the same spirit, avoid duplicating the code of ALLOC_GROW().

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 diff.c |   40 +++++++++++++++++++---------------------
 1 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/diff.c b/diff.c
index f67e0b2..6d87ea5 100644
--- a/diff.c
+++ b/diff.c
@@ -326,10 +326,7 @@ struct diff_words_buffer {
 static void diff_words_append(char *line, unsigned long len,
 		struct diff_words_buffer *buffer)
 {
-	if (buffer->text.size + len > buffer->alloc) {
-		buffer->alloc = (buffer->text.size + len) * 3 / 2;
-		buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc);
-	}
+	ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc);
 	line++;
 	len--;
 	memcpy(buffer->text.ptr + buffer->text.size, line, len);
@@ -398,6 +395,22 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 	}
 }
 
+/*
+ * This function splits the words in buffer->text, and stores the list with
+ * newline separator into out.
+ */
+static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
+{
+	int i;
+	out->size = buffer->text.size;
+	out->ptr = xmalloc(out->size);
+	memcpy(out->ptr, buffer->text.ptr, out->size);
+	for (i = 0; i < out->size; i++)
+		if (isspace(out->ptr[i]))
+			out->ptr[i] = '\n';
+	buffer->current = 0;
+}
+
 /* this executes the word diff on the accumulated buffers */
 static void diff_words_show(struct diff_words_data *diff_words)
 {
@@ -405,26 +418,11 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	xdemitconf_t xecfg;
 	xdemitcb_t ecb;
 	mmfile_t minus, plus;
-	int i;
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	minus.size = diff_words->minus.text.size;
-	minus.ptr = xmalloc(minus.size);
-	memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
-	for (i = 0; i < minus.size; i++)
-		if (isspace(minus.ptr[i]))
-			minus.ptr[i] = '\n';
-	diff_words->minus.current = 0;
-
-	plus.size = diff_words->plus.text.size;
-	plus.ptr = xmalloc(plus.size);
-	memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
-	for (i = 0; i < plus.size; i++)
-		if (isspace(plus.ptr[i]))
-			plus.ptr[i] = '\n';
-	diff_words->plus.current = 0;
-
+	diff_words_fill(&diff_words->minus, &minus);
+	diff_words_fill(&diff_words->plus, &plus);
 	xpp.flags = XDF_NEED_MINIMAL;
 	xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
-- 
1.6.1.186.g48f3bc4

^ permalink raw reply related

* [PATCH 3/4] color-words: refactor to allow for 0-character word boundaries
From: Johannes Schindelin @ 2009-01-11 19:59 UTC (permalink / raw)
  To: git, Thomas Rast
In-Reply-To: <alpine.DEB.1.00.0901112057300.3586@pacific.mpi-cbg.de>


Up until now, the color-words code assumed that word boundaries are
identical to white space characters.

Therefore, it could get away with a very simple scheme: it copied the
hunks, substituted newlines for each white space character, called
libxdiff with the processed text, but then identified the text to
print out by the offsets (which agreed since the original text had the
same length).

This code was ugly, for a number of reasons:

- it was impossible to introduce 0-character word boundaries,

- we had to print everything word by word, and

- the code needed extra special handling of newlines in the removed part.

Fix all of these issues by processing the text such that

- we build word lists, separated by newlines,

- we remember the original offsets for every word, and

- after calling libxdiff on the wordlists, we parse the hunk headers, and
  find the corresponding offsets, and then

- we print the removed/added parts in one go.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 diff.c |  150 +++++++++++++++++++++++++++++++++++-----------------------------
 1 files changed, 82 insertions(+), 68 deletions(-)

diff --git a/diff.c b/diff.c
index 6d87ea5..2a3d301 100644
--- a/diff.c
+++ b/diff.c
@@ -319,8 +319,10 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 struct diff_words_buffer {
 	mmfile_t text;
 	long alloc;
-	long current; /* output pointer */
-	int suppressed_newline;
+	struct diff_words_orig {
+		const char *begin, *end;
+	} *orig;
+	int orig_nr, orig_alloc;
 };
 
 static void diff_words_append(char *line, unsigned long len,
@@ -335,80 +337,79 @@ static void diff_words_append(char *line, unsigned long len,
 
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
+	const char *current_plus;
 	FILE *file;
 };
 
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
-		int suppress_newline)
+static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 {
-	const char *ptr;
-	int eol = 0;
+	struct diff_words_data *diff_words = priv;
+	int minus_first, minus_len, plus_first, plus_len;
+	const char *minus_begin, *minus_end, *plus_begin, *plus_end;
 
-	if (len == 0)
+	if (line[0] != '@' || parse_hunk_header(line, len,
+			&minus_first, &minus_len, &plus_first, &plus_len))
 		return;
 
-	ptr  = buffer->text.ptr + buffer->current;
-	buffer->current += len;
-
-	if (ptr[len - 1] == '\n') {
-		eol = 1;
-		len--;
-	}
-
-	fputs(diff_get_color(1, color), file);
-	fwrite(ptr, len, 1, file);
-	fputs(diff_get_color(1, DIFF_RESET), file);
-
-	if (eol) {
-		if (suppress_newline)
-			buffer->suppressed_newline = 1;
-		else
-			putc('\n', file);
-	}
-}
-
-static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
-{
-	struct diff_words_data *diff_words = priv;
+	minus_begin = diff_words->minus.orig[minus_first].begin;
+	minus_end = minus_len == 0 ? minus_begin :
+		diff_words->minus.orig[minus_first + minus_len - 1].end;
+	plus_begin = diff_words->plus.orig[plus_first].begin;
+	plus_end = plus_len == 0 ? plus_begin :
+		diff_words->plus.orig[plus_first + plus_len - 1].end;
 
-	if (diff_words->minus.suppressed_newline) {
-		if (line[0] != '+')
-			putc('\n', diff_words->file);
-		diff_words->minus.suppressed_newline = 0;
-	}
+	if (diff_words->current_plus != plus_begin)
+		fwrite(diff_words->current_plus,
+				plus_begin - diff_words->current_plus, 1,
+				diff_words->file);
+	if (minus_begin != minus_end)
+		color_fwrite(diff_words->file, diff_get_color(1, DIFF_FILE_OLD),
+				minus_end - minus_begin, minus_begin);
+	if (plus_begin != plus_end)
+		color_fwrite(diff_words->file, diff_get_color(1, DIFF_FILE_NEW),
+				plus_end - plus_begin, plus_begin);
 
-	len--;
-	switch (line[0]) {
-		case '-':
-			print_word(diff_words->file,
-				   &diff_words->minus, len, DIFF_FILE_OLD, 1);
-			break;
-		case '+':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_FILE_NEW, 0);
-			break;
-		case ' ':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_PLAIN, 0);
-			diff_words->minus.current += len;
-			break;
-	}
+	diff_words->current_plus = plus_end;
 }
 
 /*
- * This function splits the words in buffer->text, and stores the list with
- * newline separator into out.
+ * This function splits the words in buffer->text, stores the list with
+ * newline separator into out, and saves the offsets of the original words
+ * in buffer->orig.
  */
 static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 {
-	int i;
-	out->size = buffer->text.size;
-	out->ptr = xmalloc(out->size);
-	memcpy(out->ptr, buffer->text.ptr, out->size);
-	for (i = 0; i < out->size; i++)
-		if (isspace(out->ptr[i]))
-			out->ptr[i] = '\n';
-	buffer->current = 0;
+	int i, j;
+
+	out->size = 0;
+	out->ptr = xmalloc(buffer->text.size);
+
+	/* fake an empty "0th" word */
+	ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
+	buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
+	buffer->orig_nr = 1;
+
+	for (i = 0; i < buffer->text.size; i++) {
+		if (isspace(buffer->text.ptr[i]))
+			continue;
+		for (j = i + 1; j < buffer->text.size &&
+				!isspace(buffer->text.ptr[j]); j++)
+			; /* find the end of the word */
+
+		/* store original boundaries */
+		ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
+				buffer->orig_alloc);
+		buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i;
+		buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j;
+		buffer->orig_nr++;
+
+		/* store one word */
+		memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
+		out->ptr[out->size + j - i] = '\n';
+		out->size += j - i + 1;
+
+		i = j - 1;
+	}
 }
 
 /* this executes the word diff on the accumulated buffers */
@@ -419,22 +420,33 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	xdemitcb_t ecb;
 	mmfile_t minus, plus;
 
+	/* special case: only removal */
+	if (!diff_words->plus.text.size) {
+		color_fwrite(diff_words->file, diff_get_color(1, DIFF_FILE_OLD),
+			diff_words->minus.text.size, diff_words->minus.text.ptr);
+		diff_words->minus.text.size = 0;
+		return;
+	}
+
+	diff_words->current_plus = diff_words->plus.text.ptr;
+
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
 	diff_words_fill(&diff_words->minus, &minus);
 	diff_words_fill(&diff_words->plus, &plus);
 	xpp.flags = XDF_NEED_MINIMAL;
-	xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
+	xecfg.ctxlen = 0;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
 		      &xpp, &xecfg, &ecb);
 	free(minus.ptr);
 	free(plus.ptr);
+	if (diff_words->current_plus != diff_words->plus.text.ptr +
+			diff_words->plus.text.size)
+		fwrite(diff_words->current_plus,
+			diff_words->plus.text.ptr + diff_words->plus.text.size
+			- diff_words->current_plus, 1,
+			diff_words->file);
 	diff_words->minus.text.size = diff_words->plus.text.size = 0;
-
-	if (diff_words->minus.suppressed_newline) {
-		putc('\n', diff_words->file);
-		diff_words->minus.suppressed_newline = 0;
-	}
 }
 
 typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
@@ -458,7 +470,9 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 			diff_words_show(ecbdata->diff_words);
 
 		free (ecbdata->diff_words->minus.text.ptr);
+		free (ecbdata->diff_words->minus.orig);
 		free (ecbdata->diff_words->plus.text.ptr);
+		free (ecbdata->diff_words->plus.orig);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
-- 
1.6.1.186.g48f3bc4

^ permalink raw reply related

* [PATCH 4/4] color-words: take an optional regular expression describing words
From: Johannes Schindelin @ 2009-01-11 20:00 UTC (permalink / raw)
  To: git, Thomas Rast
In-Reply-To: <alpine.DEB.1.00.0901112057300.3586@pacific.mpi-cbg.de>


In some applications, words are not delimited by white space.  To
allow for that, you can specify a regular expression describing
what makes a word with

	git diff --color-words='^[A-Za-z0-9]*'

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 diff.c |   49 +++++++++++++++++++++++++++++++++++++++++--------
 diff.h |    1 +
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index 2a3d301..d6bba72 100644
--- a/diff.c
+++ b/diff.c
@@ -333,12 +333,14 @@ static void diff_words_append(char *line, unsigned long len,
 	len--;
 	memcpy(buffer->text.ptr + buffer->text.size, line, len);
 	buffer->text.size += len;
+	buffer->text.ptr[buffer->text.size] = '\0';
 }
 
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
 	const char *current_plus;
 	FILE *file;
+	regex_t *word_regex;
 };
 
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
@@ -372,17 +374,36 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 	diff_words->current_plus = plus_end;
 }
 
+static int find_word_boundary(mmfile_t *buffer, int i, regex_t *word_regex)
+{
+	if (i >= buffer->size)
+		return i;
+
+	if (word_regex) {
+		regmatch_t match[1];
+		if (!regexec(word_regex, buffer->ptr + i, 1, match, 0))
+			i += match[0].rm_eo;
+	}
+	else
+		while (i < buffer->size && !isspace(buffer->ptr[i]))
+			i++;
+
+	return i;
+}
+
 /*
  * This function splits the words in buffer->text, stores the list with
  * newline separator into out, and saves the offsets of the original words
  * in buffer->orig.
  */
-static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
+static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
+		regex_t *word_regex)
 {
 	int i, j;
+	long alloc = 0;
 
 	out->size = 0;
-	out->ptr = xmalloc(buffer->text.size);
+	out->ptr = NULL;
 
 	/* fake an empty "0th" word */
 	ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
@@ -390,11 +411,9 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 	buffer->orig_nr = 1;
 
 	for (i = 0; i < buffer->text.size; i++) {
-		if (isspace(buffer->text.ptr[i]))
+		j = find_word_boundary(&buffer->text, i, word_regex);
+		if (i == j)
 			continue;
-		for (j = i + 1; j < buffer->text.size &&
-				!isspace(buffer->text.ptr[j]); j++)
-			; /* find the end of the word */
 
 		/* store original boundaries */
 		ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
@@ -404,6 +423,7 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 		buffer->orig_nr++;
 
 		/* store one word */
+		ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
 		memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
 		out->ptr[out->size + j - i] = '\n';
 		out->size += j - i + 1;
@@ -432,8 +452,8 @@ static void diff_words_show(struct diff_words_data *diff_words)
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	diff_words_fill(&diff_words->minus, &minus);
-	diff_words_fill(&diff_words->plus, &plus);
+	diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
+	diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
 	xpp.flags = XDF_NEED_MINIMAL;
 	xecfg.ctxlen = 0;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
@@ -473,6 +493,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 		free (ecbdata->diff_words->minus.orig);
 		free (ecbdata->diff_words->plus.text.ptr);
 		free (ecbdata->diff_words->plus.orig);
+		free(ecbdata->diff_words->word_regex);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
@@ -1495,6 +1516,14 @@ static void builtin_diff(const char *name_a,
 			ecbdata.diff_words =
 				xcalloc(1, sizeof(struct diff_words_data));
 			ecbdata.diff_words->file = o->file;
+			if (o->word_regex) {
+				ecbdata.diff_words->word_regex = (regex_t *)
+					xmalloc(sizeof(regex_t));
+				if (regcomp(ecbdata.diff_words->word_regex,
+						o->word_regex, REG_EXTENDED))
+					die ("Invalid regular expression: %s",
+							o->word_regex);
+			}
 		}
 		xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
 			      &xpp, &xecfg, &ecb);
@@ -2510,6 +2539,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_OPT_CLR(options, COLOR_DIFF);
 	else if (!strcmp(arg, "--color-words"))
 		options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+	else if (!prefixcmp(arg, "--color-words=")) {
+		options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+		options->word_regex = arg + 14;
+	}
 	else if (!strcmp(arg, "--exit-code"))
 		DIFF_OPT_SET(options, EXIT_WITH_STATUS);
 	else if (!strcmp(arg, "--quiet"))
diff --git a/diff.h b/diff.h
index 4d5a327..23cd90c 100644
--- a/diff.h
+++ b/diff.h
@@ -98,6 +98,7 @@ struct diff_options {
 
 	int stat_width;
 	int stat_name_width;
+	const char *word_regex;
 
 	/* this is set by diffcore for DIFF_FORMAT_PATCH */
 	int found_changes;
-- 
1.6.1.186.g48f3bc4

^ permalink raw reply related

* Re: current git kernel has strange problems during bisect
From: Linus Torvalds @ 2009-01-11 20:04 UTC (permalink / raw)
  To: Sam Ravnborg
  Cc: Christian Borntraeger, Johannes Schindelin, git,
	Linux Kernel Mailing List
In-Reply-To: <20090111194258.GA4840@uranus.ravnborg.org>



On Sun, 11 Jan 2009, Sam Ravnborg wrote:
> 
> The cost of moving this piece of history from one git tree to another
> git tree is that we make it harder to debug the kernel for the advanced user
> that knows how to do bisect.
> 
> It is not like this history would be lost - one just had to look
> somewhere else to find it.
> 
> That may be a bad pain/benefit ratio - time will tell.

Umm. No. 

Time is exactly what makes it useful. It will make all the downsides 
shrink, and the advantages stay.

> There should be a way to avoid such pain when bisecting without
> having to mark a semi-random (for the average person) commit as good.

Well, you don't actually have to mark that semi-random one as good either. 
What you can do is to just mark anything that _only_ contains fs/btrfs as 
good. IOW, you don't have to know the magic number - you just have to be 
told that "oh, if you only have btrfs files, and you're not actively 
bisecting a btrfs bug, just do 'git bisect good' and continue".

Yeah, you'll hit it a few times, but you don't even have to compile things 
or boot anything, so it's not actually going to be all that much slower 
than just knowing about the magic point either.

So now you can consider yourself told how to solve it. It wasn't that 
hard. And the advantage is that we have real history.

			Linus

^ permalink raw reply

* Re: [PATCH] Cleanup of unused symcache variable inside diff-lib.c
From: Kjetil Barvik @ 2009-01-11 20:09 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano
In-Reply-To: <alpine.DEB.1.00.0901112044230.3586@pacific.mpi-cbg.de>

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
<snipp>
> My question was more: why do you do additional work and put a git diff 
> --raw between the commit message and the diffstat when that information is 
> in the patch already?

 Ok, I see.  I (re)used the 'git format-patch' command from previous
 run, and this time it was (without the line-breaks):

    git format-patch  --stat  --patch-with-raw -1 --signoff -M -C
       --summary --full-index --subject-prefix="PATCH"
       --output-directory ../diff_lib_c_symcache_cleanup_v1/

  Regarding the '--summary' I think it is a nice thing to do (so I try
  to always use it), and the '--full-index' is such that the git software
  has less chance of producing a collision.

  -- kjetil

^ permalink raw reply

* [RFC PATCH 0/3] Support for importing in builtin-fetch
From: Daniel Barkalow @ 2009-01-11 20:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

This series is an attempt to standardize importers and exporters for 
non-git upstream repositories, and to allow the normal git programs to use 
them transparently when configured to do so. So far, it only includes 
fetch/import, but the implementation there is sufficient to get "git 
fetch" to import data from a foreign system, set the configured remotes/ 
refs, and produce a working FETCH_HEAD (so "git pull" also works).

Part 4/3 is yet another p4 importer, which is mostly interesting as an 
example (although it does do a passable job of supporting integrates, 
which I haven't seen in another client-side p4 importer).

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* [RFC PATCH 1/3] Add "vcs" config option in remotes
From: Daniel Barkalow @ 2009-01-11 20:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

This will indicate to programs using the remote that it should be
accessed through a VCS helper. Until programs support it, have them
fail if the option is set.

Clone doesn't have a way to end up with a foreign remote, and I didn't 
touch "git remote".

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
 Documentation/config.txt |    4 ++++
 builtin-fetch.c          |    4 ++++
 builtin-ls-remote.c      |    2 ++
 builtin-push.c           |    3 +++
 remote.c                 |    2 ++
 remote.h                 |    2 ++
 6 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 7408bb2..3159e8c 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1205,6 +1205,10 @@ remote.<name>.tagopt::
 	Setting this value to \--no-tags disables automatic tag following when
 	fetching from remote <name>
 
+remote.<name>.vcs::
+	Setting this to a value <vcs> will cause git to interact with
+	the remote with the git-vcs-<vcs> helper.
+
 remotes.<group>::
 	The list of remotes which are fetched by "git remote update
 	<group>".  See linkgit:git-remote[1].
diff --git a/builtin-fetch.c b/builtin-fetch.c
index de6f307..7b46f8f 100644
--- a/builtin-fetch.c
+++ b/builtin-fetch.c
@@ -635,6 +635,10 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	else
 		remote = remote_get(argv[0]);
 
+	if (remote->foreign_vcs) {
+		die("Using foreign VCSes for fetch is not yet supported.");
+	}
+
 	transport = transport_get(remote, remote->url[0]);
 	if (verbosity >= 2)
 		transport->verbose = 1;
diff --git a/builtin-ls-remote.c b/builtin-ls-remote.c
index 78a88f7..d910be9 100644
--- a/builtin-ls-remote.c
+++ b/builtin-ls-remote.c
@@ -87,6 +87,8 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix)
 		}
 	}
 	remote = nongit ? NULL : remote_get(dest);
+	if (remote && remote->foreign_vcs)
+		die("ls-remote not supported for foreign VCSes");
 	if (remote && !remote->url_nr)
 		die("remote %s has no configured URL", dest);
 	transport = transport_get(remote, remote ? remote->url[0] : dest);
diff --git a/builtin-push.c b/builtin-push.c
index 122fdcf..3fdedba 100644
--- a/builtin-push.c
+++ b/builtin-push.c
@@ -53,6 +53,9 @@ static int do_push(const char *repo, int flags)
 	int i, errs;
 	struct remote *remote = remote_get(repo);
 
+	if (remote->foreign_vcs)
+		die("Pushing with foreign VCSes not supported.");
+
 	if (!remote)
 		die("bad repository '%s'", repo);
 
diff --git a/remote.c b/remote.c
index 570e112..02135ce 100644
--- a/remote.c
+++ b/remote.c
@@ -406,6 +406,8 @@ static int handle_config(const char *key, const char *value, void *cb)
 	} else if (!strcmp(subkey, ".proxy")) {
 		return git_config_string((const char **)&remote->http_proxy,
 					 key, value);
+	} else if (!strcmp(subkey, ".vcs")) {
+		return git_config_string(&remote->foreign_vcs, key, value);
 	}
 	return 0;
 }
diff --git a/remote.h b/remote.h
index a46a5be..625c7de 100644
--- a/remote.h
+++ b/remote.h
@@ -11,6 +11,8 @@ struct remote {
 	const char *name;
 	int origin;
 
+	const char *foreign_vcs;
+
 	const char **url;
 	int url_nr;
 	int url_alloc;
-- 
1.6.0.6

^ permalink raw reply related

* [RFC PATCH 2/3] Add specification of git-vcs helpers
From: Daniel Barkalow @ 2009-01-11 20:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

So far, I've actually used list and import; I've also implemented an 
export, but haven't figured out exactly how the user should cause it to be 
used.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
 Documentation/git-vcs.txt |   93 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 93 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/git-vcs.txt

diff --git a/Documentation/git-vcs.txt b/Documentation/git-vcs.txt
new file mode 100644
index 0000000..fa02b76
--- /dev/null
+++ b/Documentation/git-vcs.txt
@@ -0,0 +1,93 @@
+git-vcs-*(1)
+============
+
+NAME
+----
+git-vcs-* - Helper programs for interoperation with foreign systems
+
+SYNOPSIS
+--------
+'git vcs-<system>' <command> [options] [arguments]
+
+DESCRIPTION
+-----------
+
+These programs are normally not used directly by end users, but are
+invoked by various git programs that interact with remote repositories
+when the repository they would operate on is part of a foreign
+system.
+
+Each 'git vcs-<system>' is a helper for interoperating with a
+particular version control system. Different helpers have different
+capabilities (limited both by the particular helper and by the
+capabilities of the system they connect to), and they report what
+capabilities they support.
+
+In general, these programs interact with a portion of the refs
+namespace that isn't used by the rest of git. The configuration will
+then (generally) map these refs into the remotes namespace. This
+allows the importer to do whatever it wants with its refs without
+affecting the state visible to normal git programs.
+
+COMMANDS
+--------
+
+'capabilities'::
+	Prints the capabilities of the helper, one per line. These are:
+	 - import: the basic import command
+	 - marks: import should be done with a saved marks file
+	 - find-new-branches: detect new branches
+	 - export: the general export command
+	 - fork: create a new branch and export to it
+	 - anonymous-fork: make commits on a branch without an inherent name
+	 - merge: merge branches (of whatever type the system supports)
+
+	If the helper doesn't support "merge", the default for pull is
+	to rebase instead of merging.
+
+'list'::
+	Takes the remote name, and outputs the names of refs. These
+	may be followed, after a single space, by "changed" or
+	"unchanged", indicating whether the foreign repository has
+	changed from the state in the ref. If the helper doesn't know,
+	it doesn't have to provide a value. (In particular, it
+	shouldn't do expensive operations, such as importing the
+	content, to see whether it matches.)
+
+'import'::
+	Takes the remote name and a list of names of refs, and imports
+	whatever it describes, by outputting it in git-fast-import
+	format.
+
+'export'::
+	Sends the branch to the foreign system and reimports it in
+	fast-import format.
+
+	Reads a list of commits from stdin, where each commit has no
+	parents which were neither produced by an earlier import nor
+	appearing earlier in the list, where some commit has the old
+	value of the branch as a parent, and where all commits listed
+	are ancestors of the last one. Furthermore:
+
+	 - if the system doesn't support merges, each of these commits
+	   has only a single parent;
+
+	 - if the system doesn't support anonymous branches, the first
+	   commit has the old value of the branch as a parent (if the
+	   branch already had a value), and all parents are either the
+	   commit listed immediately before or produced by an earlier
+	   import;
+
+	 - if the system doesn't support many-way merges, each commit
+	   has at most two parents.
+
+	export produces output in fast-import format giving the
+	content after a round-trip through the foreign system. This
+	also contains extra headers to report the mapping of original
+	git commits to reimported git commits (to facilitate rewriting
+	local branches to use the history-as-reimported instead of the
+	git-only version).
+
+	export reports how much it managed to export by producing
+	commits in the fast-import stream that replace the listed
+	items that were successfully exported.
-- 
1.6.0.6

^ permalink raw reply related

* [RFC PATCH 4/3] Add example git-vcs-p4
From: Daniel Barkalow @ 2009-01-11 20:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

This implements the "list" and "import" commands, and has an
implementation of "export" which isn't used yet.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
 Documentation/git-vcs-p4.txt |   33 ++
 Makefile                     |    3 +
 builtin.h                    |    2 +
 git.c                        |    2 +
 p4-notes                     |   33 ++
 p4client.c                   |   50 +++
 p4client.h                   |   10 +
 vcs-p4.c                     |  945 ++++++++++++++++++++++++++++++++++++++++++
 vcs-p4.h                     |  119 ++++++
 9 files changed, 1197 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/git-vcs-p4.txt
 create mode 100644 p4-notes
 create mode 100644 p4client.c
 create mode 100644 p4client.h
 create mode 100644 vcs-p4.c
 create mode 100644 vcs-p4.h

diff --git a/Documentation/git-vcs-p4.txt b/Documentation/git-vcs-p4.txt
new file mode 100644
index 0000000..4039d24
--- /dev/null
+++ b/Documentation/git-vcs-p4.txt
@@ -0,0 +1,33 @@
+Config
+------
+
+vcs-p4.port::
+	The value to use for P4PORT
+
+vcs-p4.client::
+	The value to use for P4CLIENT
+
+vcs-p4.codelineformat::
+	A regular expression to match valid codelines; a codeline is a
+	directory that contains exactly those files that belong to a
+	version of a project. Importing history with integrations will
+	generally discover codelines not explicitly marked to be
+	imported, found when a file in a known codeline, whose full
+	path is therefore the codeline path plus a relative path, is
+	integrated from a file with a name that ends with that
+	relative path. However, files will sometimes be integrated
+	from non-codelines (that is, from a directory that contains
+	unrelated files whose history should not be tracked), and this
+	option can be used to ignore some directories.
+
+	Note that, properly, the history of the individual files from
+	a non-codeline which got integrated into a codeline should
+	contribute but that this is not presently supported.
+
+remotes.*.url::
+	The perforce location of a codeline to track. Other codelines
+	may be discovered by git-vcs-p4, but it will make no attempt
+	to get versions in these locations more recent than the last
+	versions that contribute at present to the tracked codelines,
+	and it will not make them available for matching in "fetch"
+	patterns.
diff --git a/Makefile b/Makefile
index dee97c1..3f40452 100644
--- a/Makefile
+++ b/Makefile
@@ -501,6 +501,9 @@ LIB_OBJS += wt-status.o
 LIB_OBJS += xdiff-interface.o
 LIB_OBJS += preload-index.o
 
+LIB_OBJS += p4client.o
+LIB_OBJS += vcs-p4.o
+
 BUILTIN_OBJS += builtin-add.o
 BUILTIN_OBJS += builtin-annotate.o
 BUILTIN_OBJS += builtin-apply.o
diff --git a/builtin.h b/builtin.h
index 1495cf6..9039ad5 100644
--- a/builtin.h
+++ b/builtin.h
@@ -21,6 +21,8 @@ extern int commit_tree(const char *msg, unsigned char *tree,
 		const char *author);
 extern int check_pager_config(const char *cmd);
 
+extern int cmd_p4(int argc, const char **argv, const char *prefix);
+
 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_apply(int argc, const char **argv, const char *prefix);
diff --git a/git.c b/git.c
index a53e24f..9ba92fe 100644
--- a/git.c
+++ b/git.c
@@ -265,6 +265,8 @@ static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
+		{ "vcs-p4", cmd_p4 },
+
 		{ "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
 		{ "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE },
 		{ "annotate", cmd_annotate, RUN_SETUP },
diff --git a/p4-notes b/p4-notes
new file mode 100644
index 0000000..bd95903
--- /dev/null
+++ b/p4-notes
@@ -0,0 +1,33 @@
+Branches in p4 work much as they do in svn, except that the branches
+are not rooted at predictable places. Furthermore, there is not a
+uniform tree layout within a depot.
+
+Therefore, in order to generate a git repository from p4, it is
+necessary to specify a root within the depot as the working tree root
+in git. On the other hand, it should be possible to determine from the
+p4 history what portions of the depot outside of the root should be
+considered as branches, as it tracks "integrations".
+
+In theory, anyway, it should even be possible to produce a git
+repository with submodules when a similar thing has been done with
+integrations in p4, by determining that there are integrations into a
+subdirectory of the root.
+
+---
+
+Overview of operation:
+
+ - Allocate codeline
+ - Import codeline
+   - use p4_filelog to find the files and their revisions in the codeline
+   - For each file,
+
+---
+Saving processed state
+
+ - Record for each codeline
+   - What are all the changesets?
+
+ - Record for each codeline/changeset
+   - What's the commit?
+
diff --git a/p4client.c b/p4client.c
new file mode 100644
index 0000000..09adc47
--- /dev/null
+++ b/p4client.c
@@ -0,0 +1,50 @@
+#include "p4client.h"
+
+#include "cache.h"
+#include "run-command.h"
+
+/* Environment handed to every p4 child; installed by p4_init(). */
+static const char *const *envp;
+
+/*
+ * Remember the NULL-terminated environment array to run p4 with.
+ * The array is not copied, so it has to outlive every later p4_call().
+ */
+void p4_init(const char *const *env)
+{
+	envp = env;
+}
+
+/*
+ * The single p4 child in flight.  p4_call() fills it in and
+ * p4_complete() reaps it, so the two must be strictly paired and
+ * calls cannot be nested.
+ */
+static struct child_process child;
+
+/*
+ * Start "p4 <arg0> <argv[0]> ... <argv[argc-1]>".
+ *
+ * With a non-NULL fds, pipes are opened to the child: fds[0] is the
+ * end to write the child's stdin to and fds[1] is the end to read its
+ * stdout from.  With a NULL fds the child gets neither stdin nor
+ * stdout.
+ *
+ * Returns 0 on success.  If the child could not be started, returns
+ * -1 and (when fds is given) sets both descriptors to -1.
+ */
+int p4_call(int fds[], const char *arg0, int argc, const char **argv)
+{
+	int i;
+	memset(&child, 0, sizeof(child));
+	if (fds) {
+		child.in = -1;
+		child.out = -1;
+	} else {
+		child.no_stdin = 1;
+		child.no_stdout = 1;
+	}
+	child.err = 0;
+	/* "p4", arg0, the caller's arguments and the terminating NULL */
+	child.argv = xcalloc(argc + 3, sizeof(*argv));
+	child.argv[0] = "p4";
+	child.argv[1] = arg0;
+	child.env = envp;
+	for (i = 0; i < argc; i++)
+		child.argv[i + 2] = argv[i];
+	child.argv[argc + 2] = NULL;
+	if (start_command(&child)) {
+		free(child.argv);
+		child.argv = NULL;
+		if (fds)
+			fds[0] = fds[1] = -1;
+		return -1;
+	}
+	if (fds) {
+		fds[0] = child.in;
+		fds[1] = child.out;
+	}
+	return 0;
+}
+
+/*
+ * Close our ends of the pipes (if any) and wait for the child started
+ * by the last p4_call().  Returns what finish_command() reports, or
+ * -1 if there is no child to wait for.
+ */
+int p4_complete(void)
+{
+	int status;
+
+	/* argv is only set while a successfully started child is pending */
+	if (!child.argv)
+		return -1;
+	if (!child.no_stdin)
+		close(child.in);
+	if (!child.no_stdout)
+		close(child.out);
+	status = finish_command(&child);
+	free(child.argv);
+	child.argv = NULL;
+	return status;
+}
diff --git a/p4client.h b/p4client.h
new file mode 100644
index 0000000..2fa2cc3
--- /dev/null
+++ b/p4client.h
@@ -0,0 +1,10 @@
+#ifndef P4CLIENT_H
+#define P4CLIENT_H
+
+/* Set the NULL-terminated environment that p4 children are run with. */
+void p4_init(const char *const *env);
+
+/*
+ * Run "p4 <arg0> <argv...>".  If fds is non-NULL, fds[0] receives the
+ * descriptor to write the child's stdin to and fds[1] the descriptor
+ * to read its stdout from; if fds is NULL the child has neither.
+ */
+int p4_call(int fds[], const char *arg0, int argc, const char **argv);
+
+/* Close the pipes of the last p4_call() and wait for that child. */
+int p4_complete(void);
+
+#endif
diff --git a/vcs-p4.c b/vcs-p4.c
new file mode 100644
index 0000000..3ac1e38
--- /dev/null
+++ b/vcs-p4.c
@@ -0,0 +1,945 @@
+#include "cache.h"
+#include "vcs-p4.h"
+#include "strbuf.h"
+#include "remote.h"
+#include "commit.h"
+#include "tree.h"
+#include "tree-walk.h"
+#include "diff.h"
+
+#include "p4client.h"
+
+/** Should we try to find codelines that branch off of the relevant
+ * ones, for future reference? This lets us find new things in
+ * ls-remote without making the user tell us.
+ *
+ * Nothing in this file assigns it, so it keeps its static initial
+ * value of 0 and the discovery branch in p4_filelog() stays off.
+ **/
+static int find_new_codelines;
+
+static regex_t *codeline_regex; /* set by handle_config() from vcs-p4.codelineformat; NULL until then */
+
+#define CODELINE_TAG "Codeline: " /* trailer written by p4_change(), parsed back from commit messages */
+#define CHANGESET_TAG "Changeset: " /* likewise, carries the p4 change number */
+
+/** List functions **/
+
+/* Append a new entry holding revision to the end of *list. */
+static void add_to_revision_list(struct p4_revision_list **list,
+				 struct p4_revision *revision)
+{
+	struct p4_revision_list *entry;
+	struct p4_revision_list **tail;
+
+	for (tail = list; *tail; tail = &(*tail)->next)
+		; /* walk to the terminating NULL link */
+	entry = xcalloc(1, sizeof(*entry));
+	entry->revision = revision;
+	*tail = entry;
+}
+
+/** Functions to find or create representations **/
+
+/* Allocate an empty depot whose fast-import marks start at 1. */
+static struct p4_depot *get_depot(void)
+{
+	struct p4_depot *fresh;
+
+	fresh = xcalloc(1, sizeof(*fresh));
+	fresh->next_mark = 1;
+	return fresh;
+}
+
+static struct p4_changeset *get_changeset(struct p4_codeline *codeline,
+					  long number);
+
+/*
+ * Map a depot path "//<rest>" to a newly allocated "refs/p4/<rest>".
+ * Returns NULL if path does not start with "//".
+ */
+static char *codeline_to_refname(const char *path) {
+	struct strbuf ref;
+
+	if (prefixcmp(path, "//"))
+		return NULL;
+	strbuf_init(&ref, 0);
+	strbuf_addstr(&ref, "refs/p4/");
+	strbuf_addstr(&ref, path + 2);
+	return strbuf_detach(&ref, NULL);
+}
+
+/*
+ * Map "refs/p4/<rest>" to a newly allocated depot path "//<rest>".
+ * Returns NULL if refname is not under "refs/p4/".
+ */
+static char *refname_to_codeline(const char *refname) {
+	static const char ref_prefix[] = "refs/p4/";
+	struct strbuf depot_path;
+
+	if (prefixcmp(refname, ref_prefix))
+		return NULL;
+	strbuf_init(&depot_path, 0);
+	strbuf_addstr(&depot_path, "//");
+	strbuf_addstr(&depot_path, refname + strlen(ref_prefix));
+	return strbuf_detach(&depot_path, NULL);
+}
+
+/*
+ * Find the codeline rooted at exactly path in depot, creating and
+ * appending it on first use.  Returns NULL if vcs-p4.codelineformat
+ * is configured and path does not match it.
+ *
+ * For a new codeline whose ref already exists, the tip commit's
+ * "Changeset: " trailer is used to record how far a previous import
+ * got (finished_changeset / history).
+ */
+static struct p4_codeline *get_codeline(struct p4_depot *depot, const char *path)
+{
+	struct p4_codeline **posn, *codeline;
+	unsigned char sha1[20];
+
+	if (codeline_regex && regexec(codeline_regex, path, 0, NULL, 0))
+		return NULL;
+
+	for (posn = &depot->codelines; *posn; posn = &(*posn)->next)
+		if (!strcmp(path, (*posn)->path))
+			return *posn;
+	codeline = xcalloc(1, sizeof(*codeline));
+	codeline->depot = depot;
+	codeline->path = xstrdup(path);
+
+	/* refname is NULL when path is not of the form "//..." */
+	codeline->refname = codeline_to_refname(path);
+	if (codeline->refname && !get_sha1(codeline->refname, sha1)) {
+		struct commit *commit = lookup_commit(sha1);
+		char *field = NULL;
+		/* only look for the trailer in a commit we could really read */
+		if (commit && !parse_commit(commit) && commit->buffer) {
+			printf("progress found commit for %s\n", codeline->refname);
+			field = strstr(commit->buffer, CHANGESET_TAG);
+		}
+		if (!field) {
+			fprintf(stderr, "Couldn't find changeset line in commit\n");
+		} else {
+			struct p4_changeset *changeset;
+			codeline->finished_changeset =
+				strtoul(field + strlen(CHANGESET_TAG), NULL, 10);
+			printf("progress for changeset %lu\n",
+			       codeline->finished_changeset);
+			changeset = get_changeset(codeline, codeline->finished_changeset);
+			changeset->commit = commit;
+			codeline->history = changeset;
+		}
+	}
+	/* posn still points at the terminating link of depot->codelines */
+	*posn = codeline;
+	return codeline;
+}
+
+/*
+ * Return the first known codeline whose path is a string prefix of
+ * path, or NULL if there is none.
+ */
+static struct p4_codeline *find_codeline(struct p4_depot *depot, const char *path)
+{
+	struct p4_codeline *candidate = depot->codelines;
+
+	while (candidate) {
+		if (!prefixcmp(path, candidate->path))
+			return candidate;
+		candidate = candidate->next;
+	}
+	return NULL;
+}
+
+/** Finds the changeset with the given number in the codeline, or
+ * inserts a new one at the right place to keep the list ordered by
+ * number. A changeset added at the end becomes the codeline's head.
+ **/
+static struct p4_changeset *get_changeset(struct p4_codeline *codeline,
+					  long number)
+{
+	/* the stored numbers are unsigned long; compare and print as such */
+	unsigned long wanted = (unsigned long)number;
+	struct p4_changeset **posn = &codeline->changesets;
+	struct p4_changeset *changeset, *prev = NULL;
+	while (*posn && (*posn)->number < wanted) {
+		prev = *posn;
+		posn = &(*posn)->next;
+	}
+	if (*posn && (*posn)->number == wanted)
+		return *posn;
+	printf("# add changeset %lu in %s\n", wanted, codeline->path);
+	changeset = xcalloc(1, sizeof(*changeset));
+	changeset->codeline = codeline;
+	changeset->next = *posn;
+	changeset->previous = prev;
+	if (changeset->next)
+		changeset->next->previous = changeset;
+	else
+		codeline->head = changeset;
+	*posn = changeset;
+	changeset->number = wanted;
+	codeline->num_changesets++;
+	return changeset;
+}
+
+/*
+ * Map a git commit back to the p4 changeset it was imported from,
+ * using the "Changeset: " and "Codeline: " trailers in its message.
+ * Returns NULL if the commit cannot be parsed, lacks either trailer,
+ * has a zero change number, or names a codeline that get_codeline()
+ * rejects.  The commit buffer is left unmodified.
+ */
+static struct p4_changeset *changeset_from_commit(struct p4_depot *depot,
+						  struct commit *commit)
+{
+	unsigned long number = 0;
+	char *path = NULL, *field;
+	struct p4_changeset *changeset = NULL;
+
+	if (parse_commit(commit) || !commit->buffer)
+		return NULL;
+	field = strstr(commit->buffer, CHANGESET_TAG);
+	if (field)
+		number = strtoul(field + strlen(CHANGESET_TAG), NULL, 10);
+	field = strstr(commit->buffer, CODELINE_TAG);
+	if (field) {
+		/* copy the rest of the line instead of cutting the buffer */
+		const char *start = field + strlen(CODELINE_TAG);
+		path = xstrndup(start, strcspn(start, "\n"));
+	}
+	if (number && path) {
+		struct p4_codeline *codeline = get_codeline(depot, path);
+		if (codeline)
+			changeset = get_changeset(codeline, number);
+	}
+	free(path);
+	return changeset;
+}
+
+/*
+ * Find, or create at the end of the codeline's file list, the file
+ * whose name is fullpath with the first strlen(codeline->path)
+ * characters skipped.
+ */
+static struct p4_file *get_file_by_full(struct p4_codeline *codeline,
+					const char *fullpath)
+{
+	const char *relative = fullpath + strlen(codeline->path);
+	struct p4_file **link = &codeline->files;
+	struct p4_file *file;
+
+	while (*link) {
+		if (!strcmp((*link)->name, relative))
+			return *link;
+		link = &(*link)->next;
+	}
+	file = xcalloc(1, sizeof(*file));
+	file->codeline = codeline;
+	file->name = xstrdup(relative);
+	*link = file;
+	return file;
+}
+
+/*
+ * Find the file at depot path `path` that is related to `base` by an
+ * integration.
+ *
+ * If path ends with base's codeline-relative name, the part before it
+ * is taken as the path of another codeline, which is looked up or
+ * created.  Otherwise path is looked for in the already known
+ * codelines.  Returns NULL when no codeline can be determined.
+ */
+static struct p4_file *get_related_file(struct p4_file *base, const char *path)
+{
+	int basenamelen = strlen(base->name);
+	int reldirlen = strlen(path) - basenamelen;
+	struct p4_codeline *codeline;
+	if (reldirlen > 0 && !strcmp(path + reldirlen, base->name)) {
+		/* File with the same name in another codeline */
+		char *other = xstrndup(path, reldirlen);
+		printf("# find %s in %s\n", path, other);
+		codeline = get_codeline(base->codeline->depot, other);
+		/* get_codeline() keeps its own copy of the path */
+		free(other);
+		if (codeline)
+			return get_file_by_full(codeline, path);
+		return NULL;
+	}
+	codeline = find_codeline(base->codeline->depot, path);
+	if (codeline) {
+		/* File with a different name in some known codeline */
+		return get_file_by_full(codeline, path);
+	}
+	/* Not in any known codeline; need to recheck this after
+	 * discovering codelines completes.
+	 */
+	return NULL;
+}
+
+/*
+ * Return revision `number` of file, inserting a new zeroed entry into
+ * the file's number-ordered revision list if it is not there yet.
+ */
+static struct p4_revision *get_revision(struct p4_file *file, unsigned number)
+{
+	struct p4_revision **link = &file->revisions;
+	struct p4_revision *fresh;
+
+	while (*link && (*link)->number < number)
+		link = &(*link)->next;
+	if (*link && (*link)->number == number)
+		return *link;
+
+	fresh = xcalloc(1, sizeof(*fresh));
+	fresh->file = file;
+	fresh->number = number;
+	fresh->next = *link;
+	*link = fresh;
+	return fresh;
+}
+
+/*
+ * Convert a date laid out as "YYYY/MM/DD hh:mm:ss" (fields are read at
+ * fixed offsets 0, 5, 8, 11, 14 and 17) into the value mktime()
+ * returns for it, truncated to int.
+ */
+static int parse_p4_date(const char *date)
+{
+	struct tm when;
+
+	memset(&when, 0, sizeof(when));
+	when.tm_sec = strtol(date + 17, NULL, 10);
+	when.tm_min = strtol(date + 14, NULL, 10);
+	when.tm_hour = strtol(date + 11, NULL, 10);
+	when.tm_mday = strtol(date + 8, NULL, 10);
+	/* struct tm counts months from 0 and years from 1900 */
+	when.tm_mon = strtol(date + 5, NULL, 10) - 1;
+	when.tm_year = strtol(date, NULL, 10) - 1900;
+	return mktime(&when);
+}
+
+/*
+ * Classify a p4 file type string by its prefix.  Returns one of
+ * "text", "ktext", "xtext" or "kxtext", or "unknown" for anything
+ * else.
+ */
+static const char *get_file_type(char *text)
+{
+	static const char *const known[] = {
+		"text", "ktext", "xtext", "kxtext"
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
+		if (!prefixcmp(text, known[i]))
+			return known[i];
+	return "unknown";
+}
+
+/*
+ * Return the mode string for a file type: "100755" for the types
+ * "xtext" and "kxtext", "100644" for everything else.
+ */
+static const char *get_file_mode(const char *type)
+{
+	int executable = !strcmp(type, "kxtext") || !strcmp(type, "xtext");
+
+	return executable ? "100755" : "100644";
+}
+
+/*
+ * Write buf to stdout as a "data <length>" line, followed by the raw
+ * bytes and a newline.
+ */
+static void output_data(struct strbuf *buf)
+{
+	/* buf->len is a size_t; cast it to match the format */
+	printf("data %lu\n", (unsigned long)buf->len);
+	fwrite(buf->buf, 1, buf->len, stdout);
+	printf("\n");
+}
+
+/*
+ * Write the contents of object sha1 to <codeline->working>/<path>,
+ * replacing whatever the file held before.  Dies if the object cannot
+ * be read or the file cannot be opened or written; returns 0
+ * otherwise.
+ */
+static int write_blob(struct p4_codeline *codeline,
+		      const unsigned char *sha1,
+		      const char *path)
+{
+	struct strbuf buf;
+	void *content;
+	enum object_type type;
+	unsigned long size;
+	int fd;
+
+	content = read_sha1_file(sha1, &type, &size);
+	if (!content)
+		die("Unable to read object %s", sha1_to_hex(sha1));
+	strbuf_init(&buf, 0);
+	strbuf_addf(&buf, "%s/%s", codeline->working, path);
+	/* O_TRUNC: a shorter new version must not keep the old tail */
+	fd = open(buf.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+	if (fd < 0) {
+		die("Got err %d", errno);
+	}
+	write_or_die(fd, content, size);
+	close(fd);
+	free(content);
+	strbuf_release(&buf);
+	return 0;
+}
+
+/** P4 operations **/
+
+/*
+ * Run "p4 where <codeline path>" and store the text after the last
+ * space of the output as codeline->working (the last output line that
+ * contains a space wins).  Returns 0 if a working location is known
+ * afterwards, -1 otherwise.
+ *
+ * NOTE(review): splitting at the last space presumably breaks for
+ * local paths that contain spaces.
+ */
+static int p4_where(struct p4_codeline *codeline)
+{
+	int fds[2];
+	const char *argv[1];
+	struct strbuf buf;
+	FILE *input;
+
+	strbuf_init(&buf, 0);
+	strbuf_addstr(&buf, codeline->path);
+	argv[0] = buf.buf;
+	p4_call(fds, "where", 1, argv);
+	input = fdopen(fds[1], "r");
+	if (!input)
+		die("Unable to read the output of p4 where");
+
+	/* the child has its own copy of argv, so buf can be reused */
+	while (!strbuf_getline(&buf, input, '\n')) {
+		char *working = strrchr(buf.buf, ' ');
+		if (working) {
+			free(codeline->working);
+			codeline->working = xstrdup(working + 1);
+		}
+	}
+	fclose(input);
+	p4_complete();
+	strbuf_release(&buf);
+	return codeline->working ? 0 : -1;
+}
+
+/*
+ * Run "p4 sync <working>/...@<head change>" to bring the client's
+ * copy of the codeline to the newest changeset we know about.
+ * codeline->working and codeline->head must be set.
+ */
+static void p4_sync(struct p4_codeline *codeline)
+{
+	const char *argv[1];
+	struct strbuf buf;
+
+	printf("progress syncing %s/...\n", codeline->working);
+	strbuf_init(&buf, 0);
+	strbuf_addf(&buf, "%s/...@%lu",
+		    codeline->working, codeline->head->number);
+	argv[0] = buf.buf;
+	p4_call(NULL, "sync", 1, argv);
+	/* released once the child is started, as p4_edit() and friends do */
+	strbuf_release(&buf);
+	p4_complete();
+}
+
+/* Run "p4 edit" on <codeline->working>/<path>. */
+static void p4_edit(struct p4_codeline *codeline, const char *path)
+{
+	struct strbuf target;
+	const char *args[1];
+
+	strbuf_init(&target, 0);
+	strbuf_addf(&target, "%s/%s", codeline->working, path);
+	args[0] = target.buf;
+
+	p4_call(NULL, "edit", 1, args);
+	strbuf_release(&target);
+	p4_complete();
+}
+
+/* Run "p4 add" on <codeline->working>/<path>. */
+static void p4_add(struct p4_codeline *codeline, const char *path)
+{
+	struct strbuf target;
+	const char *args[1];
+
+	strbuf_init(&target, 0);
+	strbuf_addf(&target, "%s/%s", codeline->working, path);
+	args[0] = target.buf;
+
+	p4_call(NULL, "add", 1, args);
+	strbuf_release(&target);
+	p4_complete();
+}
+
+/* Run "p4 delete" on <codeline->working>/<path>. */
+static void p4_delete(struct p4_codeline *codeline, const char *path)
+{
+	struct strbuf target;
+	const char *args[1];
+
+	strbuf_init(&target, 0);
+	strbuf_addf(&target, "%s/%s", codeline->working, path);
+	args[0] = target.buf;
+
+	p4_call(NULL, "delete", 1, args);
+	strbuf_release(&target);
+	p4_complete();
+}
+
+/*
+ * Submit the files opened in the client as one p4 change described by
+ * the message of commit.
+ *
+ * The change form is taken from "p4 change -o"; the tab-indented lines
+ * under "Description:" are replaced by the commit message (everything
+ * after the first blank line of the commit object, each line prefixed
+ * with a tab).  The edited form is printed to stdout and fed to
+ * "p4 submit -i", whose output is copied to stderr.
+ */
+static void p4_submit(struct commit *commit)
+{
+	int fds[2];
+	const char *argv[1];
+	int skip = 0;
+	struct strbuf message;
+	struct strbuf line;
+	FILE *input;
+
+	argv[0] = "-o";
+	p4_call(fds, "change", 1, argv);
+
+	input = fdopen(fds[1], "r");
+	if (!input)
+		die("Unable to read the output of p4 change");
+
+	strbuf_init(&message, 0);
+	strbuf_init(&line, 0);
+
+	while (!strbuf_getline(&line, input, '\n')) {
+		if (!skip) {
+			strbuf_addstr(&message, line.buf);
+			strbuf_addch(&message, '\n');
+		}
+		/* the first line not starting with a tab ends the old description */
+		if (line.buf[0] != '\t')
+			skip = 0;
+		if (!strcmp(line.buf, "Description:")) {
+			const char *posn;
+			parse_commit(commit);
+			posn = strstr(commit->buffer, "\n\n");
+			/* a commit without the separator has an empty message */
+			posn = posn ? posn + 2 : "";
+			while (*posn) {
+				const char *eol = strchr(posn, '\n');
+				strbuf_addstr(&message, "\t");
+				if (eol) {
+					eol++;
+					strbuf_add(&message, posn, eol - posn);
+					posn = eol;
+				} else {
+					strbuf_addstr(&message, posn);
+					break;
+				}
+			}
+			strbuf_addstr(&message, "\n");
+			skip = 1;
+		}
+	}
+
+	fclose(input);
+	p4_complete();
+
+	printf("%s\n", message.buf);
+
+	argv[0] = "-i";
+	p4_call(fds, "submit", 1, argv);
+
+	write_or_die(fds[0], message.buf, message.len);
+	/* closing our end lets p4 see the end of the form */
+	close(fds[0]);
+
+	input = fdopen(fds[1], "r");
+	if (!input)
+		die("Unable to read the output of p4 submit");
+	while (!strbuf_getline(&line, input, '\n'))
+		fprintf(stderr, "%s\n", line.buf);
+	fclose(input);
+	p4_complete();
+
+	strbuf_release(&line);
+	strbuf_release(&message);
+}
+
+/*
+ * Fetch the content of one file revision with "p4 print -q" and write
+ * it to stdout as a "data <length>" line, the raw bytes and a newline.
+ */
+static void p4_print(struct p4_revision *revision)
+{
+	int fds[2];
+	const char *argv[2];
+	struct strbuf line;
+	strbuf_init(&line, 0);
+	strbuf_addf(&line, "%s%s#%lu",
+		    revision->file->codeline->path,
+		    revision->file->name, revision->number);
+	argv[1] = line.buf;
+	argv[0] = "-q";
+	p4_call(fds, "print", 2, argv);
+
+	/* the child has its own copy of argv, so the buffer can be reused */
+	strbuf_reset(&line);
+	strbuf_read(&line, fds[1], 0);
+	/* write by length: the content may contain NUL bytes */
+	printf("data %lu\n", (unsigned long)line.len);
+	fwrite(line.buf, 1, line.len, stdout);
+	printf("\n");
+	close(fds[1]);
+	p4_complete();
+	strbuf_release(&line);
+}
+
+/*
+ * Print the "committer" line and the commit message for changeset,
+ * taken from "p4 change -o <number>".
+ *
+ * The p4 user name is used as both name and email, the date is
+ * printed with a "+0000" offset, and the message is the description
+ * (one leading tab stripped per line) followed by the "Codeline: "
+ * and "Changeset: " trailers.
+ */
+static void p4_change(struct p4_changeset *changeset)
+{
+	int fds[2];
+	const char *argv[2];
+	struct strbuf line;
+	struct strbuf message;
+	int date = 0;
+	char *user = NULL;
+	FILE *input;
+
+	strbuf_init(&line, 0);
+	strbuf_addf(&line, "%lu", changeset->number);
+	argv[1] = line.buf;
+	argv[0] = "-o";
+	p4_call(fds, "change", 2, argv);
+
+	input = fdopen(fds[1], "r");
+	if (!input)
+		die("Unable to read the output of p4 change");
+
+	/* header fields up to the start of the description */
+	while (!strbuf_getline(&line, input, '\n')) {
+		if (!prefixcmp(line.buf, "User:\t")) {
+			free(user);
+			user = xstrdup(line.buf + 6);
+		} else if (!prefixcmp(line.buf, "Date:\t"))
+			date = parse_p4_date(line.buf + 6);
+		else if (!prefixcmp(line.buf, "Description:"))
+			break;
+	}
+	/* never hand a NULL pointer to %s if the form had no User: line */
+	printf("committer %s <%s> %d +0000\n",
+	       user ? user : "unknown", user ? user : "unknown", date);
+	free(user);
+
+	strbuf_init(&message, 0);
+
+	/* everything after "Description:" is the message */
+	while (!strbuf_getline(&line, input, '\n')) {
+		strbuf_addstr(&message, line.buf + (line.buf[0] == '\t'));
+		strbuf_addch(&message, '\n');
+	}
+
+	strbuf_addf(&message, CODELINE_TAG "%s\n" CHANGESET_TAG "%lu\n",
+		    changeset->codeline->path, changeset->number);
+	output_data(&message);
+	fclose(input);
+	p4_complete();
+	strbuf_release(&message);
+	strbuf_release(&line);
+}
+
+/** Finds all files in the codeline, and all revisions of those files,
+ * and all of the changesets they are from, and looks up the codelines
+ * and files they integrate or branch.
+ *
+ * This runs "p4 filelog <path>/..." once per codeline (later calls
+ * return early) and reads three kinds of lines: a line not starting
+ * with "..." names a file, a "... #<rev>" line describes one revision
+ * of that file, and a "... ... " line describes an integration of
+ * that revision. When done, codeline->unreported is set to the first
+ * changeset after codeline->history, or to the first changeset at
+ * all if there is no history.
+ **/
+static void p4_filelog(struct p4_codeline *codeline)
+{
+	int fds[2];
+	struct strbuf line;
+
+	struct p4_file *file = NULL;
+	struct p4_revision *revision = NULL;
+	const char *arg;
+
+	if (codeline->filelog_done)
+		return;
+
+	printf("progress looking at codeline %s\n", codeline->path);
+
+	strbuf_init(&line, 0);
+	strbuf_addstr(&line, codeline->path);
+	strbuf_addstr(&line, "/...");
+	arg = line.buf;
+	p4_call(fds, "filelog", 1, &arg);
+
+	FILE *input = fdopen(fds[1], "r"); /* NOTE(review): result is not checked for NULL */
+
+	while (!strbuf_getline(&line, input, '\n')) {
+		if (prefixcmp(line.buf, "...")) {
+			if (file) {
+				// TODO: we're done with one file; set its HEAD number
+				// (also need this after the loop, for the last file)
+			}
+			file = get_file_by_full(codeline, line.buf);
+		} else if (prefixcmp(line.buf, "... ...")) {
+// ... #<rev> change <change> <op> on <date> by <client> (<type>) '<oneline>'
+			int rev, change;
+			char *posn = line.buf + strlen("... #");
+			rev = strtoul(posn, &posn, 10);
+			posn += strlen(" change ");
+			change = strtoul(posn, &posn, 10);
+			posn = strchr(posn, '(') + 1; /* NOTE(review): assumes the line contains a '(' */
+			revision = get_revision(file, rev);
+			revision->changeset = get_changeset(codeline, change);
+			revision->type = get_file_type(posn);
+			add_to_revision_list(&revision->changeset->revisions,
+					     revision);
+		} else {
+// ... ... <op> <direction> <path>#<rev>
+			const char *path;
+			int rev, from = 0;
+			char *type = line.buf + strlen("... ... ");
+			char *posn = strrchr(type, ' ') + 1; /* start of <path>#<rev>; assumes a space is present */
+
+			/* "ignored <path>" (no direction word) or "<op> from <path>" */
+			from = (!prefixcmp(type, "ignored") &&
+				posn == type + strlen("ignored") + 1) ||
+				!prefixcmp(strchr(type, ' '), " from");
+
+			path = posn;
+			posn = strchr(posn, '#'); /* NOTE(review): assumes the line contains a '#' */
+			*(posn++) = '\0';
+			do {
+				/* ???? What does a list of revisions mean? */
+				rev = strtoul(posn, &posn, 10); /* the last number in the list is the one kept */
+				if (*posn != ',')
+					break;
+				posn += 2; /* skip the ',' and the character after it (presumably '#') */
+			} while (1);
+			if (from) {
+				struct p4_file *rel_file =
+					get_related_file(file, path);
+				if (!rel_file)
+					printf("# Couldn't find %s related to %s %s\n",
+					    path, file->codeline->path,
+					    file->name);
+				if (rel_file && rel_file->codeline != codeline)
+					add_to_revision_list(&revision->integrated, /* NOTE(review): revision is NULL if no "... #" line came first */
+							     get_revision(rel_file, rev));
+			} else if (find_new_codelines) {
+				/* This is an "<op> into <path>#<rev>" line.
+				 * We just want to try to create a codeline.
+				 */
+				get_related_file(file, path);
+			}
+		}
+	}
+	fclose(input);
+	p4_complete();
+	if (codeline->history)
+		codeline->unreported = codeline->history->next;
+	else
+		codeline->unreported = codeline->changesets;
+	codeline->filelog_done = 1;
+}
+
+/** Functions to import things (i.e., fill out the representations) **/
+
+/*
+ * Return the entry of *list whose changeset belongs to codeline.  If
+ * there is none, append a zeroed entry (its changeset still NULL) and
+ * return that; the caller is expected to fill it in.
+ */
+static struct p4_changeset_list *
+find_codeline_changeset(struct p4_changeset_list **list,
+			struct p4_codeline *codeline)
+{
+	struct p4_changeset_list **link;
+
+	for (link = list; *link; link = &(*link)->next)
+		if ((*link)->changeset->codeline == codeline)
+			return *link;
+	*link = xcalloc(1, sizeof(**link));
+	return *link;
+}
+
+/*
+ * Work out which changesets of other codelines this changeset
+ * integrates, and store them in changeset->integrated: one entry per
+ * source codeline, holding the highest-numbered changeset that
+ * introduced a revision integrated here.
+ */
+static void resolve_changeset_integrates(struct p4_changeset *changeset)
+{
+	struct p4_revision_list *posn, *rev_ints;
+	struct p4_changeset_list *changesets = NULL, *item;
+	/* For each codeline, we want the highest numbered changeset
+	 * that introduced a revision that has been integrated.
+	 */
+	for (posn = changeset->revisions; posn; posn = posn->next) {
+		for (rev_ints = posn->revision->integrated; rev_ints;
+		     rev_ints = rev_ints->next) {
+			struct p4_revision *source = rev_ints->revision;
+			struct p4_codeline *other = source->file->codeline;
+			if (other == changeset->codeline)
+				continue;
+			/* The revision doesn't have the changeset
+			 * filled out unless we call this.
+			 */
+			p4_filelog(other);
+			if (!source->changeset) {
+				/* the filelog did not list this revision;
+				 * skip it rather than dereference NULL
+				 */
+				printf("# no changeset known for %s#%lu\n",
+				       source->file->name, source->number);
+				continue;
+			}
+			item = find_codeline_changeset(&changesets, other);
+			if (!item->changeset ||
+			    item->changeset->number < source->changeset->number) {
+				printf("progress %lu integrates %s#%lu from %lu\n",
+				       changeset->number,
+				       source->file->name,
+				       source->number,
+				       source->changeset->number);
+				item->changeset = source->changeset;
+			}
+		}
+	}
+	/* We could issue a warning if the state of other files didn't
+	 * match and yet didn't get integrated, but that's a lot of
+	 * work and there's no good way to represent the case of a
+	 * commit contributing to but not being completely obsoleted
+	 * by another commit.
+	 */
+	changeset->integrated = changesets;
+	for (item = changesets; item; item = item->next)
+		printf("# integrate %lu from %lu\n", changeset->number, item->changeset->number);
+}
+
+/*
+ * Load the history of the codeline behind refname ("refs/p4/...")
+ * into depot: run the filelog for it, then resolve the integrations
+ * of every not-yet-reported changeset of every codeline known by
+ * then.  Dies if refname is not a p4 ref or its codeline is rejected
+ * by get_codeline().  Returns the codeline.
+ */
+static struct p4_codeline *import_depot(struct p4_depot *depot, const char *refname)
+{
+	struct p4_codeline *target, *posn;
+	char *path = refname_to_codeline(refname);
+
+	/* refname_to_codeline() returns NULL outside refs/p4/ */
+	if (!path)
+		die("Not a p4 ref: %s", refname);
+
+	target = get_codeline(depot, path);
+
+	if (!target)
+		die("Invalid codeline: %s", path);
+
+	free(path);
+
+	p4_filelog(target);
+
+	printf("progress resolving integrates\n");
+
+	/* Now resolve all the integrates in changesets */
+	for (posn = depot->codelines; posn; posn = posn->next) {
+		struct p4_changeset *changeset;
+		for (changeset = posn->unreported; changeset; changeset = changeset->next) {
+			resolve_changeset_integrates(changeset);
+		}
+	}
+
+	return target;
+}
+
+/*
+ * Print how to refer to changeset, followed by a newline: the hex
+ * object name of its commit if it has one, ":<mark>" otherwise.
+ */
+static void name_changeset(struct p4_changeset *changeset)
+{
+	if (!changeset->commit) {
+		printf(":%d\n", changeset->mark);
+		return;
+	}
+	printf("%s\n", sha1_to_hex(changeset->commit->object.sha1));
+}
+
+/*
+ * Give changeset a commit by walking the codeline's already imported
+ * history backwards: each step assigns the first parent of the
+ * current history commit to the previous changeset of the codeline.
+ * Dies if the git or p4 history runs out before changeset is reached.
+ */
+static void lookup_git_changeset(struct p4_codeline *codeline,
+				 struct p4_changeset *changeset)
+{
+	while (!changeset->commit) {
+		struct p4_changeset *known = codeline->history;
+		struct commit *parent;
+
+		if (!known || !known->previous ||
+		    !known->commit || !known->commit->parents)
+			die("No git history for changeset %lu in %s",
+			    changeset->number, codeline->path);
+		parent = known->commit->parents->item;
+		parse_commit(parent);
+		known->previous->commit = parent;
+		codeline->history = known->previous;
+	}
+}
+
+static void report_codeline(struct p4_codeline *codeline,
+			    struct p4_changeset *until);
+
+/*
+ * Make sure changeset can be named (has a mark or a commit).  A
+ * changeset at or before the codeline's finished_changeset is looked
+ * up in existing git history; a later one is imported by reporting
+ * the codeline up to it.
+ */
+static void identify_changeset(struct p4_changeset *changeset)
+{
+	struct p4_codeline *owner = changeset->codeline;
+
+	if (changeset->mark || changeset->commit)
+		return;
+	if (changeset->number <= owner->finished_changeset) {
+		lookup_git_changeset(owner, changeset);
+		return;
+	}
+	report_codeline(owner, changeset);
+}
+
+/*
+ * Write a commit to stdout for each unreported changeset of codeline,
+ * in order, stopping after `until` (or at the end of the list when
+ * until is NULL), and finish with a "checkpoint".
+ *
+ * Changesets integrated from other codelines are identified first, so
+ * that they can be named in the "merge" lines.
+ */
+static void report_codeline(struct p4_codeline *codeline, struct p4_changeset *until)
+{
+	struct p4_changeset *cs;
+
+	printf("progress importing content of codeline %s", codeline->path);
+	if (until)
+		printf(" (up to changeset %lu)", until->number);
+	printf("\n");
+
+	cs = codeline->unreported;
+	while (cs) {
+		struct p4_changeset_list *src;
+		struct p4_revision_list *entry;
+
+		printf("progress check %lu\n", cs->number);
+
+		/* everything we merge from needs a name before we use it */
+		for (src = cs->integrated; src; src = src->next)
+			identify_changeset(src->changeset);
+
+		printf("progress import changeset %lu\n",
+		       cs->number);
+		printf("# changeset %lu\n", cs->number);
+		printf("commit %s\n", codeline->refname);
+		cs->mark = codeline->depot->next_mark++;
+		printf("mark :%d\n", cs->mark);
+		p4_change(cs);
+		if (cs->previous) {
+			printf("from ");
+			name_changeset(cs->previous);
+		}
+		for (src = cs->integrated; src; src = src->next) {
+			printf("merge ");
+			name_changeset(src->changeset);
+		}
+
+		for (entry = cs->revisions; entry; entry = entry->next) {
+			/* name + 1 skips the first character of the stored name */
+			printf("M %s inline %s\n",
+			       get_file_mode(entry->revision->type),
+			       entry->revision->file->name + 1);
+			p4_print(entry->revision);
+		}
+		printf("\n");
+		codeline->unreported = cs->next;
+		if (cs == until)
+			break;
+		cs = cs->next;
+	}
+	printf("checkpoint\n");
+}
+
+/*
+ * Import each of the ref_nr refs ("refs/p4/...") in refs: load the
+ * codeline's history and report everything up to its newest
+ * changeset.
+ */
+static void import_p4(int ref_nr, const char **refs)
+{
+	int i;
+	struct p4_depot *depot = get_depot();
+	struct p4_codeline *target;
+	/* get_codeline() reads trailers out of commit->buffer */
+	save_commit_buffer = 1;
+
+	for (i = 0; i < ref_nr; i++) {
+		target = import_depot(depot, refs[i]);
+
+		/* a codeline without any changesets has no head */
+		if (target->head)
+			identify_changeset(target->head);
+	}
+}
+
+/*
+ * diff callback for a modified path: open the file for edit in p4,
+ * then overwrite it with the new blob.  The codeline is passed in
+ * options->format_callback_data.
+ */
+static void export_change(struct diff_options *options,
+			  unsigned old_mode, unsigned new_mode,
+			  const unsigned char *old_sha1,
+			  const unsigned char *new_sha1,
+			  const char *path)
+{
+	struct p4_codeline *target = options->format_callback_data;
+
+	p4_edit(target, path);
+	write_blob(target, new_sha1, path);
+}
+
+/*
+ * diff callback for an added ('+') or removed ('-') path: an added
+ * file is written out and then "p4 add"ed, a removed one is
+ * "p4 delete"d.  Any other addremove value is ignored.
+ */
+static void export_add_remove(struct diff_options *options,
+			      int addremove, unsigned mode,
+			      const unsigned char *sha1,
+			      const char *path)
+{
+	struct p4_codeline *target = options->format_callback_data;
+
+	switch (addremove) {
+	case '+':
+		write_blob(target, sha1, path);
+		p4_add(target, path);
+		break;
+	case '-':
+		p4_delete(target, path);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Replay the tree difference between git_parent and git_commit in the
+ * p4 client of codeline (via the export_* diff callbacks), then
+ * submit the result with git_commit's message.
+ */
+static void export_commit(struct p4_codeline *codeline,
+			  struct commit *git_commit, struct commit *git_parent)
+{
+	struct diff_options diffopts;
+	struct tree_desc before, after;
+
+	memset(&diffopts, 0, sizeof(diffopts));
+	parse_tree(git_commit->tree);
+	parse_tree(git_parent->tree);
+	init_tree_desc(&before, git_parent->tree->buffer, git_parent->tree->size);
+	init_tree_desc(&after, git_commit->tree->buffer, git_commit->tree->size);
+
+	diffopts.flags = DIFF_OPT_RECURSIVE;
+	diffopts.format_callback_data = codeline;
+	diffopts.add_remove = export_add_remove;
+	diffopts.change = export_change;
+
+	diff_tree(&before, &after, "/", &diffopts);
+	p4_submit(git_commit);
+}
+
+/*
+ * Export commits to the codeline named by the remote's first URL.
+ *
+ * Object names are read from stdin, one per line.  Each commit must
+ * have a parent that was imported from the current head changeset of
+ * the target codeline; otherwise "progress not up-to-date" is printed
+ * and the export stops.  The branch argument is currently unused.
+ */
+static void export_p4(struct remote *remote, const char *branch)
+{
+	struct p4_depot *depot = get_depot();
+	char *refname;
+	struct p4_codeline *target;
+	struct strbuf buf;
+
+	// check client
+
+	if (remote->url_nr < 1)
+		die("Remote has no codeline URL");
+	/* import_depot() takes a ref name, not a depot path */
+	refname = codeline_to_refname(remote->url[0]);
+	if (!refname)
+		die("Invalid codeline: %s", remote->url[0]);
+	target = import_depot(depot, refname);
+	free(refname);
+
+	strbuf_init(&buf, 0);
+
+	while (!strbuf_getline(&buf, stdin, '\n')) {
+		struct p4_changeset *parent = NULL;
+		unsigned char sha1[20];
+		struct commit *commit, *git_parent = NULL;
+		struct commit_list *parents;
+		if (get_sha1(buf.buf, sha1))
+			die("Not a valid object name: %s", buf.buf);
+		commit = lookup_commit(sha1);
+		if (!commit || parse_commit(commit))
+			die("Not a valid commit: %s", buf.buf);
+		/* find the parent that lives in the target codeline */
+		for (parents = commit->parents; parents; parents = parents->next) {
+			struct p4_changeset *p4_parent =
+				changeset_from_commit(depot, parents->item);
+			if (p4_parent && p4_parent->codeline == target) {
+				parent = p4_parent;
+				git_parent = parents->item;
+			}
+		}
+		if (target->head != parent) {
+			printf("progress not up-to-date\n");
+			break;
+		}
+		if (!parent) {
+			// Need to start new codeline
+			die("Starting a new codeline is not supported yet");
+		}
+		if (p4_where(target))
+			break;
+		p4_sync(target);
+
+		export_commit(target, commit, git_parent);
+	}
+	strbuf_release(&buf);
+}
+
+static const char **env;
+static int env_nr;
+static int env_alloc;
+
+/*
+ * git_config() callback for the "vcs-p4." keys:
+ *  - port and client are added to the p4 environment as P4PORT=... and
+ *    P4CLIENT=...;
+ *  - codelineformat is compiled as an extended regular expression
+ *    into codeline_regex; an invalid pattern is reported on stderr
+ *    and leaves any previous setting alone.
+ * A key given without a value is a configuration error.
+ */
+static int handle_config(const char *key, const char *value, void *cb)
+{
+	struct strbuf buf;
+	const char *subkey;
+
+	if (prefixcmp(key, "vcs-p4."))
+		return 0;
+	subkey = key + strlen("vcs-p4.");
+
+	if (!strcmp(subkey, "port") || !strcmp(subkey, "client")) {
+		if (!value)
+			return config_error_nonbool(key);
+		strbuf_init(&buf, 0);
+		strbuf_addf(&buf, "%s=%s",
+			    !strcmp(subkey, "port") ? "P4PORT" : "P4CLIENT",
+			    value);
+
+		ALLOC_GROW(env, env_nr + 1, env_alloc);
+		env[env_nr++] = strbuf_detach(&buf, NULL);
+	} else if (!strcmp(subkey, "codelineformat")) {
+		regex_t *regex;
+		if (!value)
+			return config_error_nonbool(key);
+		regex = xmalloc(sizeof(*regex));
+		if (regcomp(regex, value, REG_EXTENDED)) {
+			/* never leave codeline_regex pointing at freed memory */
+			free(regex);
+			fprintf(stderr, "Invalid codeline pattern: %s\n",
+				value);
+			return 0;
+		}
+		if (codeline_regex) {
+			regfree(codeline_regex);
+			free(codeline_regex);
+		}
+		codeline_regex = regex;
+	}
+	return 0;
+}
+
+/*
+ * Entry point of "git vcs-p4 <command> [<remote> [<args>...]]".
+ *
+ * Commands:
+ *   capabilities           print the supported capabilities
+ *   import <remote> <ref>...  write a fast-import stream for the refs
+ *   list <remote>          print the ref name for each URL of the remote
+ *   export <remote> [<branch>]  submit commits named on stdin to p4
+ *
+ * Returns 0 for capabilities, import and list, and 1 otherwise.
+ * NOTE(review): "export" also ends up returning 1, because
+ * export_p4() has no way to report success yet.
+ */
+int cmd_p4(int argc, const char **argv, const char *prefix)
+{
+	struct remote *remote;
+
+	git_config(handle_config, NULL);
+
+	/* the environment handed to p4_init() must be NULL-terminated */
+	ALLOC_GROW(env, env_nr + 1, env_alloc);
+	env[env_nr++] = NULL;
+
+	p4_init(env);
+
+	if (argc < 2)
+		die("usage: git vcs-p4 <command> [<remote> [<args>...]]");
+
+	if (!strcmp(argv[1], "capabilities")) {
+		printf("import\n");
+		printf("find-new-branches\n");
+		printf("export\n");
+		printf("fork\n");
+		printf("merge\n");
+		return 0;
+	}
+	if (!strcmp(argv[1], "import")) {
+		if (argc < 3)
+			die("usage: git vcs-p4 import <remote> [<ref>...]");
+		prefix = setup_git_directory();
+		remote = remote_get(argv[2]);
+		import_p4(argc - 3, argv + 3);
+		return 0;
+	}
+	if (!strcmp(argv[1], "list")) {
+		int i;
+		if (argc < 3)
+			die("usage: git vcs-p4 list <remote>");
+		prefix = setup_git_directory();
+		remote = remote_get(argv[2]);
+		if (!remote)
+			die("No such remote: %s", argv[2]);
+		for (i = 0; i < remote->url_nr; i++) {
+			char *refname = codeline_to_refname(remote->url[i]);
+			/* URLs that are not "//..." depot paths have no ref */
+			if (!refname) {
+				fprintf(stderr, "Not a depot path: %s\n",
+					remote->url[i]);
+				continue;
+			}
+			printf("%s\n", refname);
+			free(refname);
+		}
+		return 0;
+	}
+	if (!strcmp(argv[1], "export")) {
+		if (argc < 3)
+			die("usage: git vcs-p4 export <remote> [<branch>]");
+		remote = remote_get(argv[2]);
+		if (!remote)
+			die("No such remote: %s", argv[2]);
+
+		/* argv[argc] is NULL, so a missing branch is passed as NULL */
+		export_p4(remote, argv[3]);
+		/*
+		 * Planned steps:
+		 * 1: check whether the import of the target location
+		 *    is up-to-date
+		 * 2: find the target location in the client view
+		 * 3: bring the client view up-to-date with the target
+		 *    location
+		 * 4: recheck that this matches the tree
+		 * 5: open the necessary files in the client
+		 * 6: replace the necessary files in the filesystem
+		 * 7: submit
+		 * 8: reimport
+		 * 9: go back to (3)
+		 */
+	}
+	return 1;
+}
diff --git a/vcs-p4.h b/vcs-p4.h
new file mode 100644
index 0000000..55aa307
--- /dev/null
+++ b/vcs-p4.h
@@ -0,0 +1,119 @@
+#ifndef VCS_P4_H
+#define VCS_P4_H
+
+struct p4_depot { /* top-level state: every codeline seen in this run */
+	struct p4_codeline *codelines; /* list linked through p4_codeline.next */
+
+	int next_mark; /* next fast-import mark to hand out; get_depot() starts it at 1 */
+};
+
+/** Note that multiple codelines can have changesets with the same
+ * number. Each struct therefore describes one changeset as seen from
+ * one codeline.
+ **/
+struct p4_changeset {
+	struct p4_codeline *codeline;
+
+	unsigned long number; /* p4 change number */
+
+	/** Used only if a previous import found this changeset **/
+	struct commit *commit;
+
+	/** Used only if this changeset is newly imported in this operation. **/
+	int mark;
+
+	const char *message; /* not referenced anywhere in vcs-p4.c */
+
+	struct p4_revision_list *revisions; /* file revisions introduced by this changeset in this codeline */
+
+	/** Not explicit in p4 **/
+	struct p4_changeset_list *integrated; /* filled in by resolve_changeset_integrates() */
+
+	/** Next and previous in codeline **/
+	struct p4_changeset *next;
+	struct p4_changeset *previous;
+};
+
+struct p4_changeset_list { /* singly linked list of changeset references */
+	struct p4_changeset *changeset;
+	struct p4_changeset_list *next;
+};
+
+struct p4_revision { /* one revision (#<number>) of one file */
+	unsigned long number;
+
+	const char *type; /* one of the strings returned by get_file_type() */
+
+	struct p4_file *file;
+	struct p4_changeset *changeset; /* set by p4_filelog(); NULL until the file's codeline has been scanned */
+
+	struct p4_revision_list *integrated; /* revisions in other codelines that were integrated into this one */
+
+	/** Next in file **/
+	struct p4_revision *next;
+};
+
+/** Represents a collection of revisions of different files,
+ * as a singly linked list that add_to_revision_list() appends to.
+ **/
+struct p4_revision_list {
+	struct p4_revision *revision;
+	struct p4_revision_list *next;
+};
+
+struct p4_file { /* one file within a codeline */
+	struct p4_codeline *codeline;
+	const char *name; /* full depot path with the codeline path removed from the front */
+
+	unsigned head_number; /* never assigned in vcs-p4.c yet (see the TODO in p4_filelog()) */
+
+	struct p4_revision *revisions; /* kept ordered by revision number by get_revision() */
+
+	/** Next file in codeline **/
+	struct p4_file *next;
+};
+
+/** Perforce doesn't record codelines; we have to reverse-engineer
+ * them from how people seem to be branching.
+ **/
+struct p4_codeline {
+	struct p4_depot *depot;
+
+	/** Base path of codeline **/
+	const char *path;
+
+	/** git refname to import into **/
+	const char *refname; /* from codeline_to_refname(); NULL if path does not start with "//" */
+
+	struct p4_file *files;
+	struct p4_changeset *changesets; /* kept ordered by number by get_changeset() */
+
+	int filelog_done; /* set once p4_filelog() has scanned this codeline */
+
+	/* The incremental state is that we have some changeset that
+	 * we previously imported up to, and we have git history going
+	 * back from that point, of which we've looked up some and
+	 * could look up more as needed. Also, there's p4-only history
+	 * going forward after the common history, and we've imported
+	 * some of that, and could import more as needed. Since
+	 * codelines are sorted by changeset number, we can tell which
+	 * way to go to get a name for a changeset.
+	 */
+	struct p4_changeset *history; /* oldest changeset matched to a git commit so far */
+	struct p4_changeset *unreported; /* first changeset not yet written to the import stream */
+
+	struct p4_changeset *head; /* last changeset in the list, i.e. the highest number */
+
+	unsigned long finished_changeset; /* change number read from the existing ref's tip commit; 0 if none */
+
+	/** For reporting **/
+	unsigned long num_changesets; /* counted by get_changeset(); not read anywhere in vcs-p4.c */
+
+	/** Next codeline in depot **/
+	struct p4_codeline *next;
+
+	/** Filesystem location of working directory for this codeline
+	 * on the client. Set by p4_where().
+	 **/
+	char *working;
+};
+
+#endif
-- 
1.6.0.6

^ permalink raw reply related

* [RFC PATCH 3/3] Support fetching from foreign VCSes
From: Daniel Barkalow @ 2009-01-11 20:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

This supports a useful subset of the usual fetch logic, mostly in the
config file.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
 builtin-fetch.c |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/builtin-fetch.c b/builtin-fetch.c
index 7b46f8f..14e037e 100644
--- a/builtin-fetch.c
+++ b/builtin-fetch.c
@@ -614,6 +614,136 @@ static void set_option(const char *name, const char *value)
 			name, transport->url);
 }
 
+static struct ref *list_foreign(struct remote *remote) /* Run the "vcs-<foreign_vcs>" helper with "list <remote name>" and return the ref names it prints as a linked list (NULL if none were read). */
+{
+	struct child_process importer;
+	struct ref *ret = NULL;
+	struct ref **end = &ret; /* always points at the link where the next ref is appended */
+	struct strbuf buf;
+	memset(&importer, 0, sizeof(importer));
+	importer.in = 0;
+	importer.no_stdin = 1;
+	importer.out = -1; /* presumably requests a pipe from start_command(); importer.out is read from below -- confirm against the run-command API */
+	importer.err = 0;
+	importer.argv = xcalloc(5, sizeof(*importer.argv)); /* 3 slots used, the zeroed remainder terminates the vector; NOTE(review): never freed in this function */
+	strbuf_init(&buf, 80);
+	strbuf_addf(&buf, "vcs-%s", remote->foreign_vcs);
+	importer.argv[0] = buf.buf; /* argv[0] borrows buf's storage, which is reset and reused once the command has been started */
+	importer.argv[1] = "list";
+	importer.argv[2] = remote->name;
+	importer.git_cmd = 1; /* presumably: run as a git subcommand, i.e. "git vcs-<x> list <remote>" -- confirm */
+	start_command(&importer); /* NOTE(review): the return value is not checked */
+
+	strbuf_reset(&buf); /* from here on buf is the read buffer for the helper's output */
+	while (1) {
+		char *eol, *eon;
+		if (strbuf_read(&buf, importer.out, 80) <= 0) /* stop once nothing more was read (or the read failed) */
+			break;
+		while (1) { /* consume every complete line currently in buf */
+			eol = strchr(buf.buf, '\n');
+			if (!eol)
+				break;
+			*eol = '\0';
+			eon = strchr(buf.buf, ' '); /* the ref name ends at the first space; the rest of the line is ignored */
+			if (eon)
+				*eon = '\0';
+			*end = alloc_ref(buf.buf);
+			end = &((*end)->next);
+			strbuf_remove(&buf, 0, eol - buf.buf + 1); /* drop the consumed line including its newline */
+		}
+	}
+
+	finish_command(&importer); /* NOTE(review): the helper's exit status is ignored */
+	strbuf_release(&buf);
+	return ret;
+}
+
+static int import_foreign(struct remote *remote, struct ref *refs) /* Run "vcs-<foreign_vcs> import <remote name> <ref>..." with its output fed to "fast-import --quiet"; currently always returns 0. */
+{
+	struct child_process importer;
+	struct child_process fastimport;
+	struct ref *posn;
+	int count = 0;
+	struct strbuf buf;
+
+	for (posn = refs; posn; posn = posn->next) /* first pass: count the refs so argv can be sized */
+		count++;
+
+	memset(&importer, 0, sizeof(importer));
+	importer.in = 0;
+	importer.no_stdin = 1;
+	importer.out = -1; /* presumably requests a pipe from start_command(); it becomes fast-import's input below -- confirm against the run-command API */
+	importer.err = 0;
+	importer.argv = xcalloc(5 + count, sizeof(*importer.argv)); /* 3 fixed slots + one per ref; the zeroed remainder terminates the vector */
+	strbuf_init(&buf, 80);
+	strbuf_addf(&buf, "vcs-%s", remote->foreign_vcs);
+	importer.argv[0] = buf.buf; /* borrows buf's storage, so buf is only released at the end */
+	importer.argv[1] = "import";
+	importer.argv[2] = remote->name;
+	count = 0;
+	for (posn = refs; posn; posn = posn->next) { /* second pass: append each ref name as an argument */
+		importer.argv[3 + count] = posn->name;
+		count++;
+	}
+	importer.git_cmd = 1;
+	start_command(&importer); /* NOTE(review): the return value is not checked */
+
+	memset(&fastimport, 0, sizeof(fastimport));
+	fastimport.in = importer.out; /* connect the helper's output to fast-import's input */
+	fastimport.argv = xcalloc(3, sizeof(*fastimport.argv)); /* 2 arguments + the terminating NULL; 2 slots left the vector unterminated */
+	fastimport.argv[0] = "fast-import";
+	fastimport.argv[1] = "--quiet";
+	fastimport.git_cmd = 1;
+	start_command(&fastimport); /* NOTE(review): the return value is not checked */
+
+	finish_command(&importer); /* NOTE(review): both exit statuses are ignored */
+	finish_command(&fastimport);
+	strbuf_release(&buf);
+	return 0;
+}
+
+static int fetch_foreign(struct remote *remote) /* Fetch from a foreign-VCS remote: list its refs, import them, then map them through the remote's fetch refspecs and record the result. Returns non-zero on failure. */
+{
+	struct ref *remote_refs = list_foreign(remote);
+	struct ref *ref_map = NULL;
+	struct ref *rm;
+	struct ref **tail = &ref_map;
+	struct branch *branch;
+	int i;
+
+	int exit_code = import_foreign(remote, remote_refs);
+	if (exit_code)
+		return exit_code;
+
+	/* if not appending, truncate FETCH_HEAD */
+	if (!append) {
+		char *filename = git_path("FETCH_HEAD");
+		FILE *fp = fopen(filename, "w");
+		if (!fp)
+			return error("cannot open %s: %s\n", filename, strerror(errno));
+		fclose(fp);
+	}
+
+	for (rm = remote_refs; rm; rm = rm->next) /* look up the value each listed ref has after the import */
+		read_ref(rm->name, rm->old_sha1);
+
+	branch = branch_get(NULL); /* presumably the current branch; may be NULL or lack a configured remote -- hence the guard below */
+
+	for (i = 0; i < remote->fetch_refspec_nr; i++) {
+		get_fetch_map(remote_refs, &remote->fetch[i], &tail, 0);
+		if (branch && branch->remote_name && !strcmp(branch->remote_name, remote->name)) /* guard: never pass a NULL name to strcmp() */
+			add_merge_config(&ref_map, remote_refs, branch, &tail);
+	}
+
+	for (rm = ref_map; rm; rm = rm->next) /* load the current value of each local tracking ref */
+		if (rm->peer_ref)
+			read_ref(rm->peer_ref->name, rm->peer_ref->old_sha1);
+
+	store_updated_refs("foreign", remote->name, ref_map);
+
+	return exit_code; /* always 0 here: a non-zero code returned early above */
+}
+
 int cmd_fetch(int argc, const char **argv, const char *prefix)
 {
 	struct remote *remote;
@@ -635,9 +765,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	else
 		remote = remote_get(argv[0]);
 
-	if (remote->foreign_vcs) {
-		die("Using foreign VCSes for fetch is not yet supported.");
-	}
+	if (remote->foreign_vcs)
+		return fetch_foreign(remote);
 
 	transport = transport_get(remote, remote->url[0]);
 	if (verbosity >= 2)
-- 
1.6.0.6

^ permalink raw reply related

* Re: [PATCH] Cleanup of unused symcache variable inside diff-lib.c
From: Johannes Schindelin @ 2009-01-11 20:17 UTC (permalink / raw)
  To: Kjetil Barvik; +Cc: git, Junio C Hamano
In-Reply-To: <86eiz98v0s.fsf@broadpark.no>

Hi,

On Sun, 11 Jan 2009, Kjetil Barvik wrote:

>     git format-patch

Necessary.

>  --stat

Not necessary.

>  --patch-with-raw

As I showed you, also not necessary.

> -1

Necessary.

> --signoff

If you had signed off on your commits already, not necessary.  (FWIW I 
keep a sign off as a marker that I actually think this is good to be 
submitted, or at least close.)

> -M -C

Usually not necessary, unless you rename some file, or introduce a huge 
code duplication.

>        --summary

Dunno.  I think it's not really necessary, you see that from the diff.

> --full-index

I find it makes the patch hard to read, as the index line will always 
wrap.  And it's not necessary, as it is so highly unlikely that the hash 
is unique in your repository, but not mine.

> --subject-prefix="PATCH"

Not necessary.

>        --output-directory ../diff_lib_c_symcache_cleanup_v1/

If you insist...

Sure, you can make it complicated, but I usually prefer something like

	$ git format-patch -3 --cover-letter

Nice 'n easy.

Ciao,
Dscho

^ permalink raw reply

* Re: [RFC PATCH 0/3] Support for importing in builtin-fetch
From: Johannes Schindelin @ 2009-01-11 20:20 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git, Junio C Hamano
In-Reply-To: <alpine.LNX.1.00.0901110318530.19665@iabervon.org>

Hi,

On Sun, 11 Jan 2009, Daniel Barkalow wrote:

> This series is an attempt to standardize importers and exporters for 
> non-git upstream repositories, and to allow the normal git programs to 
> use them transparently when configured to do so.

Cute!

Ciao,
Dscho

^ permalink raw reply

* [JGIT] Blame functionality for jgit
From: Manuel Woelker @ 2009-01-11 20:23 UTC (permalink / raw)
  To: spearce, Robin Rosenberg; +Cc: git

Hello there,

Over the weekend I have been hacking the jgit sources a little to see
if I can add blame/praise/annotate functionality to it. The results
can be found at http://github.com/manuel-woelker/egit/tree/blame . All
work is in the blame branch in org.spearce.jgit.blame package.

I largely ported the cgit blame algorithm described here
https://kerneltrap.org/mailarchive/git/2006/10/12/224187 , the
relevant file is builtin-blame.c cf.
http://repo.or.cz/w/git.git?a=blob;f=builtin-blame.c;hb=HEAD

The structure has been kept largely intact, but I have tried to
translate the concepts to idiomatic java, with the bulk of the logic
now in the Scoreboard class

The blame algorithm needs to use a diff algorithm to find common parts
in files. AFAICT there is no diff implementation in jgit at the
moment. I used the incava java-diff library, (see
http://www.incava.org/projects/java/java-diff ), but I introduced an
interface that should make it possible to swap implementations with a
minimum of effort. To compile I just create a new eclipse project with
the java-diff sources.

Currently renames, copies etc. are not supported, so only files with
the same name can receive the blame. Unmodified renames and copies
should be fairly simple to implement. Modified renames and copies
might prove to be a little bit harder, so that would have to wait
until jgit can follow history across renames/copies.

There are some simple unit tests to check the basic functionality. I
also "blamed" SUBMITTING_PATCHES in the egit repo, and got the same
results as cgit. I am certain that there are some bugs lurking in the
code, but overall it looks quite promising.

I would like to hear your thoughts on a couple of topics:
 - Merge/patch/diff/blame functionality needs a diff implementation,
what are our options within technical and license constraints?
 - What is the roadmap for these features?
 - Can you see this blame effort getting integrated upstream?

I would love to contribute more effort to egit and the blame
functionality in particular. To me, "blame" is one of the killer
features of modern SCMs.

Last but not least, kudos to the git and egit teams for their hard work on
making git such a great piece of software.
 - Manuel Woelker

^ permalink raw reply

* Re: What's cooking in git.git (Jan 2009, #02; Sun, 11)
From: Junio C Hamano @ 2009-01-11 20:24 UTC (permalink / raw)
  To: Alexander Potashev; +Cc: git
In-Reply-To: <20090111122128.GA16108@myhost>

Alexander Potashev <aspotashev@gmail.com> writes:

>> * jc/maint-format-patch (Sat Jan 10 12:41:33 2009 -0800) 1 commit
>>  + format-patch: show patch text for the root commit
>
> My testcases ([PATCH] Add new testcases for format-patch root commits)
> for this don't satisfy the target behaviour.

I thought I squashed the test case from your original to it and they seem
to pass for me, but maybe you are talking about some other tests?  If you
know of breakages please send in incremental updates.

>> * ap/clone-into-empty (Fri Jan 9 02:24:23 2009 +0300) 2 commits
>>  - Use is_pseudo_dir_name everywhere
>>  - Allow cloning to an existing empty directory
>
> As far as I understood from your message, you don't think that cloning
> into empty directories is necessary. So, I thought, the best solution for
> yesterday was "[PATCH] add is_dot_or_dotdot inline function" (to make you
> happy ;)).

I merely said "I am not particularly interested in it."  That's quite
different from "I oppose and reject".

As long as the new feature is maintainability-wise low-impact and does not
hurt users who do _not_ use it, I am not opposed to have a new feature
even when I see it is only narrowly useful.

If a topic brings in a large change that helps to support only one
particular workflow better, while making it cumbersome to update the
resulting code to support some other workflow later, even if the change is
useful for users of that one particular workflow, I may oppose it.  It
would be high-impact from the maintainability point of view [*1*].

But I do not think your "clone here" falls into that category.

It is really up to you to follow through with it, and people with similar
needs to cheer you on.  I thought you took a good strategy to first get
dot-or-dotdot in (which is generally useful), hoping to bring up the
"clone here" topic again by building on top of it later.

> Btw, I've sent some worthwhile patches, I but haven't got any reply from you:
> 	[PATCH] use || instead of | in logical expressions
> 	[PATCH] Replace deprecated dashed git commands in usage
> 	[PATCH] remove unnecessary 'if'
> It's better if you say "No" than nothing.

I do not recall the last one.

The first one I thought was a trivial janitor patch that (1) didn't matter
very deeply but made things somewhat easier to read, and more importantly
(2) you had "oops" reply to yourself.

I often clean up trivial "oops" in a patch that fixes bugs or adds
features to avoid extra round trip with the contributor, but that is only
when bugfix and enhancements are worthwhile by itself.

The purpose of a clean-up patch is to clean things up.  If it itself has
"oops" in it, that fails its own criteria of goodness.  Please don't
expect/force me to spend time cleaning up "oops" in a clean-up patch, but
submit a replacement I can apply straight out of my mailbox.

The second one I was expecting to hear from people who were involved in
the discussion back when we standardized on dashless form to show hands as
I recall these messages were deliberately left with dashed form for some
reason (perhaps to help avoiding "man git foo" vs "man git-foo"
confusion).

[Footnote]

*1* Such a change probably needs to be justified either by showing any
other workflow does not make sense (so supporting that one true workflow
well is sufficient) or by demonstrating that support for some other
equally valid workflows can be included trivially, or both.

^ permalink raw reply

* Re: current git kernel has strange problems during bisect
From: Andi Kleen @ 2009-01-11 20:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christian Borntraeger, Johannes Schindelin, git,
	Linux Kernel Mailing List
In-Reply-To: <alpine.LFD.2.00.0901111113150.6528@localhost.localdomain>

Linus Torvalds <torvalds@linux-foundation.org> writes:
>
> For bisect purposes, if you know you're not chasing down a btrfs issue, 
> you can do
>
> 	git bisect good 34353029534a08e41cfb8be647d734b9ce9ebff8

Could you perhaps add some standard tag for that commit? That 
would make it easier than to always find the exact btrfs commit.

Just an idea.

-Andi

-- 
ak@linux.intel.com

^ permalink raw reply

* Re: stopping patches from just floating by
From: Daniel Barkalow @ 2009-01-11 20:37 UTC (permalink / raw)
  To: jidanni; +Cc: git
In-Reply-To: <87y6xhkb2f.fsf@jidanni.org>

On Mon, 12 Jan 2009, jidanni@jidanni.org wrote:

> I notice lots of "Merge branch qq/bla". And think, hmmm, Mr. QQ must
> be using Documentation/everyday.txt's [[Individual Developer
> (Participant)]] git-push methods, for a more efficient way of getting
> his patches included by the maintainer.

Nope, that would lead to "Merge git://qq.org/git bla". In fact, Junio is 
getting the patches by email, and then using that workflow in his own tree 
to manage the maintenance process. That is, he creates a branch for each 
patch series that someone sends that looks ready for more exposure, by 
starting with some plausible commit and applying the series of patches, 
and then he can merge that branch into pu, next, master, or maint as 
needed. This is particularly useful for pu, where he redoes all the merges 
frequently; if he worked by just applying the patches, every time he 
wanted to drop a series that he'd applied before some other series, he'd 
have to pick out the conflict resolutions from the original author's 
changes.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* Re: [PATCH] Cleanup of unused symcache variable inside diff-lib.c
From: Kjetil Barvik @ 2009-01-11 20:50 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano
In-Reply-To: <alpine.DEB.1.00.0901112112540.3586@pacific.mpi-cbg.de>

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

<snipp>
>> --signoff
>
> If you had signed off on your commits already, not necessary.  (FWIW I 
> keep a sign off as a marker that I actually think this is good to be 
> submitted, or at least close.)

  From the Documentation/SubmittingPatches file, for me I think that the
  '-signoff' is supposed to mean:

  [....]
   - if you want your work included in git.git, add a "Signed-off-by:
     Your Name <you@example.com>" line to the commit message (or just
     use the option "-s" when committing) to confirm that you agree to
     the Developer's Certificate of Origin
  [....]
  The sign-off is a simple line at the end of the explanation for the
  patch, which certifies that you wrote it or otherwise have the right
  to pass it on as a open-source patch.  The rules are pretty simple: if
  you can certify the below:

        Developer's Certificate of Origin 1.1

        By making a contribution to this project, I certify that:
  [....]
  then you just add a line saying

	Signed-off-by: Random J Developer <random@developer.example.org>

  This line can be automatically added by git if you run the git-commit
  command with the -s option.
  [....]
  
  And the 'a' and the 'd' in the DCO I do agree with in this particular
  situation, so I added a '--signoff' to the patches.

  -- kjetil

^ permalink raw reply

* Re: current git kernel has strange problems during bisect
From: Johannes Schindelin @ 2009-01-11 20:51 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Linus Torvalds, Christian Borntraeger, git,
	Linux Kernel Mailing List
In-Reply-To: <87tz85fuxr.fsf@basil.nowhere.org>

Hi,

On Sun, 11 Jan 2009, Andi Kleen wrote:

> Linus Torvalds <torvalds@linux-foundation.org> writes:
> >
> > For bisect purposes, if you know you're not chasing down a btrfs issue, 
> > you can do
> >
> > 	git bisect good 34353029534a08e41cfb8be647d734b9ce9ebff8
> 
> Could you perhaps add some standard tag for that commit? That 
> would make it easier than to always find the exact btrfs commit.
> 
> Just an idea.

Well, AFAICT what Linus hinted at is that you do not need such a standard 
tag.  Indeed, you would only clutter the history with such tags, when it 
usually is just a matter of saying "git bisect good" whenever you _know_ 
you are hitting known-good history.

Ciao,
Dscho

^ permalink raw reply

* Re: [PATCH] filter-branch: add git_commit_non_empty_tree and  --prune-empty.
From: Junio C Hamano @ 2009-01-11 20:52 UTC (permalink / raw)
  To: Pierre Habouzit; +Cc: Johannes Schindelin, Jay Soffian, git, pasky, srabbelier
In-Reply-To: <20090111142732.GA18484@artemis.corp>

Pierre Habouzit <madcoder@debian.org> writes:

> On Sun, Jan 11, 2009 at 01:35:15PM +0000, Johannes Schindelin wrote:
> ...
>> And I suggested to merge the tests with Sverre's patch.  That suggestion 
>> also went unaddressed.
>
> I can't find any mails from Sverre in the same thread, but maybe I'm not
> searching in the proper place...

This is in 'master' already; if there was anything missing in your patch,
please fill in with updates.

^ permalink raw reply

* Re: stopping patches from just floating by
From: jidanni @ 2009-01-11 20:53 UTC (permalink / raw)
  To: barkalow; +Cc: git
In-Reply-To: <alpine.LNX.1.00.0901111524110.19665@iabervon.org>

I see. Say, for my forthcoming 40 minor grammar fixes that affect 20
files in Documentation/*, I just couldn't bear spamming this list with
more than one or two [PATCH] mails. OK one long mail it will be then,
with several commits encompassing many diffs. Hope that will be OK.
(Then another long mail next month, when I finish looking at the second
half of the man pages.)

^ permalink raw reply

* Re: [PATCH] filter-branch: add git_commit_non_empty_tree and  --prune-empty.
From: Johannes Schindelin @ 2009-01-11 20:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Pierre Habouzit, Jay Soffian, git, pasky, srabbelier
In-Reply-To: <7vab9xsgzf.fsf@gitster.siamese.dyndns.org>

Hi,

On Sun, 11 Jan 2009, Junio C Hamano wrote:

> Pierre Habouzit <madcoder@debian.org> writes:
> 
> > On Sun, Jan 11, 2009 at 01:35:15PM +0000, Johannes Schindelin wrote:
> > ...
> >> And I suggested to merge the tests with Sverre's patch.  That 
> >> suggestion also went unaddressed.
> >
> > I can't find any mails from Sverre in the same thread, but maybe I'm 
> > not searching in the proper place...
> 
> This is in 'master' already; if there was anything missing in your 
> patch, please fill in with updates.

I _think_ that I asked for test cases to use that workflow, so that we can 
be sure that what's in the man page actually works.

Ciao,
Dscho

^ permalink raw reply

* Re: Lightweight tag ?
From: Junio C Hamano @ 2009-01-11 21:04 UTC (permalink / raw)
  To: Francis Moreau; +Cc: git
In-Reply-To: <38b2ab8a0901111044x5db21d74i25ff7a26bb702a5d@mail.gmail.com>

"Francis Moreau" <francis.moro@gmail.com> writes:

> My problem is that I don't see their point !
>
> They behave the same way like the annotated tags: when pushing to a
> repo the lightweight tags are pushed as well, and pulling from a repo
> with lightweight tags give the same results (all of this with the
> --tags switch).

Don't use explicit --tags blindly.  It says "no matter what kind of tag,
transfer everything under refs/tags".  Otherwise you won't see a
difference.

^ permalink raw reply

* Re: [PATCH 4/4] pager: do wait_for_pager on signal death
From: Junio C Hamano @ 2009-01-11 21:13 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Johannes Schindelin, git
In-Reply-To: <20090111113649.GD29791@coredump.intra.peff.net>

Thanks; I agree with you that I do not see a good reason _not_ to be
handling those other signals at some of the "just handle SIGINT" sites,
and I like the direction in which this series is taking us very much.

^ permalink raw reply

* Re: current git kernel has strange problems during bisect
From: Christian Borntraeger @ 2009-01-11 21:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Sam Ravnborg, Johannes Schindelin, git, Linux Kernel Mailing List
In-Reply-To: <alpine.LFD.2.00.0901111200330.6528@localhost.localdomain>

Am Sonntag 11 Januar 2009 schrieb Linus Torvalds:
> Well, you don't actually have to mark that semi-random one as good either. 
> What you can do is to just mark anything that _only_ contains fs/btrfs as 
> good. IOW, you don't have to know the magic number - you just have to be 
> told that "oh, if you only have btrfs files, and you're not actively 
> bisecting a btrfs bug, just do 'git bisect good' and continue".

That should work.

<rant>
Still, I am a bit frustrated. During this weekend I reported 2 regressions 
(wlan and ata)  and I still try to find out why suspend/resume stopped 
working. In the meantime I have identified 2 patches (one was already known, 
I reported the 2nd to the usb maintainers) after 2.6.28 that caused suspend 
to ram regressions. In rc1 S2R was broken again. So I tried bisecting the 
third patch - which finally brought me to the btrfs bisect problem.

For me, this was the most annoying  merge window ever.

In my opinion we should really avoid subtree merges in the future as a courtesy 
to people who do the uncool work of testing, problem tracking and bisecting. 
</rant>

Christian

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox