Git development
 help / color / mirror / Atom feed
* [PATCH 2/4] create_tmp_packfile(): a helper function
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git
In-Reply-To: <1319846051-462-1-git-send-email-gitster@pobox.com>

Factor out a small logic out of the private write_pack_file() function
in builtin/pack-objects.c

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |   12 +++---------
 pack-write.c           |   10 ++++++++++
 pack.h                 |    3 +++
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6643c16..3258fa9 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -584,16 +584,10 @@ static void write_pack_file(void)
 		unsigned char sha1[20];
 		char *pack_tmp_name = NULL;
 
-		if (pack_to_stdout) {
+		if (pack_to_stdout)
 			f = sha1fd_throughput(1, "<stdout>", progress_state);
-		} else {
-			char tmpname[PATH_MAX];
-			int fd;
-			fd = odb_mkstemp(tmpname, sizeof(tmpname),
-					 "pack/tmp_pack_XXXXXX");
-			pack_tmp_name = xstrdup(tmpname);
-			f = sha1fd(fd, pack_tmp_name);
-		}
+		else
+			f = create_tmp_packfile(&pack_tmp_name);
 
 		offset = write_pack_header(f, nr_remaining);
 		if (!offset)
diff --git a/pack-write.c b/pack-write.c
index 46f3f84..863cce8 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -328,3 +328,13 @@ int encode_in_pack_object_header(enum object_type type, uintmax_t size, unsigned
 	*hdr = c;
 	return n;
 }
+
+struct sha1file *create_tmp_packfile(char **pack_tmp_name)
+{
+	char tmpname[PATH_MAX];
+	int fd;
+
+	fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+	*pack_tmp_name = xstrdup(tmpname);
+	return sha1fd(fd, *pack_tmp_name);
+}
diff --git a/pack.h b/pack.h
index d429d8a..0027ac6 100644
--- a/pack.h
+++ b/pack.h
@@ -84,4 +84,7 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
 #define PH_ERROR_PACK_SIGNATURE	(-2)
 #define PH_ERROR_PROTOCOL	(-3)
 extern int read_pack_header(int fd, struct pack_header *);
+
+extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
+
 #endif
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related

* [PATCH 3/4] finish_tmp_packfile(): a helper function
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git
In-Reply-To: <1319846051-462-1-git-send-email-gitster@pobox.com>

Factor out a small logic out of the private write_pack_file() function
in builtin/pack-objects.c.

This changes the order of finishing multi-pack generation slightly. The
code used to

 - adjust shared perm of temporary packfile
 - rename temporary packfile to the final name
 - update mtime of the packfile under the final name
 - adjust shared perm of temporary idxfile
 - rename temporary idxfile to the final name

but because the helper does not want to do the mtime thing, the updated
code does that step first and then all the rest.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |   33 ++++++++++-----------------------
 pack-write.c           |   31 +++++++++++++++++++++++++++++++
 pack.h                 |    1 +
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 3258fa9..b458b6d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -617,20 +617,8 @@ static void write_pack_file(void)
 
 		if (!pack_to_stdout) {
 			struct stat st;
-			const char *idx_tmp_name;
 			char tmpname[PATH_MAX];
 
-			idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
-						      &pack_idx_opts, sha1);
-
-			snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
-				 base_name, sha1_to_hex(sha1));
-			free_pack_by_name(tmpname);
-			if (adjust_shared_perm(pack_tmp_name))
-				die_errno("unable to make temporary pack file readable");
-			if (rename(pack_tmp_name, tmpname))
-				die_errno("unable to rename temporary pack file");
-
 			/*
 			 * Packs are runtime accessed in their mtime
 			 * order since newer packs are more likely to contain
@@ -638,28 +626,27 @@ static void write_pack_file(void)
 			 * packs then we should modify the mtime of later ones
 			 * to preserve this property.
 			 */
-			if (stat(tmpname, &st) < 0) {
+			if (stat(pack_tmp_name, &st) < 0) {
 				warning("failed to stat %s: %s",
-					tmpname, strerror(errno));
+					pack_tmp_name, strerror(errno));
 			} else if (!last_mtime) {
 				last_mtime = st.st_mtime;
 			} else {
 				struct utimbuf utb;
 				utb.actime = st.st_atime;
 				utb.modtime = --last_mtime;
-				if (utime(tmpname, &utb) < 0)
+				if (utime(pack_tmp_name, &utb) < 0)
 					warning("failed utime() on %s: %s",
 						tmpname, strerror(errno));
 			}
 
-			snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
-				 base_name, sha1_to_hex(sha1));
-			if (adjust_shared_perm(idx_tmp_name))
-				die_errno("unable to make temporary index file readable");
-			if (rename(idx_tmp_name, tmpname))
-				die_errno("unable to rename temporary index file");
-
-			free((void *) idx_tmp_name);
+			/* Enough space for "-<sha-1>.pack"? */
+			if (sizeof(tmpname) <= strlen(base_name) + 50)
+				die("pack base name '%s' too long", base_name);
+			snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
+			finish_tmp_packfile(tmpname, pack_tmp_name,
+					    written_list, nr_written,
+					    &pack_idx_opts, sha1);
 			free(pack_tmp_name);
 			puts(sha1_to_hex(sha1));
 		}
diff --git a/pack-write.c b/pack-write.c
index 863cce8..cadc3e1 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -338,3 +338,34 @@ struct sha1file *create_tmp_packfile(char **pack_tmp_name)
 	*pack_tmp_name = xstrdup(tmpname);
 	return sha1fd(fd, *pack_tmp_name);
 }
+
+void finish_tmp_packfile(char *name_buffer,
+			 const char *pack_tmp_name,
+			 struct pack_idx_entry **written_list,
+			 uint32_t nr_written,
+			 struct pack_idx_option *pack_idx_opts,
+			 unsigned char sha1[])
+{
+	const char *idx_tmp_name;
+	char *end_of_name_prefix = strrchr(name_buffer, 0);
+
+	if (adjust_shared_perm(pack_tmp_name))
+		die_errno("unable to make temporary pack file readable");
+
+	idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
+				      pack_idx_opts, sha1);
+	if (adjust_shared_perm(idx_tmp_name))
+		die_errno("unable to make temporary index file readable");
+
+	sprintf(end_of_name_prefix, "%s.pack", sha1_to_hex(sha1));
+	free_pack_by_name(name_buffer);
+
+	if (rename(pack_tmp_name, name_buffer))
+		die_errno("unable to rename temporary pack file");
+
+	sprintf(end_of_name_prefix, "%s.idx", sha1_to_hex(sha1));
+	if (rename(idx_tmp_name, name_buffer))
+		die_errno("unable to rename temporary index file");
+
+	free((void *)idx_tmp_name);
+}
diff --git a/pack.h b/pack.h
index 0027ac6..cfb0f69 100644
--- a/pack.h
+++ b/pack.h
@@ -86,5 +86,6 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
 extern int read_pack_header(int fd, struct pack_header *);
 
 extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
+extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
 
 #endif
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related

* [PATCH 4/4] Bulk check-in
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git
In-Reply-To: <1319846051-462-1-git-send-email-gitster@pobox.com>

This extends the earlier approach to stream a large file directly from the
filesystem to its own packfile, and allows "git add" to send large files
directly into a single pack. Older code used to spawn fast-import, but
the new bulk_checkin API replaces it.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile         |    2 +
 builtin/add.c    |    5 ++
 bulk-checkin.c   |  159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bulk-checkin.h   |   16 ++++++
 sha1_file.c      |   67 ++---------------------
 t/t1050-large.sh |   26 +++++++--
 6 files changed, 206 insertions(+), 69 deletions(-)
 create mode 100644 bulk-checkin.c
 create mode 100644 bulk-checkin.h

diff --git a/Makefile b/Makefile
index 3139c19..418dd2e 100644
--- a/Makefile
+++ b/Makefile
@@ -505,6 +505,7 @@ LIB_H += argv-array.h
 LIB_H += attr.h
 LIB_H += blob.h
 LIB_H += builtin.h
+LIB_H += bulk-checkin.h
 LIB_H += cache.h
 LIB_H += cache-tree.h
 LIB_H += color.h
@@ -591,6 +592,7 @@ LIB_OBJS += base85.o
 LIB_OBJS += bisect.o
 LIB_OBJS += blob.o
 LIB_OBJS += branch.o
+LIB_OBJS += bulk-checkin.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
 LIB_OBJS += color.o
diff --git a/builtin/add.c b/builtin/add.c
index c59b0c9..1c42900 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -13,6 +13,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "revision.h"
+#include "bulk-checkin.h"
 
 static const char * const builtin_add_usage[] = {
 	"git add [options] [--] <filepattern>...",
@@ -458,11 +459,15 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 		free(seen);
 	}
 
+	plug_bulk_checkin();
+
 	exit_status |= add_files_to_cache(prefix, pathspec, flags);
 
 	if (add_new_files)
 		exit_status |= add_files(&dir, flags);
 
+	unplug_bulk_checkin();
+
  finish:
 	if (active_cache_changed) {
 		if (write_cache(newfd, active_cache, active_nr) ||
diff --git a/bulk-checkin.c b/bulk-checkin.c
new file mode 100644
index 0000000..cad7a0b
--- /dev/null
+++ b/bulk-checkin.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#include "bulk-checkin.h"
+#include "csum-file.h"
+#include "pack.h"
+
+static int pack_compression_level = Z_DEFAULT_COMPRESSION;
+
+static struct bulk_checkin_state {
+	unsigned plugged:1;
+
+	char *pack_tmp_name;
+	struct sha1file *f;
+	off_t offset;
+	struct pack_idx_option pack_idx_opts;
+
+	struct pack_idx_entry **written;
+	uint32_t alloc_written;
+	uint32_t nr_written;
+} state;
+
+static void finish_bulk_checkin(struct bulk_checkin_state *state)
+{
+	unsigned char sha1[20];
+	char packname[PATH_MAX];
+	int i;
+
+	if (!state->f)
+		return;
+
+	if (state->nr_written == 1) {
+		sha1close(state->f, sha1, CSUM_FSYNC);
+	} else {
+		int fd = sha1close(state->f, sha1, 0);
+		fixup_pack_header_footer(fd, sha1, state->pack_tmp_name,
+					 state->nr_written, sha1,
+					 state->offset);
+		close(fd);
+	}
+
+	sprintf(packname, "%s/pack/pack-", get_object_directory());
+	finish_tmp_packfile(packname, state->pack_tmp_name,
+			    state->written, state->nr_written,
+			    &state->pack_idx_opts, sha1);
+	for (i = 0; i < state->nr_written; i++)
+		free(state->written[i]);
+	free(state->written);
+	memset(state, 0, sizeof(*state));
+
+	/* Make objects we just wrote available to ourselves */
+	reprepare_packed_git();
+}
+
+static void deflate_to_pack(struct bulk_checkin_state *state,
+			    unsigned char sha1[],
+			    int fd, size_t size, enum object_type type,
+			    const char *path, unsigned flags)
+{
+	unsigned char obuf[16384];
+	unsigned hdrlen;
+	git_zstream s;
+	git_SHA_CTX ctx;
+	int write_object = (flags & HASH_WRITE_OBJECT);
+	int status = Z_OK;
+	struct pack_idx_entry *idx = NULL;
+
+	hdrlen = sprintf((char *)obuf, "%s %" PRIuMAX, typename(type), size) + 1;
+	git_SHA1_Init(&ctx);
+	git_SHA1_Update(&ctx, obuf, hdrlen);
+
+	if (write_object) {
+		idx = xcalloc(1, sizeof(*idx));
+		idx->offset = state->offset;
+		crc32_begin(state->f);
+	}
+	memset(&s, 0, sizeof(s));
+	git_deflate_init(&s, pack_compression_level);
+
+	hdrlen = encode_in_pack_object_header(type, size, obuf);
+	s.next_out = obuf + hdrlen;
+	s.avail_out = sizeof(obuf) - hdrlen;
+
+	while (status != Z_STREAM_END) {
+		unsigned char ibuf[16384];
+
+		if (size && !s.avail_in) {
+			ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
+			if (xread(fd, ibuf, rsize) != rsize)
+				die("failed to read %d bytes from '%s'",
+				    (int)rsize, path);
+			git_SHA1_Update(&ctx, ibuf, rsize);
+			s.next_in = ibuf;
+			s.avail_in = rsize;
+			size -= rsize;
+		}
+
+		status = git_deflate(&s, size ? 0 : Z_FINISH);
+
+		if (!s.avail_out || status == Z_STREAM_END) {
+			size_t written = s.next_out - obuf;
+			if (write_object) {
+				sha1write(state->f, obuf, written);
+				state->offset += written;
+			}
+			s.next_out = obuf;
+			s.avail_out = sizeof(obuf);
+		}
+
+		switch (status) {
+		case Z_OK:
+		case Z_BUF_ERROR:
+		case Z_STREAM_END:
+			continue;
+		default:
+			die("unexpected deflate failure: %d", status);
+		}
+	}
+	git_deflate_end(&s);
+	git_SHA1_Final(sha1, &ctx);
+	if (write_object) {
+		idx->crc32 = crc32_end(state->f);
+		hashcpy(idx->sha1, sha1);
+		ALLOC_GROW(state->written,
+			   state->nr_written + 1, state->alloc_written);
+		state->written[state->nr_written++] = idx;
+	}
+}
+
+int index_bulk_checkin(unsigned char *sha1,
+		       int fd, size_t size, enum object_type type,
+		       const char *path, unsigned flags)
+{
+	if (!state.f && (flags & HASH_WRITE_OBJECT)) {
+		state.f = create_tmp_packfile(&state.pack_tmp_name);
+		reset_pack_idx_option(&state.pack_idx_opts);
+		/* Pretend we are going to write only one object */
+		state.offset = write_pack_header(state.f, 1);
+		if (!state.offset)
+			die_errno("unable to write pack header");
+	}
+
+	deflate_to_pack(&state, sha1, fd, size, type, path, flags);
+	if (!state.plugged)
+		finish_bulk_checkin(&state);
+	return 0;
+}
+
+void plug_bulk_checkin(void)
+{
+	state.plugged = 1;
+}
+
+void unplug_bulk_checkin(void)
+{
+	state.plugged = 0;
+	if (state.f)
+		finish_bulk_checkin(&state);
+}
diff --git a/bulk-checkin.h b/bulk-checkin.h
new file mode 100644
index 0000000..4f599f8
--- /dev/null
+++ b/bulk-checkin.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef BULK_CHECKIN_H
+#define BULK_CHECKIN_H
+
+#include "cache.h"
+
+extern int index_bulk_checkin(unsigned char sha1[],
+			      int fd, size_t size, enum object_type type,
+			      const char *path, unsigned flags);
+
+extern void plug_bulk_checkin(void);
+extern void unplug_bulk_checkin(void);
+
+#endif
diff --git a/sha1_file.c b/sha1_file.c
index 27f3b9b..c96e366 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -18,6 +18,7 @@
 #include "refs.h"
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
+#include "bulk-checkin.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -2679,10 +2680,8 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
 }
 
 /*
- * This creates one packfile per large blob, because the caller
- * immediately wants the result sha1, and fast-import can report the
- * object name via marks mechanism only by closing the created
- * packfile.
+ * This creates one packfile per large blob unless bulk-checkin
+ * machinery is "plugged".
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
@@ -2696,65 +2695,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size,
 			enum object_type type, const char *path,
 			unsigned flags)
 {
-	struct child_process fast_import;
-	char export_marks[512];
-	const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
-	char tmpfile[512];
-	char fast_import_cmd[512];
-	char buf[512];
-	int len, tmpfd;
-
-	strcpy(tmpfile, git_path("hashstream_XXXXXX"));
-	tmpfd = git_mkstemp_mode(tmpfile, 0600);
-	if (tmpfd < 0)
-		die_errno("cannot create tempfile: %s", tmpfile);
-	if (close(tmpfd))
-		die_errno("cannot close tempfile: %s", tmpfile);
-	sprintf(export_marks, "--export-marks=%s", tmpfile);
-
-	memset(&fast_import, 0, sizeof(fast_import));
-	fast_import.in = -1;
-	fast_import.argv = argv;
-	fast_import.git_cmd = 1;
-	if (start_command(&fast_import))
-		die_errno("index-stream: git fast-import failed");
-
-	len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
-		      (unsigned long) size);
-	write_or_whine(fast_import.in, fast_import_cmd, len,
-		       "index-stream: feeding fast-import");
-	while (size) {
-		char buf[10240];
-		size_t sz = size < sizeof(buf) ? size : sizeof(buf);
-		ssize_t actual;
-
-		actual = read_in_full(fd, buf, sz);
-		if (actual < 0)
-			die_errno("index-stream: reading input");
-		if (write_in_full(fast_import.in, buf, actual) != actual)
-			die_errno("index-stream: feeding fast-import");
-		size -= actual;
-	}
-	if (close(fast_import.in))
-		die_errno("index-stream: closing fast-import");
-	if (finish_command(&fast_import))
-		die_errno("index-stream: finishing fast-import");
-
-	tmpfd = open(tmpfile, O_RDONLY);
-	if (tmpfd < 0)
-		die_errno("index-stream: cannot open fast-import mark");
-	len = read(tmpfd, buf, sizeof(buf));
-	if (len < 0)
-		die_errno("index-stream: reading fast-import mark");
-	if (close(tmpfd) < 0)
-		die_errno("index-stream: closing fast-import mark");
-	if (unlink(tmpfile))
-		die_errno("index-stream: unlinking fast-import mark");
-	if (len != 44 ||
-	    memcmp(":1 ", buf, 3) ||
-	    get_sha1_hex(buf + 3, sha1))
-		die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
-	return 0;
+	return index_bulk_checkin(sha1, fd, size, type, path, flags);
 }
 
 int index_fd(unsigned char *sha1, int fd, struct stat *st,
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index deba111..36def25 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -7,14 +7,28 @@ test_description='adding and checking out large blobs'
 
 test_expect_success setup '
 	git config core.bigfilethreshold 200k &&
-	echo X | dd of=large bs=1k seek=2000
+	echo X | dd of=large bs=1k seek=2000 &&
+	echo Y | dd of=huge bs=1k seek=2500
 '
 
-test_expect_success 'add a large file' '
-	git add large &&
-	# make sure we got a packfile and no loose objects
-	test -f .git/objects/pack/pack-*.pack &&
-	test ! -f .git/objects/??/??????????????????????????????????????
+test_expect_success 'add a large file or two' '
+	git add large huge &&
+	# make sure we got a single packfile and no loose objects
+	bad= count=0 &&
+	for p in .git/objects/pack/pack-*.pack
+	do
+		count=$(( $count + 1 ))
+		test -f "$p" && continue
+		bad=t
+	done &&
+	test -z "$bad" &&
+	test $count = 1 &&
+	for l in .git/objects/??/??????????????????????????????????????
+	do
+		test -f "$l" || continue
+		bad=t
+	done &&
+	test -z "$bad"
 '
 
 test_expect_success 'checkout a large file' '
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related

* sparse checkout using exclusions
From: Eric Raible @ 2011-10-29  0:17 UTC (permalink / raw)
  To: git@vger.kernel.org

Hi all.

I was just about to send a long message about using exclusions
in sparse-checkout, when I did one last search and saw that all
of my problems were fixed by using '/*' instead of '*' as the
first line in .git/info/sparse-checkout.

Might it make sense for the example in git-read-tree.html to be
updated to include the leading slash?

    /*
    !unwanted

- Eric

^ permalink raw reply

* Git is exploding
From: Øyvind A. Holm @ 2011-10-29  0:39 UTC (permalink / raw)
  To: git

Found an interesting "Popularity Contest" graph on debian.org (via
Thomas Bassetto on G+):

http://bit.ly/rNxVN0

Very cool indeed. Maybe it's the rise of GitHub, or simply that the
user interface is mature enough that also "regular" users feel
comfortable with it.

Regards,
Øyvind

^ permalink raw reply

* Fork freedesktop project to bitbucket, make changes, generate patch back to freedesktop?
From: Alec Taylor @ 2011-10-29  3:35 UTC (permalink / raw)
  To: git

Good afternoon,

I've forked a [git] freedesktop project to [git] bitbucket.

I am working with a team extending the functionality of this project.

After many MANY adds, commits and pushes back and forth on the
bitbucket project, we then want to send this freedesktop project a
PATCH with the changes we've made.

Can you tell me the command I need to do this?

Thanks for all suggestions,

Alec Taylor

^ permalink raw reply

* Bitbucket now has git
From: Alec Taylor @ 2011-10-29  3:36 UTC (permalink / raw)
  To: git

Please update http://git-scm.com/tools

^ permalink raw reply

* Re: sparse checkout using exclusions
From: Ramkumar Ramachandra @ 2011-10-29  5:46 UTC (permalink / raw)
  To: Eric Raible; +Cc: git@vger.kernel.org
In-Reply-To: <4EAB4632.5080101@nextest.com>

Hi Eric,

Eric Raible writes:
> Might it make sense for the example in git-read-tree.html to be
> updated to include the leading slash?

This issue was fixed in 5e821231 (git-read-tree.txt: update sparse
checkout examples, 2011-09-26).

Cheers.

-- Ram

^ permalink raw reply

* Re: Git is exploding
From: Miles Bader @ 2011-10-29  8:28 UTC (permalink / raw)
  To: Øyvind A. Holm; +Cc: git
In-Reply-To: <8762j8jje9.fsf@catnip.gol.com>

2011/10/29 Miles Bader <miles@gnu.org>:
> That the sharpness of that graph is pretty amazing though; what
> happened in 2010Q1?

Actually, now I realize what happened:  that's the date the Debian
"git-core" package was renamed "git" (the "git" package used to be
"gnu interactive tools")!!

-Miles

-- 
Cat is power.  Cat is peace.

^ permalink raw reply

* Re: Git is exploding
From: Tay Ray Chuan @ 2011-10-29  8:36 UTC (permalink / raw)
  To: Øyvind A. Holm; +Cc: git
In-Reply-To: <CAA787r=jeBv9moineaJVY=urYzEX+d7n23ED-txAGhLS+OPbmg@mail.gmail.com>

On Sat, Oct 29, 2011 at 8:39 AM, Øyvind A. Holm <sunny@sunbase.org> wrote:
> Found an interesting "Popularity Contest" graph on debian.org (via
> Thomas Bassetto on G+):
>
> http://bit.ly/rNxVN0
>
> Very cool indeed. Maybe it's the rise of GitHub, or simply that the
> user interface is mature enough that also "regular" users feel
> comfortable with it.

How were the numbers gathered? I looked around the page but gave up.

-- 
Cheers,
Ray Chuan

^ permalink raw reply

* Re: Git is exploding
From: Ramkumar Ramachandra @ 2011-10-29  9:12 UTC (permalink / raw)
  To: Tay Ray Chuan; +Cc: Øyvind A. Holm, git
In-Reply-To: <CALUzUxqHNByaV+TL2p4wBcwaLNpiaATw14Jgkb1YwcfXxNkMrg@mail.gmail.com>

Hi Tay,

Tay Ray Chuan writes:
> How were the numbers gathered? I looked around the page but gave up.

See: http://popcon.debian.org/FAQ

-- Ram

^ permalink raw reply

* Re: [PATCHv2 3/3] completion: match ctags symbol names in grep patterns
From: SZEDER Gábor @ 2011-10-29 12:47 UTC (permalink / raw)
  To: Jeff King; +Cc: git
In-Reply-To: <20111028060517.GA3993@sigill.intra.peff.net>

Hi,


On Thu, Oct 27, 2011 at 11:05:20PM -0700, Jeff King wrote:
> On Sun, Oct 23, 2011 at 11:29:28PM +0200, SZEDER Gábor wrote:
> 
> > On Fri, Oct 21, 2011 at 01:30:21PM -0400, Jeff King wrote:
> > > This incorporates the suggestions from Gábor's review, with one
> > > exception: it still looks only in the current directory for the "tags"
> > > files. I think that might have some performance implications, so I'd
> > > rather add it separately, if at all.
> > 
> > I agree that scanning through a whole working tree for tags files
> > would cost too much.  But I think that a tags file at the top of the
> > working tree is common enough to be supported, and checking its
> > existence is fairly cheap.
> 
> Actually, it's not too expensive. Asking git for the top of the working
> tree means it has to traverse up to there anyway. So the trick is just
> doing our search without invoking too many external tools which would
> cause unnecessary forks.
> 
> The patch is below, but I'm still not sure it's a good idea.
> 
> Grep only looks in the current subdirectory for matches.

Unless the user explicitly specifies the path(s)...  But that comes at
the end of the command line, so the completion script could have no
idea about it at the time of 'git grep <TAB>'.

> > So how about something like this for the case arm? (I didn't actually
> > tested it.)
> > 
> > 		local tagsfile
> > 		if test -r tags; then
> > 			tagsfile=tags
> > 		else
> > 			local dir="$(__gitdir)"
> 
> You don't want __gitdir here, but rather "git rev-parse --show-cdup".

Oh, yes, indeed.

But there was a point in using __gitdir() here: it respects the
--git-dir= option.  Which brings up the question: what
should 'git --git-dir=/some/where/.git grep <TAB>' offer?

So in the end I agree that it's not a good idea.

> > Btw, there is a bug in the case statement: 'git --no-pager grep <TAB>'
> > offers refs instead of symbols, because $cword is not 2 and $prev
> > doesn't start with a dash.  But it's not worse than the current
> > behavior, so I don't think this bug is a show-stopper for the patch.
> 
> Yeah. The intent of the "$cword is 2" thing is to know that we are the
> first non-option argument. Arguably, _get_comp_words_by_ref should
> somehow tell us which position we are completing relative to the actual
> command name.

_get_comp_words_by_ref() is a general completion function, the purpose
of which is to provide a bash-version-independent equivalent of
$COMP_WORDS and $COMP_CWORD by working around the different word
splitting rules.  It doesn't know about git and its commands at all.

But there is the while loop in _git() that looks for the git command
(among other things) on the command line, which could store the index
of the command name in $words in a variable.  This variable could then
be used in that case statement (and probably in a couple of other
places, too).


Best,
Gábor


> ---
> diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
> index af283cb..b0ed657 100755
> --- a/contrib/completion/git-completion.bash
> +++ b/contrib/completion/git-completion.bash
> @@ -1429,6 +1429,39 @@ _git_gitk ()
>  	_gitk
>  }
>  
> +__git_cdup_dirs() {
> +	local prefix=$(git rev-parse --show-cdup 2>/dev/null)
> +	local oldifs=$IFS
> +	local dots
> +	local i
> +	IFS=/
> +	for i in $prefix; do
> +		dots=$dots../
> +		echo "$dots"
> +	done
> +	IFS=$oldifs
> +}

No need for $oldifs here; do a local IFS=/ instead, and then it just
goes out of scope when returning from the function.

> +
> +__git_find_in_cdup_one() {
> +	local dir=$1; shift
> +	for i in "$@"; do
> +		if test -r "$dir$i"; then
> +			echo "$dir$i"
> +			return 0
> +		fi
> +	done
> +	return 1
> +}
> +
> +__git_find_in_cdup() {
> +	__git_find_in_cdup_one "" "$@" && return
> +
> +	local dir
> +	for dir in $(__git_cdup_dirs); do
> +		__git_find_in_cdup_one "$dir" "$@" && return
> +	done
> +}
> +
>  __git_match_ctag() {
>  	awk "/^${1////\\/}/ { print \$1 }" "$2"
>  }
> @@ -1457,8 +1490,9 @@ _git_grep ()
>  
>  	case "$cword,$prev" in
>  	2,*|*,-*)
> -		if test -r tags; then
> -			__gitcomp "$(__git_match_ctag "$cur" tags)"
> +		local tags=$(__git_find_in_cdup tags)
> +		if test -n "$tags"; then
> +			__gitcomp "$(__git_match_ctag "$cur" "$tags")"
>  			return
>  		fi
>  		;;

^ permalink raw reply

* Re: git slow over https
From: Mika Fischer @ 2011-10-29 15:15 UTC (permalink / raw)
  To: Daniel Stenberg; +Cc: Git Mailing List
In-Reply-To: <alpine.DEB.2.00.1110282019510.28338@tvnag.unkk.fr>

Thanks for the pointer. Doing it this way fixes things for me. I'll
send a patch soon. I'd appreciate it if you could check it quicky.

Best,
 Mika

On Fri, Oct 28, 2011 at 20:28, Daniel Stenberg <daniel@haxx.se> wrote:
> On Fri, 28 Oct 2011, Mika Fischer wrote:
>
>> 1) What's the purpose of the select in http.c:673? Can it be removed?
>> 2) If it serves a useful purpose, what can be the reason that it hurts
>> performance so much in my case?
>
> The purpose must be to avoid busy-looping in case there's nothing to read.
>
> It should probably use curl_multi_fdset [1] to get a decent set to wait for
> instead so that it'll return fast if there is pending data. The timeout for
> select can in fact also get extended with the use of curl_multi_timeout [2].
>
> 1 = http://curl.haxx.se/libcurl/c/curl_multi_fdset.html
> 2 = http://curl.haxx.se/libcurl/c/curl_multi_timeout.html
>
> --
>
>  / daniel.haxx.se
>
>

^ permalink raw reply

* [PATCH] http.c: Use curl_multi_fdset to select on curl fds instead of just sleeping
From: Mika Fischer @ 2011-10-29 15:20 UTC (permalink / raw)
  To: git; +Cc: Mika Fischer

Previously, when nothing could be read from the connections curl had
open, git would just sleep unconditionally for 50ms. This patch changes
this behavior and instead obtains the recommended timeout and the actual
file descriptors from curl. This should eliminate time spent sleeping when
data could actually be read/written on the socket.

Signed-off-by: Mika Fischer <mika.fischer@zoopnet.de>
---
 http.c |   21 ++++++++++++++++-----
 1 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/http.c b/http.c
index a4bc770..12180f3 100644
--- a/http.c
+++ b/http.c
@@ -649,6 +649,7 @@ void run_active_slot(struct active_request_slot *slot)
 	fd_set excfds;
 	int max_fd;
 	struct timeval select_timeout;
+	long int curl_timeout;
 	int finished = 0;
 
 	slot->finished = &finished;
@@ -664,14 +665,24 @@ void run_active_slot(struct active_request_slot *slot)
 		}
 
 		if (slot->in_use && !data_received) {
-			max_fd = 0;
+			curl_multi_timeout(curlm, &curl_timeout);
+			if (curl_timeout == 0) {
+				continue;
+			} else if (curl_timeout == -1) {
+				select_timeout.tv_sec  = 0;
+				select_timeout.tv_usec = 50000;
+			} else {
+				select_timeout.tv_sec  =  curl_timeout / 1000;
+				select_timeout.tv_usec = (curl_timeout % 1000) * 1000;
+			}
+
+			max_fd = -1;
 			FD_ZERO(&readfds);
 			FD_ZERO(&writefds);
 			FD_ZERO(&excfds);
-			select_timeout.tv_sec = 0;
-			select_timeout.tv_usec = 50000;
-			select(max_fd, &readfds, &writefds,
-			       &excfds, &select_timeout);
+			curl_multi_fdset(curlm, &readfds, &writefds, &excfds, &max_fd);
+
+			select(max_fd+1, &readfds, &writefds, &excfds, &select_timeout);
 		}
 	}
 #else
-- 
1.7.7.1.489.g1fee

^ permalink raw reply related

* Re: What's cooking in git.git (Oct 2011, #11; Fri, 28)
From: Erik Faye-Lund @ 2011-10-29 15:42 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, msysGit
In-Reply-To: <7vzkglrnmc.fsf@alter.siamese.dyndns.org>

Cc'ing the msysgit list.

On Fri, Oct 28, 2011 at 8:12 PM, Junio C Hamano <gitster@pobox.com> wrote:
> * ef/mingw-upload-archive (2011-10-26) 3 commits
>  - upload-archive: use start_command instead of fork
>  - compat/win32/poll.c: upgrade from upstream
>  - mingw: move poll out of sys-folder
>
> Are msysgit folks OK with this series (I didn't see msysgit list Cc'ed on
> these patches)? If so let's move this forward, as the changes to the core
> part seem solid.
>

The msysgit list not being Cc'ed on the patches was a slip-up on my
behalf. I believe the changes are relatively uncontroversial from an
msysgit point of view, though. However, an ack/nack would be
appreciated ;)

Or does people prefer me re-sending the series, with the msysgit list Cc'ed?

^ permalink raw reply

* Re: [PATCH] http.c: Use curl_multi_fdset to select on curl fds instead of just sleeping
From: Daniel Stenberg @ 2011-10-29 20:33 UTC (permalink / raw)
  To: Mika Fischer; +Cc: git
In-Reply-To: <1319901621-482-1-git-send-email-mika.fischer@zoopnet.de>

On Sat, 29 Oct 2011, Mika Fischer wrote:

> Previously, when nothing could be read from the connections curl had open, 
> git would just sleep unconditionally for 50ms. This patch changes this 
> behavior and instead obtains the recommended timeout and the actual file 
> descriptors from curl. This should eliminate time spent sleeping when data 
> could actually be read/written on the socket.

It looks fine to me, from a libcurl perspective. I only have one comment about 
this:

> +			curl_multi_fdset(curlm, &readfds, &writefds, &excfds, &max_fd);
> +
> +			select(max_fd+1, &readfds, &writefds, &excfds, &select_timeout);

At times, curl_multi_fdset() might return -1 in max_fd, as when there's no 
internal socket around to provide to the application to wait for.

Calling select() with max_fd+1 (== 0) will then not be appreciated by all 
implementations of select() so that case should probably also be covered by 
the 50ms sleep approach...

-- 

  / daniel.haxx.se

^ permalink raw reply

* [PATCH] Display change history as a diff between two dirs
From: Roland Kaufmann @ 2011-10-29 20:51 UTC (permalink / raw)
  To: gitster; +Cc: git

Watching patches serially it can be difficult to get an overview of how
a pervasive change is distributed through-out different modules. Thus:

Extract snapshots of the files that have changed between two revisions
into temporary directories and launch a graphical tool to show the diff
between them.

Use existing functionality in git-diff to figure out which files have
changed, and to get the files themselves.

Based on a script called 'git-diffc' by Nitin Gupta.

Signed-off-by: Roland Kaufmann <rlndkfmn+git@gmail.com>
---

Requests for such a scripts surface occationally, so I believe it could
be useful to have in the distribution itself.

 Documentation/git-dirdiff.txt |   55 +++++++++++++++++++++++++++++++++++++++++
 Makefile                      |    2 +
 git-dirdiff--helper.sh        |   28 +++++++++++++++++++++
 git-dirdiff.sh                |   34 +++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/git-dirdiff.txt
 create mode 100755 git-dirdiff--helper.sh
 create mode 100755 git-dirdiff.sh

diff --git a/Documentation/git-dirdiff.txt b/Documentation/git-dirdiff.txt
new file mode 100644
index 0000000..bdd2581
--- /dev/null
+++ b/Documentation/git-dirdiff.txt
@@ -0,0 +1,55 @@
+git-dirdiff(1)
+==============
+
+NAME
+----
+git-dirdiff - Show changes using directory compare
+
+SYNOPSIS
+--------
+[verse]
+'git dirdiff' [<options>] [<commit> [<commit>]] [--] [<path>...]
+
+DESCRIPTION
+-----------
+'git dirdiff' is a git command that allows you to compare revisions
+as a difference between two directories. 'git dirdiff' is a frontend
+to linkgit:git-diff[1].
+
+OPTIONS
+-------
+See linkgit:git-diff[1] for the list of supported options.
+
+CONFIG VARIABLES
+----------------
+'git dirdiff' uses the same config variables as linkgit:git-difftool[1]
+to determine which difftool should be used.
+
+TEMPORARY FILES
+---------------
+'git dirdiff' creates a directory with 'mktemp' to hold snapshots of the
+files which are different in the two revisions. This directory is removed
+when the diff viewer terminates.
+
+NOTES
+-----
+The diff viewer must support being passed directories instead of files
+as its arguments.
++
+Files that are not put under version control are not included when
+viewing the difference between a revision and the working directory.
+
+SEE ALSO
+--------
+linkgit:git-diff[1]::
+	 Show changes between commits, commit and working tree, etc
+
+linkgit:git-difftool[1]::
+	Show changes using common diff tools
+
+linkgit:git-config[1]::
+	 Get and set repository or global options
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index 3139c19..03771cf 100644
--- a/Makefile
+++ b/Makefile
@@ -365,6 +365,8 @@ unexport CDPATH
 SCRIPT_SH += git-am.sh
 SCRIPT_SH += git-bisect.sh
 SCRIPT_SH += git-difftool--helper.sh
+SCRIPT_SH += git-dirdiff.sh
+SCRIPT_SH += git-dirdiff--helper.sh
 SCRIPT_SH += git-filter-branch.sh
 SCRIPT_SH += git-lost-found.sh
 SCRIPT_SH += git-merge-octopus.sh
diff --git a/git-dirdiff--helper.sh b/git-dirdiff--helper.sh
new file mode 100755
index 0000000..bc0b49d
--- /dev/null
+++ b/git-dirdiff--helper.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+#
+# Accumulate files in a changeset into a pre-defined directory.
+#
+# Copyright (C) 2011 Roland Kaufmann
+# Based on a script called git-diffc by Nitin Gupta
+#
+# This file is licensed under the GPL v2, or a later version
+# at the discretion of the official Git maintainer.
+
+# bail out if there is any problems copying
+set -e
+
+# check that we are called by git-dirdiff
+if [ -z $__GIT_DIFF_DIR ]; then
+  echo Error: Do not call $(basename $0) directly 1>&2
+  exit 1
+fi
+
+# don't attempt to copy new or removed files
+if [ "$2" != "/dev/null" ]; then
+  mkdir -p $__GIT_DIFF_DIR/old/$(dirname $1)
+  cp $2 $__GIT_DIFF_DIR/old/$1
+fi
+if [ "$5" != "/dev/null" ]; then
+  mkdir -p $__GIT_DIFF_DIR/new/$(dirname $1)
+  cp $5 $__GIT_DIFF_DIR/new/$1
+fi
diff --git a/git-dirdiff.sh b/git-dirdiff.sh
new file mode 100755
index 0000000..4e75eda
--- /dev/null
+++ b/git-dirdiff.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+#
+# Display differences between two commits with a directory comparison.
+#
+# Copyright (C) 2011 Roland Kaufmann
+# Based on a script called git-diffc by Nitin Gupta
+#
+# This file is licensed under the GPL v2, or a later version
+# at the discretion of the official Git maintainer.
+
+# bail out if there is any problems in getting a diff
+set -e
+
+# create a temporary directory to hold snapshots of changed files
+__GIT_DIFF_DIR=$(mktemp --tmpdir -d git-dirdiff.XXXXXX)
+export __GIT_DIFF_DIR
+
+# cleanup after we're done
+trap 'rm -rf $__GIT_DIFF_DIR' 0
+
+# list all files that have changed. store this list in a separate
+# file so that we can test the exit status of this command. (if we had
+# bash we could use pipefail, or if we had Posix we could use mkfifo)
+git diff --raw "$@" > $__GIT_DIFF_DIR/toc
+
+# let the helper script accumulate them into the temporary directory
+cut -f 2- -s $__GIT_DIFF_DIR/toc | while read f; do
+  GIT_EXTERNAL_DIFF=git-dirdiff--helper git --no-pager diff "$@" $f
+done
+
+# run original diff program, reckoning it will understand directories
+# modes and shas does not apply to the root directories so submit dummy
+# values for those, hoping that the diff tool does not use them.
+git-difftool--helper - $__GIT_DIFF_DIR/old deadbeef 0755 $__GIT_DIFF_DIR/new babeface 0755
-- 
1.7.1

^ permalink raw reply related

* [PATCH] document 'T' status from git-status
From: Mark Dominus @ 2011-10-30  0:06 UTC (permalink / raw)
  To: git, gitster; +Cc: Mark Dominus, Mark Dominus

From: Mark Dominus <mjd@icgroup.com>

Signed-off-by: Mark Dominus <mjd@plover.com>
---
 Documentation/git-status.txt |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index 3d51717..e7fc5c3 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -122,6 +122,8 @@ codes can be interpreted as follows:
 * 'R' = renamed
 * 'C' = copied
 * 'U' = updated but unmerged
+* 'T' = file type changed
+  (typically from plain file to symlink, or vice versa)
 
 Ignored files are not listed, unless `--ignored` option is in effect,
 in which case `XY` are `!!`.
@@ -134,9 +136,11 @@ in which case `XY` are `!!`.
     D         [ M]   deleted from index
     R        [ MD]   renamed in index
     C        [ MD]   copied in index
+    T        [ MD]   file type changed in index
     [MARC]           index and work tree matches
     [ MARC]     M    work tree changed since index
     [ MARC]     D    deleted in work tree
+    [ MARC]     T    file type changed in work tree
     -------------------------------------------------
     D           D    unmerged, both deleted
     A           U    unmerged, added by us
-- 
1.7.7.dirty

^ permalink raw reply related

* Re: [RFC/PATCH] define the way new representation types are encoded in the pack
From: Nguyen Thai Ngoc Duy @ 2011-10-30  5:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Nicolas Pitre, Shawn O. Pearce, Jeff King
In-Reply-To: <7vwrbptzjm.fsf@alter.siamese.dyndns.org>

On Fri, Oct 28, 2011 at 1:12 PM, Junio C Hamano <gitster@pobox.com> wrote:
> As people may be able to guess from the name, CAT_TREE is envisioned to
> encode a large data (primarily of type "blob") by recording the object
> name of a tree object and probably the total length, and would represent
> the concatenation of all blobs contained in the tree object when the tree
> is traversed in some fixed order (e.g. Avery's "bup split"). I am guessing
> that the payload for CAT_TREE representation type will be:
>
>  - 20-byte object name for the top-level tree object;

Because all blobs in this tree object must be in a fixed order, and
they won't likely have meaningful names nor permission, should
CAT_TREE payload is a SHA-1 sequence of all blobs (or cat-trees if we
want nested trees) instead? IOW the tree is integrated into cat-tree
object, not as a separate tree object.

>  - type of the basic object (commit, tree, blob, or tag) it represents,
>   even though it is unlikely that we would want to record such a large
>   commit or tag that needs CAT_TREE representation;
>
>  - the total length of the basic object it represents, even though it is
>   redundant (you could traverse and sum the sizes of blobs contained in
>   the tree object), it would help sha1_object_info() and friends. This
>   will be the "some size" I mentioned in the previous message for this
>   representation type.

Not sure if it's related to representation types, but is there any way
(perhaps FLAT_BLOB type?) we can mark an object uncompressed, so we
can mmap() and access it directly?
-- 
Duy

^ permalink raw reply

* Re: [PATCH/WIP 03/11] t5403: avoid doing "git add foo/bar" where foo/.git exists
From: Nguyen Thai Ngoc Duy @ 2011-10-30  5:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vobx2z60w.fsf@alter.siamese.dyndns.org>

On Fri, Oct 28, 2011 at 12:41 AM, Junio C Hamano <gitster@pobox.com> wrote:
>> ... Should we stick with one way only?
>
> Whatever we have been doing should not change, especially in corner cases.

I disagree. If it's not right, then we should change it even though it
may face unpleasant consequences from misusing it. And I don't think
it's sane to behave like what we're doing now:

$ GIT_DIR=clone2/.git git ls-files --stage
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       1

$ ls -l clone2/2 3
-rw-r--r-- 1 pclouds users 0 Th10 30 12:40 3
-rw-r--r-- 1 pclouds users 0 Th10 30 12:40 clone2/2

$ GIT_DIR=clone2/.git git add clone2/2 3

$ GIT_DIR=clone2/.git git ls-files --stage
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       1
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       3

$ GIT_DIR=clone2/.git git add clone2/2

$ GIT_DIR=clone2/.git git ls-files --stage
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       1
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       3
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       clone2/2

"git add" behaves inconsistently when "clone2/2" and "3" are given and
when clone2/2 is given alone. This is just bad to me.

Note that this has nothing to do with read_directory() discussion we
had in the notes-merge patch, I agree we should keep the prefix. This
is about the calculating common prefix automatically from pathspec.
But prefix and pathspec are treated differently by read_directory().
In "git add clone2/2 3", common prefix is "" while in "git add
clone2/2" common prefix is "clone2".
-- 
Duy

^ permalink raw reply

* Re: [PATCH] Display change history as a diff between two dirs
From: Junio C Hamano @ 2011-10-30  6:09 UTC (permalink / raw)
  To: Roland Kaufmann; +Cc: git
In-Reply-To: <4EAC6765.4030003@gmail.com>

Roland Kaufmann <rlndkfmn+git@gmail.com> writes:

> diff --git a/git-dirdiff--helper.sh b/git-dirdiff--helper.sh
> new file mode 100755
> index 0000000..bc0b49d
> --- /dev/null
> +++ b/git-dirdiff--helper.sh
> @@ -0,0 +1,28 @@
> ...
> +# bail out if there is any problems copying
> +set -e

I do not think any of our scripted Porcelains use "set -e"; rather they
try to catch errors and produce messages that are more appropriate for
specific error sites.

> +# check that we are called by git-dirdiff
> +if [ -z $__GIT_DIFF_DIR ]; then
> +  echo Error: Do not call $(basename $0) directly 1>&2
> +  exit 1
> +fi

(style)

	if test -z "$__GIT_DIFF_DIR"
        then
        	echo >&2 "..."
		exit 1
	fi

If this were to become part of the main Porcelain set, you would probably
source in the git-sh-setup helper near the beginning with

	. git-sh-setup

and use "die" instead of "echo && exit 1".

> diff --git a/git-dirdiff.sh b/git-dirdiff.sh
> new file mode 100755
> index 0000000..4e75eda
> --- /dev/null
> +++ b/git-dirdiff.sh
> @@ -0,0 +1,34 @@
> ...
> +# create a temporary directory to hold snapshots of changed files
> +__GIT_DIFF_DIR=$(mktemp --tmpdir -d git-dirdiff.XXXXXX)
> +export __GIT_DIFF_DIR

I do not think any of our scripted Porcelains use "mktemp" especially
"mktemp -d". How portable is it?

> +git diff --raw "$@" > $__GIT_DIFF_DIR/toc
> +
> +# let the helper script accumulate them into the temporary directory
> +cut -f 2- -s $__GIT_DIFF_DIR/toc | while read f; do
> +  GIT_EXTERNAL_DIFF=git-dirdiff--helper git --no-pager diff "$@" $f
> +done

(style)

	... upstream command ... |
        while read f
	do
		...
	done

It also is not clear what could be used in "$@". Obviously you would not
want things like "-U20" and "--stat" fed to the first "list of paths"
phase, but there may be some options you may want to give to the inner
"external diff" thing.

It is not clear to me why you need to grab the list of paths in toc and
iterate over them one by one. IOW, why isn't this sufficient?

    # dirdiff--helper will copy the files in the temporary directory
    GIT_EXTERNAL_DIFF=git-dirdiff--helper git --no-pager diff "$@"

    # Now the temporary old/ and new/ are populated, compare them
    git-difftool--helper - .....

Overall, I found it interesting, but I am not convinced yet that this
should be in the set of main Porcelains.  It seems to be a hack to work
around the design of the current external diff interface that specifically
targets tools that are about comparing single pair of paths at a time.

If "compare two sets of files, each extracted in its own temporary
directory" turns out to be sufficiently useful thing to do (which I
suspect is true), we would probably want to make it an option to "git
diff" itself, and not a separate git subcommand like "dirdiff". I can see
"git diff" (and possibly even things like "git log -p") populating two
temporary directories (your old/ and new/) and running a custom program
specified by GIT_EXTERNAL_TREEDIFF environment variable, instead of doing
any textual diff generation internally.

I wouldn't mind carrying a polished version of this in contrib/ for a
cycle or two in order to let people try it out and get kinks out of its
design.

For example, how well does this approach work with -M/-C (I do not think
it would do anything useful, but I haven't thought things through).  It
would be nice if we gave the hint to the external program that compares
the populated temporary directories how paths in the preimage tree and
postimage tree correspond to each other.

^ permalink raw reply

* Re: [PATCH] document 'T' status from git-status
From: Junio C Hamano @ 2011-10-30  6:25 UTC (permalink / raw)
  To: Mark Dominus; +Cc: git, Mark Dominus
In-Reply-To: <1319933204-21587-1-git-send-email-mjd@plover.com>

Mark Dominus <mjd@plover.com> writes:

> From: Mark Dominus <mjd@icgroup.com>
>
> Signed-off-by: Mark Dominus <mjd@plover.com>
> ---
>  Documentation/git-status.txt |    4 ++++
>  1 files changed, 4 insertions(+), 0 deletions(-)
>
> diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
> index 3d51717..e7fc5c3 100644
> --- a/Documentation/git-status.txt
> +++ b/Documentation/git-status.txt
> @@ -122,6 +122,8 @@ codes can be interpreted as follows:
>  * 'R' = renamed
>  * 'C' = copied
>  * 'U' = updated but unmerged
> +* 'T' = file type changed
> +  (typically from plain file to symlink, or vice versa)
>  
>  Ignored files are not listed, unless `--ignored` option is in effect,
>  in which case `XY` are `!!`.
> @@ -134,9 +136,11 @@ in which case `XY` are `!!`.
>      D         [ M]   deleted from index
>      R        [ MD]   renamed in index
>      C        [ MD]   copied in index
> +    T        [ MD]   file type changed in index
>      [MARC]           index and work tree matches
>      [ MARC]     M    work tree changed since index
>      [ MARC]     D    deleted in work tree
> +    [ MARC]     T    file type changed in work tree

The current organization of this table may need to be rethought, but if we
were to keep it, then this change is far from sufficient. For example, you
do not explain what XY = TT means.

	Side note. The reason why we do not have two independent tables,
	one of which shows "if X is M then it means updated in index, if X
	is A then..." and the other shows "if Y is ' ' then index and
	working tree matches, if Y is 'M' then ...", is because this table
	is meant to show _possible_ combinations. For example, the row
	with X = D shows that only two possible value for Y are ' ' and M
	and A or D are not possible values for Y in that case.

It may be easier to explain if you treated that 'T' is merely a variant of
'M' (this comment applies to the first hunk of your patch that starts at
line 122), i.e.

 Documentation/git-status.txt |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index 3d51717..bc2552e 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -126,6 +126,10 @@ codes can be interpreted as follows:
 Ignored files are not listed, unless `--ignored` option is in effect,
 in which case `XY` are `!!`.
 
+Note that a change in filetype (i.e. a regular file to symbolic link or
+vice versa) is a special case of "modified" and shown as 'T' instead of
+'M'; in the table below an 'M' could be a 'T'.
+
     X          Y     Meaning
     -------------------------------------------------
               [MD]   not updated

^ permalink raw reply related

* Re: [PATCH/WIP 03/11] t5403: avoid doing "git add foo/bar" where foo/.git exists
From: Junio C Hamano @ 2011-10-30  7:08 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git
In-Reply-To: <CACsJy8DdQXXoYT2gB2L5z6pdCNU_vL2w7c8eJvKRGX2T9iAC3Q@mail.gmail.com>

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> Note that this has nothing to do with read_directory() discussion we
> had in the notes-merge patch...

I think we are in agreement on that point.

Going back to your example...

> $ GIT_DIR=clone2/.git git add clone2/2 3
>
> $ GIT_DIR=clone2/.git git ls-files --stage
> 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       1
> 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0       3

You probably found a bug here. It is simply wrong to choose not to add
clone2/2, especially without telling the caller anything.

	Side note. I just did this and I am not getting what you saw above.

        $ mkdir -p /var/tmp/j/y && cd /var/tmp/j/y
        $ git init; git init clone2
        $ : >3; : >clone2/2
        $ GIT_DIR=clone2/.git git add clone2/2 3
        $ GIT_DIR=clone2/.git git ls-files
        3
	clone2/2

	The behavour is different when clone2/.git already has commit, and
        whatever codepath that gives these two different behaviour needs
        to be fixed.

By the way, I think I know where you are coming from.

If we think clone2/ and everything underneath belongs to a repository that
is _not_ governed by our GIT_DIR (which usually is .git), it may be nicer
when the user attempts to add clone2/2 (which would normally belong to
clone2/.git) to at least warn about it, or even error out. I would not be
entirely opposed to a change in the behaviour if the above example were
done without GIT_DIR and produced an error, like this:

    $ git add clone2/2 3; echo $?
    error: clone2/2 is outside our repository, possibly governed by clone2/.git
    1
    $ git ls-files
    1

After all, if clone2 were a submodule of our repository, we do notice and
error out an attempt to add clone2/2 to our repository, so if we changed
the way how "git add" behaves to do the above, I can buy an argument that
calls it a bugfix.

When GIT_DIR=clone2/.git is given, however, the caller explicitly declines
the repository discovery. We do not know how the repository we are dealing
with (which we were explicitly told with $GIT_DIR) and a directory whose
name is ".git" under "clone2" we happened to find in read_directory()
relates to each other, especially when our index does not have clone2 as
our submodule.

We however *do* know that our working tree is our current directory, so
it would be wrong to do this:

    $ GIT_DIR=clone2/.git git add clone2/2 3; echo $?
    error: 3 is outside our repository, possibly goverened by .git
    1

The command should just add clone2/2 and 3 as it was told to.

^ permalink raw reply

* Re: [RFC/PATCH] define the way new representation types are encoded in the pack
From: Junio C Hamano @ 2011-10-30  7:11 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git, Nicolas Pitre, Shawn O. Pearce, Jeff King
In-Reply-To: <CACsJy8Cz0R_s+VYRd+1wTTfbt_vH5dd3ALgZip0xn7rfYf6gpw@mail.gmail.com>

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> Because all blobs in this tree object must be in a fixed order, and
> they won't likely have meaningful names nor permission, should
> CAT_TREE payload is a SHA-1 sequence of all blobs (or cat-trees if we
> want nested trees) instead? IOW the tree is integrated into cat-tree
> object, not as a separate tree object.

I have no problem with that (I am not worried about minor details of the
actual implementation of cat-tree yet).

> Not sure if it's related to representation types, but is there any way
> (perhaps FLAT_BLOB type?) we can mark an object uncompressed, so we
> can mmap() and access it directly?

In pack? Loose? Both?

What kind of payload and use case do you have in mind?

^ permalink raw reply

* Re: [RFC/PATCH] define the way new representation types are encoded in the pack
From: Junio C Hamano @ 2011-10-30  7:13 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git, Nicolas Pitre, Shawn O. Pearce, Jeff King
In-Reply-To: <7v4nyrrm1w.fsf@alter.siamese.dyndns.org>

Junio C Hamano <gitster@pobox.com> writes:

> Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:
>
>> Because all blobs in this tree object must be in a fixed order, and
>> they won't likely have meaningful names nor permission, should
>> CAT_TREE payload is a SHA-1 sequence of all blobs (or cat-trees if we
>> want nested trees) instead? IOW the tree is integrated into cat-tree
>> object, not as a separate tree object.
>
> I have no problem with that (I am not worried about minor details of the
> actual implementation of cat-tree yet).

Side note. It should be renamed "split-object" or something if we go the
route you suggest, as "tree"-ness of the actual representation is not
essential.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox