Git development
 help / color / mirror / Atom feed
* [PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from
From: Nguyễn Thái Ngọc Duy @ 2013-01-24  8:42 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jens Lehmann,
	Nguyễn Thái Ngọc Duy

We currently know if an object is loose or packed. We do not know if
it's from the repo's object database, or via alternates
mechanism. With this patch, sha1_object_info_extended() can tell if an
object comes from alternates source (and which one).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 How about this way instead: we keep track of where objects come from
 so we can verify object source when we create or update something
 that contains SHA-1.

 1/7 and 2/7 prepare for tracking object source. The rest verifies
 that new commits, trees, tags, indexes, refs or reflogs do not refer
 to an external source.

 This adds some cost when add_submodule_odb() is used. I did not
 measure, but I guess the added cost is much smaller compared to
 forking, especially on Windows. No breakages detected by the test
 suite, which is really good (or my code is really broken).

 builtin/index-pack.c |  2 +-
 cache.h              |  4 +++-
 fast-import.c        |  2 +-
 sha1_file.c          | 66 ++++++++++++++++++++++++++++++++++++----------------
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 43d364b..a7de3f8 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1393,7 +1393,7 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
 
 static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
 {
-	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1, NULL);
 
 	if (!p)
 		die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/cache.h b/cache.h
index c257953..92854ab 100644
--- a/cache.h
+++ b/cache.h
@@ -978,6 +978,7 @@ struct pack_window {
 extern struct packed_git {
 	struct packed_git *next;
 	struct pack_window *windows;
+	struct alternate_object_database *alt;
 	off_t pack_size;
 	const void *index_data;
 	size_t index_size;
@@ -1066,7 +1067,7 @@ extern void close_pack_windows(struct packed_git *);
 extern void unuse_pack(struct pack_window **);
 extern void free_pack_by_name(const char *);
 extern void clear_delta_base_cache(void);
-extern struct packed_git *add_packed_git(const char *, int, int);
+extern struct packed_git *add_packed_git(const char *, int, int, struct alternate_object_database *);
 extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
 extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
 extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
@@ -1102,6 +1103,7 @@ struct object_info {
 			unsigned int is_delta;
 		} packed;
 	} u;
+	struct alternate_object_database *alt;
 };
 extern int sha1_object_info_extended(const unsigned char *, struct object_info *);
 
diff --git a/fast-import.c b/fast-import.c
index c2a814e..4bf732e 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -964,7 +964,7 @@ static void end_packfile(void)
 		idx_name = keep_pack(create_index());
 
 		/* Register the packfile with core git's machinery. */
-		new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+		new_p = add_packed_git(idx_name, strlen(idx_name), 1, NULL);
 		if (!new_p)
 			die("core git rejected index %s", idx_name);
 		all_packs[pack_id] = new_p;
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..afc7355 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -933,7 +933,8 @@ static void try_to_free_pack_memory(size_t size)
 	release_pack_memory(size, -1);
 }
 
-struct packed_git *add_packed_git(const char *path, int path_len, int local)
+struct packed_git *add_packed_git(const char *path, int path_len, int local,
+				  struct alternate_object_database *alt)
 {
 	static int have_set_try_to_free_routine;
 	struct stat st;
@@ -973,6 +974,7 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local)
 	p->mtime = st.st_mtime;
 	if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
 		hashclr(p->sha1);
+	p->alt = alt;
 	return p;
 }
 
@@ -1000,7 +1002,8 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
-static void prepare_packed_git_one(char *objdir, int local)
+static void prepare_packed_git_one(char *objdir, int local,
+				   struct alternate_object_database *alt)
 {
 	/* Ensure that this buffer is large enough so that we can
 	   append "/pack/" without clobbering the stack even if
@@ -1041,7 +1044,7 @@ static void prepare_packed_git_one(char *objdir, int local)
 		/* See if it really is a valid .idx file with corresponding
 		 * .pack file that we can map.
 		 */
-		p = add_packed_git(path, len + namelen, local);
+		p = add_packed_git(path, len + namelen, local, alt);
 		if (!p)
 			continue;
 		install_packed_git(p);
@@ -1110,11 +1113,11 @@ void prepare_packed_git(void)
 
 	if (prepare_packed_git_run_once)
 		return;
-	prepare_packed_git_one(get_object_directory(), 1);
+	prepare_packed_git_one(get_object_directory(), 1, NULL);
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
 		alt->name[-1] = 0;
-		prepare_packed_git_one(alt->base, 0);
+		prepare_packed_git_one(alt->base, 0, alt);
 		alt->name[-1] = '/';
 	}
 	rearrange_packed_git();
@@ -1215,15 +1218,19 @@ static int git_open_noatime(const char *name)
 	}
 }
 
-static int open_sha1_file(const unsigned char *sha1)
+static int open_sha1_file(const unsigned char *sha1,
+			  struct alternate_object_database **p_alt)
 {
 	int fd;
 	char *name = sha1_file_name(sha1);
 	struct alternate_object_database *alt;
 
 	fd = git_open_noatime(name);
-	if (fd >= 0)
+	if (fd >= 0) {
+		if (p_alt)
+			*p_alt = NULL;
 		return fd;
+	}
 
 	prepare_alt_odb();
 	errno = ENOENT;
@@ -1231,18 +1238,23 @@ static int open_sha1_file(const unsigned char *sha1)
 		name = alt->name;
 		fill_sha1_path(name, sha1);
 		fd = git_open_noatime(alt->base);
-		if (fd >= 0)
+		if (fd >= 0) {
+			if (p_alt)
+				*p_alt = alt;
 			return fd;
+		}
 	}
 	return -1;
 }
 
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+static void *map_sha1_file_extended(const unsigned char *sha1,
+				    unsigned long *size,
+				    struct alternate_object_database **p_alt)
 {
 	void *map;
 	int fd;
 
-	fd = open_sha1_file(sha1);
+	fd = open_sha1_file(sha1, p_alt);
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1261,6 +1273,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 	return map;
 }
 
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+{
+	return map_sha1_file_extended(sha1, size, NULL);
+}
+
 /*
  * There used to be a second loose object header format which
  * was meant to mimic the in-pack format, allowing for direct
@@ -2096,7 +2113,9 @@ static int fill_pack_entry(const unsigned char *sha1,
 	return 1;
 }
 
-static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+static int find_pack_entry(const unsigned char *sha1,
+			   struct pack_entry *e,
+			   struct alternate_object_database **p_alt)
 {
 	struct packed_git *p;
 
@@ -2104,14 +2123,19 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) {
+		if (p_alt)
+			*p_alt = last_found_pack->alt;
 		return 1;
+	}
 
 	for (p = packed_git; p; p = p->next) {
 		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
 			continue;
 
 		last_found_pack = p;
+		if (p_alt)
+			*p_alt = p->alt;
 		return 1;
 	}
 	return 0;
@@ -2130,7 +2154,9 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 
 }
 
-static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep)
+static int sha1_loose_object_info(const unsigned char *sha1,
+				  unsigned long *sizep,
+				  struct alternate_object_database **p_alt)
 {
 	int status;
 	unsigned long mapsize, size;
@@ -2138,7 +2164,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
 	git_zstream stream;
 	char hdr[32];
 
-	map = map_sha1_file(sha1, &mapsize);
+	map = map_sha1_file_extended(sha1, &mapsize, p_alt);
 	if (!map)
 		return error("unable to find %s", sha1_to_hex(sha1));
 	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
@@ -2168,9 +2194,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 		return co->type;
 	}
 
-	if (!find_pack_entry(sha1, &e)) {
+	if (!find_pack_entry(sha1, &e, &oi->alt)) {
 		/* Most likely it's a loose object. */
-		status = sha1_loose_object_info(sha1, oi->sizep);
+		status = sha1_loose_object_info(sha1, oi->sizep, &oi->alt);
 		if (status >= 0) {
 			oi->whence = OI_LOOSE;
 			return status;
@@ -2178,7 +2204,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 
 		/* Not a loose object; someone else may have just packed it. */
 		reprepare_packed_git();
-		if (!find_pack_entry(sha1, &e))
+		if (!find_pack_entry(sha1, &e, &oi->alt))
 			return status;
 	}
 
@@ -2213,7 +2239,7 @@ static void *read_packed_sha1(const unsigned char *sha1,
 	struct pack_entry e;
 	void *data;
 
-	if (!find_pack_entry(sha1, &e))
+	if (!find_pack_entry(sha1, &e, NULL))
 		return NULL;
 	data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
 	if (!data) {
@@ -2618,14 +2644,14 @@ int has_pack_index(const unsigned char *sha1)
 int has_sha1_pack(const unsigned char *sha1)
 {
 	struct pack_entry e;
-	return find_pack_entry(sha1, &e);
+	return find_pack_entry(sha1, &e, NULL);
 }
 
 int has_sha1_file(const unsigned char *sha1)
 {
 	struct pack_entry e;
 
-	if (find_pack_entry(sha1, &e))
+	if (find_pack_entry(sha1, &e, NULL))
 		return 1;
 	return has_loose_object(sha1);
 }
-- 
1.8.0.rc3.18.g0d9b108

^ permalink raw reply related

* Re: [PATCH 1/2] upload-pack: avoid parsing objects during ref advertisement
From: Jeff King @ 2013-01-24  7:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, git-dev
In-Reply-To: <7vehhiozkb.fsf@alter.siamese.dyndns.org>

On Fri, Jan 18, 2013 at 03:12:52PM -0800, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > When we advertise a ref, the first thing we do is parse the
> > pointed-to object. This gives us two things:
> >
> >   1. a "struct object" we can use to store flags
> >
> >   2. the type of the object, so we know whether we need to
> >      dereference it as a tag
> >
> > Instead, we can just use lookup_unknown_object to get an
> > object struct, and then fill in just the type field using
> > sha1_object_info (which, in the case of packed files, can
> > find the information without actually inflating the object
> > data).
> >
> > This can save time if you have a large number of refs, and
> > the client isn't actually going to request those refs (e.g.,
> > because most of them are already up-to-date).
> 
> This is an old news, but do you recall why this patch did not update
> the code in mark_our_ref() that gets "struct object *o" from parse_object()
> the same way and mark them with OUR_REF flag?
> 
> I was wondering about code consolidation like this.

It was just because I did my measuring on raw upload-pack, so I didn't
notice that mark_our_ref was doing the same potentially slow thing. We
only call mark_our_ref during the second half of the stateless-rpc
conversation, and I did not measure that (and it would be a pain to do
so in isolation).

But it should be able to get the exact same speedups that we get from
send_ref. It probably matters less in the long run, because the
advertising phase is going to be called more frequently (e.g., for every
no-op fetch), and once we are calling mark_our_ref, we are presumably
about to do do actual packing work. However, there's no reason not to
get what speed we can there, too.

> diff --git a/upload-pack.c b/upload-pack.c
> index 95d8313..609cd6c 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -722,15 +722,18 @@ static void receive_needs(void)
>  	free(shallows.objects);
>  }
>  
> +static int mark_our_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data);
> +
>  static int send_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
>  {
>  	static const char *capabilities = "multi_ack thin-pack side-band"
>  		" side-band-64k ofs-delta shallow no-progress"
>  		" include-tag multi_ack_detailed";
> -	struct object *o = lookup_unknown_object(sha1);
>  	const char *refname_nons = strip_namespace(refname);
>  	unsigned char peeled[20];
>  
> +	mark_our_ref(refname, sha1, flag, cb_data);
> +
>  	if (capabilities)
>  		packet_write(1, "%s %s%c%s%s agent=%s\n",
>  			     sha1_to_hex(sha1), refname_nons,
> @@ -740,10 +743,6 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo
>  	else
>  		packet_write(1, "%s %s\n", sha1_to_hex(sha1), refname_nons);
>  	capabilities = NULL;
> -	if (!(o->flags & OUR_REF)) {
> -		o->flags |= OUR_REF;
> -		nr_our_refs++;
> -	}
>  	if (!peel_ref(refname, peeled))
>  		packet_write(1, "%s %s^{}\n", sha1_to_hex(peeled), refname_nons);
>  	return 0;

Right, I think this is a nice cleanup.

> @@ -751,7 +750,7 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo
>  
>  static int mark_our_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
>  {
> -	struct object *o = parse_object(sha1);
> +	struct object *o = parse_object(sha1); /* lookup-unknown??? */
>  	if (!o)
>  		die("git upload-pack: cannot find object %s:", sha1_to_hex(sha1));
>  	if (!(o->flags & OUR_REF)) {

And yeah, this should use lookup_unknown_object to extend the
optimization to mark_our_ref (and avoid removing it for the
ref-advertisement case, of course).

-Peff

^ permalink raw reply

* [PATCH 3/3] mergetool--lib: Add TortoiseMerge as a tool
From: Alexey Shumkin @ 2013-01-24  7:16 UTC (permalink / raw)
  To: git; +Cc: Pat Thoyts, Alexey Shumkin, Junio C Hamano
In-Reply-To: <1359011768-7665-1-git-send-email-Alex.Crezoff@gmail.com>

Also added Russian translation of the added error message
"%s cannot be used without a base"

Signed-off-by: Alexey Shumkin <Alex.Crezoff@gmail.com>
---
 git-gui/lib/mergetool.tcl | 10 ++++++++++
 git-gui/po/ru.po          |  4 ++++
 2 files changed, 14 insertions(+)

diff --git a/git-gui/lib/mergetool.tcl b/git-gui/lib/mergetool.tcl
index 837ce17..d978770 100644
--- a/git-gui/lib/mergetool.tcl
+++ b/git-gui/lib/mergetool.tcl
@@ -257,6 +257,16 @@ proc merge_resolve_tool2 {} {
 			set cmdline [list "$merge_tool_path" -o "$MERGED" "$LOCAL" "$REMOTE"]
 		}
 	}
+	tortoisemerge {
+		if {$base_stage ne {}} {
+			set cmdline [list "$merge_tool_path" \
+				-base:$BASE -mine:$LOCAL \
+				-theirs:$REMOTE -merged:$MERGED]
+		} else {
+			error_popup [mc "%s cannot be used without a base" "TortoiseMerge"]
+			return
+		}
+	}
 	vimdiff {
 		error_popup [mc "Not a GUI merge tool: '%s'" $tool]
 		return
diff --git a/git-gui/po/ru.po b/git-gui/po/ru.po
index ca4343b..e9ef810 100644
--- a/git-gui/po/ru.po
+++ b/git-gui/po/ru.po
@@ -1948,6 +1948,10 @@ msgstr "Конфликтующий файл не существует"
 msgid "Not a GUI merge tool: '%s'"
 msgstr "'%s' не является программой слияния"
 
+#: lib/mergetool.tcl:280
+msgid "%s cannot be used without a base"
+msgstr "%s не может использоваться без базовой версии"
+
 #: lib/mergetool.tcl:268
 #, tcl-format
 msgid "Unsupported merge tool '%s'"
-- 
1.8.1.1.10.g9255f3f

^ permalink raw reply related

* [PATCH 2/3] mergetool--lib: Add diffuse as a tool
From: Alexey Shumkin @ 2013-01-24  7:16 UTC (permalink / raw)
  To: git; +Cc: Pat Thoyts, Alexey Shumkin, Junio C Hamano
In-Reply-To: <1359011768-7665-1-git-send-email-Alex.Crezoff@gmail.com>

Signed-off-by: Alexey Shumkin <Alex.Crezoff@gmail.com>
---
 git-gui/lib/mergetool.tcl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/git-gui/lib/mergetool.tcl b/git-gui/lib/mergetool.tcl
index 4fc1cab..837ce17 100644
--- a/git-gui/lib/mergetool.tcl
+++ b/git-gui/lib/mergetool.tcl
@@ -194,6 +194,15 @@ proc merge_resolve_tool2 {} {
 			set cmdline [list "$merge_tool_path" "$LOCAL" "$REMOTE" -mergeoutput="$MERGED"]
 		}
 	}
+	diffuse {
+		if {$base_stage ne {}} {
+			set cmdline [list "$merge_tool_path" \
+				"$LOCAL" "$MERGED" "$REMOTE" "$BASE"]
+		} else {
+			set cmdline [list "$merge_tool_path" \
+				"$LOCAL" "$MERGED" "$REMOTE"]
+		}
+	}
 	ecmerge {
 		if {$base_stage ne {}} {
 			set cmdline [list "$merge_tool_path" "$BASE" "$LOCAL" "$REMOTE" --default --mode=merge3 --to="$MERGED"]
-- 
1.8.1.1.10.g9255f3f

^ permalink raw reply related

* [PATCH 1/3] mergetool--lib: fix startup options for gvimdiff tool
From: Alexey Shumkin @ 2013-01-24  7:16 UTC (permalink / raw)
  To: git; +Cc: Pat Thoyts, Alexey Shumkin, Junio C Hamano

Options are taken from <Git source>/mergetools/vim

Signed-off-by: Alexey Shumkin <Alex.Crezoff@gmail.com>
---
 git-gui/lib/mergetool.tcl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/git-gui/lib/mergetool.tcl b/git-gui/lib/mergetool.tcl
index 3c8e73b..4fc1cab 100644
--- a/git-gui/lib/mergetool.tcl
+++ b/git-gui/lib/mergetool.tcl
@@ -211,7 +211,13 @@ proc merge_resolve_tool2 {} {
 		}
 	}
 	gvimdiff {
-		set cmdline [list "$merge_tool_path" -f "$LOCAL" "$MERGED" "$REMOTE"]
+		if {$base_stage ne {}} {
+			set cmdline [list "$merge_tool_path" -f -d -c "wincmd J" \
+				"$MERGED" "$LOCAL" "$BASE" "$REMOTE"]
+		} else {
+			set cmdline [list "$merge_tool_path" -f -d -c "wincmd l" \
+				"$LOCAL" "$MERGED" "$REMOTE"]
+		}
 	}
 	kdiff3 {
 		if {$base_stage ne {}} {
-- 
1.8.1.1.10.g9255f3f

^ permalink raw reply related

* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
From: Jeff King @ 2013-01-24  7:07 UTC (permalink / raw)
  To: Jonathon Mah; +Cc: Junio C Hamano, git
In-Reply-To: <8988071A-1DF3-463E-8AF9-AE4EA200D786@me.com>

On Wed, Jan 23, 2013 at 01:25:04PM -0800, Jonathon Mah wrote:

> Several areas of code would free buffers for object structs that
> contained them ("struct tree" and "struct commit"), but without clearing
> the "parsed" flag. parse_object would clear the flag for "struct tree",
> but commits would remain in an invalid state (marked as parsed but with
> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
> invalid objects stay around and can lead to bad behavior.
> 
> In particular, running pickaxe on all refs in a repository with a cached
> textconv could segfault. If the textconv cache ref was evaluated first
> by cmd_log_walk, a subsequent notes_cache_match_validity call would
> dereference NULL.

Just to be sure I understand, what is going on is something like this?

  Log reads all refs, including notes. After showing the notes commit,
  log frees the buffer to save memory.  Later, doing the diff on a
  commit causes us to use the notes_cache mechanism. That will look at
  the commit at the tip of the notes ref, which log has already
  processed. It will call parse_commit, but that will do nothing, as the
  parsed flag is already set, but the commit buffer remains NULL.

I wonder if this could be the source of the segfault here, too:

  http://thread.gmane.org/gmane.comp.version-control.git/214322/focus=214355

If you have refs pointing to both a submodule and its superproject, a
"log --all --submodule -p" might process the submodule's commits first,
free their buffers, and then want to show them again as part of the
submodule diff. This code in builtin/log.c:

                if (!rev->reflog_info) {
                        /* we allow cycles in reflog ancestry */
                        free(commit->buffer);
                        commit->buffer = NULL;
                }

assumes we will only ever look at a commit once (except for
reflog-walking), but submodule diff violates that.

-Peff

^ permalink raw reply

* Re: [PATCH v4 3/3] push: introduce REJECT_FETCH_FIRST and REJECT_NEEDS_FORCE
From: Jeff King @ 2013-01-24  6:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Chris Rorvick
In-Reply-To: <1358978130-12216-4-git-send-email-gitster@pobox.com>

On Wed, Jan 23, 2013 at 01:55:30PM -0800, Junio C Hamano wrote:

> If we do not have the current object at the tip of the remote, we do
> not even know that object, when fetched, is something that can be
> merged.  In such a case, suggesting to pull first just like
> non-fast-forward case may not be technically correct, but in
> practice, most such failures are seen when you try to push your work
> to a branch without knowing that somebody else already pushed to
> update the same branch since you forked, so "pull first" would work
> as a suggestion most of the time.
> 
> In these cases, the current code already rejects such a push on the
> client end, but we used the same error and advice messages as the
> ones used when rejecting a non-fast-forward push, i.e. pull from
> there and integrate before pushing again.  Introduce new
> rejection reasons and reword the messages appropriately.

So obviously from our previous discussion, I agree with the general
behavior of this patch. Let me get nit-picky on the message itself,
though:

> +static const char message_advice_ref_fetch_first[] =
> +	N_("Updates were rejected because you do not have the object at the tip\n"
> +	   "of the remote. You may want to first merge the remote changes (e.g.\n"
> +	   " 'git pull') before pushing again.\n"
> +	   "See the 'Note about fast-forwards' in 'git push --help' for details.");
> +

The condition that triggers this message is going to come up fairly
often for new git users (e.g., anyone using a central repo model), which
I think is why the original message_advice_pull_before_push has gotten
so much attention.  And in most cases, users will be seeing this message
now instead of "pull before push", because the common triggering cause
is somebody else pushing unrelated work.

The existing message says:

  Updates were rejected because a pushed branch tip is behind its remote
  counterpart. Check out this branch and merge the remote changes
  (e.g. 'git pull') before pushing again.

I wonder: will the new message be as comprehensible to a new user as the
old?

They are quite similar, but something about the presence of the word
"behind" in the latter makes me think it helps explain what is going on
a bit more. When I read the new one, my first question is "why don't I
have that object?". Of course, saying "behind" in this case would not be
strictly accurate, because we do not even know the remote has a commit.

I wonder if we can reword it to explain more about why we do not have
the object, without getting too inaccurate. Something like:

  Updates were rejected because the remote contains objects that you do
  not have locally. This is usually caused by another repository pushing
  to the same ref. You may want to first merge the remote changes (e.g.,
  'git pull') before pushing again.

I was also tempted to s/objects/work/, which is more vague, but is less
jargon-y for new users who do not know how git works.

Also, how should this interact with the checkout-then-pull-then-push
advice? We make a distinction for the non-fastforward case between HEAD
and other refs. Should we be making the same distinction here?

-Peff

^ permalink raw reply

* Re: [PATCH v2 2/3] push: introduce REJECT_FETCH_FIRST and REJECT_NEEDS_FORCE
From: Jeff King @ 2013-01-24  6:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Chris Rorvick
In-Reply-To: <7vip6nj22m.fsf@alter.siamese.dyndns.org>

On Wed, Jan 23, 2013 at 08:28:49AM -0800, Junio C Hamano wrote:

> How about doing this?
> 
> For "needs force" cases, we say this instead:
> 
>  hint: you cannot update a ref that points at a non-commit object, or
>  hint: update a ref to point at a non-commit object, without --force.
> 
> Being explicit about "non-commit" twice will catch user's eyes and
> cause him to double check that it is not a mistyped LHS of the push
> refspec (if he is sending a non-commit) or mistyped RHS (if the ref
> is pointing at a non-commit).  If he _is_ trying to push a blob out,
> the advice makes it clear what to do next: he does want to force it.

Yeah, I think that is sensible.

> Note that you _could_ split the "needs force" case into two, namely,
> "cannot replace a non-commit" and "cannot push a non-commit".  You
> could even further split them [...etc...]

I do not think it is worth worrying too much about. This should really
not happen very often, and the user should be able to investigate and
figure out what is going on. I think making the error message extremely
specific is just going to end up making it harder to understand.

> If we did that, then we could loosen the "You should fetch first"
> case to say something like this:
> 
>  hint: you do not have the object at the tip of the remote ref;
>  hint: perhaps you want to pull from there first?

Yeah, better. I'll comment on the specific message you used in response
to the patch itself.

> This explicitly denies one of Chris's wish "we shouldn't suggest to
> merge something that we may not be able to", but in the "You should
> fetch first" case, we cannot fundamentally know if we can merge
> until we fetch.  I agree with you that the most common case is that
> the unknown object is a commit, and that suggesting to pull is a
> good compromise.

I thought the wish was more about "we shouldn't suggest to merge
something we _know_ we will not be able to", and you are still handling
that (i.e., the "needs force" case).

-Peff

^ permalink raw reply

* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
From: Jeff King @ 2013-01-24  6:33 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathon Mah, git
In-Reply-To: <7vfw1rfmw2.fsf@alter.siamese.dyndns.org>

On Wed, Jan 23, 2013 at 04:25:01PM -0800, Junio C Hamano wrote:

> > With the object cache, isn't modifying the object unsafe in
> > general? Instead of auditing code paths, it's now necessary to
> > audit _all_ code that uses "struct object", which seems
> > infeasible.
> 
> The object layer was designed with "run one thing and one thing
> well, and then let the _exit(2) take care of the clean-up" model in
> mind; modifying the object, e.g. updating commit->parents list,
> in-core by revision traversal machinery is very much within the
> scope of its intended uses.

Yeah, although I think the "parsed" flag really is a bit overloaded for
commits.

Most code which uses "struct commit" will want the parents, timestamp,
and tree information filled in. And so they call parse_commit to make
sure that is the case. Afterwards, those fields are valid and the
"parsed" flag is set. The buffer field may or may not be valid
afterwards, depending on save_commit_buffer.

Some other code may also care about the rest of the commit information.
It also calls parse_commit, but with save_commit_buffer turned on.
However, that may or may not actually initialize the buffer, depending
on who has been using the object before us.

It works in practice most of the time because we only perform one "task"
per invocation, and that task either keeps the commit messages around
the first time, or doesn't. But it is a bit of a ticking time bomb
anytime we violate that assumption.

I think it would be saner for callers who only care about ancestry info
call to call parse_commit, and then anybody who is about to access the
buffer to call a new ensure_commit_buffer function, which would make
sure the buffer is accessible (even if save_commit_buffer is false).

Then save_commit_buffer becomes just an optimization: save it for later
during parsing, so we don't have to do it later. That would also let us
optimize better if we end up with a commit ancestry cache which can do
parse_commit much cheaper than accessing the full object.

For example, my commit cache patches hook into parse_commit, but they
can only do so safely when save_commit_buffer is false. So git-rev-list
benefits, but git-log does not. However, if we knew that log would
lazily load the commit data when needed, we could use the cache
there, too. It would not be a win if you are showing every commit
anyway, but if you have limiting that does not depend on the commit
object itself (e.g., path limiting in the tree), you could avoid loading
some commits entirely.

> > I'm just trying to fix the segfault demonstrated in the test
> > attached to the patch.
> 
> Can offending readers that dereference NULL without checking if
> buffer has been freed be updated so that they read_sha1_file(), read
> the contents from the result returned from the function (instead of
> reading from .buffer), and free the memory when they are done?

Looks like builtin/blame.c:get_commit_info does this already, although
it leaves the buffer attached to the commit and does not free it.

The ensure_commit_buffer function could look something like:

  int ensure_commit_buffer(struct commit *item)
  {
          enum object_type type;
          unsigned long size;

          if (!item)
                  return -1;
          if (!item->object.parsed)
                  return parse_commit(item);
          if (item->buffer)
                  return 0;

          item->buffer = read_sha1_file(item->object.sha1, &type, &size);
          if (!item->buffer)
                  return error("Could not read %s",
                               sha1_to_hex(item->object.sha1);
          return 0;
  }

and blame could replace its in-line processing with that. It does leave
open the question of whether callers should free() the result. But that
would be up to each user of the object (and it would be an optimization
issue, not a correctness issue, as long as each user called
ensure_commit_buffer before accessing it).

But the first step there would be to audit all of the accesses of
commit->buffer to make sure that they call ensure_commit_buffer
(presumably they already call parse_commit).

-Peff

^ permalink raw reply

* Re: [PATCH/RFC] Revoke write access to refs and odb after importing another repo's odb
From: Duy Nguyen @ 2013-01-24  5:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, git, Heiko Voigt
In-Reply-To: <7vwqv3fw2b.fsf@alter.siamese.dyndns.org>

On Thu, Jan 24, 2013 at 4:06 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Jens Lehmann <Jens.Lehmann@web.de> writes:
>
>> This is a false positive. The merge algorithm picked a fast-forward
>> in a submodule as a proper merge result and records that in a
>> gitlink. But as Duy pointed out this could be easily fixed by
>> turning the readonly flag off in that case.
>
> I see that as "easily circumvented and not an effective protection",
> though.
>
> In theory, adding a gitlink to the index, removing a gitlink to the
> index and modifying an existing gitlink in the index to another
> gitlink in the index and writing the resulting in-core index out to
> the on-disk index should be allowed, even after objects from the
> submodule object database have contaminated our in-core object pool,
> as long as you do not run cache_tree_update().  I am not sure if that
> single loophole would be sufficient, though.

The problem is we don't know which entries are updated in index. We
don't keep track of them. And I think in the unpack-trees case, we
scape the whole index then copy over, making it look like the whole
index is updated (even with the same content). One way to check this
is verify the source of all non-gitlink entries in index before
writing to disk (only when readonly flag is on, of course).
sha1_object_info_extended() should help (or be extended to do the
job). Hmm.. if we do this, we could also verify if new sha-1 objects
do not refer to an external source, if so allow them to be created.
-- 
Duy

^ permalink raw reply

* Re: [PATCH v3 01/10] wildmatch: fix "**" special case
From: Duy Nguyen @ 2013-01-24  5:51 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vwqv3dw2n.fsf@alter.siamese.dyndns.org>

On Thu, Jan 24, 2013 at 11:49 AM, Junio C Hamano <gitster@pobox.com> wrote:
>> The only problem I see is, without the version string, there's no way
>> to know if "**" is supported. Old git versions will happily take "**"
>> and interpret as "*". When you advise someone to use "**" you might
>> need to add "check if you have this version of git". This problem does
>> not exist with pathspec magic like :(glob)
>
> OK, so what do we want to do when we do the real "USE_WILDMATCH"
> that is not the current experimental curiosity?  Use ":(wild)" or
> something?

I don't think we need to support two different sets of wildcards in
the long run. I'm thinking of adding ":(glob)" with WM_PATHNAME.
Pathspec without :(glob) still uses the current wildcards (i.e. no
FNM_PATHNAME). At some point, like 2.0, we either switch the behavior
of patterns-without-:(glob) to WM_PATHNAME, or just disable wildcards
when :(glob) is not present.
-- 
Duy

^ permalink raw reply

* Re: [PATCH v3 0/2] Make git-svn work with gitdir links
From: Junio C Hamano @ 2013-01-24  5:29 UTC (permalink / raw)
  To: Eric Wong; +Cc: Barry Wardell, git
In-Reply-To: <20130123023235.GA24135@dcvr.yhbt.net>

Eric Wong <normalperson@yhbt.net> writes:

> diff --git a/git-svn.perl b/git-svn.perl
> index c232798..e5bd292 100755
> --- a/git-svn.perl
> +++ b/git-svn.perl
> @@ -332,11 +332,13 @@ if ($cmd && $cmd =~ /(?:clone|init|multi-init)$/) {
>  		$ENV{GIT_DIR} = command_oneline([qw/rev-parse --git-dir/]);
>  	} "Unable to find .git directory\n";
>  	my $cdup = undef;
> +	my $git_dir = delete $ENV{GIT_DIR};
>  	git_cmd_try {
>  		$cdup = command_oneline(qw/rev-parse --show-cdup/);
>  		chomp $cdup if ($cdup);
>  		$cdup = "." unless ($cdup && length $cdup);
> -	} "Already at toplevel, but $ENV{GIT_DIR} not found\n";
> +	} "Already at toplevel, but $git_dir not found\n";
> +	$ENV{GIT_DIR} = $git_dir;
>  	chdir $cdup or die "Unable to chdir up to '$cdup'\n";
>  	$_repository = Git->repository(Repository => $ENV{GIT_DIR});
>  }

This does not look quite right, though.

Can't the user have his own $GIT_DIR when this command is invoked?
The first command_oneline() runs rev-parse with that environment and
get the user specified value of GIT_DIR in $ENV{GIT_DIR}, but by
doing a "delete" before running --show-cdup, you are not honoring
that GIT_DIR (and GIT_WORK_TREE if exists) the user gave you.  You
already used that GIT_DIR when you asked rev-parse --git-dir to find
what the GIT_DIR value should be, so you would be operating with
values of $git_dir and $cdup that you discovered in an inconsistent
way, no?

Shouldn't it be more like this instead?

	my ($git_dir, $cdup) = undef;
        try {
		$git_dir = command_oneline(qw(rev-parse --git-dir));
	} "Unable to ...";
        try {
		$cdup = command_oneline(qw(rev-parse --show-cdup));
		... tweak $cdup ...
	} "Unable to ...";
	if (defined $git_dir) { $ENV{GIT_DIR} = $git_dir; }
	chdir $cdup;

^ permalink raw reply

* Re: What's cooking in git.git (Jan 2013, #08; Tue, 22)
From: Junio C Hamano @ 2013-01-24  5:04 UTC (permalink / raw)
  To: John Keeping; +Cc: git, Eric S. Raymond, Chris Rorvick
In-Reply-To: <20130123211237.GR7498@serenity.lan>

John Keeping <john@keeping.me.uk> writes:

>> Is it "it does not work yet with cvsps3", or "it will not ever work
>> with cvsps3"?  The impression I am getting is that it is the latter.
>
> The existing script (git-cvsimport.perl) won't ever work with cvsps-3
> since features it relies on have been removed.

I think you knew I already knew that.  I was hoping that cvsimport-3
that has multiple backend support may be able to start working by
reading the fast-import stream cvsps3 produces, once you sort out
the "last exported timestamp" issue out.  As far as the end users
are concerned, they would still be using cvsimport, even though the
wrapper may redirect the invocation to cvsimport-3.

In any case, something like that will not happen in the near term,
if ever, so "cvsimport will not work if you only have cvsps3" is a
good thing to add to its documentation.

Care to roll a proper patch with a log message?  I'll discard the
topic for now and replace it with your documentation update.

>> Also, should we have a suggestion to people who are *not* performing
>> a one-shot import, i.e. doing incremental or bidirectional?
>
> As far as I know cvsps is the only backend that attempts to support
> partial exports but the support for that in its fast-export mode needs
> work before I would consider it reliable.  For now the existing
> git-cvsimport is the best option I'm aware of.

Thanks.

^ permalink raw reply

* Re: [PATCH v3 01/10] wildmatch: fix "**" special case
From: Junio C Hamano @ 2013-01-24  4:49 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: git
In-Reply-To: <CACsJy8DiVy7tcG_t2JENKoPSFWV24Tneh4q=upPPJML4VESMag@mail.gmail.com>

Duy Nguyen <pclouds@gmail.com> writes:

> If we do that, we need to do the same in tree_entry_interesting(). In
> other words, pathspec learns the new glob syntax. It's fine for an
> experimental flag like USE_WILDMATCH. But after fnmatch is replaced by
> wildmatch unconditionally (thus USE_WILDMATCH becomes obsolete), then
> what? Should people who expects new syntax with USE_WILDMATCH continue
> to have new syntax? How does a user know which syntax may be used in
> his friend's "git" binary?

Good point.

> On a related subject, I've been meaning to write a mail about the
> other use of patterns in git (e.g. in git-branch, git-tag,
> git-apply...) but never got around to. Some use without FNM_PATHNAME,
> some do and the document does not go into details about the
> differences. We might want to unify the syntax there too. It'll
> probably break backward compatibility so we can do the same when we
> switch pathspec syntax.

Right now, I think for-each-ref is the only one with FNM_PATHNAME.
With the experimental USE_WILDMATCH, "for-each-ref refs/**/master"
will give us what is naturally expected.  With a working wildmatch,
I think it probably makes sense to globally enable FNM_PATHNAME;
it would probably be nice if we could do so at Git 2.0 version bump
boundary, but I suspect we are not ready yet (as you pointed out,
there are still codepaths that need to be adjusted).

> The only problem I see is, without the version string, there's no way
> to know if "**" is supported. Old git versions will happily take "**"
> and interpret as "*". When you advise someone to use "**" you might
> need to add "check if you have this version of git". This problem does
> not exist with pathspec magic like :(glob)

OK, so what do we want to do when we do the real "USE_WILDMATCH"
that is not the current experimental curiosity?  Use ":(wild)" or
something?

^ permalink raw reply

* Re: [PATCH 0/3] fixup remaining cvsimport tests
From: Michael Haggerty @ 2013-01-24  3:15 UTC (permalink / raw)
  To: John Keeping; +Cc: Chris Rorvick, Junio C Hamano, git
In-Reply-To: <20130123110312.GK7498@serenity.lan>

On 01/23/2013 12:03 PM, John Keeping wrote:
> On Wed, Jan 23, 2013 at 10:54:36AM +0100, Michael Haggerty wrote:
>> On 01/20/2013 09:17 PM, Chris Rorvick wrote:
>>> I have never used cvs2git, but I suspect Eric's efforts in making it a
>>> potential backend for cvsimport are a better use of time.
> 
> Is it possible to perform an incremental import with cvs2git?  This
> seems to be the one use case where the old cvsimport script (with cvsps
> 2.x) still performs the best.
> 
> I suppose that just re-running the full import will do the right thing
> since the commits in Git should be identical, but would it be possible
> to do better given the right information about a previous run?

No, cvs2git does not support incremental imports.  One user has reported
that he *usually* gets identical commits for the overlapping history
when he re-runs a full import, and last I heard he was using this as a
kind of incremental import.  We make an effort to make imports
reproducible, at least when using a single version of cvs2git (for
example, we process things in deterministic order rather than the order
they happen come out of a file directory or Python hashmap).  But the
cycle-breaking heuristics in particular can give different results if
history is added, not to mention the fact that CVS allows the user to
make changes with retroactive and non-timestamped effects (e.g., adding
or removing files from an existing branch/tag, changing a file's default
branch from vendor to HEAD, changing the log messages of old revisions,
obsoleting revisions).  And if your repository is large, a full import
can take a while.

It would be possible to enhance cvs2git to handle incremental imports
(well, at least if you rule out a few CVS commands that change deep
history).  But I don't believe anybody is working on this.

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/

^ permalink raw reply

* Re: auto packing with simultaneous pushes: "error: Could not stat 'objects/[…]/[…]'"
From: Ivan D Vasin @ 2013-01-24  1:39 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vbocffmpn.fsf@alter.siamese.dyndns.org>

On Wed, Jan 23, 2013 at 7:28 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Ivan D Vasin <nisavid@gmail.com> writes:
>
>> my suggestion is that an auto pack should lock the repository,
>> preventing at least other auto packs (and perhaps other operations)
>> ...
>>
>> ``git fsck`` is successful on both of our repos and on the bare repo
>> to which we pushed.
>
> Successful after you pushed, or before you pushed, or both?

both

>
> I suspect both.
>
> I do not think such a lock is necessary for correctness of the
> operation, but running two auto packing sumultaneously is wasteful,
> so it would help performance.  But that would produce a larger
> problem.  What if your modified auto-packer takes a lock and then
> dies without relinquishing the lock?  The repository will never be
> repacked after such an event forever?

perhaps the lock could contain the PID of the auto pack process.  if
that PID has gone away, the lock is ignored and replaced with a new
one.

that's what comes to my mind.  of course, there could be other ways to
handle this that i'm not thinking of.

in any case, the error messages, though spurious, are alarming to the
uninformed user.  it looks like Git is saying that there is actual
data loss, where in fact there is none.  if Git doesn't prevent these
messages from appearing (via locking behavior or otherwise), then it
should at least annotate them with a message that describes their
possibly spurious nature and perhaps instructs the user to verify
everything with ``git fsck``.

^ permalink raw reply

* [PATCH] don't use timers if NO_SETITIMER is set
From: Sébastien Boisvert @ 2013-01-24  1:38 UTC (permalink / raw)
  To: git; +Cc: Sébastien Boisvert

With NO_SETITIMER, the user experience on legacy Lustre is fixed,
but there is no early progress.

The patch has no effect on the resulting git executable if NO_SETITIMER is
not set (the default). So by default this patch has no effect at all, which
is good.

git tests:

$ make clean
$ make NO_SETITIMER=YesPlease
$ make test NO_SETITIMER=YesPlease &> make-test.log

$ grep "^not ok" make-test.log |grep -v "# TODO known breakage"|wc -l
0
$ grep "^ok" make-test.log |wc -l
9531
$ grep "^not ok" make-test.log |wc -l
65

No timers with NO_SETITIMER:

$ objdump -d ./git|grep setitimer|wc -l
0
$ objdump -d ./git|grep alarm|wc -l
0

Timers without NO_SETITIMER:

$ objdump -d /software/apps/git/1.8.1/bin/git|grep setitimer|wc -l
5
$ objdump -d /software/apps/git/1.8.1/bin/git|grep alarm|wc -l
0

Signed-off-by: Sébastien Boisvert <sebastien.boisvert@calculquebec.ca>
---
 builtin/log.c |    7 +++++++
 daemon.c      |    6 ++++++
 progress.c    |    8 ++++++++
 upload-pack.c |    2 ++
 4 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/builtin/log.c b/builtin/log.c
index 8f0b2e8..f8321c7 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -198,7 +198,9 @@ static void show_early_header(struct rev_info *rev, const char *stage, int nr)
 	printf(_("Final output: %d %s\n"), nr, stage);
 }
 
+#ifndef NO_SETITIMER
 static struct itimerval early_output_timer;
+#endif
 
 static void log_show_early(struct rev_info *revs, struct commit_list *list)
 {
@@ -240,9 +242,12 @@ static void log_show_early(struct rev_info *revs, struct commit_list *list)
 	 * trigger every second even if we're blocked on a
 	 * reader!
 	 */
+
+	#ifndef NO_SETITIMER
 	early_output_timer.it_value.tv_sec = 0;
 	early_output_timer.it_value.tv_usec = 500000;
 	setitimer(ITIMER_REAL, &early_output_timer, NULL);
+	#endif
 }
 
 static void early_output(int signal)
@@ -274,9 +279,11 @@ static void setup_early_output(struct rev_info *rev)
 	 *
 	 * This is a one-time-only trigger.
 	 */
+	#ifndef NO_SETITIMER
 	early_output_timer.it_value.tv_sec = 0;
 	early_output_timer.it_value.tv_usec = 100000;
 	setitimer(ITIMER_REAL, &early_output_timer, NULL);
+	#endif
 }
 
 static void finish_early_output(struct rev_info *rev)
diff --git a/daemon.c b/daemon.c
index 4602b46..eb82c19 100644
--- a/daemon.c
+++ b/daemon.c
@@ -611,9 +611,15 @@ static int execute(void)
 	if (addr)
 		loginfo("Connection from %s:%s", addr, port);
 
+	#ifndef NO_SETITIMER
 	alarm(init_timeout ? init_timeout : timeout);
+	#endif
+
 	pktlen = packet_read_line(0, line, sizeof(line));
+
+	#ifndef NO_SETITIMER
 	alarm(0);
+	#endif
 
 	len = strlen(line);
 	if (pktlen != len)
diff --git a/progress.c b/progress.c
index 3971f49..b84ccc7 100644
--- a/progress.c
+++ b/progress.c
@@ -45,7 +45,10 @@ static void progress_interval(int signum)
 static void set_progress_signal(void)
 {
 	struct sigaction sa;
+
+	#ifndef NO_SETITIMER
 	struct itimerval v;
+	#endif
 
 	progress_update = 0;
 
@@ -55,16 +58,21 @@ static void set_progress_signal(void)
 	sa.sa_flags = SA_RESTART;
 	sigaction(SIGALRM, &sa, NULL);
 
+	#ifndef NO_SETITIMER
 	v.it_interval.tv_sec = 1;
 	v.it_interval.tv_usec = 0;
 	v.it_value = v.it_interval;
 	setitimer(ITIMER_REAL, &v, NULL);
+	#endif
 }
 
 static void clear_progress_signal(void)
 {
+	#ifndef NO_SETITIMER
 	struct itimerval v = {{0,},};
 	setitimer(ITIMER_REAL, &v, NULL);
+	#endif
+
 	signal(SIGALRM, SIG_IGN);
 	progress_update = 0;
 }
diff --git a/upload-pack.c b/upload-pack.c
index 95d8313..e0b8b32 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -47,7 +47,9 @@ static int stateless_rpc;
 
 static void reset_timeout(void)
 {
+	#ifndef NO_SETITIMER
 	alarm(timeout);
+	#endif
 }
 
 static int strip(char *line, int len)
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH] git-svn: cleanup sprintf usage for uppercasing hex
From: Jonathan Nieder @ 2013-01-24  1:33 UTC (permalink / raw)
  To: Eric Wong; +Cc: git, Junio C Hamano
In-Reply-To: <20130124012810.GA8096@dcvr.yhbt.net>

Eric Wong wrote:

> We do not need to call uc() separately for sprintf("%x")
> as sprintf("%X") is available.

For what it's worth,
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

^ permalink raw reply

* [PULL] git-svn updates for master
From: Eric Wong @ 2013-01-24  1:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Barry Wardell
In-Reply-To: <20130124012810.GA8096@dcvr.yhbt.net>

The following changes since commit ec3ae6ec46ed48383ae40643990f169b65a563cc:

  Merge git://ozlabs.org/~paulus/gitk (2013-01-23 08:35:03 -0800)

are available in the git repository at:


  git://bogomips.org/git-svn master

for you to fetch changes up to 812ed405ac961093b7eb916246d5f288630edfb2:

  git-svn: cleanup sprintf usage for uppercasing hex (2013-01-24 01:30:04 +0000)

----------------------------------------------------------------
Barry Wardell (1):
      git-svn: Simplify calculation of GIT_DIR

Eric Wong (1):
      git-svn: cleanup sprintf usage for uppercasing hex

 git-svn.perl             | 38 +++++++++++++++-----------------------
 perl/Git/SVN.pm          |  4 ++--
 perl/Git/SVN/Editor.pm   |  2 +-
 t/t9100-git-svn-basic.sh |  8 ++++++++
 4 files changed, 26 insertions(+), 26 deletions(-)

-- 
Eric Wong

^ permalink raw reply

* Re: [PATCH/RFC] Revoke write access to refs and odb after importing another repo's odb
From: Duy Nguyen @ 2013-01-24  1:30 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jens Lehmann
In-Reply-To: <7v1udbj0kt.fsf@alter.siamese.dyndns.org>

On Thu, Jan 24, 2013 at 12:01 AM, Junio C Hamano <gitster@pobox.com> wrote:
> I however have this suspicion that this will become a losing battle
> and we would be better off getting rid of add_submodule_odb();
> instead operations that work across repositories will be done as a
> subprocess, which will get us back closer to one of the original
> design goals of submodule support to have a clear separation between
> the superproject and its submodules.

It does not have to be subprocess. Thomas Rast did some work on
support multithread access to object db by basically replicating all
datastructure per thread. If that work is complete, we have something
like "odb container" that could be used to access objects from another
repository and it won't contaminate the original odb. The same thing
can be done for ref and index access.
-- 
Duy

^ permalink raw reply

* [PATCH] git-svn: cleanup sprintf usage for uppercasing hex
From: Eric Wong @ 2013-01-24  1:28 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

We do not need to call uc() separately for sprintf("%x")
as sprintf("%X") is available.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
 perl/Git/SVN.pm        | 4 ++--
 perl/Git/SVN/Editor.pm | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index 59215fa..490e330 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -490,7 +490,7 @@ sub refname {
 	#
 	# Additionally, % must be escaped because it is used for escaping
 	# and we want our escaped refname to be reversible
-	$refname =~ s{([ \%~\^:\?\*\[\t])}{uc sprintf('%%%02x',ord($1))}eg;
+	$refname =~ s{([ \%~\^:\?\*\[\t])}{sprintf('%%%02X',ord($1))}eg;
 
 	# no slash-separated component can begin with a dot .
 	# /.* becomes /%2E*
@@ -2377,7 +2377,7 @@ sub map_path {
 
 sub uri_encode {
 	my ($f) = @_;
-	$f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#uc sprintf("%%%02x",ord($1))#eg;
+	$f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#sprintf("%%%02X",ord($1))#eg;
 	$f
 }
 
diff --git a/perl/Git/SVN/Editor.pm b/perl/Git/SVN/Editor.pm
index 3db1521..fa0d3c6 100644
--- a/perl/Git/SVN/Editor.pm
+++ b/perl/Git/SVN/Editor.pm
@@ -146,7 +146,7 @@ sub url_path {
 	my ($self, $path) = @_;
 	if ($self->{url} =~ m#^https?://#) {
 		# characters are taken from subversion/libsvn_subr/path.c
-		$path =~ s#([^~a-zA-Z0-9_./!$&'()*+,-])#uc sprintf("%%%02x",ord($1))#eg;
+		$path =~ s#([^~a-zA-Z0-9_./!$&'()*+,-])#sprintf("%%%02X",ord($1))#eg;
 	}
 	$self->{url} . '/' . $self->repo_path($path);
 }
-- 
Eric Wong

^ permalink raw reply related

* Re: [PATCH v3 0/2] Make git-svn work with gitdir links
From: Eric Wong @ 2013-01-24  1:27 UTC (permalink / raw)
  To: Barry Wardell; +Cc: git, Junio C Hamano
In-Reply-To: <CAHrK+Z-uXAEgd_HuisbioO8=D7DEdmceeUEz3A1Jr_rtm7a3WA@mail.gmail.com>

Barry Wardell <barry.wardell@gmail.com> wrote:
> On Wed, Jan 23, 2013 at 2:32 AM, Eric Wong <normalperson@yhbt.net> wrote:
> > Does squashing this on top of your changes fix all your failures?
> > I plan on squashing both your changes together with the below:
> 
> Yes, I can confirm that applying this patch on top of mine makes all
> git-svn tests pass again. I have also re-run the tests without my patch
> applied and found that they do all indeed pass, so I apologize for my
> previous incorrect comment.

Thanks, squashed, tested and pushed (have another unrelated patch coming)

^ permalink raw reply

* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
From: Junio C Hamano @ 2013-01-24  0:25 UTC (permalink / raw)
  To: Jonathon Mah; +Cc: git, Jeff King
In-Reply-To: <1C90CE32-F559-4E76-915E-93642F614552@me.com>

Jonathon Mah <jmah@me.com> writes:

> No, I haven't audited the code paths (I have only the loosest
> familiarity with the source). Indeed, I found that clearing the
> 'parsed' flag in fsck.c (traverse_one_object()) is incorrect and
> causes test failures.
>
> With the object cache, isn't modifying the object unsafe in
> general? Instead of auditing code paths, it's now necessary to
> audit _all_ code that uses "struct object", which seems
> infeasible.

The object layer was designed with "run one thing and one thing
well, and then let the _exit(2) take care of the clean-up" model in
mind; modifying the object, e.g. updating commit->parents list,
in-core by revision traversal machinery is very much within the
scope of its intended uses.

> I'm just trying to fix the segfault demonstrated in the test
> attached to the patch.

Can offending readers that dereference NULL without checking if
buffer has been freed be updated so that they read_sha1_file(), read
the contents from the result returned from the function (instead of
reading from .buffer), and free the memory when they are done?

That would be a fix that would be very much isolated and easy to
audit.

^ permalink raw reply

* auto packing with simultaneous pushes: "error: Could not stat 'objects/[…]/[…]'"
From: Ivan D Vasin @ 2013-01-23 23:50 UTC (permalink / raw)
  To: git

my coworker and i just pushed some commits seconds apart from each
other.  both of our pushes triggered a server-side auto pack
(presumably as part of ``receive-pack``, as a result of the setting
``receive.autogc = true``).  each auto pack produced multiple errors
of the form ``error: Could not stat 'objects/[…]/[…]'``.  there
appears to be no overlap between the two auto packs' lists of failed
object hashes.

my suspicion is that the two auto pack processes each generated a list
of objects to pack, then proceeded to pack them, removing the
corresponding files as they went along.  as each one encountered an
entry in its list that was previously handled by the other process, it
found that it was unable to stat the corresponding file because it had
been removed by the other process.

my suggestion is that an auto pack should lock the repository,
preventing at least other auto packs (and perhaps other operations)
from running until it is finished.  alternatively, the auto pack can
skip over objects that were already packed by another process;
however, i imagine this would be much less efficient if done
correctly.

here is the console output from my push:

$ git push
X11 forwarding request failed on channel 0
Counting objects: 32, done.
Delta compression using up to 8 threads.
Compressing objects: 100% (18/18), done.
Writing objects: 100% (18/18), 2.70 KiB, done.
Total 18 (delta 12), reused 0 (delta 0)
Auto packing the repository for optimum performance.
error: Could not stat 'objects/00/
c706ccf0ef06092134ccb1f9fdf0e7d39a9d5f'
error: Could not stat 'objects/02/7c2932047c9c9c7bacd6e1abdc3810866161bc'
error: Could not stat 'objects/13/b2c3f0a55e8de099553cad6901a150f19b83c2'
error: Could not stat 'objects/2a/3992fa9c096f7514a62781a3c3923a996a7073'
error: Could not stat 'objects/3d/cb6e9055b9b51911aae52f4bb0ca8d8719d645'
error: Could not stat 'objects/50/c2f6d8e0de1eba988256597cc1d6e1a387902c'
error: Could not stat 'objects/50/9f494f6b60741957a45cf51f8d097f0578c89a'
error: Could not stat 'objects/52/6597c12d730c5903d46f598c6ec438cfabfe0f'
error: Could not stat 'objects/52/a6b4ee4bebb043d21858455fd0cd33b4812a42'
error: Could not stat 'objects/53/71c6f99023efecb3a4c16daeeee63f534d63b2'
error: Could not stat 'objects/5f/bd49df3c4abdc93c7118f762ac770a0fcd6ce4'
error: Could not stat 'objects/67/900e6e598e8e1c7bde5776fd220f7daf147cde'
error: Could not stat 'objects/72/42ecd310d0286da6c3967985c6446cf105b0bd'
error: Could not stat 'objects/79/4afecb2d348dde5ad4d24cf62c8fcd4db3b63a'
error: Could not stat 'objects/7e/4c612c51c5d76f03f12c04241d3b3c5aa212e4'
error: Could not stat 'objects/84/fd1e038d4345bac1fa2b33edb73fd73813ac4f'
error: Could not stat 'objects/89/c32fdbf3400c797d01659f9ee644aa8a713574'
error: Could not stat 'objects/8b/cbe10b0d09325b49876416341d6cf4bccc9e35'
error: Could not stat 'objects/8d/6dcc95d6bea57cfafe8a08fed7947561682b68'
error: Could not stat 'objects/b9/67af0f23deb87f1708689d159ee6444503462a'
error: Could not stat 'objects/bb/9d6fafe3ba2189582ac48088ee6f732755dddb'
error: Could not stat 'objects/e0/549c20e56d6ec270ec3ffee6c93a6d6cb8f6b5'
error: Could not stat 'objects/f2/737f0d8635265a56d57c27a3ddf155da48b07a'
error: Could not stat 'objects/fc/8b2f0837aefa21d86de65f6230c86f64addb8d'
error: Could not stat 'objects/fc/ac21d190770fdee23e5e69cdba36eefd6e001a'
error: Could not stat 'objects/fd/ae521d6a3404ceabff223385d7b5b5c783f0fe'
To git@code.adverplex.com:syseng
   fb49a86..e161cf5  HEAD -> uber_release_alpha

here is the console output from my coworker's push:

$ git push
Counting objects: 181, done.
Delta compression using up to 4 threads.
Compressing objects: 100% (154/154), done.
Writing objects: 100% (159/159), 1.81 MiB, done.
Total 159 (delta 74), reused 0 (delta 0)
Auto packing the repository for optimum performance.
error: Could not stat 'objects/06/b2aee0b4adca015d072be4ef6a655c118b6af3'
error: Could not stat 'objects/0d/66f41003c9112b8093dfee5fd56e8db25a142b'
error: Could not stat 'objects/0d/6c3e296fb7b83998677d207f9df6a69e144168'
error: Could not stat 'objects/14/64e2bd863a15e08be7aa2f33b8df5b5f695608'
error: Could not stat 'objects/14/19cad4e2a38720186eb36b3c756d05e1963877'
error: Could not stat 'objects/16/1b9e65004a9120ea250ef8ef8292d5416ed984'
error: Could not stat 'objects/23/b6bc50c16fe3740393c8a08cf57cc07a56c778'
error: Could not stat 'objects/60/1d7c562cafaf6c71c57dbbd0bab55c3698d0c7'
error: Could not stat 'objects/6c/c5c7ededba86ce291b531def840e757ce3eccc'
error: Could not stat 'objects/74/8d6bece2b4ccec41584d22152d0fd903e01908'
error: Could not stat 'objects/9a/6c21779e7c063b87bbf055e4c6b6baecc8a3e1'
error: Could not stat 'objects/9a/6519e7bb625f681ead373e9a9226f862ee1550'
error: Could not stat 'objects/9b/7027422cbce1f5f2c5929ae1313ecee2cd3537'
error: Could not stat 'objects/a4/7e0b5906c2b552158a1e5943063e983532988f'
error: Could not stat 'objects/a4/6db00ea7b7d2e618731d7b4f1e59b5f0276aff'
error: Could not stat 'objects/ab/b3a8fd9585af5a7f6dbd609e6b4ed8c4359ca6'
error: Could not stat 'objects/ad/0c98f2674807beee093cead1179d95756c1788'
error: Could not stat 'objects/bf/4fd1b9cc617d3fc832350783064418e0d7fdfd'
error: Could not stat 'objects/bf/c65d6bc705dfc5018c3d25228a4469d069f3b8'
error: Could not stat 'objects/c0/adb5b39efd31698789ee8aefcedf91913fc7f1'
error: Could not stat 'objects/ca/f4ee60f223039d2ec6185adbb4c28ab7df461e'
error: Could not stat 'objects/ca/e3b7c60b4b3a34bba4b4a5738edf0f7786a784'
error: Could not stat 'objects/cb/4a470752088b708290713b0a513e22bb0dcb15'
error: Could not stat 'objects/d3/d44ba75332e3e7e21a89eafe724e682cf124dd'
error: Could not stat 'objects/d3/8a635a420af938f680ab30c6252fdddd448a5d'
error: Could not stat 'objects/d8/f102045fc608b65cb1d75ad8de11a673ebd656'
error: Could not stat 'objects/da/97ac29d9024f589b4aec46bc0ea1f597b00802'
error: Could not stat 'objects/e5/3e9d05c4e9a3e336453af3b43fe06dfa5c6d57'
error: Could not stat 'objects/ea/b586bcc6925bcb34cdeede1562a851d5b58d90'
error: Could not stat 'objects/ec/f2cf1dd8d6045242676cd22320902c085ecbb3'
error: Could not stat 'objects/f3/819de13ed535bdd92580e2f2b01580714188e1'
To git@code.adverplex.com:syseng
   fb49a86..a39c897  master -> master

``git fsck`` is successful on both of our repos and on the bare repo
to which we pushed.

^ permalink raw reply

* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
From: Jonathon Mah @ 2013-01-23 23:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jeff King
In-Reply-To: <7vsj5rfspy.fsf@alter.siamese.dyndns.org>

[Adding Jeff King to CC; I meant to copy you in the original but forgot, sorry]

On 2013-01-23, at 14:19, Junio C Hamano <gitster@pobox.com> wrote:

> Jonathon Mah <jmah@me.com> writes:
> 
>> Add a new function "free_object_buffer", which marks the object as
>> un-parsed and frees the buffer. Only trees and commits have buffers;
>> other types are not affected. If the tree or commit buffer is already
>> NULL, the "parsed" flag is still cleared so callers can control the free
>> themselves (index-pack.c uses this).
>> 
>> Several areas of code would free buffers for object structs that
>> contained them ("struct tree" and "struct commit"), but without clearing
>> the "parsed" flag. parse_object would clear the flag for "struct tree",
>> but commits would remain in an invalid state (marked as parsed but with
>> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
>> invalid objects stay around and can lead to bad behavior.
>> 
>> In particular, running pickaxe on all refs in a repository with a cached
>> textconv could segfault. If the textconv cache ref was evaluated first
>> by cmd_log_walk, a subsequent notes_cache_match_validity call would
>> dereference NULL.
> 
> Conceptually this is a right thing to do, but it is unclear why this
> conversion is safe in the existing code.
> 
> A codepath that used to free() and assign NULL to a buffer without
> resetting .parsed would have assumed that it can find out the parsed
> properties of the object (e.g. .parents) without re-parsing the
> object, and much more importantly, the modifications made by that
> codepath will not be clobbered by later call to parse_object().
> 
> IIRC, revision traversal machinery rewrites commit->parents but
> discards buffer when it knows that the log message is not needed
> (save_commit_buffer controls this behaviour).  I do not offhand know
> if there are other instances of existing code that depend on the
> current behaviour, but have you audited all the codepaths that are
> affected by this patch and codepaths that work on objects this patch
> unmarks their .parsed field will not have such a problem?

No, I haven't audited the code paths (I have only the loosest familiarity with the source). Indeed, I found that clearing the 'parsed' flag in fsck.c (traverse_one_object()) is incorrect and causes test failures.

With the object cache, isn't modifying the object unsafe in general? Instead of auditing code paths, it's now necessary to audit _all_ code that uses "struct object", which seems infeasible.

Anyway, I don't care about the implementation (Junio does that extremely well), I'm just trying to fix the segfault demonstrated in the test attached to the patch.



Jonathon Mah
me@JonathonMah.com

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox