* [PATCH] parse_object: clear "parsed" when freeing buffers
@ 2013-01-23 21:25 Jonathon Mah
2013-01-23 22:19 ` Junio C Hamano
2013-01-24 7:07 ` Jeff King
0 siblings, 2 replies; 9+ messages in thread
From: Jonathon Mah @ 2013-01-23 21:25 UTC (permalink / raw)
To: git
Add a new function "free_object_buffer", which marks the object as
un-parsed and frees the buffer. Only trees and commits have buffers;
other types are not affected. If the tree or commit buffer is already
NULL, the "parsed" flag is still cleared so callers can control the free
themselves (index-pack.c uses this).
Several areas of code would free buffers for object structs that
contained them ("struct tree" and "struct commit"), but without clearing
the "parsed" flag. parse_object would clear the flag for "struct tree",
but commits would remain in an invalid state (marked as parsed but with
a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
invalid objects stay around and can lead to bad behavior.
In particular, running pickaxe on all refs in a repository with a cached
textconv could segfault. If the textconv cache ref was evaluated first
by cmd_log_walk, a subsequent notes_cache_match_validity call would
dereference NULL.
Signed-off-by: Jonathon Mah <me@JonathonMah.com>
---
I found an old email where Jeff noted that this would be bad (yet the buffer manipulation remained).
<http://permalink.gmane.org/gmane.comp.version-control.git/188000>
builtin/fsck.c | 17 ++---------------
builtin/index-pack.c | 3 +++
builtin/log.c | 9 +++------
builtin/reflog.c | 3 +--
builtin/rev-list.c | 3 +--
http-push.c | 3 +--
list-objects.c | 3 +--
object.c | 25 +++++++++++++++++++++++--
object.h | 3 +++
reachable.c | 3 +--
revision.c | 3 +--
t/t4042-diff-textconv-caching.sh | 11 +++++++++++
upload-pack.c | 3 +--
walker.c | 5 +----
14 files changed, 53 insertions(+), 41 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb9a2cd..82b3612 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -133,10 +133,7 @@ static int traverse_one_object(struct object *obj)
return 1; /* error already displayed */
}
result = fsck_walk(obj, mark_object, obj);
- if (tree) {
- free(tree->buffer);
- tree->buffer = NULL;
- }
+ free_object_buffer(obj);
return result;
}
@@ -303,26 +300,16 @@ static int fsck_obj(struct object *obj)
if (fsck_object(obj, check_strict, fsck_error_func))
return -1;
- if (obj->type == OBJ_TREE) {
- struct tree *item = (struct tree *) obj;
-
- free(item->buffer);
- item->buffer = NULL;
- }
+ free_object_buffer(obj);
if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
-
- free(commit->buffer);
- commit->buffer = NULL;
-
if (!commit->parents && show_root)
printf("root %s\n", sha1_to_hex(commit->object.sha1));
}
if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
-
if (show_tags && tag->tagged) {
printf("tagged %s %s", typename(tag->tagged->type), sha1_to_hex(tag->tagged->sha1));
printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 43d364b..0eb39ae 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -750,13 +750,16 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
if (fsck_walk(obj, mark_link, NULL))
die(_("Not all child objects of %s are reachable"), sha1_to_hex(obj->sha1));
+ /* set buffer to NULL so it isn't freed */
if (obj->type == OBJ_TREE) {
struct tree *item = (struct tree *) obj;
item->buffer = NULL;
+ free_object_buffer(obj);
}
if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
commit->buffer = NULL;
+ free_object_buffer(obj);
}
obj->flags |= FLAG_CHECKED;
}
diff --git a/builtin/log.c b/builtin/log.c
index e7b7db1..433b874 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -305,11 +305,9 @@ static int cmd_log_walk(struct rev_info *rev)
* but we didn't actually show the commit.
*/
rev->max_count++;
- if (!rev->reflog_info) {
+ if (!rev->reflog_info)
/* we allow cycles in reflog ancestry */
- free(commit->buffer);
- commit->buffer = NULL;
- }
+ free_object_buffer(&commit->object);
free_commit_list(commit->parents);
commit->parents = NULL;
if (saved_nrl < rev->diffopt.needed_rename_limit)
@@ -1399,8 +1397,7 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix)
reopen_stdout(numbered_files ? NULL : commit, NULL, &rev, quiet))
die(_("Failed to create output files"));
shown = log_tree_commit(&rev, commit);
- free(commit->buffer);
- commit->buffer = NULL;
+ free_object_buffer(&commit->object);
/* We put one extra blank line between formatted
* patches and this flag is used by log-tree code
diff --git a/builtin/reflog.c b/builtin/reflog.c
index b3c9e27..c9a660f 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -94,8 +94,7 @@ static int tree_is_complete(const unsigned char *sha1)
complete = 0;
}
}
- free(tree->buffer);
- tree->buffer = NULL;
+ free_object_buffer(tree->buffer);
if (complete)
tree->object.flags |= SEEN;
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 67701be..6855892 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -170,8 +170,7 @@ static void finish_commit(struct commit *commit, void *data)
free_commit_list(commit->parents);
commit->parents = NULL;
}
- free(commit->buffer);
- commit->buffer = NULL;
+ free_object_buffer(&commit->object);
}
static void finish_object(struct object *obj,
diff --git a/http-push.c b/http-push.c
index 8701c12..042a410 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1347,8 +1347,7 @@ static struct object_list **process_tree(struct tree *tree,
break;
}
- free(tree->buffer);
- tree->buffer = NULL;
+ free_object_buffer(obj);
return p;
}
diff --git a/list-objects.c b/list-objects.c
index 3dd4a96..fb4b531 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -123,8 +123,7 @@ static void process_tree(struct rev_info *revs,
cb_data);
}
strbuf_setlen(base, baselen);
- free(tree->buffer);
- tree->buffer = NULL;
+ free_object_buffer(obj);
}
static void mark_edge_parents_uninteresting(struct commit *commit,
diff --git a/object.c b/object.c
index 4af3451..8e161f8 100644
--- a/object.c
+++ b/object.c
@@ -149,8 +149,6 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t
struct tree *tree = lookup_tree(sha1);
if (tree) {
obj = &tree->object;
- if (!tree->buffer)
- tree->object.parsed = 0;
if (!tree->object.parsed) {
if (parse_tree_buffer(tree, buffer, size))
return NULL;
@@ -225,6 +223,29 @@ struct object *parse_object(const unsigned char *sha1)
return NULL;
}
+void free_object_buffer(struct object *item)
+{
+ if (!item)
+ return;
+
+ if (item->type == OBJ_TREE) {
+ struct tree *tree = (struct tree *)item;
+ tree->object.parsed = 0;
+ tree->size = 0;
+ if (tree->buffer) {
+ free(tree->buffer);
+ tree->buffer = NULL;
+ }
+ } else if (item->type == OBJ_COMMIT) {
+ struct commit *commit = (struct commit *)item;
+ commit->object.parsed = 0;
+ if (commit->buffer) {
+ free(commit->buffer);
+ commit->buffer = NULL;
+ }
+ }
+}
+
struct object_list *object_list_insert(struct object *item,
struct object_list **list_p)
{
diff --git a/object.h b/object.h
index 6a97b6b..cbc730c 100644
--- a/object.h
+++ b/object.h
@@ -63,6 +63,9 @@ struct object *parse_object(const unsigned char *sha1);
*/
struct object *parse_object_buffer(const unsigned char *sha1, enum object_type type, unsigned long size, void *buffer, int *eaten_p);
+/** If the object's type has a buffer, it's freed and marked as un-parsed. */
+void free_object_buffer(struct object *item);
+
/** Returns the object, with potentially excess memory allocated. **/
struct object *lookup_unknown_object(const unsigned char *sha1);
diff --git a/reachable.c b/reachable.c
index bf79706..c29d3e0 100644
--- a/reachable.c
+++ b/reachable.c
@@ -80,8 +80,7 @@ static void process_tree(struct tree *tree,
else
process_blob(lookup_blob(entry.sha1), p, &me, entry.path, cp);
}
- free(tree->buffer);
- tree->buffer = NULL;
+ free_object_buffer(obj);
}
static void process_tag(struct tag *tag, struct object_array *p,
diff --git a/revision.c b/revision.c
index 95d21e6..43c5eec 100644
--- a/revision.c
+++ b/revision.c
@@ -133,8 +133,7 @@ void mark_tree_uninteresting(struct tree *tree)
* We don't care about the tree any more
* after it has been marked uninteresting.
*/
- free(tree->buffer);
- tree->buffer = NULL;
+ free_object_buffer(obj);
}
void mark_parents_uninteresting(struct commit *commit)
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 91f8198..d7e26ca 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,15 @@ test_expect_success 'switching diff driver produces correct results' '
test_cmp expect actual
'
+cat >expect <<EOF
+./helper other (refs/notes/textconv/magic)
+one
+EOF
+# add empty commit on master to make bug more reproducible
+test_expect_success 'pickaxe with cached textconv' '
+ git commit --allow-empty -m another &&
+ git log -S other --pretty=tformat:%s%d --all >actual &&
+ test_cmp expect actual
+'
+
test_done
diff --git a/upload-pack.c b/upload-pack.c
index 6142421..1feacbc 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -81,8 +81,7 @@ static void show_commit(struct commit *commit, void *data)
die("broken output pipe");
fputc('\n', pack_pipe);
fflush(pack_pipe);
- free(commit->buffer);
- commit->buffer = NULL;
+ free_object_buffer(&commit->object);
}
static void show_object(struct object *obj,
diff --git a/walker.c b/walker.c
index be389dc..bae4876 100644
--- a/walker.c
+++ b/walker.c
@@ -56,10 +56,7 @@ static int process_tree(struct walker *walker, struct tree *tree)
if (!obj || process(walker, obj))
return -1;
}
- free(tree->buffer);
- tree->buffer = NULL;
- tree->size = 0;
- tree->object.parsed = 0;
+ free_object_buffer(&tree->object);
return 0;
}
--
1.8.1.1
Jonathon Mah
me@JonathonMah.com
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-23 21:25 [PATCH] parse_object: clear "parsed" when freeing buffers Jonathon Mah
@ 2013-01-23 22:19 ` Junio C Hamano
2013-01-23 23:36 ` Jonathon Mah
2013-01-24 7:07 ` Jeff King
1 sibling, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2013-01-23 22:19 UTC (permalink / raw)
To: Jonathon Mah; +Cc: git
Jonathon Mah <jmah@me.com> writes:
> Add a new function "free_object_buffer", which marks the object as
> un-parsed and frees the buffer. Only trees and commits have buffers;
> other types are not affected. If the tree or commit buffer is already
> NULL, the "parsed" flag is still cleared so callers can control the free
> themselves (index-pack.c uses this).
>
> Several areas of code would free buffers for object structs that
> contained them ("struct tree" and "struct commit"), but without clearing
> the "parsed" flag. parse_object would clear the flag for "struct tree",
> but commits would remain in an invalid state (marked as parsed but with
> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
> invalid objects stay around and can lead to bad behavior.
>
> In particular, running pickaxe on all refs in a repository with a cached
> textconv could segfault. If the textconv cache ref was evaluated first
> by cmd_log_walk, a subsequent notes_cache_match_validity call would
> dereference NULL.
Conceptually this is a right thing to do, but it is unclear why this
conversion is safe in the existing code.
A codepath that used to free() and assign NULL to a buffer without
resetting .parsed would have assumed that it can find out the parsed
properties of the object (e.g. .parents) without re-parsing the
object, and much more importantly, the modifications made by that
codepath will not be clobbered by later call to parse_object().
IIRC, revision traversal machinery rewrites commit->parents but
discards buffer when it knows that the log message is not needed
(save_commit_buffer controls this behaviour). I do not offhand know
if there are other instances of existing code that depend on the
current behaviour, but have you audited all the codepaths that are
affected by this patch and codepaths that work on objects this patch
unmarks their .parsed field will not have such a problem?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-23 22:19 ` Junio C Hamano
@ 2013-01-23 23:36 ` Jonathon Mah
2013-01-24 0:25 ` Junio C Hamano
0 siblings, 1 reply; 9+ messages in thread
From: Jonathon Mah @ 2013-01-23 23:36 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Jeff King
[Adding Jeff King to CC; I meant to copy you in the original but forgot, sorry]
On 2013-01-23, at 14:19, Junio C Hamano <gitster@pobox.com> wrote:
> Jonathon Mah <jmah@me.com> writes:
>
>> Add a new function "free_object_buffer", which marks the object as
>> un-parsed and frees the buffer. Only trees and commits have buffers;
>> other types are not affected. If the tree or commit buffer is already
>> NULL, the "parsed" flag is still cleared so callers can control the free
>> themselves (index-pack.c uses this).
>>
>> Several areas of code would free buffers for object structs that
>> contained them ("struct tree" and "struct commit"), but without clearing
>> the "parsed" flag. parse_object would clear the flag for "struct tree",
>> but commits would remain in an invalid state (marked as parsed but with
>> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
>> invalid objects stay around and can lead to bad behavior.
>>
>> In particular, running pickaxe on all refs in a repository with a cached
>> textconv could segfault. If the textconv cache ref was evaluated first
>> by cmd_log_walk, a subsequent notes_cache_match_validity call would
>> dereference NULL.
>
> Conceptually this is a right thing to do, but it is unclear why this
> conversion is safe in the existing code.
>
> A codepath that used to free() and assign NULL to a buffer without
> resetting .parsed would have assumed that it can find out the parsed
> properties of the object (e.g. .parents) without re-parsing the
> object, and much more importantly, the modifications made by that
> codepath will not be clobbered by later call to parse_object().
>
> IIRC, revision traversal machinery rewrites commit->parents but
> discards buffer when it knows that the log message is not needed
> (save_commit_buffer controls this behaviour). I do not offhand know
> if there are other instances of existing code that depend on the
> current behaviour, but have you audited all the codepaths that are
> affected by this patch and codepaths that work on objects this patch
> unmarks their .parsed field will not have such a problem?
No, I haven't audited the code paths (I have only the loosest familiarity with the source). Indeed, I found that clearing the 'parsed' flag in fsck.c (traverse_one_object()) is incorrect and causes test failures.
With the object cache, isn't modifying the object unsafe in general? Instead of auditing code paths, it's now necessary to audit _all_ code that uses "struct object", which seems infeasible.
Anyway, I don't care about the implementation (Junio does that extremely well), I'm just trying to fix the segfault demonstrated in the test attached to the patch.
Jonathon Mah
me@JonathonMah.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-23 23:36 ` Jonathon Mah
@ 2013-01-24 0:25 ` Junio C Hamano
2013-01-24 6:33 ` Jeff King
0 siblings, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2013-01-24 0:25 UTC (permalink / raw)
To: Jonathon Mah; +Cc: git, Jeff King
Jonathon Mah <jmah@me.com> writes:
> No, I haven't audited the code paths (I have only the loosest
> familiarity with the source). Indeed, I found that clearing the
> 'parsed' flag in fsck.c (traverse_one_object()) is incorrect and
> causes test failures.
>
> With the object cache, isn't modifying the object unsafe in
> general? Instead of auditing code paths, it's now necessary to
> audit _all_ code that uses "struct object", which seems
> infeasible.
The object layer was designed with "run one thing and one thing
well, and then let the _exit(2) take care of the clean-up" model in
mind; modifying the object, e.g. updating commit->parents list,
in-core by revision traversal machinery is very much within the
scope of its intended uses.
> I'm just trying to fix the segfault demonstrated in the test
> attached to the patch.
Can offending readers that dereference NULL without checking if
buffer has been freed be updated so that they read_sha1_file(), read
the contents from the result returned from the function (instead of
reading from .buffer), and free the memory when they are done?
That would be a fix that would be very much isolated and easy to
audit.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-24 0:25 ` Junio C Hamano
@ 2013-01-24 6:33 ` Jeff King
2013-01-24 16:51 ` Junio C Hamano
0 siblings, 1 reply; 9+ messages in thread
From: Jeff King @ 2013-01-24 6:33 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Jonathon Mah, git
On Wed, Jan 23, 2013 at 04:25:01PM -0800, Junio C Hamano wrote:
> > With the object cache, isn't modifying the object unsafe in
> > general? Instead of auditing code paths, it's now necessary to
> > audit _all_ code that uses "struct object", which seems
> > infeasible.
>
> The object layer was designed with "run one thing and one thing
> well, and then let the _exit(2) take care of the clean-up" model in
> mind; modifying the object, e.g. updating commit->parents list,
> in-core by revision traversal machinery is very much within the
> scope of its intended uses.
Yeah, although I think the "parsed" flag really is a bit overloaded for
commits.
Most code which uses "struct commit" will want the parents, timestamp,
and tree information filled in. And so they call parse_commit to make
sure that is the case. Afterwards, those fields are valid and the
"parsed" flag is set. The buffer field may or may not be valid
afterwards, depending on save_commit_buffer.
Some other code may also care about the rest of the commit information.
It also calls parse_commit, but with save_commit_buffer turned on.
However, that may or may not actually initialize the buffer, depending
on who has been using the object before us.
It works in practice most of the time because we only perform one "task"
per invocation, and that task either keeps the commit messages around
the first time, or doesn't. But it is a bit of a ticking time bomb
anytime we violate that assumption.
I think it would be saner for callers who only care about ancestry info
call to call parse_commit, and then anybody who is about to access the
buffer to call a new ensure_commit_buffer function, which would make
sure the buffer is accessible (even if save_commit_buffer is false).
Then save_commit_buffer becomes just an optimization: save it for later
during parsing, so we don't have to do it later. That would also let us
optimize better if we end up with a commit ancestry cache which can do
parse_commit much cheaper than accessing the full object.
For example, my commit cache patches hook into parse_commit, but they
can only do so safely when save_commit_buffer is false. So git-rev-list
benefits, but git-log does not. However, if we knew that log would
lazily load the commit data when needed, we could use the cache
there, too. It would not be a win if you are showing every commit
anyway, but if you have limiting that does not depend on the commit
object itself (e.g., path limiting in the tree), you could avoid loading
some commits entirely.
> > I'm just trying to fix the segfault demonstrated in the test
> > attached to the patch.
>
> Can offending readers that dereference NULL without checking if
> buffer has been freed be updated so that they read_sha1_file(), read
> the contents from the result returned from the function (instead of
> reading from .buffer), and free the memory when they are done?
Looks like builtin/blame.c:get_commit_info does this already, although
it leaves the buffer attached to the commit and does not free it.
The ensure_commit_buffer function could look something like:
int ensure_commit_buffer(struct commit *item)
{
enum object_type type;
unsigned long size;
if (!item)
return -1;
if (!item->object.parsed)
return parse_commit(item);
if (item->buffer)
return 0;
item->buffer = read_sha1_file(item->object.sha1, &type, &size);
if (!item->buffer)
return error("Could not read %s",
sha1_to_hex(item->object.sha1);
return 0;
}
and blame could replace its in-line processing with that. It does leave
open the question of whether callers should free() the result. But that
would be up to each user of the object (and it would be an optimization
issue, not a correctness issue, as long as each user called
ensure_commit_buffer before accessing it).
But the first step there would be to audit all of the accesses of
commit->buffer to make sure that they call ensure_commit_buffer
(presumably they already call parse_commit).
-Peff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-24 6:33 ` Jeff King
@ 2013-01-24 16:51 ` Junio C Hamano
0 siblings, 0 replies; 9+ messages in thread
From: Junio C Hamano @ 2013-01-24 16:51 UTC (permalink / raw)
To: Jeff King; +Cc: Jonathon Mah, git
Jeff King <peff@peff.net> writes:
> The ensure_commit_buffer function could look something like:
>
> int ensure_commit_buffer(struct commit *item)
> {
> enum object_type type;
> unsigned long size;
>
> if (!item)
> return -1;
> if (!item->object.parsed)
> return parse_commit(item);
> if (item->buffer)
> return 0;
>
> item->buffer = read_sha1_file(item->object.sha1, &type, &size);
> if (!item->buffer)
> return error("Could not read %s",
> sha1_to_hex(item->object.sha1);
> return 0;
> }
The more important issue is when to release them.
Only the subcommands that know that what they are doing do not need
to access commit->buffer and they operate solely on parsed data are
suppposed to flip the save_commit_buffer bit off to reclaim the
memory early, so new callers that violate that assumption may want
to call ensure_*, but if they did so when somebody cares about the
memory pressure enough to turn the bit off need to free the buffer
themselves after they use it. Otherwise the resulting program may
still be "correct" but we will end up keeping buffer that nobody
will use, just in case it is asked again.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-23 21:25 [PATCH] parse_object: clear "parsed" when freeing buffers Jonathon Mah
2013-01-23 22:19 ` Junio C Hamano
@ 2013-01-24 7:07 ` Jeff King
2013-01-24 21:16 ` Jonathon Mah
2013-01-24 23:27 ` Jonathon Mah
1 sibling, 2 replies; 9+ messages in thread
From: Jeff King @ 2013-01-24 7:07 UTC (permalink / raw)
To: Jonathon Mah; +Cc: Junio C Hamano, git
On Wed, Jan 23, 2013 at 01:25:04PM -0800, Jonathon Mah wrote:
> Several areas of code would free buffers for object structs that
> contained them ("struct tree" and "struct commit"), but without clearing
> the "parsed" flag. parse_object would clear the flag for "struct tree",
> but commits would remain in an invalid state (marked as parsed but with
> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
> invalid objects stay around and can lead to bad behavior.
>
> In particular, running pickaxe on all refs in a repository with a cached
> textconv could segfault. If the textconv cache ref was evaluated first
> by cmd_log_walk, a subsequent notes_cache_match_validity call would
> dereference NULL.
Just to be sure I understand, what is going on is something like this?
Log reads all refs, including notes. After showing the notes commit,
log frees the buffer to save memory. Later, doing the diff on a
commit causes us to use the notes_cache mechanism. That will look at
the commit at the tip of the notes ref, which log has already
processed. It will call parse_commit, but that will do nothing, as the
parsed flag is already set, but the commit buffer remains NULL.
I wonder if this could be the source of the segfault here, too:
http://thread.gmane.org/gmane.comp.version-control.git/214322/focus=214355
If you have refs pointing to both a submodule and its superproject, a
"log --all --submodule -p" might process the submodule's commits first,
free their buffers, and then want to show them again as part of the
submodule diff. This code in builtin/log.c:
if (!rev->reflog_info) {
/* we allow cycles in reflog ancestry */
free(commit->buffer);
commit->buffer = NULL;
}
assumes we will only ever look at a commit once (except for
reflog-walking), but submodule diff violates that.
-Peff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-24 7:07 ` Jeff King
@ 2013-01-24 21:16 ` Jonathon Mah
2013-01-24 23:27 ` Jonathon Mah
1 sibling, 0 replies; 9+ messages in thread
From: Jonathon Mah @ 2013-01-24 21:16 UTC (permalink / raw)
To: Jeff King; +Cc: Junio C Hamano, git
On 2013-01-23, at 23:07, Jeff King <peff@peff.net> wrote:
> On Wed, Jan 23, 2013 at 01:25:04PM -0800, Jonathon Mah wrote:
>
>> Several areas of code would free buffers for object structs that
>> contained them ("struct tree" and "struct commit"), but without clearing
>> the "parsed" flag. parse_object would clear the flag for "struct tree",
>> but commits would remain in an invalid state (marked as parsed but with
>> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
>> invalid objects stay around and can lead to bad behavior.
>>
>> In particular, running pickaxe on all refs in a repository with a cached
>> textconv could segfault. If the textconv cache ref was evaluated first
>> by cmd_log_walk, a subsequent notes_cache_match_validity call would
>> dereference NULL.
>
> Just to be sure I understand, what is going on is something like this?
>
> Log reads all refs, including notes. After showing the notes commit,
> log frees the buffer to save memory. Later, doing the diff on a
> commit causes us to use the notes_cache mechanism. That will look at
> the commit at the tip of the notes ref, which log has already
> processed. It will call parse_commit, but that will do nothing, as the
> parsed flag is already set, but the commit buffer remains NULL.
>
> I wonder if this could be the source of the segfault here, too:
>
> http://thread.gmane.org/gmane.comp.version-control.git/214322/focus=214355
Indeed, that description matches what I see. The other segfault seems to be in the same place too.
The segfault hits with the following stack (optimization off):
0 git-log parse_commit_header + 39 (pretty.c:738)
1 git-log format_commit_one + 3102 (pretty.c:1138)
2 git-log format_commit_item + 177 (pretty.c:1203)
3 git-log strbuf_expand + 172 (strbuf.c:247)
4 git-log format_commit_message + 196 (pretty.c:1263)
5 git-log notes_cache_match_validity + 215 (notes-cache.c:22)
6 git-log notes_cache_init + 212 (notes-cache.c:41)
7 git-log userdiff_get_textconv + 176 (userdiff.c:301)
8 git-log get_textconv + 66 (diff.c:2233)
9 git-log has_changes + 53 (diffcore-pickaxe.c:205)
10 git-log pickaxe + 299 (diffcore-pickaxe.c:40)
11 git-log diffcore_pickaxe_count + 344 (diffcore-pickaxe.c:275)
12 git-log diffcore_pickaxe + 58 (diffcore-pickaxe.c:289)
13 git-log diffcore_std + 162 (diff.c:4673)
14 git-log log_tree_diff_flush + 43 (log-tree.c:721)
15 git-log log_tree_diff + 566 (log-tree.c:826)
16 git-log log_tree_commit + 63 (log-tree.c:847)
17 git-log cmd_log_walk + 172 (log.c:301)
18 git-log cmd_log + 186 (log.c:568)
19 git-log run_builtin + 475 (git.c:273)
20 git-log handle_internal_command + 199 (git.c:434)
21 git-log main + 149 (git.c:523)
22 libdyld.dylib start + 1
commit->message was freed and nulled earlier in a call to cmd_log_walk. This test reproduces it reliably on my machine:
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 91f8198..d7e26ca 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,15 @@ test_expect_success 'switching diff driver produces correct results' '
test_cmp expect actual
'
+cat >expect <<EOF
+./helper other (refs/notes/textconv/magic)
+one
+EOF
+# add empty commit on master to make bug more reproducible
+test_expect_success 'pickaxe with cached textconv' '
+ git commit --allow-empty -m another &&
+ git log -S other --pretty=tformat:%s%d --all >actual &&
+ test_cmp expect actual
+'
+
test_done
Jonathon Mah
me@JonathonMah.com
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] parse_object: clear "parsed" when freeing buffers
2013-01-24 7:07 ` Jeff King
2013-01-24 21:16 ` Jonathon Mah
@ 2013-01-24 23:27 ` Jonathon Mah
1 sibling, 0 replies; 9+ messages in thread
From: Jonathon Mah @ 2013-01-24 23:27 UTC (permalink / raw)
To: Jeff King; +Cc: Junio C Hamano, git
On 2013-01-23, at 23:07, Jeff King <peff@peff.net> wrote:
> On Wed, Jan 23, 2013 at 01:25:04PM -0800, Jonathon Mah wrote:
>
>> Several areas of code would free buffers for object structs that
>> contained them ("struct tree" and "struct commit"), but without clearing
>> the "parsed" flag. parse_object would clear the flag for "struct tree",
>> but commits would remain in an invalid state (marked as parsed but with
>> a NULL buffer). Because the objects are uniqued (ccdc6037fee), the
>> invalid objects stay around and can lead to bad behavior.
>>
>> In particular, running pickaxe on all refs in a repository with a cached
>> textconv could segfault. If the textconv cache ref was evaluated first
>> by cmd_log_walk, a subsequent notes_cache_match_validity call would
>> dereference NULL.
>
> Just to be sure I understand, what is going on is something like this?
>
> Log reads all refs, including notes. After showing the notes commit,
> log frees the buffer to save memory. Later, doing the diff on a
> commit causes us to use the notes_cache mechanism. That will look at
> the commit at the tip of the notes ref, which log has already
> processed. It will call parse_commit, but that will do nothing, as the
> parsed flag is already set, but the commit buffer remains NULL.
>
> I wonder if this could be the source of the segfault here, too:
>
> http://thread.gmane.org/gmane.comp.version-control.git/214322/focus=214355
Indeed, that description matches what I see. The other segfault seems to be in the same place too.
The segfault hits with the following stack (optimization off):
0 git-log parse_commit_header + 39 (pretty.c:738)
1 git-log format_commit_one + 3102 (pretty.c:1138)
2 git-log format_commit_item + 177 (pretty.c:1203)
3 git-log strbuf_expand + 172 (strbuf.c:247)
4 git-log format_commit_message + 196 (pretty.c:1263)
5 git-log notes_cache_match_validity + 215 (notes-cache.c:22)
6 git-log notes_cache_init + 212 (notes-cache.c:41)
7 git-log userdiff_get_textconv + 176 (userdiff.c:301)
8 git-log get_textconv + 66 (diff.c:2233)
9 git-log has_changes + 53 (diffcore-pickaxe.c:205)
10 git-log pickaxe + 299 (diffcore-pickaxe.c:40)
11 git-log diffcore_pickaxe_count + 344 (diffcore-pickaxe.c:275)
12 git-log diffcore_pickaxe + 58 (diffcore-pickaxe.c:289)
13 git-log diffcore_std + 162 (diff.c:4673)
14 git-log log_tree_diff_flush + 43 (log-tree.c:721)
15 git-log log_tree_diff + 566 (log-tree.c:826)
16 git-log log_tree_commit + 63 (log-tree.c:847)
17 git-log cmd_log_walk + 172 (log.c:301)
18 git-log cmd_log + 186 (log.c:568)
19 git-log run_builtin + 475 (git.c:273)
20 git-log handle_internal_command + 199 (git.c:434)
21 git-log main + 149 (git.c:523)
22 libdyld.dylib start + 1
commit->message was freed and nulled earlier in a call to cmd_log_walk. This test reproduces it reliably on my machine:
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 91f8198..d7e26ca 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,15 @@ test_expect_success 'switching diff driver produces correct results' '
test_cmp expect actual
'
+cat >expect <<EOF
+./helper other (refs/notes/textconv/magic)
+one
+EOF
+# add empty commit on master to make bug more reproducible
+test_expect_success 'pickaxe with cached textconv' '
+ git commit --allow-empty -m another &&
+ git log -S other --pretty=tformat:%s%d --all >actual &&
+ test_cmp expect actual
+'
+
test_done
Jonathon Mah
me@JonathonMah.com
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2013-01-24 23:28 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-23 21:25 [PATCH] parse_object: clear "parsed" when freeing buffers Jonathon Mah
2013-01-23 22:19 ` Junio C Hamano
2013-01-23 23:36 ` Jonathon Mah
2013-01-24 0:25 ` Junio C Hamano
2013-01-24 6:33 ` Jeff King
2013-01-24 16:51 ` Junio C Hamano
2013-01-24 7:07 ` Jeff King
2013-01-24 21:16 ` Jonathon Mah
2013-01-24 23:27 ` Jonathon Mah
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).