git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFH] unpack-trees: cache_entry lifetime issue?
@ 2012-03-02 17:25 René Scharfe
  2012-03-02 19:17 ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: René Scharfe @ 2012-03-02 17:25 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

Hello,

below is the output of "git grep -hnW unpack_nondirectories",
which shows some parts of unpack-trees.c that I use as context to
ask: Should we check for o->merge in line 775, before using src[0]?

If o->merge is 0, the src[0] will be NULL right up to the call of
unpack_nondirectories() in line 772.  There it may be set (in line
582).  In that case we'll end up at line 779, where mark_ce_used()
is applied to it.

I suspect that this is unintended and that line 775 should rather
read "if (o->merge && src[0]) {".  Can someone with a better
understanding of unpack-trees confirm or refute that suspicion?

Thanks,
René


542:static int unpack_nondirectories(int n, unsigned long mask,
543-				 unsigned long dirmask,
544-				 struct cache_entry **src,
545-				 const struct name_entry *names,
546-				 const struct traverse_info *info)
547-{
548-	int i;
549-	struct unpack_trees_options *o = info->data;
550-	unsigned long conflicts;
551-
552-	/* Do we have *only* directories? Nothing to do */
553-	if (mask == dirmask && !src[0])
554-		return 0;
555-
556-	conflicts = info->conflicts;
557-	if (o->merge)
558-		conflicts >>= 1;
559-	conflicts |= dirmask;
560-
561-	/*
562-	 * Ok, we've filled in up to any potential index entry in src[0],
563-	 * now do the rest.
564-	 */
565-	for (i = 0; i < n; i++) {
566-		int stage;
567-		unsigned int bit = 1ul << i;
568-		if (conflicts & bit) {
569-			src[i + o->merge] = o->df_conflict_entry;
570-			continue;
571-		}
572-		if (!(mask & bit))
573-			continue;
574-		if (!o->merge)
575-			stage = 0;
576-		else if (i + 1 < o->head_idx)
577-			stage = 1;
578-		else if (i + 1 > o->head_idx)
579-			stage = 3;
580-		else
581-			stage = 2;

582-		src[i + o->merge] = create_ce_entry(info, names + i, stage);

583-	}
584-
585-	if (o->merge)
586-		return call_unpack_fn(src, o);
587-
588-	for (i = 0; i < n; i++)
589-		if (src[i] && src[i] != o->df_conflict_entry)
590-			add_entry(o, src[i], 0, 0);
591-	return 0;
592-}
593-
--
721=static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
722-{
723-	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
724-	struct unpack_trees_options *o = info->data;
725-	const struct name_entry *p = names;
726-
727-	/* Find first entry with a real name (we could use "mask" too) */
728-	while (!p->mode)
729-		p++;
730-
731-	if (o->debug_unpack)
732-		debug_unpack_callback(n, mask, dirmask, names, info);
733-
734-	/* Are we supposed to look at the index too? */
735-	if (o->merge) {
736-		while (1) {
737-			int cmp;
738-			struct cache_entry *ce;
739-
740-			if (o->diff_index_cached)
741-				ce = next_cache_entry(o);
742-			else
743-				ce = find_cache_entry(info, p);
744-
745-			if (!ce)
746-				break;
747-			cmp = compare_entry(ce, info, p);
748-			if (cmp < 0) {
749-				if (unpack_index_entry(ce, o) < 0)
750-					return unpack_failed(o, NULL);
751-				continue;
752-			}
753-			if (!cmp) {
754-				if (ce_stage(ce)) {
755-					/*
756-					 * If we skip unmerged index
757-					 * entries, we'll skip this
758-					 * entry *and* the tree
759-					 * entries associated with it!
760-					 */
761-					if (o->skip_unmerged) {
762-						add_same_unmerged(ce, o);
763-						return mask;
764-					}
765-				}
766-				src[0] = ce;
767-			}
768-			break;
769-		}
770-	}
771-

772:	if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
773-		return -1;
774-
775-	if (src[0]) {
776-		if (ce_stage(src[0]))
777-			mark_ce_used_same_name(src[0], o);
778-		else
779-			mark_ce_used(src[0], o);
780-	}

781-
782-	/* Now handle any directories.. */
783-	if (dirmask) {
784-		unsigned long conflicts = mask & ~dirmask;
785-		if (o->merge) {
786-			conflicts <<= 1;
787-			if (src[0])
788-				conflicts |= 1;
789-		}
790-
791-		/* special case: "diff-index --cached" looking at a tree */
792-		if (o->diff_index_cached &&
793-		    n == 1 && dirmask == 1 && S_ISDIR(names->mode)) {
794-			int matches;
795-			matches = cache_tree_matches_traversal(o->src_index->cache_tree,
796-							       names, info);
797-			/*
798-			 * Everything under the name matches; skip the
799-			 * entire hierarchy.  diff_index_cached codepath
800-			 * special cases D/F conflicts in such a way that
801-			 * it does not do any look-ahead, so this is safe.
802-			 */
803-			if (matches) {
804-				o->cache_bottom += matches;
805-				return mask;
806-			}
807-		}
808-
809-		if (traverse_trees_recursive(n, dirmask, conflicts,
810-					     names, info) < 0)
811-			return -1;
812-		return mask;
813-	}
814-
815-	return mask;
816-}
817-

PS: These functions are a tad too long, aren't they? ;-)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFH] unpack-trees: cache_entry lifetime issue?
  2012-03-02 17:25 [RFH] unpack-trees: cache_entry lifetime issue? René Scharfe
@ 2012-03-02 19:17 ` Junio C Hamano
  2012-03-03 14:14   ` René Scharfe
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2012-03-02 19:17 UTC (permalink / raw)
  To: René Scharfe; +Cc: git, Nguyễn Thái Ngọc Duy

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> which shows some parts of unpack-trees.c that I use as context to
> ask: Should we check for o->merge in line 775, before using src[0]?
>
> If o->merge is 0, the src[0] will be NULL right up to the call of
> unpack_nondirectories() in line 772.  There it may be set (in line
> 582).  In that case we'll end up at line 779, where mark_ce_used()
> is applied to it.
>
> I suspect that this is unintended and that line 775 should rather
> read "if (o->merge && src[0]) {".  Can someone with a better
> understanding of unpack-trees confirm or refute that suspicion?

Yeah, src[0] is meant to hold the entry from the current index to take it
as well as our tree into account during o->merge, and I do not think it
should affect when we are only reading tree(s) into the index.

I think da165f4 (unpack-trees.c: prepare for looking ahead in the index,
2010-01-07) simply forgot that the codepath also has to work when it is
not merging.

Having said that, I do not know offhand if we just should nothing in
no-merge case, or we should be doing something else instead, without
thinking a bit more.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFH] unpack-trees: cache_entry lifetime issue?
  2012-03-02 19:17 ` Junio C Hamano
@ 2012-03-03 14:14   ` René Scharfe
  2012-03-05 22:01     ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: René Scharfe @ 2012-03-03 14:14 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Nguyễn Thái Ngọc Duy

Am 02.03.2012 20:17, schrieb Junio C Hamano:
> René Scharfe<rene.scharfe@lsrfire.ath.cx>  writes:
> 
>> which shows some parts of unpack-trees.c that I use as context to
>> ask: Should we check for o->merge in line 775, before using src[0]?
>>
>> If o->merge is 0, the src[0] will be NULL right up to the call of
>> unpack_nondirectories() in line 772.  There it may be set (in line
>> 582).  In that case we'll end up at line 779, where mark_ce_used()
>> is applied to it.
>>
>> I suspect that this is unintended and that line 775 should rather
>> read "if (o->merge&&  src[0]) {".  Can someone with a better
>> understanding of unpack-trees confirm or refute that suspicion?
> 
> Yeah, src[0] is meant to hold the entry from the current index to take it
> as well as our tree into account during o->merge, and I do not think it
> should affect when we are only reading tree(s) into the index.
> 
> I think da165f4 (unpack-trees.c: prepare for looking ahead in the index,
> 2010-01-07) simply forgot that the codepath also has to work when it is
> not merging.
> 
> Having said that, I do not know offhand if we just should nothing in
> no-merge case, or we should be doing something else instead, without
> thinking a bit more.

Thanks.

Next question: Should the function fn() in struct unpack_trees_options
be able to replace src[0], and unpack_callback() is then supposed to
use the new pointer after calling unpack_nondirectories()?  If not
then we can clean up things a bit by moving the src array into
unpack_nondirectories().

For now, just this patch, which cleans up memory, but not the code:

-- >8 --
Subject: unpack-trees: plug minor memory leak

The allocations made by unpack_nondirectories() using create_ce_entry()
are never freed.  In the case of a merge, we hand them to
call_unpack_fn() and never look at them again.  In the non-merge case,
we duplicate them using add_entry() and later only look at the first
allocated element (src[0]), perhaps even only by mistake.

To clean up after ourselves, explicitly loop through the entries and
free their memory for merges.  For non-merges, split out the actual
addition from add_entry() into the new helper do_add_entry().  Then
call that non-duplicating function instead of add_entry() to avoid the
leak.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 unpack-trees.c |   35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 7c9ecf6..c594e4a 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -102,21 +102,28 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
 		opts->unpack_rejects[i].strdup_strings = 1;
 }
 
-static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
-	unsigned int set, unsigned int clear)
+static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
+			 unsigned int set, unsigned int clear)
 {
-	unsigned int size = ce_size(ce);
-	struct cache_entry *new = xmalloc(size);
-
 	clear |= CE_HASHED | CE_UNHASHED;
 
 	if (set & CE_REMOVE)
 		set |= CE_WT_REMOVE;
 
+	ce->next = NULL;
+	ce->ce_flags = (ce->ce_flags & ~clear) | set;
+	add_index_entry(&o->result, ce,
+			ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
+}
+
+static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
+	unsigned int set, unsigned int clear)
+{
+	unsigned int size = ce_size(ce);
+	struct cache_entry *new = xmalloc(size);
+
 	memcpy(new, ce, size);
-	new->next = NULL;
-	new->ce_flags = (new->ce_flags & ~clear) | set;
-	add_index_entry(&o->result, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
+	do_add_entry(o, new, set, clear);
 }
 
 /*
@@ -582,12 +589,18 @@ static int unpack_nondirectories(int n, unsigned long mask,
 		src[i + o->merge] = create_ce_entry(info, names + i, stage);
 	}
 
-	if (o->merge)
-		return call_unpack_fn(src, o);
+	if (o->merge) {
+		int rc = call_unpack_fn(src, o);
+		for (i = 0; i < n; i++) {
+			if (src[i + 1] != o->df_conflict_entry)
+				free(src[i + 1]);
+		}
+		return rc;
+	}
 
 	for (i = 0; i < n; i++)
 		if (src[i] && src[i] != o->df_conflict_entry)
-			add_entry(o, src[i], 0, 0);
+			do_add_entry(o, src[i], 0, 0);
 	return 0;
 }
 
-- 
1.7.9.2

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFH] unpack-trees: cache_entry lifetime issue?
  2012-03-03 14:14   ` René Scharfe
@ 2012-03-05 22:01     ` Junio C Hamano
  2012-03-06 19:37       ` René Scharfe
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2012-03-05 22:01 UTC (permalink / raw)
  To: René Scharfe; +Cc: git, Nguyễn Thái Ngọc Duy

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> Next question: Should the function fn() in struct unpack_trees_options
> be able to replace src[0], and unpack_callback() is then supposed to
> use the new pointer after calling unpack_nondirectories()?  If not
> then we can clean up things a bit by moving the src array into
> unpack_nondirectories().

Sorry, I have no idea.  What kind of usage pattern do you have in
mind?

> For now, just this patch, which cleans up memory, but not the code:
>
> -- >8 --
> Subject: unpack-trees: plug minor memory leak
>
> The allocations made by unpack_nondirectories() using create_ce_entry()
> are never freed.  In the case of a merge, we hand them to
> call_unpack_fn() and never look at them again.

That assumes that whatever callbacks that are called will only look
at but never takes ownership of the cache entry given to them.  I
*think* everybody eventually calls "add_entry()" that duplicates the
cache entry before storing it to the index, but I didn't go through
all the codepaths.  Assuming you did, I think this is a good change.

> In the non-merge case,
> we duplicate them using add_entry() and later only look at the first
> allocated element (src[0]), perhaps even only by mistake.
>
> To clean up after ourselves, explicitly loop through the entries and
> free their memory for merges.  For non-merges, split out the actual
> addition from add_entry() into the new helper do_add_entry().  Then
> call that non-duplicating function instead of add_entry() to avoid the
> leak.
>
> Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
> ---
>  unpack-trees.c |   35 ++++++++++++++++++++++++-----------
>  1 file changed, 24 insertions(+), 11 deletions(-)
>
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 7c9ecf6..c594e4a 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -102,21 +102,28 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
>  		opts->unpack_rejects[i].strdup_strings = 1;
>  }
>  
> -static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
> -	unsigned int set, unsigned int clear)
> +static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
> +			 unsigned int set, unsigned int clear)
>  {
> -	unsigned int size = ce_size(ce);
> -	struct cache_entry *new = xmalloc(size);
> -
>  	clear |= CE_HASHED | CE_UNHASHED;
>  
>  	if (set & CE_REMOVE)
>  		set |= CE_WT_REMOVE;
>  
> +	ce->next = NULL;
> +	ce->ce_flags = (ce->ce_flags & ~clear) | set;
> +	add_index_entry(&o->result, ce,
> +			ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
> +}
> +
> +static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
> +	unsigned int set, unsigned int clear)
> +{
> +	unsigned int size = ce_size(ce);
> +	struct cache_entry *new = xmalloc(size);
> +
>  	memcpy(new, ce, size);
> -	new->next = NULL;
> -	new->ce_flags = (new->ce_flags & ~clear) | set;
> -	add_index_entry(&o->result, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
> +	do_add_entry(o, new, set, clear);
>  }
>  
>  /*
> @@ -582,12 +589,18 @@ static int unpack_nondirectories(int n, unsigned long mask,
>  		src[i + o->merge] = create_ce_entry(info, names + i, stage);
>  	}
>  
> -	if (o->merge)
> -		return call_unpack_fn(src, o);
> +	if (o->merge) {
> +		int rc = call_unpack_fn(src, o);
> +		for (i = 0; i < n; i++) {
> +			if (src[i + 1] != o->df_conflict_entry)
> +				free(src[i + 1]);
> +		}
> +		return rc;
> +	}
>  
>  	for (i = 0; i < n; i++)
>  		if (src[i] && src[i] != o->df_conflict_entry)
> -			add_entry(o, src[i], 0, 0);
> +			do_add_entry(o, src[i], 0, 0);
>  	return 0;
>  }

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFH] unpack-trees: cache_entry lifetime issue?
  2012-03-05 22:01     ` Junio C Hamano
@ 2012-03-06 19:37       ` René Scharfe
  0 siblings, 0 replies; 5+ messages in thread
From: René Scharfe @ 2012-03-06 19:37 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Nguyễn Thái Ngọc Duy

Am 05.03.2012 23:01, schrieb Junio C Hamano:
> René Scharfe<rene.scharfe@lsrfire.ath.cx>  writes:
> 
>> Next question: Should the function fn() in struct unpack_trees_options
>> be able to replace src[0], and unpack_callback() is then supposed to
>> use the new pointer after calling unpack_nondirectories()?  If not
>> then we can clean up things a bit by moving the src array into
>> unpack_nondirectories().
> 
> Sorry, I have no idea.  What kind of usage pattern do you have in
> mind?

Well, I can't imagine a merge callback that needs this ability, but
that really doesn't mean much.  Just want to avoid taking away options
that turn out to be useful later.

>> For now, just this patch, which cleans up memory, but not the code:
>>
>> -- >8 --
>> Subject: unpack-trees: plug minor memory leak
>>
>> The allocations made by unpack_nondirectories() using create_ce_entry()
>> are never freed.  In the case of a merge, we hand them to
>> call_unpack_fn() and never look at them again.
> 
> That assumes that whatever callbacks that are called will only look
> at but never takes ownership of the cache entry given to them.  I
> *think* everybody eventually calls "add_entry()" that duplicates the
> cache entry before storing it to the index, but I didn't go through
> all the codepaths.  Assuming you did, I think this is a good change.

I did that a while back and while doing that again just now with
tired eyes after a day's work noticed how deep the call chains are
and that it's certainly possible that I missed a place that assumed
it owned the cache entry.

For increased confidence, perhaps it's better to first patch the
non-merge leak, then clean up and const'ify the merge case and only
then free().  Or to declare that merge functions can take ownership
of the cache entries.  Timid patch for the first part below.

-- >8 --
Subject: unpack-trees: plug minor memory leak

The allocations made by unpack_nondirectories() using create_ce_entry()
are never freed.

In the non-merge case, we duplicate them using add_entry() and later
only look at the first allocated element (src[0]), perhaps even only
by mistake.  Split out the actual addition from add_entry() into the
new helper do_add_entry() and call this non-duplicating function
instead of add_entry() to avoid the leak.

Valgrind reports this for the command "git archive v1.7.9" without
the patch:

  ==13372== LEAK SUMMARY:
  ==13372==    definitely lost: 230,986 bytes in 2,325 blocks
  ==13372==    indirectly lost: 0 bytes in 0 blocks
  ==13372==      possibly lost: 98 bytes in 1 blocks
  ==13372==    still reachable: 2,259,198 bytes in 3,243 blocks
  ==13372==         suppressed: 0 bytes in 0 blocks

And with the patch applied:

  ==13375== LEAK SUMMARY:
  ==13375==    definitely lost: 65 bytes in 1 blocks
  ==13375==    indirectly lost: 0 bytes in 0 blocks
  ==13375==      possibly lost: 0 bytes in 0 blocks
  ==13375==    still reachable: 2,364,417 bytes in 3,245 blocks
  ==13375==         suppressed: 0 bytes in 0 blocks

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 unpack-trees.c |   25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 7c9ecf6..f9c74bd 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -102,21 +102,28 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
 		opts->unpack_rejects[i].strdup_strings = 1;
 }
 
-static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
-	unsigned int set, unsigned int clear)
+static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
+			 unsigned int set, unsigned int clear)
 {
-	unsigned int size = ce_size(ce);
-	struct cache_entry *new = xmalloc(size);
-
 	clear |= CE_HASHED | CE_UNHASHED;
 
 	if (set & CE_REMOVE)
 		set |= CE_WT_REMOVE;
 
+	ce->next = NULL;
+	ce->ce_flags = (ce->ce_flags & ~clear) | set;
+	add_index_entry(&o->result, ce,
+			ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
+}
+
+static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
+	unsigned int set, unsigned int clear)
+{
+	unsigned int size = ce_size(ce);
+	struct cache_entry *new = xmalloc(size);
+
 	memcpy(new, ce, size);
-	new->next = NULL;
-	new->ce_flags = (new->ce_flags & ~clear) | set;
-	add_index_entry(&o->result, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
+	do_add_entry(o, new, set, clear);
 }
 
 /*
@@ -587,7 +594,7 @@ static int unpack_nondirectories(int n, unsigned long mask,
 
 	for (i = 0; i < n; i++)
 		if (src[i] && src[i] != o->df_conflict_entry)
-			add_entry(o, src[i], 0, 0);
+			do_add_entry(o, src[i], 0, 0);
 	return 0;
 }
 
-- 
1.7.9.2

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-03-06 19:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-02 17:25 [RFH] unpack-trees: cache_entry lifetime issue? René Scharfe
2012-03-02 19:17 ` Junio C Hamano
2012-03-03 14:14   ` René Scharfe
2012-03-05 22:01     ` Junio C Hamano
2012-03-06 19:37       ` René Scharfe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).