git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/1]  merge: fix cache_entry use-after-free
@ 2015-10-08 18:47 David Turner
  2015-10-08 18:47 ` David Turner
  0 siblings, 1 reply; 5+ messages in thread
From: David Turner @ 2015-10-08 18:47 UTC (permalink / raw)
  To: git; +Cc: David Turner

Keith diagnosed the problem and wrote the patch.  I wrote the commit
message and am sending it upstream with his OK.

Keith McGuigan (1):
  merge: fix cache_entry use-after-free

 cache.h        | 27 +++++++++++++++++++++++++++
 name-hash.c    |  7 ++++++-
 read-cache.c   |  5 ++++-
 split-index.c  | 12 +++++++-----
 unpack-trees.c |  6 ++++--
 5 files changed, 48 insertions(+), 9 deletions(-)

-- 
2.4.2.644.g97b850b-twtrsrc

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 0/1] merge: fix cache_entry use-after-free
  2015-10-08 18:47 [PATCH 0/1] merge: fix cache_entry use-after-free David Turner
@ 2015-10-08 18:47 ` David Turner
  2015-10-08 20:00   ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: David Turner @ 2015-10-08 18:47 UTC (permalink / raw)
  To: git; +Cc: Keith McGuigan, David Turner

From: Keith McGuigan <kmcguigan@twitter.com>

During merges, we would previously free entries that we no longer need
in the destination index.  But those entries might also be stored in
the dir_entry cache, and when a later call to add_to_index found them,
they would be used after being freed.

To prevent this, add a ref count for struct cache_entry.  Whenever
a cache entry is added to a data structure, the ref count is incremented;
when it is removed from the data structure, it is decremented.  When
it hits zero, the cache_entry is freed.

Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
---
 cache.h        | 27 +++++++++++++++++++++++++++
 name-hash.c    |  7 ++++++-
 read-cache.c   |  5 ++++-
 split-index.c  | 12 +++++++-----
 unpack-trees.c |  6 ++++--
 5 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/cache.h b/cache.h
index 752031e..7563658 100644
--- a/cache.h
+++ b/cache.h
@@ -149,6 +149,7 @@ struct stat_data {
 
 struct cache_entry {
 	struct hashmap_entry ent;
+	unsigned int ref_count; /* count the number of refs to this in dir_hash */
 	struct stat_data ce_stat_data;
 	unsigned int ce_mode;
 	unsigned int ce_flags;
@@ -213,6 +214,32 @@ struct cache_entry {
 struct pathspec;
 
 /*
+ * Increment the cache_entry reference count.  Should be called
+ * whenever a pointer to a cache_entry is retained in a data structure,
+ * thus marking it as alive.
+ */
+static inline void add_ce_ref(struct cache_entry *ce)
+{
+	assert(ce != NULL && ce->ref_count >= 0);
+	++ce->ref_count;
+}
+
+/*
+ * Decrement the cache_entry reference count.  Should be called whenever
+ * a pointer to a cache_entry is dropped.  Once the counter drops to 0
+ * the cache_entry memory will be safely freed.
+ */
+static inline void drop_ce_ref(struct cache_entry *ce)
+{
+	if (ce != NULL) {
+		assert(ce->ref_count >= 0);
+		if (--ce->ref_count < 1) {
+			free(ce);
+		}
+	}
+}
+
+/*
  * Copy the sha1 and stat state of a cache entry from one to
  * another. But we never change the name, or the hash state!
  */
diff --git a/name-hash.c b/name-hash.c
index 702cd05..f12c919 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -66,6 +66,7 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate,
 		dir = xcalloc(1, sizeof(struct dir_entry));
 		hashmap_entry_init(dir, memihash(ce->name, namelen));
 		dir->namelen = namelen;
+		add_ce_ref(ce);
 		dir->ce = ce;
 		hashmap_add(&istate->dir_hash, dir);
 
@@ -92,7 +93,9 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
 	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
 	while (dir && !(--dir->nr)) {
 		struct dir_entry *parent = dir->parent;
-		hashmap_remove(&istate->dir_hash, dir, NULL);
+		struct dir_entry *removed = hashmap_remove(&istate->dir_hash, dir, NULL);
+		assert(removed == dir);
+		drop_ce_ref(dir->ce);
 		free(dir);
 		dir = parent;
 	}
@@ -105,6 +108,7 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 	ce->ce_flags |= CE_HASHED;
 	hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));
 	hashmap_add(&istate->name_hash, ce);
+	add_ce_ref(ce);
 
 	if (ignore_case)
 		add_dir_entry(istate, ce);
@@ -147,6 +151,7 @@ void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
 		return;
 	ce->ce_flags &= ~CE_HASHED;
 	hashmap_remove(&istate->name_hash, ce, ce);
+	drop_ce_ref(ce);
 
 	if (ignore_case)
 		remove_dir_entry(istate, ce);
diff --git a/read-cache.c b/read-cache.c
index 87204a5..442de34 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -53,6 +53,7 @@ static const char *alternate_index_output;
 static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
 	istate->cache[nr] = ce;
+	add_ce_ref(ce);
 	add_name_hash(istate, ce);
 }
 
@@ -62,7 +63,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
 
 	replace_index_entry_in_base(istate, old, ce);
 	remove_name_hash(istate, old);
-	free(old);
+	drop_ce_ref(old);
 	set_index_entry(istate, nr, ce);
 	ce->ce_flags |= CE_UPDATE_IN_BASE;
 	istate->cache_changed |= CE_ENTRY_CHANGED;
@@ -75,6 +76,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 
 	new = xmalloc(cache_entry_size(namelen));
 	copy_cache_entry(new, old);
+	new->ref_count = 0;
 	new->ce_flags &= ~CE_HASHED;
 	new->ce_namelen = namelen;
 	new->index = 0;
@@ -1426,6 +1428,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 {
 	struct cache_entry *ce = xmalloc(cache_entry_size(len));
 
+	ce->ref_count = 0;
 	ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
 	ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
 	ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
diff --git a/split-index.c b/split-index.c
index 968b780..ad28f7b 100644
--- a/split-index.c
+++ b/split-index.c
@@ -124,7 +124,7 @@ static void replace_entry(size_t pos, void *data)
 	src->ce_flags |= CE_UPDATE_IN_BASE;
 	src->ce_namelen = dst->ce_namelen;
 	copy_cache_entry(dst, src);
-	free(src);
+	drop_ce_ref(src);
 	si->nr_replacements++;
 }
 
@@ -227,7 +227,7 @@ void prepare_to_write_split_index(struct index_state *istate)
 			base->ce_flags = base_flags;
 			if (ret)
 				ce->ce_flags |= CE_UPDATE_IN_BASE;
-			free(base);
+			drop_ce_ref(base);
 			si->base->cache[ce->index - 1] = ce;
 		}
 		for (i = 0; i < si->base->cache_nr; i++) {
@@ -302,7 +302,7 @@ void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce
 	    ce == istate->split_index->base->cache[ce->index - 1])
 		ce->ce_flags |= CE_REMOVE;
 	else
-		free(ce);
+		drop_ce_ref(ce);
 }
 
 void replace_index_entry_in_base(struct index_state *istate,
@@ -314,8 +314,10 @@ void replace_index_entry_in_base(struct index_state *istate,
 	    istate->split_index->base &&
 	    old->index <= istate->split_index->base->cache_nr) {
 		new->index = old->index;
-		if (old != istate->split_index->base->cache[new->index - 1])
-			free(istate->split_index->base->cache[new->index - 1]);
+		if (old != istate->split_index->base->cache[new->index - 1]) {
+			struct cache_entry *ce = istate->split_index->base->cache[new->index - 1];
+			drop_ce_ref(ce);
+		}
 		istate->split_index->base->cache[new->index - 1] = new;
 	}
 }
diff --git a/unpack-trees.c b/unpack-trees.c
index f932e80..1a0a637 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -606,8 +606,10 @@ static int unpack_nondirectories(int n, unsigned long mask,
 					o);
 		for (i = 0; i < n; i++) {
 			struct cache_entry *ce = src[i + o->merge];
-			if (ce != o->df_conflict_entry)
-				free(ce);
+			if (ce != o->df_conflict_entry) {
+				drop_ce_ref(ce);
+				src[i + o->merge] = NULL;
+			}
 		}
 		return rc;
 	}
-- 
2.4.2.644.g97b850b-twtrsrc

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/1] merge: fix cache_entry use-after-free
  2015-10-08 18:47 ` David Turner
@ 2015-10-08 20:00   ` Junio C Hamano
  2015-10-09 22:16     ` David Turner
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2015-10-08 20:00 UTC (permalink / raw)
  To: David Turner; +Cc: git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

> From: Keith McGuigan <kmcguigan@twitter.com>
>
> During merges, we would previously free entries that we no longer need
> in the destination index.  But those entries might also be stored in
> the dir_entry cache, and when a later call to add_to_index found them,
> they would be used after being freed.
>
> To prevent this, add a ref count for struct cache_entry.  Whenever
> a cache entry is added to a data structure, the ref count is incremented;
> when it is removed from the data structure, it is decremented.  When
> it hits zero, the cache_entry is freed.
>
> Signed-off-by: David Turner <dturner@twopensource.com>
> Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
> ---

Thanks.

> @@ -213,6 +214,32 @@ struct cache_entry {
>  struct pathspec;
>  
>  /*
> + * Increment the cache_entry reference count.  Should be called
> + * whenever a pointer to a cache_entry is retained in a data structure,
> + * thus marking it as alive.
> + */
> +static inline void add_ce_ref(struct cache_entry *ce)
> +{
> +	assert(ce != NULL && ce->ref_count >= 0);
> +	++ce->ref_count;
> +}

We tend to prefer post-increment when the distinction between pre-
or post- does not make any difference, which is the case here.

> diff --git a/name-hash.c b/name-hash.c
> index 702cd05..f12c919 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -92,7 +93,9 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
>  	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
>  	while (dir && !(--dir->nr)) {
>  		struct dir_entry *parent = dir->parent;
> -		hashmap_remove(&istate->dir_hash, dir, NULL);
> +		struct dir_entry *removed = hashmap_remove(&istate->dir_hash, dir, NULL);
> +		assert(removed == dir);
> +		drop_ce_ref(dir->ce);

This is curious.  In remove_name_hash() you do not have the
corresponding assert.  Why is it necessary here (or is it
unnecessary over there)?

> @@ -147,6 +151,7 @@ void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
>  		return;
>  	ce->ce_flags &= ~CE_HASHED;
>  	hashmap_remove(&istate->name_hash, ce, ce);
> +	drop_ce_ref(ce);
>  
>  	if (ignore_case)
>  		remove_dir_entry(istate, ce);
> diff --git a/read-cache.c b/read-cache.c
> index 87204a5..442de34 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -53,6 +53,7 @@ static const char *alternate_index_output;
>  static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
>  {
>  	istate->cache[nr] = ce;
> +	add_ce_ref(ce);
>  	add_name_hash(istate, ce);
>  }

What happens to the existing entry that used to be istate->cache[nr],
which may or may not be 'ce' that is replacing it?

It turns out that all three calling sites are safe, but it is not
immediately obvious why.  Perhaps some comment in front of the
function is in order, to warn those who may have to add a new caller
or restructure the existing calling chain, that istate->cache[nr] is
expected not to hold a live reference when the function is called,
or something?

> @@ -314,8 +314,10 @@ void replace_index_entry_in_base(struct index_state *istate,
>  	    istate->split_index->base &&
>  	    old->index <= istate->split_index->base->cache_nr) {
>  		new->index = old->index;
> -		if (old != istate->split_index->base->cache[new->index - 1])
> -			free(istate->split_index->base->cache[new->index - 1]);
> +		if (old != istate->split_index->base->cache[new->index - 1]) {
> +			struct cache_entry *ce = istate->split_index->base->cache[new->index - 1];
> +			drop_ce_ref(ce);
> +		}
>  		istate->split_index->base->cache[new->index - 1] = new;

Does 'new' already have the right refcount at this point?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/1] merge: fix cache_entry use-after-free
  2015-10-08 20:00   ` Junio C Hamano
@ 2015-10-09 22:16     ` David Turner
  2015-10-09 22:51       ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: David Turner @ 2015-10-09 22:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git mailing list, Duy Nguyen

+Duy Nguyen, who knows the split index better.

On Thu, 2015-10-08 at 13:00 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> > From: Keith McGuigan <kmcguigan@twitter.com>
> >
> > During merges, we would previously free entries that we no longer need
> > in the destination index.  But those entries might also be stored in
> > the dir_entry cache, and when a later call to add_to_index found them,
> > they would be used after being freed.
> >
> > To prevent this, add a ref count for struct cache_entry.  Whenever
> > a cache entry is added to a data structure, the ref count is incremented;
> > when it is removed from the data structure, it is decremented.  When
> > it hits zero, the cache_entry is freed.
> >
> > Signed-off-by: David Turner <dturner@twopensource.com>
> > Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
> > ---
> 
> Thanks.
> 
> > @@ -213,6 +214,32 @@ struct cache_entry {
> >  struct pathspec;
> >  
> >  /*
> > + * Increment the cache_entry reference count.  Should be called
> > + * whenever a pointer to a cache_entry is retained in a data structure,
> > + * thus marking it as alive.
> > + */
> > +static inline void add_ce_ref(struct cache_entry *ce)
> > +{
> > +	assert(ce != NULL && ce->ref_count >= 0);
> > +	++ce->ref_count;
> > +}
> 
> We tend to prefer post-increment when the distinction between pre-
> or post- does not make any difference, which is the case here.

Will fix.

> > diff --git a/name-hash.c b/name-hash.c
> > index 702cd05..f12c919 100644
> > --- a/name-hash.c
> > +++ b/name-hash.c
> > @@ -92,7 +93,9 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
> >  	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
> >  	while (dir && !(--dir->nr)) {
> >  		struct dir_entry *parent = dir->parent;
> > -		hashmap_remove(&istate->dir_hash, dir, NULL);
> > +		struct dir_entry *removed = hashmap_remove(&istate->dir_hash, dir, NULL);
> > +		assert(removed == dir);
> > +		drop_ce_ref(dir->ce);
> 
> This is curious.  In remove_name_hash() you do not have the
> corresponding assert.  Why is it necessary here (or is it
> unnecessary over there)?

It is unnecessary in any case: it's an assert rather than a check for an
expected (or even unexpected) case.  That just happens to be where Keith
first managed to track down the use-after free, so that's where he
happened to put the assert while trying to debug it.  I'm in general in
favor of more asserts rather than fewer, so I would prefer to keep it
in.  But I can remove it if you like.

> > @@ -147,6 +151,7 @@ void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
> >  		return;
> >  	ce->ce_flags &= ~CE_HASHED;
> >  	hashmap_remove(&istate->name_hash, ce, ce);
> > +	drop_ce_ref(ce);
> >  
> >  	if (ignore_case)
> >  		remove_dir_entry(istate, ce);
> > diff --git a/read-cache.c b/read-cache.c
> > index 87204a5..442de34 100644
> > --- a/read-cache.c
> > +++ b/read-cache.c
> > @@ -53,6 +53,7 @@ static const char *alternate_index_output;
> >  static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
> >  {
> >  	istate->cache[nr] = ce;
> > +	add_ce_ref(ce);
> >  	add_name_hash(istate, ce);
> >  }
> 
> What happens to the existing entry that used to be istate->cache[nr],
> which may or may not be 'ce' that is replacing it?
> 
> It turns out that all three calling sites are safe, but it is not
> immediately obvious why.  Perhaps some comment in front of the
> function is in order, to warn those who may have to add a new caller
> or restructure the existing calling chain, that istate->cache[nr] is
> expected not to hold a live reference when the function is called,
> or something?

Happy to add it if you want, but to me it was clear without because if
there were a value in istate->cache[nr], that old value would be leaked.
Let me know.

> > @@ -314,8 +314,10 @@ void replace_index_entry_in_base(struct index_state *istate,
> >  	    istate->split_index->base &&
> >  	    old->index <= istate->split_index->base->cache_nr) {
> >  		new->index = old->index;
> > -		if (old != istate->split_index->base->cache[new->index - 1])
> > -			free(istate->split_index->base->cache[new->index - 1]);
> > +		if (old != istate->split_index->base->cache[new->index - 1]) {
> > +			struct cache_entry *ce = istate->split_index->base->cache[new->index - 1];
> > +			drop_ce_ref(ce);
> > +		}
> >  		istate->split_index->base->cache[new->index - 1] = new;
> 
> Does 'new' already have the right refcount at this point?

I spoke to Keith, and he thinks we do need an add_ce_ref there. I'll fix
that on the reroll.  Duy, do you agree?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/1] merge: fix cache_entry use-after-free
  2015-10-09 22:16     ` David Turner
@ 2015-10-09 22:51       ` Junio C Hamano
  0 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2015-10-09 22:51 UTC (permalink / raw)
  To: David Turner; +Cc: git mailing list, Duy Nguyen

David Turner <dturner@twopensource.com> writes:

>> > +		assert(removed == dir);
>> > +		drop_ce_ref(dir->ce);
>> 
>> This is curious.  In remove_name_hash() you do not have the
>> corresponding assert.  Why is it necessary here (or is it
>> unnecessary over there)?
>
> It is unnecessary in any case: it's an assert rather than a check for an
> expected (or even unexpected) case.  That just happens to be where Keith
> first managed to track down the use-after free, so that's where he
> happened to put the assert while trying to debug it.  I'm in general in
> favor of more asserts rather than fewer, so I would prefer to keep it
> in.  But I can remove it if you like.

OK, it was just the inconsistency between the two made them look
curious, as if one of them is more likely to get broken, or the
patch author was not so sure about the assumption, or something.

>> > +	add_ce_ref(ce);
>> >  	add_name_hash(istate, ce);
>> >  }
>> 
>> What happens to the existing entry that used to be istate->cache[nr],
>> which may or may not be 'ce' that is replacing it?
>> 
>> It turns out that all three calling sites are safe, but it is not
>> immediately obvious why.  Perhaps some comment in front of the
>> function is in order, to warn those who may have to add a new caller
>> or restructure the existing calling chain, that istate->cache[nr] is
>> expected not to hold a live reference when the function is called,
>> or something?
>
> Happy to add it if you want, but to me it was clear without because if
> there were a value in istate->cache[nr], that old value would be leaked.

OK, that's sort-of-cheating, but is a sound short-cut ;-).

>> > +		if (old != istate->split_index->base->cache[new->index - 1]) {
>> > +			struct cache_entry *ce = istate->split_index->base->cache[new->index - 1];
>> > +			drop_ce_ref(ce);
>> > +		}
>> >  		istate->split_index->base->cache[new->index - 1] = new;
>> 
>> Does 'new' already have the right refcount at this point?
>
> I spoke to Keith, and he thinks we do need an add_ce_ref there. I'll fix
> that on the reroll.  Duy, do you agree?

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-10-09 22:52 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-08 18:47 [PATCH 0/1] merge: fix cache_entry use-after-free David Turner
2015-10-08 18:47 ` David Turner
2015-10-08 20:00   ` Junio C Hamano
2015-10-09 22:16     ` David Turner
2015-10-09 22:51       ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).