git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] merge: fix cache_entry use-after-free
@ 2015-10-12 22:03 David Turner
  2015-10-12 22:28 ` Junio C Hamano
  2015-10-14 21:17 ` Junio C Hamano
  0 siblings, 2 replies; 6+ messages in thread
From: David Turner @ 2015-10-12 22:03 UTC (permalink / raw)
  To: git; +Cc: Keith McGuigan

From: Keith McGuigan <kmcguigan@twitter.com>

During merges, we would previously free entries that we no longer need
in the destination index.  But those entries might also be stored in
the dir_entry cache, and when a later call to add_to_index found them,
they would be used after being freed.

To prevent this, add a ref count for struct cache_entry.  Whenever
a cache entry is added to a data structure, the ref count is incremented;
when it is removed from the data structure, it is decremented.  When
it hits zero, the cache_entry is freed.

Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
---

This version addresses Junio's comments on v1.  It adds a missing
add_ce_ref, and fixes a formatting nit.

 cache.h        | 27 +++++++++++++++++++++++++++
 name-hash.c    |  7 ++++++-
 read-cache.c   |  6 +++++-
 split-index.c  | 13 ++++++++-----
 unpack-trees.c |  6 ++++--
 5 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/cache.h b/cache.h
index 752031e..738f76d 100644
--- a/cache.h
+++ b/cache.h
@@ -149,6 +149,7 @@ struct stat_data {
 
 struct cache_entry {
 	struct hashmap_entry ent;
+	unsigned int ref_count; /* count the number of refs to this in dir_hash */
 	struct stat_data ce_stat_data;
 	unsigned int ce_mode;
 	unsigned int ce_flags;
@@ -213,6 +214,32 @@ struct cache_entry {
 struct pathspec;
 
 /*
+ * Increment the cache_entry reference count.  Should be called
+ * whenever a pointer to a cache_entry is retained in a data structure,
+ * thus marking it as alive.
+ */
+static inline void add_ce_ref(struct cache_entry *ce)
+{
+	assert(ce != NULL && ce->ref_count >= 0);
+	ce->ref_count++;
+}
+
+/*
+ * Decrement the cache_entry reference count.  Should be called whenever
+ * a pointer to a cache_entry is dropped.  Once the counter drops to 0
+ * the cache_entry memory will be safely freed.
+ */
+static inline void drop_ce_ref(struct cache_entry *ce)
+{
+	if (ce != NULL) {
+		assert(ce->ref_count >= 0);
+		if (--ce->ref_count < 1) {
+			free(ce);
+		}
+	}
+}
+
+/*
  * Copy the sha1 and stat state of a cache entry from one to
  * another. But we never change the name, or the hash state!
  */
diff --git a/name-hash.c b/name-hash.c
index 702cd05..f12c919 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -66,6 +66,7 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate,
 		dir = xcalloc(1, sizeof(struct dir_entry));
 		hashmap_entry_init(dir, memihash(ce->name, namelen));
 		dir->namelen = namelen;
+		add_ce_ref(ce);
 		dir->ce = ce;
 		hashmap_add(&istate->dir_hash, dir);
 
@@ -92,7 +93,9 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
 	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
 	while (dir && !(--dir->nr)) {
 		struct dir_entry *parent = dir->parent;
-		hashmap_remove(&istate->dir_hash, dir, NULL);
+		struct dir_entry *removed = hashmap_remove(&istate->dir_hash, dir, NULL);
+		assert(removed == dir);
+		drop_ce_ref(dir->ce);
 		free(dir);
 		dir = parent;
 	}
@@ -105,6 +108,7 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 	ce->ce_flags |= CE_HASHED;
 	hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));
 	hashmap_add(&istate->name_hash, ce);
+	add_ce_ref(ce);
 
 	if (ignore_case)
 		add_dir_entry(istate, ce);
@@ -147,6 +151,7 @@ void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
 		return;
 	ce->ce_flags &= ~CE_HASHED;
 	hashmap_remove(&istate->name_hash, ce, ce);
+	drop_ce_ref(ce);
 
 	if (ignore_case)
 		remove_dir_entry(istate, ce);
diff --git a/read-cache.c b/read-cache.c
index 87204a5..8b685bb 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -52,7 +52,9 @@ static const char *alternate_index_output;
 
 static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
+	/* istate->cache[nr] is assumed to not hold a live value */
 	istate->cache[nr] = ce;
+	add_ce_ref(ce);
 	add_name_hash(istate, ce);
 }
 
@@ -62,7 +64,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
 
 	replace_index_entry_in_base(istate, old, ce);
 	remove_name_hash(istate, old);
-	free(old);
+	drop_ce_ref(old);
 	set_index_entry(istate, nr, ce);
 	ce->ce_flags |= CE_UPDATE_IN_BASE;
 	istate->cache_changed |= CE_ENTRY_CHANGED;
@@ -75,6 +77,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 
 	new = xmalloc(cache_entry_size(namelen));
 	copy_cache_entry(new, old);
+	new->ref_count = 0;
 	new->ce_flags &= ~CE_HASHED;
 	new->ce_namelen = namelen;
 	new->index = 0;
@@ -1426,6 +1429,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 {
 	struct cache_entry *ce = xmalloc(cache_entry_size(len));
 
+	ce->ref_count = 0;
 	ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
 	ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
 	ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
diff --git a/split-index.c b/split-index.c
index 968b780..61b5631 100644
--- a/split-index.c
+++ b/split-index.c
@@ -124,7 +124,7 @@ static void replace_entry(size_t pos, void *data)
 	src->ce_flags |= CE_UPDATE_IN_BASE;
 	src->ce_namelen = dst->ce_namelen;
 	copy_cache_entry(dst, src);
-	free(src);
+	drop_ce_ref(src);
 	si->nr_replacements++;
 }
 
@@ -227,7 +227,7 @@ void prepare_to_write_split_index(struct index_state *istate)
 			base->ce_flags = base_flags;
 			if (ret)
 				ce->ce_flags |= CE_UPDATE_IN_BASE;
-			free(base);
+			drop_ce_ref(base);
 			si->base->cache[ce->index - 1] = ce;
 		}
 		for (i = 0; i < si->base->cache_nr; i++) {
@@ -302,7 +302,7 @@ void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce
 	    ce == istate->split_index->base->cache[ce->index - 1])
 		ce->ce_flags |= CE_REMOVE;
 	else
-		free(ce);
+		drop_ce_ref(ce);
 }
 
 void replace_index_entry_in_base(struct index_state *istate,
@@ -314,8 +314,11 @@ void replace_index_entry_in_base(struct index_state *istate,
 	    istate->split_index->base &&
 	    old->index <= istate->split_index->base->cache_nr) {
 		new->index = old->index;
-		if (old != istate->split_index->base->cache[new->index - 1])
-			free(istate->split_index->base->cache[new->index - 1]);
+		if (old != istate->split_index->base->cache[new->index - 1]) {
+			struct cache_entry *ce = istate->split_index->base->cache[new->index - 1];
+			drop_ce_ref(ce);
+		}
 		istate->split_index->base->cache[new->index - 1] = new;
+		add_ce_ref(new);
 	}
 }
diff --git a/unpack-trees.c b/unpack-trees.c
index f932e80..1a0a637 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -606,8 +606,10 @@ static int unpack_nondirectories(int n, unsigned long mask,
 					o);
 		for (i = 0; i < n; i++) {
 			struct cache_entry *ce = src[i + o->merge];
-			if (ce != o->df_conflict_entry)
-				free(ce);
+			if (ce != o->df_conflict_entry) {
+				drop_ce_ref(ce);
+				src[i + o->merge] = NULL;
+			}
 		}
 		return rc;
 	}
-- 
2.4.2.644.g97b850b-twtrsrc

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] merge: fix cache_entry use-after-free
  2015-10-12 22:03 [PATCH v2] merge: fix cache_entry use-after-free David Turner
@ 2015-10-12 22:28 ` Junio C Hamano
  2015-10-13 19:22   ` David Turner
  2015-10-14 21:17 ` Junio C Hamano
  1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2015-10-12 22:28 UTC (permalink / raw)
  To: David Turner; +Cc: git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

> From: Keith McGuigan <kmcguigan@twitter.com>
>
> During merges, we would previously free entries that we no longer need
> in the destination index.  But those entries might also be stored in
> the dir_entry cache, and when a later call to add_to_index found them,
> they would be used after being freed.
>
> To prevent this, add a ref count for struct cache_entry.  Whenever
> a cache entry is added to a data structure, the ref count is incremented;
> when it is removed from the data structure, it is decremented.  When
> it hits zero, the cache_entry is freed.
>
> Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
> ---

I'll forge your "messenger's sign-off" here ;-)

> diff --git a/unpack-trees.c b/unpack-trees.c
> index f932e80..1a0a637 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -606,8 +606,10 @@ static int unpack_nondirectories(int n, unsigned long mask,
>  					o);
>  		for (i = 0; i < n; i++) {
>  			struct cache_entry *ce = src[i + o->merge];
> -			if (ce != o->df_conflict_entry)
> -				free(ce);
> +			if (ce != o->df_conflict_entry) {
> +				drop_ce_ref(ce);
> +				src[i + o->merge] = NULL;
> +			}

This one smelled iffy.  I think it is safe because the caller does
not look at src[] other than src[0] after this function returns, and
this setting to NULL happens only when o->merge is set to 1, so I do
not think this is buggy, but at the same time I do not think setting
to NULL is necessary.

Other than that, looks nice.  Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] merge: fix cache_entry use-after-free
  2015-10-12 22:28 ` Junio C Hamano
@ 2015-10-13 19:22   ` David Turner
  2015-10-13 21:05     ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: David Turner @ 2015-10-13 19:22 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Keith McGuigan

On Mon, 2015-10-12 at 15:28 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> > From: Keith McGuigan <kmcguigan@twitter.com>
> >
> > During merges, we would previously free entries that we no longer need
> > in the destination index.  But those entries might also be stored in
> > the dir_entry cache, and when a later call to add_to_index found them,
> > they would be used after being freed.
> >
> > To prevent this, add a ref count for struct cache_entry.  Whenever
> > a cache entry is added to a data structure, the ref count is incremented;
> > when it is removed from the data structure, it is decremented.  When
> > it hits zero, the cache_entry is freed.
> >
> > Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
> > ---
> 
> I'll forge your "messenger's sign-off" here ;-)

Thanks.

> > diff --git a/unpack-trees.c b/unpack-trees.c
> > index f932e80..1a0a637 100644
> > --- a/unpack-trees.c
> > +++ b/unpack-trees.c
> > @@ -606,8 +606,10 @@ static int unpack_nondirectories(int n, unsigned long mask,
> >  					o);
> >  		for (i = 0; i < n; i++) {
> >  			struct cache_entry *ce = src[i + o->merge];
> > -			if (ce != o->df_conflict_entry)
> > -				free(ce);
> > +			if (ce != o->df_conflict_entry) {
> > +				drop_ce_ref(ce);
> > +				src[i + o->merge] = NULL;
> > +			}
> 
> This one smelled iffy.  I think it is safe because the caller does
> not look at src[] other than src[0] after this function returns, and
> this setting to NULL happens only when o->merge is set to 1, so I do
> not think this is buggy, but at the same time I do not think setting
> to NULL is necessary.
> 
> Other than that, looks nice.  Thanks.

I like to set a pointer to NULL after I free the thing pointed to by it,
because it helps make use-after-free bugs easier to detect.  

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] merge: fix cache_entry use-after-free
  2015-10-13 19:22   ` David Turner
@ 2015-10-13 21:05     ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2015-10-13 21:05 UTC (permalink / raw)
  To: David Turner; +Cc: git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

>> This one smelled iffy.  I think it is safe because the caller does
>> not look at src[] other than src[0] after this function returns, and
>> this setting to NULL happens only when o->merge is set to 1, so I do
>> not think this is buggy, but at the same time I do not think setting
>> to NULL is necessary.
>> 
>> Other than that, looks nice.  Thanks.
>
> I like to set a pointer to NULL after I free the thing pointed to by it,
> because it helps make use-after-free bugs easier to detect.  

Yes, but it also helps unintended bugs to creep in if done blindly,
and that is why I vetted what happens in the codepath of the caller
of this function after src[] entries are NULLed (the caller could be
expecting to do things only to NULLed fields, for example, in which
case clearing them like this patch would have changed the behaviour
of the caller after this function returns).

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] merge: fix cache_entry use-after-free
  2015-10-12 22:03 [PATCH v2] merge: fix cache_entry use-after-free David Turner
  2015-10-12 22:28 ` Junio C Hamano
@ 2015-10-14 21:17 ` Junio C Hamano
  2015-10-14 22:00   ` David Turner
  1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2015-10-14 21:17 UTC (permalink / raw)
  To: David Turner; +Cc: git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

> +	unsigned int ref_count; /* count the number of refs to this in dir_hash */

Me makes a mental note of the type used...

> @@ -213,6 +214,32 @@ struct cache_entry {
>  struct pathspec;
>  
>  /*
> + * Increment the cache_entry reference count.  Should be called
> + * whenever a pointer to a cache_entry is retained in a data structure,
> + * thus marking it as alive.
> + */
> +static inline void add_ce_ref(struct cache_entry *ce)
> +{
> +	assert(ce != NULL && ce->ref_count >= 0);

... and notices that ce->ref_count will always be non-negative here


> +	ce->ref_count++;
> +}
> +
> +/*
> + * Decrement the cache_entry reference count.  Should be called whenever
> + * a pointer to a cache_entry is dropped.  Once the counter drops to 0
> + * the cache_entry memory will be safely freed.
> + */
> +static inline void drop_ce_ref(struct cache_entry *ce)
> +{
> +	if (ce != NULL) {
> +		assert(ce->ref_count >= 0);

... and here.

By not checking integer overflow/wraparound, the code is assuming
that a ce entry will never referenced more than 4 billion times on
32-bit platform.  And that is a sensible assumption as there aren't
that many pointers in the address space to make that many reference
anyway.

Perhaps the code can assume the number won't be more than 2 billion
and use a signed type instead for the reference counting?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] merge: fix cache_entry use-after-free
  2015-10-14 21:17 ` Junio C Hamano
@ 2015-10-14 22:00   ` David Turner
  0 siblings, 0 replies; 6+ messages in thread
From: David Turner @ 2015-10-14 22:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Keith McGuigan

On Wed, 2015-10-14 at 14:17 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> > +	unsigned int ref_count; /* count the number of refs to this in dir_hash */
> 
> Me makes a mental note of the type used...
> 
> > @@ -213,6 +214,32 @@ struct cache_entry {
> >  struct pathspec;
> >  
> >  /*
> > + * Increment the cache_entry reference count.  Should be called
> > + * whenever a pointer to a cache_entry is retained in a data structure,
> > + * thus marking it as alive.
> > + */
> > +static inline void add_ce_ref(struct cache_entry *ce)
> > +{
> > +	assert(ce != NULL && ce->ref_count >= 0);
> 
> ... and notices that ce->ref_count will always be non-negative here
> 
> 
> > +	ce->ref_count++;
> > +}
> > +
> > +/*
> > + * Decrement the cache_entry reference count.  Should be called whenever
> > + * a pointer to a cache_entry is dropped.  Once the counter drops to 0
> > + * the cache_entry memory will be safely freed.
> > + */
> > +static inline void drop_ce_ref(struct cache_entry *ce)
> > +{
> > +	if (ce != NULL) {
> > +		assert(ce->ref_count >= 0);
> 
> ... and here.
> 
> By not checking integer overflow/wraparound, the code is assuming
> that a ce entry will never referenced more than 4 billion times on
> 32-bit platform.  And that is a sensible assumption as there aren't
> that many pointers in the address space to make that many reference
> anyway.
> 
> Perhaps the code can assume the number won't be more than 2 billion
> and use a signed type instead for the reference counting?

Will do.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-10-14 22:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-12 22:03 [PATCH v2] merge: fix cache_entry use-after-free David Turner
2015-10-12 22:28 ` Junio C Hamano
2015-10-13 19:22   ` David Turner
2015-10-13 21:05     ` Junio C Hamano
2015-10-14 21:17 ` Junio C Hamano
2015-10-14 22:00   ` David Turner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).