git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] merge: fix cache_entry use-after-free
@ 2015-10-14 22:07 David Turner
  2015-10-15  3:35 ` René Scharfe
  0 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-10-14 22:07 UTC (permalink / raw)
  To: git; +Cc: Keith McGuigan, David Turner

From: Keith McGuigan <kmcguigan@twitter.com>

During merges, we would previously free entries that we no longer need
in the destination index.  But those entries might also be stored in
the dir_entry cache, and when a later call to add_to_index found them,
they would be used after being freed.

To prevent this, add a ref count for struct cache_entry.  Whenever
a cache entry is added to a data structure, the ref count is incremented;
when it is removed from the data structure, it is decremented.  When
it hits zero, the cache_entry is freed.

Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
Signed-off-by: David Turner <dturner@twopensource.com>
---

Fix type of ref_count (from unsigned int to int).


 cache.h        | 27 +++++++++++++++++++++++++++
 name-hash.c    |  7 ++++++-
 read-cache.c   |  6 +++++-
 split-index.c  | 13 ++++++++-----
 unpack-trees.c |  6 ++++--
 5 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/cache.h b/cache.h
index 752031e..7906026 100644
--- a/cache.h
+++ b/cache.h
@@ -149,6 +149,7 @@ struct stat_data {
 
 struct cache_entry {
 	struct hashmap_entry ent;
+	int ref_count; /* count the number of refs to this in dir_hash */
 	struct stat_data ce_stat_data;
 	unsigned int ce_mode;
 	unsigned int ce_flags;
@@ -213,6 +214,32 @@ struct cache_entry {
 struct pathspec;
 
 /*
+ * Increment the cache_entry reference count.  Should be called
+ * whenever a pointer to a cache_entry is retained in a data structure,
+ * thus marking it as alive.
+ */
+static inline void add_ce_ref(struct cache_entry *ce)
+{
+	assert(ce != NULL && ce->ref_count >= 0);
+	ce->ref_count++;
+}
+
+/*
+ * Decrement the cache_entry reference count.  Should be called whenever
+ * a pointer to a cache_entry is dropped.  Once the counter drops to 0
+ * the cache_entry memory will be safely freed.
+ */
+static inline void drop_ce_ref(struct cache_entry *ce)
+{
+	if (ce != NULL) {
+		assert(ce->ref_count >= 0);
+		if (--ce->ref_count < 1) {
+			free(ce);
+		}
+	}
+}
+
+/*
  * Copy the sha1 and stat state of a cache entry from one to
  * another. But we never change the name, or the hash state!
  */
diff --git a/name-hash.c b/name-hash.c
index 702cd05..f12c919 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -66,6 +66,7 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate,
 		dir = xcalloc(1, sizeof(struct dir_entry));
 		hashmap_entry_init(dir, memihash(ce->name, namelen));
 		dir->namelen = namelen;
+		add_ce_ref(ce);
 		dir->ce = ce;
 		hashmap_add(&istate->dir_hash, dir);
 
@@ -92,7 +93,9 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
 	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
 	while (dir && !(--dir->nr)) {
 		struct dir_entry *parent = dir->parent;
-		hashmap_remove(&istate->dir_hash, dir, NULL);
+		struct dir_entry *removed = hashmap_remove(&istate->dir_hash, dir, NULL);
+		assert(removed == dir);
+		drop_ce_ref(dir->ce);
 		free(dir);
 		dir = parent;
 	}
@@ -105,6 +108,7 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 	ce->ce_flags |= CE_HASHED;
 	hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));
 	hashmap_add(&istate->name_hash, ce);
+	add_ce_ref(ce);
 
 	if (ignore_case)
 		add_dir_entry(istate, ce);
@@ -147,6 +151,7 @@ void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
 		return;
 	ce->ce_flags &= ~CE_HASHED;
 	hashmap_remove(&istate->name_hash, ce, ce);
+	drop_ce_ref(ce);
 
 	if (ignore_case)
 		remove_dir_entry(istate, ce);
diff --git a/read-cache.c b/read-cache.c
index 87204a5..8b685bb 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -52,7 +52,9 @@ static const char *alternate_index_output;
 
 static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
+	/* istate->cache[nr] is assumed to not hold a live value */
 	istate->cache[nr] = ce;
+	add_ce_ref(ce);
 	add_name_hash(istate, ce);
 }
 
@@ -62,7 +64,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
 
 	replace_index_entry_in_base(istate, old, ce);
 	remove_name_hash(istate, old);
-	free(old);
+	drop_ce_ref(old);
 	set_index_entry(istate, nr, ce);
 	ce->ce_flags |= CE_UPDATE_IN_BASE;
 	istate->cache_changed |= CE_ENTRY_CHANGED;
@@ -75,6 +77,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 
 	new = xmalloc(cache_entry_size(namelen));
 	copy_cache_entry(new, old);
+	new->ref_count = 0;
 	new->ce_flags &= ~CE_HASHED;
 	new->ce_namelen = namelen;
 	new->index = 0;
@@ -1426,6 +1429,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 {
 	struct cache_entry *ce = xmalloc(cache_entry_size(len));
 
+	ce->ref_count = 0;
 	ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
 	ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
 	ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
diff --git a/split-index.c b/split-index.c
index 968b780..61b5631 100644
--- a/split-index.c
+++ b/split-index.c
@@ -124,7 +124,7 @@ static void replace_entry(size_t pos, void *data)
 	src->ce_flags |= CE_UPDATE_IN_BASE;
 	src->ce_namelen = dst->ce_namelen;
 	copy_cache_entry(dst, src);
-	free(src);
+	drop_ce_ref(src);
 	si->nr_replacements++;
 }
 
@@ -227,7 +227,7 @@ void prepare_to_write_split_index(struct index_state *istate)
 			base->ce_flags = base_flags;
 			if (ret)
 				ce->ce_flags |= CE_UPDATE_IN_BASE;
-			free(base);
+			drop_ce_ref(base);
 			si->base->cache[ce->index - 1] = ce;
 		}
 		for (i = 0; i < si->base->cache_nr; i++) {
@@ -302,7 +302,7 @@ void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce
 	    ce == istate->split_index->base->cache[ce->index - 1])
 		ce->ce_flags |= CE_REMOVE;
 	else
-		free(ce);
+		drop_ce_ref(ce);
 }
 
 void replace_index_entry_in_base(struct index_state *istate,
@@ -314,8 +314,11 @@ void replace_index_entry_in_base(struct index_state *istate,
 	    istate->split_index->base &&
 	    old->index <= istate->split_index->base->cache_nr) {
 		new->index = old->index;
-		if (old != istate->split_index->base->cache[new->index - 1])
-			free(istate->split_index->base->cache[new->index - 1]);
+		if (old != istate->split_index->base->cache[new->index - 1]) {
+			struct cache_entry *ce = istate->split_index->base->cache[new->index - 1];
+			drop_ce_ref(ce);
+		}
 		istate->split_index->base->cache[new->index - 1] = new;
+		add_ce_ref(new);
 	}
 }
diff --git a/unpack-trees.c b/unpack-trees.c
index f932e80..1a0a637 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -606,8 +606,10 @@ static int unpack_nondirectories(int n, unsigned long mask,
 					o);
 		for (i = 0; i < n; i++) {
 			struct cache_entry *ce = src[i + o->merge];
-			if (ce != o->df_conflict_entry)
-				free(ce);
+			if (ce != o->df_conflict_entry) {
+				drop_ce_ref(ce);
+				src[i + o->merge] = NULL;
+			}
 		}
 		return rc;
 	}
-- 
2.4.2.644.g97b850b-twtrsrc

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-14 22:07 [PATCH v3] merge: fix cache_entry use-after-free David Turner
@ 2015-10-15  3:35 ` René Scharfe
  2015-10-15 19:02   ` David Turner
  0 siblings, 1 reply; 9+ messages in thread
From: René Scharfe @ 2015-10-15  3:35 UTC (permalink / raw)
  To: David Turner, git; +Cc: Keith McGuigan

Am 15.10.2015 um 00:07 schrieb David Turner:
> From: Keith McGuigan <kmcguigan@twitter.com>
>
> During merges, we would previously free entries that we no longer need
> in the destination index.  But those entries might also be stored in
> the dir_entry cache, and when a later call to add_to_index found them,
> they would be used after being freed.
>
> To prevent this, add a ref count for struct cache_entry.  Whenever
> a cache entry is added to a data structure, the ref count is incremented;
> when it is removed from the data structure, it is decremented.  When
> it hits zero, the cache_entry is freed.
>
> Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
> Signed-off-by: David Turner <dturner@twopensource.com>
> ---
>
> Fix type of ref_count (from unsigned int to int).
>
>
>   cache.h        | 27 +++++++++++++++++++++++++++
>   name-hash.c    |  7 ++++++-
>   read-cache.c   |  6 +++++-
>   split-index.c  | 13 ++++++++-----
>   unpack-trees.c |  6 ++++--
>   5 files changed, 50 insertions(+), 9 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index 752031e..7906026 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -149,6 +149,7 @@ struct stat_data {
>
>   struct cache_entry {
>   	struct hashmap_entry ent;
> +	int ref_count; /* count the number of refs to this in dir_hash */
>   	struct stat_data ce_stat_data;
>   	unsigned int ce_mode;
>   	unsigned int ce_flags;
> @@ -213,6 +214,32 @@ struct cache_entry {
>   struct pathspec;
>
>   /*
> + * Increment the cache_entry reference count.  Should be called
> + * whenever a pointer to a cache_entry is retained in a data structure,
> + * thus marking it as alive.
> + */
> +static inline void add_ce_ref(struct cache_entry *ce)
> +{
> +	assert(ce != NULL && ce->ref_count >= 0);
> +	ce->ref_count++;
> +}
> +
> +/*
> + * Decrement the cache_entry reference count.  Should be called whenever
> + * a pointer to a cache_entry is dropped.  Once the counter drops to 0
> + * the cache_entry memory will be safely freed.
> + */
> +static inline void drop_ce_ref(struct cache_entry *ce)
> +{
> +	if (ce != NULL) {
> +		assert(ce->ref_count >= 0);

Shouldn't this be "> 0" to prevent double frees?

> +		if (--ce->ref_count < 1) {
> +			free(ce);
> +		}
> +	}
> +}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-15  3:35 ` René Scharfe
@ 2015-10-15 19:02   ` David Turner
  2015-10-15 20:38     ` René Scharfe
  2015-10-15 20:51     ` Junio C Hamano
  0 siblings, 2 replies; 9+ messages in thread
From: David Turner @ 2015-10-15 19:02 UTC (permalink / raw)
  To: René Scharfe; +Cc: git, Keith McGuigan

On Thu, 2015-10-15 at 05:35 +0200, René Scharfe wrote:
> Am 15.10.2015 um 00:07 schrieb David Turner:
> > From: Keith McGuigan <kmcguigan@twitter.com>
> >
> > During merges, we would previously free entries that we no longer need
> > in the destination index.  But those entries might also be stored in
> > the dir_entry cache, and when a later call to add_to_index found them,
> > they would be used after being freed.
> >
> > To prevent this, add a ref count for struct cache_entry.  Whenever
> > a cache entry is added to a data structure, the ref count is incremented;
> > when it is removed from the data structure, it is decremented.  When
> > it hits zero, the cache_entry is freed.
> >
> > Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
> > Signed-off-by: David Turner <dturner@twopensource.com>
> > ---
> >
> > Fix type of ref_count (from unsigned int to int).
> >
> >
> >   cache.h        | 27 +++++++++++++++++++++++++++
> >   name-hash.c    |  7 ++++++-
> >   read-cache.c   |  6 +++++-
> >   split-index.c  | 13 ++++++++-----
> >   unpack-trees.c |  6 ++++--
> >   5 files changed, 50 insertions(+), 9 deletions(-)
> >
> > diff --git a/cache.h b/cache.h
> > index 752031e..7906026 100644
> > --- a/cache.h
> > +++ b/cache.h
> > @@ -149,6 +149,7 @@ struct stat_data {
> >
> >   struct cache_entry {
> >   	struct hashmap_entry ent;
> > +	int ref_count; /* count the number of refs to this in dir_hash */
> >   	struct stat_data ce_stat_data;
> >   	unsigned int ce_mode;
> >   	unsigned int ce_flags;
> > @@ -213,6 +214,32 @@ struct cache_entry {
> >   struct pathspec;
> >
> >   /*
> > + * Increment the cache_entry reference count.  Should be called
> > + * whenever a pointer to a cache_entry is retained in a data structure,
> > + * thus marking it as alive.
> > + */
> > +static inline void add_ce_ref(struct cache_entry *ce)
> > +{
> > +	assert(ce != NULL && ce->ref_count >= 0);
> > +	ce->ref_count++;
> > +}
> > +
> > +/*
> > + * Decrement the cache_entry reference count.  Should be called whenever
> > + * a pointer to a cache_entry is dropped.  Once the counter drops to 0
> > + * the cache_entry memory will be safely freed.
> > + */
> > +static inline void drop_ce_ref(struct cache_entry *ce)
> > +{
> > +	if (ce != NULL) {
> > +		assert(ce->ref_count >= 0);
> 
> Shouldn't this be "> 0" to prevent double frees?

No.  If the ref_count is 1, then there is still some reference to the
ce.  If it is 0, there is no reference to it, and the next check (< 1)
will succeed and the ce will get freed.  

> > +		if (--ce->ref_count < 1) {
> > +			free(ce);
> > +		}
> > +	}
> > +}
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-15 19:02   ` David Turner
@ 2015-10-15 20:38     ` René Scharfe
  2015-10-15 20:51     ` Junio C Hamano
  1 sibling, 0 replies; 9+ messages in thread
From: René Scharfe @ 2015-10-15 20:38 UTC (permalink / raw)
  To: David Turner; +Cc: git, Keith McGuigan

Am 15.10.2015 um 21:02 schrieb David Turner:
> On Thu, 2015-10-15 at 05:35 +0200, René Scharfe wrote:
>> Am 15.10.2015 um 00:07 schrieb David Turner:
>>> From: Keith McGuigan <kmcguigan@twitter.com>
>>>
>>> During merges, we would previously free entries that we no longer need
>>> in the destination index.  But those entries might also be stored in
>>> the dir_entry cache, and when a later call to add_to_index found them,
>>> they would be used after being freed.
>>>
>>> To prevent this, add a ref count for struct cache_entry.  Whenever
>>> a cache entry is added to a data structure, the ref count is incremented;
>>> when it is removed from the data structure, it is decremented.  When
>>> it hits zero, the cache_entry is freed.
>>>
>>> Signed-off-by: Keith McGuigan <kmcguigan@twitter.com>
>>> Signed-off-by: David Turner <dturner@twopensource.com>
>>> ---
>>>
>>> Fix type of ref_count (from unsigned int to int).
>>>
>>>
>>>    cache.h        | 27 +++++++++++++++++++++++++++
>>>    name-hash.c    |  7 ++++++-
>>>    read-cache.c   |  6 +++++-
>>>    split-index.c  | 13 ++++++++-----
>>>    unpack-trees.c |  6 ++++--
>>>    5 files changed, 50 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/cache.h b/cache.h
>>> index 752031e..7906026 100644
>>> --- a/cache.h
>>> +++ b/cache.h
>>> @@ -149,6 +149,7 @@ struct stat_data {
>>>
>>>    struct cache_entry {
>>>    	struct hashmap_entry ent;
>>> +	int ref_count; /* count the number of refs to this in dir_hash */
>>>    	struct stat_data ce_stat_data;
>>>    	unsigned int ce_mode;
>>>    	unsigned int ce_flags;
>>> @@ -213,6 +214,32 @@ struct cache_entry {
>>>    struct pathspec;
>>>
>>>    /*
>>> + * Increment the cache_entry reference count.  Should be called
>>> + * whenever a pointer to a cache_entry is retained in a data structure,
>>> + * thus marking it as alive.
>>> + */
>>> +static inline void add_ce_ref(struct cache_entry *ce)
>>> +{
>>> +	assert(ce != NULL && ce->ref_count >= 0);
>>> +	ce->ref_count++;
>>> +}
>>> +
>>> +/*
>>> + * Decrement the cache_entry reference count.  Should be called whenever
>>> + * a pointer to a cache_entry is dropped.  Once the counter drops to 0
>>> + * the cache_entry memory will be safely freed.
>>> + */
>>> +static inline void drop_ce_ref(struct cache_entry *ce)
>>> +{
>>> +	if (ce != NULL) {
>>> +		assert(ce->ref_count >= 0);
>>
>> Shouldn't this be "> 0" to prevent double frees?
>
> No.  If the ref_count is 1, then there is still some reference to the
> ce.  If it is 0, there is no reference to it, and the next check (< 1)
> will succeed and the ce will get freed.
>
>>> +		if (--ce->ref_count < 1) {
>>> +			free(ce);
>>> +		}
>>> +	}
>>> +}

OK, let me think out loud, step by step:

Given ref_count == 1 then the assert passes, ref_count gets decremented 
to 0, which is less than 1, so ce is freed.

Given ref_count == 0 then the assert passes, refcount gets decremented 
to -1, which is less than 1, so ce is freed again.

Where did I go wrong?

René

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-15 19:02   ` David Turner
  2015-10-15 20:38     ` René Scharfe
@ 2015-10-15 20:51     ` Junio C Hamano
  2015-10-16  7:05       ` David Turner
  1 sibling, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2015-10-15 20:51 UTC (permalink / raw)
  To: David Turner; +Cc: René Scharfe, git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

>> > +static inline void drop_ce_ref(struct cache_entry *ce)
>> > +{
>> > +	if (ce != NULL) {
>> > +		assert(ce->ref_count >= 0);
>> 
>> Shouldn't this be "> 0" to prevent double frees?
>
> No.  If the ref_count is 1, then there is still some reference to the
> ce.  If it is 0, there is no reference to it, and the next check (< 1)
> will succeed and the ce will get freed.  
>
>> > +		if (--ce->ref_count < 1) {
>> > +			free(ce);
>> > +		}
>> > +	}
>> > +}

Hmm, but it still feels fuzzy, no?  I cannot tell from the above
exchange if a ce with ref_count==0 upon entry to this function is
supposed to have somebody pointing at it, somebody just assigned
NULL (or another pointer) to the last pointer that was pointing at
it, or what else.  If the expected calling sequence were:

	drop_ce_ref(thing->ce);
	thing->ce = NULL;

and the original thing->ce were the last reference to the cache
entry about to be freed, its refcnt is better be 1 not 0.  And when
this function decrements refcnt down to 0, the cache entry is freed.

Admittedly, if the original refcnt were 0, with signed refcnt, it
will decrement to -1 and it will be freed, too, but I do not think
that is what was intended---refcnt is initialized to 0 upon creating
an unreferenced cache entry, and set_index_entry() and friends that
store a pointer to anything calls add_ce_ref(), so I read the code
as intending to make "0 means no pointer points at it".

As far as I can tell, the only effect of this assert() that uses >=0
not >0 is to avoid triggering it on this calling sequence:

    new_ce = new_cache_entry();
    drop_ce_ref(new_ce);

that is, you create because you _might_ use it, and then later
decide not to use it (and the free() part wouldn't have worked
correctly with unsigned refcnt ;-).

By the way, the log message says "During merges, we would previously
free entries that we no longer need in the destination index.  But
those entries might also be stored in the dir_entry cache," as if
this issue were present and waiting to trigger for all merges for
all people.  Given that Linus does hundreds of merges in a day
during the merge window (and I do several dozens a day), I am quite
surprised that nobody noticed this issue.  If there is a more
specific condition that allows this bug to trigger (e.g. "dir-entry
cache is used only under such and such conditions") that the log
message does not talk about to explain why this bug was not seen
widely, it would be a good thing to add.  It is very puzzling
otherwise.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-15 20:51     ` Junio C Hamano
@ 2015-10-16  7:05       ` David Turner
  2015-10-16 16:04         ` Junio C Hamano
  0 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-10-16  7:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: René Scharfe, git, Keith McGuigan

On Thu, 2015-10-15 at 13:51 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> >> > +static inline void drop_ce_ref(struct cache_entry *ce)
> >> > +{
> >> > +	if (ce != NULL) {
> >> > +		assert(ce->ref_count >= 0);
> >> 
> >> Shouldn't this be "> 0" to prevent double frees?
> >
> > No.  If the ref_count is 1, then there is still some reference to the
> > ce.  If it is 0, there is no reference to it, and the next check (< 1)
> > will succeed and the ce will get freed.  
> >
> >> > +		if (--ce->ref_count < 1) {
> >> > +			free(ce);
> >> > +		}
> >> > +	}
> >> > +}
> 
> Hmm, but it still feels fuzzy, no?  I cannot tell from the above
> exchange if a ce with ref_count==0 upon entry to this function is
> supposed to have somebody pointing at it, somebody just assigned
> NULL (or another pointer) to the last pointer that was pointing at
> it, or what else.  If the expected calling sequence were:
> 
> 	drop_ce_ref(thing->ce);
> 	thing->ce = NULL;
> 
> and the original thing->ce were the last reference to the cache
> entry about to be freed, its refcnt is better be 1 not 0.  And when
> this function decrements refcnt down to 0, the cache entry is freed.
> 
> Admittedly, if the original refcnt were 0, with signed refcnt, it
> will decrement to -1 and it will be freed, too, but I do not think
> that is what was intended---refcnt is initialized to 0 upon creating
> an unreferenced cache entry, and set_index_entry() and friends that
> store a pointer to anything calls add_ce_ref(), so I read the code
> as intending to make "0 means no pointer points at it".
> 
> As far as I can tell, the only effect of this assert() that uses >=0
> not >0 is to avoid triggering it on this calling sequence:
> 
>     new_ce = new_cache_entry();
>     drop_ce_ref(new_ce);
> 
> that is, you create because you _might_ use it, and then later
> decide not to use it (and the free() part wouldn't have worked
> correctly with unsigned refcnt ;-).

You and René are right: there is an off-by-one here.  Will fix and
re-roll.  Thanks.

> By the way, the log message says "During merges, we would previously
> free entries that we no longer need in the destination index.  But
> those entries might also be stored in the dir_entry cache," as if
> this issue were present and waiting to trigger for all merges for
> all people.  Given that Linus does hundreds of merges in a day
> during the merge window (and I do several dozens a day), I am quite
> surprised that nobody noticed this issue.  If there is a more
> specific condition that allows this bug to trigger (e.g. "dir-entry
> cache is used only under such and such conditions") that the log
> message does not talk about to explain why this bug was not seen
> widely, it would be a good thing to add.  It is very puzzling
> otherwise.

We also do dozens or hundreds of merges per day and only saw this quite
rarely.  Interestingly, we could only trigger it with an alternate
hashing function for git's hashmap implementation, and only once a
certain bug in that implementation was *fixed*.  But that was just a
trigger; it was certainly not the cause.  The bug would, in general,
have caused more hash collisions due to worse mixing, but I believe it
is possible that some particular collision would have been present in
the non-bugged version of the code but not in the bugged version.

It may have also had something to do with a case-insensitive filesystem;
we never saw anyone reproduce it on anything but OS X, and even there it
was quite difficult to reproduce.

In short, I don't think we know why the bug was not seen widely.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-16  7:05       ` David Turner
@ 2015-10-16 16:04         ` Junio C Hamano
  2015-10-19 22:27           ` David Turner
  0 siblings, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2015-10-16 16:04 UTC (permalink / raw)
  To: David Turner; +Cc: René Scharfe, git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

> We also do dozens or hundreds of merges per day and only saw this quite
> rarely.  Interestingly, we could only trigger it with an alternate
> hashing function for git's hashmap implementation, and only once a
> certain bug in that implementation was *fixed*.  But that was just a
> trigger; it was certainly not the cause.  The bug would, in general,
> have caused more hash collisions due to worse mixing, but I believe it
> is possible that some particular collision would have been present in
> the non-bugged version of the code but not in the bugged version.
>
> It may have also had something to do with a case-insensitive filesystem;
> we never saw anyone reproduce it on anything but OS X, and even there it
> was quite difficult to reproduce.
>
> In short, I don't think we know why the bug was not seen widely.

It has been a long time since I looked at name-hash.c and I am fuzzy
on what the four functions (cache|index)_(file|dir)_exists are meant
for, but I have this feeling that the original premise of the patch,
"we may free a ce because we no longer use it in the index, but it
may still need to keep a reference to it in name-hash" may be wrong
in the first place.  The proposed "fix" conceptually feels wrong.

The whole point of the name-hash is so that we can detect collisions
in two names, one of which wants to have a file in one place while
the other wants to have a directory, at the same path in the index.
The pathnames and cache entries registered in the name-hash have to
be the ones that are relevant to the index in quesition.  If a new
ce will be added to the index, the name-hash will have to know about
its path (and that is what CE_HASHED bit is about).  On the other
hand, if you are going to remove an existing ce from the index, its
sub-paths should no longer prevent other cache entries to be there.

E.g. if you have "a/b", it must prevent "A" from entering the index
and the name-hash helps you to do so; when you remove "a/b", then
name-hash must now allow "A" to enter the index.  So "a/b" must be
removed from the name-hash by calling remove_name_hash() and normal
codepaths indeed do so.

I do not doubt the existence of "use-after-free bug" you observed,
but I am not convinced that refcounting is "fixing" the problem; it
smells like papering over a different bug that is the root cause of
the use-after-free.

For example, if we forget to "unhash" a ce from name-hash when we
remove a ce from the index (or after we "hashed" it, expecting to
add it to the index, but in the end decided not to add to the index,
perhaps), we would see a now-freed ce still in the name-hash.
Checking a path against the name-hash in such a state would have to
use the ce->name from that stale ce, which is a use-after-free bug.

In such a situation, isn't the real cause of the bug the fact that
the stale ce that is no longer relevant to the true index contents
still in name-hash?  The refcounting does not fix the fact that a
ce->name of a stale ce that is no longer relevant being used for D/F
collision checking.

I am not saying that I found such a codepath that forgets to unhash,
but from the overall design and purpose of the name-hash subsystem,
anything that deliberately _allows_ a stale ce that does not exist
in the index in there smells like a workaround going in a wrong
direction.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-16 16:04         ` Junio C Hamano
@ 2015-10-19 22:27           ` David Turner
  2015-10-20  2:13             ` Junio C Hamano
  0 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-10-19 22:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: René Scharfe, git, Keith McGuigan

On Fri, 2015-10-16 at 09:04 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> > We also do dozens or hundreds of merges per day and only saw this quite
> > rarely.  Interestingly, we could only trigger it with an alternate
> > hashing function for git's hashmap implementation, and only once a
> > certain bug in that implementation was *fixed*.  But that was just a
> > trigger; it was certainly not the cause.  The bug would, in general,
> > have caused more hash collisions due to worse mixing, but I believe it
> > is possible that some particular collision would have been present in
> > the non-bugged version of the code but not in the bugged version.
> >
> > It may have also had something to do with a case-insensitive filesystem;
> > we never saw anyone reproduce it on anything but OS X, and even there it
> > was quite difficult to reproduce.
> >
> > In short, I don't think we know why the bug was not seen widely.
> 
> It has been a long time since I looked at name-hash.c and I am fuzzy
> on what the four functions (cache|index)_(file|dir)_exists are meant
> for, but I have this feeling that the original premise of the patch,
> "we may free a ce because we no longer use it in the index, but it
> may still need to keep a reference to it in name-hash" may be wrong
> in the first place.  The proposed "fix" conceptually feels wrong.
>
> The whole point of the name-hash is so that we can detect collisions
> in two names, one of which wants to have a file in one place while
> the other wants to have a directory, at the same path in the index.
> The pathnames and cache entries registered in the name-hash have to
> be the ones that are relevant to the index in quesition.  If a new
> ce will be added to the index, the name-hash will have to know about
> its path (and that is what CE_HASHED bit is about).  On the other
> hand, if you are going to remove an existing ce from the index, its
> sub-paths should no longer prevent other cache entries to be there.
> 
> E.g. if you have "a/b", it must prevent "A" from entering the index
> and the name-hash helps you to do so; when you remove "a/b", then
> name-hash must now allow "A" to enter the index.  So "a/b" must be
> removed from the name-hash by calling remove_name_hash() and normal
> codepaths indeed do so.
>
> I do not doubt the existence of "use-after-free bug" you observed,
> but I am not convinced that refcounting is "fixing" the problem; it
> smells like papering over a different bug that is the root cause of
> the use-after-free.
> 
> For example, if we forget to "unhash" a ce from name-hash when we
> remove a ce from the index (or after we "hashed" it, expecting to
> add it to the index, but in the end decided not to add to the index,
> perhaps), we would see a now-freed ce still in the name-hash.
> Checking a path against the name-hash in such a state would have to
> use the ce->name from that stale ce, which is a use-after-free bug.
> 
> In such a situation, isn't the real cause of the bug the fact that
> the stale ce that is no longer relevant to the true index contents
> still in name-hash?  The refcounting does not fix the fact that a
> ce->name of a stale ce that is no longer relevant being used for D/F
> collision checking.
> 
> I am not saying that I found such a codepath that forgets to unhash,
> but from the overall design and purpose of the name-hash subsystem,
> anything that deliberately _allows_ a stale ce that does not exist
> in the index in there smells like a workaround going in a wrong
> direction.

I've spent some time looking into this, but I don't quite have a repro.
I do have some comments which might be interesting.

The problem is not the name_hash, but with the dir_hash.  The dir_hash
is only even populated on case-insensitive filesystems (which is why you
and Linus don't see this bug).  The dir_hash is not designed to catch
d/f conflicts, but rather case conflicts (one side of a merge has
foo/bar; the other has FOO/bar).  For each directory, it maintains a
pointer to a cache_entry.  Then add_to_index uses that cache_entry to 
rewrite incoming entries' parent directories to the expected case.
Weirdly, that part of add_to_index is, so far as I can tell, never
called during a merge.  So where's are we using the freed value?
Probably in dir_entry_cmp, while adding another entry to the hashmap;
that's where our crash seems to have happened.  And our failure depended
on the details of the hash function as well; if a certain collision did
not happen, we would not see it.

There is, I think, another way to handle this: we could *copy* the path
into the struct dir_entry instead of pointing to a cache_entry.  But
this seems like a bunch of extra work; the refcounting is simpler.  

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] merge: fix cache_entry use-after-free
  2015-10-19 22:27           ` David Turner
@ 2015-10-20  2:13             ` Junio C Hamano
  0 siblings, 0 replies; 9+ messages in thread
From: Junio C Hamano @ 2015-10-20  2:13 UTC (permalink / raw)
  To: David Turner; +Cc: René Scharfe, git, Keith McGuigan

David Turner <dturner@twopensource.com> writes:

> The problem is not the name_hash, but with the dir_hash.  The dir_hash
> is only even populated on case-insensitive filesystems (which is why you
> and Linus don't see this bug).  The dir_hash is not designed to catch
> d/f conflicts, but rather case conflicts (one side of a merge has
> foo/bar; the other has FOO/bar).  For each directory, it maintains a
> pointer to a cache_entry.  Then add_to_index uses that cache_entry to 
> rewrite incoming entries' parent directories to the expected case.
> Weirdly, that part of add_to_index is, so far as I can tell, never
> called during a merge.  So where's are we using the freed value?
> Probably in dir_entry_cmp, while adding another entry to the hashmap;
> that's where our crash seems to have happened.  And our failure depended
> on the details of the hash function as well; if a certain collision did
> not happen, we would not see it.
>
> There is, I think, another way to handle this: we could *copy* the path
> into the struct dir_entry instead of pointing to a cache_entry.  But
> this seems like a bunch of extra work; the refcounting is simpler.  

This codepath is a mess.  Whoever wrote hash_dir_entry() seems to
have already known that the code is buggy by leaving this comment
there:

 * Note that the cache_entry stored with the dir_entry merely
 * supplies the name of the directory (up to dir_entry.namelen). We
 * track the number of 'active' files in a directory in dir_entry.nr,
 * so we can tell if the directory is still relevant, e.g. for git
 * status. However, if cache_entries are removed, we cannot pinpoint
 * an exact cache_entry that's still active. It is very possible that
 * multiple dir_entries point to the same cache_entry.

If you add "a/frotz" to the index, the code will somehow create a
dir_entry to represent that the canonical case for directory "a/" is
such, so that when you later try to add "A/nitfol", the code can
grab the canonical path "a/" from dir_entry hash and turn it into
"a/nitfol".  That is what happens in add_to_index in read-cache.c

But what happens if, after adding "a/frotz" and "a/nitfol" to the
index like that, you remove "a/frotz"?  I do not see any code that
notices the poitner to "a/frotz" will become stale and replace it
with a pointer to "a/nitfol" that is still in the system.  The next
time you try to add "a/xyzzy", find_dir_entry() will try to see if
there is an existing entry that matches "a/xyzzy"'s directory, and
one of the dir_entries has a stale pointer to ce for "a/frotz" that
is already gone.

I think the root cause of the bug is the external interface to the
index_dir_exists() function.  For the above processing, there is no
reason for dir_entry() to hold a pointer to ce for "a/frotz" or
"a/nitfol".  All it needs to have is an embedded string in the
structure itself, i.e.

        struct dir_entry {
                struct hashmap_entry ent;
                struct dir_entry *parent;
                int nr;
                unsigned int namelen;
                char name[FLEX_ARRAY];
        };

The caller of index_dir_exists() in add_to_index() can be adjusted
to work with a new function in name-hash.c that does this part:

	const char *startPtr = ce->name;
	const char *ptr = startPtr;
	while (*ptr) {
		while (*ptr && *ptr != '/')
			++ptr;
		if (*ptr == '/') {
			struct cache_entry *foundce;
			++ptr;
			foundce = index_dir_exists(istate, ce->name, ptr - ce->name - 1);
			if (foundce) {
				memcpy((void *)startPtr, foundce->name + (startPtr - ce->name), ptr - startPtr);
				startPtr = ptr;
			}
		}
	}

Perhaps name it "adjust_dirname_case(istate, ce->name)" and have it
do the whole while loop above, all inside name-hash.c.  That would
make this caller a lot easier to read and understand what is going
on.

The other one, directory_exists_in_index_icase() in dir.c, expects
that a ce is returned, but the way it uses the returned value does
not really require the function to return a ce.  It does look at the
ce->ce_mode but that is only because the way index_dir_exists()
tells its caller if there is a directory (hence some files inside
it) or if there is a submodule (hence there is nothing below it) by
making a fallback call internally to index_file_exists() and returns
the ce for gitlink only when (1) there is no directory in dir_hash
and (2) file_hash as a submodule in that path.

A more cleaner helper function to give what this caller really wants
to know that name-hash.c can provide would be a function that takes
pathname and len and returns an enum: "there is nothing there",
"there is a directory", or "there is a submodule".  For completeness
of the API, even though this sole caller does not need it, we could
throw in "there is a regular file" there, too, if we wanted to.

If I were to clean this up, I would probably use the above updated
definition of struct dir_entry and:

 * drop the fallback "return gitlink if there is one" from
   index_dir_exists(), and make index_dir_exists() return 0 (false)
   or 1 (true).

 * have directory_exists_in_index() first call the updated
   index_dir_exists() via the cache_dir_exists() convenience macro.
   If it returned true, it should return index_direcgtory.  If it
   returned false, make a call to cache_file_exists() to see if
   there is a gitlink and rturn index_gitdir when that is the case.
   When all of the above fails, return index_nonexistent.

That way, we do not have to worry about "this ce no longer exists in
the main index but we need to keep it around", saving 8-bytes from
each ce and refcounting mess in the code.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-10-20  2:14 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-14 22:07 [PATCH v3] merge: fix cache_entry use-after-free David Turner
2015-10-15  3:35 ` René Scharfe
2015-10-15 19:02   ` David Turner
2015-10-15 20:38     ` René Scharfe
2015-10-15 20:51     ` Junio C Hamano
2015-10-16  7:05       ` David Turner
2015-10-16 16:04         ` Junio C Hamano
2015-10-19 22:27           ` David Turner
2015-10-20  2:13             ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).