From: Pekka Enberg <penberg@cs.helsinki.fi>
To: manfred@colorfullife.com
Cc: Chris Mason <mason@suse.com>,
Linux Kernel <linux-kernel@vger.kernel.org>,
Dave Jones <davej@redhat.com>
Subject: Re: 2.6.16rc1-git4 slab corruption.
Date: Wed, 01 Feb 2006 18:27:38 +0200 [thread overview]
Message-ID: <1138811259.8604.2.camel@localhost> (raw)
In-Reply-To: <20060201160921.GC5875@redhat.com>
On 2/1/06, Dave Jones <davej@redhat.com> wrote:
> > > Manfred had a nice 'check all slabs before they're freed' patch, which might
> > > be worth resurrecting for some tests. It may be that we're corrupting rarely
> > > free'd slabs, making them hard to hit.
On Wed, Feb 01, 2006 at 09:27:02AM +0200, Pekka Enberg wrote:
> > Do you know where I can find that patch? I would like to try to sneak
> > that past Andrew. It seems silly not to have these useful slab
> > debugging patches within mainline.
On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> Here's the last version that I had that was rediffed against
> 2.6.13 or .14 (I forget which, it's been a while since I used it).
Thanks Dave. Manfred, is there a reason this wasn't merged with
mainline? It needs a bit of cleanup but seems useful for detecting slab
corruption early.
Pekka
>
> diff -urNp --exclude-from=/home/davej/.exclude linux-1000/mm/slab.c linux-1010/mm/slab.c
> --- linux-1000/mm/slab.c
> +++ linux-1010/mm/slab.c
> @@ -189,7 +189,7 @@
> */
>
> #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
> -#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
> +#define BUFCTL_ALLOC (((kmem_bufctl_t)(~0U))-1)
> #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
>
> /* Max number of objs-per-slab for caches which use off-slab slabs.
> @@ -355,6 +355,7 @@ struct kmem_cache_s {
> #if DEBUG
> int dbghead;
> int reallen;
> + unsigned long redzonetest;
> #endif
> };
>
> @@ -370,6 +371,7 @@ struct kmem_cache_s {
> */
> #define REAPTIMEOUT_CPUC (2*HZ)
> #define REAPTIMEOUT_LIST3 (4*HZ)
> +#define REDZONETIMEOUT (300*HZ)
>
> #if STATS
> #define STATS_INC_ACTIVE(x) ((x)->num_active++)
> @@ -1446,7 +1448,11 @@ next:
> }
>
> cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
> - ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
> + ((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
> +#if DEBUG
> + cachep->redzonetest = jiffies + REDZONETIMEOUT +
> + ((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
> +#endif
>
> /* Need the semaphore to access the chain. */
> down(&cache_chain_sem);
> @@ -2043,7 +2049,7 @@ retry:
> slabp->inuse++;
> next = slab_bufctl(slabp)[slabp->free];
> #if DEBUG
> - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
> + slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
> #endif
> slabp->free = next;
> }
> @@ -2181,7 +2187,7 @@ static void free_block(kmem_cache_t *cac
> objnr = (objp - slabp->s_mem) / cachep->objsize;
> check_slabp(cachep, slabp);
> #if DEBUG
> - if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
> + if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
> printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
> cachep->name, objp);
> BUG();
> @@ -2409,7 +2415,7 @@ got_slabp:
> slabp->inuse++;
> next = slab_bufctl(slabp)[slabp->free];
> #if DEBUG
> - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
> + slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
> #endif
> slabp->free = next;
> check_slabp(cachep, slabp);
> @@ -2615,6 +2621,94 @@ unsigned int kmem_cache_size(kmem_cache_
>
> EXPORT_SYMBOL(kmem_cache_size);
>
> +#if DEBUG
> +static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
> +{
> + int i;
> +
> + if (!(cachep->flags & SLAB_RED_ZONE))
> + return; /* no redzone data to check */
> +
> +#if CONFIG_DEBUG_PAGEALLOC
> + /* Page alloc debugging on for this cache. Mapping & Unmapping happens
> + * without any locking, thus parallel checks are impossible.
> + */
> + if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
> + return;
> +#endif
> +
> + for (i=0;i<cachep->num;i++) {
> + void *objp = slabp->s_mem + cachep->objsize * i;
> + unsigned long red1, red2;
> +
> + red1 = *dbg_redzone1(cachep, objp);
> + red2 = *dbg_redzone2(cachep, objp);
> +
> + /* simplest case: marked as inactive */
> + if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
> + continue;
> +
> + /* tricky case: if the bufctl value is BUFCTL_ALLOC, then
> + * the object is either allocated or somewhere in a cpu
> + * cache. The cpu caches are lockless and there might be
> + * a concurrent alloc/free call, thus we must accept random
> + * combinations of RED_ACTIVE and _INACTIVE
> + */
> + if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
> + (red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
> + (red2 == RED_INACTIVE || red2 == RED_ACTIVE))
> + continue;
> +
> + printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%x\n",
> + cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
> + print_objinfo(cachep, objp, 2);
> + }
> +}
> +
> +/*
> + * Perform a self test on all slabs from a cache
> + */
> +static void check_redzone(kmem_cache_t *cachep)
> +{
> + struct list_head *q;
> + struct slab *slabp;
> +
> + check_spinlock_acquired(cachep);
> +
> + list_for_each(q,&cachep->lists.slabs_full) {
> + slabp = list_entry(q, struct slab, list);
> +
> + if (slabp->inuse != cachep->num) {
> + printk(KERN_INFO "slab %s: wrong slabp found in full slab chain at %p (%d/%d).\n",
> + cachep->name, slabp, slabp->inuse, cachep->num);
> + }
> + check_slabp(cachep, slabp);
> + check_slabuse(cachep, slabp);
> + }
> + list_for_each(q,&cachep->lists.slabs_partial) {
> + slabp = list_entry(q, struct slab, list);
> +
> + if (slabp->inuse == cachep->num || slabp->inuse == 0) {
> + printk(KERN_INFO "slab %s: wrong slab found in partial chain at %p (%d/%d).\n",
> + cachep->name, slabp, slabp->inuse, cachep->num);
> + }
> + check_slabp(cachep, slabp);
> + check_slabuse(cachep, slabp);
> + }
> + list_for_each(q,&cachep->lists.slabs_free) {
> + slabp = list_entry(q, struct slab, list);
> +
> + if (slabp->inuse != 0) {
> + printk(KERN_INFO "slab %s: wrong slab found in free chain at %p (%d/%d).\n",
> + cachep->name, slabp, slabp->inuse, cachep->num);
> + }
> + check_slabp(cachep, slabp);
> + check_slabuse(cachep, slabp);
> + }
> +}
> +
> +#endif
> +
> struct ccupdate_struct {
> kmem_cache_t *cachep;
> struct array_cache *new[NR_CPUS];
> @@ -2798,6 +2892,12 @@ static void cache_reap(void *unused)
>
> drain_array_locked(searchp, ac_data(searchp), 0);
>
> +#if DEBUG
> + if(time_before(searchp->redzonetest, jiffies)) {
> + searchp->redzonetest = jiffies + REDZONETIMEOUT;
> + check_redzone(searchp);
> + }
> +#endif
> if(time_after(searchp->lists.next_reap, jiffies))
> goto next_unlock;
>
>
>
next prev parent reply other threads:[~2006-02-01 16:27 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-01-31 18:03 2.6.16rc1-git4 slab corruption Dave Jones
2006-01-31 19:08 ` Chris Mason
2006-01-31 22:15 ` Dave Jones
2006-02-01 7:27 ` Pekka Enberg
2006-02-01 16:09 ` Dave Jones
2006-02-01 16:27 ` Pekka Enberg [this message]
2006-02-01 16:29 ` Pekka Enberg
2006-02-01 16:38 ` Dave Jones
2006-02-01 17:09 ` Pekka Enberg
2006-02-02 5:07 ` Dave Jones
2006-02-02 7:11 ` Pekka Enberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1138811259.8604.2.camel@localhost \
--to=penberg@cs.helsinki.fi \
--cc=davej@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=manfred@colorfullife.com \
--cc=mason@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox