All of lore.kernel.org
 help / color / mirror / Atom feed
From: kanoj@google.engr.sgi.com (Kanoj Sarcar)
To: Andrea Arcangeli <andrea@suse.de>
Cc: Ben LaHaise <bcrl@redhat.com>,
	riel@nl.linux.org, Linus Torvalds <torvalds@transmeta.com>,
	linux-mm@kvack.org
Subject: Re: [patch] take 2 Re: PG_swap_entry bug in recent kernels
Date: Fri, 7 Apr 2000 13:12:11 -0700 (PDT)	[thread overview]
Message-ID: <200004072012.NAA10407@google.engr.sgi.com> (raw)
In-Reply-To: <Pine.LNX.4.21.0004071205300.737-100000@alpha.random> from "Andrea Arcangeli" at Apr 07, 2000 12:45:13 PM

Andrea, 

The swaplist locking changes in swapfile.c in your patch are okay, but
are unneeded when you consider the kernel_lock is already held in most
of those paths. (Complete removal of kernel_lock in those paths are a
little harder, at least when last I tried it). A bigger problem might
be that you are violating lock orders when you grab the vmlist_lock
from inside code that already has tasklist_lock in readmode (your
change in unuse_process()). I may be wrong, so you should try stress
testing with swapdevice removal with a large number of runnable
processes.

Also, did you have a good reason to want to make lookup_swap_cache()
invoke find_get_page(), and not find_lock_page()? I coded some of the 
MP race fixes with the swap cache, some of the logic is in 
Documentation/vm/locking. I remember some intense reasoning and
thinking of obscure conditions, so I am just cautious about any
locking changes.

Kanoj

> --- ref/mm/swap_state.c	Thu Apr  6 01:00:52 2000
> +++ swap-entry-1/mm/swap_state.c	Fri Apr  7 12:29:00 2000
> @@ -126,9 +126,14 @@
>  		UnlockPage(page);
>  	}
>  
> -	ClearPageSwapEntry(page);
> -
> -	__free_page(page);
> +	/*
> +	 * Only the last unmap have to lose the swap entry
> +	 * information that we have cached into page->index.
> +	 */
> +	if (put_page_testzero(page)) {
> +		page->flags &= ~(1UL << PG_swap_entry);
> +		__free_pages_ok(page, 0);
> +	}
>  }
>  
>  
> @@ -151,7 +156,7 @@
>  		 * Right now the pagecache is 32-bit only.  But it's a 32 bit index. =)
>  		 */
>  repeat:
> -		found = find_lock_page(&swapper_space, entry.val);
> +		found = find_get_page(&swapper_space, entry.val);
>  		if (!found)
>  			return 0;
>  		/*
> @@ -163,7 +168,6 @@
>  		 * is enough to check whether the page is still in the scache.
>  		 */
>  		if (!PageSwapCache(found)) {
> -			UnlockPage(found);
>  			__free_page(found);
>  			goto repeat;
>  		}
> @@ -172,13 +176,11 @@
>  #ifdef SWAP_CACHE_INFO
>  		swap_cache_find_success++;
>  #endif
> -		UnlockPage(found);
>  		return found;
>  	}
>  
>  out_bad:
>  	printk (KERN_ERR "VM: Found a non-swapper swap page!\n");
> -	UnlockPage(found);
>  	__free_page(found);
>  	return 0;
>  }
> diff -urN ref/mm/swapfile.c swap-entry-1/mm/swapfile.c
> --- ref/mm/swapfile.c	Thu Apr  6 01:00:52 2000
> +++ swap-entry-1/mm/swapfile.c	Fri Apr  7 12:35:59 2000
> @@ -212,22 +212,22 @@
>  
>  	/* We have the old entry in the page offset still */
>  	if (!page->index)
> -		goto new_swap_entry;
> +		goto null_swap_entry;
>  	entry.val = page->index;
>  	type = SWP_TYPE(entry);
>  	if (type >= nr_swapfiles)
> -		goto new_swap_entry;
> +		goto bad_nofile;
> +	swap_list_lock();
>  	p = type + swap_info;
>  	if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
> -		goto new_swap_entry;
> +		goto unlock_list;
>  	offset = SWP_OFFSET(entry);
>  	if (offset >= p->max)
> -		goto new_swap_entry;
> +		goto bad_offset;
>  	/* Has it been re-used for something else? */
> -	swap_list_lock();
>  	swap_device_lock(p);
>  	if (p->swap_map[offset])
> -		goto unlock_new_swap_entry;
> +		goto unlock;
>  
>  	/* We're cool, we can just use the old one */
>  	p->swap_map[offset] = 1;
> @@ -236,11 +236,24 @@
>  	swap_list_unlock();
>  	return entry;
>  
> -unlock_new_swap_entry:
> +unlock:
>  	swap_device_unlock(p);
> +unlock_list:
>  	swap_list_unlock();
> +clear_swap_entry:
> +	ClearPageSwapEntry(page);
>  new_swap_entry:
>  	return get_swap_page();
> +
> +null_swap_entry:
> +	printk(KERN_WARNING __FUNCTION__ " null swap entry\n");
> +	goto clear_swap_entry;
> +bad_nofile:
> +	printk(KERN_WARNING __FUNCTION__ " nonexistent swap file\n");
> +	goto clear_swap_entry;
> +bad_offset:
> +	printk(KERN_WARNING __FUNCTION__ " bad offset\n");
> +	goto unlock_list;
>  }
>  
>  /*
> @@ -263,8 +276,11 @@
>  		/* If this entry is swap-cached, then page must already
>                     hold the right address for any copies in physical
>                     memory */
> -		if (pte_page(pte) != page)
> +		if (pte_page(pte) != page) {
> +			if (page->index == entry.val)
> +				ClearPageSwapEntry(page);
>  			return;
> +		}
>  		/* We will be removing the swap cache in a moment, so... */
>  		set_pte(dir, pte_mkdirty(pte));
>  		return;
> @@ -358,10 +374,20 @@
>  	 */
>  	if (!mm)
>  		return;
> +	/*
> +	 * Avoid the vmas to go away from under us
> +	 * and also avoids the task to play with
> +	 * pagetables while we're running. If the
> +	 * vmlist_modify_lock wouldn't acquire the
> +	 * mm->page_table_lock spinlock we should
> +	 * acquire it by hand.
> +	 */
> +	vmlist_access_lock(mm);
>  	for (vma = mm->mmap; vma; vma = vma->vm_next) {
>  		pgd_t * pgd = pgd_offset(mm, vma->vm_start);
>  		unuse_vma(vma, pgd, entry, page);
>  	}
> +	vmlist_access_unlock(mm);
>  	return;
>  }
>  
> @@ -418,8 +444,10 @@
>  		shm_unuse(entry, page);
>  		/* Now get rid of the extra reference to the temporary
>                     page we've been using. */
> -		if (PageSwapCache(page))
> +		if (PageSwapCache(page)) {
>  			delete_from_swap_cache(page);
> +			ClearPageSwapEntry(page);
> +		}
>  		__free_page(page);
>  		/*
>  		 * Check for and clear any overflowed swap map counts.
> @@ -488,8 +516,8 @@
>  		swap_list.next = swap_list.head;
>  	}
>  	nr_swap_pages -= p->pages;
> -	swap_list_unlock();
>  	p->flags = SWP_USED;
> +	swap_list_unlock();
>  	err = try_to_unuse(type);
>  	if (err) {
>  		/* re-insert swap space back into swap_list */
> 
> Andrea
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux.eu.org/Linux-MM/
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

  parent reply	other threads:[~2000-04-07 20:12 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2000-04-03 22:22 PG_swap_entry bug in recent kernels Ben LaHaise
2000-04-04 15:06 ` Andrea Arcangeli
2000-04-04 15:46   ` Rik van Riel
2000-04-04 16:50     ` Andrea Arcangeli
2000-04-04 17:06       ` Ben LaHaise
2000-04-04 18:03         ` Andrea Arcangeli
2000-04-06 22:11           ` [patch] take 2 " Ben LaHaise
2000-04-07 10:45             ` Andrea Arcangeli
2000-04-07 11:29               ` Rik van Riel
2000-04-07 12:00                 ` Andrea Arcangeli
2000-04-07 12:54                   ` Rik van Riel
2000-04-07 13:14                     ` Andrea Arcangeli
2000-04-07 20:12               ` Kanoj Sarcar [this message]
2000-04-07 23:26                 ` Andrea Arcangeli
2000-04-08  0:11                   ` Kanoj Sarcar
2000-04-08  0:37                     ` Kanoj Sarcar
2000-04-08 13:20                       ` Andrea Arcangeli
2000-04-08 21:39                         ` Kanoj Sarcar
2000-04-08 23:02                           ` Andrea Arcangeli
2000-04-08 23:18                             ` Kanoj Sarcar
2000-04-08 23:58                               ` Andrea Arcangeli
2000-04-08 13:30                     ` Andrea Arcangeli
2000-04-08 17:39                       ` Andrea Arcangeli
2000-04-07 23:54                 ` Andrea Arcangeli
2000-04-08  0:15                   ` Kanoj Sarcar
2000-04-08 13:14                     ` Andrea Arcangeli
2000-04-08 21:47                       ` Kanoj Sarcar
2000-04-08 23:10                         ` Andrea Arcangeli
2000-04-08 23:21                           ` Kanoj Sarcar
2000-04-08 23:39                             ` Andrea Arcangeli
2000-04-09  0:40                               ` Kanoj Sarcar
2000-04-10  8:55                                 ` andrea
2000-04-11  2:45                                   ` Kanoj Sarcar
2000-04-11 16:22                                     ` Andrea Arcangeli
2000-04-11 17:40                                       ` Rik van Riel
2000-04-11 18:20                                         ` Kanoj Sarcar
2000-04-21 18:23                                         ` Andrea Arcangeli
2000-04-21 21:00                                           ` Rik van Riel
2000-04-22  1:12                                             ` Andrea Arcangeli
2000-04-22  1:51                                               ` Linus Torvalds
2000-04-22 18:29                                                 ` Rik van Riel
2000-04-22 19:58                                                   ` Linus Torvalds
2000-04-11 18:26                                       ` Kanoj Sarcar
2000-04-10 19:10                         ` Stephen C. Tweedie
2000-04-08  0:04                 ` Andrea Arcangeli
     [not found] <yttem7xstk2.fsf@vexeta.dc.fi.udc.es>
2000-04-23  0:52 ` Andrea Arcangeli
     [not found] <yttk8ho26s8.fsf@vexeta.dc.fi.udc.es>
2000-04-23 16:07 ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200004072012.NAA10407@google.engr.sgi.com \
    --to=kanoj@google.engr.sgi.com \
    --cc=andrea@suse.de \
    --cc=bcrl@redhat.com \
    --cc=linux-mm@kvack.org \
    --cc=riel@nl.linux.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.