public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: William Lee Irwin III <wli@holomorphy.com>
To: linux-kernel@vger.kernel.org
Cc: riel@surriel.com
Subject: [PATCH] per-page pte_chain locking
Date: Thu, 18 Apr 2002 15:09:44 -0700	[thread overview]
Message-ID: <20020418220944.GY21206@holomorphy.com> (raw)

This is basically the same patch as before with a correction to some
code in page_add_rmap() under a #ifdef DEBUG_RMAP (whose bug I
introduced myself).

riel, please apply.


Cheers,
Bill


# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.404   -> 1.406  
#	           mm/rmap.c	1.3     -> 1.5    
#	  include/linux/mm.h	1.39    -> 1.40   
#	         mm/vmscan.c	1.61    -> 1.62   
#	        mm/filemap.c	1.66    -> 1.67   
#	           mm/swap.c	1.18    -> 1.19   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/04/17	wli@elm3b52.eng.beaverton.ibm.com	1.405
# mm.h:
#   Add pte_chain's lock bit and locking routines to the list of page flag accessor declarations.
# vmscan.c:
#   Protect accesses to page->pte_chain with the pte_chain_lock
# rmap.c:
#   Convert reverse-mapping helper routines to using per-page pte_chain locks, with comment updates.
# swap.c:
#   Protect against concurrent modification of page->pte_chain in drop_page()
# filemap.c:
#   truncate_complete_page() requires eliminating all references arising from ->pte_chain; if it is non-NULL, BUG()
# --------------------------------------------
# 02/04/18	wli@holomorphy.com	1.406
# rmap.c:
#   Repair the DEBUG_RMAP code in page_add_rmap() so it doesn't deadlock.
# --------------------------------------------
#
diff -Nru a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h	Thu Apr 18 08:17:26 2002
+++ b/include/linux/mm.h	Thu Apr 18 08:17:26 2002
@@ -299,6 +299,7 @@
 #define PG_arch_1		13
 #define PG_reserved		14
 #define PG_launder		15	/* written out by VM pressure.. */
+#define PG_chainlock		16	/* lock bit for ->pte_chain */
 
 /* Make it prettier to test the above... */
 #define UnlockPage(page)	unlock_page(page)
@@ -315,6 +316,20 @@
 #define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
 #define PageLaunder(page)	test_bit(PG_launder, &(page)->flags)
 #define SetPageLaunder(page)	set_bit(PG_launder, &(page)->flags)
+
+/*
+ * inlines for acquisition and release of PG_chainlock
+ */
+static inline void pte_chain_lock(struct page *page)
+{
+	while (test_and_set_bit(PG_chainlock, &page->flags))
+		cpu_relax();
+}
+
+static inline void pte_chain_unlock(struct page *page)
+{
+	clear_bit(PG_chainlock, &page->flags);
+}
 
 /*
  * The zone field is never updated after free_area_init_core()
diff -Nru a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c	Thu Apr 18 08:17:26 2002
+++ b/mm/filemap.c	Thu Apr 18 08:17:26 2002
@@ -233,8 +233,14 @@
 
 static void truncate_complete_page(struct page *page)
 {
-	/* Leave it on the LRU if it gets converted into anonymous buffers */
-	if (!page->pte_chain && (!page->buffers || do_flushpage(page, 0)))
+	/*
+	 * Leave it on the LRU if it gets converted into anonymous buffers
+	 * It is furthermore assumed that no process has mapped this page.
+	 */
+	if (page->pte_chain)
+		BUG();
+
+	if (!page->buffers || do_flushpage(page, 0))
 		lru_cache_del(page);
 
 	/*
diff -Nru a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c	Thu Apr 18 08:17:26 2002
+++ b/mm/rmap.c	Thu Apr 18 08:17:26 2002
@@ -13,9 +13,9 @@
 
 /*
  * Locking:
- * - the page->pte_chain is protected by the pagemap_lru_lock,
- *   we probably want to change this to a per-page lock in the
- *   future
+ * - the page->pte_chain is protected by the PG_chainlock bit,
+ *   which nests within the pagemap_lru_lock, then the
+ *   mm->page_table_lock, and then the page lock.
  * - because swapout locking is opposite to the locking order
  *   in the page fault path, the swapout path uses trylocks
  *   on the mm->page_table_lock
@@ -59,7 +59,7 @@
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of processes which referenced the page.
- * Caller needs to hold the pagemap_lru_lock.
+ * Caller needs to hold the pte_chain_lock.
  */
 int FASTCALL(page_referenced(struct page *));
 int page_referenced(struct page * page)
@@ -91,14 +91,7 @@
 void page_add_rmap(struct page * page, pte_t * ptep)
 {
 	struct pte_chain * pte_chain;
-	zone_t *zone;
-
-	if (!VALID_PAGE(page) || PageReserved(page))
-		return;
 
-	zone = page_zone(page);
-
-	spin_lock(&pagemap_lru_lock);
 #ifdef DEBUG_RMAP
 	if (!page || !ptep)
 		BUG();
@@ -106,6 +99,10 @@
 		BUG();
 	if (!ptep_to_mm(ptep));
 		BUG();
+	if (!VALID_PAGE(page) || PageReserved(page))
+		return;
+
+	pte_chain_lock(page);
 	{
 		struct pte_chain * pc;
 		for (pc = page->pte_chain; pc; pc = pc->next) {
@@ -113,15 +110,22 @@
 				BUG();
 		}
 	}
+	pte_chain_unlock(page);
 #endif
-	pte_chain = pte_chain_alloc(zone);
+
+	if (!VALID_PAGE(page) || PageReserved(page))
+		return;
+
+	pte_chain = pte_chain_alloc(page_zone(page));
+
+	pte_chain_lock(page);
 
 	/* Hook up the pte_chain to the page. */
 	pte_chain->ptep = ptep;
 	pte_chain->next = page->pte_chain;
 	page->pte_chain = pte_chain;
 
-	spin_unlock(&pagemap_lru_lock);
+	pte_chain_unlock(page);
 }
 
 /**
@@ -147,7 +151,7 @@
 
 	zone = page_zone(page);
 
-	spin_lock(&pagemap_lru_lock);
+	pte_chain_lock(page);
 	for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) {
 		if (pc->ptep == ptep) {
 			pte_chain_free(pc, prev_pc, page, zone);
@@ -165,7 +169,7 @@
 #endif
 
 out:
-	spin_unlock(&pagemap_lru_lock);
+	pte_chain_unlock(page);
 	return;
 			
 }
@@ -181,7 +185,8 @@
  * Locking:
  *	pagemap_lru_lock		page_launder()
  *	    page lock			page_launder(), trylock
- *		mm->page_table_lock	try_to_unmap_one(), trylock
+ *		pte_chain_lock		page_launder()
+ *		    mm->page_table_lock	try_to_unmap_one(), trylock
  */
 int FASTCALL(try_to_unmap_one(struct page *, pte_t *));
 int try_to_unmap_one(struct page * page, pte_t * ptep)
@@ -300,7 +305,7 @@
  * is over its RSS (resident set size) limit.  For shared pages
  * we penalise it only if all processes using it are over their
  * rss limits.
- * The caller needs to hold the pagemap_lru_lock.
+ * The caller needs to hold the page's pte_chain_lock.
  */
 int FASTCALL(page_over_rsslimit(struct page *));
 int page_over_rsslimit(struct page * page)
@@ -364,6 +369,7 @@
  * This function unlinks pte_chain from the singly linked list it
  * may be on and adds the pte_chain to the free list. May also be
  * called for new pte_chain structures which aren't on any list yet.
+ * Caller needs to hold the pte_chain_lock if the page is non-NULL.
  */
 static inline void pte_chain_free(struct pte_chain * pte_chain,
 		struct pte_chain * prev_pte_chain, struct page * page,
@@ -385,6 +391,7 @@
  *
  * Returns a pointer to a fresh pte_chain structure. Allocates new
  * pte_chain structures as required.
+ * Caller needs to hold the page's pte_chain_lock.
  */
 static inline struct pte_chain * pte_chain_alloc(zone_t * zone)
 {
diff -Nru a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c	Thu Apr 18 08:17:26 2002
+++ b/mm/swap.c	Thu Apr 18 08:17:26 2002
@@ -104,6 +104,7 @@
 	}
 
 	/* Make sure the page really is reclaimable. */
+	pte_chain_lock(page);
 	if (!page->mapping || PageDirty(page) || page->pte_chain ||
 			page->buffers || page_count(page) > 1)
 		deactivate_page_nolock(page);
@@ -119,6 +120,7 @@
 			add_page_to_inactive_clean_list(page);
 		}
 	}
+	pte_chain_unlock(page);
 }
 
 /*
diff -Nru a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	Thu Apr 18 08:17:26 2002
+++ b/mm/vmscan.c	Thu Apr 18 08:17:26 2002
@@ -47,6 +47,7 @@
 	page->age -= min(PAGE_AGE_DECL, (int)page->age);
 }
 
+/* Must be called with page's pte_chain_lock held. */
 static inline int page_mapping_inuse(struct page * page)
 {
 	struct address_space * mapping = page->mapping;
@@ -108,14 +109,21 @@
 		}
 
 		/* Page cannot be reclaimed ?  Move to inactive_dirty list. */
+		pte_chain_lock(page);
 		if (unlikely(page->pte_chain || page->buffers ||
 				PageReferenced(page) || PageDirty(page) ||
 				page_count(page) > 1 || TryLockPage(page))) {
 			del_page_from_inactive_clean_list(page);
 			add_page_to_inactive_dirty_list(page);
+			pte_chain_unlock(page);
 			continue;
 		}
 
+		/*
+		 * From here until reaching either the bottom of the loop
+		 * or found_page: the pte_chain_lock is held.
+		 */
+
 		/* OK, remove the page from the caches. */
                 if (PageSwapCache(page)) {
 			entry.val = page->index;
@@ -132,6 +140,7 @@
 		printk(KERN_ERR "VM: reclaim_page, found unknown page\n");
 		list_del(page_lru);
 		zone->inactive_clean_pages--;
+		pte_chain_unlock(page);
 		UnlockPage(page);
 	}
 	spin_unlock(&pagecache_lock);
@@ -141,6 +150,7 @@
 
 found_page:
 	del_page_from_inactive_clean_list(page);
+	pte_chain_unlock(page);
 	spin_unlock(&pagecache_lock);
 	spin_unlock(&pagemap_lru_lock);
 	if (entry.val)
@@ -243,13 +253,16 @@
 		 * The page is in active use or really unfreeable. Move to
 		 * the active list and adjust the page age if needed.
 		 */
+		pte_chain_lock(page);
 		if (page_referenced(page) && page_mapping_inuse(page) &&
 				!page_over_rsslimit(page)) {
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_active_list(page);
 			page->age = max((int)page->age, PAGE_AGE_START);
+			pte_chain_unlock(page);
 			continue;
 		}
+		pte_chain_unlock(page);
 
 		/*
 		 * Page is being freed, don't worry about it.
@@ -270,8 +283,10 @@
 		 *
 		 * XXX: implement swap clustering ?
 		 */
+		pte_chain_lock(page);
 		if (page->pte_chain && !page->mapping && !page->buffers) {
 			page_cache_get(page);
+			pte_chain_unlock(page);
 			spin_unlock(&pagemap_lru_lock);
 			if (!add_to_swap(page)) {
 				activate_page(page);
@@ -282,24 +297,28 @@
 			}
 			page_cache_release(page);
 			spin_lock(&pagemap_lru_lock);
-		}
+		} else
+			pte_chain_unlock(page);
 
 		/*
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
 		 */
+		pte_chain_lock(page);
 		if (page->pte_chain) {
 			switch (try_to_unmap(page)) {
 				case SWAP_ERROR:
 				case SWAP_FAIL:
 					goto page_active;
 				case SWAP_AGAIN:
+					pte_chain_unlock(page);
 					UnlockPage(page);
 					continue;
 				case SWAP_SUCCESS:
 					; /* try to free the page below */
 			}
 		}
+		pte_chain_unlock(page);
 
 		if (PageDirty(page) && page->mapping) {
 			/*
@@ -386,10 +405,12 @@
 		 * This test is not safe from races, but only the one
 		 * in reclaim_page() needs to be.
 		 */
+		pte_chain_lock(page);
 		if (page->mapping && !PageDirty(page) && !page->pte_chain &&
 				page_count(page) == 1) {
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_inactive_clean_list(page);
+			pte_chain_unlock(page);
 			UnlockPage(page);
 			cleaned_pages++;
 		} else {
@@ -401,6 +422,7 @@
 page_active:
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_active_list(page);
+			pte_chain_unlock(page);
 			UnlockPage(page);
 		}
 	}
@@ -471,8 +493,12 @@
 		 * If the object the page is in is not in use we don't
 		 * bother with page aging.  If the page is touched again
 		 * while on the inactive_clean list it'll be reactivated.
+		 * From here until the end of the current iteration
+		 * the pte_chain_lock is held.
 		 */
+		pte_chain_lock(page);
 		if (!page_mapping_inuse(page)) {
+			pte_chain_unlock(page);
 			drop_page(page);
 			continue;
 		}
@@ -496,9 +522,12 @@
 			list_add(page_lru, &zone->active_list);
 		} else {
 			deactivate_page_nolock(page);
-			if (++nr_deactivated > target)
-				break;
+			if (++nr_deactivated > target) {
+				pte_chain_unlock(page);
+				goto done;
+			}
 		}
+		pte_chain_unlock(page);
 
 		/* Low latency reschedule point */
 		if (current->need_resched) {
@@ -507,6 +536,8 @@
 			spin_lock(&pagemap_lru_lock);
 		}
 	}
+
+done:
 	spin_unlock(&pagemap_lru_lock);
 
 	return nr_deactivated;

             reply	other threads:[~2002-04-18 22:10 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-04-18 22:09 William Lee Irwin III [this message]
  -- strict thread matches above, loose matches on Subject: below --
2002-04-18  4:36 [PATCH] per-page pte_chain locking William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20020418220944.GY21206@holomorphy.com \
    --to=wli@holomorphy.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=riel@surriel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox