public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>
To: linux-kernel <linux-kernel@vger.kernel.org>
Cc: lse-tech@lists.sourceforge.net
Subject: [PATCH] breaking up the pagemap_lru_lock in rmap
Date: Mon, 04 Mar 2002 10:04:51 -0800	[thread overview]
Message-ID: <194860000.1015265091@flay> (raw)

High contention on the pagemap_lru lock seems to be a major
scalability problem for rmap at the moment. Based on wli's and
Rik's suggestions, I've made a first cut at a patch to split up the
lock into a per-page lock for each pte_chain.

This isn't ready to go yet - I'm not going to pretend it works. I'm
looking for feedback on the approach, and any obvious blunders
I've made in coding. I plan to move the lock in the pages_struct
into the flags field to save space once it's working reliably.

If I may steal akpm's favourite disclaimer - "I know diddly squat
about ......" ;-) Flame away .....

Thanks,

Martin.

diff -urN linux-2.4.17-rmap/include/linux/mm.h linux-2.4.17-rmap-mjb/include/linux/mm.h
--- linux-2.4.17-rmap/include/linux/mm.h	Thu Feb 14 17:53:17 2002
+++ linux-2.4.17-rmap-mjb/include/linux/mm.h	Tue Feb 26 10:34:23 2002
@@ -160,6 +160,7 @@
 					   protected by pagemap_lru_lock !! */
 	unsigned char age;		/* Page aging counter. */
 	unsigned char zone;		/* Memory zone the page belongs to. */
+	spinlock_t pte_chain_lock;
 	struct pte_chain * pte_chain;	/* Reverse pte mapping pointer. */
 	struct page **pprev_hash;	/* Complement to *next_hash. */
 	struct buffer_head * buffers;	/* Buffer maps us to a disk block. */
diff -urN linux-2.4.17-rmap/mm/filemap.c linux-2.4.17-rmap-mjb/mm/filemap.c
--- linux-2.4.17-rmap/mm/filemap.c	Thu Feb 14 17:53:17 2002
+++ linux-2.4.17-rmap-mjb/mm/filemap.c	Fri Feb 15 17:23:36 2002
@@ -235,8 +235,13 @@
 static void truncate_complete_page(struct page *page)
 {
 	/* Leave it on the LRU if it gets converted into anonymous buffers */
-	if (!page->pte_chain && (!page->buffers || do_flushpage(page, 0)))
+	pte_chain_lock(page);
+	if (!page->pte_chain && (!page->buffers || do_flushpage(page, 0))) {
+		pte_chain_unlock(page);
 		lru_cache_del(page);
+	} else {
+		pte_chain_unlock(page);
+	}
 
 	/*
 	 * We remove the page from the page cache _after_ we have
diff -urN linux-2.4.17-rmap/mm/page_alloc.c linux-2.4.17-rmap-mjb/mm/page_alloc.c
--- linux-2.4.17-rmap/mm/page_alloc.c	Thu Feb 14 17:53:17 2002
+++ linux-2.4.17-rmap-mjb/mm/page_alloc.c	Tue Feb 26 10:58:42 2002
@@ -127,8 +127,10 @@
 		BUG();
 	if (PageInactiveClean(page))
 		BUG();
+	pte_chain_lock(page);
 	if (page->pte_chain)
 		BUG();
+	pte_chain_unlock(page);
 	page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
 	page->age = PAGE_AGE_START;
 	
@@ -989,6 +991,7 @@
 			struct page *page = mem_map + offset + i;
 			set_page_zone(page, pgdat->node_id * MAX_NR_ZONES + j);
 			init_page_count(page);
+			page->pte_chain_lock = SPIN_LOCK_UNLOCKED;
 			__SetPageReserved(page);
 			memlist_init(&page->list);
 			if (j != ZONE_HIGHMEM)
diff -urN linux-2.4.17-rmap/mm/rmap.c linux-2.4.17-rmap-mjb/mm/rmap.c
--- linux-2.4.17-rmap/mm/rmap.c	Thu Feb 14 17:53:17 2002
+++ linux-2.4.17-rmap-mjb/mm/rmap.c	Tue Feb 26 16:50:55 2002
@@ -13,9 +13,8 @@
 
 /*
  * Locking:
- * - the page->pte_chain is protected by the pagemap_lru_lock,
- *   we probably want to change this to a per-page lock in the
- *   future
+ * - the page->pte_chain is protected by a per-page lock.
+ *
  * - because swapout locking is opposite to the locking order
  *   in the page fault path, the swapout path uses trylocks
  *   on the mm->page_table_lock
@@ -53,13 +52,25 @@
 static inline void pte_chain_free(struct pte_chain *, struct pte_chain *, struct page *);
 static void alloc_new_pte_chains(void);
 
+/* lock the pte_chain for this page */
+inline void pte_chain_lock(struct page *page)
+{
+        spin_lock(&page->pte_chain_lock);
+}
+
+/* unlock the pte_chain for this page */
+inline void pte_chain_unlock(struct page *page)
+{
+        spin_unlock(&page->pte_chain_lock);
+}
+
 /**
  * page_referenced - test if the page was referenced
  * @page: the page to test
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of processes which referenced the page.
- * Caller needs to hold the pagemap_lru_lock.
+ * Caller needs to hold the page's pte_chain lock.
  */
 int FASTCALL(page_referenced(struct page *));
 int page_referenced(struct page * page)
@@ -95,7 +106,7 @@
 	if (!VALID_PAGE(page) || PageReserved(page))
 		return;
 
-	spin_lock(&pagemap_lru_lock);
+	pte_chain_lock(page);
 #ifdef DEBUG_RMAP
 	if (!page || !ptep)
 		BUG();
@@ -118,7 +129,7 @@
 	pte_chain->next = page->pte_chain;
 	page->pte_chain = pte_chain;
 
-	spin_unlock(&pagemap_lru_lock);
+	pte_chain_unlock(page);
 }
 
 /**
@@ -141,7 +152,7 @@
 	if (!VALID_PAGE(page) || PageReserved(page))
 		return;
 
-	spin_lock(&pagemap_lru_lock);
+	pte_chain_lock(page);
 	for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) {
 		if (pc->ptep == ptep) {
 			pte_chain_free(pc, prev_pc, page);
@@ -159,9 +170,8 @@
 #endif
 
 out:
-	spin_unlock(&pagemap_lru_lock);
+	pte_chain_unlock(page);
 	return;
-			
 }
 
 /**
@@ -240,7 +250,7 @@
  * @page: the page to get unmapped
  *
  * Tries to remove all the page table entries which are mapping this
- * page, used in the pageout path.  Caller must hold pagemap_lru_lock
+ * page, used in the pageout path.  Caller must hold pte_chain_lock
  * and the page lock.  Return values are:
  *
  * SWAP_SUCCESS	- we succeeded in removing all mappings
@@ -294,7 +304,7 @@
  * we make the optimisation of only checking the first process
  * in the pte_chain list, this should catch hogs while not
  * evicting pages shared by many processes.
- * The caller needs to hold the pagemap_lru_lock.
+ * The caller needs to hold the page's pte_chain lock
  */
 int FASTCALL(page_over_rsslimit(struct page *));
 int page_over_rsslimit(struct page * page)
@@ -322,7 +332,7 @@
  * This function unlinks pte_chain from the singly linked list it
  * may be on and adds the pte_chain to the free list. May also be
  * called for new pte_chain structures which aren't on any list yet.
- * Caller needs to hold the pagemap_lru_list.
+ * Caller needs to hold the page's pte_chain lock if page is not NULL
  */
 static inline void pte_chain_free(struct pte_chain * pte_chain, struct pte_chain * prev_pte_chain, struct page * page)
 {
@@ -341,7 +351,7 @@
  *
  * Returns a pointer to a fresh pte_chain structure. Allocates new
  * pte_chain structures as required.
- * Caller needs to hold the pagemap_lru_lock.
+ * Caller needs to hold the page's pte_chain lock
  */
 static inline struct pte_chain * pte_chain_alloc(void)
 {
diff -urN linux-2.4.17-rmap/mm/swap.c linux-2.4.17-rmap-mjb/mm/swap.c
--- linux-2.4.17-rmap/mm/swap.c	Thu Feb 14 17:53:17 2002
+++ linux-2.4.17-rmap-mjb/mm/swap.c	Fri Feb 15 17:35:53 2002
@@ -106,12 +106,14 @@
 		UnlockPage(page);
 	}
 
+	pte_chain_lock(page);
 	/* Make sure the page really is reclaimable. */
 	if (!page->mapping || PageDirty(page) || page->pte_chain ||
 			page->buffers || page_count(page) > 1)
 		deactivate_page_nolock(page);
 
 	else if (page_count(page) == 1) {
+		pte_chain_unlock(page);
 		ClearPageReferenced(page);
 		page->age = 0;
 		if (PageActive(page)) {
@@ -121,6 +123,8 @@
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_inactive_clean_list(page);
 		}
+	} else {
+		pte_chain_unlock(page);
 	}
 }
 
diff -urN linux-2.4.17-rmap/mm/vmscan.c linux-2.4.17-rmap-mjb/mm/vmscan.c
--- linux-2.4.17-rmap/mm/vmscan.c	Thu Feb 14 17:53:17 2002
+++ linux-2.4.17-rmap-mjb/mm/vmscan.c	Fri Feb 15 17:14:30 2002
@@ -48,6 +48,7 @@
 	page->age -= min(PAGE_AGE_DECL, (int)page->age);
 }
 
+/* Must be called with the page's pte_chain lock held */
 static inline int page_mapping_inuse(struct page * page)
 {
 	struct address_space * mapping = page->mapping;
@@ -109,13 +110,16 @@
 		}
 
 		/* Page cannot be reclaimed ?  Move to inactive_dirty list. */
+		pte_chain_lock(page);
 		if (unlikely(page->pte_chain || page->buffers ||
 				PageReferenced(page) || PageDirty(page) ||
 				page_count(page) > 1 || TryLockPage(page))) {
 			del_page_from_inactive_clean_list(page);
 			add_page_to_inactive_dirty_list(page);
+			pte_chain_unlock(page);
 			continue;
 		}
+		pte_chain_unlock(page);
 
 		/* OK, remove the page from the caches. */
                 if (PageSwapCache(page)) {
@@ -239,6 +243,7 @@
 			continue;
 		}
 
+		pte_chain_lock(page);
 		/*
 		 * The page is in active use or really unfreeable. Move to
 		 * the active list and adjust the page age if needed.
@@ -248,21 +253,26 @@
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_active_list(page);
 			page->age = max((int)page->age, PAGE_AGE_START);
+			pte_chain_unlock(page);
 			continue;
 		}
 
 		/*
 		 * Page is being freed, don't worry about it.
 		 */
-		if (unlikely(page_count(page)) == 0)
+		if (unlikely(page_count(page)) == 0) {
+			pte_chain_unlock(page);
 			continue;
+		}
 
 		/*
 		 * The page is locked. IO in progress?
 		 * Move it to the back of the list.
 		 */
-		if (unlikely(TryLockPage(page)))
+		if (unlikely(TryLockPage(page))) {
+			pte_chain_unlock(page);
 			continue;
+		}
 
 		/*
 		 * Anonymous process memory without backing store. Try to
@@ -272,6 +282,7 @@
 		 */
 		if (page->pte_chain && !page->mapping && !page->buffers) {
 			page_cache_get(page);
+			pte_chain_unlock(page);
 			spin_unlock(&pagemap_lru_lock);
 			if (!add_to_swap(page)) {
 				activate_page(page);
@@ -282,6 +293,7 @@
 			}
 			page_cache_release(page);
 			spin_lock(&pagemap_lru_lock);
+			pte_chain_lock(page);
 		}
 
 		/*
@@ -295,6 +307,7 @@
 					goto page_active;
 				case SWAP_AGAIN:
 					UnlockPage(page);
+					pte_chain_unlock(page);
 					continue;
 				case SWAP_SUCCESS:
 					; /* try to free the page below */
@@ -319,6 +332,7 @@
 				page_cache_get(page);
 				spin_unlock(&pagemap_lru_lock);
 
+				pte_chain_unlock(page);
 				writepage(page);
 				page_cache_release(page);
 
@@ -348,6 +362,7 @@
 					 */
 					spin_lock(&pagemap_lru_lock);
 					UnlockPage(page);
+					pte_chain_unlock(page);
 					__lru_cache_del(page);
 
 					/* effectively free the page here */
@@ -369,6 +384,7 @@
 			} else {
 				/* failed to drop the buffers so stop here */
 				UnlockPage(page);
+				pte_chain_unlock(page);
 				page_cache_release(page);
 
 				spin_lock(&pagemap_lru_lock);
@@ -403,6 +419,7 @@
 			add_page_to_active_list(page);
 			UnlockPage(page);
 		}
+		pte_chain_unlock(page);
 	}
 	spin_unlock(&pagemap_lru_lock);
 
@@ -473,7 +490,9 @@
 		 * bother with page aging.  If the page is touched again
 		 * while on the inactive_clean list it'll be reactivated.
 		 */
+		pte_chain_lock(page);
 		if (!page_mapping_inuse(page)) {
+			pte_chain_unlock(page);
 			drop_page(page);
 			continue;
 		}
@@ -497,9 +516,12 @@
 			list_add(page_lru, &zone->active_list);
 		} else {
 			deactivate_page_nolock(page);
-			if (++nr_deactivated > target)
+			if (++nr_deactivated > target) {
+				pte_chain_unlock(page);
 				break;
+			}
 		}
+		pte_chain_unlock(page);
 
 		/* Low latency reschedule point */
 		if (current->need_resched) {


             reply	other threads:[~2002-03-04 18:05 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-03-04 18:04 Martin J. Bligh [this message]
2002-03-04 20:17 ` [PATCH] breaking up the pagemap_lru_lock in rmap peter
2002-03-05  2:02 ` Andrea Arcangeli
2002-03-05  5:17   ` Martin J. Bligh
2002-03-05 12:31     ` Rik van Riel
2002-03-09  5:20   ` breaking up the pagemap_lru_lock (was in rmap, now everywhere ;-) ) Martin J. Bligh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=194860000.1015265091@flay \
    --to=martin.bligh@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox