All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@digeo.com>
To: Dave McCracken <dmccr@us.ibm.com>
Cc: Linux Memory Management <linux-mm@kvack.org>
Subject: Re: [PATCH 2.5.42-mm3] More shared page table fixes
Date: Tue, 15 Oct 2002 23:07:11 -0700	[thread overview]
Message-ID: <3DAD020F.1AC3D34F@digeo.com> (raw)
In-Reply-To: 75990000.1034696450@baldur.austin.ibm.com

Dave McCracken wrote:
> 
> This patch gets the unmap_all_pages function right for PAE-enabled
> machines.  It also adds a forgotten spinlock to pte_try_to_share.
> 

This one's bad, Dave.  Machine keels over halfway through the first
dbench run with CONFIG_SHAREPTE=n.  General memory corruption and
mayhem.

Here's what I have - I'll drop it in experimental/ if I manage to get
another patchset ready tonight.



 mm/memory.c |  166 +++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 103 insertions(+), 63 deletions(-)

--- 2.5.42/mm/memory.c~shpte-unmap_all_pages_fix	Tue Oct 15 14:18:42 2002
+++ 2.5.42-akpm/mm/memory.c	Tue Oct 15 14:18:42 2002
@@ -372,6 +372,7 @@ static pte_t *pte_try_to_share(struct mm
 	struct vm_area_struct *lvma;
 	struct page *ptepage;
 	unsigned long base;
+	pte_t *pte = NULL;
 
 	/*
 	 * It already has a pte page.  No point in checking further.
@@ -394,6 +395,8 @@ static pte_t *pte_try_to_share(struct mm
 
 	as = vma->vm_file->f_dentry->d_inode->i_mapping;
 
+	spin_lock(&as->i_shared_lock);
+
 	list_for_each_entry(lvma, &as->i_mmap_shared, shared) {
 		pgd_t *lpgd;
 		pmd_t *lpmd;
@@ -431,9 +434,11 @@ static pte_t *pte_try_to_share(struct mm
 		else
 			pmdval = pmd_wrprotect(*lpmd);
 		set_pmd(pmd, pmdval);
-		return pte_page_map(ptepage, address);
+		pte = pte_page_map(ptepage, address);
+		break;
 	}
-	return NULL;
+	spin_unlock(&as->i_shared_lock);
+	return pte;
 }
 #endif
 
@@ -846,14 +851,16 @@ static void zap_pmd_range(mmu_gather_t *
 	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
 		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
 	do {
-		ptepage = pmd_page(*pmd);
-		pte_page_lock(ptepage);
+		if (pmd_present(*pmd)) {
+			ptepage = pmd_page(*pmd);
+			pte_page_lock(ptepage);
 #ifdef CONFIG_SHAREPTE
-		if (page_count(ptepage) > 1)
-			BUG();
+			if (page_count(ptepage) > 1)
+				BUG();
 #endif
-		zap_pte_range(tlb, pmd, address, end - address);
-		pte_page_unlock(ptepage);
+			zap_pte_range(tlb, pmd, address, end - address);
+			pte_page_unlock(ptepage);
+		}
 		address = (address + PMD_SIZE) & PMD_MASK; 
 		pmd++;
 	} while (address < end);
@@ -938,72 +945,105 @@ void unmap_all_pages(struct mm_struct *m
 	mmu_gather_t *tlb;
 	pgd_t *pgd;
 	pmd_t *pmd;
-	unsigned long address;
-	unsigned long end;
+	unsigned long address = 0;
+	unsigned long vm_end = 0, prev_end, pmd_end;
 
 	tlb = tlb_gather_mmu(mm, 1);
 
 	vma = mm->mmap;
-	if (!vma)
-		goto out;
-
-	mm->map_count--;
-	if (is_vm_hugetlb_page(vma)) {
-		vma->vm_ops->close(vma);
-		goto next_vma;
-	}
-
-	address = vma->vm_start;
-	end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	for (;;) {
+		if (address >= vm_end) {
+			if (!vma)
+				goto out;
 
-	pgd = pgd_offset(mm, address);
-	pmd = pmd_offset(pgd, address);
-	do {
-		do {
-			if (pmd_none(*pmd))
-				goto skip_pmd;
-			if (pmd_bad(*pmd)) {
-				pmd_ERROR(*pmd);
-				pmd_clear(pmd);
-				goto skip_pmd;
-			}
-		
-			ptepage = pmd_page(*pmd);
-			pte_page_lock(ptepage);
-			if (page_count(ptepage) > 1) {
-				pmd_clear(pmd);
-				pgtable_remove_rmap_locked(ptepage, mm);
-				mm->rss -= ptepage->private;
-				put_page(ptepage);
-			} else {
-				zap_pte_range(tlb, pmd, address, end - address);
-			}
-			pte_page_unlock(ptepage);
-skip_pmd:
-			pmd++;
-			address = (address + PMD_SIZE) & PMD_MASK;
-			if (address >= vma->vm_end) {
+			address = vma->vm_start;
 next_vma:
-				vma = vma->vm_next;
-				if (!vma)
-					goto out;
-
-				mm->map_count--;
-				if (is_vm_hugetlb_page(vma)) {
+			prev_end = vm_end;
+			vm_end = vma->vm_end;
+			mm->map_count--;
+			/*
+			 * Advance the vma pointer to the next vma.
+			 * To facilitate coalescing adjacent vmas, the
+			 * pointer always points to the next one
+			 * beyond the range we're currently working
+			 * on, which means vma will be null on the
+			 * last iteration.
+			 */
+			vma = vma->vm_next;
+			if (vma) {
+				/*
+				 * Go ahead and include hugetlb vmas
+				 * in the range we process.  The pmd
+				 * entry will be cleared by close, so
+				 * we'll just skip over them.  This is
+				 * easier than trying to avoid them.
+				 */
+				if (is_vm_hugetlb_page(vma))
 					vma->vm_ops->close(vma);
+
+				/*
+				 * Coalesce adjacent vmas and process
+				 * them all in one iteration.
+				 */
+				if (vma->vm_start == prev_end) {
 					goto next_vma;
 				}
+			}
+		}
+		pgd = pgd_offset(mm, address);
+		do {
+			if (pgd_none(*pgd))
+				goto skip_pgd;
 
-				address = vma->vm_start;
-				end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-				pgd = pgd_offset(mm, address);
-				pmd = pmd_offset(pgd, address);
+			if (pgd_bad(*pgd)) {
+				pgd_ERROR(*pgd);
+				pgd_clear(pgd);
+skip_pgd:
+				address += PGDIR_SIZE;
+				if (address > vm_end)
+					address = vm_end;
+				goto next_pgd;
 			}
-		} while (address < end);
-		pgd++;
-		pmd = pmd_offset(pgd, address);
-		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-	} while (vma);
+			pmd = pmd_offset(pgd, address);
+			if (vm_end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+				pmd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
+			else
+				pmd_end = vm_end;
+
+			for (;;) {
+				if (pmd_none(*pmd))
+					goto next_pmd;
+				if (pmd_bad(*pmd)) {
+					pmd_ERROR(*pmd);
+					pmd_clear(pmd);
+					goto next_pmd;
+				}
+				
+				ptepage = pmd_page(*pmd);
+				pte_page_lock(ptepage);
+				if (page_count(ptepage) > 1) {
+					pmd_clear(pmd);
+					pgtable_remove_rmap_locked(ptepage, mm);
+					mm->rss -= ptepage->private;
+					put_page(ptepage);
+				} else
+					zap_pte_range(tlb, pmd, address,
+						      vm_end - address);
+
+				pte_page_unlock(ptepage);
+next_pmd:
+				address += PMD_SIZE;
+				if (address >= pmd_end) {
+					address = pmd_end;
+					break;
+				}
+				pmd++;
+			}
+next_pgd:
+			pgd++;
+		} while (address < vm_end);
+
+	}
 
 out:
 	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);

.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

      reply	other threads:[~2002-10-16  6:07 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-10-15 15:40 [PATCH 2.5.42-mm3] More shared page table fixes Dave McCracken
2002-10-16  6:07 ` Andrew Morton [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3DAD020F.1AC3D34F@digeo.com \
    --to=akpm@digeo.com \
    --cc=dmccr@us.ibm.com \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.