public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] invalidate_mmap_range() misses remap_file_pages()-affected targets
@ 2003-10-12  8:48 William Lee Irwin III
  2003-10-12 10:34 ` William Lee Irwin III
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: William Lee Irwin III @ 2003-10-12  8:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm

invalidate_mmap_range(), and hence vmtruncate(), can miss its targets
due to remap_file_pages() disturbing the former invariant of file
offsets only being mapped within vmas tagged as mapping file offset
ranges containing them.

This patch uses the VM_NONLINEAR flag to detect when this could happen,
and does the full pagetable walk over the full range of virtual address
space the vma tracks, to ensure that the search covers vmas and virtual
addresses possibly caching file offsets outside the vmas' "natural range"
due to remap_file_pages().

A further twist is that remap_file_pages() now needs to set VM_NONLINEAR
in a manner synchronized with invalidate_mmap_range(); this is done by
protecting the setting of VM_NONLINEAR with ->i_shared_sem. This will
suffice to exclude ->populate() even though the semaphore is not held
across the ->populate() call, as vmtruncate() alters the inode size prior
to the invalidation. More general uses of invalidate_mmap_range() may need
to hold ->i_shared_sem during the ->populate() calls, as they may not have
protection from the inode size.

Untested, though it appears to compile. The only disturbing signs are
tlb_remove_tlb_entry() on an uncleared pte, which is at variance with
zap_pte_range() (include/asm-ppc64/tlb.h suggests zap_pte_range() is
erroneous) and a semantic question with respect to PTE_FILE ptes, i.e.
whether to clear or ignore (zap_page_range() clears as it stands now).

vs. 2.6.0-test7-bk3


diff -prauN linux-2.6.0-test7-bk3/mm/fremap.c rfp-2.6.0-test7-bk3-1/mm/fremap.c
--- linux-2.6.0-test7-bk3/mm/fremap.c	2003-10-08 12:24:00.000000000 -0700
+++ rfp-2.6.0-test7-bk3-1/mm/fremap.c	2003-10-12 00:48:45.000000000 -0700
@@ -201,9 +201,19 @@ long sys_remap_file_pages(unsigned long 
 			end > start && start >= vma->vm_start &&
 				end <= vma->vm_end) {
 
-		/* Must set VM_NONLINEAR before any pages are populated. */
-		if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff)
+		/*
+		 * Must set VM_NONLINEAR before any pages are populated.
+		 * Take ->i_shared_sem to lock out invalidate_mmap_range().
+		 */
+		if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff) {
+			struct file *file = vma->vm_file;
+			struct address_space *mapping;
+
+			mapping = file->f_dentry->d_inode->i_mapping;
+			down(&mapping->i_shared_sem);
 			vma->vm_flags |= VM_NONLINEAR;
+			up(&mapping->i_shared_sem);
+		}
 
 		/* ->populate can take a long time, so downgrade the lock. */
 		downgrade_write(&mm->mmap_sem);
diff -prauN linux-2.6.0-test7-bk3/mm/memory.c rfp-2.6.0-test7-bk3-1/mm/memory.c
--- linux-2.6.0-test7-bk3/mm/memory.c	2003-10-08 12:24:04.000000000 -0700
+++ rfp-2.6.0-test7-bk3-1/mm/memory.c	2003-10-12 01:10:30.000000000 -0700
@@ -1077,6 +1077,102 @@ out:
 	return ret;
 }
 
+static void
+invalidate_mmap_nonlinear_range(struct vm_area_struct *vma,
+					const unsigned long pgoff,
+					const unsigned long len)
+{
+	unsigned long addr;
+	pgd_t *pgd;
+	struct mmu_gather *tlb;
+
+	spin_lock(&vma->vm_mm->page_table_lock);
+	addr = vma->vm_start;
+	pgd = pgd_offset(vma->vm_mm, addr);
+	tlb = tlb_gather_mmu(vma->vm_mm, vma->vm_start);
+
+	tlb_start_vma(tlb, vma);
+	while (1) {
+		pmd_t *pmd;
+
+		if (pgd_none(*pgd)) {
+			addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			goto skip_pgd;
+		} else if (pgd_bad(*pgd)) {
+			pgd_ERROR(*pgd);
+			pgd_clear(pgd);
+skip_pgd:		addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			if (!addr || addr >= vma->vm_end)
+				break;
+			goto next_pgd;
+		}
+
+		pmd = pmd_offset(pgd, addr);
+		do {
+			pte_t *pte;
+
+			if (pmd_none(*pmd)) {
+				goto skip_pmd;
+			} else if (pmd_bad(*pmd)) {
+				pmd_ERROR(*pmd);
+				pmd_clear(pmd);
+skip_pmd:			addr = (addr + PMD_SIZE) & PMD_MASK;
+				if (!addr || addr >= vma->vm_end)
+					goto out;
+				goto next_pmd;
+			}
+			pte = pte_offset_map(pmd, addr);
+			do {
+				unsigned long pfn;
+				struct page *page;
+
+				if (pte_none(*pte))
+					goto next_pte;
+				if (!pte_present(*pte)) {
+					unsigned long index;
+					if (!pte_file(*pte))
+						goto next_pte;
+					index = pte_to_pgoff(*pte);
+					if (index >= pgoff &&
+							index - pgoff < len)
+						pte_clear(pte);
+					goto next_pte;
+				}
+				pfn = pte_pfn(*pte);
+				if (!pfn_valid(pfn))
+					goto next_pte;
+				page = pfn_to_page(pfn);
+				if (page->index < pgoff ||
+						page->index - pgoff >= len)
+					goto next_pte;
+				tlb_remove_tlb_entry(tlb, pte, addr);
+				if (pte_dirty(*pte))
+					set_page_dirty(page);
+				if (page->mapping &&
+						pte_young(*pte) &&
+						!PageSwapCache(page))
+					mark_page_accessed(page);
+				tlb->freed++;
+				page_remove_rmap(page, pte);
+				tlb_remove_page(tlb, page);
+				pte_clear(pte);
+next_pte:			addr += PAGE_SIZE;
+				if (addr >= vma->vm_end) {
+					pte_unmap(pte);
+					goto out;
+				}
+				++pte;
+			} while ((unsigned long)pte & PTE_TABLE_MASK);
+			pte_unmap(pte - 1);
+next_pmd:		++pmd;
+		} while ((unsigned long)pmd & PMD_TABLE_MASK);
+next_pgd:	++pgd;
+	}
+out:	tlb_end_vma(tlb, vma);
+	tlb_finish_mmu(tlb, vma->vm_start, vma->vm_end);
+	spin_unlock(&vma->vm_mm->page_table_lock);
+}
+
 /*
  * Helper function for invalidate_mmap_range().
  * Both hba and hlen are page numbers in PAGE_SIZE units.
@@ -1100,6 +1196,10 @@ invalidate_mmap_range_list(struct list_h
 		hea = ULONG_MAX;
 	list_for_each(curr, head) {
 		vp = list_entry(curr, struct vm_area_struct, shared);
+		if (unlikely(vp->vm_flags & VM_NONLINEAR)) {
+			invalidate_mmap_nonlinear_range(vp, hba, hlen);
+			continue;
+		}
 		vba = vp->vm_pgoff;
 		vea = vba + ((vp->vm_end - vp->vm_start) >> PAGE_SHIFT) - 1;
 		if (hea < vba || vea < hba)

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2003-10-13  0:56 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-10-12  8:48 [RFC] invalidate_mmap_range() misses remap_file_pages()-affected targets William Lee Irwin III
2003-10-12 10:34 ` William Lee Irwin III
2003-10-12 11:56   ` Andrew Morton
2003-10-12 19:51     ` William Lee Irwin III
2003-10-13  0:59       ` William Lee Irwin III
2003-10-12 11:53 ` Andrew Morton
2003-10-12 19:38   ` William Lee Irwin III
2003-10-12 20:28 ` Rik van Riel
2003-10-12 21:19   ` William Lee Irwin III

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox