public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: William Lee Irwin III <wli@holomorphy.com>
To: linux-kernel@vger.kernel.org
Cc: akpm@osdl.org
Subject: [RFC] invalidate_mmap_range() misses remap_file_pages()-affected targets
Date: Sun, 12 Oct 2003 01:48:42 -0700	[thread overview]
Message-ID: <20031012084842.GB765@holomorphy.com> (raw)

invalidate_mmap_range(), and hence vmtruncate(), can miss its targets
due to remap_file_pages() disturbing the former invariant of file
offsets only being mapped within vmas tagged as mapping file offset
ranges containing them.

This patch uses the VM_NONLINEAR flag to detect when this could happen,
and does the full pagetable walk over the full range of virtual address
space the vma tracks to ensure that the search proceeds over vmas and
virtual addresses possibly caching file offsets outside vmas' "natural
range" due to remap_file_pages().

A further twist is that remap_file_pages() now needs to set VM_NONLINEAR
in a manner synchronized with invalidate_mmap_range(); this is done by
protecting the setting of VM_NONLINEAR with ->i_shared_sem. This will
suffice to exclude ->populate() even though it doesn't surround it, as
vmtruncate() alters the inode size prior to the invalidation. More
general uses of invalidate_mmap_range() may need to hold it during the
->populate() calls as they may not have protection from the inode size.

Untested, though it appears to compile. The only disturbing signs are
tlb_remove_tlb_entry() on an uncleared pte, which is at variance with
zap_pte_range() (include/asm-ppc64/tlb.h suggests zap_pte_range() is
erroneous) and a semantic question with respect to PTE_FILE ptes, i.e.
whether to clear or ignore (zap_page_range() clears as it stands now).

vs. 2.6.0-test7-bk3


diff -prauN linux-2.6.0-test7-bk3/mm/fremap.c rfp-2.6.0-test7-bk3-1/mm/fremap.c
--- linux-2.6.0-test7-bk3/mm/fremap.c	2003-10-08 12:24:00.000000000 -0700
+++ rfp-2.6.0-test7-bk3-1/mm/fremap.c	2003-10-12 00:48:45.000000000 -0700
@@ -201,9 +201,19 @@ long sys_remap_file_pages(unsigned long 
 			end > start && start >= vma->vm_start &&
 				end <= vma->vm_end) {
 
-		/* Must set VM_NONLINEAR before any pages are populated. */
-		if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff)
+		/*
+		 * Must set VM_NONLINEAR before any pages are populated.
+		 * Take ->i_shared_sem to lock out invalidate_mmap_range().
+		 */
+		if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff) {
+			struct file *file = vma->vm_file;
+			struct address_space *mapping;
+
+			mapping = file->f_dentry->d_inode->i_mapping;
+			down(&mapping->i_shared_sem);
 			vma->vm_flags |= VM_NONLINEAR;
+			up(&mapping->i_shared_sem);
+		}
 
 		/* ->populate can take a long time, so downgrade the lock. */
 		downgrade_write(&mm->mmap_sem);
diff -prauN linux-2.6.0-test7-bk3/mm/memory.c rfp-2.6.0-test7-bk3-1/mm/memory.c
--- linux-2.6.0-test7-bk3/mm/memory.c	2003-10-08 12:24:04.000000000 -0700
+++ rfp-2.6.0-test7-bk3-1/mm/memory.c	2003-10-12 01:10:30.000000000 -0700
@@ -1077,6 +1077,102 @@ out:
 	return ret;
 }
 
+/* Walk all of nonlinear @vma under page_table_lock, clearing each present or
+ * PTE_FILE pte whose file page index lies in [pgoff, pgoff + len). */
+static void
+invalidate_mmap_nonlinear_range(struct vm_area_struct *vma,
+		const unsigned long pgoff, const unsigned long len)
+{
+	unsigned long addr;
+	pgd_t *pgd;
+	struct mmu_gather *tlb;
+
+	spin_lock(&vma->vm_mm->page_table_lock);
+	addr = vma->vm_start;
+	pgd = pgd_offset(vma->vm_mm, addr);
+	tlb = tlb_gather_mmu(vma->vm_mm, vma->vm_start);
+
+	tlb_start_vma(tlb, vma);
+	while (1) {
+		pmd_t *pmd;
+
+		if (pgd_none(*pgd)) {
+			/* skip_pgd advances addr; don't do it twice */
+			goto skip_pgd;
+		} else if (pgd_bad(*pgd)) {
+			pgd_ERROR(*pgd);
+			pgd_clear(pgd);
+skip_pgd:		addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			if (!addr || addr >= vma->vm_end)
+				break;
+			goto next_pgd;
+		}
+
+		pmd = pmd_offset(pgd, addr);
+		do {
+			pte_t *pte;
+
+			if (pmd_none(*pmd)) {
+				goto skip_pmd;
+			} else if (pmd_bad(*pmd)) {
+				pmd_ERROR(*pmd);
+				pmd_clear(pmd);
+skip_pmd:			addr = (addr + PMD_SIZE) & PMD_MASK;
+				if (!addr || addr >= vma->vm_end)
+					goto out;
+				goto next_pmd;
+			}
+			pte = pte_offset_map(pmd, addr);
+			do {
+				unsigned long pfn;
+				struct page *page;
+
+				if (pte_none(*pte))
+					goto next_pte;
+				if (!pte_present(*pte)) {
+					unsigned long index;
+					if (!pte_file(*pte))
+						goto next_pte;
+					index = pte_to_pgoff(*pte);
+					if (index >= pgoff &&
+							index - pgoff < len)
+						pte_clear(pte);
+					goto next_pte;
+				}
+				pfn = pte_pfn(*pte);
+				if (!pfn_valid(pfn))
+					goto next_pte;
+				page = pfn_to_page(pfn);
+				if (page->index < pgoff ||
+						page->index - pgoff >= len)
+					goto next_pte;
+				tlb_remove_tlb_entry(tlb, pte, addr);
+				if (pte_dirty(*pte))
+					set_page_dirty(page);
+				if (page->mapping && pte_young(*pte) &&
+						!PageSwapCache(page))
+					mark_page_accessed(page);
+				tlb->freed++;
+				page_remove_rmap(page, pte);
+				tlb_remove_page(tlb, page);
+				pte_clear(pte);
+next_pte:			addr += PAGE_SIZE;
+				if (addr >= vma->vm_end) {
+					pte_unmap(pte);
+					goto out;
+				}
+				++pte;
+			} while ((unsigned long)pte & PTE_TABLE_MASK);
+			pte_unmap(pte - 1);
+next_pmd:		++pmd;
+		} while ((unsigned long)pmd & PMD_TABLE_MASK);
+next_pgd:	++pgd;
+	}
+out:	tlb_end_vma(tlb, vma);
+	tlb_finish_mmu(tlb, vma->vm_start, vma->vm_end);
+	spin_unlock(&vma->vm_mm->page_table_lock);
+}
+
 /*
  * Helper function for invalidate_mmap_range().
  * Both hba and hlen are page numbers in PAGE_SIZE units.
@@ -1100,6 +1196,10 @@ invalidate_mmap_range_list(struct list_h
 		hea = ULONG_MAX;
 	list_for_each(curr, head) {
 		vp = list_entry(curr, struct vm_area_struct, shared);
+		if (unlikely(vp->vm_flags & VM_NONLINEAR)) {
+			invalidate_mmap_nonlinear_range(vp, hba, hlen);
+			continue;
+		}
 		vba = vp->vm_pgoff;
 		vea = vba + ((vp->vm_end - vp->vm_start) >> PAGE_SHIFT) - 1;
 		if (hea < vba || vea < hba)

             reply	other threads:[~2003-10-12  8:45 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-10-12  8:48 William Lee Irwin III [this message]
2003-10-12 10:34 ` [RFC] invalidate_mmap_range() misses remap_file_pages()-affected targets William Lee Irwin III
2003-10-12 11:56   ` Andrew Morton
2003-10-12 19:51     ` William Lee Irwin III
2003-10-13  0:59       ` William Lee Irwin III
2003-10-12 11:53 ` Andrew Morton
2003-10-12 19:38   ` William Lee Irwin III
2003-10-12 20:28 ` Rik van Riel
2003-10-12 21:19   ` William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20031012084842.GB765@holomorphy.com \
    --to=wli@holomorphy.com \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox