linux-mm.kvack.org archive mirror
From: Mel Gorman <mgorman@suse.de>
To: Linux Kernel <linux-kernel@vger.kernel.org>
Cc: Linux-MM <linux-mm@kvack.org>,
	Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>,
	Hugh Dickins <hughd@google.com>, Dave Jones <davej@redhat.com>,
	Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@redhat.com>,
	Kirill Shutemov <kirill.shutemov@linux.intel.com>,
	Sasha Levin <sasha.levin@oracle.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Mel Gorman <mgorman@suse.de>
Subject: [PATCH 3/7] mm: Convert p[te|md]_mknonnuma and remaining page table manipulations
Date: Fri, 14 Nov 2014 13:33:02 +0000
Message-ID: <1415971986-16143-4-git-send-email-mgorman@suse.de>
In-Reply-To: <1415971986-16143-1-git-send-email-mgorman@suse.de>

With the switch to PROT_NONE, the traditional page table manipulation
functions are sufficient: the remaining p[te|md]_mknonnuma and
pmdp_set_numa users are converted to the standard pte_modify/pmd_modify
helpers, and change_prot_numa() now requests PAGE_NONE protections
directly.
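
As a rough sketch (for illustration only, not part of the diff below),
the conversion in the NUMA hinting fault path replaces the dedicated
_PAGE_NUMA helper with the standard helpers, rebuilding a present entry
from the VMA's protections; only helpers that appear in the patch are
used here:

	/* Old: clear the NUMA hinting bit with the dedicated helper */
	pte = pte_mknonnuma(pte);
	set_pte_at(mm, addr, ptep, pte);

	/* New: make the entry present again from vma->vm_page_prot */
	pte = pte_modify(pte, vma->vm_page_prot);
	pte = pte_mkyoung(pte);
	if (fault_flags & FAULT_FLAG_WRITE)
		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
	set_pte_at(mm, addr, ptep, pte);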

Needs-signed-off: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Needs-signed-off: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |  3 +--
 mm/huge_memory.c        | 33 +++++++--------------------------
 mm/memory.c             | 17 +++++++++++------
 mm/mempolicy.c          |  2 +-
 mm/migrate.c            |  2 +-
 mm/mprotect.c           |  2 +-
 mm/pgtable-generic.c    |  2 --
 7 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad9051b..554bbe3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,8 +31,7 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot,
-			int prot_numa);
+			unsigned long addr, pgprot_t newprot);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f6e5a8b..8295c9a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1366,9 +1366,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	goto out;
 clear_pmdnuma:
 	BUG_ON(!PageLocked(page));
-	pmd = pmd_mknonnuma(pmd);
+	pmd = pmd_modify(pmd, vma->vm_page_prot);
 	set_pmd_at(mm, haddr, pmdp, pmd);
-	VM_BUG_ON(pmd_protnone_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
 	unlock_page(page);
 out_unlock:
@@ -1502,7 +1501,7 @@ out:
  *  - HPAGE_PMD_NR is protections changed and TLB flush necessary
  */
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, pgprot_t newprot, int prot_numa)
+		unsigned long addr, pgprot_t newprot)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
@@ -1511,29 +1510,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		pmd_t entry;
 		ret = 1;
-		if (!prot_numa) {
-			entry = pmdp_get_and_clear(mm, addr, pmd);
-			if (pmd_protnone_numa(entry))
-				entry = pmd_mknonnuma(entry);
-			entry = pmd_modify(entry, newprot);
-			ret = HPAGE_PMD_NR;
-			set_pmd_at(mm, addr, pmd, entry);
-			BUG_ON(pmd_write(entry));
-		} else {
-			struct page *page = pmd_page(*pmd);
-
-			/*
-			 * Do not trap faults against the zero page. The
-			 * read-only data is likely to be read-cached on the
-			 * local CPU cache and it is less useful to know about
-			 * local vs remote hits on the zero page.
-			 */
-			if (!is_huge_zero_page(page) &&
-			    !pmd_protnone_numa(*pmd)) {
-				pmdp_set_numa(mm, addr, pmd);
-				ret = HPAGE_PMD_NR;
-			}
-		}
+		entry = pmdp_get_and_clear(mm, addr, pmd);
+		entry = pmd_modify(entry, newprot);
+		ret = HPAGE_PMD_NR;
+		set_pmd_at(mm, addr, pmd, entry);
+		BUG_ON(pmd_write(entry));
 		spin_unlock(ptl);
 	}
 
diff --git a/mm/memory.c b/mm/memory.c
index 96ceb0a..62604b1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3105,7 +3105,8 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 }
 
 static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
+		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd,
+		   unsigned int fault_flags)
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
@@ -3120,9 +3121,9 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	* validation through pte_unmap_same(). It's of NUMA type but
 	* the pfn may be screwed if the read is non atomic.
 	*
-	* ptep_modify_prot_start is not called as this is clearing
-	* the _PAGE_NUMA bit and it is not really expected that there
-	* would be concurrent hardware modifications to the PTE.
+	* We can safely just do a "set_pte_at()", because the old
+	* page table entry is not accessible, so there would be no
+	* concurrent hardware modifications to the PTE.
 	*/
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
@@ -3131,7 +3132,11 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	pte = pte_mknonnuma(pte);
+	/* Make it present again */
+	pte = pte_modify(pte, vma->vm_page_prot);
+	pte = pte_mkyoung(pte);
+	if (fault_flags & FAULT_FLAG_WRITE)
+		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
 
@@ -3221,7 +3226,7 @@ static int handle_pte_fault(struct mm_struct *mm,
 	}
 
 	if (pte_protnone_numa(entry))
-		return do_numa_page(mm, vma, address, entry, pte, pmd);
+		return do_numa_page(mm, vma, address, entry, pte, pmd, flags);
 
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e58725a..9d61dce 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -633,7 +633,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 {
 	int nr_updated;
 
-	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
+	nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
 	if (nr_updated)
 		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 0143995..26fa71f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1896,7 +1896,7 @@ out_fail:
 out_dropref:
 	ptl = pmd_lock(mm, pmd);
 	if (pmd_same(*pmd, entry)) {
-		entry = pmd_mknonnuma(entry);
+		entry = pmd_modify(entry, vma->vm_page_prot);
 		set_pmd_at(mm, mmun_start, pmd, entry);
 		update_mmu_cache_pmd(vma, address, &entry);
 	}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e93ddac..dc65c0f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -141,7 +141,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 				split_huge_page_pmd(vma, addr, pmd);
 			else {
 				int nr_ptes = change_huge_pmd(vma, pmd, addr,
-						newprot, prot_numa);
+						newprot);
 
 				if (nr_ptes) {
 					if (nr_ptes == HPAGE_PMD_NR) {
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index a2d8587..c25f94b 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -193,8 +193,6 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_t entry = *pmdp;
-	if (pmd_protnone_numa(entry))
-		entry = pmd_mknonnuma(entry);
 	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 }
-- 
1.8.4.5



Thread overview: 20+ messages
2014-11-14 13:32 [RFC PATCH 0/7] Replace _PAGE_NUMA with PAGE_NONE protections Mel Gorman
2014-11-14 13:33 ` [PATCH 1/7] mm: Add p[te|md] protnone helpers for use by NUMA balancing Mel Gorman
2014-11-14 13:33 ` [PATCH 2/7] mm: Convert p[te|md]_numa users to p[te|md]_protnone_numa Mel Gorman
2014-11-14 13:33 ` Mel Gorman [this message]
2014-11-14 13:33 ` [PATCH 4/7] mm: Remove remaining references to NUMA hinting bits and helpers Mel Gorman
2014-11-14 13:33 ` [PATCH 5/7] mm: numa: Do not trap faults on the huge zero page Mel Gorman
2014-11-14 13:33 ` [PATCH 6/7] x86: mm: Restore original pte_special check Mel Gorman
2014-11-14 13:33 ` [PATCH 7/7] mm: numa: Add paranoid check around pte_protnone_numa Mel Gorman
2014-11-15  1:41 ` [RFC PATCH 0/7] Replace _PAGE_NUMA with PAGE_NONE protections Linus Torvalds
2014-11-15  3:29 ` Sasha Levin
2014-11-18 15:42   ` Mel Gorman
2014-11-18 16:33     ` Sasha Levin
2014-11-18 16:56       ` Aneesh Kumar K.V
2014-11-18 17:14         ` Mel Gorman
2014-11-18 17:18         ` Sasha Levin
2014-11-19 13:14           ` Mel Gorman
2014-11-17  8:26 ` Aneesh Kumar K.V
2014-11-18 16:01   ` Mel Gorman
2014-11-18 16:33     ` Aneesh Kumar K.V
2014-11-18 17:08       ` Mel Gorman
