From: Ingo Molnar <mingo@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
"H. Peter Anvin" <hpa@zytor.com>, Borislav Petkov <bp@alien8.de>,
Andrew Morton <akpm@linux-foundation.org>,
Andy Lutomirski <luto@kernel.org>,
Denys Vlasenko <dvlasenk@redhat.com>
Subject: [GIT PULL] x86/mm changes for v4.4
Date: Tue, 3 Nov 2015 12:16:49 +0100 [thread overview]
Message-ID: <20151103111649.GA3477@gmail.com> (raw)
Linus,
Please pull the latest x86-mm-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-mm-for-linus
# HEAD: e1a58320a38dfa72be48a0f1a3a92273663ba6db x86/mm: Warn on W^X mappings
The main changes are: continued PAT work by Toshi Kani, plus a new boot time
warning about insecure RWX kernel mappings, by Stephen Smalley.
The new CONFIG_DEBUG_WX=y warning is marked default-y if CONFIG_DEBUG_RODATA=y is
already eanbled, as a special exception, as these bugs are hard to notice and this
check already found several live bugs.
Thanks,
Ingo
------------------>
Stephen Smalley (1):
x86/mm: Warn on W^X mappings
Toshi Kani (11):
x86/vdso32: Define PGTABLE_LEVELS to 32bit VDSO
x86/asm: Move PUD_PAGE macros to page_types.h
x86/asm: Add pud/pmd mask interfaces to handle large PAT bit
x86/asm: Fix pud/pmd interfaces to handle large PAT bit
x86/asm: Add pud_pgprot() and pmd_pgprot()
x86/mm: Fix page table dump to show PAT bit
x86/mm: Fix slow_virt_to_phys() to handle large PAT bit
x86/mm: Fix gup_huge_p?d() to handle large PAT bit
x86/mm: Fix try_preserve_large_page() to handle large PAT bit
x86/mm: Fix __split_large_page() to handle large PAT bit
x86/mm: Fix no-change case in try_preserve_large_page()
arch/x86/Kconfig.debug | 36 ++++++++++++-
arch/x86/entry/vdso/vdso32/vclock_gettime.c | 2 +
arch/x86/include/asm/page_64_types.h | 3 --
arch/x86/include/asm/page_types.h | 3 ++
arch/x86/include/asm/pgtable.h | 25 ++++++---
arch/x86/include/asm/pgtable_types.h | 40 ++++++++++++--
arch/x86/mm/Makefile | 2 +-
arch/x86/mm/dump_pagetables.c | 81 ++++++++++++++++++++++-------
arch/x86/mm/gup.c | 18 +++----
arch/x86/mm/init_32.c | 2 +
arch/x86/mm/init_64.c | 2 +
arch/x86/mm/pageattr.c | 79 +++++++++++++++++-----------
12 files changed, 219 insertions(+), 74 deletions(-)
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8c0d3266173..3e0baf726eef 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -65,10 +65,14 @@ config EARLY_PRINTK_EFI
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized.
+config X86_PTDUMP_CORE
+ def_bool n
+
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
+ select X86_PTDUMP_CORE
---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
@@ -79,7 +83,8 @@ config X86_PTDUMP
config EFI_PGT_DUMP
bool "Dump the EFI pagetable"
- depends on EFI && X86_PTDUMP
+ depends on EFI
+ select X86_PTDUMP_CORE
---help---
Enable this if you want to dump the EFI page table before
enabling virtual mode. This can be used to debug miscellaneous
@@ -105,6 +110,35 @@ config DEBUG_RODATA_TEST
feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
+config DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ depends on DEBUG_RODATA
+ default y
+ select X86_PTDUMP_CORE
+ ---help---
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+
+ Look for a message in dmesg output like this:
+
+ x86/mm: Checked W+X mappings: passed, no W+X pages found.
+
+ or like this, if the check failed:
+
+ x86/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 175cc72c0f68..87a86e017f0e 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -14,11 +14,13 @@
*/
#undef CONFIG_64BIT
#undef CONFIG_X86_64
+#undef CONFIG_PGTABLE_LEVELS
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS
#define CONFIG_X86_32 1
+#define CONFIG_PGTABLE_LEVELS 2
#define CONFIG_PAGE_OFFSET 0
#define CONFIG_ILLEGAL_POINTER_VALUE 0
#define CONFIG_NR_CPUS 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 4edd53b79a81..4928cf0d5af0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -26,9 +26,6 @@
#define MCE_STACK 4
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
-
/*
* Set __PAGE_OFFSET to the most negative possible address +
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c7c712f2648b..c5b7fb2774d0 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -20,6 +20,9 @@
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 867da5bbb4a3..c0b41f111a9a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -19,6 +19,13 @@
#include <asm/x86_init.h>
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_checkwx(void);
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
static inline unsigned long pud_pfn(pud_t pud)
{
- return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
@@ -379,7 +386,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
return __pgprot(preservebits | addbits);
}
-#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x))
+#define pmd_pgprot(x) __pgprot(pmd_flags(x))
+#define pud_pgprot(x) __pgprot(pud_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
@@ -502,14 +511,15 @@ static inline int pmd_none(pmd_t pmd)
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
- return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
+ return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
+#define pmd_page(pmd) \
+ pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -570,14 +580,15 @@ static inline int pud_present(pud_t pud)
static inline unsigned long pud_page_vaddr(pud_t pud)
{
- return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
+ return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+#define pud_page(pud) \
+ pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 13f310bfc09a..dd5b0aa9dd2f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -209,10 +209,10 @@ enum page_cache_mode {
#include <linux/types.h>
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
@@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
}
#endif
+static inline pudval_t pud_pfn_mask(pud_t pud)
+{
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ else
+ return PTE_PFN_MASK;
+}
+
+static inline pudval_t pud_flags_mask(pud_t pud)
+{
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
+ else
+ return ~PTE_PFN_MASK;
+}
+
static inline pudval_t pud_flags(pud_t pud)
{
- return native_pud_val(pud) & PTE_FLAGS_MASK;
+ return native_pud_val(pud) & pud_flags_mask(pud);
+}
+
+static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
+{
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ else
+ return PTE_PFN_MASK;
+}
+
+static inline pmdval_t pmd_flags_mask(pmd_t pmd)
+{
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
+ else
+ return ~PTE_PFN_MASK;
}
static inline pmdval_t pmd_flags(pmd_t pmd)
{
- return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+ return native_pmd_val(pmd) & pmd_flags_mask(pmd);
}
static inline pte_t native_make_pte(pteval_t val)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index a482d105172b..65c47fda26fc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f0cedf3395af..1bf417e9cc13 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -32,6 +32,8 @@ struct pg_state {
const struct addr_marker *marker;
unsigned long lines;
bool to_dmesg;
+ bool check_wx;
+ unsigned long wx_pages;
};
struct addr_marker {
@@ -155,7 +157,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, " ");
if ((level == 4 && pr & _PAGE_PAT) ||
((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
- pt_dump_cont_printf(m, dmsg, "pat ");
+ pt_dump_cont_printf(m, dmsg, "PAT ");
else
pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_GLOBAL)
@@ -198,8 +200,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
- cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
+ prot = pgprot_val(new_prot);
+ cur = pgprot_val(st->current_prot);
if (!st->level) {
/* First entry */
@@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st,
const char *unit = units;
unsigned long delta;
int width = sizeof(unsigned long) * 2;
+ pgprotval_t pr = pgprot_val(st->current_prot);
+
+ if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
+ WARN_ONCE(1,
+ "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address,
+ (void *)st->start_address);
+ st->wx_pages += (st->current_address -
+ st->start_address) / PAGE_SIZE;
+ }
/*
* Now print the actual finished series
@@ -269,13 +281,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
{
int i;
pte_t *start;
+ pgprotval_t prot;
start = (pte_t *) pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- pgprot_t prot = pte_pgprot(*start);
-
+ prot = pte_flags(*start);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- note_page(m, st, prot, 4);
+ note_page(m, st, __pgprot(prot), 4);
start++;
}
}
@@ -287,18 +299,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
{
int i;
pmd_t *start;
+ pgprotval_t prot;
start = (pmd_t *) pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
- pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
-
- if (pmd_large(*start) || !pmd_present(*start))
+ if (pmd_large(*start) || !pmd_present(*start)) {
+ prot = pmd_flags(*start);
note_page(m, st, __pgprot(prot), 3);
- else
+ } else {
walk_pte_level(m, st, *start,
P + i * PMD_LEVEL_MULT);
+ }
} else
note_page(m, st, __pgprot(0), 3);
start++;
@@ -318,19 +331,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
{
int i;
pud_t *start;
+ pgprotval_t prot;
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
- pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
-
- if (pud_large(*start) || !pud_present(*start))
+ if (pud_large(*start) || !pud_present(*start)) {
+ prot = pud_flags(*start);
note_page(m, st, __pgprot(prot), 2);
- else
+ } else {
walk_pmd_level(m, st, *start,
P + i * PUD_LEVEL_MULT);
+ }
} else
note_page(m, st, __pgprot(0), 2);
@@ -344,13 +358,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+ bool checkwx)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
pgd_t *start = swapper_pg_dir;
#endif
+ pgprotval_t prot;
int i;
struct pg_state st = {};
@@ -359,16 +375,20 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
st.to_dmesg = true;
}
+ st.check_wx = checkwx;
+ if (checkwx)
+ st.wx_pages = 0;
+
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
- pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
-
- if (pgd_large(*start) || !pgd_present(*start))
+ if (pgd_large(*start) || !pgd_present(*start)) {
+ prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);
- else
+ } else {
walk_pud_level(m, &st, *start,
i * PGD_LEVEL_MULT);
+ }
} else
note_page(m, &st, __pgprot(0), 1);
@@ -378,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
/* Flush out the last page */
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
note_page(m, &st, __pgprot(0), 0);
+ if (!checkwx)
+ return;
+ if (st.wx_pages)
+ pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
+ st.wx_pages);
+ else
+ pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
}
+void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+{
+ ptdump_walk_pgd_level_core(m, pgd, false);
+}
+
+void ptdump_walk_pgd_level_checkwx(void)
+{
+ ptdump_walk_pgd_level_core(NULL, NULL, true);
+}
+
+#ifdef CONFIG_X86_PTDUMP
static int ptdump_show(struct seq_file *m, void *v)
{
ptdump_walk_pgd_level(m, NULL);
@@ -397,10 +435,13 @@ static const struct file_operations ptdump_fops = {
.llseek = seq_lseek,
.release = single_release,
};
+#endif
static int pt_dump_init(void)
{
+#ifdef CONFIG_X86_PTDUMP
struct dentry *pe;
+#endif
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
@@ -412,10 +453,12 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
+#ifdef CONFIG_X86_PTDUMP
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
&ptdump_fops);
if (!pe)
return -ENOMEM;
+#endif
return 0;
}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 81bf3d2af3eb..ae9a37bf1371 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
- pte_t pte = *(pte_t *)&pmd;
struct page *head, *page;
int refs;
mask = _PAGE_PRESENT|_PAGE_USER;
if (write)
mask |= _PAGE_RW;
- if ((pte_flags(pte) & mask) != mask)
+ if ((pmd_flags(pmd) & mask) != mask)
return 0;
/* hugepages are never "special" */
- VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
+ VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
refs = 0;
- head = pte_page(pte);
+ head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
- pte_t pte = *(pte_t *)&pud;
struct page *head, *page;
int refs;
mask = _PAGE_PRESENT|_PAGE_USER;
if (write)
mask |= _PAGE_RW;
- if ((pte_flags(pte) & mask) != mask)
+ if ((pud_flags(pud) & mask) != mask)
return 0;
/* hugepages are never "special" */
- VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
+ VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
refs = 0;
- head = pte_page(pte);
+ head = pud_page(pud);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7562f42914b4..cb4ef3de61f9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -957,6 +957,8 @@ void mark_rodata_ro(void)
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
#endif
mark_nxdata_nx();
+ if (__supported_pte_mask & _PAGE_NX)
+ debug_checkwx();
}
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30564e2752d3..f8b157366700 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1150,6 +1150,8 @@ void mark_rodata_ro(void)
free_init_pages("unused kernel",
(unsigned long) __va(__pa_symbol(rodata_end)),
(unsigned long) __va(__pa_symbol(_sdata)));
+
+ debug_checkwx();
}
#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2c44c0792301..e2621a8e8213 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address)
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
unsigned long virt_addr = (unsigned long)__virt_addr;
- phys_addr_t phys_addr;
- unsigned long offset;
+ unsigned long phys_addr, offset;
enum pg_level level;
- unsigned long pmask;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
- pmask = page_level_mask(level);
- offset = virt_addr & ~pmask;
- phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
- return (phys_addr | offset);
+
+ switch (level) {
+ case PG_LEVEL_1G:
+ phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+ offset = virt_addr & ~PUD_PAGE_MASK;
+ break;
+ case PG_LEVEL_2M:
+ phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+ offset = virt_addr & ~PMD_PAGE_MASK;
+ break;
+ default:
+ phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ offset = virt_addr & ~PAGE_MASK;
+ }
+
+ return (phys_addr_t)(phys_addr | offset);
}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
@@ -458,7 +468,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
@@ -478,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
switch (level) {
case PG_LEVEL_2M:
-#ifdef CONFIG_X86_64
+ old_prot = pmd_pgprot(*(pmd_t *)kpte);
+ old_pfn = pmd_pfn(*(pmd_t *)kpte);
+ break;
case PG_LEVEL_1G:
-#endif
- psize = page_level_size(level);
- pmask = page_level_mask(level);
+ old_prot = pud_pgprot(*(pud_t *)kpte);
+ old_pfn = pud_pfn(*(pud_t *)kpte);
break;
default:
do_split = -EINVAL;
goto out_unlock;
}
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+
/*
* Calculate the number of pages, which fit into this large
* page starting at address:
@@ -504,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* up accordingly.
*/
old_pte = *kpte;
- old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte));
+ req_prot = pgprot_large_2_4k(old_prot);
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
@@ -530,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
req_prot = canon_pgprot(req_prot);
/*
- * old_pte points to the large page base address. So we need
+ * old_pfn points to the large page base pfn. So we need
* to add the offset of the virtual address:
*/
- pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+ pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
cpa->pfn = pfn;
new_prot = static_protections(req_prot, address, pfn);
@@ -544,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* the pages in the range we try to preserve:
*/
addr = address & pmask;
- pfn = pte_pfn(old_pte);
+ pfn = old_pfn;
for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
@@ -574,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* The address is aligned and the number of pages
* covers the full page.
*/
- new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
+ new_pte = pfn_pte(old_pfn, new_prot);
__set_pmd_pte(kpte, address, new_pte);
cpa->flags |= CPA_FLUSHTLB;
do_split = 0;
@@ -591,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
struct page *base)
{
pte_t *pbase = (pte_t *)page_address(base);
- unsigned long pfn, pfninc = 1;
+ unsigned long ref_pfn, pfn, pfninc = 1;
unsigned int i, level;
pte_t *tmp;
pgprot_t ref_prot;
@@ -608,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
}
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
- ref_prot = pte_pgprot(pte_clrhuge(*kpte));
- /* promote PAT bit to correct position */
- if (level == PG_LEVEL_2M)
+ switch (level) {
+ case PG_LEVEL_2M:
+ ref_prot = pmd_pgprot(*(pmd_t *)kpte);
+ /* clear PSE and promote PAT bit to correct position */
ref_prot = pgprot_large_2_4k(ref_prot);
+ ref_pfn = pmd_pfn(*(pmd_t *)kpte);
+ break;
-#ifdef CONFIG_X86_64
- if (level == PG_LEVEL_1G) {
+ case PG_LEVEL_1G:
+ ref_prot = pud_pgprot(*(pud_t *)kpte);
+ ref_pfn = pud_pfn(*(pud_t *)kpte);
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+
/*
- * Set the PSE flags only if the PRESENT flag is set
+ * Clear the PSE flags if the PRESENT flag is not set
* otherwise pmd_present/pmd_huge will return true
* even on a non present pmd.
*/
- if (pgprot_val(ref_prot) & _PAGE_PRESENT)
- pgprot_val(ref_prot) |= _PAGE_PSE;
- else
+ if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
pgprot_val(ref_prot) &= ~_PAGE_PSE;
+ break;
+
+ default:
+ spin_unlock(&pgd_lock);
+ return 1;
}
-#endif
/*
* Set the GLOBAL flags only if the PRESENT flag is set
@@ -643,7 +664,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
/*
* Get the target pfn from the original entry:
*/
- pfn = pte_pfn(*kpte);
+ pfn = ref_pfn;
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
next reply other threads:[~2015-11-03 11:17 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-03 11:16 Ingo Molnar [this message]
2015-11-04 19:26 ` [GIT PULL] x86/mm changes for v4.4 Linus Torvalds
2015-11-04 23:39 ` Dave Jones
2015-11-05 1:31 ` Linus Torvalds
2015-11-05 2:17 ` Dave Jones
2015-11-05 21:27 ` Linus Torvalds
2015-11-05 21:33 ` Linus Torvalds
2015-11-06 11:39 ` Matt Fleming
[not found] ` <20151106113943.GB2651-mF/unelCI9GS6iBeEJttW/XRex20P6io@public.gmane.org>
2015-11-07 7:05 ` Ingo Molnar
2015-11-07 7:05 ` Ingo Molnar
[not found] ` <20151107070554.GB6235-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2015-11-07 10:03 ` Matt Fleming
2015-11-07 10:03 ` Matt Fleming
2015-11-05 22:04 ` Linus Torvalds
2015-11-05 22:27 ` Borislav Petkov
2015-11-06 6:55 ` Ingo Molnar
2015-11-06 7:05 ` Andy Lutomirski
[not found] ` <CALCETrU2dn4TEj_2QiCPy4Mjw6hCbB84k1RnPzx7sLNygj4D5Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-11-06 13:09 ` Matt Fleming
2015-11-06 13:09 ` Matt Fleming
[not found] ` <20151106130948.GD2651-mF/unelCI9GS6iBeEJttW/XRex20P6io@public.gmane.org>
2015-11-06 13:24 ` Borislav Petkov
2015-11-06 13:24 ` Borislav Petkov
2015-11-07 7:03 ` Ingo Molnar
2015-11-06 7:44 ` Ingo Molnar
2015-11-06 12:39 ` Matt Fleming
[not found] ` <20151106123912.GC2651-mF/unelCI9GS6iBeEJttW/XRex20P6io@public.gmane.org>
2015-11-07 7:09 ` Ingo Molnar
2015-11-07 7:09 ` Ingo Molnar
2015-11-07 7:39 ` Ard Biesheuvel
2015-11-08 6:58 ` Kees Cook
2015-11-08 7:55 ` Ard Biesheuvel
2015-11-08 7:55 ` Ard Biesheuvel
2015-11-09 21:08 ` Kees Cook
2015-11-10 7:08 ` Ard Biesheuvel
[not found] ` <CAKv+Gu9ct9Rwi+_-0KtLq3Gw2Rn+QLhSVt_zbn4zBxfk_Qs16g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-11-10 20:11 ` Kees Cook
2015-11-10 20:11 ` Kees Cook
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151103111649.GA3477@gmail.com \
--to=mingo@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=bp@alien8.de \
--cc=dvlasenk@redhat.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.