From: Yu-cheng Yu <yu-cheng.yu@intel.com>
To: x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>,
linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-mm@kvack.org, linux-arch@vger.kernel.org,
linux-api@vger.kernel.org, Arnd Bergmann <arnd@arndb.de>,
Andy Lutomirski <luto@amacapital.net>,
Balbir Singh <bsingharora@gmail.com>,
Cyrill Gorcunov <gorcunov@gmail.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Eugene Syromiatnikov <esyr@redhat.com>,
Florian Weimer <fweimer@redhat.com>,
"H.J. Lu" <hjl.tools@gmail.com>, Jann Horn <jannh@google.com>,
Jonathan Corbet <corbet@lwn.net>,
Kees Cook <keescook@chromium.org>,
Mike Kravetz <mike.kravetz@oracle.com>,
Nadav Amit <nadav.amit@gmail.com>,
Oleg Nesterov <oleg@redhat.com>,
Pa
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Subject: [RFC PATCH v6 11/26] x86/mm: Introduce _PAGE_DIRTY_SW
Date: Mon, 19 Nov 2018 13:47:54 -0800 [thread overview]
Message-ID: <20181119214809.6086-12-yu-cheng.yu@intel.com> (raw)
In-Reply-To: <20181119214809.6086-1-yu-cheng.yu@intel.com>
A RO and dirty PTE exists in the following cases:
(a) A page is modified and then shared with a fork()'ed child;
(b) A R/O page that has been COW'ed;
(c) A SHSTK page.
The processor does not read the dirty bit for (a) and (b), but
checks the dirty bit for (c). To prevent the use of non-SHSTK
memory as SHSTK, we introduce a spare bit of the 64-bit PTE as
_PAGE_BIT_DIRTY_SW and use that for (a) and (b). This results
to the following possible PTE settings:
Modified PTE: (R/W + DIRTY_HW)
Modified and shared PTE: (R/O + DIRTY_SW)
R/O PTE COW'ed: (R/O + DIRTY_SW)
SHSTK PTE: (R/O + DIRTY_HW)
SHSTK PTE COW'ed: (R/O + DIRTY_HW)
SHSTK PTE shared: (R/O + DIRTY_SW)
Note that _PAGE_BIT_DRITY_SW is only used in R/O PTEs but
not R/W PTEs.
When this patch is applied, there are six free bits left in
the 64-bit PTE. There is no more free bit in the 32-bit
PTE (except for PAE) and shadow stack is not implemented
for the 32-bit kernel.
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
arch/x86/include/asm/pgtable.h | 129 ++++++++++++++++++++++-----
arch/x86/include/asm/pgtable_types.h | 21 ++++-
2 files changed, 128 insertions(+), 22 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index be71584bbd9a..db4b9d22d2f7 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -118,9 +118,9 @@ extern pmdval_t early_pmd_flags;
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
{
- return pte_flags(pte) & _PAGE_DIRTY;
+ return pte_flags(pte) & _PAGE_DIRTY_BITS;
}
@@ -142,9 +142,9 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_dirty(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_DIRTY;
+ return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
}
static inline int pmd_young(pmd_t pmd)
@@ -152,9 +152,9 @@ static inline int pmd_young(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_ACCESSED;
}
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
{
- return pud_flags(pud) & _PAGE_DIRTY;
+ return pud_flags(pud) & _PAGE_DIRTY_BITS;
}
static inline int pud_young(pud_t pud)
@@ -293,9 +293,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pte_t pte_move_flags(pte_t pte, pteval_t from, pteval_t to)
+{
+ if (pte_flags(pte) & from)
+ pte = pte_set_flags(pte_clear_flags(pte, from), to);
+ return pte;
+}
+#else
+static inline pte_t pte_move_flags(pte_t pte, pteval_t from, pteval_t to)
+{
+ return pte;
+}
+#endif
+
static inline pte_t pte_mkclean(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_DIRTY);
+ return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
}
static inline pte_t pte_mkold(pte_t pte)
@@ -305,6 +319,7 @@ static inline pte_t pte_mkold(pte_t pte)
static inline pte_t pte_wrprotect(pte_t pte)
{
+ pte = pte_move_flags(pte, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
return pte_clear_flags(pte, _PAGE_RW);
}
@@ -315,9 +330,24 @@ static inline pte_t pte_mkexec(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
+ pteval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+ pte_write(pte)) ? _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+ return pte_set_flags(pte, dirty | _PAGE_SOFT_DIRTY);
+}
+
+#ifdef CONFIG_ARCH_HAS_SHSTK
+static inline pte_t pte_mkdirty_shstk(pte_t pte)
+{
+ pte = pte_clear_flags(pte, _PAGE_DIRTY_SW);
return pte_set_flags(pte, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
}
+static inline bool pte_dirty_hw(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_DIRTY_HW;
+}
+#endif
+
static inline pte_t pte_mkyoung(pte_t pte)
{
return pte_set_flags(pte, _PAGE_ACCESSED);
@@ -325,6 +355,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
static inline pte_t pte_mkwrite(pte_t pte)
{
+ pte = pte_move_flags(pte, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
return pte_set_flags(pte, _PAGE_RW);
}
@@ -372,6 +403,20 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
return native_make_pmd(v & ~clear);
}
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pmd_t pmd_move_flags(pmd_t pmd, pmdval_t from, pmdval_t to)
+{
+ if (pmd_flags(pmd) & from)
+ pmd = pmd_set_flags(pmd_clear_flags(pmd, from), to);
+ return pmd;
+}
+#else
+static inline pmd_t pmd_move_flags(pmd_t pmd, pmdval_t from, pmdval_t to)
+{
+ return pmd;
+}
+#endif
+
static inline pmd_t pmd_mkold(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_ACCESSED);
@@ -379,19 +424,36 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_DIRTY);
+ return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
}
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
+ pmd = pmd_move_flags(pmd, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
return pmd_clear_flags(pmd, _PAGE_RW);
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
+ pmdval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+ (pmd_flags(pmd) & _PAGE_RW)) ?
+ _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+ return pmd_set_flags(pmd, dirty | _PAGE_SOFT_DIRTY);
+}
+
+#ifdef CONFIG_ARCH_HAS_SHSTK
+static inline pmd_t pmd_mkdirty_shstk(pmd_t pmd)
+{
+ pmd = pmd_clear_flags(pmd, _PAGE_DIRTY_SW);
return pmd_set_flags(pmd, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
}
+static inline bool pmd_dirty_hw(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_DIRTY_HW;
+}
+#endif
+
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_DEVMAP);
@@ -409,6 +471,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
+ pmd = pmd_move_flags(pmd, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
return pmd_set_flags(pmd, _PAGE_RW);
}
@@ -426,6 +489,20 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
return native_make_pud(v & ~clear);
}
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pud_t pud_move_flags(pud_t pud, pudval_t from, pudval_t to)
+{
+ if (pud_flags(pud) & from)
+ pud = pud_set_flags(pud_clear_flags(pud, from), to);
+ return pud;
+}
+#else
+static inline pud_t pud_move_flags(pud_t pud, pudval_t from, pudval_t to)
+{
+ return pud;
+}
+#endif
+
static inline pud_t pud_mkold(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_ACCESSED);
@@ -433,17 +510,22 @@ static inline pud_t pud_mkold(pud_t pud)
static inline pud_t pud_mkclean(pud_t pud)
{
- return pud_clear_flags(pud, _PAGE_DIRTY);
+ return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
}
static inline pud_t pud_wrprotect(pud_t pud)
{
+ pud = pud_move_flags(pud, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
return pud_clear_flags(pud, _PAGE_RW);
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- return pud_set_flags(pud, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
+ pudval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+ (pud_flags(pud) & _PAGE_RW)) ?
+ _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+
+ return pud_set_flags(pud, dirty | _PAGE_SOFT_DIRTY);
}
static inline pud_t pud_mkdevmap(pud_t pud)
@@ -463,6 +545,7 @@ static inline pud_t pud_mkyoung(pud_t pud)
static inline pud_t pud_mkwrite(pud_t pud)
{
+ pud = pud_move_flags(pud, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
return pud_set_flags(pud, _PAGE_RW);
}
@@ -594,19 +677,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
val &= _PAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ if ((pte_write(pte) && !(pgprot_val(newprot) & _PAGE_RW)))
+ return pte_move_flags(__pte(val), _PAGE_DIRTY_HW,
+ _PAGE_DIRTY_SW);
return __pte(val);
}
-static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
- pmdval_t val = pmd_val(pmd), oldval = val;
-
- val &= _HPAGE_CHG_MASK;
- val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
- val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
- return __pmd(val);
-}
-
/* mprotect needs to preserve PAT bits when updating vm_page_prot */
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
@@ -1158,6 +1234,19 @@ static inline int pmd_write(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_RW;
}
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pmdval_t val = pmd_val(pmd), oldval = val;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
+ if ((pmd_write(pmd) && !(pgprot_val(newprot) & _PAGE_RW)))
+ return pmd_move_flags(__pmd(val), _PAGE_DIRTY_HW,
+ _PAGE_DIRTY_SW);
+ return __pmd(val);
+}
+
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b6a7f088f53..6a2b42490fb2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,6 +23,7 @@
#define _PAGE_BIT_SOFTW2 10 /* " */
#define _PAGE_BIT_SOFTW3 11 /* " */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
+#define _PAGE_BIT_SOFTW5 57 /* available for programmer */
#define _PAGE_BIT_SOFTW4 58 /* available for programmer */
#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */
#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */
@@ -34,6 +35,7 @@
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
+#define _PAGE_BIT_DIRTY_SW _PAGE_BIT_SOFTW5 /* was written to */
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
@@ -109,6 +111,21 @@
#define _PAGE_DEVMAP (_AT(pteval_t, 0))
#endif
+/*
+ * _PAGE_DIRTY_HW: set by the processor when a page is written.
+ * _PAGE_DIRTY_SW: a spare bit tracking a written, but now R/O page.
+ * [R/W + _PAGE_DIRTY_HW] <-> [R/O + _PAGE_DIRTY_SW].
+ * _PAGE_SOFT_DIRTY: a spare bit used to track written pages since a time point
+ * set by the system admin; see Documentation/admin-guide/mm/soft-dirty.rst.
+ */
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+#define _PAGE_DIRTY_SW (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY_SW)
+#else
+#define _PAGE_DIRTY_SW (_AT(pteval_t, 0))
+#endif
+
+#define _PAGE_DIRTY_BITS (_PAGE_DIRTY_HW | _PAGE_DIRTY_SW)
+
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
@@ -122,9 +139,9 @@
* instance, and is *not* included in this mask since
* pte_modify() does modify it.
*/
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY_HW | \
- _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+ _PAGE_DIRTY_SW | _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
/*
--
2.17.1
WARNING: multiple messages have this Message-ID (diff)
From: Yu-cheng Yu <yu-cheng.yu@intel.com>
To: x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>,
linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-mm@kvack.org, linux-arch@vger.kernel.org,
linux-api@vger.kernel.org, Arnd Bergmann <arnd@arndb.de>,
Andy Lutomirski <luto@amacapital.net>,
Balbir Singh <bsingharora@gmail.com>,
Cyrill Gorcunov <gorcunov@gmail.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Eugene Syromiatnikov <esyr@redhat.com>,
Florian Weimer <fweimer@redhat.com>,
"H.J. Lu" <hjl.tools@gmail.com>, Jann Horn <jannh@google.com>,
Jonathan Corbet <corbet@lwn.net>,
Kees Cook <keescook@chromium.org>,
Mike Kravetz <mike.kravetz@oracle.com>,
Nadav Amit <nadav.amit@gmail.com>,
Oleg Nesterov <oleg@redhat.com>, Pavel Machek <pavel@ucw.cz>,
Peter Zijlstra <peterz@infradead.org>,
Randy Dunlap <rdunlap@infradead.org>,
"Ravi V. Shankar" <ravi.v.shankar@intel.com>,
Vedvyas Shanbhogue <vedvyas.shanbhogue@intel.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Subject: [RFC PATCH v6 11/26] x86/mm: Introduce _PAGE_DIRTY_SW
Date: Mon, 19 Nov 2018 13:47:54 -0800 [thread overview]
Message-ID: <20181119214809.6086-12-yu-cheng.yu@intel.com> (raw)
Message-ID: <20181119214754.TI4D2n84We4-JRRm6IpxsnxSTWNiGyCFm7xy6E1s4vg@z> (raw)
In-Reply-To: <20181119214809.6086-1-yu-cheng.yu@intel.com>
A RO and dirty PTE exists in the following cases:
(a) A page is modified and then shared with a fork()'ed child;
(b) A R/O page that has been COW'ed;
(c) A SHSTK page.
The processor does not read the dirty bit for (a) and (b), but
checks the dirty bit for (c). To prevent the use of non-SHSTK
memory as SHSTK, we introduce a spare bit of the 64-bit PTE as
_PAGE_BIT_DIRTY_SW and use that for (a) and (b). This results
to the following possible PTE settings:
Modified PTE: (R/W + DIRTY_HW)
Modified and shared PTE: (R/O + DIRTY_SW)
R/O PTE COW'ed: (R/O + DIRTY_SW)
SHSTK PTE: (R/O + DIRTY_HW)
SHSTK PTE COW'ed: (R/O + DIRTY_HW)
SHSTK PTE shared: (R/O + DIRTY_SW)
Note that _PAGE_BIT_DRITY_SW is only used in R/O PTEs but
not R/W PTEs.
When this patch is applied, there are six free bits left in
the 64-bit PTE. There is no more free bit in the 32-bit
PTE (except for PAE) and shadow stack is not implemented
for the 32-bit kernel.
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
arch/x86/include/asm/pgtable.h | 129 ++++++++++++++++++++++-----
arch/x86/include/asm/pgtable_types.h | 21 ++++-
2 files changed, 128 insertions(+), 22 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index be71584bbd9a..db4b9d22d2f7 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -118,9 +118,9 @@ extern pmdval_t early_pmd_flags;
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
{
- return pte_flags(pte) & _PAGE_DIRTY;
+ return pte_flags(pte) & _PAGE_DIRTY_BITS;
}
@@ -142,9 +142,9 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_dirty(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_DIRTY;
+ return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
}
static inline int pmd_young(pmd_t pmd)
@@ -152,9 +152,9 @@ static inline int pmd_young(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_ACCESSED;
}
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
{
- return pud_flags(pud) & _PAGE_DIRTY;
+ return pud_flags(pud) & _PAGE_DIRTY_BITS;
}
static inline int pud_young(pud_t pud)
@@ -293,9 +293,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pte_t pte_move_flags(pte_t pte, pteval_t from, pteval_t to)
+{
+ if (pte_flags(pte) & from)
+ pte = pte_set_flags(pte_clear_flags(pte, from), to);
+ return pte;
+}
+#else
+static inline pte_t pte_move_flags(pte_t pte, pteval_t from, pteval_t to)
+{
+ return pte;
+}
+#endif
+
static inline pte_t pte_mkclean(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_DIRTY);
+ return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
}
static inline pte_t pte_mkold(pte_t pte)
@@ -305,6 +319,7 @@ static inline pte_t pte_mkold(pte_t pte)
static inline pte_t pte_wrprotect(pte_t pte)
{
+ pte = pte_move_flags(pte, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
return pte_clear_flags(pte, _PAGE_RW);
}
@@ -315,9 +330,24 @@ static inline pte_t pte_mkexec(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
+ pteval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+ pte_write(pte)) ? _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+ return pte_set_flags(pte, dirty | _PAGE_SOFT_DIRTY);
+}
+
+#ifdef CONFIG_ARCH_HAS_SHSTK
+static inline pte_t pte_mkdirty_shstk(pte_t pte)
+{
+ pte = pte_clear_flags(pte, _PAGE_DIRTY_SW);
return pte_set_flags(pte, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
}
+static inline bool pte_dirty_hw(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_DIRTY_HW;
+}
+#endif
+
static inline pte_t pte_mkyoung(pte_t pte)
{
return pte_set_flags(pte, _PAGE_ACCESSED);
@@ -325,6 +355,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
static inline pte_t pte_mkwrite(pte_t pte)
{
+ pte = pte_move_flags(pte, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
return pte_set_flags(pte, _PAGE_RW);
}
@@ -372,6 +403,20 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
return native_make_pmd(v & ~clear);
}
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pmd_t pmd_move_flags(pmd_t pmd, pmdval_t from, pmdval_t to)
+{
+ if (pmd_flags(pmd) & from)
+ pmd = pmd_set_flags(pmd_clear_flags(pmd, from), to);
+ return pmd;
+}
+#else
+static inline pmd_t pmd_move_flags(pmd_t pmd, pmdval_t from, pmdval_t to)
+{
+ return pmd;
+}
+#endif
+
static inline pmd_t pmd_mkold(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_ACCESSED);
@@ -379,19 +424,36 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_DIRTY);
+ return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
}
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
+ pmd = pmd_move_flags(pmd, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
return pmd_clear_flags(pmd, _PAGE_RW);
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
+ pmdval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+ (pmd_flags(pmd) & _PAGE_RW)) ?
+ _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+ return pmd_set_flags(pmd, dirty | _PAGE_SOFT_DIRTY);
+}
+
+#ifdef CONFIG_ARCH_HAS_SHSTK
+static inline pmd_t pmd_mkdirty_shstk(pmd_t pmd)
+{
+ pmd = pmd_clear_flags(pmd, _PAGE_DIRTY_SW);
return pmd_set_flags(pmd, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
}
+static inline bool pmd_dirty_hw(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_DIRTY_HW;
+}
+#endif
+
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_DEVMAP);
@@ -409,6 +471,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
+ pmd = pmd_move_flags(pmd, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
return pmd_set_flags(pmd, _PAGE_RW);
}
@@ -426,6 +489,20 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
return native_make_pud(v & ~clear);
}
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pud_t pud_move_flags(pud_t pud, pudval_t from, pudval_t to)
+{
+ if (pud_flags(pud) & from)
+ pud = pud_set_flags(pud_clear_flags(pud, from), to);
+ return pud;
+}
+#else
+static inline pud_t pud_move_flags(pud_t pud, pudval_t from, pudval_t to)
+{
+ return pud;
+}
+#endif
+
static inline pud_t pud_mkold(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_ACCESSED);
@@ -433,17 +510,22 @@ static inline pud_t pud_mkold(pud_t pud)
static inline pud_t pud_mkclean(pud_t pud)
{
- return pud_clear_flags(pud, _PAGE_DIRTY);
+ return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
}
static inline pud_t pud_wrprotect(pud_t pud)
{
+ pud = pud_move_flags(pud, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
return pud_clear_flags(pud, _PAGE_RW);
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- return pud_set_flags(pud, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
+ pudval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+ (pud_flags(pud) & _PAGE_RW)) ?
+ _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+
+ return pud_set_flags(pud, dirty | _PAGE_SOFT_DIRTY);
}
static inline pud_t pud_mkdevmap(pud_t pud)
@@ -463,6 +545,7 @@ static inline pud_t pud_mkyoung(pud_t pud)
static inline pud_t pud_mkwrite(pud_t pud)
{
+ pud = pud_move_flags(pud, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
return pud_set_flags(pud, _PAGE_RW);
}
@@ -594,19 +677,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
val &= _PAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ if ((pte_write(pte) && !(pgprot_val(newprot) & _PAGE_RW)))
+ return pte_move_flags(__pte(val), _PAGE_DIRTY_HW,
+ _PAGE_DIRTY_SW);
return __pte(val);
}
-static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
- pmdval_t val = pmd_val(pmd), oldval = val;
-
- val &= _HPAGE_CHG_MASK;
- val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
- val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
- return __pmd(val);
-}
-
/* mprotect needs to preserve PAT bits when updating vm_page_prot */
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
@@ -1158,6 +1234,19 @@ static inline int pmd_write(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_RW;
}
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pmdval_t val = pmd_val(pmd), oldval = val;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
+ if ((pmd_write(pmd) && !(pgprot_val(newprot) & _PAGE_RW)))
+ return pmd_move_flags(__pmd(val), _PAGE_DIRTY_HW,
+ _PAGE_DIRTY_SW);
+ return __pmd(val);
+}
+
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b6a7f088f53..6a2b42490fb2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,6 +23,7 @@
#define _PAGE_BIT_SOFTW2 10 /* " */
#define _PAGE_BIT_SOFTW3 11 /* " */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
+#define _PAGE_BIT_SOFTW5 57 /* available for programmer */
#define _PAGE_BIT_SOFTW4 58 /* available for programmer */
#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */
#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */
@@ -34,6 +35,7 @@
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
+#define _PAGE_BIT_DIRTY_SW _PAGE_BIT_SOFTW5 /* was written to */
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
@@ -109,6 +111,21 @@
#define _PAGE_DEVMAP (_AT(pteval_t, 0))
#endif
+/*
+ * _PAGE_DIRTY_HW: set by the processor when a page is written.
+ * _PAGE_DIRTY_SW: a spare bit tracking a written, but now R/O page.
+ * [R/W + _PAGE_DIRTY_HW] <-> [R/O + _PAGE_DIRTY_SW].
+ * _PAGE_SOFT_DIRTY: a spare bit used to track written pages since a time point
+ * set by the system admin; see Documentation/admin-guide/mm/soft-dirty.rst.
+ */
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+#define _PAGE_DIRTY_SW (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY_SW)
+#else
+#define _PAGE_DIRTY_SW (_AT(pteval_t, 0))
+#endif
+
+#define _PAGE_DIRTY_BITS (_PAGE_DIRTY_HW | _PAGE_DIRTY_SW)
+
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
@@ -122,9 +139,9 @@
* instance, and is *not* included in this mask since
* pte_modify() does modify it.
*/
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY_HW | \
- _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+ _PAGE_DIRTY_SW | _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
/*
--
2.17.1
next prev parent reply other threads:[~2018-11-19 21:47 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-19 21:47 [RFC PATCH v6 00/26] Control-flow Enforcement: Shadow Stack Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 01/26] Documentation/x86: Add CET description Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-20 9:52 ` Ingo Molnar
2018-11-20 9:52 ` Ingo Molnar
2018-11-20 20:36 ` Yu-cheng Yu
2018-11-20 20:36 ` Yu-cheng Yu
2018-11-21 7:24 ` Ingo Molnar
2018-11-21 7:24 ` Ingo Molnar
2018-11-19 21:47 ` [RFC PATCH v6 02/26] x86/cpufeatures: Add CET CPU feature flags for Control-flow Enforcement Technology (CET) Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 03/26] x86/fpu/xstate: Change names to separate XSAVES system and user states Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 04/26] x86/fpu/xstate: Introduce XSAVES system states Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-12-04 16:01 ` Borislav Petkov
2018-12-04 16:01 ` Borislav Petkov
2018-12-04 17:08 ` Yu-cheng Yu
2018-12-04 17:08 ` Yu-cheng Yu
2018-12-04 18:16 ` Borislav Petkov
2018-12-04 18:16 ` Borislav Petkov
2018-11-19 21:47 ` [RFC PATCH v6 05/26] x86/fpu/xstate: Add XSAVES system states for shadow stack Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 06/26] x86/cet: Add control protection exception handler Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 07/26] x86/cet/shstk: Add Kconfig option for user-mode shadow stack Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 08/26] mm: Introduce VM_SHSTK for shadow stack memory Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 09/26] mm/mmap: Prevent Shadow Stack VMA merges Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 10/26] x86/mm: Change _PAGE_DIRTY to _PAGE_DIRTY_HW Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu [this message]
2018-11-19 21:47 ` [RFC PATCH v6 11/26] x86/mm: Introduce _PAGE_DIRTY_SW Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 12/26] drm/i915/gvt: Update _PAGE_DIRTY to _PAGE_DIRTY_BITS Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 13/26] x86/mm: Modify ptep_set_wrprotect and pmdp_set_wrprotect for _PAGE_DIRTY_SW Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 14/26] x86/mm: Shadow stack page fault error checking Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 15/26] mm: Handle shadow stack page fault Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:47 ` [RFC PATCH v6 16/26] mm: Handle THP/HugeTLB " Yu-cheng Yu
2018-11-19 21:47 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 17/26] mm: Update can_follow_write_pte/pmd for shadow stack Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 18/26] mm: Introduce do_mmap_locked() Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 19/26] x86/cet/shstk: User-mode shadow stack support Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 20/26] x86/cet/shstk: Introduce WRUSS instruction Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 21/26] x86/cet/shstk: Signal handling for shadow stack Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 22/26] x86/cet/shstk: ELF header parsing of Shadow Stack Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2019-04-25 11:02 ` Dave Martin
2019-04-25 11:02 ` Dave Martin
2019-04-25 15:14 ` Yu-cheng Yu
2019-04-25 15:14 ` Yu-cheng Yu
2019-04-25 15:35 ` Dave Martin
2019-04-25 15:35 ` Dave Martin
2019-04-25 16:11 ` Dave Martin
2019-04-25 16:11 ` Dave Martin
2019-04-25 16:20 ` Yu-cheng Yu
2019-04-25 16:20 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 23/26] x86/cet/shstk: Handle thread shadow stack Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 24/26] mm/mmap: Add Shadow stack pages to memory accounting Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 25/26] x86/cet/shstk: Add arch_prctl functions for Shadow Stack Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-19 21:48 ` [RFC PATCH v6 26/26] x86/cet/shstk: Add Shadow Stack instructions to opcode map Yu-cheng Yu
2018-11-19 21:48 ` Yu-cheng Yu
2018-11-22 16:53 ` [RFC PATCH v6 00/26] Control-flow Enforcement: Shadow Stack Andy Lutomirski
2018-11-22 16:53 ` Andy Lutomirski
2018-11-26 17:38 ` Yu-cheng Yu
2018-11-26 17:38 ` Yu-cheng Yu
2018-11-26 18:29 ` Andy Lutomirski
2018-11-26 18:29 ` Andy Lutomirski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181119214809.6086-12-yu-cheng.yu@intel.com \
--to=yu-cheng.yu@intel.com \
--cc=arnd@arndb.de \
--cc=bsingharora@gmail.com \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=esyr@redhat.com \
--cc=fweimer@redhat.com \
--cc=gorcunov@gmail.com \
--cc=hjl.tools@gmail.com \
--cc=hpa@zytor.com \
--cc=jannh@google.com \
--cc=keescook@chromium.org \
--cc=linux-api@vger.kernel.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@amacapital.net \
--cc=mike.kravetz@oracle.com \
--cc=mingo@redhat.com \
--cc=nadav.amit@gmail.com \
--cc=oleg@redhat.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.