diff for duplicates of <1536957543.12990.9.camel@intel.com> diff --git a/a/content_digest b/N1/content_digest index f9fd4d5..2b1643b 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -35,7 +35,9 @@ Mike Kravetz <mike.kravetz@oracle.com> Nadav Amit <nadav.amit@gmail.com> Oleg Nesterov <oleg@redhat.com> - " Pavel Machek <pavel@ucw.cz>\0" + Pavel Machek <pavel@ucw.cz> + ravi.v.shankar@intel.com + " vedvyas.shanbhogue@intel.com\0" "\00:1\0" "b\0" "On Fri, 2018-08-31 at 18:29 +0200, Peter Zijlstra wrote:\n" @@ -105,4 +107,4 @@ "+#endif\n" "\302\240}" -32b70e7976d080f456ba7a0406cf0bbce61e3f30a34b44696e9f190fce358c05 +688220fb0040e94d7d8e8625dd50df1758580853a90d7832c2872d714a41da50
diff --git a/a/1.txt b/N2/1.txt index 4e2da67..3878a56 100644 --- a/a/1.txt +++ b/N2/1.txt @@ -11,14 +11,14 @@ On Fri, 2018-08-31 at 18:29 +0200, Peter Zijlstra wrote: > > We don't need a race. > > > > I think the cmpxchg will be slower, even without a race, than the code -> > that was there before. The cmpxchg is a simple, straightforward +> > that was there before.A A The cmpxchg is a simple, straightforward > > solution, but we're putting it in place of a plain memory write, which > > is suboptimal. > Note quite, the clear_bit() is LOCK prefixed. With the updated ptep_set_wrprotect() below, I did MADV_WILLNEED to a shadow stack of 8 MB, then 10,000 fork()'s, but could not prove it is more or less -efficient than the other. So can we say this is probably fine in terms of +efficient than the other. A So can we say this is probably fine in terms of efficiency? Yu-cheng @@ -30,30 +30,30 @@ Yu-cheng +++ b/arch/x86/include/asm/pgtable.h @@ -1203,7 +1203,36 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - static inline void ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) - { +A static inline void ptep_set_wrprotect(struct mm_struct *mm, +A A A A A A A unsigned long addr, pte_t *ptep) +A { +#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER + pte_t new_pte, pte = READ_ONCE(*ptep); + + /* -+ * Some processors can start a write, but end up -+ * seeing a read-only PTE by the time they get -+ * to the Dirty bit. In this case, they will -+ * set the Dirty bit, leaving a read-only, Dirty -+ * PTE which looks like a Shadow Stack PTE. -+ * -+ * However, this behavior has been improved and -+ * will not occur on processors supporting -+ * Shadow Stacks. Without this guarantee, a -+ * transition to a non-present PTE and flush the -+ * TLB would be needed. -+ * -+ * When changing a writable PTE to read-only and -+ * if the PTE has _PAGE_DIRTY_HW set, we move -+ * that bit to _PAGE_DIRTY_SW so that the PTE is -+ * not a valid Shadow Stack PTE. -+ */ ++ A * Some processors can start a write, but end up ++ A * seeing a read-only PTE by the time they get ++ A * to the Dirty bit.A A In this case, they will ++ A * set the Dirty bit, leaving a read-only, Dirty ++ A * PTE which looks like a Shadow Stack PTE. ++ A * ++ A * However, this behavior has been improved and ++ A * will not occur on processors supporting ++ A * Shadow Stacks.A A Without this guarantee, a ++ A * transition to a non-present PTE and flush the ++ A * TLB would be needed. ++ A * ++ A * When changing a writable PTE to read-only and ++ A * if the PTE has _PAGE_DIRTY_HW set, we move ++ A * that bit to _PAGE_DIRTY_SW so that the PTE is ++ A * not a valid Shadow Stack PTE. ++ A */ + do { + new_pte = pte_wrprotect(pte); + new_pte.pte |= (new_pte.pte & _PAGE_DIRTY_HW) >> @@ -61,6 +61,6 @@ mm_struct *mm, + new_pte.pte &= ~_PAGE_DIRTY_HW; + } while (!try_cmpxchg(ptep, &pte, new_pte)); +#else - clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); +A clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); +#endif - } +A } diff --git a/a/content_digest b/N2/content_digest index f9fd4d5..08c0c3b 100644 --- a/a/content_digest +++ b/N2/content_digest @@ -35,7 +35,9 @@ Mike Kravetz <mike.kravetz@oracle.com> Nadav Amit <nadav.amit@gmail.com> Oleg Nesterov <oleg@redhat.com> - " Pavel Machek <pavel@ucw.cz>\0" + Pavel Machek <pavel@ucw.cz> + ravi.v.shankar@intel.com + " vedvyas.shanbhogue@intel.com\0" "\00:1\0" "b\0" "On Fri, 2018-08-31 at 18:29 +0200, Peter Zijlstra wrote:\n" @@ -51,14 +53,14 @@ "> > We don't need a race.\n" "> > \n" "> > I think the cmpxchg will be slower, even without a race, than the code\n" - "> > that was there before.\302\240\302\240The cmpxchg is a simple, straightforward\n" + "> > that was there before.A A The cmpxchg is a simple, straightforward\n" "> > solution, but we're putting it in place of a plain memory write, which\n" "> > is suboptimal.\n" "> Note quite, the clear_bit() is LOCK prefixed.\n" "\n" "With the updated ptep_set_wrprotect() below, I did MADV_WILLNEED to a shadow\n" "stack of 8 MB, then 10,000 fork()'s, but could not prove it is more or less\n" - "efficient than the other. \302\240So can we say this is probably fine in terms of\n" + "efficient than the other. A So can we say this is probably fine in terms of\n" "efficiency?\n" "\n" "Yu-cheng\n" @@ -70,30 +72,30 @@ "+++ b/arch/x86/include/asm/pgtable.h\n" "@@ -1203,7 +1203,36 @@ static inline pte_t ptep_get_and_clear_full(struct\n" "mm_struct *mm,\n" - "\302\240static inline void ptep_set_wrprotect(struct mm_struct *mm,\n" - "\302\240\t\t\t\t\302\240\302\240\302\240\302\240\302\240\302\240unsigned long addr, pte_t *ptep)\n" - "\302\240{\n" + "A static inline void ptep_set_wrprotect(struct mm_struct *mm,\n" + "A \t\t\t\tA A A A A A unsigned long addr, pte_t *ptep)\n" + "A {\n" "+#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER\n" "+\tpte_t new_pte, pte = READ_ONCE(*ptep);\n" "+\n" "+\t/*\n" - "+\t\302\240* Some processors can start a write, but end up\n" - "+\t\302\240* seeing a read-only PTE by the time they get\n" - "+\t\302\240* to the Dirty bit.\302\240\302\240In this case, they will\n" - "+\t\302\240* set the Dirty bit, leaving a read-only, Dirty\n" - "+\t\302\240* PTE which looks like a Shadow Stack PTE.\n" - "+\t\302\240*\n" - "+\t\302\240* However, this behavior has been improved and\n" - "+\t\302\240* will not occur on processors supporting\n" - "+\t\302\240* Shadow Stacks.\302\240\302\240Without this guarantee, a\n" - "+\t\302\240* transition to a non-present PTE and flush the\n" - "+\t\302\240* TLB would be needed.\n" - "+\t\302\240*\n" - "+\t\302\240* When changing a writable PTE to read-only and\n" - "+\t\302\240* if the PTE has _PAGE_DIRTY_HW set, we move\n" - "+\t\302\240* that bit to _PAGE_DIRTY_SW so that the PTE is\n" - "+\t\302\240* not a valid Shadow Stack PTE.\n" - "+\t\302\240*/\n" + "+\tA * Some processors can start a write, but end up\n" + "+\tA * seeing a read-only PTE by the time they get\n" + "+\tA * to the Dirty bit.A A In this case, they will\n" + "+\tA * set the Dirty bit, leaving a read-only, Dirty\n" + "+\tA * PTE which looks like a Shadow Stack PTE.\n" + "+\tA *\n" + "+\tA * However, this behavior has been improved and\n" + "+\tA * will not occur on processors supporting\n" + "+\tA * Shadow Stacks.A A Without this guarantee, a\n" + "+\tA * transition to a non-present PTE and flush the\n" + "+\tA * TLB would be needed.\n" + "+\tA *\n" + "+\tA * When changing a writable PTE to read-only and\n" + "+\tA * if the PTE has _PAGE_DIRTY_HW set, we move\n" + "+\tA * that bit to _PAGE_DIRTY_SW so that the PTE is\n" + "+\tA * not a valid Shadow Stack PTE.\n" + "+\tA */\n" "+\tdo {\n" "+\t\tnew_pte = pte_wrprotect(pte);\n" "+\t\tnew_pte.pte |= (new_pte.pte & _PAGE_DIRTY_HW) >>\n" @@ -101,8 +103,8 @@ "+\t\tnew_pte.pte &= ~_PAGE_DIRTY_HW;\n" "+\t} while (!try_cmpxchg(ptep, &pte, new_pte));\n" "+#else\n" - "\302\240\tclear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);\n" + "A \tclear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);\n" "+#endif\n" - "\302\240}" + A } -32b70e7976d080f456ba7a0406cf0bbce61e3f30a34b44696e9f190fce358c05 +c8d1f9d012d5939a34a7ec1bbef78a1ef9ed10faa6ad440dd2a45db16cf29b4c
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.