diff for duplicates of <87lh6mfv2j.fsf@linux.vnet.ibm.com> diff --git a/a/1.txt b/N1/1.txt index 5191919..8d31bba 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,29 +1,24 @@ Balbir Singh <bsingharora@gmail.com> writes: > On Tue, 2016-02-09 at 06:50 +0530, Aneesh Kumar K.V wrote: ->>=C2=A0 +>> >> Also make sure we wait for irq disable section in other cpus to finish >> before flipping a huge pte entry with a regular pmd entry. Code paths >> like find_linux_pte_or_hugepte depend on irq disable to get >> a stable pte_t pointer. A parallel thp split need to make sure we >> don't convert a pmd pte to a regular pmd entry without waiting for the >> irq disable section to finish. ->>=20 +>> >> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> >> --- ->> =C2=A0arch/powerpc/include/asm/book3s/64/pgtable.h |=C2=A0=C2=A04 ++++ ->> =C2=A0arch/powerpc/mm/pgtable_64.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= -=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 35 +>> arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++++ +>> arch/powerpc/mm/pgtable_64.c | 35 >> +++++++++++++++++++++++++++- ->> =C2=A0include/asm-generic/pgtable.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= -=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A08 = -+++++++ ->> =C2=A0mm/huge_memory.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= -=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= -=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A01 + ->> =C2=A04 files changed, 47 insertions(+), 1 deletion(-) ->>=20 +>> include/asm-generic/pgtable.h | 8 +++++++ +>> mm/huge_memory.c | 1 + +>> 4 files changed, 47 insertions(+), 1 deletion(-) +>> >> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h >> b/arch/powerpc/include/asm/book3s/64/pgtable.h >> index 8d1c41d28318..ac07a30a7934 100644 @@ -31,50 +26,46 @@ Balbir Singh <bsingharora@gmail.com> writes: >> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h >> @@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct >> mm_struct *mm, pmd_t *pmdp); ->> =C2=A0extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned l= -ong +>> extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long >> address, ->> =C2=A0 =C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp); ->> =C2=A0 +>> pmd_t *pmdp); +>> >> +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE >> +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma, ->> + =C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp); +>> + unsigned long address, pmd_t *pmdp); >> + ->> =C2=A0#define pmd_move_must_withdraw pmd_move_must_withdraw ->> =C2=A0struct spinlock; ->> =C2=A0static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_= -ptl, +>> #define pmd_move_must_withdraw pmd_move_must_withdraw +>> struct spinlock; +>> static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, >> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c >> index 3124a20d0fab..c8a00da39969 100644 >> --- a/arch/powerpc/mm/pgtable_64.c >> +++ b/arch/powerpc/mm/pgtable_64.c ->> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_str= -uct +>> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct >> *mm, pmd_t *pmdp) ->> =C2=A0 return pgtable; ->> =C2=A0} ->> =C2=A0 +>> return pgtable; +>> } +>> >> +void pmdp_huge_split_prepare(struct vm_area_struct *vma, ->> + =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp) +>> + unsigned long address, pmd_t *pmdp) >> +{ >> + VM_BUG_ON(address & ~HPAGE_PMD_MASK); >> + >> +#ifdef CONFIG_DEBUG_VM ->> + BUG_ON(REGION_ID(address) !=3D USER_REGION_ID); +>> + BUG_ON(REGION_ID(address) != USER_REGION_ID); >> +#endif >> + /* ->> + =C2=A0* We can't mark the pmd none here, because that will cause a race ->> + =C2=A0* against exit_mmap. We need to continue mark pmd TRANS HUGE, wh= -ile ->> + =C2=A0* we spilt, but at the same time we wan't rest of the ppc64 code ->> + =C2=A0* not to insert hash pte on this, because we will be modifying ->> + =C2=A0* the deposited pgtable in the caller of this function. Hence ->> + =C2=A0* clear the _PAGE_USER so that we move the fault handling to ->> + =C2=A0* higher level function and that will serialize against ptl. ->> + =C2=A0* We need to flush existing hash pte entries here even though, ->> + =C2=A0* the translation is still valid, because we will withdraw ->> + =C2=A0* pgtable_t after this. ->> + =C2=A0*/ +>> + * We can't mark the pmd none here, because that will cause a race +>> + * against exit_mmap. We need to continue mark pmd TRANS HUGE, while +>> + * we spilt, but at the same time we wan't rest of the ppc64 code +>> + * not to insert hash pte on this, because we will be modifying +>> + * the deposited pgtable in the caller of this function. Hence +>> + * clear the _PAGE_USER so that we move the fault handling to +>> + * higher level function and that will serialize against ptl. +>> + * We need to flush existing hash pte entries here even though, +>> + * the translation is still valid, because we will withdraw +>> + * pgtable_t after this. +>> + */ >> + pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); > > Can this break any checks for _PAGE_USER? From other paths? @@ -86,32 +77,30 @@ Should not, that is the same condition we use for autonuma. >> +} >> + >> + ->> =C2=A0/* ->> =C2=A0 * set a new huge pmd. We should not be called for updating ->> =C2=A0 * an existing pmd entry. That should go via pmd_hugepage_update. +>> /* +>> * set a new huge pmd. We should not be called for updating +>> * an existing pmd entry. That should go via pmd_hugepage_update. >> @@ -663,10 +687,19 @@ void set_pmd_at(struct mm_struct *mm, unsigned long >> addr, ->> =C2=A0 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); ->> =C2=A0} ->> =C2=A0 +>> return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); +>> } +>> >> +/* >> + * We use this to invalidate a pmdp entry before switching from a >> + * hugepte to regular pmd entry. >> + */ ->> =C2=A0void pmdp_invalidate(struct vm_area_struct *vma, unsigned long add= -ress, ->> =C2=A0 =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp) ->> =C2=A0{ +>> void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, +>> pmd_t *pmdp) +>> { >> - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); >> + pmd_hugepage_update(vma->vm_mm, address, pmdp, ~0UL, 0); >> + /* ->> + =C2=A0* This ensures that generic code that rely on IRQ disabling ->> + =C2=A0* to prevent a parallel THP split work as expected. ->> + =C2=A0*/ +>> + * This ensures that generic code that rely on IRQ disabling +>> + * to prevent a parallel THP split work as expected. +>> + */ >> + kick_all_cpus_sync(); > -> Seems expensive, anyway I think the right should do something like or a w= -rapper +> Seems expensive, anyway I think the right should do something like or a wrapper > for it > > on_each_cpu_mask(mm_cpumask(vma->vm_mm), do_nothing, NULL, 1); @@ -126,3 +115,9 @@ ignored idle cpus. But then that needs more verification. http://article.gmane.org/gmane.linux.ports.ppc.embedded/81105 -aneesh + +-- +To unsubscribe, send a message with 'unsubscribe linux-mm' in +the body to majordomo@kvack.org. For more info on Linux MM, +see: http://www.linux-mm.org/ . +Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> diff --git a/a/content_digest b/N1/content_digest index 8d16c38..7ef585b 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -18,29 +18,24 @@ "Balbir Singh <bsingharora@gmail.com> writes:\n" "\n" "> On Tue, 2016-02-09 at 06:50 +0530, Aneesh Kumar K.V wrote:\n" - ">>=C2=A0\n" + ">>\302\240\n" ">> Also make sure we wait for irq disable section in other cpus to finish\n" ">> before flipping a huge pte entry with a regular pmd entry. Code paths\n" ">> like find_linux_pte_or_hugepte depend on irq disable to get\n" ">> a stable pte_t pointer. A parallel thp split need to make sure we\n" ">> don't convert a pmd pte to a regular pmd entry without waiting for the\n" ">> irq disable section to finish.\n" - ">>=20\n" + ">> \n" ">> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>\n" ">> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>\n" ">> ---\n" - ">> =C2=A0arch/powerpc/include/asm/book3s/64/pgtable.h |=C2=A0=C2=A04 ++++\n" - ">> =C2=A0arch/powerpc/mm/pgtable_64.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=\n" - "=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 35\n" + ">> \302\240arch/powerpc/include/asm/book3s/64/pgtable.h |\302\240\302\2404 ++++\n" + ">> \302\240arch/powerpc/mm/pgtable_64.c\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240| 35\n" ">> +++++++++++++++++++++++++++-\n" - ">> =C2=A0include/asm-generic/pgtable.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=\n" - "=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A08 =\n" - "+++++++\n" - ">> =C2=A0mm/huge_memory.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=\n" - "=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=\n" - "=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A01 +\n" - ">> =C2=A04 files changed, 47 insertions(+), 1 deletion(-)\n" - ">>=20\n" + ">> \302\240include/asm-generic/pgtable.h\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240|\302\240\302\2408 +++++++\n" + ">> \302\240mm/huge_memory.c\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240|\302\240\302\2401 +\n" + ">> \302\2404 files changed, 47 insertions(+), 1 deletion(-)\n" + ">> \n" ">> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h\n" ">> b/arch/powerpc/include/asm/book3s/64/pgtable.h\n" ">> index 8d1c41d28318..ac07a30a7934 100644\n" @@ -48,50 +43,46 @@ ">> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h\n" ">> @@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct\n" ">> mm_struct *mm, pmd_t *pmdp);\n" - ">> =C2=A0extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned l=\n" - "ong\n" + ">> \302\240extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long\n" ">> address,\n" - ">> =C2=A0\t\t\t=C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp);\n" - ">> =C2=A0\n" + ">> \302\240\t\t\t\302\240\302\240\302\240\302\240pmd_t *pmdp);\n" + ">> \302\240\n" ">> +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE\n" ">> +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,\n" - ">> +\t\t\t\t=C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp);\n" + ">> +\t\t\t\t\302\240\302\240\302\240\302\240unsigned long address, pmd_t *pmdp);\n" ">> +\n" - ">> =C2=A0#define pmd_move_must_withdraw pmd_move_must_withdraw\n" - ">> =C2=A0struct spinlock;\n" - ">> =C2=A0static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_=\n" - "ptl,\n" + ">> \302\240#define pmd_move_must_withdraw pmd_move_must_withdraw\n" + ">> \302\240struct spinlock;\n" + ">> \302\240static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,\n" ">> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c\n" ">> index 3124a20d0fab..c8a00da39969 100644\n" ">> --- a/arch/powerpc/mm/pgtable_64.c\n" ">> +++ b/arch/powerpc/mm/pgtable_64.c\n" - ">> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_str=\n" - "uct\n" + ">> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct\n" ">> *mm, pmd_t *pmdp)\n" - ">> =C2=A0\treturn pgtable;\n" - ">> =C2=A0}\n" - ">> =C2=A0\n" + ">> \302\240\treturn pgtable;\n" + ">> \302\240}\n" + ">> \302\240\n" ">> +void pmdp_huge_split_prepare(struct vm_area_struct *vma,\n" - ">> +\t\t\t=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp)\n" + ">> +\t\t\t\302\240\302\240\302\240\302\240\302\240unsigned long address, pmd_t *pmdp)\n" ">> +{\n" ">> +\tVM_BUG_ON(address & ~HPAGE_PMD_MASK);\n" ">> +\n" ">> +#ifdef CONFIG_DEBUG_VM\n" - ">> +\tBUG_ON(REGION_ID(address) !=3D USER_REGION_ID);\n" + ">> +\tBUG_ON(REGION_ID(address) != USER_REGION_ID);\n" ">> +#endif\n" ">> +\t/*\n" - ">> +\t=C2=A0* We can't mark the pmd none here, because that will cause a race\n" - ">> +\t=C2=A0* against exit_mmap. We need to continue mark pmd TRANS HUGE, wh=\n" - "ile\n" - ">> +\t=C2=A0* we spilt, but at the same time we wan't rest of the ppc64 code\n" - ">> +\t=C2=A0* not to insert hash pte on this, because we will be modifying\n" - ">> +\t=C2=A0* the deposited pgtable in the caller of this function. Hence\n" - ">> +\t=C2=A0* clear the _PAGE_USER so that we move the fault handling to\n" - ">> +\t=C2=A0* higher level function and that will serialize against ptl.\n" - ">> +\t=C2=A0* We need to flush existing hash pte entries here even though,\n" - ">> +\t=C2=A0* the translation is still valid, because we will withdraw\n" - ">> +\t=C2=A0* pgtable_t after this.\n" - ">> +\t=C2=A0*/\n" + ">> +\t\302\240* We can't mark the pmd none here, because that will cause a race\n" + ">> +\t\302\240* against exit_mmap. We need to continue mark pmd TRANS HUGE, while\n" + ">> +\t\302\240* we spilt, but at the same time we wan't rest of the ppc64 code\n" + ">> +\t\302\240* not to insert hash pte on this, because we will be modifying\n" + ">> +\t\302\240* the deposited pgtable in the caller of this function. Hence\n" + ">> +\t\302\240* clear the _PAGE_USER so that we move the fault handling to\n" + ">> +\t\302\240* higher level function and that will serialize against ptl.\n" + ">> +\t\302\240* We need to flush existing hash pte entries here even though,\n" + ">> +\t\302\240* the translation is still valid, because we will withdraw\n" + ">> +\t\302\240* pgtable_t after this.\n" + ">> +\t\302\240*/\n" ">> +\tpmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);\n" ">\n" "> Can this break any checks for _PAGE_USER? From other paths?\n" @@ -103,32 +94,30 @@ ">> +}\n" ">> +\n" ">> +\n" - ">> =C2=A0/*\n" - ">> =C2=A0 * set a new huge pmd. We should not be called for updating\n" - ">> =C2=A0 * an existing pmd entry. That should go via pmd_hugepage_update.\n" + ">> \302\240/*\n" + ">> \302\240 * set a new huge pmd. We should not be called for updating\n" + ">> \302\240 * an existing pmd entry. That should go via pmd_hugepage_update.\n" ">> @@ -663,10 +687,19 @@ void set_pmd_at(struct mm_struct *mm, unsigned long\n" ">> addr,\n" - ">> =C2=A0\treturn set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));\n" - ">> =C2=A0}\n" - ">> =C2=A0\n" + ">> \302\240\treturn set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));\n" + ">> \302\240}\n" + ">> \302\240\n" ">> +/*\n" ">> + * We use this to invalidate a pmdp entry before switching from a\n" ">> + * hugepte to regular pmd entry.\n" ">> + */\n" - ">> =C2=A0void pmdp_invalidate(struct vm_area_struct *vma, unsigned long add=\n" - "ress,\n" - ">> =C2=A0\t\t=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp)\n" - ">> =C2=A0{\n" + ">> \302\240void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,\n" + ">> \302\240\t\t\302\240\302\240\302\240\302\240\302\240pmd_t *pmdp)\n" + ">> \302\240{\n" ">> -\tpmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);\n" ">> +\tpmd_hugepage_update(vma->vm_mm, address, pmdp, ~0UL, 0);\n" ">> +\t/*\n" - ">> +\t=C2=A0* This ensures that generic code that rely on IRQ disabling\n" - ">> +\t=C2=A0* to prevent a parallel THP split work as expected.\n" - ">> +\t=C2=A0*/\n" + ">> +\t\302\240* This ensures that generic code that rely on IRQ disabling\n" + ">> +\t\302\240* to prevent a parallel THP split work as expected.\n" + ">> +\t\302\240*/\n" ">> +\tkick_all_cpus_sync();\n" ">\n" - "> Seems expensive, anyway I think the right should do something like or a w=\n" - "rapper\n" + "> Seems expensive, anyway I think the right should do something like or a wrapper\n" "> for it\n" ">\n" "> on_each_cpu_mask(mm_cpumask(vma->vm_mm), do_nothing, NULL, 1);\n" @@ -142,6 +131,12 @@ "\n" "http://article.gmane.org/gmane.linux.ports.ppc.embedded/81105\n" "\n" - -aneesh + "-aneesh\n" + "\n" + "--\n" + "To unsubscribe, send a message with 'unsubscribe linux-mm' in\n" + "the body to majordomo@kvack.org. For more info on Linux MM,\n" + "see: http://www.linux-mm.org/ .\n" + "Don't email: <a href=mailto:\"dont@kvack.org\"> email@kvack.org </a>" -8a4197764441d21fb1b8bfaf0b47c1edb23019d629b5d6a404bc2471221e251d +0c56f0e049d8e21648a4f257488ea72e2a7daf5c7850865c4a3f3685ceadffbe
diff --git a/a/1.txt b/N2/1.txt index 5191919..e85c5fb 100644 --- a/a/1.txt +++ b/N2/1.txt @@ -1,29 +1,24 @@ Balbir Singh <bsingharora@gmail.com> writes: > On Tue, 2016-02-09 at 06:50 +0530, Aneesh Kumar K.V wrote: ->>=C2=A0 +>> >> Also make sure we wait for irq disable section in other cpus to finish >> before flipping a huge pte entry with a regular pmd entry. Code paths >> like find_linux_pte_or_hugepte depend on irq disable to get >> a stable pte_t pointer. A parallel thp split need to make sure we >> don't convert a pmd pte to a regular pmd entry without waiting for the >> irq disable section to finish. ->>=20 +>> >> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> >> --- ->> =C2=A0arch/powerpc/include/asm/book3s/64/pgtable.h |=C2=A0=C2=A04 ++++ ->> =C2=A0arch/powerpc/mm/pgtable_64.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= -=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 35 +>> arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++++ +>> arch/powerpc/mm/pgtable_64.c | 35 >> +++++++++++++++++++++++++++- ->> =C2=A0include/asm-generic/pgtable.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= -=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A08 = -+++++++ ->> =C2=A0mm/huge_memory.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= -=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= -=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A01 + ->> =C2=A04 files changed, 47 insertions(+), 1 deletion(-) ->>=20 +>> include/asm-generic/pgtable.h | 8 +++++++ +>> mm/huge_memory.c | 1 + +>> 4 files changed, 47 insertions(+), 1 deletion(-) +>> >> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h >> b/arch/powerpc/include/asm/book3s/64/pgtable.h >> index 8d1c41d28318..ac07a30a7934 100644 @@ -31,50 +26,46 @@ Balbir Singh <bsingharora@gmail.com> writes: >> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h >> @@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct >> mm_struct *mm, pmd_t *pmdp); ->> =C2=A0extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned l= -ong +>> extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long >> address, ->> =C2=A0 =C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp); ->> =C2=A0 +>> pmd_t *pmdp); +>> >> +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE >> +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma, ->> + =C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp); +>> + unsigned long address, pmd_t *pmdp); >> + ->> =C2=A0#define pmd_move_must_withdraw pmd_move_must_withdraw ->> =C2=A0struct spinlock; ->> =C2=A0static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_= -ptl, +>> #define pmd_move_must_withdraw pmd_move_must_withdraw +>> struct spinlock; +>> static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, >> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c >> index 3124a20d0fab..c8a00da39969 100644 >> --- a/arch/powerpc/mm/pgtable_64.c >> +++ b/arch/powerpc/mm/pgtable_64.c ->> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_str= -uct +>> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct >> *mm, pmd_t *pmdp) ->> =C2=A0 return pgtable; ->> =C2=A0} ->> =C2=A0 +>> return pgtable; +>> } +>> >> +void pmdp_huge_split_prepare(struct vm_area_struct *vma, ->> + =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp) +>> + unsigned long address, pmd_t *pmdp) >> +{ >> + VM_BUG_ON(address & ~HPAGE_PMD_MASK); >> + >> +#ifdef CONFIG_DEBUG_VM ->> + BUG_ON(REGION_ID(address) !=3D USER_REGION_ID); +>> + BUG_ON(REGION_ID(address) != USER_REGION_ID); >> +#endif >> + /* ->> + =C2=A0* We can't mark the pmd none here, because that will cause a race ->> + =C2=A0* against exit_mmap. We need to continue mark pmd TRANS HUGE, wh= -ile ->> + =C2=A0* we spilt, but at the same time we wan't rest of the ppc64 code ->> + =C2=A0* not to insert hash pte on this, because we will be modifying ->> + =C2=A0* the deposited pgtable in the caller of this function. Hence ->> + =C2=A0* clear the _PAGE_USER so that we move the fault handling to ->> + =C2=A0* higher level function and that will serialize against ptl. ->> + =C2=A0* We need to flush existing hash pte entries here even though, ->> + =C2=A0* the translation is still valid, because we will withdraw ->> + =C2=A0* pgtable_t after this. ->> + =C2=A0*/ +>> + * We can't mark the pmd none here, because that will cause a race +>> + * against exit_mmap. We need to continue mark pmd TRANS HUGE, while +>> + * we spilt, but at the same time we wan't rest of the ppc64 code +>> + * not to insert hash pte on this, because we will be modifying +>> + * the deposited pgtable in the caller of this function. Hence +>> + * clear the _PAGE_USER so that we move the fault handling to +>> + * higher level function and that will serialize against ptl. +>> + * We need to flush existing hash pte entries here even though, +>> + * the translation is still valid, because we will withdraw +>> + * pgtable_t after this. +>> + */ >> + pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); > > Can this break any checks for _PAGE_USER? From other paths? @@ -86,32 +77,30 @@ Should not, that is the same condition we use for autonuma. >> +} >> + >> + ->> =C2=A0/* ->> =C2=A0 * set a new huge pmd. We should not be called for updating ->> =C2=A0 * an existing pmd entry. That should go via pmd_hugepage_update. +>> /* +>> * set a new huge pmd. We should not be called for updating +>> * an existing pmd entry. That should go via pmd_hugepage_update. >> @@ -663,10 +687,19 @@ void set_pmd_at(struct mm_struct *mm, unsigned long >> addr, ->> =C2=A0 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); ->> =C2=A0} ->> =C2=A0 +>> return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); +>> } +>> >> +/* >> + * We use this to invalidate a pmdp entry before switching from a >> + * hugepte to regular pmd entry. >> + */ ->> =C2=A0void pmdp_invalidate(struct vm_area_struct *vma, unsigned long add= -ress, ->> =C2=A0 =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp) ->> =C2=A0{ +>> void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, +>> pmd_t *pmdp) +>> { >> - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); >> + pmd_hugepage_update(vma->vm_mm, address, pmdp, ~0UL, 0); >> + /* ->> + =C2=A0* This ensures that generic code that rely on IRQ disabling ->> + =C2=A0* to prevent a parallel THP split work as expected. ->> + =C2=A0*/ +>> + * This ensures that generic code that rely on IRQ disabling +>> + * to prevent a parallel THP split work as expected. +>> + */ >> + kick_all_cpus_sync(); > -> Seems expensive, anyway I think the right should do something like or a w= -rapper +> Seems expensive, anyway I think the right should do something like or a wrapper > for it > > on_each_cpu_mask(mm_cpumask(vma->vm_mm), do_nothing, NULL, 1); diff --git a/a/content_digest b/N2/content_digest index 8d16c38..2c51f76 100644 --- a/a/content_digest +++ b/N2/content_digest @@ -18,29 +18,24 @@ "Balbir Singh <bsingharora@gmail.com> writes:\n" "\n" "> On Tue, 2016-02-09 at 06:50 +0530, Aneesh Kumar K.V wrote:\n" - ">>=C2=A0\n" + ">>\302\240\n" ">> Also make sure we wait for irq disable section in other cpus to finish\n" ">> before flipping a huge pte entry with a regular pmd entry. Code paths\n" ">> like find_linux_pte_or_hugepte depend on irq disable to get\n" ">> a stable pte_t pointer. A parallel thp split need to make sure we\n" ">> don't convert a pmd pte to a regular pmd entry without waiting for the\n" ">> irq disable section to finish.\n" - ">>=20\n" + ">> \n" ">> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>\n" ">> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>\n" ">> ---\n" - ">> =C2=A0arch/powerpc/include/asm/book3s/64/pgtable.h |=C2=A0=C2=A04 ++++\n" - ">> =C2=A0arch/powerpc/mm/pgtable_64.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=\n" - "=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 35\n" + ">> \302\240arch/powerpc/include/asm/book3s/64/pgtable.h |\302\240\302\2404 ++++\n" + ">> \302\240arch/powerpc/mm/pgtable_64.c\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240| 35\n" ">> +++++++++++++++++++++++++++-\n" - ">> =C2=A0include/asm-generic/pgtable.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=\n" - "=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A08 =\n" - "+++++++\n" - ">> =C2=A0mm/huge_memory.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=\n" - "=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=\n" - "=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A01 +\n" - ">> =C2=A04 files changed, 47 insertions(+), 1 deletion(-)\n" - ">>=20\n" + ">> \302\240include/asm-generic/pgtable.h\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240|\302\240\302\2408 +++++++\n" + ">> \302\240mm/huge_memory.c\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240|\302\240\302\2401 +\n" + ">> \302\2404 files changed, 47 insertions(+), 1 deletion(-)\n" + ">> \n" ">> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h\n" ">> b/arch/powerpc/include/asm/book3s/64/pgtable.h\n" ">> index 8d1c41d28318..ac07a30a7934 100644\n" @@ -48,50 +43,46 @@ ">> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h\n" ">> @@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct\n" ">> mm_struct *mm, pmd_t *pmdp);\n" - ">> =C2=A0extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned l=\n" - "ong\n" + ">> \302\240extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long\n" ">> address,\n" - ">> =C2=A0\t\t\t=C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp);\n" - ">> =C2=A0\n" + ">> \302\240\t\t\t\302\240\302\240\302\240\302\240pmd_t *pmdp);\n" + ">> \302\240\n" ">> +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE\n" ">> +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,\n" - ">> +\t\t\t\t=C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp);\n" + ">> +\t\t\t\t\302\240\302\240\302\240\302\240unsigned long address, pmd_t *pmdp);\n" ">> +\n" - ">> =C2=A0#define pmd_move_must_withdraw pmd_move_must_withdraw\n" - ">> =C2=A0struct spinlock;\n" - ">> =C2=A0static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_=\n" - "ptl,\n" + ">> \302\240#define pmd_move_must_withdraw pmd_move_must_withdraw\n" + ">> \302\240struct spinlock;\n" + ">> \302\240static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,\n" ">> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c\n" ">> index 3124a20d0fab..c8a00da39969 100644\n" ">> --- a/arch/powerpc/mm/pgtable_64.c\n" ">> +++ b/arch/powerpc/mm/pgtable_64.c\n" - ">> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_str=\n" - "uct\n" + ">> @@ -646,6 +646,30 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct\n" ">> *mm, pmd_t *pmdp)\n" - ">> =C2=A0\treturn pgtable;\n" - ">> =C2=A0}\n" - ">> =C2=A0\n" + ">> \302\240\treturn pgtable;\n" + ">> \302\240}\n" + ">> \302\240\n" ">> +void pmdp_huge_split_prepare(struct vm_area_struct *vma,\n" - ">> +\t\t\t=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0unsigned long address, pmd_t *pmdp)\n" + ">> +\t\t\t\302\240\302\240\302\240\302\240\302\240unsigned long address, pmd_t *pmdp)\n" ">> +{\n" ">> +\tVM_BUG_ON(address & ~HPAGE_PMD_MASK);\n" ">> +\n" ">> +#ifdef CONFIG_DEBUG_VM\n" - ">> +\tBUG_ON(REGION_ID(address) !=3D USER_REGION_ID);\n" + ">> +\tBUG_ON(REGION_ID(address) != USER_REGION_ID);\n" ">> +#endif\n" ">> +\t/*\n" - ">> +\t=C2=A0* We can't mark the pmd none here, because that will cause a race\n" - ">> +\t=C2=A0* against exit_mmap. We need to continue mark pmd TRANS HUGE, wh=\n" - "ile\n" - ">> +\t=C2=A0* we spilt, but at the same time we wan't rest of the ppc64 code\n" - ">> +\t=C2=A0* not to insert hash pte on this, because we will be modifying\n" - ">> +\t=C2=A0* the deposited pgtable in the caller of this function. Hence\n" - ">> +\t=C2=A0* clear the _PAGE_USER so that we move the fault handling to\n" - ">> +\t=C2=A0* higher level function and that will serialize against ptl.\n" - ">> +\t=C2=A0* We need to flush existing hash pte entries here even though,\n" - ">> +\t=C2=A0* the translation is still valid, because we will withdraw\n" - ">> +\t=C2=A0* pgtable_t after this.\n" - ">> +\t=C2=A0*/\n" + ">> +\t\302\240* We can't mark the pmd none here, because that will cause a race\n" + ">> +\t\302\240* against exit_mmap. We need to continue mark pmd TRANS HUGE, while\n" + ">> +\t\302\240* we spilt, but at the same time we wan't rest of the ppc64 code\n" + ">> +\t\302\240* not to insert hash pte on this, because we will be modifying\n" + ">> +\t\302\240* the deposited pgtable in the caller of this function. Hence\n" + ">> +\t\302\240* clear the _PAGE_USER so that we move the fault handling to\n" + ">> +\t\302\240* higher level function and that will serialize against ptl.\n" + ">> +\t\302\240* We need to flush existing hash pte entries here even though,\n" + ">> +\t\302\240* the translation is still valid, because we will withdraw\n" + ">> +\t\302\240* pgtable_t after this.\n" + ">> +\t\302\240*/\n" ">> +\tpmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);\n" ">\n" "> Can this break any checks for _PAGE_USER? From other paths?\n" @@ -103,32 +94,30 @@ ">> +}\n" ">> +\n" ">> +\n" - ">> =C2=A0/*\n" - ">> =C2=A0 * set a new huge pmd. We should not be called for updating\n" - ">> =C2=A0 * an existing pmd entry. That should go via pmd_hugepage_update.\n" + ">> \302\240/*\n" + ">> \302\240 * set a new huge pmd. We should not be called for updating\n" + ">> \302\240 * an existing pmd entry. That should go via pmd_hugepage_update.\n" ">> @@ -663,10 +687,19 @@ void set_pmd_at(struct mm_struct *mm, unsigned long\n" ">> addr,\n" - ">> =C2=A0\treturn set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));\n" - ">> =C2=A0}\n" - ">> =C2=A0\n" + ">> \302\240\treturn set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));\n" + ">> \302\240}\n" + ">> \302\240\n" ">> +/*\n" ">> + * We use this to invalidate a pmdp entry before switching from a\n" ">> + * hugepte to regular pmd entry.\n" ">> + */\n" - ">> =C2=A0void pmdp_invalidate(struct vm_area_struct *vma, unsigned long add=\n" - "ress,\n" - ">> =C2=A0\t\t=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pmd_t *pmdp)\n" - ">> =C2=A0{\n" + ">> \302\240void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,\n" + ">> \302\240\t\t\302\240\302\240\302\240\302\240\302\240pmd_t *pmdp)\n" + ">> \302\240{\n" ">> -\tpmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);\n" ">> +\tpmd_hugepage_update(vma->vm_mm, address, pmdp, ~0UL, 0);\n" ">> +\t/*\n" - ">> +\t=C2=A0* This ensures that generic code that rely on IRQ disabling\n" - ">> +\t=C2=A0* to prevent a parallel THP split work as expected.\n" - ">> +\t=C2=A0*/\n" + ">> +\t\302\240* This ensures that generic code that rely on IRQ disabling\n" + ">> +\t\302\240* to prevent a parallel THP split work as expected.\n" + ">> +\t\302\240*/\n" ">> +\tkick_all_cpus_sync();\n" ">\n" - "> Seems expensive, anyway I think the right should do something like or a w=\n" - "rapper\n" + "> Seems expensive, anyway I think the right should do something like or a wrapper\n" "> for it\n" ">\n" "> on_each_cpu_mask(mm_cpumask(vma->vm_mm), do_nothing, NULL, 1);\n" @@ -144,4 +133,4 @@ "\n" -aneesh -8a4197764441d21fb1b8bfaf0b47c1edb23019d629b5d6a404bc2471221e251d +3c50d798de2293690ce2694639cb3327c080b19dae13defcc623000911c7007a
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.