* [PATCH V5 1/3] powerpc/mm: Add missing pmd accessors @ 2014-11-05 16:27 Aneesh Kumar K.V 2014-11-05 16:27 ` [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory Aneesh Kumar K.V 2014-11-05 16:27 ` [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast Aneesh Kumar K.V 0 siblings, 2 replies; 5+ messages in thread From: Aneesh Kumar K.V @ 2014-11-05 16:27 UTC (permalink / raw) To: akpm, Steve Capper, Andrea Arcangeli, benh, mpe, David Miller Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V This patch add documentation and missing accessors. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- arch/powerpc/include/asm/pgtable-ppc64-4k.h | 16 ++++++++- arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3 ++ arch/powerpc/include/asm/pgtable-ppc64.h | 51 +++++++++++++++++++++------- arch/powerpc/mm/hugetlbpage.c | 3 ++ arch/powerpc/mm/pgtable_64.c | 22 ++++++++++-- 5 files changed, 78 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 7b935683f268..132ee1d482c2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -57,7 +57,21 @@ #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) #define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) + +#ifndef __ASSEMBLY__ + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} +extern struct page *pgd_page(pgd_t pgd); + +#endif /* !__ASSEMBLY__ */ #define pud_offset(pgdp, addr) \ (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index a56b82fb0609..1de35bbd02a6 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -38,4 +38,7 @@ /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff +#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) +#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) + #endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index ae153c40ab7c..d12092420560 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -152,7 +152,7 @@ #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_present(pmd) (pmd_val(pmd) != 0) +#define pmd_present(pmd) (!pmd_none(pmd)) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) extern struct page *pmd_page(pmd_t pmd); @@ -164,9 +164,21 @@ extern struct page *pmd_page(pmd_t pmd); #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) -#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) +extern struct page *pud_page(pud_t pud); + +static inline pte_t pud_pte(pud_t pud) +{ + return __pte(pud_val(pud)); +} + +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} +#define pud_write(pud) pte_write(pud_pte(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_write(pgd) pte_write(pgd_pte(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -422,7 +434,22 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); - +/* + * + * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs + * page. The hugetlbfs page table walking and mangling paths are totally + * separated form the core VM paths and they're differentiated by + * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. + * + * pmd_trans_huge() is defined as false at build time if + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build + * time in such case. + * + * For ppc64 we need to differntiate from explicit hugepages from THP, because + * for THP we also track the subpage details at the pmd level. We don't do + * that for explicit huge pages. + * + */ static inline int pmd_trans_huge(pmd_t pmd) { /* @@ -431,16 +458,6 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); } -static inline int pmd_large(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_PRESENT; - return 0; -} - static inline int pmd_trans_splitting(pmd_t pmd) { if (pmd_trans_huge(pmd)) @@ -451,6 +468,14 @@ static inline int pmd_trans_splitting(pmd_t pmd) extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int pmd_large(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..2e35b9a82929 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -62,6 +62,9 @@ static unsigned nr_gpages; /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. */ int pmd_huge(pmd_t pmd) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c8d709ab489d..e40ca0e3f2bf 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -36,6 +36,7 @@ #include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/slab.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -352,16 +353,31 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +#ifndef __PAGETABLE_PUD_FOLDED +/* 4 level page table */ +struct page *pgd_page(pgd_t pgd) +{ + if (pgd_huge(pgd)) + return pte_page(pgd_pte(pgd)); + return virt_to_page(pgd_page_vaddr(pgd)); +} +#endif + +struct page *pud_page(pud_t pud) +{ + if (pud_huge(pud)) + return pte_page(pud_pte(pud)); + return virt_to_page(pud_page_vaddr(pud)); +} + /* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd)) return pfn_to_page(pmd_pfn(pmd)); -#endif return virt_to_page(pmd_page_vaddr(pmd)); } -- 2.1.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory 2014-11-05 16:27 [PATCH V5 1/3] powerpc/mm: Add missing pmd accessors Aneesh Kumar K.V @ 2014-11-05 16:27 ` Aneesh Kumar K.V 2014-11-05 16:27 ` Aneesh Kumar K.V 2014-11-05 16:27 ` [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast Aneesh Kumar K.V 1 sibling, 1 reply; 5+ messages in thread From: Aneesh Kumar K.V @ 2014-11-05 16:27 UTC (permalink / raw) To: akpm, Steve Capper, Andrea Arcangeli, benh, mpe, David Miller Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V Update generic gup implementation with powerpc specific details. On powerpc at pmd level we can have hugepte, normal pmd pointer or a pointer to the hugepage directory. Tested-by: Steve Capper <steve.capper@linaro.org> Acked-by: Steve Capper <steve.capper@linaro.org> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- Changes from V4: * Add pmd accessor needed for the change to ppc64 * Drop the assumption that we can access pmd using pte_* functions. arch/powerpc/include/asm/page.h | 1 + include/linux/hugetlb.h | 46 +++++++++++++++++++++++ mm/gup.c | 81 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 120 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 26fe1ae15212..f973fce73a43 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -380,6 +380,7 @@ static inline int hugepd_ok(hugepd_t hpd) #endif #define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6e6d338641fe..e6b62f30ab21 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,6 +175,52 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, } #endif /* !CONFIG_HUGETLB_PAGE */ +/* + * hugepages at page global directory. If arch support + * hugepages at pgd level, they need to define this. + */ +#ifndef pgd_huge +#define pgd_huge(x) 0 +#endif + +#ifndef pgd_write +static inline int pgd_write(pgd_t pgd) +{ + BUG(); + return 0; +} +#endif + +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif + +#ifndef is_hugepd +/* + * Some architectures requires a hugepage directory format that is + * required to support multiple hugepage sizes. For example + * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" + * introduced the same on powerpc. This allows for a more flexible hugepage + * pagetable layout. + */ +typedef struct { unsigned long pd; } hugepd_t; +#define is_hugepd(hugepd) (0) +#define __hugepd(x) ((hugepd_t) { (x) }) +static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr) +{ + return 0; +} +#else +extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr); +#endif #define HUGETLB_ANON_FILE "anon_hugepage" diff --git a/mm/gup.c b/mm/gup.c index cd62c8c90d4a..0ca1df9075ab 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3,7 +3,6 @@ #include <linux/err.h> #include <linux/spinlock.h> -#include <linux/hugetlb.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/rmap.h> @@ -12,6 +11,7 @@ #include <linux/sched.h> #include <linux/rwsem.h> +#include <linux/hugetlb.h> #include <asm/pgtable.h> #include "internal.h" @@ -875,6 +875,49 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 1; } +static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, + unsigned long end, int write, + struct page **pages, int *nr) +{ + int refs; + struct page *head, *page, *tail; + + if (write && !pgd_write(orig)) + return 0; + + refs = 0; + head = pgd_page(orig); + page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -902,6 +945,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pages, nr)) return 0; + } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { + /* + * architecture have different format for hugetlbfs + * pmd format and THP pmd format + */ + if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, + PMD_SHIFT, next, write, pages, nr)) + return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); @@ -909,22 +960,26 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(pgdp, addr); + pudp = pud_offset(&pgd, addr); do { pud_t pud = ACCESS_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (pud_huge(pud)) { + if (unlikely(pud_huge(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, write, - pages, nr)) + pages, nr)) + return 0; + } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { + if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, + PUD_SHIFT, next, write, pages, nr)) return 0; } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) return 0; @@ -970,10 +1025,20 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { + pgd_t pgd = ACCESS_ONCE(*pgdp); + next = pgd_addr_end(addr, end); - if (pgd_none(*pgdp)) + if (pgd_none(pgd)) break; - else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr)) + if (unlikely(pgd_huge(pgd))) { + if (!gup_huge_pgd(pgd, pgdp, addr, next, write, + pages, &nr)) + break; + } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { + if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, + PGDIR_SHIFT, next, write, pages, &nr)) + break; + } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); -- 2.1.0 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory 2014-11-05 16:27 ` [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory Aneesh Kumar K.V @ 2014-11-05 16:27 ` Aneesh Kumar K.V 0 siblings, 0 replies; 5+ messages in thread From: Aneesh Kumar K.V @ 2014-11-05 16:27 UTC (permalink / raw) To: akpm, Steve Capper, Andrea Arcangeli, benh, mpe, David Miller Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V Update generic gup implementation with powerpc specific details. On powerpc at pmd level we can have hugepte, normal pmd pointer or a pointer to the hugepage directory. Tested-by: Steve Capper <steve.capper@linaro.org> Acked-by: Steve Capper <steve.capper@linaro.org> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- Changes from V4: * Add pmd accessor needed for the change to ppc64 * Drop the assumption that we can access pmd using pte_* functions. arch/powerpc/include/asm/page.h | 1 + include/linux/hugetlb.h | 46 +++++++++++++++++++++++ mm/gup.c | 81 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 120 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 26fe1ae15212..f973fce73a43 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -380,6 +380,7 @@ static inline int hugepd_ok(hugepd_t hpd) #endif #define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6e6d338641fe..e6b62f30ab21 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,6 +175,52 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, } #endif /* !CONFIG_HUGETLB_PAGE */ +/* + * hugepages at page global directory. If arch support + * hugepages at pgd level, they need to define this. + */ +#ifndef pgd_huge +#define pgd_huge(x) 0 +#endif + +#ifndef pgd_write +static inline int pgd_write(pgd_t pgd) +{ + BUG(); + return 0; +} +#endif + +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif + +#ifndef is_hugepd +/* + * Some architectures requires a hugepage directory format that is + * required to support multiple hugepage sizes. For example + * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" + * introduced the same on powerpc. This allows for a more flexible hugepage + * pagetable layout. + */ +typedef struct { unsigned long pd; } hugepd_t; +#define is_hugepd(hugepd) (0) +#define __hugepd(x) ((hugepd_t) { (x) }) +static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr) +{ + return 0; +} +#else +extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr); +#endif #define HUGETLB_ANON_FILE "anon_hugepage" diff --git a/mm/gup.c b/mm/gup.c index cd62c8c90d4a..0ca1df9075ab 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3,7 +3,6 @@ #include <linux/err.h> #include <linux/spinlock.h> -#include <linux/hugetlb.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/rmap.h> @@ -12,6 +11,7 @@ #include <linux/sched.h> #include <linux/rwsem.h> +#include <linux/hugetlb.h> #include <asm/pgtable.h> #include "internal.h" @@ -875,6 +875,49 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 1; } +static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, + unsigned long end, int write, + struct page **pages, int *nr) +{ + int refs; + struct page *head, *page, *tail; + + if (write && !pgd_write(orig)) + return 0; + + refs = 0; + head = pgd_page(orig); + page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -902,6 +945,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pages, nr)) return 0; + } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { + /* + * architecture have different format for hugetlbfs + * pmd format and THP pmd format + */ + if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, + PMD_SHIFT, next, write, pages, nr)) + return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); @@ -909,22 +960,26 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(pgdp, addr); + pudp = pud_offset(&pgd, addr); do { pud_t pud = ACCESS_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (pud_huge(pud)) { + if (unlikely(pud_huge(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, write, - pages, nr)) + pages, nr)) + return 0; + } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { + if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, + PUD_SHIFT, next, write, pages, nr)) return 0; } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) return 0; @@ -970,10 +1025,20 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { + pgd_t pgd = ACCESS_ONCE(*pgdp); + next = pgd_addr_end(addr, end); - if (pgd_none(*pgdp)) + if (pgd_none(pgd)) break; - else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr)) + if (unlikely(pgd_huge(pgd))) { + if (!gup_huge_pgd(pgd, pgdp, addr, next, write, + pages, &nr)) + break; + } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { + if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, + PGDIR_SHIFT, next, write, pages, &nr)) + break; + } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); -- 2.1.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast 2014-11-05 16:27 [PATCH V5 1/3] powerpc/mm: Add missing pmd accessors Aneesh Kumar K.V 2014-11-05 16:27 ` [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory Aneesh Kumar K.V @ 2014-11-05 16:27 ` Aneesh Kumar K.V 2014-11-05 16:27 ` Aneesh Kumar K.V 1 sibling, 1 reply; 5+ messages in thread From: Aneesh Kumar K.V @ 2014-11-05 16:27 UTC (permalink / raw) To: akpm, Steve Capper, Andrea Arcangeli, benh, mpe, David Miller Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V This patch switch the ppc arch to use the generic RCU based gup implementation. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/hugetlb.h | 8 +- arch/powerpc/include/asm/page.h | 3 +- arch/powerpc/include/asm/pgtable-ppc64.h | 1 - arch/powerpc/include/asm/pgtable.h | 6 +- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/gup.c | 235 ------------------------------- arch/powerpc/mm/hugetlbpage.c | 33 ++--- 8 files changed, 22 insertions(+), 267 deletions(-) delete mode 100644 arch/powerpc/mm/gup.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4e28c3..7af887dc6aed 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select HAVE_GENERIC_RCU_GUP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 766b77d527ac..1d53a65b4ec1 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) #endif /* CONFIG_PPC_BOOK3S_64 */ -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { /* @@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, */ unsigned long idx = 0; - pte_t *dir = hugepd_page(*hpdp); + pte_t *dir = hugepd_page(hpd); #ifndef CONFIG_PPC_FSL_BOOK3E - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); #endif return dir + idx; @@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, } #define hugepd_shift(x) 0 -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { return 0; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f973fce73a43..69c059887a2c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) } #endif -#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define is_hugepd(hpd) (hugepd_ok(hpd)) #define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 #define pgd_huge(pgd) 0 #endif /* CONFIG_HUGETLB_PAGE */ +#define __hugepd(x) ((hugepd_t) { (x) }) struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d12092420560..5600e434332f 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 316f9a5da173..a8805fee0df9 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -274,11 +274,9 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); - extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); + unsigned long end, int write, + struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #define has_transparent_hugepage() 0 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861616a1..438dcd3fd0d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d8746684f606..000000000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/vmstat.h> -#include <linux/pagemap.h> -#include <linux/rwsem.h> -#include <asm/pgtable.h> - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2e35b9a82929..d54d74ccdc5a 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif @@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); -- 2.1.0 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast 2014-11-05 16:27 ` [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast Aneesh Kumar K.V @ 2014-11-05 16:27 ` Aneesh Kumar K.V 0 siblings, 0 replies; 5+ messages in thread From: Aneesh Kumar K.V @ 2014-11-05 16:27 UTC (permalink / raw) To: akpm, Steve Capper, Andrea Arcangeli, benh, mpe, David Miller Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V This patch switch the ppc arch to use the generic RCU based gup implementation. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/hugetlb.h | 8 +- arch/powerpc/include/asm/page.h | 3 +- arch/powerpc/include/asm/pgtable-ppc64.h | 1 - arch/powerpc/include/asm/pgtable.h | 6 +- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/gup.c | 235 ------------------------------- arch/powerpc/mm/hugetlbpage.c | 33 ++--- 8 files changed, 22 insertions(+), 267 deletions(-) delete mode 100644 arch/powerpc/mm/gup.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4e28c3..7af887dc6aed 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select HAVE_GENERIC_RCU_GUP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 766b77d527ac..1d53a65b4ec1 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) #endif /* CONFIG_PPC_BOOK3S_64 */ -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { /* @@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, */ unsigned long idx = 0; - pte_t *dir = hugepd_page(*hpdp); + pte_t *dir = hugepd_page(hpd); #ifndef CONFIG_PPC_FSL_BOOK3E - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); #endif return dir + idx; @@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, } #define hugepd_shift(x) 0 -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { return 0; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f973fce73a43..69c059887a2c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) } #endif -#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define is_hugepd(hpd) (hugepd_ok(hpd)) #define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 #define pgd_huge(pgd) 0 #endif /* CONFIG_HUGETLB_PAGE */ +#define __hugepd(x) ((hugepd_t) { (x) }) struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d12092420560..5600e434332f 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 316f9a5da173..a8805fee0df9 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -274,11 +274,9 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); - extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); + unsigned long end, int write, + struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #define has_transparent_hugepage() 0 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861616a1..438dcd3fd0d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d8746684f606..000000000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/vmstat.h> -#include <linux/pagemap.h> -#include <linux/rwsem.h> -#include <asm/pgtable.h> - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2e35b9a82929..d54d74ccdc5a 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif @@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); -- 2.1.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-11-05 16:28 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-11-05 16:27 [PATCH V5 1/3] powerpc/mm: Add missing pmd accessors Aneesh Kumar K.V 2014-11-05 16:27 ` [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory Aneesh Kumar K.V 2014-11-05 16:27 ` Aneesh Kumar K.V 2014-11-05 16:27 ` [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast Aneesh Kumar K.V 2014-11-05 16:27 ` Aneesh Kumar K.V
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox