From: Paul Gortmaker <paul.gortmaker@windriver.com>
To: Nitin Gupta <nitin.m.gupta@oracle.com>
Cc: "David S. Miller" <davem@davemloft.net>,
Mike Kravetz <mike.kravetz@oracle.com>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Tom Hromatka <tom.hromatka@oracle.com>,
Michal Hocko <mhocko@suse.com>, Ingo Molnar <mingo@kernel.org>,
Babu Moger <babu.moger@oracle.com>,
bob picco <bob.picco@oracle.com>,
Thomas Tai <thomas.tai@oracle.com>,
Pavel Tatashin <pasha.tatashin@oracle.com>,
Atish Patra <atish.patra@oracle.com>,
sparclinux@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] sparc64: Add 16GB hugepage support
Date: Thu, 25 May 2017 03:34:42 +0000 [thread overview]
Message-ID: <20170525033442.GB877@windriver.com> (raw)
In-Reply-To: <1495672233-116815-1-git-send-email-nitin.m.gupta@oracle.com>
[[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
> Orabug: 25362942
>
> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
If this wasn't an accidental git send-email misfire, then there should
be a long log indicating the use case, the performance increase, the
testing that was done, etc. etc.
Normally I'd not notice but since I was Cc'd I figured it was worth a
mention -- for example the vendor ID above doesn't mean a thing to
all the rest of us, hence why I suspect it was a git send-email misfire;
sadly, I think we've all accidentally done that at least once....
Paul.
--
> ---
> arch/sparc/include/asm/page_64.h | 3 +-
> arch/sparc/include/asm/pgtable_64.h | 5 +++
> arch/sparc/include/asm/tsb.h | 35 +++++++++++++++++-
> arch/sparc/kernel/tsb.S | 2 +-
> arch/sparc/mm/hugetlbpage.c | 74 ++++++++++++++++++++++++++-----------
> arch/sparc/mm/init_64.c | 41 ++++++++++++++++----
> 6 files changed, 128 insertions(+), 32 deletions(-)
>
> diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
> index 5961b2d..8ee1f97 100644
> --- a/arch/sparc/include/asm/page_64.h
> +++ b/arch/sparc/include/asm/page_64.h
> @@ -17,6 +17,7 @@
>
> #define HPAGE_SHIFT 23
> #define REAL_HPAGE_SHIFT 22
> +#define HPAGE_16GB_SHIFT 34
> #define HPAGE_2GB_SHIFT 31
> #define HPAGE_256MB_SHIFT 28
> #define HPAGE_64K_SHIFT 16
> @@ -28,7 +29,7 @@
> #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
> #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
> #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
> -#define HUGE_MAX_HSTATE 4
> +#define HUGE_MAX_HSTATE 5
> #endif
>
> #ifndef __ASSEMBLY__
> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
> index 6fbd931..2444b02 100644
> --- a/arch/sparc/include/asm/pgtable_64.h
> +++ b/arch/sparc/include/asm/pgtable_64.h
> @@ -414,6 +414,11 @@ static inline bool is_hugetlb_pmd(pmd_t pmd)
> return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
> }
>
> +static inline bool is_hugetlb_pud(pud_t pud)
> +{
> + return !!(pud_val(pud) & _PAGE_PUD_HUGE);
> +}
> +
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> static inline pmd_t pmd_mkhuge(pmd_t pmd)
> {
> diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
> index 32258e0..fbd8da7 100644
> --- a/arch/sparc/include/asm/tsb.h
> +++ b/arch/sparc/include/asm/tsb.h
> @@ -195,6 +195,36 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> nop; \
> 699:
>
> + /* PUD has been loaded into REG1, interpret the value, seeing
> + * if it is a HUGE PUD or a normal one. If it is not valid
> + * then jump to FAIL_LABEL. If it is a HUGE PUD, and it
> + * translates to a valid PTE, branch to PTE_LABEL.
> + *
> + * We have to propagate bits [32:22] from the virtual address
> + * to resolve at 4M granularity.
> + */
> +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> + brz,pn REG1, FAIL_LABEL; \
> + sethi %uhi(_PAGE_PUD_HUGE), REG2; \
> + sllx REG2, 32, REG2; \
> + andcc REG1, REG2, %g0; \
> + be,pt %xcc, 700f; \
> + sethi %hi(0x1ffc0000), REG2; \
> + brgez,pn REG1, FAIL_LABEL; \
> + sllx REG2, 1, REG2; \
> + brgez,pn REG1, FAIL_LABEL; \
> + andn REG1, REG2, REG1; \
> + and VADDR, REG2, REG2; \
> + brlz,pt REG1, PTE_LABEL; \
> + or REG1, REG2, REG1; \
> +700:
> +#else
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> + brz,pn REG1, FAIL_LABEL; \
> + nop;
> +#endif
> +
> /* PMD has been loaded into REG1, interpret the value, seeing
> * if it is a HUGE PMD or a normal one. If it is not valid
> * then jump to FAIL_LABEL. If it is a HUGE PMD, and it
> @@ -209,14 +239,14 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> sethi %uhi(_PAGE_PMD_HUGE), REG2; \
> sllx REG2, 32, REG2; \
> andcc REG1, REG2, %g0; \
> - be,pt %xcc, 700f; \
> + be,pt %xcc, 701f; \
> sethi %hi(4 * 1024 * 1024), REG2; \
> brgez,pn REG1, FAIL_LABEL; \
> andn REG1, REG2, REG1; \
> and VADDR, REG2, REG2; \
> brlz,pt REG1, PTE_LABEL; \
> or REG1, REG2, REG1; \
> -700:
> +701:
> #else
> #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> brz,pn REG1, FAIL_LABEL; \
> @@ -242,6 +272,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> srlx REG2, 64 - PAGE_SHIFT, REG2; \
> andn REG2, 0x7, REG2; \
> ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
> + USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
> brz,pn REG1, FAIL_LABEL; \
> sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
> srlx REG2, 64 - PAGE_SHIFT, REG2; \
> diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
> index 10689cf..a0a5a13 100644
> --- a/arch/sparc/kernel/tsb.S
> +++ b/arch/sparc/kernel/tsb.S
> @@ -117,7 +117,7 @@ tsb_miss_page_table_walk_sun4v_fastpath:
> /* Valid PTE is now in %g5. */
>
> #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> - sethi %uhi(_PAGE_PMD_HUGE), %g7
> + sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
> sllx %g7, 32, %g7
>
> andcc %g5, %g7, %g0
> diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
> index 7c29d38..62c1e62 100644
> --- a/arch/sparc/mm/hugetlbpage.c
> +++ b/arch/sparc/mm/hugetlbpage.c
> @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
> pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
>
> switch (shift) {
> + case HPAGE_16GB_SHIFT:
> + hugepage_size = _PAGE_SZ16GB_4V;
> + pte_val(entry) |= _PAGE_PUD_HUGE;
> + break;
> case HPAGE_2GB_SHIFT:
> hugepage_size = _PAGE_SZ2GB_4V;
> pte_val(entry) |= _PAGE_PMD_HUGE;
> @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
> unsigned int shift;
>
> switch (tte_szbits) {
> + case _PAGE_SZ16GB_4V:
> + shift = HPAGE_16GB_SHIFT;
> + break;
> case _PAGE_SZ2GB_4V:
> shift = HPAGE_2GB_SHIFT;
> break;
> @@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>
> pgd = pgd_offset(mm, addr);
> pud = pud_alloc(mm, pgd, addr);
> - if (pud) {
> + if (!pud)
> + return NULL;
> +
> + if (sz >= PUD_SIZE)
> + pte = (pte_t *)pud;
> + else {
> pmd = pmd_alloc(mm, pud, addr);
> if (!pmd)
> return NULL;
> @@ -288,12 +300,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
> if (!pgd_none(*pgd)) {
> pud = pud_offset(pgd, addr);
> if (!pud_none(*pud)) {
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_none(*pmd)) {
> - if (is_hugetlb_pmd(*pmd))
> - pte = (pte_t *)pmd;
> - else
> - pte = pte_offset_map(pmd, addr);
> + if (is_hugetlb_pud(*pud))
> + pte = (pte_t *)pud;
> + else {
> + pmd = pmd_offset(pud, addr);
> + if (!pmd_none(*pmd)) {
> + if (is_hugetlb_pmd(*pmd))
> + pte = (pte_t *)pmd;
> + else
> + pte = pte_offset_map(pmd, addr);
> + }
> }
> }
> }
> @@ -304,12 +320,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
> void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep, pte_t entry)
> {
> - unsigned int i, nptes, orig_shift, shift;
> - unsigned long size;
> + unsigned int nptes, orig_shift, shift;
> + unsigned long i, size;
> pte_t orig;
>
> size = huge_tte_to_size(entry);
> - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
> +
> + shift = PAGE_SHIFT;
> + if (size >= PUD_SIZE)
> + shift = PUD_SHIFT;
> + else if (size >= PMD_SIZE)
> + shift = PMD_SHIFT;
> + else
> + shift = PAGE_SHIFT;
> +
> nptes = size >> shift;
>
> if (!pte_present(*ptep) && pte_present(entry))
> @@ -332,19 +356,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep)
> {
> - unsigned int i, nptes, hugepage_shift;
> + unsigned int i, nptes, orig_shift, shift;
> unsigned long size;
> pte_t entry;
>
> entry = *ptep;
> size = huge_tte_to_size(entry);
> - if (size >= HPAGE_SIZE)
> - nptes = size >> PMD_SHIFT;
> +
> + shift = PAGE_SHIFT;
> + if (size >= PUD_SIZE)
> + shift = PUD_SHIFT;
> + else if (size >= PMD_SIZE)
> + shift = PMD_SHIFT;
> else
> - nptes = size >> PAGE_SHIFT;
> + shift = PAGE_SHIFT;
>
> - hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
> - huge_tte_to_shift(entry);
> + nptes = size >> shift;
> + orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
>
> if (pte_present(entry))
> mm->context.hugetlb_pte_count -= nptes;
> @@ -353,11 +381,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> for (i = 0; i < nptes; i++)
> ptep[i] = __pte(0UL);
>
> - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
> + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
> /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
> if (size == HPAGE_SIZE)
> maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
> - hugepage_shift);
> + orig_shift);
>
> return entry;
> }
> @@ -370,7 +398,8 @@ int pmd_huge(pmd_t pmd)
>
> int pud_huge(pud_t pud)
> {
> - return 0;
> + return !pud_none(pud) &&
> + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
> }
>
> static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> @@ -434,8 +463,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
> next = pud_addr_end(addr, end);
> if (pud_none_or_clear_bad(pud))
> continue;
> - hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> - ceiling);
> + if (is_hugetlb_pud(*pud))
> + pud_clear(pud);
> + else
> + hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> + ceiling);
> } while (pud++, addr = next, addr != end);
>
> start &= PGDIR_MASK;
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 0cda653..7c0fe73 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
> hugepage_shift = ilog2(hugepage_size);
>
> switch (hugepage_shift) {
> + case HPAGE_16GB_SHIFT:
> + hv_pgsz_mask = HV_PGSZ_MASK_16GB;
> + hv_pgsz_idx = HV_PGSZ_IDX_16GB;
> + break;
> case HPAGE_2GB_SHIFT:
> hv_pgsz_mask = HV_PGSZ_MASK_2GB;
> hv_pgsz_idx = HV_PGSZ_IDX_2GB;
> @@ -376,6 +380,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
> {
> struct mm_struct *mm;
> unsigned long flags;
> + bool is_huge_tsb;
> pte_t pte = *ptep;
>
> if (tlb_type != hypervisor) {
> @@ -393,15 +398,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
>
> spin_lock_irqsave(&mm->context.lock, flags);
>
> + is_huge_tsb = false;
> #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> - if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
> - is_hugetlb_pmd(__pmd(pte_val(pte)))) {
> - /* We are fabricating 8MB pages using 4MB real hw pages. */
> - pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
> - address, pte_val(pte));
> - } else
> + if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
> + unsigned long hugepage_size = PAGE_SIZE;
> +
> + if (is_vm_hugetlb_page(vma))
> + hugepage_size = huge_page_size(hstate_vma(vma));
> +
> + if (hugepage_size >= PUD_SIZE) {
> + unsigned long mask = 0x1ffc00000UL;
> +
> + /* Transfer bits [32:22] from address to resolve
> + * at 4M granularity.
> + */
> + pte_val(pte) &= ~mask;
> + pte_val(pte) |= (address & mask);
> + } else if (hugepage_size >= PMD_SIZE) {
> + /* We are fabricating 8MB pages using 4MB
> + * real hw pages.
> + */
> + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> + }
> +
> + if (hugepage_size >= PMD_SIZE) {
> + __update_mmu_tsb_insert(mm, MM_TSB_HUGE,
> + REAL_HPAGE_SHIFT, address, pte_val(pte));
> + is_huge_tsb = true;
> + }
> + }
> #endif
> + if (!is_huge_tsb)
> __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
> address, pte_val(pte));
>
> --
> 2.9.2
>
WARNING: multiple messages have this Message-ID (diff)
From: Paul Gortmaker <paul.gortmaker@windriver.com>
To: Nitin Gupta <nitin.m.gupta@oracle.com>
Cc: "David S. Miller" <davem@davemloft.net>,
Mike Kravetz <mike.kravetz@oracle.com>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Tom Hromatka <tom.hromatka@oracle.com>,
Michal Hocko <mhocko@suse.com>, Ingo Molnar <mingo@kernel.org>,
Babu Moger <babu.moger@oracle.com>,
bob picco <bob.picco@oracle.com>,
Thomas Tai <thomas.tai@oracle.com>,
Pavel Tatashin <pasha.tatashin@oracle.com>,
Atish Patra <atish.patra@oracle.com>,
<sparclinux@vger.kernel.org>, <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] sparc64: Add 16GB hugepage support
Date: Wed, 24 May 2017 23:34:42 -0400 [thread overview]
Message-ID: <20170525033442.GB877@windriver.com> (raw)
In-Reply-To: <1495672233-116815-1-git-send-email-nitin.m.gupta@oracle.com>
[[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
> Orabug: 25362942
>
> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
If this wasn't an accidental git send-email misfire, then there should
be a long log indicating the use case, the performance increase, the
testing that was done, etc. etc.
Normally I'd not notice but since I was Cc'd I figured it was worth a
mention -- for example the vendor ID above doesn't mean a thing to
all the rest of us, hence why I suspect it was a git send-email misfire;
sadly, I think we've all accidentally done that at least once....
Paul.
--
> ---
> arch/sparc/include/asm/page_64.h | 3 +-
> arch/sparc/include/asm/pgtable_64.h | 5 +++
> arch/sparc/include/asm/tsb.h | 35 +++++++++++++++++-
> arch/sparc/kernel/tsb.S | 2 +-
> arch/sparc/mm/hugetlbpage.c | 74 ++++++++++++++++++++++++++-----------
> arch/sparc/mm/init_64.c | 41 ++++++++++++++++----
> 6 files changed, 128 insertions(+), 32 deletions(-)
>
> diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
> index 5961b2d..8ee1f97 100644
> --- a/arch/sparc/include/asm/page_64.h
> +++ b/arch/sparc/include/asm/page_64.h
> @@ -17,6 +17,7 @@
>
> #define HPAGE_SHIFT 23
> #define REAL_HPAGE_SHIFT 22
> +#define HPAGE_16GB_SHIFT 34
> #define HPAGE_2GB_SHIFT 31
> #define HPAGE_256MB_SHIFT 28
> #define HPAGE_64K_SHIFT 16
> @@ -28,7 +29,7 @@
> #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
> #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
> #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
> -#define HUGE_MAX_HSTATE 4
> +#define HUGE_MAX_HSTATE 5
> #endif
>
> #ifndef __ASSEMBLY__
> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
> index 6fbd931..2444b02 100644
> --- a/arch/sparc/include/asm/pgtable_64.h
> +++ b/arch/sparc/include/asm/pgtable_64.h
> @@ -414,6 +414,11 @@ static inline bool is_hugetlb_pmd(pmd_t pmd)
> return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
> }
>
> +static inline bool is_hugetlb_pud(pud_t pud)
> +{
> + return !!(pud_val(pud) & _PAGE_PUD_HUGE);
> +}
> +
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> static inline pmd_t pmd_mkhuge(pmd_t pmd)
> {
> diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
> index 32258e0..fbd8da7 100644
> --- a/arch/sparc/include/asm/tsb.h
> +++ b/arch/sparc/include/asm/tsb.h
> @@ -195,6 +195,36 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> nop; \
> 699:
>
> + /* PUD has been loaded into REG1, interpret the value, seeing
> + * if it is a HUGE PUD or a normal one. If it is not valid
> + * then jump to FAIL_LABEL. If it is a HUGE PUD, and it
> + * translates to a valid PTE, branch to PTE_LABEL.
> + *
> + * We have to propagate bits [32:22] from the virtual address
> + * to resolve at 4M granularity.
> + */
> +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> + brz,pn REG1, FAIL_LABEL; \
> + sethi %uhi(_PAGE_PUD_HUGE), REG2; \
> + sllx REG2, 32, REG2; \
> + andcc REG1, REG2, %g0; \
> + be,pt %xcc, 700f; \
> + sethi %hi(0x1ffc0000), REG2; \
> + brgez,pn REG1, FAIL_LABEL; \
> + sllx REG2, 1, REG2; \
> + brgez,pn REG1, FAIL_LABEL; \
> + andn REG1, REG2, REG1; \
> + and VADDR, REG2, REG2; \
> + brlz,pt REG1, PTE_LABEL; \
> + or REG1, REG2, REG1; \
> +700:
> +#else
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> + brz,pn REG1, FAIL_LABEL; \
> + nop;
> +#endif
> +
> /* PMD has been loaded into REG1, interpret the value, seeing
> * if it is a HUGE PMD or a normal one. If it is not valid
> * then jump to FAIL_LABEL. If it is a HUGE PMD, and it
> @@ -209,14 +239,14 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> sethi %uhi(_PAGE_PMD_HUGE), REG2; \
> sllx REG2, 32, REG2; \
> andcc REG1, REG2, %g0; \
> - be,pt %xcc, 700f; \
> + be,pt %xcc, 701f; \
> sethi %hi(4 * 1024 * 1024), REG2; \
> brgez,pn REG1, FAIL_LABEL; \
> andn REG1, REG2, REG1; \
> and VADDR, REG2, REG2; \
> brlz,pt REG1, PTE_LABEL; \
> or REG1, REG2, REG1; \
> -700:
> +701:
> #else
> #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> brz,pn REG1, FAIL_LABEL; \
> @@ -242,6 +272,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> srlx REG2, 64 - PAGE_SHIFT, REG2; \
> andn REG2, 0x7, REG2; \
> ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
> + USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
> brz,pn REG1, FAIL_LABEL; \
> sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
> srlx REG2, 64 - PAGE_SHIFT, REG2; \
> diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
> index 10689cf..a0a5a13 100644
> --- a/arch/sparc/kernel/tsb.S
> +++ b/arch/sparc/kernel/tsb.S
> @@ -117,7 +117,7 @@ tsb_miss_page_table_walk_sun4v_fastpath:
> /* Valid PTE is now in %g5. */
>
> #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> - sethi %uhi(_PAGE_PMD_HUGE), %g7
> + sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
> sllx %g7, 32, %g7
>
> andcc %g5, %g7, %g0
> diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
> index 7c29d38..62c1e62 100644
> --- a/arch/sparc/mm/hugetlbpage.c
> +++ b/arch/sparc/mm/hugetlbpage.c
> @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
> pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
>
> switch (shift) {
> + case HPAGE_16GB_SHIFT:
> + hugepage_size = _PAGE_SZ16GB_4V;
> + pte_val(entry) |= _PAGE_PUD_HUGE;
> + break;
> case HPAGE_2GB_SHIFT:
> hugepage_size = _PAGE_SZ2GB_4V;
> pte_val(entry) |= _PAGE_PMD_HUGE;
> @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
> unsigned int shift;
>
> switch (tte_szbits) {
> + case _PAGE_SZ16GB_4V:
> + shift = HPAGE_16GB_SHIFT;
> + break;
> case _PAGE_SZ2GB_4V:
> shift = HPAGE_2GB_SHIFT;
> break;
> @@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>
> pgd = pgd_offset(mm, addr);
> pud = pud_alloc(mm, pgd, addr);
> - if (pud) {
> + if (!pud)
> + return NULL;
> +
> + if (sz >= PUD_SIZE)
> + pte = (pte_t *)pud;
> + else {
> pmd = pmd_alloc(mm, pud, addr);
> if (!pmd)
> return NULL;
> @@ -288,12 +300,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
> if (!pgd_none(*pgd)) {
> pud = pud_offset(pgd, addr);
> if (!pud_none(*pud)) {
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_none(*pmd)) {
> - if (is_hugetlb_pmd(*pmd))
> - pte = (pte_t *)pmd;
> - else
> - pte = pte_offset_map(pmd, addr);
> + if (is_hugetlb_pud(*pud))
> + pte = (pte_t *)pud;
> + else {
> + pmd = pmd_offset(pud, addr);
> + if (!pmd_none(*pmd)) {
> + if (is_hugetlb_pmd(*pmd))
> + pte = (pte_t *)pmd;
> + else
> + pte = pte_offset_map(pmd, addr);
> + }
> }
> }
> }
> @@ -304,12 +320,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
> void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep, pte_t entry)
> {
> - unsigned int i, nptes, orig_shift, shift;
> - unsigned long size;
> + unsigned int nptes, orig_shift, shift;
> + unsigned long i, size;
> pte_t orig;
>
> size = huge_tte_to_size(entry);
> - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
> +
> + shift = PAGE_SHIFT;
> + if (size >= PUD_SIZE)
> + shift = PUD_SHIFT;
> + else if (size >= PMD_SIZE)
> + shift = PMD_SHIFT;
> + else
> + shift = PAGE_SHIFT;
> +
> nptes = size >> shift;
>
> if (!pte_present(*ptep) && pte_present(entry))
> @@ -332,19 +356,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep)
> {
> - unsigned int i, nptes, hugepage_shift;
> + unsigned int i, nptes, orig_shift, shift;
> unsigned long size;
> pte_t entry;
>
> entry = *ptep;
> size = huge_tte_to_size(entry);
> - if (size >= HPAGE_SIZE)
> - nptes = size >> PMD_SHIFT;
> +
> + shift = PAGE_SHIFT;
> + if (size >= PUD_SIZE)
> + shift = PUD_SHIFT;
> + else if (size >= PMD_SIZE)
> + shift = PMD_SHIFT;
> else
> - nptes = size >> PAGE_SHIFT;
> + shift = PAGE_SHIFT;
>
> - hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
> - huge_tte_to_shift(entry);
> + nptes = size >> shift;
> + orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
>
> if (pte_present(entry))
> mm->context.hugetlb_pte_count -= nptes;
> @@ -353,11 +381,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> for (i = 0; i < nptes; i++)
> ptep[i] = __pte(0UL);
>
> - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
> + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
> /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
> if (size == HPAGE_SIZE)
> maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
> - hugepage_shift);
> + orig_shift);
>
> return entry;
> }
> @@ -370,7 +398,8 @@ int pmd_huge(pmd_t pmd)
>
> int pud_huge(pud_t pud)
> {
> - return 0;
> + return !pud_none(pud) &&
> + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
> }
>
> static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> @@ -434,8 +463,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
> next = pud_addr_end(addr, end);
> if (pud_none_or_clear_bad(pud))
> continue;
> - hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> - ceiling);
> + if (is_hugetlb_pud(*pud))
> + pud_clear(pud);
> + else
> + hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> + ceiling);
> } while (pud++, addr = next, addr != end);
>
> start &= PGDIR_MASK;
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 0cda653..7c0fe73 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
> hugepage_shift = ilog2(hugepage_size);
>
> switch (hugepage_shift) {
> + case HPAGE_16GB_SHIFT:
> + hv_pgsz_mask = HV_PGSZ_MASK_16GB;
> + hv_pgsz_idx = HV_PGSZ_IDX_16GB;
> + break;
> case HPAGE_2GB_SHIFT:
> hv_pgsz_mask = HV_PGSZ_MASK_2GB;
> hv_pgsz_idx = HV_PGSZ_IDX_2GB;
> @@ -376,6 +380,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
> {
> struct mm_struct *mm;
> unsigned long flags;
> + bool is_huge_tsb;
> pte_t pte = *ptep;
>
> if (tlb_type != hypervisor) {
> @@ -393,15 +398,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
>
> spin_lock_irqsave(&mm->context.lock, flags);
>
> + is_huge_tsb = false;
> #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> - if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
> - is_hugetlb_pmd(__pmd(pte_val(pte)))) {
> - /* We are fabricating 8MB pages using 4MB real hw pages. */
> - pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
> - address, pte_val(pte));
> - } else
> + if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
> + unsigned long hugepage_size = PAGE_SIZE;
> +
> + if (is_vm_hugetlb_page(vma))
> + hugepage_size = huge_page_size(hstate_vma(vma));
> +
> + if (hugepage_size >= PUD_SIZE) {
> + unsigned long mask = 0x1ffc00000UL;
> +
> + /* Transfer bits [32:22] from address to resolve
> + * at 4M granularity.
> + */
> + pte_val(pte) &= ~mask;
> + pte_val(pte) |= (address & mask);
> + } else if (hugepage_size >= PMD_SIZE) {
> + /* We are fabricating 8MB pages using 4MB
> + * real hw pages.
> + */
> + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> + }
> +
> + if (hugepage_size >= PMD_SIZE) {
> + __update_mmu_tsb_insert(mm, MM_TSB_HUGE,
> + REAL_HPAGE_SHIFT, address, pte_val(pte));
> + is_huge_tsb = true;
> + }
> + }
> #endif
> + if (!is_huge_tsb)
> __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
> address, pte_val(pte));
>
> --
> 2.9.2
>
next prev parent reply other threads:[~2017-05-25 3:34 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-25 0:29 [PATCH] sparc64: Add 16GB hugepage support Nitin Gupta
2017-05-25 0:29 ` Nitin Gupta
2017-05-25 3:34 ` Paul Gortmaker [this message]
2017-05-25 3:34 ` Paul Gortmaker
2017-05-25 3:45 ` David Miller
2017-05-25 3:45 ` David Miller
2017-05-25 4:08 ` Nitin Gupta
2017-05-25 4:08 ` Nitin Gupta
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170525033442.GB877@windriver.com \
--to=paul.gortmaker@windriver.com \
--cc=atish.patra@oracle.com \
--cc=babu.moger@oracle.com \
--cc=bob.picco@oracle.com \
--cc=davem@davemloft.net \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mhocko@suse.com \
--cc=mike.kravetz@oracle.com \
--cc=mingo@kernel.org \
--cc=nitin.m.gupta@oracle.com \
--cc=pasha.tatashin@oracle.com \
--cc=sparclinux@vger.kernel.org \
--cc=thomas.tai@oracle.com \
--cc=tom.hromatka@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.