From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: benh@kernel.crashing.org, paulus@samba.org, mpe@ellerman.id.au,
Michael Neuling <mikey@neuling.org>
Cc: linuxppc-dev@lists.ozlabs.org,
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [RFC PATCH V1 09/33] powerpc/mm: Hugetlbfs is book3s_64 and fsl_book3e (32 or 64)
Date: Tue, 12 Jan 2016 12:45:44 +0530 [thread overview]
Message-ID: <1452582968-22669-10-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1452582968-22669-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
We move large part of fsl related code to hugetlbpage-book3e.c.
Only code movement. This also avoid #ifdef in the code.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/hugetlb.h | 1 +
arch/powerpc/mm/hugetlbpage-book3e.c | 293 +++++++++++++++++++++++++
arch/powerpc/mm/hugetlbpage-hash64.c | 121 +++++++++++
arch/powerpc/mm/hugetlbpage.c | 401 +----------------------------------
4 files changed, 416 insertions(+), 400 deletions(-)
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 7eac89b9f02e..0525f1c29afb 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -47,6 +47,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
#endif /* CONFIG_PPC_BOOK3S_64 */
+#define hugepd_none(hpd) ((hpd).pd == 0)
static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift)
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index ba47aaf33a4b..e6339ac45f0f 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -7,6 +7,39 @@
*/
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/bootmem.h>
+#include <linux/moduleparam.h>
+#include <linux/memblock.h>
+#include <asm/tlb.h>
+#include <asm/setup.h>
+
+/*
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready. On non-Freescale implementations, this is
+ * just used to track 16G pages and so is a single array. FSL-based
+ * implementations may have more than one gpage size, so we need multiple
+ * arrays
+ */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define MAX_NUMBER_GPAGES 128
+struct psize_gpages {
+ u64 gpage_list[MAX_NUMBER_GPAGES];
+ unsigned int nr_gpages;
+};
+static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
+#endif
+
+/*
+ * These macros define how to determine which level of the page table holds
+ * the hpdp.
+ */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
+#define HUGEPD_PUD_SHIFT PUD_SHIFT
+#else
+#define HUGEPD_PGD_SHIFT PUD_SHIFT
+#define HUGEPD_PUD_SHIFT PMD_SHIFT
+#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
@@ -151,3 +184,263 @@ void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
__flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
}
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+ unsigned long address, unsigned pdshift, unsigned pshift)
+{
+ struct kmem_cache *cachep;
+ pte_t *new;
+
+ int i;
+ int num_hugepd = 1 << (pshift - pdshift);
+ cachep = hugepte_cache;
+
+ new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+
+ BUG_ON(pshift > HUGEPD_SHIFT_MASK);
+ BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
+
+ if (! new)
+ return -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ /*
+ * We have multiple higher-level entries that point to the same
+ * actual pte location. Fill in each as we go and backtrack on error.
+ * We need all of these so the DTLB pgtable walk code can find the
+ * right higher-level entry without knowing if it's a hugepage or not.
+ */
+ for (i = 0; i < num_hugepd; i++, hpdp++) {
+ if (unlikely(!hugepd_none(*hpdp)))
+ break;
+ else
+ /* We use the old format for PPC_FSL_BOOK3E */
+ hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+ }
+ /* If we bailed from the for loop early, an error occurred, clean up */
+ if (i < num_hugepd) {
+ for (i = i - 1 ; i >= 0; i--, hpdp--)
+ hpdp->pd = 0;
+ kmem_cache_free(cachep, new);
+ }
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ hugepd_t *hpdp = NULL;
+ unsigned pshift = __ffs(sz);
+ unsigned pdshift = PGDIR_SHIFT;
+
+ addr &= ~(sz-1);
+
+ pg = pgd_offset(mm, addr);
+
+ if (pshift >= HUGEPD_PGD_SHIFT) {
+ hpdp = (hugepd_t *)pg;
+ } else {
+ pdshift = PUD_SHIFT;
+ pu = pud_alloc(mm, pg, addr);
+ if (pshift >= HUGEPD_PUD_SHIFT) {
+ hpdp = (hugepd_t *)pu;
+ } else {
+ pdshift = PMD_SHIFT;
+ pm = pmd_alloc(mm, pu, addr);
+ hpdp = (hugepd_t *)pm;
+ }
+ }
+
+ if (!hpdp)
+ return NULL;
+
+ BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+
+ if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+ return NULL;
+
+ return hugepte_offset(*hpdp, addr, pdshift);
+}
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+/* Build list of addresses of gigantic pages. This function is used in early
+ * boot before the buddy allocator is setup.
+ */
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+{
+ unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
+ int i;
+
+ if (addr == 0)
+ return;
+
+ gpage_freearray[idx].nr_gpages = number_of_pages;
+
+ for (i = 0; i < number_of_pages; i++) {
+ gpage_freearray[idx].gpage_list[i] = addr;
+ addr += page_size;
+ }
+}
+
+/*
+ * Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+ struct huge_bootmem_page *m;
+ int idx = shift_to_mmu_psize(huge_page_shift(hstate));
+ int nr_gpages = gpage_freearray[idx].nr_gpages;
+
+ if (nr_gpages == 0)
+ return 0;
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * If gpages can be in highmem we can't use the trick of storing the
+ * data structure in the page; allocate space for this
+ */
+ m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0);
+ m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
+#else
+ m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
+#endif
+
+ list_add(&m->list, &huge_boot_pages);
+ gpage_freearray[idx].nr_gpages = nr_gpages;
+ gpage_freearray[idx].gpage_list[nr_gpages] = 0;
+ m->hstate = hstate;
+
+ return 1;
+}
+/*
+ * Scan the command line hugepagesz= options for gigantic pages; store those in
+ * a list that we use to allocate the memory once all options are parsed.
+ */
+
+unsigned long gpage_npages[MMU_PAGE_COUNT];
+
+static int __init do_gpage_early_setup(char *param, char *val,
+ const char *unused, void *arg)
+{
+ static phys_addr_t size;
+ unsigned long npages;
+
+ /*
+ * The hugepagesz and hugepages cmdline options are interleaved. We
+ * use the size variable to keep track of whether or not this was done
+ * properly and skip over instances where it is incorrect. Other
+ * command-line parsing code will issue warnings, so we don't need to.
+ *
+ */
+ if ((strcmp(param, "default_hugepagesz") == 0) ||
+ (strcmp(param, "hugepagesz") == 0)) {
+ size = memparse(val, NULL);
+ } else if (strcmp(param, "hugepages") == 0) {
+ if (size != 0) {
+ if (sscanf(val, "%lu", &npages) <= 0)
+ npages = 0;
+ if (npages > MAX_NUMBER_GPAGES) {
+ pr_warn("MMU: %lu pages requested for page "
+ "size %llu KB, limiting to "
+ __stringify(MAX_NUMBER_GPAGES) "\n",
+ npages, size / 1024);
+ npages = MAX_NUMBER_GPAGES;
+ }
+ gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
+ size = 0;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * This function allocates physical space for pages that are larger than the
+ * buddy allocator can handle. We want to allocate these in highmem because
+ * the amount of lowmem is limited. This means that this function MUST be
+ * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
+ * allocate to grab highmem.
+ */
+void __init reserve_hugetlb_gpages(void)
+{
+ static __initdata char cmdline[COMMAND_LINE_SIZE];
+ phys_addr_t size, base;
+ int i;
+
+ strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
+ NULL, &do_gpage_early_setup);
+
+ /*
+ * Walk gpage list in reverse, allocating larger page sizes first.
+ * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
+ * When we reach the point in the list where pages are no longer
+ * considered gpages, we're done.
+ */
+ for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
+ if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
+ continue;
+ else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
+ break;
+
+ size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
+ base = memblock_alloc_base(size * gpage_npages[i], size,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ add_gpage(base, size, gpage_npages[i]);
+ }
+}
+
+#define HUGEPD_FREELIST_SIZE \
+ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
+
+struct hugepd_freelist {
+ struct rcu_head rcu;
+ unsigned int index;
+ void *ptes[0];
+};
+
+static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
+
+static void hugepd_free_rcu_callback(struct rcu_head *head)
+{
+ struct hugepd_freelist *batch =
+ container_of(head, struct hugepd_freelist, rcu);
+ unsigned int i;
+
+ for (i = 0; i < batch->index; i++)
+ kmem_cache_free(hugepte_cache, batch->ptes[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+ struct hugepd_freelist **batchp;
+
+ batchp = this_cpu_ptr(&hugepd_freelist_cur);
+
+ if (atomic_read(&tlb->mm->mm_users) < 2 ||
+ cpumask_equal(mm_cpumask(tlb->mm),
+ cpumask_of(smp_processor_id()))) {
+ kmem_cache_free(hugepte_cache, hugepte);
+ put_cpu_var(hugepd_freelist_cur);
+ return;
+ }
+
+ if (*batchp == NULL) {
+ *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
+ (*batchp)->index = 0;
+ }
+
+ (*batchp)->ptes[(*batchp)->index++] = hugepte;
+ if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
+ call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
+ *batchp = NULL;
+ }
+ put_cpu_var(hugepd_freelist_cur);
+}
+#endif
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 9c224b012d62..9e457c83626b 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -14,6 +14,17 @@
#include <asm/cacheflush.h>
#include <asm/machdep.h>
+/*
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready. On non-Freescale implementations, this is
+ * just used to track 16G pages and so is a single array. FSL-based
+ * implementations may have more than one gpage size, so we need multiple
+ * arrays
+ */
+#define MAX_NUMBER_GPAGES 1024
+static u64 gpage_freearray[MAX_NUMBER_GPAGES];
+static unsigned nr_gpages;
+
extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
unsigned long pa, unsigned long rlags,
unsigned long vflags, int psize, int ssize);
@@ -132,3 +143,113 @@ int hugepd_ok(hugepd_t hpd)
return 0;
}
#endif
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+ unsigned long address, unsigned pdshift, unsigned pshift)
+{
+ struct kmem_cache *cachep;
+ pte_t *new;
+
+ cachep = PGT_CACHE(pdshift - pshift);
+
+ new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+
+ BUG_ON(pshift > HUGEPD_SHIFT_MASK);
+ BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
+
+ if (! new)
+ return -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ if (!hugepd_none(*hpdp))
+ kmem_cache_free(cachep, new);
+ else {
+ hpdp->pd = (unsigned long)new |
+ (shift_to_mmu_psize(pshift) << 2);
+ }
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+
+/*
+ * At this point we do the placement change only for BOOK3S 64. This would
+ * possibly work on other subarchs.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ hugepd_t *hpdp = NULL;
+ unsigned pshift = __ffs(sz);
+ unsigned pdshift = PGDIR_SHIFT;
+
+ addr &= ~(sz-1);
+ pg = pgd_offset(mm, addr);
+
+ if (pshift == PGDIR_SHIFT)
+ /* 16GB huge page */
+ return (pte_t *) pg;
+ else if (pshift > PUD_SHIFT)
+ /*
+ * We need to use hugepd table
+ */
+ hpdp = (hugepd_t *)pg;
+ else {
+ pdshift = PUD_SHIFT;
+ pu = pud_alloc(mm, pg, addr);
+ if (pshift == PUD_SHIFT)
+ return (pte_t *)pu;
+ else if (pshift > PMD_SHIFT)
+ hpdp = (hugepd_t *)pu;
+ else {
+ pdshift = PMD_SHIFT;
+ pm = pmd_alloc(mm, pu, addr);
+ if (pshift == PMD_SHIFT)
+ /* 16MB hugepage */
+ return (pte_t *)pm;
+ else
+ hpdp = (hugepd_t *)pm;
+ }
+ }
+ if (!hpdp)
+ return NULL;
+
+ BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+
+ if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+ return NULL;
+
+ return hugepte_offset(*hpdp, addr, pdshift);
+}
+
+
+/* Build list of addresses of gigantic pages. This function is used in early
+ * boot before the buddy allocator is setup.
+ */
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+{
+ if (!addr)
+ return;
+ while (number_of_pages > 0) {
+ gpage_freearray[nr_gpages] = addr;
+ nr_gpages++;
+ number_of_pages--;
+ addr += page_size;
+ }
+}
+
+/* Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+ struct huge_bootmem_page *m;
+ if (nr_gpages == 0)
+ return 0;
+ m = phys_to_virt(gpage_freearray[--nr_gpages]);
+ gpage_freearray[nr_gpages] = 0;
+ list_add(&m->list, &huge_boot_pages);
+ m->hstate = hstate;
+ return 1;
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 744e24bcb85c..c94502899e94 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -31,413 +31,14 @@
unsigned int HPAGE_SHIFT;
-/*
- * Tracks gpages after the device tree is scanned and before the
- * huge_boot_pages list is ready. On non-Freescale implementations, this is
- * just used to track 16G pages and so is a single array. FSL-based
- * implementations may have more than one gpage size, so we need multiple
- * arrays
- */
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#define MAX_NUMBER_GPAGES 128
-struct psize_gpages {
- u64 gpage_list[MAX_NUMBER_GPAGES];
- unsigned int nr_gpages;
-};
-static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
-#else
-#define MAX_NUMBER_GPAGES 1024
-static u64 gpage_freearray[MAX_NUMBER_GPAGES];
-static unsigned nr_gpages;
-#endif
-
-#define hugepd_none(hpd) ((hpd).pd == 0)
-
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
/* Only called for hugetlbfs pages, hence can ignore THP */
return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
}
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
- unsigned long address, unsigned pdshift, unsigned pshift)
-{
- struct kmem_cache *cachep;
- pte_t *new;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- int i;
- int num_hugepd = 1 << (pshift - pdshift);
- cachep = hugepte_cache;
-#else
- cachep = PGT_CACHE(pdshift - pshift);
-#endif
-
- new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
-
- BUG_ON(pshift > HUGEPD_SHIFT_MASK);
- BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
-
- if (! new)
- return -ENOMEM;
-
- spin_lock(&mm->page_table_lock);
-#ifdef CONFIG_PPC_FSL_BOOK3E
- /*
- * We have multiple higher-level entries that point to the same
- * actual pte location. Fill in each as we go and backtrack on error.
- * We need all of these so the DTLB pgtable walk code can find the
- * right higher-level entry without knowing if it's a hugepage or not.
- */
- for (i = 0; i < num_hugepd; i++, hpdp++) {
- if (unlikely(!hugepd_none(*hpdp)))
- break;
- else
- /* We use the old format for PPC_FSL_BOOK3E */
- hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
- }
- /* If we bailed from the for loop early, an error occurred, clean up */
- if (i < num_hugepd) {
- for (i = i - 1 ; i >= 0; i--, hpdp--)
- hpdp->pd = 0;
- kmem_cache_free(cachep, new);
- }
-#else
- if (!hugepd_none(*hpdp))
- kmem_cache_free(cachep, new);
- else {
-#ifdef CONFIG_PPC_BOOK3S_64
- hpdp->pd = (unsigned long)new |
- (shift_to_mmu_psize(pshift) << 2);
-#else
- hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#endif
- }
-#endif
- spin_unlock(&mm->page_table_lock);
- return 0;
-}
-
-/*
- * These macros define how to determine which level of the page table holds
- * the hpdp.
- */
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
-#define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * At this point we do the placement change only for BOOK3S 64. This would
- * possibly work on other subarchs.
- */
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
-
- addr &= ~(sz-1);
- pg = pgd_offset(mm, addr);
-
- if (pshift == PGDIR_SHIFT)
- /* 16GB huge page */
- return (pte_t *) pg;
- else if (pshift > PUD_SHIFT)
- /*
- * We need to use hugepd table
- */
- hpdp = (hugepd_t *)pg;
- else {
- pdshift = PUD_SHIFT;
- pu = pud_alloc(mm, pg, addr);
- if (pshift == PUD_SHIFT)
- return (pte_t *)pu;
- else if (pshift > PMD_SHIFT)
- hpdp = (hugepd_t *)pu;
- else {
- pdshift = PMD_SHIFT;
- pm = pmd_alloc(mm, pu, addr);
- if (pshift == PMD_SHIFT)
- /* 16MB hugepage */
- return (pte_t *)pm;
- else
- hpdp = (hugepd_t *)pm;
- }
- }
- if (!hpdp)
- return NULL;
-
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
- return NULL;
-
- return hugepte_offset(*hpdp, addr, pdshift);
-}
-
-#else
-
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
-
- addr &= ~(sz-1);
-
- pg = pgd_offset(mm, addr);
-
- if (pshift >= HUGEPD_PGD_SHIFT) {
- hpdp = (hugepd_t *)pg;
- } else {
- pdshift = PUD_SHIFT;
- pu = pud_alloc(mm, pg, addr);
- if (pshift >= HUGEPD_PUD_SHIFT) {
- hpdp = (hugepd_t *)pu;
- } else {
- pdshift = PMD_SHIFT;
- pm = pmd_alloc(mm, pu, addr);
- hpdp = (hugepd_t *)pm;
- }
- }
-
- if (!hpdp)
- return NULL;
-
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
- return NULL;
-
- return hugepte_offset(*hpdp, addr, pdshift);
-}
-#endif
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy allocator is setup.
- */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
-{
- unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
- int i;
-
- if (addr == 0)
- return;
-
- gpage_freearray[idx].nr_gpages = number_of_pages;
-
- for (i = 0; i < number_of_pages; i++) {
- gpage_freearray[idx].gpage_list[i] = addr;
- addr += page_size;
- }
-}
-
-/*
- * Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
-{
- struct huge_bootmem_page *m;
- int idx = shift_to_mmu_psize(huge_page_shift(hstate));
- int nr_gpages = gpage_freearray[idx].nr_gpages;
-
- if (nr_gpages == 0)
- return 0;
-
-#ifdef CONFIG_HIGHMEM
- /*
- * If gpages can be in highmem we can't use the trick of storing the
- * data structure in the page; allocate space for this
- */
- m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0);
- m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
-#else
- m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
-#endif
-
- list_add(&m->list, &huge_boot_pages);
- gpage_freearray[idx].nr_gpages = nr_gpages;
- gpage_freearray[idx].gpage_list[nr_gpages] = 0;
- m->hstate = hstate;
-
- return 1;
-}
-/*
- * Scan the command line hugepagesz= options for gigantic pages; store those in
- * a list that we use to allocate the memory once all options are parsed.
- */
-
-unsigned long gpage_npages[MMU_PAGE_COUNT];
-
-static int __init do_gpage_early_setup(char *param, char *val,
- const char *unused, void *arg)
-{
- static phys_addr_t size;
- unsigned long npages;
-
- /*
- * The hugepagesz and hugepages cmdline options are interleaved. We
- * use the size variable to keep track of whether or not this was done
- * properly and skip over instances where it is incorrect. Other
- * command-line parsing code will issue warnings, so we don't need to.
- *
- */
- if ((strcmp(param, "default_hugepagesz") == 0) ||
- (strcmp(param, "hugepagesz") == 0)) {
- size = memparse(val, NULL);
- } else if (strcmp(param, "hugepages") == 0) {
- if (size != 0) {
- if (sscanf(val, "%lu", &npages) <= 0)
- npages = 0;
- if (npages > MAX_NUMBER_GPAGES) {
- pr_warn("MMU: %lu pages requested for page "
- "size %llu KB, limiting to "
- __stringify(MAX_NUMBER_GPAGES) "\n",
- npages, size / 1024);
- npages = MAX_NUMBER_GPAGES;
- }
- gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
- size = 0;
- }
- }
- return 0;
-}
-
-
-/*
- * This function allocates physical space for pages that are larger than the
- * buddy allocator can handle. We want to allocate these in highmem because
- * the amount of lowmem is limited. This means that this function MUST be
- * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
- * allocate to grab highmem.
- */
-void __init reserve_hugetlb_gpages(void)
-{
- static __initdata char cmdline[COMMAND_LINE_SIZE];
- phys_addr_t size, base;
- int i;
-
- strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
- NULL, &do_gpage_early_setup);
-
- /*
- * Walk gpage list in reverse, allocating larger page sizes first.
- * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
- * When we reach the point in the list where pages are no longer
- * considered gpages, we're done.
- */
- for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
- if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
- continue;
- else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
- break;
-
- size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
- base = memblock_alloc_base(size * gpage_npages[i], size,
- MEMBLOCK_ALLOC_ANYWHERE);
- add_gpage(base, size, gpage_npages[i]);
- }
-}
-
-#else /* !PPC_FSL_BOOK3E */
-
-/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy allocator is setup.
- */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
-{
- if (!addr)
- return;
- while (number_of_pages > 0) {
- gpage_freearray[nr_gpages] = addr;
- nr_gpages++;
- number_of_pages--;
- addr += page_size;
- }
-}
-
-/* Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
-{
- struct huge_bootmem_page *m;
- if (nr_gpages == 0)
- return 0;
- m = phys_to_virt(gpage_freearray[--nr_gpages]);
- gpage_freearray[nr_gpages] = 0;
- list_add(&m->list, &huge_boot_pages);
- m->hstate = hstate;
- return 1;
-}
-#endif
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#define HUGEPD_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
-
-struct hugepd_freelist {
- struct rcu_head rcu;
- unsigned int index;
- void *ptes[0];
-};
-
-static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
-
-static void hugepd_free_rcu_callback(struct rcu_head *head)
-{
- struct hugepd_freelist *batch =
- container_of(head, struct hugepd_freelist, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++)
- kmem_cache_free(hugepte_cache, batch->ptes[i]);
-
- free_page((unsigned long)batch);
-}
-
-static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
-{
- struct hugepd_freelist **batchp;
-
- batchp = this_cpu_ptr(&hugepd_freelist_cur);
-
- if (atomic_read(&tlb->mm->mm_users) < 2 ||
- cpumask_equal(mm_cpumask(tlb->mm),
- cpumask_of(smp_processor_id()))) {
- kmem_cache_free(hugepte_cache, hugepte);
- put_cpu_var(hugepd_freelist_cur);
- return;
- }
-
- if (*batchp == NULL) {
- *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
- (*batchp)->index = 0;
- }
-
- (*batchp)->ptes[(*batchp)->index++] = hugepte;
- if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
- call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
- *batchp = NULL;
- }
- put_cpu_var(hugepd_freelist_cur);
-}
-#endif
+extern void hugepd_free(struct mmu_gather *tlb, void *hugepte);
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
unsigned long start, unsigned long end,
unsigned long floor, unsigned long ceiling)
--
2.5.0
next prev parent reply other threads:[~2016-01-12 7:16 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-12 7:15 [RFC PATCH V1 00/33] Book3s abstraction in preparation for new MMU model Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 01/33] powerpc/mm: add _PAGE_HASHPTE similar to 4K hash Aneesh Kumar K.V
2016-01-13 2:48 ` Balbir Singh
2016-01-13 6:02 ` Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 02/33] powerpc/mm: Split pgtable types to separate header Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 03/33] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table Aneesh Kumar K.V
2016-01-13 8:52 ` Balbir Singh
2016-01-15 0:25 ` Balbir Singh
2016-01-18 7:32 ` Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 04/33] powerpc/mm: Copy pgalloc (part 1) Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 05/33] powerpc/mm: Copy pgalloc (part 2) Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 06/33] powerpc/mm: Copy pgalloc (part 3) Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 07/33] mm: arch hook for vm_get_page_prot Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 08/33] mm: Some arch may want to use HPAGE_PMD related values as variables Aneesh Kumar K.V
2016-01-12 7:15 ` Aneesh Kumar K.V [this message]
2016-01-12 7:15 ` [RFC PATCH V1 10/33] powerpc/mm: free_hugepd_range split to hash and nonhash Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 11/33] powerpc/mm: Use helper instead of opencoding Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 12/33] powerpc/mm: Move hash64 specific defintions to seperate header Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 13/33] powerpc/mm: Move swap related definition ot hash64 header Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 14/33] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
2016-01-12 7:42 ` Denis Kirjanov
2016-01-13 3:57 ` Benjamin Herrenschmidt
2016-01-13 6:07 ` Aneesh Kumar K.V
2016-01-13 7:18 ` Benjamin Herrenschmidt
2016-01-12 7:15 ` [RFC PATCH V1 15/33] powerpc/mm: Use helper for finding pte filter mask for gup Aneesh Kumar K.V
2016-01-13 8:13 ` Denis Kirjanov
2016-01-12 7:15 ` [RFC PATCH V1 16/33] powerpc/mm: Move hash page table related functions to pgtable-hash64.c Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 17/33] mm: Change pmd_huge_pte type in mm_struct Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 18/33] powerpc/mm: Add helper for update page flags during ioremap Aneesh Kumar K.V
2016-01-12 7:45 ` Denis Kirjanov
2016-01-12 7:15 ` [RFC PATCH V1 19/33] powerpc/mm: Rename hash specific page table bits (_PAGE* -> H_PAGE*) Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 20/33] powerpc/mm: Use flush_tlb_page in ptep_clear_flush_young Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 21/33] powerpc/mm: THP is only available on hash64 as of now Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 22/33] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 23/33] powerpc/mm: Create a new headers for tlbflush for hash64 Aneesh Kumar K.V
2016-01-12 7:15 ` [RFC PATCH V1 24/33] powerpc/mm: Hash linux abstraction for page table accessors Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 25/33] powerpc/mm: Hash linux abstraction for functions in pgtable-hash.c Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 26/33] powerpc/mm: Hash linux abstraction for mmu context handling code Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 27/33] powerpc/mm: Move hash related mmu-*.h headers to book3s/ Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 28/33] powerpc/mm: Hash linux abstractions for early init routines Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 29/33] powerpc/mm: Hash linux abstraction for THP Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 30/33] powerpc/mm: Hash linux abstraction for HugeTLB Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 31/33] powerpc/mm: Hash linux abstraction for page table allocator Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 32/33] powerpc/mm: Hash linux abstraction for tlbflush routines Aneesh Kumar K.V
2016-01-12 7:16 ` [RFC PATCH V1 33/33] powerpc/mm: Hash linux abstraction for pte swap encoding Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1452582968-22669-10-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=benh@kernel.crashing.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mikey@neuling.org \
--cc=mpe@ellerman.id.au \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.