From: David Hildenbrand <david@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: linux-s390@vger.kernel.org, linux-mm@kvack.org,
David Hildenbrand <david@redhat.com>,
Heiko Carstens <heiko.carstens@de.ibm.com>,
Vasily Gorbik <gor@linux.ibm.com>,
Christian Borntraeger <borntraeger@de.ibm.com>,
Gerald Schaefer <gerald.schaefer@de.ibm.com>
Subject: [PATCH v2 6/9] s390/vmem: cleanup empty page tables
Date: Wed, 22 Jul 2020 11:45:55 +0200 [thread overview]
Message-ID: <20200722094558.9828-7-david@redhat.com> (raw)
In-Reply-To: <20200722094558.9828-1-david@redhat.com>
Let's clean up empty page tables. Consider only page tables that fully
fall into the identity mapping and the vmemmap range.
As there are no valid accesses to vmem/vmemmap within non-populated ranges,
the single tlb flush at the end should be sufficient.
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/s390/mm/vmem.c | 102 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 101 insertions(+), 1 deletion(-)
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index a2b79681df69d..b831f9f9130aa 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -63,6 +63,15 @@ pte_t __ref *vmem_pte_alloc(void)
return pte;
}
+static void vmem_pte_free(unsigned long *table)
+{
+ /* Boot memory is not expected to be removed: bail out, warn once if reserved. */
+ if (!slab_is_available() ||
+ WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+ return;
+ page_table_free(&init_mm, table);
+}
+
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
unsigned long end, bool add, bool direct)
@@ -105,6 +114,21 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
return ret;
}
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
+{
+ pte_t *pte;
+ int i;
+
+ /* Assumed fully in 1:1 mapping & vmemmap area; free only if all PTEs are none */
+ pte = pte_offset_kernel(pmd, start);
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++)
+ if (!pte_none(*pte))
+ return;
+
+ vmem_pte_free(__va(pmd_deref(*pmd)));
+ pmd_clear(pmd);
+}
+
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
unsigned long end, bool add, bool direct)
@@ -171,6 +195,8 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
ret = modify_pte_table(pmd, addr, next, add, direct);
if (ret)
goto out;
+ if (!add)
+ try_free_pte_table(pmd, addr & PMD_MASK);
}
ret = 0;
out:
@@ -179,6 +205,29 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
return ret;
}
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+ const unsigned long end = start + PUD_SIZE;
+ pmd_t *pmd;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+ return; /* [start, end) overlaps the KASAN shadow */
+#endif
+ /* Free the PMD table and clear the PUD entry only if all PMDs are none */
+ pmd = pmd_offset(pud, start);
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+ if (!pmd_none(*pmd))
+ return;
+
+ vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+ pud_clear(pud);
+}
+
static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
bool add, bool direct)
{
@@ -225,6 +274,8 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
ret = modify_pmd_table(pud, addr, next, add, direct);
if (ret)
goto out;
+ if (!add)
+ try_free_pmd_table(pud, addr & PUD_MASK);
}
ret = 0;
out:
@@ -233,6 +284,29 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
return ret;
}
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+ const unsigned long end = start + P4D_SIZE;
+ pud_t *pud;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+ return; /* [start, end) overlaps the KASAN shadow */
+#endif
+ /* Free the PUD table and clear the P4D entry only if all PUDs are none */
+ pud = pud_offset(p4d, start);
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++)
+ if (!pud_none(*pud))
+ return;
+
+ vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+ p4d_clear(p4d);
+}
+
static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
bool add, bool direct)
{
@@ -257,12 +331,37 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
ret = modify_pud_table(p4d, addr, next, add, direct);
if (ret)
goto out;
+ if (!add)
+ try_free_pud_table(p4d, addr & P4D_MASK);
}
ret = 0;
out:
return ret;
}
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+{
+ const unsigned long end = start + PGDIR_SIZE;
+ p4d_t *p4d;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+ return; /* [start, end) overlaps the KASAN shadow */
+#endif
+ /* Free the P4D table and clear the PGD entry only if all P4Ds are none */
+ p4d = p4d_offset(pgd, start);
+ for (i = 0; i < PTRS_PER_P4D; i++, p4d++)
+ if (!p4d_none(*p4d))
+ return;
+
+ vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+ pgd_clear(pgd);
+}
+
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
bool direct)
{
@@ -291,6 +390,8 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
ret = modify_p4d_table(pgd, addr, next, add, direct);
if (ret)
goto out;
+ if (!add)
+ try_free_p4d_table(pgd, addr & PGDIR_MASK);
}
ret = 0;
out:
@@ -319,7 +420,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
/*
* Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
*/
static void vmem_remove_range(unsigned long start, unsigned long size)
{
--
2.26.2
next prev parent reply other threads:[~2020-07-22 9:46 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-22 9:45 [PATCH v2 0/9] s390: implement and optimize vmemmap_free() David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 1/9] s390/vmem: rename vmem_add_mem() to vmem_add_range() David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 2/9] s390/vmem: consolidate vmem_add_range() and vmem_remove_range() David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 3/9] s390/vmemmap: extend modify_pagetable() to handle vmemmap David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 4/9] s390/vmemmap: cleanup when vmemmap_populate() fails David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 5/9] s390/vmemmap: take the vmem_mutex when populating/freeing David Hildenbrand
2020-07-22 9:45 ` David Hildenbrand [this message]
2020-07-22 9:45 ` [PATCH v2 7/9] s390/vmemmap: fallback to PTEs if mapping large PMD fails David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 8/9] s390/vmemmap: remember unused sub-pmd ranges David Hildenbrand
2020-07-22 9:45 ` [PATCH v2 9/9] s390/vmemmap: avoid memset(PAGE_UNUSED) when adding consecutive sections David Hildenbrand
2020-07-24 14:32 ` [PATCH v2 0/9] s390: implement and optimize vmemmap_free() Heiko Carstens
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200722094558.9828-7-david@redhat.com \
--to=david@redhat.com \
--cc=borntraeger@de.ibm.com \
--cc=gerald.schaefer@de.ibm.com \
--cc=gor@linux.ibm.com \
--cc=heiko.carstens@de.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-s390@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).