All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Hildenbrand <david@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: linux-s390@vger.kernel.org, linux-mm@kvack.org,
	David Hildenbrand <david@redhat.com>,
	Heiko Carstens <heiko.carstens@de.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Gerald Schaefer <gerald.schaefer@de.ibm.com>
Subject: [PATCH v2 3/9] s390/vmemmap: extend modify_pagetable() to handle vmemmap
Date: Wed, 22 Jul 2020 11:45:52 +0200	[thread overview]
Message-ID: <20200722094558.9828-4-david@redhat.com> (raw)
In-Reply-To: <20200722094558.9828-1-david@redhat.com>

Extend our shiny new modify_pagetable() to handle !direct (vmemmap)
mappings in addition to direct mappings. Convert vmemmap_populate() to use
add_pagetable() and implement vmemmap_free() via remove_pagetable().

Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/s390/mm/vmem.c | 181 +++++++++++++++++++-------------------------
 1 file changed, 76 insertions(+), 105 deletions(-)

diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 177daf389d391..43fe1e2eb90ea 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -29,6 +29,15 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 	return (void *) memblock_phys_alloc(size, size);
 }
 
+static void vmem_free_pages(unsigned long addr, int order)
+{
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+		return;
+	free_pages(addr, order);
+}
+
 void *vmem_crst_alloc(unsigned long val)
 {
 	unsigned long *table;
@@ -54,10 +63,12 @@ pte_t __ref *vmem_pte_alloc(void)
 	return pte;
 }
 
-static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end,
-			    bool add)
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
 {
 	unsigned long prot, pages = 0;
+	int ret = -ENOMEM;
 	pte_t *pte;
 
 	prot = pgprot_val(PAGE_KERNEL);
@@ -69,20 +80,34 @@ static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end,
 		if (!add) {
 			if (pte_none(*pte))
 				continue;
+			if (!direct)
+				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
 			pte_clear(&init_mm, addr, pte);
 		} else if (pte_none(*pte)) {
-			pte_val(*pte) = addr | prot;
+			if (!direct) {
+				void *new_page = vmemmap_alloc_block(PAGE_SIZE,
+								     NUMA_NO_NODE);
+
+				if (!new_page)
+					goto out;
+				pte_val(*pte) = __pa(new_page) | prot;
+			} else
+				pte_val(*pte) = addr | prot;
 		} else
 			continue;
 
 		pages++;
 	}
-
-	update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
+	return ret;
 }
 
-static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end,
-			    bool add)
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
 {
 	unsigned long next, prot, pages = 0;
 	int ret = -ENOMEM;
@@ -103,6 +128,9 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end,
 			if (pmd_large(*pmd) && !add) {
 				if (IS_ALIGNED(addr, PMD_SIZE) &&
 				    IS_ALIGNED(next, PMD_SIZE)) {
+					if (!direct)
+						vmem_free_pages(pmd_deref(*pmd),
+								get_order(PMD_SIZE));
 					pmd_clear(pmd);
 					pages++;
 				}
@@ -111,11 +139,27 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end,
 		} else if (pmd_none(*pmd)) {
 			if (IS_ALIGNED(addr, PMD_SIZE) &&
 			    IS_ALIGNED(next, PMD_SIZE) &&
-			    MACHINE_HAS_EDAT1 && addr &&
+			    MACHINE_HAS_EDAT1 && addr && direct &&
 			    !debug_pagealloc_enabled()) {
 				pmd_val(*pmd) = addr | prot;
 				pages++;
 				continue;
+			} else if (!direct && MACHINE_HAS_EDAT1) {
+				void *new_page;
+
+				/*
+				 * Use 1MB frames for vmemmap if available. We
+				 * always use large frames even if they are only
+				 * partially used. Otherwise we would have also
+				 * page tables since vmemmap_populate gets
+				 * called for each section separately.
+				 */
+				new_page = vmemmap_alloc_block(PMD_SIZE,
+							       NUMA_NO_NODE);
+				if (!new_page)
+					goto out;
+				pmd_val(*pmd) = __pa(new_page) | prot;
+				continue;
 			}
 			pte = vmem_pte_alloc();
 			if (!pte)
@@ -124,16 +168,19 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end,
 		} else if (pmd_large(*pmd))
 			continue;
 
-		modify_pte_table(pmd, addr, next, add);
+		ret = modify_pte_table(pmd, addr, next, add, direct);
+		if (ret)
+			goto out;
 	}
 	ret = 0;
 out:
-	update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
 	return ret;
 }
 
 static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
-			    bool add)
+			    bool add, bool direct)
 {
 	unsigned long next, prot, pages = 0;
 	int ret = -ENOMEM;
@@ -162,7 +209,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 		} else if (pud_none(*pud)) {
 			if (IS_ALIGNED(addr, PUD_SIZE) &&
 			    IS_ALIGNED(next, PUD_SIZE) &&
-			    MACHINE_HAS_EDAT2 && addr &&
+			    MACHINE_HAS_EDAT2 && addr && direct &&
 			    !debug_pagealloc_enabled()) {
 				pud_val(*pud) = addr | prot;
 				pages++;
@@ -175,18 +222,19 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 		} else if (pud_large(*pud))
 			continue;
 
-		ret = modify_pmd_table(pud, addr, next, add);
+		ret = modify_pmd_table(pud, addr, next, add, direct);
 		if (ret)
 			goto out;
 	}
 	ret = 0;
 out:
-	update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
 	return ret;
 }
 
 static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
-			    bool add)
+			    bool add, bool direct)
 {
 	unsigned long next;
 	int ret = -ENOMEM;
@@ -206,7 +254,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
 				goto out;
 		}
 
-		ret = modify_pud_table(p4d, addr, next, add);
+		ret = modify_pud_table(p4d, addr, next, add, direct);
 		if (ret)
 			goto out;
 	}
@@ -215,7 +263,8 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return ret;
 }
 
-static int modify_pagetable(unsigned long start, unsigned long end, bool add)
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+			    bool direct)
 {
 	unsigned long addr, next;
 	int ret = -ENOMEM;
@@ -239,7 +288,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add)
 			pgd_populate(&init_mm, pgd, p4d);
 		}
 
-		ret = modify_p4d_table(pgd, addr, next, add);
+		ret = modify_p4d_table(pgd, addr, next, add, direct);
 		if (ret)
 			goto out;
 	}
@@ -250,14 +299,14 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add)
 	return ret;
 }
 
-static int add_pagetable(unsigned long start, unsigned long end)
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
 {
-	return modify_pagetable(start, end, true);
+	return modify_pagetable(start, end, true, direct);
 }
 
-static int remove_pagetable(unsigned long start, unsigned long end)
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
 {
-	return modify_pagetable(start, end, false);
+	return modify_pagetable(start, end, false, direct);
 }
 
 /*
@@ -265,7 +314,7 @@ static int remove_pagetable(unsigned long start, unsigned long end)
  */
 static int vmem_add_range(unsigned long start, unsigned long size)
 {
-	return add_pagetable(start, start + size);
+	return add_pagetable(start, start + size, true);
 }
 
 /*
@@ -274,7 +323,7 @@ static int vmem_add_range(unsigned long start, unsigned long size)
  */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-	remove_pagetable(start, start + size);
+	remove_pagetable(start, start + size, true);
 }
 
 /*
@@ -283,92 +332,14 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
-	unsigned long pgt_prot, sgt_prot;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
-
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-	}
-	for (address = start; address < end;) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			/* Use 1MB frames for vmemmap if available. We always
-			 * use large frames even if they are only partially
-			 * used.
-			 * Otherwise we would have also page tables since
-			 * vmemmap_populate gets called for each section
-			 * separately. */
-			if (MACHINE_HAS_EDAT1) {
-				void *new_page;
-
-				new_page = vmemmap_alloc_block(PMD_SIZE, node);
-				if (!new_page)
-					goto out;
-				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
-				address = (address + PMD_SIZE) & PMD_MASK;
-				continue;
-			}
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		} else if (pmd_large(*pm_dir)) {
-			address = (address + PMD_SIZE) & PMD_MASK;
-			continue;
-		}
-
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		if (pte_none(*pt_dir)) {
-			void *new_page;
-
-			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
-			if (!new_page)
-				goto out;
-			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
-		}
-		address += PAGE_SIZE;
-	}
-	ret = 0;
-out:
-	return ret;
+	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
+	return add_pagetable(start, end, false);
 }
 
 void vmemmap_free(unsigned long start, unsigned long end,
 		struct vmem_altmap *altmap)
 {
+	remove_pagetable(start, end, false);
 }
 
 void vmem_remove_mapping(unsigned long start, unsigned long size)
-- 
2.26.2

  parent reply	other threads:[~2020-07-22  9:46 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-22  9:45 [PATCH v2 0/9] s390: implement and optimize vmemmap_free() David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 1/9] s390/vmem: rename vmem_add_mem() to vmem_add_range() David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 2/9] s390/vmem: consolidate vmem_add_range() and vmem_remove_range() David Hildenbrand
2020-07-22  9:45 ` David Hildenbrand [this message]
2020-07-22  9:45 ` [PATCH v2 4/9] s390/vmemmap: cleanup when vmemmap_populate() fails David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 5/9] s390/vmemmap: take the vmem_mutex when populating/freeing David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 6/9] s390/vmem: cleanup empty page tables David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 7/9] s390/vmemmap: fallback to PTEs if mapping large PMD fails David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 8/9] s390/vmemmap: remember unused sub-pmd ranges David Hildenbrand
2020-07-22  9:45 ` [PATCH v2 9/9] s390/vmemmap: avoid memset(PAGE_UNUSED) when adding consecutive sections David Hildenbrand
2020-07-24 14:32 ` [PATCH v2 0/9] s390: implement and optimize vmemmap_free() Heiko Carstens

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200722094558.9828-4-david@redhat.com \
    --to=david@redhat.com \
    --cc=borntraeger@de.ibm.com \
    --cc=gerald.schaefer@de.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=heiko.carstens@de.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.