* [PATCH 0/7] New arch interfaces for manipulating multiple pages
@ 2023-02-11  3:39 Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 1/7] mm: Convert page_table_check_pte_set() to page_table_check_ptes_set() Matthew Wilcox (Oracle)
                   ` (8 more replies)
  0 siblings, 9 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

Here's my latest draft of a new set of page table manipulation APIs.  I've
only done alpha, arc and x86 (other than x86, I'm going alphabetically).
Before I go much further, some feedback might be a good idea.  Or if
someone wants to volunteer to do their architecture ;-)
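
For a feel of the caller side, here is roughly how generic code would
map "nr" consecutive pages starting at "page" with the new interfaces.
This is purely an illustrative sketch (none of it is code from this
series, and the local variables are placeholders), but the signatures
match the patches below:

        /* nr physically consecutive pages, starting at "page" */
        flush_icache_pages(vma, page, nr);
        set_ptes(vma->vm_mm, addr, ptep, mk_pte(page, vma->vm_page_prot), nr);
        update_mmu_cache_range(vma, addr, ptep, nr);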

Matthew Wilcox (Oracle) (7):
  mm: Convert page_table_check_pte_set() to page_table_check_ptes_set()
  mm: Add generic flush_icache_pages() and documentation
  mm: Add folio_flush_mapping()
  mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
  alpha: Implement the new page table range API
  arc: Implement the new page table range API
  x86: Implement the new page table range API

 Documentation/core-api/cachetlb.rst       | 35 ++++++-------
 arch/alpha/include/asm/cacheflush.h       | 10 ++++
 arch/alpha/include/asm/pgtable.h          | 18 ++++++-
 arch/arc/include/asm/cacheflush.h         |  7 ++-
 arch/arc/include/asm/pgtable-bits-arcv2.h | 20 ++++++--
 arch/arc/mm/cache.c                       | 61 ++++++++++++++---------
 arch/arc/mm/tlb.c                         | 18 ++++---
 arch/arm64/include/asm/pgtable.h          |  2 +-
 arch/riscv/include/asm/pgtable.h          |  2 +-
 arch/x86/include/asm/pgtable.h            | 21 ++++++--
 include/asm-generic/cacheflush.h          |  5 ++
 include/linux/cacheflush.h                |  4 +-
 include/linux/page_table_check.h          | 14 +++---
 include/linux/pagemap.h                   | 26 ++++++++--
 mm/page_table_check.c                     | 14 +++---
 mm/util.c                                 |  2 +-
 16 files changed, 176 insertions(+), 83 deletions(-)

-- 
2.39.1




* [PATCH 1/7] mm: Convert page_table_check_pte_set() to page_table_check_ptes_set()
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 2/7] mm: Add generic flush_icache_pages() and documentation Matthew Wilcox (Oracle)
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

Tell the page table check how many PTEs & PFNs we want it to check.
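
In other words, callers go from checking one entry to checking a batch
(a sketch using the names from this patch; mm, addr, ptep, pte and nr
are placeholders):

        /*
         * Checks "nr" consecutive PTEs starting at ptep, and "nr" PFNs
         * starting at pte_pfn(pte), instead of a single entry.
         */
        page_table_check_ptes_set(mm, addr, ptep, pte, nr);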

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/arm64/include/asm/pgtable.h |  2 +-
 arch/riscv/include/asm/pgtable.h |  2 +-
 arch/x86/include/asm/pgtable.h   |  2 +-
 include/linux/page_table_check.h | 14 +++++++-------
 mm/page_table_check.c            | 14 ++++++++------
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b6ba466e2e8a..69765dc697af 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -358,7 +358,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	page_table_check_pte_set(mm, addr, ptep, pte);
+	page_table_check_ptes_set(mm, addr, ptep, pte, 1);
 	return __set_pte_at(mm, addr, ptep, pte);
 }
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2d92de386837..13222fd5c4b4 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -459,7 +459,7 @@ static inline void __set_pte_at(struct mm_struct *mm,
 static inline void set_pte_at(struct mm_struct *mm,
 	unsigned long addr, pte_t *ptep, pte_t pteval)
 {
-	page_table_check_pte_set(mm, addr, ptep, pteval);
+	page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
 	__set_pte_at(mm, addr, ptep, pteval);
 }
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7425f32e5293..84be3e07b112 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1022,7 +1022,7 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	page_table_check_pte_set(mm, addr, ptep, pte);
+	page_table_check_ptes_set(mm, addr, ptep, pte, 1);
 	set_pte(ptep, pte);
 }
 
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 01e16c7696ec..ba269c7009e4 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -20,8 +20,8 @@ void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
 				  pmd_t pmd);
 void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
 				  pud_t pud);
-void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte);
+void __page_table_check_ptes_set(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, unsigned int nr);
 void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
 				pmd_t *pmdp, pmd_t pmd);
 void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
@@ -73,14 +73,14 @@ static inline void page_table_check_pud_clear(struct mm_struct *mm,
 	__page_table_check_pud_clear(mm, addr, pud);
 }
 
-static inline void page_table_check_pte_set(struct mm_struct *mm,
+static inline void page_table_check_ptes_set(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep,
-					    pte_t pte)
+					    pte_t pte, unsigned int nr)
 {
 	if (static_branch_likely(&page_table_check_disabled))
 		return;
 
-	__page_table_check_pte_set(mm, addr, ptep, pte);
+	__page_table_check_ptes_set(mm, addr, ptep, pte, nr);
 }
 
 static inline void page_table_check_pmd_set(struct mm_struct *mm,
@@ -138,9 +138,9 @@ static inline void page_table_check_pud_clear(struct mm_struct *mm,
 {
 }
 
-static inline void page_table_check_pte_set(struct mm_struct *mm,
+static inline void page_table_check_ptes_set(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep,
-					    pte_t pte)
+					    pte_t pte, unsigned int nr)
 {
 }
 
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 25d8610c0042..e6f4d40caaa2 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -184,20 +184,22 @@ void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(__page_table_check_pud_clear);
 
-void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte)
+void __page_table_check_ptes_set(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, unsigned int nr)
 {
+	unsigned int i;
+
 	if (&init_mm == mm)
 		return;
 
-	__page_table_check_pte_clear(mm, addr, *ptep);
+	for (i = 0; i < nr; i++)
+		__page_table_check_pte_clear(mm, addr, ptep[i]);
 	if (pte_user_accessible_page(pte)) {
-		page_table_check_set(mm, addr, pte_pfn(pte),
-				     PAGE_SIZE >> PAGE_SHIFT,
+		page_table_check_set(mm, addr, pte_pfn(pte), nr,
 				     pte_write(pte));
 	}
 }
-EXPORT_SYMBOL(__page_table_check_pte_set);
+EXPORT_SYMBOL(__page_table_check_ptes_set);
 
 void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
 				pmd_t *pmdp, pmd_t pmd)
-- 
2.39.1




* [PATCH 2/7] mm: Add generic flush_icache_pages() and documentation
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 1/7] mm: Convert page_table_check_pte_set() to page_table_check_ptes_set() Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 3/7] mm: Add folio_flush_mapping() Matthew Wilcox (Oracle)
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

flush_icache_page() is deprecated but not yet removed, so add
a range version of it.  Change the documentation to refer to
update_mmu_cache_range() instead of update_mmu_cache().
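
An architecture that wants to keep the old single-page call around
while it is phased out could, for instance, define it in terms of the
range version (just a sketch, not something this patch does):

        #define flush_icache_page(vma, page)	flush_icache_pages(vma, page, 1)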

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 Documentation/core-api/cachetlb.rst | 35 +++++++++++++++--------------
 include/asm-generic/cacheflush.h    |  5 +++++
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst
index 5c0552e78c58..d4c9e2a28d36 100644
--- a/Documentation/core-api/cachetlb.rst
+++ b/Documentation/core-api/cachetlb.rst
@@ -88,13 +88,13 @@ changes occur:
 
 	This is used primarily during fault processing.
 
-5) ``void update_mmu_cache(struct vm_area_struct *vma,
-   unsigned long address, pte_t *ptep)``
+5) ``void update_mmu_cache_range(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep, unsigned int nr)``
 
-	At the end of every page fault, this routine is invoked to
-	tell the architecture specific code that a translation
-	now exists at virtual address "address" for address space
-	"vma->vm_mm", in the software page tables.
+	At the end of every page fault, this routine is invoked to tell
+	the architecture specific code that translations now exist
+	in the software page tables for address space "vma->vm_mm"
+	at virtual address "address" for "nr" consecutive pages.
 
 	A port may use this information in any way it so chooses.
 	For example, it could use this event to pre-load TLB
@@ -306,17 +306,18 @@ maps this page at its virtual address.
 	private".  The kernel guarantees that, for pagecache pages, it will
 	clear this bit when such a page first enters the pagecache.
 
-	This allows these interfaces to be implemented much more efficiently.
-	It allows one to "defer" (perhaps indefinitely) the actual flush if
-	there are currently no user processes mapping this page.  See sparc64's
-	flush_dcache_page and update_mmu_cache implementations for an example
-	of how to go about doing this.
+	This allows these interfaces to be implemented much more
+	efficiently.  It allows one to "defer" (perhaps indefinitely) the
+	actual flush if there are currently no user processes mapping this
+	page.  See sparc64's flush_dcache_page and update_mmu_cache_range
+	implementations for an example of how to go about doing this.
 
-	The idea is, first at flush_dcache_page() time, if page_file_mapping()
-	returns a mapping, and mapping_mapped on that mapping returns %false,
-	just mark the architecture private page flag bit.  Later, in
-	update_mmu_cache(), a check is made of this flag bit, and if set the
-	flush is done and the flag bit is cleared.
+	The idea is, first at flush_dcache_page() time, if
+	page_file_mapping() returns a mapping, and mapping_mapped on that
+	mapping returns %false, just mark the architecture private page
+	flag bit.  Later, in update_mmu_cache_range(), a check is made
+	of this flag bit, and if set the flush is done and the flag bit
+	is cleared.
 
 	.. important::
 
@@ -369,7 +370,7 @@ maps this page at its virtual address.
   ``void flush_icache_page(struct vm_area_struct *vma, struct page *page)``
 
 	All the functionality of flush_icache_page can be implemented in
-	flush_dcache_page and update_mmu_cache. In the future, the hope
+	flush_dcache_page and update_mmu_cache_range. In the future, the hope
 	is to remove this interface completely.
 
 The final category of APIs is for I/O to deliberately aliased address
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index f46258d1a080..09d51a680765 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -78,6 +78,11 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
 #endif
 
 #ifndef flush_icache_page
+static inline void flush_icache_pages(struct vm_area_struct *vma,
+				     struct page *page, unsigned int nr)
+{
+}
+
 static inline void flush_icache_page(struct vm_area_struct *vma,
 				     struct page *page)
 {
-- 
2.39.1




* [PATCH 3/7] mm: Add folio_flush_mapping()
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 1/7] mm: Convert page_table_check_pte_set() to page_table_check_ptes_set() Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 2/7] mm: Add generic flush_icache_pages() and documentation Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-11  3:39 ` [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO Matthew Wilcox (Oracle)
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

This is the folio equivalent of page_mapping_file(), but rename it
to make it clear that it's very different from page_file_mapping().
Theoretically, there's nothing flush-only about it, but there are no
other users today, and I doubt there will be; it's almost always more
useful to know the swapfile's mapping or the swapcache's mapping.
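
For illustration, typical use in an architecture's cache flushing code
would look something like this (a sketch modelled on the arc conversion
later in this series):

        struct address_space *mapping;

        /* anonymous folios return NULL here, even if in the swap cache */
        mapping = folio_flush_mapping(folio);
        if (!mapping)
                return;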

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagemap.h | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 51b75b89730e..647c5a036a97 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -369,6 +369,26 @@ static inline struct address_space *folio_file_mapping(struct folio *folio)
 	return folio->mapping;
 }
 
+/**
+ * folio_flush_mapping - Find the file mapping this folio belongs to.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to.  Anonymous folios return NULL, even if they're in
+ * the swap cache.  Other kinds of folio also return NULL.
+ *
+ * This is ONLY used by architecture cache flushing code.  If you aren't
+ * writing cache flushing code, you want either folio_mapping() or
+ * folio_file_mapping().
+ */
+static inline struct address_space *folio_flush_mapping(struct folio *folio)
+{
+	if (unlikely(folio_test_swapcache(folio)))
+		return NULL;
+
+	return folio_mapping(folio);
+}
+
 static inline struct address_space *page_file_mapping(struct page *page)
 {
 	return folio_file_mapping(page_folio(page));
@@ -379,11 +399,7 @@ static inline struct address_space *page_file_mapping(struct page *page)
  */
 static inline struct address_space *page_mapping_file(struct page *page)
 {
-	struct folio *folio = page_folio(page);
-
-	if (unlikely(folio_test_swapcache(folio)))
-		return NULL;
-	return folio_mapping(folio);
+	return folio_flush_mapping(page_folio(page));
 }
 
 /**
-- 
2.39.1




* [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2023-02-11  3:39 ` [PATCH 3/7] mm: Add folio_flush_mapping() Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-12 15:51   ` Mike Rapoport
  2023-02-11  3:39 ` [PATCH 5/7] alpha: Implement the new page table range API Matthew Wilcox (Oracle)
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

Current best practice is to reuse the name of the function as a define
to indicate that the function is implemented by the architecture.
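
That is, an architecture which provides its own implementation is now
expected to advertise it like this (illustrative; the arc patch later
in this series follows the same pattern):

        /* in the arch's <asm/cacheflush.h> */
        void flush_dcache_folio(struct folio *folio);
        #define flush_dcache_folio flush_dcache_folio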

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/cacheflush.h | 4 ++--
 mm/util.c                  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
index a6189d21f2ba..82136f3fcf54 100644
--- a/include/linux/cacheflush.h
+++ b/include/linux/cacheflush.h
@@ -7,14 +7,14 @@
 struct folio;
 
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+#ifndef flush_dcache_folio
 void flush_dcache_folio(struct folio *folio);
 #endif
 #else
 static inline void flush_dcache_folio(struct folio *folio)
 {
 }
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
+#define flush_dcache_folio flush_dcache_folio
 #endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
 
 #endif /* _LINUX_CACHEFLUSH_H */
diff --git a/mm/util.c b/mm/util.c
index cec9327b27b4..39ea7af8171c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1124,7 +1124,7 @@ void page_offline_end(void)
 }
 EXPORT_SYMBOL(page_offline_end);
 
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+#ifndef flush_dcache_folio
 void flush_dcache_folio(struct folio *folio)
 {
 	long i, nr = folio_nr_pages(folio);
-- 
2.39.1




* [PATCH 5/7] alpha: Implement the new page table range API
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2023-02-11  3:39 ` [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-13  3:15   ` Yin, Fengwei
  2023-02-11  3:39 ` [PATCH 6/7] arc: " Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range() and flush_icache_pages().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/alpha/include/asm/cacheflush.h | 10 ++++++++++
 arch/alpha/include/asm/pgtable.h    | 18 +++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h
index 9945ff483eaf..3956460e69e2 100644
--- a/arch/alpha/include/asm/cacheflush.h
+++ b/arch/alpha/include/asm/cacheflush.h
@@ -57,6 +57,16 @@ extern void flush_icache_user_page(struct vm_area_struct *vma,
 #define flush_icache_page(vma, page) \
 	flush_icache_user_page((vma), (page), 0, 0)
 
+/*
+ * Both implementations of flush_icache_user_page flush the entire
+ * address space, so one call, no matter how many pages.
+ */
+static inline void flush_icache_pages(struct vm_area_struct *vma,
+		struct page *page, unsigned int nr)
+{
+	flush_icache_user_page(vma, page, 0, 0);
+}
+
 #include <asm-generic/cacheflush.h>
 
 #endif /* _ALPHA_CACHEFLUSH_H */
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index ba43cb841d19..1e3354e9731b 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -26,7 +26,18 @@ struct vm_area_struct;
  * hook is made available.
  */
 #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += 1UL << 32;
+	}
+}
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 /* PMD_SHIFT determines the size of the area a second-level page table can map */
 #define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3))
@@ -303,6 +314,11 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
 {
 }
 
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
+{
+}
+
 /*
  * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
  * are !pte_none() && !pte_present().
-- 
2.39.1




* [PATCH 6/7] arc: Implement the new page table range API
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
                   ` (4 preceding siblings ...)
  2023-02-11  3:39 ` [PATCH 5/7] alpha: Implement the new page table range API Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-13  3:09   ` Yin, Fengwei
  2023-02-11  3:39 ` [PATCH 7/7] x86: " Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range(), flush_dcache_folio()
and flush_icache_pages().

This is a fairly deep change.  The PG_dc_clean flag changes from being a
per-page bit to being a per-folio bit (which means it cannot always be set
as we don't know that all pages in this folio were cleaned).  The internal
flush routines are enhanced to take the number of pages to flush.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/arc/include/asm/cacheflush.h         |  7 ++-
 arch/arc/include/asm/pgtable-bits-arcv2.h | 20 ++++++--
 arch/arc/mm/cache.c                       | 61 ++++++++++++++---------
 arch/arc/mm/tlb.c                         | 18 ++++---
 4 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index e201b4b1655a..04f65f588510 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -25,17 +25,20 @@
  * in update_mmu_cache()
  */
 #define flush_icache_page(vma, page)
+#define flush_icache_pages(vma, page, nr)
 
 void flush_cache_all(void);
 
 void flush_icache_range(unsigned long kstart, unsigned long kend);
 void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len);
-void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr);
-void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
+void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr);
+void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
 void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
+#define flush_dcache_folio flush_dcache_folio
 
 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
 void dma_cache_inv(phys_addr_t start, unsigned long sz);
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index 6e9f8ca6d6a1..4a1b2ce204c6 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -100,14 +100,24 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
 {
-	set_pte(ptep, pteval);
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += PAGE_SIZE;
+	}
 }
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-		      pte_t *ptep);
+void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long address,
+		      pte_t *ptep, unsigned int nr);
+
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 
 /*
  * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 55c6de138eae..3c16ee942a5c 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -752,17 +752,17 @@ static inline void arc_slc_enable(void)
  * There's a corollary case, where kernel READs from a userspace mapped page.
  * If the U-mapping is not congruent to K-mapping, former needs flushing.
  */
-void flush_dcache_page(struct page *page)
+void flush_dcache_folio(struct folio *folio)
 {
 	struct address_space *mapping;
 
 	if (!cache_is_vipt_aliasing()) {
-		clear_bit(PG_dc_clean, &page->flags);
+		clear_bit(PG_dc_clean, &folio->flags);
 		return;
 	}
 
 	/* don't handle anon pages here */
-	mapping = page_mapping_file(page);
+	mapping = folio_flush_mapping(folio);
 	if (!mapping)
 		return;
 
@@ -771,17 +771,27 @@ void flush_dcache_page(struct page *page)
 	 * Make a note that K-mapping is dirty
 	 */
 	if (!mapping_mapped(mapping)) {
-		clear_bit(PG_dc_clean, &page->flags);
-	} else if (page_mapcount(page)) {
-
+		clear_bit(PG_dc_clean, &folio->flags);
+	} else if (folio_mapped(folio)) {
 		/* kernel reading from page with U-mapping */
-		phys_addr_t paddr = (unsigned long)page_address(page);
-		unsigned long vaddr = page->index << PAGE_SHIFT;
+		phys_addr_t paddr = (unsigned long)folio_address(folio);
+		unsigned long vaddr = folio_pos(folio);
 
+		/*
+		 * vaddr is not actually the virtual address, but is
+		 * congruent to every user mapping.
+		 */
 		if (addr_not_cache_congruent(paddr, vaddr))
-			__flush_dcache_page(paddr, vaddr);
+			__flush_dcache_pages(paddr, vaddr,
+						folio_nr_pages(folio));
 	}
 }
+EXPORT_SYMBOL(flush_dcache_folio);
+
+void flush_dcache_page(struct page *page)
+{
+	return flush_dcache_folio(page_folio(page));
+}
 EXPORT_SYMBOL(flush_dcache_page);
 
 /*
@@ -921,18 +931,18 @@ void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
 }
 
 /* wrapper to compile time eliminate alignment checks in flush loop */
-void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
+void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr)
 {
-	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
+	__ic_line_inv_vaddr(paddr, vaddr, nr * PAGE_SIZE);
 }
 
 /*
  * wrapper to clearout kernel or userspace mappings of a page
  * For kernel mappings @vaddr == @paddr
  */
-void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
+void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr)
 {
-	__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
+	__dc_line_op(paddr, vaddr & PAGE_MASK, nr * PAGE_SIZE, OP_FLUSH_N_INV);
 }
 
 noinline void flush_cache_all(void)
@@ -962,10 +972,10 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
 
 	u_vaddr &= PAGE_MASK;
 
-	__flush_dcache_page(paddr, u_vaddr);
+	__flush_dcache_pages(paddr, u_vaddr, 1);
 
 	if (vma->vm_flags & VM_EXEC)
-		__inv_icache_page(paddr, u_vaddr);
+		__inv_icache_pages(paddr, u_vaddr, 1);
 }
 
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
@@ -978,9 +988,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
 		     unsigned long u_vaddr)
 {
 	/* TBD: do we really need to clear the kernel mapping */
-	__flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
-	__flush_dcache_page((phys_addr_t)page_address(page),
-			    (phys_addr_t)page_address(page));
+	__flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1);
+	__flush_dcache_pages((phys_addr_t)page_address(page),
+			    (phys_addr_t)page_address(page), 1);
 
 }
 
@@ -989,6 +999,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long u_vaddr, struct vm_area_struct *vma)
 {
+	struct folio *src = page_folio(from);
+	struct folio *dst = page_folio(to);
 	void *kfrom = kmap_atomic(from);
 	void *kto = kmap_atomic(to);
 	int clean_src_k_mappings = 0;
@@ -1005,7 +1017,7 @@ void copy_user_highpage(struct page *to, struct page *from,
 	 * addr_not_cache_congruent() is 0
 	 */
 	if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
-		__flush_dcache_page((unsigned long)kfrom, u_vaddr);
+		__flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1);
 		clean_src_k_mappings = 1;
 	}
 
@@ -1019,17 +1031,17 @@ void copy_user_highpage(struct page *to, struct page *from,
 	 * non copied user pages (e.g. read faults which wire in pagecache page
 	 * directly).
 	 */
-	clear_bit(PG_dc_clean, &to->flags);
+	clear_bit(PG_dc_clean, &dst->flags);
 
 	/*
 	 * if SRC was already usermapped and non-congruent to kernel mapping
 	 * sync the kernel mapping back to physical page
 	 */
 	if (clean_src_k_mappings) {
-		__flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
-		set_bit(PG_dc_clean, &from->flags);
+		__flush_dcache_pages((unsigned long)kfrom,
+					(unsigned long)kfrom, 1);
 	} else {
-		clear_bit(PG_dc_clean, &from->flags);
+		clear_bit(PG_dc_clean, &src->flags);
 	}
 
 	kunmap_atomic(kto);
@@ -1038,8 +1050,9 @@ void copy_user_highpage(struct page *to, struct page *from,
 
 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	clear_page(to);
-	clear_bit(PG_dc_clean, &page->flags);
+	clear_bit(PG_dc_clean, &folio->flags);
 }
 EXPORT_SYMBOL(clear_user_page);
 
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 5f71445f26bd..0a996b65bb4e 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -467,8 +467,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
  * Note that flush (when done) involves both WBACK - so physical page is
  * in sync as well as INV - so any non-congruent aliases don't remain
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
-		      pte_t *ptep)
+void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long vaddr_unaligned, pte_t *ptep, unsigned int nr)
 {
 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
 	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
@@ -491,15 +491,19 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 	 */
 	if ((vma->vm_flags & VM_EXEC) ||
 	     addr_not_cache_congruent(paddr, vaddr)) {
-
-		int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
+		struct folio *folio = page_folio(page);
+		int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags);
 		if (dirty) {
+			unsigned long offset = offset_in_folio(folio, paddr);
+			nr = folio_nr_pages(folio);
+			paddr -= offset;
+			vaddr -= offset;
 			/* wback + inv dcache lines (K-mapping) */
-			__flush_dcache_page(paddr, paddr);
+			__flush_dcache_pages(paddr, paddr, nr);
 
 			/* invalidate any existing icache lines (U-mapping) */
 			if (vma->vm_flags & VM_EXEC)
-				__inv_icache_page(paddr, vaddr);
+				__inv_icache_pages(paddr, vaddr, nr);
 		}
 	}
 }
@@ -531,7 +535,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 				 pmd_t *pmd)
 {
 	pte_t pte = __pte(pmd_val(*pmd));
-	update_mmu_cache(vma, addr, &pte);
+	update_mmu_cache_range(vma, addr, &pte, HPAGE_PMD_NR);
 }
 
 void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
-- 
2.39.1




* [PATCH 7/7] x86: Implement the new page table range API
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
                   ` (5 preceding siblings ...)
  2023-02-11  3:39 ` [PATCH 6/7] arc: " Matthew Wilcox (Oracle)
@ 2023-02-11  3:39 ` Matthew Wilcox (Oracle)
  2023-02-13 21:04 ` [PATCH 8/7] arm: " Matthew Wilcox (Oracle)
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
  8 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-11  3:39 UTC (permalink / raw)
  To: linux-mm, linux-arch; +Cc: Matthew Wilcox (Oracle)

Convert set_pte_at() into set_ptes() and add a noop
update_mmu_cache_range().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/x86/include/asm/pgtable.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 84be3e07b112..f424371ea143 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1019,13 +1019,22 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
 	return res;
 }
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pte)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte, unsigned int nr)
 {
-	page_table_check_ptes_set(mm, addr, ptep, pte, 1);
-	set_pte(ptep, pte);
+	page_table_check_ptes_set(mm, addr, ptep, pte, nr);
+
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte = __pte(pte_val(pte) + PAGE_SIZE);
+	}
 }
 
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
+
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
@@ -1291,6 +1300,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *ptep)
 {
 }
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+}
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmd)
 {
-- 
2.39.1




* Re: [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
  2023-02-11  3:39 ` [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO Matthew Wilcox (Oracle)
@ 2023-02-12 15:51   ` Mike Rapoport
  2023-02-12 23:59     ` Matthew Wilcox
  0 siblings, 1 reply; 37+ messages in thread
From: Mike Rapoport @ 2023-02-12 15:51 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-mm, linux-arch

On Sat, Feb 11, 2023 at 03:39:45AM +0000, Matthew Wilcox (Oracle) wrote:
> Current best practice is to reuse the name of the function as a define
> to indicate that the function is implemented by the architecture.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/cacheflush.h | 4 ++--
>  mm/util.c                  | 2 +-

I'd expect a change in arch/ that removes
ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO and adds #define flush_dcache_folio

What am I missing?

>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
> index a6189d21f2ba..82136f3fcf54 100644
> --- a/include/linux/cacheflush.h
> +++ b/include/linux/cacheflush.h
> @@ -7,14 +7,14 @@
>  struct folio;
>  
>  #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
> -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
> +#ifndef flush_dcache_folio
>  void flush_dcache_folio(struct folio *folio);
>  #endif
>  #else
>  static inline void flush_dcache_folio(struct folio *folio)
>  {
>  }
> -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
> +#define flush_dcache_folio flush_dcache_folio
>  #endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
>  
>  #endif /* _LINUX_CACHEFLUSH_H */
> diff --git a/mm/util.c b/mm/util.c
> index cec9327b27b4..39ea7af8171c 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -1124,7 +1124,7 @@ void page_offline_end(void)
>  }
>  EXPORT_SYMBOL(page_offline_end);
>  
> -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
> +#ifndef flush_dcache_folio
>  void flush_dcache_folio(struct folio *folio)
>  {
>  	long i, nr = folio_nr_pages(folio);
> -- 
> 2.39.1
> 
> 

-- 
Sincerely yours,
Mike.



* Re: [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
  2023-02-12 15:51   ` Mike Rapoport
@ 2023-02-12 23:59     ` Matthew Wilcox
  0 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox @ 2023-02-12 23:59 UTC (permalink / raw)
  To: Mike Rapoport; +Cc: linux-mm, linux-arch

On Sun, Feb 12, 2023 at 05:51:18PM +0200, Mike Rapoport wrote:
> On Sat, Feb 11, 2023 at 03:39:45AM +0000, Matthew Wilcox (Oracle) wrote:
> > Current best practice is to reuse the name of the function as a define
> > to indicate that the function is implemented by the architecture.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  include/linux/cacheflush.h | 4 ++--
> >  mm/util.c                  | 2 +-
> 
> I'd expect a change in arch/ that removes
> ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO and adds #define flush_dcache_folio
> 
> What am I missing?

That not a single architecture got round to implementing
flush_dcache_folio() yet ;-)

$ git grep ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO arch
(nothing)



* Re: [PATCH 6/7] arc: Implement the new page table range API
  2023-02-11  3:39 ` [PATCH 6/7] arc: " Matthew Wilcox (Oracle)
@ 2023-02-13  3:09   ` Yin, Fengwei
  2023-02-13 15:16     ` Matthew Wilcox
  0 siblings, 1 reply; 37+ messages in thread
From: Yin, Fengwei @ 2023-02-13  3:09 UTC (permalink / raw)
  To: linux-mm@kvack.org, willy@infradead.org,
	linux-arch@vger.kernel.org

On Sat, 2023-02-11 at 03:39 +0000, Matthew Wilcox (Oracle) wrote:
> Add set_ptes(), update_mmu_cache_range(), flush_dcache_folio()
> and flush_icache_pages().
> 
> This is a fairly deep change.  The PG_dc_clean flag changes from
> being a
> per-page bit to being a per-folio bit (which means it cannot always
> be set
> as we don't know that all pages in this folio were cleaned).  The
> internal
> flush routines are enhanced to take the number of pages to flush.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  arch/arc/include/asm/cacheflush.h         |  7 ++-
>  arch/arc/include/asm/pgtable-bits-arcv2.h | 20 ++++++--
>  arch/arc/mm/cache.c                       | 61 ++++++++++++++-------
> --
>  arch/arc/mm/tlb.c                         | 18 ++++---
>  4 files changed, 68 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/arc/include/asm/cacheflush.h
> b/arch/arc/include/asm/cacheflush.h
> index e201b4b1655a..04f65f588510 100644
> --- a/arch/arc/include/asm/cacheflush.h
> +++ b/arch/arc/include/asm/cacheflush.h
> @@ -25,17 +25,20 @@
>   * in update_mmu_cache()
>   */
>  #define flush_icache_page(vma, page)
> +#define flush_icache_pages(vma, page, nr)
Maybe just remove these two definitions, because the generic
implementation is already a no-op?

>  
>  void flush_cache_all(void);
>  
>  void flush_icache_range(unsigned long kstart, unsigned long kend);
>  void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr,
> int len);
> -void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr);
> -void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
> +void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr,
> unsigned nr);
> +void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr,
> unsigned nr);
>  
>  #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>  
>  void flush_dcache_page(struct page *page);
> +void flush_dcache_folio(struct folio *folio);
> +#define flush_dcache_folio flush_dcache_folio
>  
>  void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
>  void dma_cache_inv(phys_addr_t start, unsigned long sz);
> diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h
> b/arch/arc/include/asm/pgtable-bits-arcv2.h
> index 6e9f8ca6d6a1..4a1b2ce204c6 100644
> --- a/arch/arc/include/asm/pgtable-bits-arcv2.h
> +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
> @@ -100,14 +100,24 @@ static inline pte_t pte_modify(pte_t pte,
> pgprot_t newprot)
>         return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
> pgprot_val(newprot));
>  }
>  
> -static inline void set_pte_at(struct mm_struct *mm, unsigned long
> addr,
> -                             pte_t *ptep, pte_t pteval)
> +static inline void set_ptes(struct mm_struct *mm, unsigned long
> addr,
> +               pte_t *ptep, pte_t pte, unsigned int nr)
>  {
> -       set_pte(ptep, pteval);
> +       for (;;) {
> +               set_pte(ptep, pte);
> +               if (--nr == 0)
> +                       break;
> +               ptep++;
> +               pte_val(pte) += PAGE_SIZE;
> +       }
>  }
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep,
> pte, 1)
>  
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long
> address,
> -                     pte_t *ptep);
> +void update_mmu_cache_range(struct vm_area_struct *vma, unsigned
> long address,
> +                     pte_t *ptep, unsigned int nr);
> +
> +#define update_mmu_cache(vma, addr, ptep) \
> +       update_mmu_cache_range(vma, addr, ptep, 1)
>  
>  /*
>   * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs
> that
> diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
> index 55c6de138eae..3c16ee942a5c 100644
> --- a/arch/arc/mm/cache.c
> +++ b/arch/arc/mm/cache.c
> @@ -752,17 +752,17 @@ static inline void arc_slc_enable(void)
>   * There's a corollary case, where kernel READs from a userspace
> mapped page.
>   * If the U-mapping is not congruent to K-mapping, former needs
> flushing.
>   */
> -void flush_dcache_page(struct page *page)
> +void flush_dcache_folio(struct folio *folio)
>  {
>         struct address_space *mapping;
>  
>         if (!cache_is_vipt_aliasing()) {
> -               clear_bit(PG_dc_clean, &page->flags);
> +               clear_bit(PG_dc_clean, &folio->flags);
>                 return;
>         }
>  
>         /* don't handle anon pages here */
> -       mapping = page_mapping_file(page);
> +       mapping = folio_flush_mapping(folio);
>         if (!mapping)
>                 return;
>  
> @@ -771,17 +771,27 @@ void flush_dcache_page(struct page *page)
>          * Make a note that K-mapping is dirty
>          */
>         if (!mapping_mapped(mapping)) {
> -               clear_bit(PG_dc_clean, &page->flags);
> -       } else if (page_mapcount(page)) {
> -
> +               clear_bit(PG_dc_clean, &folio->flags);
> +       } else if (folio_mapped(folio)) {
>                 /* kernel reading from page with U-mapping */
> -               phys_addr_t paddr = (unsigned
> long)page_address(page);
> -               unsigned long vaddr = page->index << PAGE_SHIFT;
> +               phys_addr_t paddr = (unsigned
> long)folio_address(folio);
> +               unsigned long vaddr = folio_pos(folio);
>  
> +               /*
> +                * vaddr is not actually the virtual address, but is
> +                * congruent to every user mapping.
> +                */
>                 if (addr_not_cache_congruent(paddr, vaddr))
> -                       __flush_dcache_page(paddr, vaddr);
> +                       __flush_dcache_pages(paddr, vaddr,
> +                                               folio_nr_pages(folio)
> );
>         }
>  }
> +EXPORT_SYMBOL(flush_dcache_folio);
> +
> +void flush_dcache_page(struct page *page)
> +{
> +       return flush_dcache_folio(page_folio(page));
> +}
I am wondering whether we should add flush_dcache_folio_range(),
because it's possible that only part of the folio needs to be flushed. Thanks.


Regards
Yin, Fengwei

>  EXPORT_SYMBOL(flush_dcache_page);
>  
>  /*
> @@ -921,18 +931,18 @@ void __sync_icache_dcache(phys_addr_t paddr,
> unsigned long vaddr, int len)
>  }
>  
>  /* wrapper to compile time eliminate alignment checks in flush loop
> */
> -void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
> +void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr,
> unsigned nr)
>  {
> -       __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
> +       __ic_line_inv_vaddr(paddr, vaddr, nr * PAGE_SIZE);
>  }
>  
>  /*
>   * wrapper to clearout kernel or userspace mappings of a page
>   * For kernel mappings @vaddr == @paddr
>   */
> -void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
> +void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr,
> unsigned nr)
>  {
> -       __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE,
> OP_FLUSH_N_INV);
> +       __dc_line_op(paddr, vaddr & PAGE_MASK, nr * PAGE_SIZE,
> OP_FLUSH_N_INV);
>  }
>  
>  noinline void flush_cache_all(void)
> @@ -962,10 +972,10 @@ void flush_cache_page(struct vm_area_struct
> *vma, unsigned long u_vaddr,
>  
>         u_vaddr &= PAGE_MASK;
>  
> -       __flush_dcache_page(paddr, u_vaddr);
> +       __flush_dcache_pages(paddr, u_vaddr, 1);
>  
>         if (vma->vm_flags & VM_EXEC)
> -               __inv_icache_page(paddr, u_vaddr);
> +               __inv_icache_pages(paddr, u_vaddr, 1);
>  }
>  
>  void flush_cache_range(struct vm_area_struct *vma, unsigned long
> start,
> @@ -978,9 +988,9 @@ void flush_anon_page(struct vm_area_struct *vma,
> struct page *page,
>                      unsigned long u_vaddr)
>  {
>         /* TBD: do we really need to clear the kernel mapping */
> -       __flush_dcache_page((phys_addr_t)page_address(page),
> u_vaddr);
> -       __flush_dcache_page((phys_addr_t)page_address(page),
> -                           (phys_addr_t)page_address(page));
> +       __flush_dcache_pages((phys_addr_t)page_address(page),
> u_vaddr, 1);
> +       __flush_dcache_pages((phys_addr_t)page_address(page),
> +                           (phys_addr_t)page_address(page), 1);
>  
>  }
>  
> @@ -989,6 +999,8 @@ void flush_anon_page(struct vm_area_struct *vma,
> struct page *page,
>  void copy_user_highpage(struct page *to, struct page *from,
>         unsigned long u_vaddr, struct vm_area_struct *vma)
>  {
> +       struct folio *src = page_folio(from);
> +       struct folio *dst = page_folio(to);
>         void *kfrom = kmap_atomic(from);
>         void *kto = kmap_atomic(to);
>         int clean_src_k_mappings = 0;
> @@ -1005,7 +1017,7 @@ void copy_user_highpage(struct page *to, struct
> page *from,
>          * addr_not_cache_congruent() is 0
>          */
>         if (page_mapcount(from) && addr_not_cache_congruent(kfrom,
> u_vaddr)) {
> -               __flush_dcache_page((unsigned long)kfrom, u_vaddr);
> +               __flush_dcache_pages((unsigned long)kfrom, u_vaddr,
> 1);
>                 clean_src_k_mappings = 1;
>         }
>  
> @@ -1019,17 +1031,17 @@ void copy_user_highpage(struct page *to,
> struct page *from,
>          * non copied user pages (e.g. read faults which wire in
> pagecache page
>          * directly).
>          */
> -       clear_bit(PG_dc_clean, &to->flags);
> +       clear_bit(PG_dc_clean, &dst->flags);
>  
>         /*
>          * if SRC was already usermapped and non-congruent to kernel
> mapping
>          * sync the kernel mapping back to physical page
>          */
>         if (clean_src_k_mappings) {
> -               __flush_dcache_page((unsigned long)kfrom, (unsigned
> long)kfrom);
> -               set_bit(PG_dc_clean, &from->flags);
> +               __flush_dcache_pages((unsigned long)kfrom,
> +                                       (unsigned long)kfrom, 1);
>         } else {
> -               clear_bit(PG_dc_clean, &from->flags);
> +               clear_bit(PG_dc_clean, &src->flags);
>         }
>  
>         kunmap_atomic(kto);
> @@ -1038,8 +1050,9 @@ void copy_user_highpage(struct page *to, struct
> page *from,
>  
>  void clear_user_page(void *to, unsigned long u_vaddr, struct page
> *page)
>  {
> +       struct folio *folio = page_folio(page);
>         clear_page(to);
> -       clear_bit(PG_dc_clean, &page->flags);
> +       clear_bit(PG_dc_clean, &folio->flags);
>  }
>  EXPORT_SYMBOL(clear_user_page);
>  
> diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
> index 5f71445f26bd..0a996b65bb4e 100644
> --- a/arch/arc/mm/tlb.c
> +++ b/arch/arc/mm/tlb.c
> @@ -467,8 +467,8 @@ void create_tlb(struct vm_area_struct *vma,
> unsigned long vaddr, pte_t *ptep)
>   * Note that flush (when done) involves both WBACK - so physical
> page is
>   * in sync as well as INV - so any non-congruent aliases don't
> remain
>   */
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long
> vaddr_unaligned,
> -                     pte_t *ptep)
> +void update_mmu_cache_range(struct vm_area_struct *vma,
> +               unsigned long vaddr_unaligned, pte_t *ptep, unsigned
> int nr)
>  {
>         unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
>         phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
> @@ -491,15 +491,19 @@ void update_mmu_cache(struct vm_area_struct
> *vma, unsigned long vaddr_unaligned,
>          */
>         if ((vma->vm_flags & VM_EXEC) ||
>              addr_not_cache_congruent(paddr, vaddr)) {
> -
> -               int dirty = !test_and_set_bit(PG_dc_clean, &page-
> >flags);
> +               struct folio *folio = page_folio(page);
> +               int dirty = !test_and_set_bit(PG_dc_clean, &folio-
> >flags);
>                 if (dirty) {
> +                       unsigned long offset = offset_in_folio(folio,
> paddr);
> +                       nr = folio_nr_pages(folio);
> +                       paddr -= offset;
> +                       vaddr -= offset;
>                         /* wback + inv dcache lines (K-mapping) */
> -                       __flush_dcache_page(paddr, paddr);
> +                       __flush_dcache_pages(paddr, paddr, nr);
>  
>                         /* invalidate any existing icache lines (U-
> mapping) */
>                         if (vma->vm_flags & VM_EXEC)
> -                               __inv_icache_page(paddr, vaddr);
> +                               __inv_icache_pages(paddr, vaddr, nr);
>                 }
>         }
>  }
> @@ -531,7 +535,7 @@ void update_mmu_cache_pmd(struct vm_area_struct
> *vma, unsigned long addr,
>                                  pmd_t *pmd)
>  {
>         pte_t pte = __pte(pmd_val(*pmd));
> -       update_mmu_cache(vma, addr, &pte);
> +       update_mmu_cache_range(vma, addr, &pte, HPAGE_PMD_NR);
>  }
>  
>  void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned
> long start,



* Re: [PATCH 5/7] alpha: Implement the new page table range API
  2023-02-11  3:39 ` [PATCH 5/7] alpha: Implement the new page table range API Matthew Wilcox (Oracle)
@ 2023-02-13  3:15   ` Yin, Fengwei
  0 siblings, 0 replies; 37+ messages in thread
From: Yin, Fengwei @ 2023-02-13  3:15 UTC (permalink / raw)
  To: linux-mm@kvack.org, willy@infradead.org,
	linux-arch@vger.kernel.org

On Sat, 2023-02-11 at 03:39 +0000, Matthew Wilcox (Oracle) wrote:
> Add set_ptes(), update_mmu_cache_range() and flush_icache_pages().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  arch/alpha/include/asm/cacheflush.h | 10 ++++++++++
>  arch/alpha/include/asm/pgtable.h    | 18 +++++++++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/alpha/include/asm/cacheflush.h
> b/arch/alpha/include/asm/cacheflush.h
> index 9945ff483eaf..3956460e69e2 100644
> --- a/arch/alpha/include/asm/cacheflush.h
> +++ b/arch/alpha/include/asm/cacheflush.h
> @@ -57,6 +57,16 @@ extern void flush_icache_user_page(struct
> vm_area_struct *vma,
>  #define flush_icache_page(vma, page) \
>         flush_icache_user_page((vma), (page), 0, 0)
Not related to this patch, and not a request for a change; just a question of mine.

So is it more efficient to implement flush_icache_page(s) as a no-op
and do the real flush in update_mmu_cache()?


Regards
Yin, Fengwei

>  
> +/*
> + * Both implementations of flush_icache_user_page flush the entire
> + * address space, so one call, no matter how many pages.
> + */
> +static inline void flush_icache_pages(struct vm_area_struct *vma,
> +               struct page *page, unsigned int nr)
> +{
> +       flush_icache_user_page(vma, page, 0, 0);
> +}
> +
>  #include <asm-generic/cacheflush.h>
>  
>  #endif /* _ALPHA_CACHEFLUSH_H */
> diff --git a/arch/alpha/include/asm/pgtable.h
> b/arch/alpha/include/asm/pgtable.h
> index ba43cb841d19..1e3354e9731b 100644
> --- a/arch/alpha/include/asm/pgtable.h
> +++ b/arch/alpha/include/asm/pgtable.h
> @@ -26,7 +26,18 @@ struct vm_area_struct;
>   * hook is made available.
>   */
>  #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
> -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
> +static inline void set_ptes(struct mm_struct *mm, unsigned long
> addr,
> +               pte_t *ptep, pte_t pte, unsigned int nr)
> +{
> +       for (;;) {
> +               set_pte(ptep, pte);
> +               if (--nr == 0)
> +                       break;
> +               ptep++;
> +               pte_val(pte) += 1UL << 32;
> +       }
> +}
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep,
> pte, 1)
>  
>  /* PMD_SHIFT determines the size of the area a second-level page
> table can map */
>  #define PMD_SHIFT      (PAGE_SHIFT + (PAGE_SHIFT-3))
> @@ -303,6 +314,11 @@ extern inline void update_mmu_cache(struct
> vm_area_struct * vma,
>  {
>  }
>  
> +static inline void update_mmu_cache_range(struct vm_area_struct
> *vma,
> +               unsigned long address, pte_t *ptep, unsigned int nr)
> +{
> +}
> +
>  /*
>   * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs
> that
>   * are !pte_none() && !pte_present().



* Re: [PATCH 6/7] arc: Implement the new page table range API
  2023-02-13  3:09   ` Yin, Fengwei
@ 2023-02-13 15:16     ` Matthew Wilcox
  2023-02-14  6:32       ` Yin, Fengwei
  0 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox @ 2023-02-13 15:16 UTC (permalink / raw)
  To: Yin, Fengwei; +Cc: linux-mm@kvack.org, linux-arch@vger.kernel.org

On Mon, Feb 13, 2023 at 03:09:37AM +0000, Yin, Fengwei wrote:
> > +++ b/arch/arc/include/asm/cacheflush.h
> > @@ -25,17 +25,20 @@
> >   * in update_mmu_cache()
> >   */
> >  #define flush_icache_page(vma, page)
> > +#define flush_icache_pages(vma, page, nr)
> Maybe just remove these two definitions because general
> implementation is just no-op?

Then arc would have to include asm-generic/cacheflush.h and I don't
particularly want to debug any issues that might cause.  This is
easier.

Long term, asm-generic/cacheflush.h's contents should be moved into
linux/cacheflush.h, but I've lacked the time to do that work.

To answer your question from the other email, the documentation says:

  ``void flush_icache_page(struct vm_area_struct *vma, struct page *page)``

        All the functionality of flush_icache_page can be implemented in
        flush_dcache_page and update_mmu_cache_range. In the future, the hope
        is to remove this interface completely.

I'm not planning on doing that to an architecture that I'm not set up
to test ...

> > +void flush_dcache_page(struct page *page)
> > +{
> > +       return flush_dcache_folio(page_folio(page));
> > +}
> I am wondering whether we should add flush_dcache_folio_range(),
> because it's possible that only part of the folio needs to be flushed. Thanks.

We could.  I think it's up to the maintainers of architectures that
need their caches flushing to let us know what would be good for them.
Since I primarily work on x86, I have no personal desire to do this ;-)

One of the things that I've always found a little weird about
flush_dcache_page() (and now flush_dcache_folio()) is that it's used both
for flushing userspace writes (eg in filemap_read()) and for flushing
kernel writes (eg in __iomap_write_end()).  Probably it was designed for
an architecture that flushes by physical address rather than by virtual.

Anyway, if we do have a flush_dcache_folio_kernel(), I'd like it
to take byte offsets.  That would work well for __iomap_write_end();
it could be:

	flush_dcache_folio_kernel(folio, offset_in_folio(folio, pos), len);

But I'm not volunteering to do this work.
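
Spelled out as a prototype, that would be something like this (purely
hypothetical; nothing declares or implements it today):

        void flush_dcache_folio_kernel(struct folio *folio, size_t offset,
                        size_t len);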



* [PATCH 8/7] arm: Implement the new page table range API
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
                   ` (6 preceding siblings ...)
  2023-02-11  3:39 ` [PATCH 7/7] x86: " Matthew Wilcox (Oracle)
@ 2023-02-13 21:04 ` Matthew Wilcox (Oracle)
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
  8 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-13 21:04 UTC (permalink / raw)
  To: linux-mm, linux-arm-kernel, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range(), flush_dcache_folio() and
flush_icache_pages().

The PG_dcache_clean flag changes from being a per-page bit to being a
per-folio bit, which makes __dma_page_dev_to_cpu() a bit more exciting.
It also makes sense to add flush_cache_pages(), even though this isn't
used by generic code (yet?)

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/arm/include/asm/cacheflush.h |  24 ++++---
 arch/arm/include/asm/pgtable.h    |   5 +-
 arch/arm/include/asm/tlbflush.h   |  13 ++--
 arch/arm/mm/copypage-v4mc.c       |   5 +-
 arch/arm/mm/copypage-v6.c         |   5 +-
 arch/arm/mm/copypage-xscale.c     |   5 +-
 arch/arm/mm/dma-mapping.c         |  24 +++----
 arch/arm/mm/fault-armv.c          |  14 ++---
 arch/arm/mm/flush.c               | 101 ++++++++++++++++++------------
 arch/arm/mm/mm.h                  |   2 +-
 arch/arm/mm/mmu.c                 |  14 +++--
 11 files changed, 127 insertions(+), 85 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index a094f964c869..841e268d2374 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -231,14 +231,15 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
 					vma->vm_flags);
 }
 
-static inline void
-vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
+static inline void vivt_flush_cache_pages(struct vm_area_struct *vma,
+		unsigned long user_addr, unsigned long pfn, unsigned int nr)
 {
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
 		unsigned long addr = user_addr & PAGE_MASK;
-		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
+		__cpuc_flush_user_range(addr, addr + nr * PAGE_SIZE,
+				vma->vm_flags);
 	}
 }
 
@@ -247,15 +248,17 @@ vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig
 		vivt_flush_cache_mm(mm)
 #define flush_cache_range(vma,start,end) \
 		vivt_flush_cache_range(vma,start,end)
-#define flush_cache_page(vma,addr,pfn) \
-		vivt_flush_cache_page(vma,addr,pfn)
+#define flush_cache_pages(vma, addr, pfn, nr) \
+		vivt_flush_cache_pages(vma, addr, pfn, nr)
 #else
-extern void flush_cache_mm(struct mm_struct *mm);
-extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
-extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn);
+void flush_cache_mm(struct mm_struct *mm);
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+void flush_cache_pages(struct vm_area_struct *vma, unsigned long user_addr,
+		unsigned long pfn, unsigned int nr);
 #endif
 
 #define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+#define flush_cache_page(vma, addr, pfn) flush_cache_pages(vma, addr, pfn, 1)
 
 /*
  * flush_icache_user_range is used when we want to ensure that the
@@ -289,7 +292,9 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
  * See update_mmu_cache for the user space part.
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *);
+void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *folio);
+#define flush_dcache_folio flush_dcache_folio
 
 #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
@@ -321,6 +326,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
  * duplicate cache flushing elsewhere performed by flush_dcache_page().
  */
 #define flush_icache_page(vma,page)	do { } while (0)
+#define flush_icache_pages(vma, page, nr)	do { } while (0)
 
 /*
  * flush_cache_vmap() is used when creating mappings (eg, via vmap,
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index a58ccbb406ad..6525ac82bd50 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -207,8 +207,9 @@ static inline void __sync_icache_dcache(pte_t pteval)
 extern void __sync_icache_dcache(pte_t pteval);
 #endif
 
-void set_pte_at(struct mm_struct *mm, unsigned long addr,
-		      pte_t *ptep, pte_t pteval);
+void set_ptes(struct mm_struct *mm, unsigned long addr,
+		      pte_t *ptep, pte_t pteval, unsigned int nr);
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 0ccc985b90af..7d792e485f4f 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -619,18 +619,21 @@ extern void flush_bp_all(void);
  * If PG_dcache_clean is not set for the page, we need to ensure that any
  * cache entries for the kernels virtual memory range are written
  * back to the page. On ARMv6 and later, the cache coherency is handled via
- * the set_pte_at() function.
+ * the set_ptes() function.
  */
 #if __LINUX_ARM_ARCH__ < 6
-extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-	pte_t *ptep);
+void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long addr,
+		pte_t *ptep, unsigned int nr);
 #else
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-				    unsigned long addr, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 }
 #endif
 
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
+
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 #endif
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index f1da3b439b96..7ddd82b9fe8b 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -64,10 +64,11 @@ static void mc_copy_user_page(void *from, void *to)
 void v4_mc_copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma)
 {
+	struct folio *src = page_folio(from);
 	void *kto = kmap_atomic(to);
 
-	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-		__flush_dcache_page(page_mapping_file(from), from);
+	if (!test_and_set_bit(PG_dcache_clean, &src->flags))
+		__flush_dcache_folio(folio_flush_mapping(src), src);
 
 	raw_spin_lock(&minicache_lock);
 
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index d8a115de5507..a1a71f36d850 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -69,11 +69,12 @@ static void discard_old_kernel_data(void *kto)
 static void v6_copy_user_highpage_aliasing(struct page *to,
 	struct page *from, unsigned long vaddr, struct vm_area_struct *vma)
 {
+	struct folio *src = page_folio(from);
 	unsigned int offset = CACHE_COLOUR(vaddr);
 	unsigned long kfrom, kto;
 
-	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-		__flush_dcache_page(page_mapping_file(from), from);
+	if (!test_and_set_bit(PG_dcache_clean, &src->flags))
+		__flush_dcache_folio(folio_flush_mapping(src), src);
 
 	/* FIXME: not highmem safe */
 	discard_old_kernel_data(page_address(to));
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index bcb485620a05..f1e29d3e8193 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -84,10 +84,11 @@ static void mc_copy_user_page(void *from, void *to)
 void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma)
 {
+	struct folio *src = page_folio(from);
 	void *kto = kmap_atomic(to);
 
-	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-		__flush_dcache_page(page_mapping_file(from), from);
+	if (!test_and_set_bit(PG_dcache_clean, &src->flags))
+		__flush_dcache_folio(folio_flush_mapping(src), src);
 
 	raw_spin_lock(&minicache_lock);
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 8bc01071474a..5ecfde41d70a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -693,6 +693,7 @@ static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
 static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 	size_t size, enum dma_data_direction dir)
 {
+	struct folio *folio = page_folio(page);
 	phys_addr_t paddr = page_to_phys(page) + off;
 
 	/* FIXME: non-speculating: not required */
@@ -707,19 +708,18 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 	 * Mark the D-cache clean for these pages to avoid extra flushing.
 	 */
 	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
-		unsigned long pfn;
-		size_t left = size;
-
-		pfn = page_to_pfn(page) + off / PAGE_SIZE;
-		off %= PAGE_SIZE;
-		if (off) {
-			pfn++;
-			left -= PAGE_SIZE - off;
+		ssize_t left = size;
+		size_t offset = offset_in_folio(folio, paddr);
+
+		if (offset) {
+			left -= folio_size(folio) - offset;
+			folio = folio_next(folio);
 		}
-		while (left >= PAGE_SIZE) {
-			page = pfn_to_page(pfn++);
-			set_bit(PG_dcache_clean, &page->flags);
-			left -= PAGE_SIZE;
+
+		while (left >= (ssize_t)folio_size(folio)) {
+			set_bit(PG_dcache_clean, &folio->flags);
+			left -= folio_size(folio);
+			folio = folio_next(folio);
 		}
 	}
 }
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 0e49154454a6..e2c869b8f012 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -178,8 +178,8 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
  *
  * Note that the pte lock will be held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-	pte_t *ptep)
+void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long addr,
+		pte_t *ptep, unsigned int nr)
 {
 	unsigned long pfn = pte_pfn(*ptep);
 	struct address_space *mapping;
@@ -192,13 +192,13 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 	 * The zero page is never written to, so never has any dirty
 	 * cache lines, and therefore never needs to be flushed.
 	 */
-	page = pfn_to_page(pfn);
-	if (page == ZERO_PAGE(0))
+	if (is_zero_pfn(pfn))
 		return;
 
-	mapping = page_mapping_file(page);
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-		__flush_dcache_page(mapping, page);
+	folio = page_folio(pfn_to_page(pfn));
+	mapping = folio_flush_mapping(folio);
+	if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
+		__flush_dcache_folio(mapping, folio);
 	if (mapping) {
 		if (cache_is_vivt())
 			make_coherent(mapping, vma, addr, ptep, pfn);
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 7ff9feea13a6..b56a65626798 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -95,10 +95,10 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
 		__flush_icache_all();
 }
 
-void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
+void flush_cache_pages(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn, unsigned int nr)
 {
 	if (cache_is_vivt()) {
-		vivt_flush_cache_page(vma, user_addr, pfn);
+		vivt_flush_cache_pages(vma, user_addr, pfn, nr);
 		return;
 	}
 
@@ -196,29 +196,31 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 #endif
 }
 
-void __flush_dcache_page(struct address_space *mapping, struct page *page)
+void __flush_dcache_folio(struct address_space *mapping, struct folio *folio)
 {
 	/*
 	 * Writeback any data associated with the kernel mapping of this
 	 * page.  This ensures that data in the physical page is mutually
 	 * coherent with the kernels mapping.
 	 */
-	if (!PageHighMem(page)) {
-		__cpuc_flush_dcache_area(page_address(page), page_size(page));
+	if (!folio_test_highmem(folio)) {
+		__cpuc_flush_dcache_area(folio_address(folio),
+					folio_size(folio));
 	} else {
 		unsigned long i;
 		if (cache_is_vipt_nonaliasing()) {
-			for (i = 0; i < compound_nr(page); i++) {
-				void *addr = kmap_atomic(page + i);
+			for (i = 0; i < folio_nr_pages(folio); i++) {
+				void *addr = kmap_local_folio(folio,
+								i * PAGE_SIZE);
 				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-				kunmap_atomic(addr);
+				kunmap_local(addr);
 			}
 		} else {
-			for (i = 0; i < compound_nr(page); i++) {
-				void *addr = kmap_high_get(page + i);
+			for (i = 0; i < folio_nr_pages(folio); i++) {
+				void *addr = kmap_high_get(folio_page(folio, i));
 				if (addr) {
 					__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-					kunmap_high(page + i);
+					kunmap_high(folio_page(folio, i));
 				}
 			}
 		}
@@ -230,15 +232,14 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 	 * userspace colour, which is congruent with page->index.
 	 */
 	if (mapping && cache_is_vipt_aliasing())
-		flush_pfn_alias(page_to_pfn(page),
-				page->index << PAGE_SHIFT);
+		flush_pfn_alias(folio_pfn(folio), folio_pos(folio));
 }
 
-static void __flush_dcache_aliases(struct address_space *mapping, struct page *page)
+static void __flush_dcache_aliases(struct address_space *mapping, struct folio *folio)
 {
 	struct mm_struct *mm = current->active_mm;
-	struct vm_area_struct *mpnt;
-	pgoff_t pgoff;
+	struct vm_area_struct *vma;
+	pgoff_t pgoff, pgoff_end;
 
 	/*
 	 * There are possible user space mappings of this page:
@@ -246,21 +247,38 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 	 *   data in the current VM view associated with this page.
 	 * - aliasing VIPT: we only need to find one mapping of this page.
 	 */
-	pgoff = page->index;
+	pgoff = folio->index;
+	pgoff_end = pgoff + folio_nr_pages(folio) - 1;
 
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
-		unsigned long offset;
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff_end) {
+		long offset;
+		unsigned long start, pfn;
+		unsigned int nr;
 
 		/*
 		 * If this VMA is not in our MM, we can ignore it.
 		 */
-		if (mpnt->vm_mm != mm)
+		if (vma->vm_mm != mm)
 			continue;
-		if (!(mpnt->vm_flags & VM_MAYSHARE))
+		if (!(vma->vm_flags & VM_MAYSHARE))
 			continue;
-		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
-		flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
+
+		start = vma->vm_start;
+		pfn = folio_pfn(folio);
+		nr = folio_nr_pages(folio);
+		offset = pgoff - vma->vm_pgoff;
+		if (offset < 0) {
+			pfn -= offset;
+			nr += offset;
+			start -= offset * PAGE_SIZE;
+		} else {
+			start += offset * PAGE_SIZE;
+		}
+		if (start + nr * PAGE_SIZE > vma->vm_end)
+			nr = (vma->vm_end - start) / PAGE_SIZE;
+
+		flush_cache_pages(vma, start, pfn, nr);
 	}
 	flush_dcache_mmap_unlock(mapping);
 }
@@ -269,7 +287,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 void __sync_icache_dcache(pte_t pteval)
 {
 	unsigned long pfn;
-	struct page *page;
+	struct folio *folio;
 	struct address_space *mapping;
 
 	if (cache_is_vipt_nonaliasing() && !pte_exec(pteval))
@@ -279,14 +297,14 @@ void __sync_icache_dcache(pte_t pteval)
 	if (!pfn_valid(pfn))
 		return;
 
-	page = pfn_to_page(pfn);
+	folio = page_folio(pfn_to_page(pfn));
 	if (cache_is_vipt_aliasing())
-		mapping = page_mapping_file(page);
+		mapping = folio_flush_mapping(folio);
 	else
 		mapping = NULL;
 
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-		__flush_dcache_page(mapping, page);
+	if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
+		__flush_dcache_folio(mapping, folio);
 
 	if (pte_exec(pteval))
 		__flush_icache_all();
@@ -312,7 +330,7 @@ void __sync_icache_dcache(pte_t pteval)
  * Note that we disable the lazy flush for SMP configurations where
  * the cache maintenance operations are not automatically broadcasted.
  */
-void flush_dcache_page(struct page *page)
+void flush_dcache_folio(struct folio *folio)
 {
 	struct address_space *mapping;
 
@@ -320,31 +338,36 @@ void flush_dcache_page(struct page *page)
 	 * The zero page is never written to, so never has any dirty
 	 * cache lines, and therefore never needs to be flushed.
 	 */
-	if (page == ZERO_PAGE(0))
+	if (is_zero_pfn(folio_pfn(folio)))
 		return;
 
 	if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) {
-		if (test_bit(PG_dcache_clean, &page->flags))
-			clear_bit(PG_dcache_clean, &page->flags);
+		if (test_bit(PG_dcache_clean, &folio->flags))
+			clear_bit(PG_dcache_clean, &folio->flags);
 		return;
 	}
 
-	mapping = page_mapping_file(page);
+	mapping = folio_flush_mapping(folio);
 
 	if (!cache_ops_need_broadcast() &&
-	    mapping && !page_mapcount(page))
-		clear_bit(PG_dcache_clean, &page->flags);
+	    mapping && !folio_mapped(folio))
+		clear_bit(PG_dcache_clean, &folio->flags);
 	else {
-		__flush_dcache_page(mapping, page);
+		__flush_dcache_folio(mapping, folio);
 		if (mapping && cache_is_vivt())
-			__flush_dcache_aliases(mapping, page);
+			__flush_dcache_aliases(mapping, folio);
 		else if (mapping)
 			__flush_icache_all();
-		set_bit(PG_dcache_clean, &page->flags);
+		set_bit(PG_dcache_clean, &folio->flags);
 	}
 }
-EXPORT_SYMBOL(flush_dcache_page);
+EXPORT_SYMBOL(flush_dcache_folio);
 
+void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
+}
+EXPORT_SYMBOL(flush_dcache_page);
 /*
  * Flush an anonymous page so that users of get_user_pages()
  * can safely access the data.  The expected sequence is:
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index d7ffccb7fea7..419316316711 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -45,7 +45,7 @@ struct mem_type {
 
 const struct mem_type *get_mem_type(unsigned int type);
 
-extern void __flush_dcache_page(struct address_space *mapping, struct page *page);
+void __flush_dcache_folio(struct address_space *mapping, struct folio *folio);
 
 /*
  * ARM specific vm_struct->flags bits.
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 463fc2a8448f..9947bbc32b04 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1788,7 +1788,7 @@ void __init paging_init(const struct machine_desc *mdesc)
 	bootmem_init();
 
 	empty_zero_page = virt_to_page(zero_page);
-	__flush_dcache_page(NULL, empty_zero_page);
+	__flush_dcache_folio(NULL, page_folio(empty_zero_page));
 }
 
 void __init early_mm_init(const struct machine_desc *mdesc)
@@ -1797,8 +1797,8 @@ void __init early_mm_init(const struct machine_desc *mdesc)
 	early_paging_init(mdesc);
 }
 
-void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
+void set_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pteval, unsigned int nr)
 {
 	unsigned long ext = 0;
 
@@ -1808,5 +1808,11 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr,
 		ext |= PTE_EXT_NG;
 	}
 
-	set_pte_ext(ptep, pteval, ext);
+	for (;;) {
+		set_pte_ext(ptep, pteval, ext);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pteval) += PAGE_SIZE;
+	}
 }
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 6/7] arc: Implement the new page table range API
  2023-02-13 15:16     ` Matthew Wilcox
@ 2023-02-14  6:32       ` Yin, Fengwei
  0 siblings, 0 replies; 37+ messages in thread
From: Yin, Fengwei @ 2023-02-14  6:32 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-mm@kvack.org, linux-arch@vger.kernel.org



On 2/13/2023 11:16 PM, Matthew Wilcox wrote:
> On Mon, Feb 13, 2023 at 03:09:37AM +0000, Yin, Fengwei wrote:
>>> +++ b/arch/arc/include/asm/cacheflush.h
>>> @@ -25,17 +25,20 @@
>>>   * in update_mmu_cache()
>>>   */
>>>  #define flush_icache_page(vma, page)
>>> +#define flush_icache_pages(vma, page, nr)
>> Maybe just remove these two definitions, because the generic
>> implementation is just a no-op?
> 
> Then arc would have to include asm-generic/cacheflush.h and I don't
> particularly want to debug any issues that might cause.  This is
> easier.
> 
> Long term, asm-generic/cacheflush.h's contents should be moved into
> linux/cacheflush.h, but I've lacked the time to do that work.
> 
> To answer your question from the other email, the documentation says:
> 
>   ``void flush_icache_page(struct vm_area_struct *vma, struct page *page)``
> 
>         All the functionality of flush_icache_page can be implemented in
>         flush_dcache_page and update_mmu_cache_range. In the future, the hope
>         is to remove this interface completely.
> 
> I'm not planning on doing that to an architecture that I'm not set up
> to test ...
Thanks a lot for the detailed explanation.

> 
>>> +void flush_dcache_page(struct page *page)
>>> +{
>>> +       return flush_dcache_folio(page_folio(page));
>>> +}
>> I am wondering whether we should add flush_dcache_folio_range(),
>> because it's possible that only part of a folio needs to be flushed. Thanks.
> 
> We could.  I think it's up to the maintainers of architectures that
> need their caches flushing to let us know what would be good for them.
> Since I primarily work on x86, I have no personal desire to do this ;-)
> 
> One of the things that I've always found a little weird about
> flush_dcache_page() (and now flush_dcache_folio()) is that it's used both
> for flushing userspace writes (eg in filemap_read()) and for flushing
> kernel writes (eg in __iomap_write_end()).  Probably it was designed for
> an architecture that flushes by physical address rather than by virtual.
I noticed that copy_page_from_iter_atomic() uses kmap_atomic(page) as the
access address. So even with a VIVT cache, if there is no highmem, it
should work with flush_dcache_page/folio(). arm is VIVT and there seems
to be no complaint about this. Maybe it is just very rare that it has no
highmem?
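
Just to make the pattern concrete (a hand-written sketch, not code from
this series; copy_to_folio() is a made-up name), the kernel writes
through a temporary kernel mapping and then flushes the folio so that an
aliasing user mapping sees the new data:

	/* Assumes the copied range does not cross a page boundary. */
	static void copy_to_folio(struct folio *folio, size_t offset,
				  const void *src, size_t len)
	{
		void *dst = kmap_local_folio(folio, offset);

		memcpy(dst, src, len);
		kunmap_local(dst);
		flush_dcache_folio(folio);
	}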

> 
> Anyway, if we do have a flush_dcache_folio_kernel(), I'd like it
> to take byte offsets.  That would work well for __iomap_write_end();
> it could be:
> 
> 	flush_dcache_folio_kernel(folio, offset_in_folio(folio, pos), len);
> 
> But I'm not volunteering to do this work.
I'd like to give it a try. :).


Regards
Yin, Fengwei


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 9/7] arm64: Implement the new page table range API
  2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
                   ` (7 preceding siblings ...)
  2023-02-13 21:04 ` [PATCH 8/7] arm: " Matthew Wilcox (Oracle)
@ 2023-02-15  0:04 ` Matthew Wilcox (Oracle)
  2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
                     ` (5 more replies)
  8 siblings, 6 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15  0:04 UTC (permalink / raw)
  To: linux-mm, linux-arm-kernel, Catalin Marinas, Will Deacon,
	linux-arch
  Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().

The PG_dcache_clean flag changes from being a per-page bit to being a
per-folio bit.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/arm64/include/asm/cacheflush.h |  4 +++-
 arch/arm64/include/asm/pgtable.h    | 25 ++++++++++++++------
 arch/arm64/mm/flush.c               | 36 +++++++++++------------------
 3 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 37185e978aeb..d115451ed263 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -114,7 +114,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define copy_to_user_page copy_to_user_page
 
 /*
- * flush_dcache_page is used when the kernel has written to the page
+ * flush_dcache_folio is used when the kernel has written to the page
  * cache page at virtual address page->virtual.
  *
  * If this page isn't mapped (ie, page_mapping == NULL), or it might
@@ -127,6 +127,8 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *);
+#define flush_dcache_folio flush_dcache_folio
 
 static __always_inline void icache_inval_all_pou(void)
 {
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 69765dc697af..4d1b79dbff16 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -355,12 +355,21 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	set_pte(ptep, pte);
 }
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pte)
-{
-	page_table_check_ptes_set(mm, addr, ptep, pte, 1);
-	return __set_pte_at(mm, addr, ptep, pte);
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	page_table_check_ptes_set(mm, addr, ptep, pte, nr);
+
+	for (;;) {
+		__set_pte_at(mm, addr, ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+		pte_val(pte) += PAGE_SIZE;
+	}
 }
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 /*
  * Huge pte definitions.
@@ -1059,8 +1068,8 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-				    unsigned long addr, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 	/*
 	 * We don't do anything here, so there's a very small chance of
@@ -1069,6 +1078,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 */
 }
 
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 #ifdef CONFIG_ARM64_PA_BITS_52
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 5f9379b3c8c8..deb781af0a3a 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -50,20 +50,13 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 
 void __sync_icache_dcache(pte_t pte)
 {
-	struct page *page = pte_page(pte);
+	struct folio *folio = page_folio(pte_page(pte));
 
-	/*
-	 * HugeTLB pages are always fully mapped, so only setting head page's
-	 * PG_dcache_clean flag is enough.
-	 */
-	if (PageHuge(page))
-		page = compound_head(page);
-
-	if (!test_bit(PG_dcache_clean, &page->flags)) {
-		sync_icache_aliases((unsigned long)page_address(page),
-				    (unsigned long)page_address(page) +
-					    page_size(page));
-		set_bit(PG_dcache_clean, &page->flags);
+	if (!test_bit(PG_dcache_clean, &folio->flags)) {
+		sync_icache_aliases((unsigned long)folio_address(folio),
+				    (unsigned long)folio_address(folio) +
+					    folio_size(folio));
+		set_bit(PG_dcache_clean, &folio->flags);
 	}
 }
 EXPORT_SYMBOL_GPL(__sync_icache_dcache);
@@ -73,17 +66,16 @@ EXPORT_SYMBOL_GPL(__sync_icache_dcache);
  * it as dirty for later flushing when mapped in user space (if executable,
  * see __sync_icache_dcache).
  */
-void flush_dcache_page(struct page *page)
+void flush_dcache_folio(struct folio *folio)
 {
-	/*
-	 * HugeTLB pages are always fully mapped and only head page will be
-	 * set PG_dcache_clean (see comments in __sync_icache_dcache()).
-	 */
-	if (PageHuge(page))
-		page = compound_head(page);
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		clear_bit(PG_dcache_clean, &folio->flags);
+}
+EXPORT_SYMBOL(flush_dcache_folio);
 
-	if (test_bit(PG_dcache_clean, &page->flags))
-		clear_bit(PG_dcache_clean, &page->flags);
+void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 10/7] riscv: Implement the new page table range API
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
@ 2023-02-15  0:04   ` Matthew Wilcox (Oracle)
  2023-02-15  8:38     ` Yin, Fengwei
  2023-02-16  8:16     ` Alexandre Ghiti
  2023-02-15  0:04   ` [PATCH 11/7] csky: " Matthew Wilcox (Oracle)
                     ` (4 subsequent siblings)
  5 siblings, 2 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15  0:04 UTC (permalink / raw)
  To: linux-mm, linux-riscv, Alexandre Ghiti, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, linux-arch
  Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().

The PG_dcache_clean flag changes from being a per-page bit to being a
per-folio bit.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
 arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
 arch/riscv/mm/cacheflush.c          | 11 ++---------
 3 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 03e3b95ae6da..10e5e96f09b5 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
 
 #define PG_dcache_clean PG_arch_1
 
-static inline void flush_dcache_page(struct page *page)
+static inline void flush_dcache_folio(struct folio *folio)
 {
-	/*
-	 * HugeTLB pages are always fully mapped and only head page will be
-	 * set PG_dcache_clean (see comments in flush_icache_pte()).
-	 */
-	if (PageHuge(page))
-		page = compound_head(page);
-
-	if (test_bit(PG_dcache_clean, &page->flags))
-		clear_bit(PG_dcache_clean, &page->flags);
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		clear_bit(PG_dcache_clean, &folio->flags);
 }
+#define flush_dcache_folio flush_dcache_folio
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
+static inline void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
+}
+
 /*
  * RISC-V doesn't have an instruction to flush parts of the instruction cache,
  * so instead we just flush the whole thing.
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 13222fd5c4b4..03706c833e70 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 
 /* Commit new configuration to MMU hardware */
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-	unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
 	/*
 	 * The kernel assumes that TLBs don't cache invalid entries, but
@@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
 	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
 	 */
-	flush_tlb_page(vma, address);
+	flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
 }
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 
 #define __HAVE_ARCH_UPDATE_MMU_TLB
 #define update_mmu_tlb update_mmu_cache
@@ -456,12 +458,21 @@ static inline void __set_pte_at(struct mm_struct *mm,
 	set_pte(ptep, pteval);
 }
 
-static inline void set_pte_at(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep, pte_t pteval)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pteval, unsigned int nr)
 {
-	page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
-	__set_pte_at(mm, addr, ptep, pteval);
+	page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
+
+	for (;;) {
+		__set_pte_at(mm, addr, ptep, pteval);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+		pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
+	}
 }
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 static inline void pte_clear(struct mm_struct *mm,
 	unsigned long addr, pte_t *ptep)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 3cc07ed45aeb..b725c3f6f57f 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 #ifdef CONFIG_MMU
 void flush_icache_pte(pte_t pte)
 {
-	struct page *page = pte_page(pte);
+	struct folio *folio = page_folio(pte_page(pte));
 
-	/*
-	 * HugeTLB pages are always fully mapped, so only setting head page's
-	 * PG_dcache_clean flag is enough.
-	 */
-	if (PageHuge(page))
-		page = compound_head(page);
-
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+	if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
 		flush_icache_all();
 }
 #endif /* CONFIG_MMU */
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 11/7] csky: Implement the new page table range API
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
  2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
@ 2023-02-15  0:04   ` Matthew Wilcox (Oracle)
  2023-02-15  0:04   ` [PATCH 12/7] hexagon: " Matthew Wilcox (Oracle)
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15  0:04 UTC (permalink / raw)
  To: linux-mm, Guo Ren, linux-csky, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().

The PG_dcache_clean flag changes from being a per-page bit to being a
per-folio bit.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/csky/abiv1/cacheflush.c         | 32 +++++++++++++++++-----------
 arch/csky/abiv1/inc/abi/cacheflush.h |  2 ++
 arch/csky/abiv2/cacheflush.c         | 30 +++++++++++++-------------
 arch/csky/abiv2/inc/abi/cacheflush.h | 10 +++++++--
 arch/csky/include/asm/pgtable.h      | 21 +++++++++++++++---
 5 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c
index fb91b069dc69..ba43f6c26b4f 100644
--- a/arch/csky/abiv1/cacheflush.c
+++ b/arch/csky/abiv1/cacheflush.c
@@ -14,43 +14,49 @@
 
 #define PG_dcache_clean		PG_arch_1
 
-void flush_dcache_page(struct page *page)
+void flush_dcache_folio(struct folio *folio)
 {
 	struct address_space *mapping;
 
-	if (page == ZERO_PAGE(0))
+	if (is_zero_pfn(folio_pfn(folio)))
 		return;
 
-	mapping = page_mapping_file(page);
+	mapping = folio_flush_mapping(folio);
 
-	if (mapping && !page_mapcount(page))
-		clear_bit(PG_dcache_clean, &page->flags);
+	if (mapping && !folio_mapped(folio))
+		clear_bit(PG_dcache_clean, &folio->flags);
 	else {
 		dcache_wbinv_all();
 		if (mapping)
 			icache_inv_all();
-		set_bit(PG_dcache_clean, &page->flags);
+		set_bit(PG_dcache_clean, &folio->flags);
 	}
 }
+EXPORT_SYMBOL(flush_dcache_folio);
+
+void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
+}
 EXPORT_SYMBOL(flush_dcache_page);
 
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-	pte_t *ptep)
+void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long addr,
+		pte_t *ptep, unsigned int nr)
 {
 	unsigned long pfn = pte_pfn(*ptep);
-	struct page *page;
+	struct folio *folio;
 
 	if (!pfn_valid(pfn))
 		return;
 
-	page = pfn_to_page(pfn);
-	if (page == ZERO_PAGE(0))
+	if (is_zero_pfn(pfn))
 		return;
 
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+	folio = page_folio(pfn_to_page(pfn));
+	if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
 		dcache_wbinv_all();
 
-	if (page_mapping_file(page)) {
+	if (folio_flush_mapping(folio)) {
 		if (vma->vm_flags & VM_EXEC)
 			icache_inv_all();
 	}
diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h
index ed62e2066ba7..0d6cb65624c4 100644
--- a/arch/csky/abiv1/inc/abi/cacheflush.h
+++ b/arch/csky/abiv1/inc/abi/cacheflush.h
@@ -9,6 +9,8 @@
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *);
+#define flush_dcache_folio flush_dcache_folio
 
 #define flush_cache_mm(mm)			dcache_wbinv_all()
 #define flush_cache_page(vma, page, pfn)	cache_wbinv_all()
diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c
index 39c51399dd81..c1cf0d55a2a1 100644
--- a/arch/csky/abiv2/cacheflush.c
+++ b/arch/csky/abiv2/cacheflush.c
@@ -6,30 +6,30 @@
 #include <linux/mm.h>
 #include <asm/cache.h>
 
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-		      pte_t *pte)
+void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long address,
+		pte_t *pte, unsigned int nr)
 {
-	unsigned long addr;
+	unsigned long pfn = pte_pfn(*pte);
-	struct page *page;
+	struct folio *folio;
+	unsigned int i;
 
-	if (!pfn_valid(pte_pfn(*pte)))
+	if (!pfn_valid(pfn) || is_zero_pfn(pfn))
 		return;
 
-	page = pfn_to_page(pte_pfn(*pte));
-	if (page == ZERO_PAGE(0))
-		return;
+	folio = page_folio(pfn_to_page(pfn));
 
-	if (test_and_set_bit(PG_dcache_clean, &page->flags))
+	if (test_and_set_bit(PG_dcache_clean, &folio->flags))
 		return;
 
-	addr = (unsigned long) kmap_atomic(page);
-
-	dcache_wb_range(addr, addr + PAGE_SIZE);
+	for (i = 0; i < folio_nr_pages(folio); i++) {
+		unsigned long addr = (unsigned long) kmap_local_folio(folio,
+								i * PAGE_SIZE);
 
-	if (vma->vm_flags & VM_EXEC)
-		icache_inv_range(addr, addr + PAGE_SIZE);
-
-	kunmap_atomic((void *) addr);
+		dcache_wb_range(addr, addr + PAGE_SIZE);
+		if (vma->vm_flags & VM_EXEC)
+			icache_inv_range(addr, addr + PAGE_SIZE);
+		kunmap_local((void *) addr);
+	}
 }
 
 void flush_icache_deferred(struct mm_struct *mm)
diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h
index a565e00c3f70..9c728933a776 100644
--- a/arch/csky/abiv2/inc/abi/cacheflush.h
+++ b/arch/csky/abiv2/inc/abi/cacheflush.h
@@ -18,11 +18,17 @@
 
 #define PG_dcache_clean		PG_arch_1
 
+static inline void flush_dcache_folio(struct folio *folio)
+{
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		clear_bit(PG_dcache_clean, &folio->flags);
+}
+#define flush_dcache_folio flush_dcache_folio
+
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 static inline void flush_dcache_page(struct page *page)
 {
-	if (test_bit(PG_dcache_clean, &page->flags))
-		clear_bit(PG_dcache_clean, &page->flags);
+	flush_dcache_folio(page_folio(page));
 }
 
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index d4042495febc..a30ae048233e 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -90,7 +90,20 @@ static inline void set_pte(pte_t *p, pte_t pte)
 	/* prevent out of order excution */
 	smp_mb();
 }
-#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += PAGE_SIZE;
+	}
+}
+
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 {
@@ -263,8 +276,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern void paging_init(void);
 
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-		      pte_t *pte);
+void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long address,
+		pte_t *pte, unsigned int nr);
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 12/7] hexagon: Implement the new page table range API
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
  2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
  2023-02-15  0:04   ` [PATCH 11/7] csky: " Matthew Wilcox (Oracle)
@ 2023-02-15  0:04   ` Matthew Wilcox (Oracle)
  2023-02-15 16:22     ` Brian Cain
  2023-02-15  0:04   ` [PATCH 13/7] loongson: " Matthew Wilcox (Oracle)
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15  0:04 UTC (permalink / raw)
  To: linux-mm, Brian Cain, linux-hexagon, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes() and update_mmu_cache_range().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/hexagon/include/asm/cacheflush.h |  7 +++++--
 arch/hexagon/include/asm/pgtable.h    | 16 ++++++++++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
index 6eff0730e6ef..63ca314ede89 100644
--- a/arch/hexagon/include/asm/cacheflush.h
+++ b/arch/hexagon/include/asm/cacheflush.h
@@ -58,12 +58,15 @@ extern void flush_cache_all_hexagon(void);
  * clean the cache when the PTE is set.
  *
  */
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-					unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
 	/*  generic_ptrace_pokedata doesn't wind up here, does it?  */
 }
 
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
+
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long vaddr, void *dst, void *src, int len);
 #define copy_to_user_page copy_to_user_page
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 59393613d086..f58f1d920769 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -346,12 +346,24 @@ static inline int pte_exec(pte_t pte)
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
 
 /*
- * set_pte_at - update page table and do whatever magic may be
+ * set_ptes - update page table and do whatever magic may be
  * necessary to make the underlying hardware/firmware take note.
  *
  * VM may require a virtual instruction to alert the MMU.
  */
-#define set_pte_at(mm, addr, ptep, pte) set_pte(ptep, pte)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += PAGE_SIZE;
+	}
+}
+
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 {
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 13/7] loongson: Implement the new page table range API
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
                     ` (2 preceding siblings ...)
  2023-02-15  0:04   ` [PATCH 12/7] hexagon: " Matthew Wilcox (Oracle)
@ 2023-02-15  0:04   ` Matthew Wilcox (Oracle)
  2023-02-26  4:34     ` Matthew Wilcox
  2023-02-15 13:26   ` [PATCH 9/7] arm64: " Catalin Marinas
  2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
  5 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15  0:04 UTC (permalink / raw)
  To: linux-mm, Huacai Chen, WANG Xuerui, loongarch, linux-arch
  Cc: Matthew Wilcox (Oracle)

Add set_ptes() and update_mmu_cache_range().

THIS PATCH IS INCOMPLETE.  I DO NOT KNOW WHAT TO DO IN __update_tlb()

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/loongarch/include/asm/cacheflush.h |  2 ++
 arch/loongarch/include/asm/pgtable.h    | 30 ++++++++++++++++---------
 arch/loongarch/mm/tlb.c                 |  4 +++-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 0681788eb474..7907eb42bfbd 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -47,8 +47,10 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
 #define flush_cache_vmap(start, end)			do { } while (0)
 #define flush_cache_vunmap(start, end)			do { } while (0)
 #define flush_icache_page(vma, page)			do { } while (0)
+#define flush_icache_pages(vma, page, nr)		do { } while (0)
 #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
 #define flush_dcache_page(page)				do { } while (0)
+#define flush_dcache_folio(folio)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
 
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index d28fb9dbec59..0f5fa7c40c52 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -334,12 +334,20 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 	}
 }
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
-{
-	set_pte(ptep, pteval);
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += 1 << _PFN_SHIFT;
+	}
 }
 
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
+
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	/* Preserve global status for the pair */
@@ -442,14 +450,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 		     (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
 }
 
-extern void __update_tlb(struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep);
+extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr);
 
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
-	__update_tlb(vma, address, ptep);
+	__update_tlb(vma, address, ptep, nr);
 }
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 
 #define __HAVE_ARCH_UPDATE_MMU_TLB
 #define update_mmu_tlb	update_mmu_cache
@@ -457,7 +467,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 			unsigned long address, pmd_t *pmdp)
 {
-	__update_tlb(vma, address, (pte_t *)pmdp);
+	__update_tlb(vma, address, (pte_t *)pmdp, 1);
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 8bad6b0cff59..ac0b19dbd1dc 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -162,7 +162,8 @@ static void __update_hugetlb(struct vm_area_struct *vma, unsigned long address,
 #endif
 }
 
-void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr)
 {
 	int idx;
 	unsigned long flags;
@@ -187,6 +188,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep
 	write_csr_entryhi(address);
 	tlb_probe();
 	idx = read_csr_tlbidx();
+// I have no idea what to do here
 	write_csr_pagesize(PS_DEFAULT_SIZE);
 	write_csr_entrylo0(pte_val(*ptep++));
 	write_csr_entrylo1(pte_val(*ptep));
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 10/7] riscv: Implement the new page table range API
  2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
@ 2023-02-15  8:38     ` Yin, Fengwei
  2023-02-15 12:27       ` Yin, Fengwei
  2023-02-16  8:14       ` Alexandre Ghiti
  2023-02-16  8:16     ` Alexandre Ghiti
  1 sibling, 2 replies; 37+ messages in thread
From: Yin, Fengwei @ 2023-02-15  8:38 UTC (permalink / raw)
  To: linux-riscv@lists.infradead.org, linux-mm@kvack.org,
	alex@ghiti.fr, paul.walmsley@sifive.com, aou@eecs.berkeley.edu,
	palmer@dabbelt.com, willy@infradead.org,
	linux-arch@vger.kernel.org

On Wed, 2023-02-15 at 00:04 +0000, Matthew Wilcox (Oracle) wrote:
> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
> 
> The PG_dcache_clean flag changes from being a per-page bit to being a
> per-folio bit.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
>  arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
>  arch/riscv/mm/cacheflush.c          | 11 ++---------
>  3 files changed, 29 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/cacheflush.h
> b/arch/riscv/include/asm/cacheflush.h
> index 03e3b95ae6da..10e5e96f09b5 100644
> --- a/arch/riscv/include/asm/cacheflush.h
> +++ b/arch/riscv/include/asm/cacheflush.h
> @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
>  
>  #define PG_dcache_clean PG_arch_1
>  
> -static inline void flush_dcache_page(struct page *page)
> +static inline void flush_dcache_folio(struct folio *folio)
>  {
> -       /*
> -        * HugeTLB pages are always fully mapped and only head page
> will be
> -        * set PG_dcache_clean (see comments in flush_icache_pte()).
> -        */
> -       if (PageHuge(page))
> -               page = compound_head(page);
> -
> -       if (test_bit(PG_dcache_clean, &page->flags))
> -               clear_bit(PG_dcache_clean, &page->flags);
> +       if (test_bit(PG_dcache_clean, &folio->flags))
> +               clear_bit(PG_dcache_clean, &folio->flags);
>  }
> +#define flush_dcache_folio flush_dcache_folio
>  #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>  
> +static inline void flush_dcache_page(struct page *page)
> +{
> +       flush_dcache_folio(page_folio(page));
> +}
> +
>  /*
>   * RISC-V doesn't have an instruction to flush parts of the
> instruction cache,
>   * so instead we just flush the whole thing.
> diff --git a/arch/riscv/include/asm/pgtable.h
> b/arch/riscv/include/asm/pgtable.h
> index 13222fd5c4b4..03706c833e70 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte,
> pgprot_t newprot)
>  
>  
>  /* Commit new configuration to MMU hardware */
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> -       unsigned long address, pte_t *ptep)
> +static inline void update_mmu_cache_range(struct vm_area_struct
> *vma,
> +               unsigned long address, pte_t *ptep, unsigned int nr)
>  {
>         /*
>          * The kernel assumes that TLBs don't cache invalid entries,
> but
> @@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct
> vm_area_struct *vma,
>          * Relying on flush_tlb_fix_spurious_fault would suffice, but
>          * the extra traps reduce performance.  So, eagerly
> SFENCE.VMA.
>          */
> -       flush_tlb_page(vma, address);
> +       flush_tlb_range(vma, address, address + nr * PAGE_SIZE);

The riscv flush_tlb_range() looks a little strange to me. It passes
__sbi_tlb_flush_range() a stride of PAGE_SIZE, which means that if
(end - start) is larger than the stride, it triggers flush_tlb_all().

So this change could trigger flush_tlb_all(), whereas the original
flush_tlb_page() only flushed a single page.

My understanding is that flush_tlb_page() should be better here, since
flush_pmd_tlb_range() uses PMD_SIZE as its stride precisely to avoid
flush_tlb_all().  I must be missing something.
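
In other words, the effect I am describing is roughly this (a simplified
sketch, not the actual riscv code; sketch_flush_range() is a made-up
name):

	static void sketch_flush_range(unsigned long start, unsigned long size,
				       unsigned long stride)
	{
		if (size <= stride)
			local_flush_tlb_page(start);	/* one sfence.vma */
		else
			local_flush_tlb_all();		/* flush everything */
	}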

Regards
Yin, Fengwei

>  }
> +#define update_mmu_cache(vma, addr, ptep) \
> +       update_mmu_cache_range(vma, addr, ptep, 1)
>  
>  #define __HAVE_ARCH_UPDATE_MMU_TLB
>  #define update_mmu_tlb update_mmu_cache
> @@ -456,12 +458,21 @@ static inline void __set_pte_at(struct
> mm_struct *mm,
>         set_pte(ptep, pteval);
>  }
>  
> -static inline void set_pte_at(struct mm_struct *mm,
> -       unsigned long addr, pte_t *ptep, pte_t pteval)
> +static inline void set_ptes(struct mm_struct *mm, unsigned long
> addr,
> +               pte_t *ptep, pte_t pteval, unsigned int nr)
>  {
> -       page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
> -       __set_pte_at(mm, addr, ptep, pteval);
> +       page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
> +
> +       for (;;) {
> +               __set_pte_at(mm, addr, ptep, pteval);
> +               if (--nr == 0)
> +                       break;
> +               ptep++;
> +               addr += PAGE_SIZE;
> +               pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
> +       }
>  }
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep,
> pte, 1)
>  
>  static inline void pte_clear(struct mm_struct *mm,
>         unsigned long addr, pte_t *ptep)
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 3cc07ed45aeb..b725c3f6f57f 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool
> local)
>  #ifdef CONFIG_MMU
>  void flush_icache_pte(pte_t pte)
>  {
> -       struct page *page = pte_page(pte);
> +       struct folio *folio = page_folio(pte_page(pte));
>  
> -       /*
> -        * HugeTLB pages are always fully mapped, so only setting
> head page's
> -        * PG_dcache_clean flag is enough.
> -        */
> -       if (PageHuge(page))
> -               page = compound_head(page);
> -
> -       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
> +       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
>                 flush_icache_all();
>  }
>  #endif /* CONFIG_MMU */


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 10/7] riscv: Implement the new page table range API
  2023-02-15  8:38     ` Yin, Fengwei
@ 2023-02-15 12:27       ` Yin, Fengwei
  2023-02-16  8:14       ` Alexandre Ghiti
  1 sibling, 0 replies; 37+ messages in thread
From: Yin, Fengwei @ 2023-02-15 12:27 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), linux-mm, linux-riscv, Alexandre Ghiti,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, linux-arch



On 2/15/2023 4:35 PM, Yin Fengwei wrote:
> On Wed, 2023-02-15 at 00:04 +0000, Matthew Wilcox (Oracle) wrote:
>> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
>>
>> The PG_dcache_clean flag changes from being a per-page bit to being a
>> per-folio bit.
>>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>  arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
>>  arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
>>  arch/riscv/mm/cacheflush.c          | 11 ++---------
>>  3 files changed, 29 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/cacheflush.h
>> b/arch/riscv/include/asm/cacheflush.h
>> index 03e3b95ae6da..10e5e96f09b5 100644
>> --- a/arch/riscv/include/asm/cacheflush.h
>> +++ b/arch/riscv/include/asm/cacheflush.h
>> @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
>>  
>>  #define PG_dcache_clean PG_arch_1
>>  
>> -static inline void flush_dcache_page(struct page *page)
>> +static inline void flush_dcache_folio(struct folio *folio)
>>  {
>> -       /*
>> -        * HugeTLB pages are always fully mapped and only head page
>> will be
>> -        * set PG_dcache_clean (see comments in flush_icache_pte()).
>> -        */
>> -       if (PageHuge(page))
>> -               page = compound_head(page);
>> -
>> -       if (test_bit(PG_dcache_clean, &page->flags))
>> -               clear_bit(PG_dcache_clean, &page->flags);
>> +       if (test_bit(PG_dcache_clean, &folio->flags))
>> +               clear_bit(PG_dcache_clean, &folio->flags);
>>  }
>> +#define flush_dcache_folio flush_dcache_folio
>>  #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>>  
>> +static inline void flush_dcache_page(struct page *page)
>> +{
>> +       flush_dcache_folio(page_folio(page));
>> +}
>> +
>>  /*
>>   * RISC-V doesn't have an instruction to flush parts of the
>> instruction cache,
>>   * so instead we just flush the whole thing.
>> diff --git a/arch/riscv/include/asm/pgtable.h
>> b/arch/riscv/include/asm/pgtable.h
>> index 13222fd5c4b4..03706c833e70 100644
>> --- a/arch/riscv/include/asm/pgtable.h
>> +++ b/arch/riscv/include/asm/pgtable.h
>> @@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte,
>> pgprot_t newprot)
>>  
>>  
>>  /* Commit new configuration to MMU hardware */
>> -static inline void update_mmu_cache(struct vm_area_struct *vma,
>> -       unsigned long address, pte_t *ptep)
>> +static inline void update_mmu_cache_range(struct vm_area_struct
>> *vma,
>> +               unsigned long address, pte_t *ptep, unsigned int nr)
>>  {
>>         /*
>>          * The kernel assumes that TLBs don't cache invalid entries,
>> but
>> @@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct
>> vm_area_struct *vma,
>>          * Relying on flush_tlb_fix_spurious_fault would suffice, but
>>          * the extra traps reduce performance.  So, eagerly
>> SFENCE.VMA.
>>          */
>> -       flush_tlb_page(vma, address);
>> +       flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
> 
> The flush_tlb_range() of riscv is a little bit strange to me. It gives
> __sbi_tlb_flush_range() stride PAGE_SIZE. That means if (end - start)
> is larger than stride, it will trigger flush_tlb_all().
> 
> So this change could trigger flush_tlb_all() while original
> flush_tlb_page() just trigger flush_tlb_page().
> 
> My understanding is flush_tlb_page() should be better because 
> flush_pmd_tlb_range() has PMD_SIZE as stride to avoid flush_tlb_all().
> I must miss something here.
A huge page can be covered by a single TLB entry, so using PMD_SIZE as
the stride there makes sense.
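
To make the stride behaviour concrete, here is a minimal sketch of the
decision being described (illustrative only; it assumes the
local_flush_tlb_page()/local_flush_tlb_all() helpers from
<asm/tlbflush.h> and is not the actual __sbi_tlb_flush_range() code):

	/*
	 * Illustrative only: with a PAGE_SIZE stride, any range spanning
	 * more than one stride falls back to flushing the whole TLB.
	 */
	static void example_flush(unsigned long start, unsigned long size,
				  unsigned long stride)
	{
		if (size <= stride)
			local_flush_tlb_page(start);
		else
			local_flush_tlb_all();
	}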

Regards
Yin, Fengwei 

> 
> Regards
> Yin, Fengwei
> 
>>  }
>> +#define update_mmu_cache(vma, addr, ptep) \
>> +       update_mmu_cache_range(vma, addr, ptep, 1)
>>  
>>  #define __HAVE_ARCH_UPDATE_MMU_TLB
>>  #define update_mmu_tlb update_mmu_cache
>> @@ -456,12 +458,21 @@ static inline void __set_pte_at(struct
>> mm_struct *mm,
>>         set_pte(ptep, pteval);
>>  }
>>  
>> -static inline void set_pte_at(struct mm_struct *mm,
>> -       unsigned long addr, pte_t *ptep, pte_t pteval)
>> +static inline void set_ptes(struct mm_struct *mm, unsigned long
>> addr,
>> +               pte_t *ptep, pte_t pteval, unsigned int nr)
>>  {
>> -       page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
>> -       __set_pte_at(mm, addr, ptep, pteval);
>> +       page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
>> +
>> +       for (;;) {
>> +               __set_pte_at(mm, addr, ptep, pteval);
>> +               if (--nr == 0)
>> +                       break;
>> +               ptep++;
>> +               addr += PAGE_SIZE;
>> +               pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
>> +       }
>>  }
>> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep,
>> pte, 1)
>>  
>>  static inline void pte_clear(struct mm_struct *mm,
>>         unsigned long addr, pte_t *ptep)
>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>> index 3cc07ed45aeb..b725c3f6f57f 100644
>> --- a/arch/riscv/mm/cacheflush.c
>> +++ b/arch/riscv/mm/cacheflush.c
>> @@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool
>> local)
>>  #ifdef CONFIG_MMU
>>  void flush_icache_pte(pte_t pte)
>>  {
>> -       struct page *page = pte_page(pte);
>> +       struct folio *folio = page_folio(pte_page(pte));
>>  
>> -       /*
>> -        * HugeTLB pages are always fully mapped, so only setting
>> head page's
>> -        * PG_dcache_clean flag is enough.
>> -        */
>> -       if (PageHuge(page))
>> -               page = compound_head(page);
>> -
>> -       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
>> +       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
>>                 flush_icache_all();
>>  }
>>  #endif /* CONFIG_MMU */
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 9/7] arm64: Implement the new page table range API
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
                     ` (3 preceding siblings ...)
  2023-02-15  0:04   ` [PATCH 13/7] loongson: " Matthew Wilcox (Oracle)
@ 2023-02-15 13:26   ` Catalin Marinas
  2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
  5 siblings, 0 replies; 37+ messages in thread
From: Catalin Marinas @ 2023-02-15 13:26 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-arm-kernel, Will Deacon, linux-arch

On Wed, Feb 15, 2023 at 12:04:42AM +0000, Matthew Wilcox wrote:
> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
> 
> The PG_dcache_clear flag changes from being a per-page bit to being a
> per-folio bit.

Nit: s/PG_dcache_clear/PG_dcache_clean/

I should do the same with the PG_mte_tagged bit (I already started but
got distracted by other things).

For this patch:

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* RE: [PATCH 12/7] hexagon: Implement the new page table range API
  2023-02-15  0:04   ` [PATCH 12/7] hexagon: " Matthew Wilcox (Oracle)
@ 2023-02-15 16:22     ` Brian Cain
  0 siblings, 0 replies; 37+ messages in thread
From: Brian Cain @ 2023-02-15 16:22 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), linux-mm@kvack.org,
	linux-hexagon@vger.kernel.org, linux-arch@vger.kernel.org



> -----Original Message-----
> From: Matthew Wilcox (Oracle) <willy@infradead.org>
...
> 
> Add set_ptes() and update_mmu_cache_range().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  arch/hexagon/include/asm/cacheflush.h |  7 +++++--
>  arch/hexagon/include/asm/pgtable.h    | 16 ++++++++++++++--
>  2 files changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/hexagon/include/asm/cacheflush.h
> b/arch/hexagon/include/asm/cacheflush.h
> index 6eff0730e6ef..63ca314ede89 100644
> --- a/arch/hexagon/include/asm/cacheflush.h
> +++ b/arch/hexagon/include/asm/cacheflush.h
> @@ -58,12 +58,15 @@ extern void flush_cache_all_hexagon(void);
>   * clean the cache when the PTE is set.
>   *
>   */
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> -                                       unsigned long address, pte_t *ptep)
> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
> +               unsigned long address, pte_t *ptep, unsigned int nr)
>  {
>         /*  generic_ptrace_pokedata doesn't wind up here, does it?  */
>  }
> 
> +#define update_mmu_cache(vma, addr, ptep) \
> +       update_mmu_cache_range(vma, addr, ptep, 1)
> +
>  void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
>                        unsigned long vaddr, void *dst, void *src, int len);
>  #define copy_to_user_page copy_to_user_page
> diff --git a/arch/hexagon/include/asm/pgtable.h
> b/arch/hexagon/include/asm/pgtable.h
> index 59393613d086..f58f1d920769 100644
> --- a/arch/hexagon/include/asm/pgtable.h
> +++ b/arch/hexagon/include/asm/pgtable.h
> @@ -346,12 +346,24 @@ static inline int pte_exec(pte_t pte)
>  #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
> 
>  /*
> - * set_pte_at - update page table and do whatever magic may be
> + * set_ptes - update page table and do whatever magic may be
>   * necessary to make the underlying hardware/firmware take note.
>   *
>   * VM may require a virtual instruction to alert the MMU.
>   */
> -#define set_pte_at(mm, addr, ptep, pte) set_pte(ptep, pte)
> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
> +               pte_t *ptep, pte_t pte, unsigned int nr)
> +{
> +       for (;;) {
> +               set_pte(ptep, pte);
> +               if (--nr == 0)
> +                       break;
> +               ptep++;
> +               pte_val(pte) += PAGE_SIZE;
> +       }
> +}
> +
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
> 
>  static inline unsigned long pmd_page_vaddr(pmd_t pmd)
>  {
> --
> 2.39.1

Acked-by: Brian Cain <bcain@quicinc.com>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 14/17] ia64: Implement the new page table range API
  2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
                     ` (4 preceding siblings ...)
  2023-02-15 13:26   ` [PATCH 9/7] arm64: " Catalin Marinas
@ 2023-02-15 20:09   ` Matthew Wilcox (Oracle)
  2023-02-15 20:09     ` [PATCH 15/17] m68k: " Matthew Wilcox (Oracle)
                       ` (2 more replies)
  5 siblings, 3 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15 20:09 UTC (permalink / raw)
  To: linux-mm, linux-ia64, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
PG_arch_1 (aka PG_dcache_clean) becomes a per-folio flag instead of
per-page, which makes arch_dma_mark_clean() and mark_clean() a little
more exciting.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/ia64/hp/common/sba_iommu.c    | 26 +++++++++++++++-----------
 arch/ia64/include/asm/cacheflush.h | 14 ++++++++++----
 arch/ia64/include/asm/pgtable.h    | 14 +++++++++++++-
 arch/ia64/mm/init.c                | 29 +++++++++++++++++++----------
 4 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 8ad6946521d8..48d475f10003 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -798,22 +798,26 @@ sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
 #endif
 
 #ifdef ENABLE_MARK_CLEAN
-/**
+/*
  * Since DMA is i-cache coherent, any (complete) pages that were written via
  * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
  */
-static void
-mark_clean (void *addr, size_t size)
+static void mark_clean(void *addr, size_t size)
 {
-	unsigned long pg_addr, end;
-
-	pg_addr = PAGE_ALIGN((unsigned long) addr);
-	end = (unsigned long) addr + size;
-	while (pg_addr + PAGE_SIZE <= end) {
-		struct page *page = virt_to_page((void *)pg_addr);
-		set_bit(PG_arch_1, &page->flags);
-		pg_addr += PAGE_SIZE;
+	struct folio *folio = virt_to_folio(addr);
+	ssize_t left = size;
+	size_t offset = offset_in_folio(folio, addr);
+
+	if (offset) {
+		left -= folio_size(folio) - offset;
+		folio = folio_next(folio);
+	}
+
+	while (left >= (ssize_t)folio_size(folio)) {
+		set_bit(PG_arch_1, &folio->flags);
+		left -= folio_size(folio);
+		folio = folio_next(folio);
 	}
 }
 #endif
diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h
index 708c0fa5d975..eac493fa9e0d 100644
--- a/arch/ia64/include/asm/cacheflush.h
+++ b/arch/ia64/include/asm/cacheflush.h
@@ -13,10 +13,16 @@
 #include <asm/page.h>
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-#define flush_dcache_page(page)			\
-do {						\
-	clear_bit(PG_arch_1, &(page)->flags);	\
-} while (0)
+static inline void flush_dcache_folio(struct folio *folio)
+{
+	clear_bit(PG_arch_1, &folio->flags);
+}
+#define flush_dcache_folio flush_dcache_folio
+
+static inline void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
+}
 
 extern void flush_icache_range(unsigned long start, unsigned long end);
 #define flush_icache_range flush_icache_range
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 21c97e31a28a..0c2be4ea664b 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -303,7 +303,18 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 	*ptep = pteval;
 }
 
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += PAGE_SIZE;
+	}
+}
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 /*
  * Make page protection values cacheable, uncacheable, or write-
@@ -396,6 +407,7 @@ pte_same (pte_t a, pte_t b)
 	return pte_val(a) == pte_val(b);
 }
 
+#define update_mmu_cache_range(vma, address, ptep, nr) do { } while (0)
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 7f5353e28516..12aef25944aa 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -50,30 +50,39 @@ void
 __ia64_sync_icache_dcache (pte_t pte)
 {
 	unsigned long addr;
-	struct page *page;
+	struct folio *folio;
 
-	page = pte_page(pte);
-	addr = (unsigned long) page_address(page);
+	folio = page_folio(pte_page(pte));
+	addr = (unsigned long)folio_address(folio);
 
-	if (test_bit(PG_arch_1, &page->flags))
+	if (test_bit(PG_arch_1, &folio->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	flush_icache_range(addr, addr + page_size(page));
-	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
+	flush_icache_range(addr, addr + folio_size(folio));
+	set_bit(PG_arch_1, &folio->flags);	/* mark page as clean */
 }
 
 /*
- * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * Since DMA is i-cache coherent, any (complete) folios that were written via
  * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
  */
 void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
 {
-	unsigned long pfn = PHYS_PFN(paddr);
+	struct folio *folio = page_folio(phys_to_page(paddr));
+	ssize_t left = size;
+	size_t offset = offset_in_folio(folio, paddr);
 
-	do {
+	if (offset) {
+		left -= folio_size(folio) - offset;
+		folio = folio_next(folio);
+	}
+
+	while (left >= (ssize_t)folio_size(folio)) {
-		set_bit(PG_arch_1, &pfn_to_page(pfn)->flags);
-	} while (++pfn <= PHYS_PFN(paddr + size - 1));
+		set_bit(PG_arch_1, &folio->flags);
+		left -= folio_size(folio);
+		folio = folio_next(folio);
+	}
 }
 
 inline void
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 15/17] m68k: Implement the new page table range API
  2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
@ 2023-02-15 20:09     ` Matthew Wilcox (Oracle)
  2023-02-16  0:59       ` Michael Schmitz
  2023-02-15 20:09     ` [PATCH 16/17] microblaze: " Matthew Wilcox (Oracle)
  2023-02-15 20:09     ` [PATCH 17/17] mips: " Matthew Wilcox (Oracle)
  2 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15 20:09 UTC (permalink / raw)
  To: linux-mm, linux-m68k, Geert Uytterhoeven, linux-arch
  Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
flush_dcache_folio().  I'm not entirely certain that the 040/060 case
in __flush_pages_to_ram() is correct.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/m68k/include/asm/cacheflush_mm.h | 12 ++++++++----
 arch/m68k/include/asm/pgtable_mm.h    | 21 ++++++++++++++++++---
 arch/m68k/mm/motorola.c               |  2 +-
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 1ac55e7b47f0..2244c35178d0 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -220,13 +220,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm
 
 /* Push the page at kernel virtual address and clear the icache */
 /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
-static inline void __flush_page_to_ram(void *vaddr)
+static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
 {
 	if (CPU_IS_COLDFIRE) {
 		unsigned long addr, start, end;
 		addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1);
 		start = addr & ICACHE_SET_MASK;
-		end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK;
+		end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK;
 		if (start > end) {
 			flush_cf_bcache(0, end);
 			end = ICACHE_MAX_ADDR;
@@ -249,10 +249,14 @@ static inline void __flush_page_to_ram(void *vaddr)
 }
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-#define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
+#define flush_dcache_page(page)	__flush_pages_to_ram(page_address(page), 1)
+#define flush_dcache_folio(folio)		\
+	__flush_pages_to_ram(folio_address(folio), folio_nr_pages(folio))
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-#define flush_icache_page(vma, page)	__flush_page_to_ram(page_address(page))
+#define flush_icache_pages(vma, page, nr)	\
+	__flush_pages_to_ram(page_address(page), nr)
+#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
 
 extern void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
 				    unsigned long addr, int len);
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index b93c41fe2067..400206c17c97 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -31,8 +31,20 @@
 	do{							\
 		*(pteptr) = (pteval);				\
 	} while(0)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += PAGE_SIZE;
+	}
+}
+
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 /* PMD_SHIFT determines the size of the area a second-level page table can map */
 #if CONFIG_PGTABLE_LEVELS == 3
@@ -138,11 +150,14 @@ extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
  * tables contain all the necessary information.  The Sun3 does, but
  * they are updated on demand.
  */
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-				    unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
 }
 
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
+
 #endif /* !__ASSEMBLY__ */
 
 /* MMU-specific headers */
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 2a375637e007..7784d0fcdf6e 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -81,7 +81,7 @@ static inline void cache_page(void *vaddr)
 
 void mmu_page_ctor(void *page)
 {
-	__flush_page_to_ram(page);
+	__flush_pages_to_ram(page, 1);
 	flush_tlb_kernel_page(page);
 	nocache_page(page);
 }
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 16/17] microblaze: Implement the new page table range API
  2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
  2023-02-15 20:09     ` [PATCH 15/17] m68k: " Matthew Wilcox (Oracle)
@ 2023-02-15 20:09     ` Matthew Wilcox (Oracle)
  2023-02-15 20:09     ` [PATCH 17/17] mips: " Matthew Wilcox (Oracle)
  2 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15 20:09 UTC (permalink / raw)
  To: linux-mm, Michal Simek, linux-arch; +Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
flush_dcache_folio().  Also change the calling convention for set_pte()
to be the same as other architectures.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/microblaze/include/asm/cacheflush.h |  8 ++++++++
 arch/microblaze/include/asm/pgtable.h    | 17 ++++++++++++-----
 arch/microblaze/include/asm/tlbflush.h   |  4 +++-
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h
index 39f8fb6768d8..e6641ff98cb3 100644
--- a/arch/microblaze/include/asm/cacheflush.h
+++ b/arch/microblaze/include/asm/cacheflush.h
@@ -74,6 +74,14 @@ do { \
 	flush_dcache_range((unsigned) (addr), (unsigned) (addr) + PAGE_SIZE); \
 } while (0);
 
+static inline void flush_dcache_folio(struct folio *folio)
+{
+	unsigned long addr = folio_pfn(folio) << PAGE_SHIFT;
+
+	flush_dcache_range(addr, addr + folio_size(folio));
+}
+#define flush_dcache_folio flush_dcache_folio
+
 #define flush_cache_page(vma, vmaddr, pfn) \
 	flush_dcache_range(pfn << PAGE_SHIFT, (pfn << PAGE_SHIFT) + PAGE_SIZE);
 
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index d1b8272abcd9..a01e1369b486 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -330,18 +330,25 @@ static inline unsigned long pte_update(pte_t *p, unsigned long clr,
 /*
  * set_pte stores a linux PTE into the linux page table.
  */
-static inline void set_pte(struct mm_struct *mm, unsigned long addr,
-		pte_t *ptep, pte_t pte)
+static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
 }
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-		pte_t *ptep, pte_t pte)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
 {
-	*ptep = pte;
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += 1 << PFN_SHIFT_OFFSET;
+	}
 }
 
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 		unsigned long address, pte_t *ptep)
diff --git a/arch/microblaze/include/asm/tlbflush.h b/arch/microblaze/include/asm/tlbflush.h
index 2038168ed128..1b179e5e9062 100644
--- a/arch/microblaze/include/asm/tlbflush.h
+++ b/arch/microblaze/include/asm/tlbflush.h
@@ -33,7 +33,9 @@ static inline void local_flush_tlb_range(struct vm_area_struct *vma,
 
 #define flush_tlb_kernel_range(start, end)	do { } while (0)
 
-#define update_mmu_cache(vma, addr, ptep)	do { } while (0)
+#define update_mmu_cache_range(vma, addr, ptep, nr)	do { } while (0)
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 
 #define flush_tlb_all local_flush_tlb_all
 #define flush_tlb_mm local_flush_tlb_mm
-- 
2.39.1



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 17/17] mips: Implement the new page table range API
  2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
  2023-02-15 20:09     ` [PATCH 15/17] m68k: " Matthew Wilcox (Oracle)
  2023-02-15 20:09     ` [PATCH 16/17] microblaze: " Matthew Wilcox (Oracle)
@ 2023-02-15 20:09     ` Matthew Wilcox (Oracle)
  2 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-02-15 20:09 UTC (permalink / raw)
  To: linux-mm, linux-mips, Thomas Bogendoerfer, linux-arch
  Cc: Matthew Wilcox (Oracle)

Add set_ptes(), update_mmu_cache_range(), flush_icache_pages()
and flush_dcache_folio().  PG_arch_1 (aka PG_dcache_dirty) becomes a
per-folio flag instead of per-page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/mips/include/asm/cacheflush.h | 32 +++++++++++------
 arch/mips/mm/c-r4k.c               |  5 +--
 arch/mips/mm/cache.c               | 56 +++++++++++++++---------------
 arch/mips/mm/init.c                | 17 +++++----
 4 files changed, 63 insertions(+), 47 deletions(-)

diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index b3dc9c589442..2683cade42ef 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -36,12 +36,12 @@
  */
 #define PG_dcache_dirty			PG_arch_1
 
-#define Page_dcache_dirty(page)		\
-	test_bit(PG_dcache_dirty, &(page)->flags)
-#define SetPageDcacheDirty(page)	\
-	set_bit(PG_dcache_dirty, &(page)->flags)
-#define ClearPageDcacheDirty(page)	\
-	clear_bit(PG_dcache_dirty, &(page)->flags)
+#define folio_test_dcache_dirty(folio)		\
+	test_bit(PG_dcache_dirty, &(folio)->flags)
+#define folio_set_dcache_dirty(folio)	\
+	set_bit(PG_dcache_dirty, &(folio)->flags)
+#define folio_clear_dcache_dirty(folio)	\
+	clear_bit(PG_dcache_dirty, &(folio)->flags)
 
 extern void (*flush_cache_all)(void);
 extern void (*__flush_cache_all)(void);
@@ -50,15 +50,24 @@ extern void (*flush_cache_mm)(struct mm_struct *mm);
 extern void (*flush_cache_range)(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end);
 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
-extern void __flush_dcache_page(struct page *page);
+extern void __flush_dcache_pages(struct page *page, unsigned int nr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+static inline void flush_dcache_folio(struct folio *folio)
+{
+	if (cpu_has_dc_aliases)
+		__flush_dcache_pages(&folio->page, folio_nr_pages(folio));
+	else if (!cpu_has_ic_fills_f_dc)
+		folio_set_dcache_dirty(folio);
+}
+#define flush_dcache_folio flush_dcache_folio
+
 static inline void flush_dcache_page(struct page *page)
 {
 	if (cpu_has_dc_aliases)
-		__flush_dcache_page(page);
+		__flush_dcache_pages(page, 1);
 	else if (!cpu_has_ic_fills_f_dc)
-		SetPageDcacheDirty(page);
+		folio_set_dcache_dirty(page_folio(page));
 }
 
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
@@ -73,10 +82,11 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 		__flush_anon_page(page, vmaddr);
 }
 
-static inline void flush_icache_page(struct vm_area_struct *vma,
-	struct page *page)
+static inline void flush_icache_pages(struct vm_area_struct *vma,
+		struct page *page, unsigned int nr)
 {
 }
+#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
 
 extern void (*flush_icache_range)(unsigned long start, unsigned long end);
 extern void (*local_flush_icache_range)(unsigned long start, unsigned long end);
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index a549fa98c2f4..7d2a42f0cffd 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -679,13 +679,14 @@ static inline void local_r4k_flush_cache_page(void *args)
 	if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID))
 		vaddr = NULL;
 	else {
+		struct folio *folio = page_folio(page);
 		/*
 		 * Use kmap_coherent or kmap_atomic to do flushes for
 		 * another ASID than the current one.
 		 */
 		map_coherent = (cpu_has_dc_aliases &&
-				page_mapcount(page) &&
-				!Page_dcache_dirty(page));
+				folio_mapped(folio) &&
+				!folio_test_dcache_dirty(folio));
 		if (map_coherent)
 			vaddr = kmap_coherent(page, addr);
 		else
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 11b3e7ddafd5..0668435521fc 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -82,13 +82,15 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
 	return 0;
 }
 
-void __flush_dcache_page(struct page *page)
+void __flush_dcache_pages(struct page *page, unsigned int nr)
 {
-	struct address_space *mapping = page_mapping_file(page);
+	struct folio *folio = page_folio(page);
+	struct address_space *mapping = folio_flush_mapping(folio);
 	unsigned long addr;
+	unsigned int i;
 
 	if (mapping && !mapping_mapped(mapping)) {
-		SetPageDcacheDirty(page);
+		folio_set_dcache_dirty(folio);
 		return;
 	}
 
@@ -97,25 +99,21 @@ void __flush_dcache_page(struct page *page)
 	 * case is for exec env/arg pages and those are %99 certainly going to
 	 * get faulted into the tlb (and thus flushed) anyways.
 	 */
-	if (PageHighMem(page))
-		addr = (unsigned long)kmap_atomic(page);
-	else
-		addr = (unsigned long)page_address(page);
-
-	flush_data_cache_page(addr);
-
-	if (PageHighMem(page))
-		kunmap_atomic((void *)addr);
+	for (i = 0; i < nr; i++) {
+		addr = (unsigned long)kmap_local_page(page + i);
+		flush_data_cache_page(addr);
+		kunmap_local((void *)addr);
+	}
 }
-
-EXPORT_SYMBOL(__flush_dcache_page);
+EXPORT_SYMBOL(__flush_dcache_pages);
 
 void __flush_anon_page(struct page *page, unsigned long vmaddr)
 {
 	unsigned long addr = (unsigned long) page_address(page);
+	struct folio *folio = page_folio(page);
 
 	if (pages_do_alias(addr, vmaddr)) {
-		if (page_mapcount(page) && !Page_dcache_dirty(page)) {
+		if (folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
 			void *kaddr;
 
 			kaddr = kmap_coherent(page, vmaddr);
@@ -130,27 +128,29 @@ EXPORT_SYMBOL(__flush_anon_page);
 
 void __update_cache(unsigned long address, pte_t pte)
 {
-	struct page *page;
+	struct folio *folio;
 	unsigned long pfn, addr;
 	int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc;
+	unsigned int i;
 
 	pfn = pte_pfn(pte);
 	if (unlikely(!pfn_valid(pfn)))
 		return;
-	page = pfn_to_page(pfn);
-	if (Page_dcache_dirty(page)) {
-		if (PageHighMem(page))
-			addr = (unsigned long)kmap_atomic(page);
-		else
-			addr = (unsigned long)page_address(page);
-
-		if (exec || pages_do_alias(addr, address & PAGE_MASK))
-			flush_data_cache_page(addr);
 
-		if (PageHighMem(page))
-			kunmap_atomic((void *)addr);
+	folio = page_folio(pfn_to_page(pfn));
+	address &= PAGE_MASK;
+	address -= offset_in_folio(folio, pfn << PAGE_SHIFT);
+
+	if (folio_test_dcache_dirty(folio)) {
+		for (i = 0; i < folio_nr_pages(folio); i++) {
+			addr = (unsigned long)kmap_local_folio(folio, i);
 
-		ClearPageDcacheDirty(page);
+			if (exec || pages_do_alias(addr, address))
+				flush_data_cache_page(addr);
+			kunmap_local((void *)addr);
+			address += PAGE_SIZE;
+		}
+		folio_clear_dcache_dirty(folio);
 	}
 }
 
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 5a8002839550..19d4ca3b3fbd 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -88,7 +88,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
 	pte_t pte;
 	int tlbidx;
 
-	BUG_ON(Page_dcache_dirty(page));
+	BUG_ON(folio_test_dcache_dirty(page_folio(page)));
 
 	preempt_disable();
 	pagefault_disable();
@@ -169,11 +169,12 @@ void kunmap_coherent(void)
 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma)
 {
+	struct folio *src = page_folio(from);
 	void *vfrom, *vto;
 
 	vto = kmap_atomic(to);
 	if (cpu_has_dc_aliases &&
-	    page_mapcount(from) && !Page_dcache_dirty(from)) {
+	    folio_mapped(src) && !folio_test_dcache_dirty(src)) {
 		vfrom = kmap_coherent(from, vaddr);
 		copy_page(vto, vfrom);
 		kunmap_coherent();
@@ -194,15 +195,17 @@ void copy_to_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len)
 {
+	struct folio *folio = page_folio(page);
+
 	if (cpu_has_dc_aliases &&
-	    page_mapcount(page) && !Page_dcache_dirty(page)) {
+	    folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
 		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
 		memcpy(vto, src, len);
 		kunmap_coherent();
 	} else {
 		memcpy(dst, src, len);
 		if (cpu_has_dc_aliases)
-			SetPageDcacheDirty(page);
+			folio_set_dcache_dirty(folio);
 	}
 	if (vma->vm_flags & VM_EXEC)
 		flush_cache_page(vma, vaddr, page_to_pfn(page));
@@ -212,15 +215,17 @@ void copy_from_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len)
 {
+	struct folio *folio = page_folio(page);
+
 	if (cpu_has_dc_aliases &&
-	    page_mapcount(page) && !Page_dcache_dirty(page)) {
+	    folio_mapped(folio) && !folio_test_dcache_dirty(folio)) {
 		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
 		memcpy(dst, vfrom, len);
 		kunmap_coherent();
 	} else {
 		memcpy(dst, src, len);
 		if (cpu_has_dc_aliases)
-			SetPageDcacheDirty(page);
+			folio_set_dcache_dirty(folio);
 	}
 }
 EXPORT_SYMBOL_GPL(copy_from_user_page);
-- 
2.39.1
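
For context, a rough sketch of how a caller might use the new interfaces
to map a whole folio in one go (example_map_folio() is hypothetical and
not part of this series; the generic code that actually drives
set_ptes() is not shown here):

	/*
	 * Hypothetical caller: map every page of a folio with one
	 * set_ptes() call instead of a per-page loop.
	 */
	static void example_map_folio(struct vm_area_struct *vma,
			unsigned long addr, pte_t *ptep, struct folio *folio)
	{
		unsigned int nr = folio_nr_pages(folio);
		pte_t pte = mk_pte(&folio->page, vma->vm_page_prot);

		flush_icache_pages(vma, &folio->page, nr);
		set_ptes(vma->vm_mm, addr, ptep, pte, nr);
		update_mmu_cache_range(vma, addr, ptep, nr);
	}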



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 15/17] m68k: Implement the new page table range API
  2023-02-15 20:09     ` [PATCH 15/17] m68k: " Matthew Wilcox (Oracle)
@ 2023-02-16  0:59       ` Michael Schmitz
  2023-02-16  4:26         ` Matthew Wilcox
  0 siblings, 1 reply; 37+ messages in thread
From: Michael Schmitz @ 2023-02-16  0:59 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), linux-mm, linux-m68k, Geert Uytterhoeven,
	linux-arch

Matthew,

On 16/02/23 09:09, Matthew Wilcox (Oracle) wrote:
> Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
> flush_dcache_folio().  I'm not entirely certain that the 040/060 case
> in __flush_pages_to_ram() is correct.

I'm pretty sure you need to iterate to hit each of the pages - the code 
as is will only push cache entries for the first page.

Quoting the 040 UM:

"Both instructions [cinv, cpush] allow operation on a single cache line, 
all cache lines in a specific page, or an entire cache, and can select 
one or both caches for the operation. For line and page operations, a 
physical address in an address register specifies the memory address."

Cheers,

     Michael


> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   arch/m68k/include/asm/cacheflush_mm.h | 12 ++++++++----
>   arch/m68k/include/asm/pgtable_mm.h    | 21 ++++++++++++++++++---
>   arch/m68k/mm/motorola.c               |  2 +-
>   3 files changed, 27 insertions(+), 8 deletions(-)
>
> diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
> index 1ac55e7b47f0..2244c35178d0 100644
> --- a/arch/m68k/include/asm/cacheflush_mm.h
> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -220,13 +220,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm
>   
>   /* Push the page at kernel virtual address and clear the icache */
>   /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
> -static inline void __flush_page_to_ram(void *vaddr)
> +static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
>   {
>   	if (CPU_IS_COLDFIRE) {
>   		unsigned long addr, start, end;
>   		addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1);
>   		start = addr & ICACHE_SET_MASK;
> -		end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK;
> +		end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK;
>   		if (start > end) {
>   			flush_cf_bcache(0, end);
>   			end = ICACHE_MAX_ADDR;
> @@ -249,10 +249,14 @@ static inline void __flush_page_to_ram(void *vaddr)
>   }
>   
>   #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
> -#define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
> +#define flush_dcache_page(page)	__flush_pages_to_ram(page_address(page), 1)
> +#define flush_dcache_folio(folio)		\
> +	__flush_pages_to_ram(folio_address(folio), folio_nr_pages(folio))
>   #define flush_dcache_mmap_lock(mapping)		do { } while (0)
>   #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
> -#define flush_icache_page(vma, page)	__flush_page_to_ram(page_address(page))
> +#define flush_icache_pages(vma, page, nr)	\
> +	__flush_pages_to_ram(page_address(page), nr)
> +#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
>   
>   extern void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
>   				    unsigned long addr, int len);
> diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
> index b93c41fe2067..400206c17c97 100644
> --- a/arch/m68k/include/asm/pgtable_mm.h
> +++ b/arch/m68k/include/asm/pgtable_mm.h
> @@ -31,8 +31,20 @@
>   	do{							\
>   		*(pteptr) = (pteval);				\
>   	} while(0)
> -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
>   
> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
> +		pte_t *ptep, pte_t pte, unsigned int nr)
> +{
> +	for (;;) {
> +		set_pte(ptep, pte);
> +		if (--nr == 0)
> +			break;
> +		ptep++;
> +		pte_val(pte) += PAGE_SIZE;
> +	}
> +}
> +
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
>   
>   /* PMD_SHIFT determines the size of the area a second-level page table can map */
>   #if CONFIG_PGTABLE_LEVELS == 3
> @@ -138,11 +150,14 @@ extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
>    * tables contain all the necessary information.  The Sun3 does, but
>    * they are updated on demand.
>    */
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> -				    unsigned long address, pte_t *ptep)
> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
> +		unsigned long address, pte_t *ptep, unsigned int nr)
>   {
>   }
>   
> +#define update_mmu_cache(vma, addr, ptep) \
> +	update_mmu_cache_range(vma, addr, ptep, 1)
> +
>   #endif /* !__ASSEMBLY__ */
>   
>   /* MMU-specific headers */
> diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
> index 2a375637e007..7784d0fcdf6e 100644
> --- a/arch/m68k/mm/motorola.c
> +++ b/arch/m68k/mm/motorola.c
> @@ -81,7 +81,7 @@ static inline void cache_page(void *vaddr)
>   
>   void mmu_page_ctor(void *page)
>   {
> -	__flush_page_to_ram(page);
> +	__flush_pages_to_ram(page, 1);
>   	flush_tlb_kernel_page(page);
>   	nocache_page(page);
>   }


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 15/17] m68k: Implement the new page table range API
  2023-02-16  0:59       ` Michael Schmitz
@ 2023-02-16  4:26         ` Matthew Wilcox
  2023-02-16  7:55           ` Geert Uytterhoeven
  2023-02-16 22:03           ` Michael Schmitz
  0 siblings, 2 replies; 37+ messages in thread
From: Matthew Wilcox @ 2023-02-16  4:26 UTC (permalink / raw)
  To: Michael Schmitz; +Cc: linux-mm, linux-m68k, Geert Uytterhoeven, linux-arch

On Thu, Feb 16, 2023 at 01:59:44PM +1300, Michael Schmitz wrote:
> Matthew,
> 
> On 16/02/23 09:09, Matthew Wilcox (Oracle) wrote:
> > Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
> > flush_dcache_folio().  I'm not entirely certain that the 040/060 case
> > in __flush_pages_to_ram() is correct.
> 
> I'm pretty sure you need to iterate to hit each of the pages - the code as
> is will only push cache entries for the first page.
> 
> Quoting the 040 UM:
> 
> "Both instructions [cinv, cpush] allow operation on a single cache line, all
> cache lines in a specific page, or an entire cache, and can select one or
> both caches for the operation. For line and page operations, a physical
> address in an address register specifies the memory address."

I actually found that!  What I didn't find was how to tell if this
cpush insn is the one which is operating on a single cache line,
a single page, or the entire cache.

So I should do a loop around this asm and call it once for each page
we're flushing?



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 15/17] m68k: Implement the new page table range API
  2023-02-16  4:26         ` Matthew Wilcox
@ 2023-02-16  7:55           ` Geert Uytterhoeven
  2023-02-16 22:03           ` Michael Schmitz
  1 sibling, 0 replies; 37+ messages in thread
From: Geert Uytterhoeven @ 2023-02-16  7:55 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Michael Schmitz, linux-mm, linux-m68k, linux-arch

Hi Matthew,

On Thu, Feb 16, 2023 at 5:26 AM Matthew Wilcox <willy@infradead.org> wrote:
> On Thu, Feb 16, 2023 at 01:59:44PM +1300, Michael Schmitz wrote:
> > On 16/02/23 09:09, Matthew Wilcox (Oracle) wrote:
> > > Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
> > > flush_dcache_folio().  I'm not entirely certain that the 040/060 case
> > > in __flush_pages_to_ram() is correct.
> >
> > I'm pretty sure you need to iterate to hit each of the pages - the code as
> > is will only push cache entries for the first page.
> >
> > Quoting the 040 UM:
> >
> > "Both instructions [cinv, cpush] allow operation on a single cache line, all
> > cache lines in a specific page, or an entire cache, and can select one or
> > both caches for the operation. For line and page operations, a physical
> > address in an address register specifies the memory address."
>
> I actually found that!  What I didn't find was how to tell if this
> cpush insn is the one which is operating on a single cache line,
> a single page, or the entire cache.

cpushl (line), cpushp (page), cpusha (all).
Same for cinv.
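
For reference, in GNU as syntax the three forms look roughly like this
(illustrative only; check the 040 UM for the exact operand rules, and
note that the address register holds a physical address for the line
and page forms):

	cpushl %dc,(%a0)	| push one data-cache line at (%a0)
	cpushp %bc,(%a0)	| push both caches' lines for the page containing (%a0)
	cpusha %bc		| push the entire contents of both caches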

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 10/7] riscv: Implement the new page table range API
  2023-02-15  8:38     ` Yin, Fengwei
  2023-02-15 12:27       ` Yin, Fengwei
@ 2023-02-16  8:14       ` Alexandre Ghiti
  2023-02-16 13:27         ` Yin, Fengwei
  1 sibling, 1 reply; 37+ messages in thread
From: Alexandre Ghiti @ 2023-02-16  8:14 UTC (permalink / raw)
  To: Yin, Fengwei, linux-riscv@lists.infradead.org, linux-mm@kvack.org,
	paul.walmsley@sifive.com, aou@eecs.berkeley.edu,
	palmer@dabbelt.com, willy@infradead.org,
	linux-arch@vger.kernel.org

Hi Yin,

On 2/15/23 09:38, Yin, Fengwei wrote:
> On Wed, 2023-02-15 at 00:04 +0000, Matthew Wilcox (Oracle) wrote:
>> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
>>
>> The PG_dcache_clear flag changes from being a per-page bit to being a
>> per-folio bit.
>>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>   arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
>>   arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
>>   arch/riscv/mm/cacheflush.c          | 11 ++---------
>>   3 files changed, 29 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/cacheflush.h
>> b/arch/riscv/include/asm/cacheflush.h
>> index 03e3b95ae6da..10e5e96f09b5 100644
>> --- a/arch/riscv/include/asm/cacheflush.h
>> +++ b/arch/riscv/include/asm/cacheflush.h
>> @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
>>   
>>   #define PG_dcache_clean PG_arch_1
>>   
>> -static inline void flush_dcache_page(struct page *page)
>> +static inline void flush_dcache_folio(struct folio *folio)
>>   {
>> -       /*
>> -        * HugeTLB pages are always fully mapped and only head page
>> will be
>> -        * set PG_dcache_clean (see comments in flush_icache_pte()).
>> -        */
>> -       if (PageHuge(page))
>> -               page = compound_head(page);
>> -
>> -       if (test_bit(PG_dcache_clean, &page->flags))
>> -               clear_bit(PG_dcache_clean, &page->flags);
>> +       if (test_bit(PG_dcache_clean, &folio->flags))
>> +               clear_bit(PG_dcache_clean, &folio->flags);
>>   }
>> +#define flush_dcache_folio flush_dcache_folio
>>   #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>>   
>> +static inline void flush_dcache_page(struct page *page)
>> +{
>> +       flush_dcache_folio(page_folio(page));
>> +}
>> +
>>   /*
>>    * RISC-V doesn't have an instruction to flush parts of the
>> instruction cache,
>>    * so instead we just flush the whole thing.
>> diff --git a/arch/riscv/include/asm/pgtable.h
>> b/arch/riscv/include/asm/pgtable.h
>> index 13222fd5c4b4..03706c833e70 100644
>> --- a/arch/riscv/include/asm/pgtable.h
>> +++ b/arch/riscv/include/asm/pgtable.h
>> @@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte,
>> pgprot_t newprot)
>>   
>>   
>>   /* Commit new configuration to MMU hardware */
>> -static inline void update_mmu_cache(struct vm_area_struct *vma,
>> -       unsigned long address, pte_t *ptep)
>> +static inline void update_mmu_cache_range(struct vm_area_struct
>> *vma,
>> +               unsigned long address, pte_t *ptep, unsigned int nr)
>>   {
>>          /*
>>           * The kernel assumes that TLBs don't cache invalid entries,
>> but
>> @@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct
>> vm_area_struct *vma,
>>           * Relying on flush_tlb_fix_spurious_fault would suffice, but
>>           * the extra traps reduce performance.  So, eagerly
>> SFENCE.VMA.
>>           */
>> -       flush_tlb_page(vma, address);
>> +       flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
> The flush_tlb_range() of riscv is a little bit strange to me. It gives
> __sbi_tlb_flush_range() stride PAGE_SIZE. That means if (end - start)
> is larger than stride, it will trigger flush_tlb_all().
>
> So this change could trigger flush_tlb_all() while original
> flush_tlb_page() just trigger flush_tlb_page().


Maybe I'm missing something but update_mmu_cache behaviour is not 
changed here, it will always call flush_tlb_page as nr == 1, right?

update_mmu_cache_range, though, will likely call flush_tlb_all: I have to
admit I'm wondering why we don't just flush the range of pages instead of
flushing everything; I'll look into that.
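
One possible shape, purely as a sketch (assuming nothing smarter than a
per-page SFENCE.VMA via flush_tlb_page()):

	/* Sketch only: flush just the pages that were touched. */
	static inline void example_flush_ptes(struct vm_area_struct *vma,
			unsigned long addr, unsigned int nr)
	{
		unsigned int i;

		for (i = 0; i < nr; i++)
			flush_tlb_page(vma, addr + i * PAGE_SIZE);
	}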

Alex


>
> My understanding is flush_tlb_page() should be better because
> flush_pmd_tlb_range() has PMD_SIZE as stride to avoid flush_tlb_all().
> I must miss something here.
>
> Regards
> Yin, Fengwei
>
>>   }
>> +#define update_mmu_cache(vma, addr, ptep) \
>> +       update_mmu_cache_range(vma, addr, ptep, 1)
>>   
>>   #define __HAVE_ARCH_UPDATE_MMU_TLB
>>   #define update_mmu_tlb update_mmu_cache
>> @@ -456,12 +458,21 @@ static inline void __set_pte_at(struct
>> mm_struct *mm,
>>          set_pte(ptep, pteval);
>>   }
>>   
>> -static inline void set_pte_at(struct mm_struct *mm,
>> -       unsigned long addr, pte_t *ptep, pte_t pteval)
>> +static inline void set_ptes(struct mm_struct *mm, unsigned long
>> addr,
>> +               pte_t *ptep, pte_t pteval, unsigned int nr)
>>   {
>> -       page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
>> -       __set_pte_at(mm, addr, ptep, pteval);
>> +       page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
>> +
>> +       for (;;) {
>> +               __set_pte_at(mm, addr, ptep, pteval);
>> +               if (--nr == 0)
>> +                       break;
>> +               ptep++;
>> +               addr += PAGE_SIZE;
>> +               pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
>> +       }
>>   }
>> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep,
>> pte, 1)
>>   
>>   static inline void pte_clear(struct mm_struct *mm,
>>          unsigned long addr, pte_t *ptep)
>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>> index 3cc07ed45aeb..b725c3f6f57f 100644
>> --- a/arch/riscv/mm/cacheflush.c
>> +++ b/arch/riscv/mm/cacheflush.c
>> @@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool
>> local)
>>   #ifdef CONFIG_MMU
>>   void flush_icache_pte(pte_t pte)
>>   {
>> -       struct page *page = pte_page(pte);
>> +       struct folio *folio = page_folio(pte_page(pte));
>>   
>> -       /*
>> -        * HugeTLB pages are always fully mapped, so only setting
>> head page's
>> -        * PG_dcache_clean flag is enough.
>> -        */
>> -       if (PageHuge(page))
>> -               page = compound_head(page);
>> -
>> -       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
>> +       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
>>                  flush_icache_all();
>>   }
>>   #endif /* CONFIG_MMU */


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 10/7] riscv: Implement the new page table range API
  2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
  2023-02-15  8:38     ` Yin, Fengwei
@ 2023-02-16  8:16     ` Alexandre Ghiti
  1 sibling, 0 replies; 37+ messages in thread
From: Alexandre Ghiti @ 2023-02-16  8:16 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), linux-mm, linux-riscv, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, linux-arch

Hi Matthew,

On 2/15/23 01:04, Matthew Wilcox (Oracle) wrote:
> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
>
> The PG_dcache_clear flag changes from being a per-page bit to being a
> per-folio bit.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
>   arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
>   arch/riscv/mm/cacheflush.c          | 11 ++---------
>   3 files changed, 29 insertions(+), 26 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
> index 03e3b95ae6da..10e5e96f09b5 100644
> --- a/arch/riscv/include/asm/cacheflush.h
> +++ b/arch/riscv/include/asm/cacheflush.h
> @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
>   
>   #define PG_dcache_clean PG_arch_1
>   
> -static inline void flush_dcache_page(struct page *page)
> +static inline void flush_dcache_folio(struct folio *folio)
>   {
> -	/*
> -	 * HugeTLB pages are always fully mapped and only head page will be
> -	 * set PG_dcache_clean (see comments in flush_icache_pte()).
> -	 */
> -	if (PageHuge(page))
> -		page = compound_head(page);
> -
> -	if (test_bit(PG_dcache_clean, &page->flags))
> -		clear_bit(PG_dcache_clean, &page->flags);
> +	if (test_bit(PG_dcache_clean, &folio->flags))
> +		clear_bit(PG_dcache_clean, &folio->flags);
>   }
> +#define flush_dcache_folio flush_dcache_folio
>   #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>   
> +static inline void flush_dcache_page(struct page *page)
> +{
> +	flush_dcache_folio(page_folio(page));
> +}
> +
>   /*
>    * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>    * so instead we just flush the whole thing.
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 13222fd5c4b4..03706c833e70 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>   
>   
>   /* Commit new configuration to MMU hardware */
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> -	unsigned long address, pte_t *ptep)
> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
> +		unsigned long address, pte_t *ptep, unsigned int nr)
>   {
>   	/*
>   	 * The kernel assumes that TLBs don't cache invalid entries, but
> @@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>   	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
>   	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
>   	 */
> -	flush_tlb_page(vma, address);
> +	flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
>   }
> +#define update_mmu_cache(vma, addr, ptep) \
> +	update_mmu_cache_range(vma, addr, ptep, 1)
>   
>   #define __HAVE_ARCH_UPDATE_MMU_TLB
>   #define update_mmu_tlb update_mmu_cache
> @@ -456,12 +458,21 @@ static inline void __set_pte_at(struct mm_struct *mm,
>   	set_pte(ptep, pteval);
>   }
>   
> -static inline void set_pte_at(struct mm_struct *mm,
> -	unsigned long addr, pte_t *ptep, pte_t pteval)
> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
> +		pte_t *ptep, pte_t pteval, unsigned int nr)
>   {
> -	page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
> -	__set_pte_at(mm, addr, ptep, pteval);
> +	page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
> +
> +	for (;;) {
> +		__set_pte_at(mm, addr, ptep, pteval);
> +		if (--nr == 0)
> +			break;
> +		ptep++;
> +		addr += PAGE_SIZE;
> +		pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
> +	}
>   }
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
>   
>   static inline void pte_clear(struct mm_struct *mm,
>   	unsigned long addr, pte_t *ptep)
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 3cc07ed45aeb..b725c3f6f57f 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
>   #ifdef CONFIG_MMU
>   void flush_icache_pte(pte_t pte)
>   {
> -	struct page *page = pte_page(pte);
> +	struct folio *folio = page_folio(pte_page(pte));
>   
> -	/*
> -	 * HugeTLB pages are always fully mapped, so only setting head page's
> -	 * PG_dcache_clean flag is enough.
> -	 */
> -	if (PageHuge(page))
> -		page = compound_head(page);
> -
> -	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
> +	if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
>   		flush_icache_all();
>   }
>   #endif /* CONFIG_MMU */

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks,

Alex



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 10/7] riscv: Implement the new page table range API
  2023-02-16  8:14       ` Alexandre Ghiti
@ 2023-02-16 13:27         ` Yin, Fengwei
  0 siblings, 0 replies; 37+ messages in thread
From: Yin, Fengwei @ 2023-02-16 13:27 UTC (permalink / raw)
  To: Alexandre Ghiti, linux-riscv@lists.infradead.org,
	linux-mm@kvack.org, paul.walmsley@sifive.com,
	aou@eecs.berkeley.edu, palmer@dabbelt.com, willy@infradead.org,
	linux-arch@vger.kernel.org

Hi Alex,

On 2/16/2023 4:14 PM, Alexandre Ghiti wrote:
> Hi Yin,
> 
> On 2/15/23 09:38, Yin, Fengwei wrote:
>> On Wed, 2023-02-15 at 00:04 +0000, Matthew Wilcox (Oracle) wrote:
>>> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
>>>
>>> The PG_dcache_clear flag changes from being a per-page bit to being a
>>> per-folio bit.
>>>
>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>> ---
>>>   arch/riscv/include/asm/cacheflush.h | 19 +++++++++----------
>>>   arch/riscv/include/asm/pgtable.h    | 25 ++++++++++++++++++-------
>>>   arch/riscv/mm/cacheflush.c          | 11 ++---------
>>>   3 files changed, 29 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/arch/riscv/include/asm/cacheflush.h
>>> b/arch/riscv/include/asm/cacheflush.h
>>> index 03e3b95ae6da..10e5e96f09b5 100644
>>> --- a/arch/riscv/include/asm/cacheflush.h
>>> +++ b/arch/riscv/include/asm/cacheflush.h
>>> @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void)
>>>     #define PG_dcache_clean PG_arch_1
>>>   -static inline void flush_dcache_page(struct page *page)
>>> +static inline void flush_dcache_folio(struct folio *folio)
>>>   {
>>> -       /*
>>> -        * HugeTLB pages are always fully mapped and only head page
>>> will be
>>> -        * set PG_dcache_clean (see comments in flush_icache_pte()).
>>> -        */
>>> -       if (PageHuge(page))
>>> -               page = compound_head(page);
>>> -
>>> -       if (test_bit(PG_dcache_clean, &page->flags))
>>> -               clear_bit(PG_dcache_clean, &page->flags);
>>> +       if (test_bit(PG_dcache_clean, &folio->flags))
>>> +               clear_bit(PG_dcache_clean, &folio->flags);
>>>   }
>>> +#define flush_dcache_folio flush_dcache_folio
>>>   #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
>>>   +static inline void flush_dcache_page(struct page *page)
>>> +{
>>> +       flush_dcache_folio(page_folio(page));
>>> +}
>>> +
>>>   /*
>>>    * RISC-V doesn't have an instruction to flush parts of the
>>> instruction cache,
>>>    * so instead we just flush the whole thing.
>>> diff --git a/arch/riscv/include/asm/pgtable.h
>>> b/arch/riscv/include/asm/pgtable.h
>>> index 13222fd5c4b4..03706c833e70 100644
>>> --- a/arch/riscv/include/asm/pgtable.h
>>> +++ b/arch/riscv/include/asm/pgtable.h
>>> @@ -405,8 +405,8 @@ static inline pte_t pte_modify(pte_t pte,
>>> pgprot_t newprot)
>>>       /* Commit new configuration to MMU hardware */
>>> -static inline void update_mmu_cache(struct vm_area_struct *vma,
>>> -       unsigned long address, pte_t *ptep)
>>> +static inline void update_mmu_cache_range(struct vm_area_struct
>>> *vma,
>>> +               unsigned long address, pte_t *ptep, unsigned int nr)
>>>   {
>>>          /*
>>>           * The kernel assumes that TLBs don't cache invalid entries,
>>> but
>>> @@ -415,8 +415,10 @@ static inline void update_mmu_cache(struct
>>> vm_area_struct *vma,
>>>           * Relying on flush_tlb_fix_spurious_fault would suffice, but
>>>           * the extra traps reduce performance.  So, eagerly
>>> SFENCE.VMA.
>>>           */
>>> -       flush_tlb_page(vma, address);
>>> +       flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
>> The flush_tlb_range() of riscv is a little bit strange to me. It gives
>> __sbi_tlb_flush_range() stride PAGE_SIZE. That means if (end - start)
>> is larger than stride, it will trigger flush_tlb_all().
>>
>> So this change could trigger flush_tlb_all() while original
>> flush_tlb_page() just trigger flush_tlb_page().
> 
> 
> Maybe I'm missing something but update_mmu_cache behaviour is not changed here, it will always call flush_tlb_page as nr == 1, right?
Yes. This is my understanding too. Thanks.


Regards
Yin, Fengwei

> 
> update_mmu_cache_range, though, will likely call flush_tlb_all: I have to admit I'm wondering why we don't just flush the range of pages instead of flushing everything; I'll look into that.
> 
> Alex
> 
> 
>>
>> My understanding is flush_tlb_page() should be better because
>> flush_pmd_tlb_range() has PMD_SIZE as stride to avoid flush_tlb_all().
>> I must miss something here.
>>
>> Regards
>> Yin, Fengwei
>>
>>>   }
>>> +#define update_mmu_cache(vma, addr, ptep) \
>>> +       update_mmu_cache_range(vma, addr, ptep, 1)
>>>     #define __HAVE_ARCH_UPDATE_MMU_TLB
>>>   #define update_mmu_tlb update_mmu_cache
>>> @@ -456,12 +458,21 @@ static inline void __set_pte_at(struct
>>> mm_struct *mm,
>>>          set_pte(ptep, pteval);
>>>   }
>>>   -static inline void set_pte_at(struct mm_struct *mm,
>>> -       unsigned long addr, pte_t *ptep, pte_t pteval)
>>> +static inline void set_ptes(struct mm_struct *mm, unsigned long
>>> addr,
>>> +               pte_t *ptep, pte_t pteval, unsigned int nr)
>>>   {
>>> -       page_table_check_ptes_set(mm, addr, ptep, pteval, 1);
>>> -       __set_pte_at(mm, addr, ptep, pteval);
>>> +       page_table_check_ptes_set(mm, addr, ptep, pteval, nr);
>>> +
>>> +       for (;;) {
>>> +               __set_pte_at(mm, addr, ptep, pteval);
>>> +               if (--nr == 0)
>>> +                       break;
>>> +               ptep++;
>>> +               addr += PAGE_SIZE;
>>> +               pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
>>> +       }
>>>   }
>>> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
>>>     static inline void pte_clear(struct mm_struct *mm,
>>>          unsigned long addr, pte_t *ptep)
>>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>>> index 3cc07ed45aeb..b725c3f6f57f 100644
>>> --- a/arch/riscv/mm/cacheflush.c
>>> +++ b/arch/riscv/mm/cacheflush.c
>>> @@ -81,16 +81,9 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
>>>   #ifdef CONFIG_MMU
>>>   void flush_icache_pte(pte_t pte)
>>>   {
>>> -       struct page *page = pte_page(pte);
>>> +       struct folio *folio = page_folio(pte_page(pte));
>>>   -       /*
>>> -        * HugeTLB pages are always fully mapped, so only setting head page's
>>> -        * PG_dcache_clean flag is enough.
>>> -        */
>>> -       if (PageHuge(page))
>>> -               page = compound_head(page);
>>> -
>>> -       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
>>> +       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
>>>                  flush_icache_all();
>>>   }
>>>   #endif /* CONFIG_MMU */


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 15/17] m68k: Implement the new page table range API
  2023-02-16  4:26         ` Matthew Wilcox
  2023-02-16  7:55           ` Geert Uytterhoeven
@ 2023-02-16 22:03           ` Michael Schmitz
  1 sibling, 0 replies; 37+ messages in thread
From: Michael Schmitz @ 2023-02-16 22:03 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-mm, linux-m68k, Geert Uytterhoeven, linux-arch

Hi Matthew,

On 16/02/23 17:26, Matthew Wilcox wrote:
> On Thu, Feb 16, 2023 at 01:59:44PM +1300, Michael Schmitz wrote:
>> Matthew,
>>
>> On 16/02/23 09:09, Matthew Wilcox (Oracle) wrote:
>>> Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
>>> flush_dcache_folio().  I'm not entirely certain that the 040/060 case
>>> in __flush_pages_to_ram() is correct.
>> I'm pretty sure you need to iterate to hit each of the pages - the code as
>> is will only push cache entries for the first page.
>>
>> Quoting the 040 UM:
>>
>> "Both instructions [cinv, cpush] allow operation on a single cache line, all
>> cache lines in a specific page, or an entire cache, and can select one or
>> both caches for the operation. For line and page operations, a physical
>> address in an address register specifies the memory address."
> I actually found that!  What I didn't find was how to tell if this
> cpush insn is the one which is operating on a single cache line,
> a single page, or the entire cache.
>
> So I should do a loop around this asm and call it once for each page
> we're flushing?

Yes, that's the idea. I'm uncertain whether contiguous virtual pages are 
always guaranteed to have contiguous physical mappings, so no point in 
trying to 'optimize' and shift the loop into inline assembly.
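
Just to make it concrete, something like this is all I mean (an untested
sketch; the helper name is made up, the asm is the existing 040/060
per-page cpush, only wrapped in a C loop so that __pa() is recomputed for
every page rather than assuming the physical pages are contiguous):

	static void flush_pages_to_ram_sketch(void *vaddr, unsigned int nr)
	{
		unsigned int i;

		for (i = 0; i < nr; i++) {
			void *addr = vaddr + i * PAGE_SIZE;

			/* push and invalidate both caches, one page at a time */
			__asm__ __volatile__("nop\n\t"
					     ".chip 68040\n\t"
					     "cpushp %%bc,(%0)\n\t"
					     ".chip 68k"
					     : : "a" (__pa(addr)));
		}
	}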

Cheers,

     Michael



>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 13/7] loongson: Implement the new page table range API
  2023-02-15  0:04   ` [PATCH 13/7] loongson: " Matthew Wilcox (Oracle)
@ 2023-02-26  4:34     ` Matthew Wilcox
  2023-02-26  6:56       ` WANG Xuerui
  0 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox @ 2023-02-26  4:34 UTC (permalink / raw)
  To: linux-mm, Huacai Chen, WANG Xuerui, loongarch, linux-arch

On Wed, Feb 15, 2023 at 12:04:46AM +0000, Matthew Wilcox (Oracle) wrote:
> Add set_ptes() and update_mmu_cache_range().
> 
> THIS PATCH IS INCOMPLETE.  I DO NOT KNOW WHAT TO DO IN __update_tlb()

Help?  This is the only remaining architecture to fix; I have all the
others converted now.

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  arch/loongarch/include/asm/cacheflush.h |  2 ++
>  arch/loongarch/include/asm/pgtable.h    | 30 ++++++++++++++++---------
>  arch/loongarch/mm/tlb.c                 |  4 +++-
>  3 files changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
> index 0681788eb474..7907eb42bfbd 100644
> --- a/arch/loongarch/include/asm/cacheflush.h
> +++ b/arch/loongarch/include/asm/cacheflush.h
> @@ -47,8 +47,10 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
>  #define flush_cache_vmap(start, end)			do { } while (0)
>  #define flush_cache_vunmap(start, end)			do { } while (0)
>  #define flush_icache_page(vma, page)			do { } while (0)
> +#define flush_icache_pages(vma, page)			do { } while (0)
>  #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
>  #define flush_dcache_page(page)				do { } while (0)
> +#define flush_dcache_folio(folio)			do { } while (0)
>  #define flush_dcache_mmap_lock(mapping)			do { } while (0)
>  #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
>  
> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> index d28fb9dbec59..0f5fa7c40c52 100644
> --- a/arch/loongarch/include/asm/pgtable.h
> +++ b/arch/loongarch/include/asm/pgtable.h
> @@ -334,12 +334,20 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
>  	}
>  }
>  
> -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> -			      pte_t *ptep, pte_t pteval)
> -{
> -	set_pte(ptep, pteval);
> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
> +		pte_t *ptep, pte_t pte, unsigned int nr)
> +{
> +	for (;;) {
> +		set_pte(ptep, pte);
> +		if (--nr == 0)
> +			break;
> +		ptep++;
> +		pte_val(pte) += 1 << _PFN_SHIFT;
> +	}
>  }
>  
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
> +
>  static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>  {
>  	/* Preserve global status for the pair */
> @@ -442,14 +450,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>  		     (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
>  }
>  
> -extern void __update_tlb(struct vm_area_struct *vma,
> -			unsigned long address, pte_t *ptep);
> +extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
> +		pte_t *ptep, unsigned int nr);
>  
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> -			unsigned long address, pte_t *ptep)
> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
> +		unsigned long address, pte_t *ptep, unsigned int nr)
>  {
> -	__update_tlb(vma, address, ptep);
> +	__update_tlb(vma, address, ptep, nr);
>  }
> +#define update_mmu_cache(vma, addr, ptep) \
> +	update_mmu_cache_range(vma, addr, ptep, 1)
>  
>  #define __HAVE_ARCH_UPDATE_MMU_TLB
>  #define update_mmu_tlb	update_mmu_cache
> @@ -457,7 +467,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>  			unsigned long address, pmd_t *pmdp)
>  {
> -	__update_tlb(vma, address, (pte_t *)pmdp);
> +	__update_tlb(vma, address, (pte_t *)pmdp, 1);
>  }
>  
>  static inline unsigned long pmd_pfn(pmd_t pmd)
> diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
> index 8bad6b0cff59..ac0b19dbd1dc 100644
> --- a/arch/loongarch/mm/tlb.c
> +++ b/arch/loongarch/mm/tlb.c
> @@ -162,7 +162,8 @@ static void __update_hugetlb(struct vm_area_struct *vma, unsigned long address,
>  #endif
>  }
>  
> -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
> +void __update_tlb(struct vm_area_struct *vma, unsigned long address,
> +		pte_t *ptep, unsigned int nr)
>  {
>  	int idx;
>  	unsigned long flags;
> @@ -187,6 +188,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep
>  	write_csr_entryhi(address);
>  	tlb_probe();
>  	idx = read_csr_tlbidx();
> +// I have no idea what to do here
>  	write_csr_pagesize(PS_DEFAULT_SIZE);
>  	write_csr_entrylo0(pte_val(*ptep++));
>  	write_csr_entrylo1(pte_val(*ptep));
> -- 
> 2.39.1
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 13/7] loongson: Implement the new page table range API
  2023-02-26  4:34     ` Matthew Wilcox
@ 2023-02-26  6:56       ` WANG Xuerui
  0 siblings, 0 replies; 37+ messages in thread
From: WANG Xuerui @ 2023-02-26  6:56 UTC (permalink / raw)
  To: Matthew Wilcox, linux-mm, Huacai Chen, loongarch, linux-arch

Hi,

On 2/26/23 12:34, Matthew Wilcox wrote:
> On Wed, Feb 15, 2023 at 12:04:46AM +0000, Matthew Wilcox (Oracle) wrote:
>> Add set_ptes() and update_mmu_cache_range().
>>
>> THIS PATCH IS INCOMPLETE.  I DO NOT KNOW WHAT TO DO IN __update_tlb()
> Help?  This is the only remaining architecture to fix; I have all the
> others converted now.
Sorry for the late reply; it seems Huacai is busy with other things at
the moment, and I've been busy with my day job recently.
>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>   arch/loongarch/include/asm/cacheflush.h |  2 ++
>>   arch/loongarch/include/asm/pgtable.h    | 30 ++++++++++++++++---------
>>   arch/loongarch/mm/tlb.c                 |  4 +++-
>>   3 files changed, 25 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
>> index 0681788eb474..7907eb42bfbd 100644
>> --- a/arch/loongarch/include/asm/cacheflush.h
>> +++ b/arch/loongarch/include/asm/cacheflush.h
>> @@ -47,8 +47,10 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
>>   #define flush_cache_vmap(start, end)			do { } while (0)
>>   #define flush_cache_vunmap(start, end)			do { } while (0)
>>   #define flush_icache_page(vma, page)			do { } while (0)
>> +#define flush_icache_pages(vma, page)			do { } while (0)
>>   #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
>>   #define flush_dcache_page(page)				do { } while (0)
>> +#define flush_dcache_folio(folio)			do { } while (0)
>>   #define flush_dcache_mmap_lock(mapping)			do { } while (0)
>>   #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
>>   
>> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
>> index d28fb9dbec59..0f5fa7c40c52 100644
>> --- a/arch/loongarch/include/asm/pgtable.h
>> +++ b/arch/loongarch/include/asm/pgtable.h
>> @@ -334,12 +334,20 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
>>   	}
>>   }
>>   
>> -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
>> -			      pte_t *ptep, pte_t pteval)
>> -{
>> -	set_pte(ptep, pteval);
>> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
>> +		pte_t *ptep, pte_t pte, unsigned int nr)
>> +{
>> +	for (;;) {
>> +		set_pte(ptep, pte);
>> +		if (--nr == 0)
>> +			break;
>> +		ptep++;
>> +		pte_val(pte) += 1 << _PFN_SHIFT;
>> +	}
>>   }
>>   
>> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
>> +
>>   static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>>   {
>>   	/* Preserve global status for the pair */
>> @@ -442,14 +450,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>>   		     (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
>>   }
>>   
>> -extern void __update_tlb(struct vm_area_struct *vma,
>> -			unsigned long address, pte_t *ptep);
>> +extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
>> +		pte_t *ptep, unsigned int nr);
>>   
>> -static inline void update_mmu_cache(struct vm_area_struct *vma,
>> -			unsigned long address, pte_t *ptep)
>> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
>> +		unsigned long address, pte_t *ptep, unsigned int nr)
>>   {
>> -	__update_tlb(vma, address, ptep);
>> +	__update_tlb(vma, address, ptep, nr);
>>   }
>> +#define update_mmu_cache(vma, addr, ptep) \
>> +	update_mmu_cache_range(vma, addr, ptep, 1)
>>   
>>   #define __HAVE_ARCH_UPDATE_MMU_TLB
>>   #define update_mmu_tlb	update_mmu_cache
>> @@ -457,7 +467,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>>   static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>>   			unsigned long address, pmd_t *pmdp)
>>   {
>> -	__update_tlb(vma, address, (pte_t *)pmdp);
>> +	__update_tlb(vma, address, (pte_t *)pmdp, 1);
>>   }
>>   
>>   static inline unsigned long pmd_pfn(pmd_t pmd)
>> diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
>> index 8bad6b0cff59..ac0b19dbd1dc 100644
>> --- a/arch/loongarch/mm/tlb.c
>> +++ b/arch/loongarch/mm/tlb.c
>> @@ -162,7 +162,8 @@ static void __update_hugetlb(struct vm_area_struct *vma, unsigned long address,
>>   #endif
>>   }
>>   
>> -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
>> +void __update_tlb(struct vm_area_struct *vma, unsigned long address,
>> +		pte_t *ptep, unsigned int nr)
>>   {
>>   	int idx;
>>   	unsigned long flags;
>> @@ -187,6 +188,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep
>>   	write_csr_entryhi(address);
>>   	tlb_probe();
>>   	idx = read_csr_tlbidx();
>> +// I have no idea what to do here

AFAIK you probably can't make __update_tlb() do this, at least not
directly. The underlying LoongArch instructions that __update_tlb()
wraps (TLBWR or TLBFILL [1]) only update one TLB entry, and one entry
covers just two consecutive pages, like on MIPS. The CSRs the hardware
is going to read [2] also don't seem to support expressing a "page
number". IMO you need to wrap this helper in some loop to provide the
desired semantics.
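
Something along these lines is what I have in mind (a completely
untested sketch, keeping __update_tlb() with its current one-entry
signature and doing the looping in the inline wrapper):

	static inline void update_mmu_cache_range(struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, unsigned int nr)
	{
		/*
		 * Dumbest possible version: refill one entry per page,
		 * exactly as the old per-page update_mmu_cache() did.
		 * Each entry covers an even/odd pair of pages, so this
		 * does up to twice the necessary work, but it is at
		 * least obviously correct; batching per pair can be
		 * done later.
		 */
		for (; nr > 0; nr--, address += PAGE_SIZE, ptep++)
			__update_tlb(vma, address, ptep);
	}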

(The current LoongArch TLB implementation very much resembles, if not is
*identical to*, the MIPS one, if that helps. It's said that future
generations of LoongArch hardware will feature hardware-managed TLBs,
but I'm personally/generally wary of such claims.)

Huacai may want to supply more details (or correct me) once he gets to this.

[1]: 
https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_tlbwr
[2]: 
https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#control-and-status-registers-related-to-mapped-address-translation

>>   	write_csr_pagesize(PS_DEFAULT_SIZE);
>>   	write_csr_entrylo0(pte_val(*ptep++));
>>   	write_csr_entrylo1(pte_val(*ptep));
>> -- 
>> 2.39.1
>>
-- 
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/



^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2023-02-26  6:56 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-11  3:39 [PATCH 0/7] New arch interfaces for manipulating multiple pages Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 1/7] mm: Convert page_table_check_pte_set() to page_table_check_ptes_set() Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 2/7] mm: Add generic flush_icache_pages() and documentation Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 3/7] mm: Add folio_flush_mapping() Matthew Wilcox (Oracle)
2023-02-11  3:39 ` [PATCH 4/7] mm: Remove ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO Matthew Wilcox (Oracle)
2023-02-12 15:51   ` Mike Rapoport
2023-02-12 23:59     ` Matthew Wilcox
2023-02-11  3:39 ` [PATCH 5/7] alpha: Implement the new page table range API Matthew Wilcox (Oracle)
2023-02-13  3:15   ` Yin, Fengwei
2023-02-11  3:39 ` [PATCH 6/7] arc: " Matthew Wilcox (Oracle)
2023-02-13  3:09   ` Yin, Fengwei
2023-02-13 15:16     ` Matthew Wilcox
2023-02-14  6:32       ` Yin, Fengwei
2023-02-11  3:39 ` [PATCH 7/7] x86: " Matthew Wilcox (Oracle)
2023-02-13 21:04 ` [PATCH 8/7] arm: " Matthew Wilcox (Oracle)
2023-02-15  0:04 ` [PATCH 9/7] arm64: " Matthew Wilcox (Oracle)
2023-02-15  0:04   ` [PATCH 10/7] riscv: " Matthew Wilcox (Oracle)
2023-02-15  8:38     ` Yin, Fengwei
2023-02-15 12:27       ` Yin, Fengwei
2023-02-16  8:14       ` Alexandre Ghiti
2023-02-16 13:27         ` Yin, Fengwei
2023-02-16  8:16     ` Alexandre Ghiti
2023-02-15  0:04   ` [PATCH 11/7] csky: " Matthew Wilcox (Oracle)
2023-02-15  0:04   ` [PATCH 12/7] hexagon: " Matthew Wilcox (Oracle)
2023-02-15 16:22     ` Brian Cain
2023-02-15  0:04   ` [PATCH 13/7] loongson: " Matthew Wilcox (Oracle)
2023-02-26  4:34     ` Matthew Wilcox
2023-02-26  6:56       ` WANG Xuerui
2023-02-15 13:26   ` [PATCH 9/7] arm64: " Catalin Marinas
2023-02-15 20:09   ` [PATCH 14/17] ia64: " Matthew Wilcox (Oracle)
2023-02-15 20:09     ` [PATCH 15/17] m68k: " Matthew Wilcox (Oracle)
2023-02-16  0:59       ` Michael Schmitz
2023-02-16  4:26         ` Matthew Wilcox
2023-02-16  7:55           ` Geert Uytterhoeven
2023-02-16 22:03           ` Michael Schmitz
2023-02-15 20:09     ` [PATCH 16/17] microblaze: " Matthew Wilcox (Oracle)
2023-02-15 20:09     ` [PATCH 17/17] mips: " Matthew Wilcox (Oracle)
