All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
@ 2021-03-05 11:38 Jan Kiszka
  2021-03-06 13:14 ` Jan Kiszka
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Kiszka @ 2021-03-05 11:38 UTC (permalink / raw)
  To: Xenomai, Philippe Gerum

From: Jan Kiszka <jan.kiszka@siemens.com>

This is still needed to avoid that a real-time parent seems minor faults
after forking for shared pages until they are finally unshared.

This missing support became visible in Xenomai when running the complete
smokey test suite on certain architectures/config combinations.

Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

For discussion. If there is a better pattern in reach, I'm happy to go 
that path as well.

Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
in sync with noarch, you will need manual merging). Possibly, it already 
applies to 5.4 as well, didn't check yet.

 mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 7 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 4cc7c72a0b7e..f102f4de2213 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
 
 unsigned long highest_memmap_pfn __read_mostly;
 
+static bool cow_user_page(struct page *dst, struct page *src,
+			  struct vm_fault *vmf);
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+	     unsigned long addr, int *rss, pmd_t *src_pmd,
+	     struct page *uncow_page)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
+#ifdef CONFIG_IPIPE
+		if (uncow_page) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			struct vm_fault vmf;
+
+			vmf.vma = vma;
+			vmf.address = addr;
+			vmf.orig_pte = pte;
+			vmf.pmd = src_pmd;
+
+			if (cow_user_page(uncow_page, old_page, &vmf)) {
+				pte = mk_pte(uncow_page, vma->vm_page_prot);
+
+				if (vm_flags & VM_SHARED)
+					pte = pte_mkclean(pte);
+				pte = pte_mkold(pte);
+
+				page_add_new_anon_rmap(uncow_page, vma, addr,
+						       false);
+				rss[!!PageAnon(uncow_page)]++;
+				goto out_set_pte;
+			} else {
+				/* unexpected: source page no longer present */
+				WARN_ON_ONCE(1);
+			}
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	int progress = 0;
 	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
-
+	struct page *uncow_page = NULL;
+#ifdef CONFIG_IPIPE
+	int do_cow_break = 0;
+again:
+	if (do_cow_break) {
+		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (uncow_page == NULL)
+			return -ENOMEM;
+		do_cow_break = 0;
+	}
+#else
 again:
+#endif
 	init_rss_vec(rss);
 
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
+	if (!dst_pte) {
+		if (uncow_page)
+			put_page(uncow_page);
 		return -ENOMEM;
+	}
 	src_pte = pte_offset_map(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			progress++;
 			continue;
 		}
+#ifdef CONFIG_IPIPE
+		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+			if (is_cow_mapping(vma->vm_flags) &&
+			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
+			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
+				arch_leave_lazy_mmu_mode();
+				spin_unlock(src_ptl);
+				pte_unmap(src_pte);
+				add_mm_rss_vec(dst_mm, rss);
+				pte_unmap_unlock(dst_pte, dst_ptl);
+				cond_resched();
+				do_cow_break = 1;
+				goto again;
+			}
+		}
+#endif
 		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-							vma, addr, rss);
+					 vma, addr, rss, src_pmd, uncow_page);
+		uncow_page = NULL;
 		if (entry.val)
 			break;
 		progress += 8;
@@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 	return same;
 }
 
-static inline bool cow_user_page(struct page *dst, struct page *src,
-				 struct vm_fault *vmf)
+static bool cow_user_page(struct page *dst, struct page *src,
+			  struct vm_fault *vmf)
 {
 	bool ret;
 	void *kaddr;
-- 
2.26.2


-- 
Siemens AG, T RDA IOT
Corporate Competence Center Embedded Linux


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-03-15 12:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-03-05 11:38 [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range Jan Kiszka
2021-03-06 13:14 ` Jan Kiszka
2021-03-06 13:33   ` Henning Schild
2021-03-06 14:58   ` Philippe Gerum
2021-03-07 12:46     ` Jan Kiszka
2021-03-07 13:09       ` Greg Gallagher
2021-03-08  7:02         ` Greg Gallagher
2021-03-08  7:32           ` Jan Kiszka
2021-03-15 10:00     ` Jan Kiszka
2021-03-15 12:08       ` Philippe Gerum

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.