* [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
[not found] ` <1406212541-25975-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2014-07-24 14:35 ` Joerg Roedel
0 siblings, 0 replies; 8+ messages in thread
From: Joerg Roedel @ 2014-07-24 14:35 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jay.Cornwall-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Jerome Glisse,
jroedel-l3A5Bk7waGM, Jesse Barnes, David Woodhouse,
ben.sander-5C7GfCeVMHo
From: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
Add calls to the new mmu_notifier_invalidate_range()
function to all places in the VMM that need it.
Signed-off-by: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
---
include/linux/mmu_notifier.h | 28 ++++++++++++++++++++++++++++
kernel/events/uprobes.c | 2 +-
mm/fremap.c | 2 +-
mm/huge_memory.c | 9 +++++----
mm/hugetlb.c | 7 ++++++-
mm/ksm.c | 4 ++--
mm/memory.c | 3 ++-
mm/migrate.c | 3 ++-
mm/rmap.c | 2 +-
9 files changed, 48 insertions(+), 12 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index f333668..6959dc8 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -273,6 +273,32 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__young; \
})
+#define ptep_clear_flush_notify(__vma, __address, __ptep) \
+({ \
+ unsigned long ___addr = __address & PAGE_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pte_t ___pte; \
+ \
+ ___pte = ptep_clear_flush(__vma, __address, __ptep); \
+ mmu_notifier_invalidate_range(___mm, ___addr, \
+ ___addr + PAGE_SIZE); \
+ \
+ ___pte; \
+})
+
+#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pmd_t ___pmd; \
+ \
+ ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PMD_SIZE); \
+ \
+ ___pmd; \
+})
+
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -346,6 +372,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#define ptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e..642262d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -186,7 +186,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 72b8fa3..9129013 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_present(pte)) {
flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
+ pte = ptep_clear_flush_notify(vma, addr, ptep);
page = vm_normal_page(vma, addr, pte);
if (page) {
if (pte_dirty(pte))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d8..b322c97 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1031,7 +1031,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
goto out_free_pages;
VM_BUG_ON_PAGE(!PageHead(page), page);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1168,7 +1168,7 @@ alloc:
pmd_t entry;
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache_pmd(vma, address, pmd);
@@ -1499,7 +1499,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
pmd_t entry;
ret = 1;
if (!prot_numa) {
- entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmdp_get_and_clear_notify(mm, addr, pmd);
if (pmd_numa(entry))
entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
@@ -1631,6 +1631,7 @@ static int __split_huge_page_splitting(struct page *page,
* serialize against split_huge_page*.
*/
pmdp_splitting_flush(vma, address, pmd);
+
ret = 1;
spin_unlock(ptl);
}
@@ -2793,7 +2794,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
pmd_t _pmd;
int i;
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2024bbd..da37ad1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2602,8 +2602,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_pte_at(dst, addr, dst_pte, entry);
} else {
- if (cow)
+ if (cow) {
huge_ptep_set_wrprotect(src, addr, src_pte);
+ mmu_notifier_invalidate_range(src, mmun_start,
+ mmun_end);
+ }
ptepage = pte_page(entry);
get_page(ptepage);
page_dup_rmap(ptepage);
@@ -2910,6 +2913,7 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page);
@@ -3384,6 +3388,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* and that page table be reused and filled with junk.
*/
flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
mmu_notifier_invalidate_range_end(mm, start, end);
diff --git a/mm/ksm.c b/mm/ksm.c
index 346ddc9..a73df3b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* this assure us that no O_DIRECT can happen after the check
* or in the middle of the check.
*/
- entry = ptep_clear_flush(vma, addr, ptep);
+ entry = ptep_clear_flush_notify(vma, addr, ptep);
/*
* Check that no O_DIRECT or similar I/O is in progress on the
* page
@@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
page_add_anon_rmap(kpage, vma, addr);
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/memory.c b/mm/memory.c
index d67fd9f..ceff829 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -236,6 +236,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
tlb->need_flush = 0;
tlb_flush(tlb);
+ mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
@@ -2232,7 +2233,7 @@ gotten:
* seen in the presence of one thread doing SMC and another
* thread doing COW.
*/
- ptep_clear_flush(vma, address, page_table);
+ ptep_clear_flush_notify(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
/*
* We call the notify macro here because, when using secondary
diff --git a/mm/migrate.c b/mm/migrate.c
index 9e0beaa..812c0d6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1874,7 +1874,7 @@ fail_putback:
*/
flush_cache_range(vma, mmun_start, mmun_end);
page_add_anon_rmap(new_page, vma, mmun_start);
- pmdp_clear_flush(vma, mmun_start, pmd);
+ pmdp_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
@@ -1882,6 +1882,7 @@ fail_putback:
if (page_count(page) != 2) {
set_pmd_at(mm, mmun_start, pmd, orig_entry);
flush_tlb_range(vma, mmun_start, mmun_end);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
page_remove_rmap(new_page);
goto fail_putback;
diff --git a/mm/rmap.c b/mm/rmap.c
index b7e94eb..c2a703b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1384,7 +1384,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address)) {
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
2014-07-29 16:18 [PATCH 0/3 v2] mmu_notifier: Allow to manage CPU external TLBs Joerg Roedel
@ 2014-07-29 16:18 ` Joerg Roedel
2014-08-16 12:55 ` Oded Gabbay
0 siblings, 1 reply; 8+ messages in thread
From: Joerg Roedel @ 2014-07-29 16:18 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jerome Glisse, jroedel, Jay.Cornwall, Oded.Gabbay, John.Bridgman,
Suravee.Suthikulpanit, ben.sander, Jesse Barnes, David Woodhouse,
linux-kernel, linux-mm, iommu
From: Joerg Roedel <jroedel@suse.de>
Add calls to the new mmu_notifier_invalidate_range()
function to all places in the VMM that need it.
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
include/linux/mmu_notifier.h | 28 ++++++++++++++++++++++++++++
kernel/events/uprobes.c | 2 +-
mm/fremap.c | 2 +-
mm/huge_memory.c | 9 +++++----
mm/hugetlb.c | 7 ++++++-
mm/ksm.c | 4 ++--
mm/memory.c | 3 ++-
mm/migrate.c | 3 ++-
mm/rmap.c | 2 +-
9 files changed, 48 insertions(+), 12 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1bac99c..f760e95 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -273,6 +273,32 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__young; \
})
+#define ptep_clear_flush_notify(__vma, __address, __ptep) \
+({ \
+ unsigned long ___addr = __address & PAGE_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pte_t ___pte; \
+ \
+ ___pte = ptep_clear_flush(__vma, __address, __ptep); \
+ mmu_notifier_invalidate_range(___mm, ___addr, \
+ ___addr + PAGE_SIZE); \
+ \
+ ___pte; \
+})
+
+#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pmd_t ___pmd; \
+ \
+ ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PMD_SIZE); \
+ \
+ ___pmd; \
+})
+
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -346,6 +372,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#define ptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e..642262d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -186,7 +186,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 72b8fa3..9129013 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_present(pte)) {
flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
+ pte = ptep_clear_flush_notify(vma, addr, ptep);
page = vm_normal_page(vma, addr, pte);
if (page) {
if (pte_dirty(pte))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d8..b322c97 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1031,7 +1031,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
goto out_free_pages;
VM_BUG_ON_PAGE(!PageHead(page), page);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1168,7 +1168,7 @@ alloc:
pmd_t entry;
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache_pmd(vma, address, pmd);
@@ -1499,7 +1499,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
pmd_t entry;
ret = 1;
if (!prot_numa) {
- entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmdp_get_and_clear_notify(mm, addr, pmd);
if (pmd_numa(entry))
entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
@@ -1631,6 +1631,7 @@ static int __split_huge_page_splitting(struct page *page,
* serialize against split_huge_page*.
*/
pmdp_splitting_flush(vma, address, pmd);
+
ret = 1;
spin_unlock(ptl);
}
@@ -2793,7 +2794,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
pmd_t _pmd;
int i;
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9221c02..603851d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2602,8 +2602,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_pte_at(dst, addr, dst_pte, entry);
} else {
- if (cow)
+ if (cow) {
huge_ptep_set_wrprotect(src, addr, src_pte);
+ mmu_notifier_invalidate_range(src, mmun_start,
+ mmun_end);
+ }
entry = huge_ptep_get(src_pte);
ptepage = pte_page(entry);
get_page(ptepage);
@@ -2911,6 +2914,7 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page);
@@ -3385,6 +3389,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* and that page table be reused and filled with junk.
*/
flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
mmu_notifier_invalidate_range_end(mm, start, end);
diff --git a/mm/ksm.c b/mm/ksm.c
index 346ddc9..a73df3b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* this assure us that no O_DIRECT can happen after the check
* or in the middle of the check.
*/
- entry = ptep_clear_flush(vma, addr, ptep);
+ entry = ptep_clear_flush_notify(vma, addr, ptep);
/*
* Check that no O_DIRECT or similar I/O is in progress on the
* page
@@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
page_add_anon_rmap(kpage, vma, addr);
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/memory.c b/mm/memory.c
index 7e8d820..36daa2d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -236,6 +236,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
tlb->need_flush = 0;
tlb_flush(tlb);
+ mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
@@ -2232,7 +2233,7 @@ gotten:
* seen in the presence of one thread doing SMC and another
* thread doing COW.
*/
- ptep_clear_flush(vma, address, page_table);
+ ptep_clear_flush_notify(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
/*
* We call the notify macro here because, when using secondary
diff --git a/mm/migrate.c b/mm/migrate.c
index be6dbf9..d3fb8d0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1875,7 +1875,7 @@ fail_putback:
*/
flush_cache_range(vma, mmun_start, mmun_end);
page_add_anon_rmap(new_page, vma, mmun_start);
- pmdp_clear_flush(vma, mmun_start, pmd);
+ pmdp_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
@@ -1883,6 +1883,7 @@ fail_putback:
if (page_count(page) != 2) {
set_pmd_at(mm, mmun_start, pmd, orig_entry);
flush_tlb_range(vma, mmun_start, mmun_end);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
page_remove_rmap(new_page);
goto fail_putback;
diff --git a/mm/rmap.c b/mm/rmap.c
index 22a4a76..8a0d02d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1380,7 +1380,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address)) {
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
2014-07-29 16:18 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel
@ 2014-08-16 12:55 ` Oded Gabbay
0 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2014-08-16 12:55 UTC (permalink / raw)
To: Joerg Roedel, Andrew Morton, Andrea Arcangeli, Peter Zijlstra,
Rik van Riel, Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jerome Glisse, jroedel, Jay.Cornwall, John.Bridgman,
Suravee.Suthikulpanit, ben.sander, Jesse Barnes, David Woodhouse,
linux-kernel, linux-mm, iommu
On 29/07/14 19:18, Joerg Roedel wrote:
> From: Joerg Roedel <jroedel@suse.de>
>
> Add calls to the new mmu_notifier_invalidate_range()
> function to all places in the VMM that need it.
>
> Signed-off-by: Joerg Roedel <jroedel@suse.de>
> ---
> include/linux/mmu_notifier.h | 28 ++++++++++++++++++++++++++++
> kernel/events/uprobes.c | 2 +-
> mm/fremap.c | 2 +-
> mm/huge_memory.c | 9 +++++----
> mm/hugetlb.c | 7 ++++++-
> mm/ksm.c | 4 ++--
> mm/memory.c | 3 ++-
> mm/migrate.c | 3 ++-
> mm/rmap.c | 2 +-
> 9 files changed, 48 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index 1bac99c..f760e95 100644
> --- a/include/linux/mmu_notifier.h
> +++ b/include/linux/mmu_notifier.h
> @@ -273,6 +273,32 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
> __young; \
> })
>
> +#define ptep_clear_flush_notify(__vma, __address, __ptep) \
> +({ \
> + unsigned long ___addr = __address & PAGE_MASK; \
> + struct mm_struct *___mm = (__vma)->vm_mm; \
> + pte_t ___pte; \
> + \
> + ___pte = ptep_clear_flush(__vma, __address, __ptep); \
> + mmu_notifier_invalidate_range(___mm, ___addr, \
> + ___addr + PAGE_SIZE); \
> + \
> + ___pte; \
> +})
> +
> +#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \
> +({ \
> + unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
> + struct mm_struct *___mm = (__vma)->vm_mm; \
> + pmd_t ___pmd; \
> + \
> + ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \
> + mmu_notifier_invalidate_range(___mm, ___haddr, \
> + ___haddr + HPAGE_PMD_SIZE); \
> + \
> + ___pmd; \
> +})
> +
> /*
> * set_pte_at_notify() sets the pte _after_ running the notifier.
> * This is safe to start by updating the secondary MMUs, because the primary MMU
> @@ -346,6 +372,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
>
> #define ptep_clear_flush_young_notify ptep_clear_flush_young
> #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
> +#define ptep_clear_flush_notify ptep_clear_flush
> +#define pmdp_clear_flush_notify pmdp_clear_flush
> #define set_pte_at_notify set_pte_at
>
> #endif /* CONFIG_MMU_NOTIFIER */
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 6f3254e..642262d 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -186,7 +186,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
> }
>
> flush_cache_page(vma, addr, pte_pfn(*ptep));
> - ptep_clear_flush(vma, addr, ptep);
> + ptep_clear_flush_notify(vma, addr, ptep);
> set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
>
> page_remove_rmap(page);
> diff --git a/mm/fremap.c b/mm/fremap.c
> index 72b8fa3..9129013 100644
> --- a/mm/fremap.c
> +++ b/mm/fremap.c
> @@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
>
> if (pte_present(pte)) {
> flush_cache_page(vma, addr, pte_pfn(pte));
> - pte = ptep_clear_flush(vma, addr, ptep);
> + pte = ptep_clear_flush_notify(vma, addr, ptep);
> page = vm_normal_page(vma, addr, pte);
> if (page) {
> if (pte_dirty(pte))
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 33514d8..b322c97 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1031,7 +1031,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
> goto out_free_pages;
> VM_BUG_ON_PAGE(!PageHead(page), page);
>
> - pmdp_clear_flush(vma, haddr, pmd);
> + pmdp_clear_flush_notify(vma, haddr, pmd);
> /* leave pmd empty until pte is filled */
>
> pgtable = pgtable_trans_huge_withdraw(mm, pmd);
> @@ -1168,7 +1168,7 @@ alloc:
> pmd_t entry;
> entry = mk_huge_pmd(new_page, vma->vm_page_prot);
> entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> - pmdp_clear_flush(vma, haddr, pmd);
> + pmdp_clear_flush_notify(vma, haddr, pmd);
> page_add_new_anon_rmap(new_page, vma, haddr);
> set_pmd_at(mm, haddr, pmd, entry);
> update_mmu_cache_pmd(vma, address, pmd);
> @@ -1499,7 +1499,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
> pmd_t entry;
> ret = 1;
> if (!prot_numa) {
> - entry = pmdp_get_and_clear(mm, addr, pmd);
> + entry = pmdp_get_and_clear_notify(mm, addr, pmd);
Where is pmdp_get_and_clear_notify() implemented?
I didn't find any implementation in this patch or in linux-next.
Oded
> if (pmd_numa(entry))
> entry = pmd_mknonnuma(entry);
> entry = pmd_modify(entry, newprot);
> @@ -1631,6 +1631,7 @@ static int __split_huge_page_splitting(struct page *page,
> * serialize against split_huge_page*.
> */
> pmdp_splitting_flush(vma, address, pmd);
> +
> ret = 1;
> spin_unlock(ptl);
> }
> @@ -2793,7 +2794,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
> pmd_t _pmd;
> int i;
>
> - pmdp_clear_flush(vma, haddr, pmd);
> + pmdp_clear_flush_notify(vma, haddr, pmd);
> /* leave pmd empty until pte is filled */
>
> pgtable = pgtable_trans_huge_withdraw(mm, pmd);
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 9221c02..603851d 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2602,8 +2602,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
> }
> set_huge_pte_at(dst, addr, dst_pte, entry);
> } else {
> - if (cow)
> + if (cow) {
> huge_ptep_set_wrprotect(src, addr, src_pte);
> + mmu_notifier_invalidate_range(src, mmun_start,
> + mmun_end);
> + }
> entry = huge_ptep_get(src_pte);
> ptepage = pte_page(entry);
> get_page(ptepage);
> @@ -2911,6 +2914,7 @@ retry_avoidcopy:
>
> /* Break COW */
> huge_ptep_clear_flush(vma, address, ptep);
> + mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
> set_huge_pte_at(mm, address, ptep,
> make_huge_pte(vma, new_page, 1));
> page_remove_rmap(old_page);
> @@ -3385,6 +3389,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
> * and that page table be reused and filled with junk.
> */
> flush_tlb_range(vma, start, end);
> + mmu_notifier_invalidate_range(mm, start, end);
> mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
> mmu_notifier_invalidate_range_end(mm, start, end);
>
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 346ddc9..a73df3b 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
> * this assure us that no O_DIRECT can happen after the check
> * or in the middle of the check.
> */
> - entry = ptep_clear_flush(vma, addr, ptep);
> + entry = ptep_clear_flush_notify(vma, addr, ptep);
> /*
> * Check that no O_DIRECT or similar I/O is in progress on the
> * page
> @@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
> page_add_anon_rmap(kpage, vma, addr);
>
> flush_cache_page(vma, addr, pte_pfn(*ptep));
> - ptep_clear_flush(vma, addr, ptep);
> + ptep_clear_flush_notify(vma, addr, ptep);
> set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
>
> page_remove_rmap(page);
> diff --git a/mm/memory.c b/mm/memory.c
> index 7e8d820..36daa2d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -236,6 +236,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
> {
> tlb->need_flush = 0;
> tlb_flush(tlb);
> + mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
> #ifdef CONFIG_HAVE_RCU_TABLE_FREE
> tlb_table_flush(tlb);
> #endif
> @@ -2232,7 +2233,7 @@ gotten:
> * seen in the presence of one thread doing SMC and another
> * thread doing COW.
> */
> - ptep_clear_flush(vma, address, page_table);
> + ptep_clear_flush_notify(vma, address, page_table);
> page_add_new_anon_rmap(new_page, vma, address);
> /*
> * We call the notify macro here because, when using secondary
> diff --git a/mm/migrate.c b/mm/migrate.c
> index be6dbf9..d3fb8d0 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1875,7 +1875,7 @@ fail_putback:
> */
> flush_cache_range(vma, mmun_start, mmun_end);
> page_add_anon_rmap(new_page, vma, mmun_start);
> - pmdp_clear_flush(vma, mmun_start, pmd);
> + pmdp_clear_flush_notify(vma, mmun_start, pmd);
> set_pmd_at(mm, mmun_start, pmd, entry);
> flush_tlb_range(vma, mmun_start, mmun_end);
> update_mmu_cache_pmd(vma, address, &entry);
> @@ -1883,6 +1883,7 @@ fail_putback:
> if (page_count(page) != 2) {
> set_pmd_at(mm, mmun_start, pmd, orig_entry);
> flush_tlb_range(vma, mmun_start, mmun_end);
> + mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
> update_mmu_cache_pmd(vma, address, &entry);
> page_remove_rmap(new_page);
> goto fail_putback;
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 22a4a76..8a0d02d 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1380,7 +1380,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
>
> /* Nuke the page table entry. */
> flush_cache_page(vma, address, pte_pfn(*pte));
> - pteval = ptep_clear_flush(vma, address, pte);
> + pteval = ptep_clear_flush_notify(vma, address, pte);
>
> /* If nonlinear, store the file page offset in the pte. */
> if (page->index != linear_page_index(vma, address)) {
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
[not found] ` <1410277434-3087-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2014-09-09 15:43 ` Joerg Roedel
0 siblings, 0 replies; 8+ messages in thread
From: Joerg Roedel @ 2014-09-09 15:43 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jay.Cornwall-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Jerome Glisse,
jroedel-l3A5Bk7waGM, Jesse Barnes, David Woodhouse,
ben.sander-5C7GfCeVMHo
From: Joerg Roedel <jroedel@suse.de>
Add calls to the new mmu_notifier_invalidate_range()
function to all places in the VMM that need it.
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
include/linux/mmu_notifier.h | 41 +++++++++++++++++++++++++++++++++++++++++
kernel/events/uprobes.c | 2 +-
mm/fremap.c | 2 +-
mm/huge_memory.c | 9 +++++----
mm/hugetlb.c | 7 ++++++-
mm/ksm.c | 4 ++--
mm/memory.c | 3 ++-
mm/migrate.c | 3 ++-
mm/rmap.c | 2 +-
9 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 5d03f31..877d1c8 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -275,6 +275,44 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__young; \
})
+#define ptep_clear_flush_notify(__vma, __address, __ptep) \
+({ \
+ unsigned long ___addr = __address & PAGE_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pte_t ___pte; \
+ \
+ ___pte = ptep_clear_flush(__vma, __address, __ptep); \
+ mmu_notifier_invalidate_range(___mm, ___addr, \
+ ___addr + PAGE_SIZE); \
+ \
+ ___pte; \
+})
+
+#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pmd_t ___pmd; \
+ \
+ ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PMD_SIZE); \
+ \
+ ___pmd; \
+})
+
+#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
+ pmd_t ___pmd; \
+ \
+ ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd); \
+ mmu_notifier_invalidate_range(__mm, ___haddr, \
+ ___haddr + HPAGE_PMD_SIZE); \
+ \
+ ___pmd; \
+})
+
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -352,6 +390,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#define ptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
+#define pmdp_get_and_clear_notify pmdp_get_and_clear
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1d0af8a..bc143cf 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -193,7 +193,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 72b8fa3..9129013 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_present(pte)) {
flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
+ pte = ptep_clear_flush_notify(vma, addr, ptep);
page = vm_normal_page(vma, addr, pte);
if (page) {
if (pte_dirty(pte))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d9a21d06..c343ebd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1036,7 +1036,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
goto out_free_pages;
VM_BUG_ON_PAGE(!PageHead(page), page);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1179,7 +1179,7 @@ alloc:
pmd_t entry;
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
@@ -1512,7 +1512,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
pmd_t entry;
ret = 1;
if (!prot_numa) {
- entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmdp_get_and_clear_notify(mm, addr, pmd);
if (pmd_numa(entry))
entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
@@ -1644,6 +1644,7 @@ static int __split_huge_page_splitting(struct page *page,
* serialize against split_huge_page*.
*/
pmdp_splitting_flush(vma, address, pmd);
+
ret = 1;
spin_unlock(ptl);
}
@@ -2836,7 +2837,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
pmd_t _pmd;
int i;
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eeceeeb..393f2cd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2598,8 +2598,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_pte_at(dst, addr, dst_pte, entry);
} else {
- if (cow)
+ if (cow) {
huge_ptep_set_wrprotect(src, addr, src_pte);
+ mmu_notifier_invalidate_range(src, mmun_start,
+ mmun_end);
+ }
entry = huge_ptep_get(src_pte);
ptepage = pte_page(entry);
get_page(ptepage);
@@ -2899,6 +2902,7 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page);
@@ -3374,6 +3378,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* and that page table be reused and filled with junk.
*/
flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
mmu_notifier_invalidate_range_end(mm, start, end);
diff --git a/mm/ksm.c b/mm/ksm.c
index fb75902..6b1c239 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* this assure us that no O_DIRECT can happen after the check
* or in the middle of the check.
*/
- entry = ptep_clear_flush(vma, addr, ptep);
+ entry = ptep_clear_flush_notify(vma, addr, ptep);
/*
* Check that no O_DIRECT or similar I/O is in progress on the
* page
@@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
page_add_anon_rmap(kpage, vma, addr);
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/memory.c b/mm/memory.c
index adeac30..a332714 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -236,6 +236,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
tlb->need_flush = 0;
tlb_flush(tlb);
+ mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
@@ -2230,7 +2231,7 @@ gotten:
* seen in the presence of one thread doing SMC and another
* thread doing COW.
*/
- ptep_clear_flush(vma, address, page_table);
+ ptep_clear_flush_notify(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
diff --git a/mm/migrate.c b/mm/migrate.c
index f78ec9b..44dbe91 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1859,7 +1859,7 @@ fail_putback:
*/
flush_cache_range(vma, mmun_start, mmun_end);
page_add_anon_rmap(new_page, vma, mmun_start);
- pmdp_clear_flush(vma, mmun_start, pmd);
+ pmdp_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
@@ -1867,6 +1867,7 @@ fail_putback:
if (page_count(page) != 2) {
set_pmd_at(mm, mmun_start, pmd, orig_entry);
flush_tlb_range(vma, mmun_start, mmun_end);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
page_remove_rmap(new_page);
goto fail_putback;
diff --git a/mm/rmap.c b/mm/rmap.c
index 3e8491c..0b192fe 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1360,7 +1360,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address)) {
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/3 v4] mmu_notifier: Allow to manage CPU external TLBs
@ 2014-10-28 17:13 Joerg Roedel
[not found] ` <1414516440-910-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Joerg Roedel @ 2014-10-28 17:13 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jerome Glisse, Jay.Cornwall, Oded.Gabbay, John.Bridgman,
Suravee.Suthikulpanit, ben.sander, Jesse Barnes, David Woodhouse,
linux-kernel, linux-mm, iommu, jroedel, joro
From: Joerg Roedel <jroedel@suse.de>
Changes V3->V4:
* Rebased to v3.18-rc2
* Updated patch description and some comments
Changes V2->V3:
* Rebased to v3.17-rc4
* Fixed compile error because pmdp_get_and_clear_notify was
missing
Changes V1->V2:
* Rebase to v3.16-rc7
* Added call of ->invalidate_range to
__mmu_notifier_invalidate_range_end() so that the subsystem
doesn't need to register an ->invalidate_range_end()
call-back. Subsystems will likely register either
invalidate_range_start/end or invalidate_range, so that
should be fine.
* Re-ordered declarations a bit to reflect that
invalidate_range is not only called between
invalidate_range_start/end
* Updated documentation to cover the case where
invalidate_range is called outside of
invalidate_range_start/end to flush page-table pages out
of the TLB
Hi,
here is v4 of my patch-set which extends the mmu-notifiers
to allow managing CPU external TLBs. A more in-depth
description on the How and Why of this patch-set can be
found in the description of patch 1/3.
Any comments and review appreciated!
Thanks,
Joerg
Joerg Roedel (3):
mmu_notifier: Add mmu_notifier_invalidate_range()
mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
mmu_notifier: Add the call-back for mmu_notifier_invalidate_range()
include/linux/mmu_notifier.h | 88 +++++++++++++++++++++++++++++++++++++++++---
kernel/events/uprobes.c | 2 +-
mm/fremap.c | 2 +-
mm/huge_memory.c | 9 +++--
mm/hugetlb.c | 7 +++-
mm/ksm.c | 4 +-
mm/memory.c | 3 +-
mm/migrate.c | 3 +-
mm/mmu_notifier.c | 25 +++++++++++++
mm/rmap.c | 2 +-
10 files changed, 128 insertions(+), 17 deletions(-)
--
1.8.4.5
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/3] mmu_notifier: Add mmu_notifier_invalidate_range()
[not found] ` <1414516440-910-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2014-10-28 17:13 ` Joerg Roedel
2014-10-28 17:13 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel
2014-10-28 17:14 ` [PATCH 3/3] mmu_notifier: Add the call-back for mmu_notifier_invalidate_range() Joerg Roedel
2 siblings, 0 replies; 8+ messages in thread
From: Joerg Roedel @ 2014-10-28 17:13 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jay.Cornwall-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo,
jroedel-l3A5Bk7waGM, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Jerome Glisse,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Jesse Barnes,
David Woodhouse, ben.sander-5C7GfCeVMHo
From: Joerg Roedel <jroedel@suse.de>
This notifier closes an important gap in the current
mmu_notifier implementation: the existing call-backs are
called too early or too late to reliably manage a non-CPU
TLB. Specifically, invalidate_range_start() is called when
all pages are still mapped and invalidate_range_end() when
all pages are unmapped and potentially freed.
This is fine when the users of the mmu_notifiers manage
their own SoftTLB, like KVM does. When the TLB is managed in
software it is easy to wipe out entries for a given range
and prevent new entries from being established until
invalidate_range_end() is called.
But when the user of mmu_notifiers has to manage a hardware
TLB, it can still wipe out TLB entries in
invalidate_range_start(); it just can't make sure that no new
TLB entries in the given range are established between
invalidate_range_start() and invalidate_range_end().
To avoid silent data corruption the entries in the non-CPU
TLB need to be flushed when the pages are unmapped (at this
point in time no _new_ TLB entries can be established in the
non-CPU TLB) but not yet freed (as the non-CPU TLB may still
have _existing_ entries pointing to the pages about to be
freed).
To fix this problem we need to catch the moment when the
Linux VMM flushes remote TLBs (as a non-CPU TLB is not very
different in its flushing requirements from any other remote
CPU TLB), as this is the point in time when the pages are
unmapped but _not_ yet freed.
The mmu_notifier_invalidate_range() function aims to catch
that moment.
IOMMU code will be one user of the notifier-callback.
Currently this is only the AMD IOMMUv2 driver, but its code
is about to be more generalized and converted to a generic
IOMMU-API extension to fit the needs of similar
functionality in other IOMMUs as well.
The current attempt in the AMD IOMMUv2 driver to work around
the invalidate_range_start/end() shortcoming is to assign an
empty page table to the non-CPU TLB between any
invalidate_range_start/end calls. With the empty page-table
assigned, every page-table walk to re-fill the non-CPU TLB
will cause a page-fault reported to the IOMMU driver via an
interrupt, possibly causing interrupt storms.
The page-fault handler in the AMD IOMMUv2 driver doesn't
handle the fault while an invalidate_range_start/end pair is
active; it just reports SUCCESS back to the device and lets it
re-fault the page. But existing hardware (newer Radeon GPUs)
that makes use of this feature doesn't re-fault indefinitely;
after a certain number of faults for the same address the
device enters a failure state and needs to be reset.
To avoid the GPUs entering a failure state we need to get
rid of the empty-page-table workaround and use the
mmu_notifier_invalidate_range() function introduced with
this patch.
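As an illustrative sketch only (not part of this patch): a
driver that manages such a hardware TLB could hook the
call-back added later in this series roughly as below.
my_device and my_dev_flush_iotlb() are hypothetical
placeholders for driver-specific code.

#include <linux/mmu_notifier.h>

struct my_device {
	struct mmu_notifier mn;		/* one notifier per bound mm */
};

/* Hypothetical device-specific IOTLB invalidation. */
static void my_dev_flush_iotlb(struct my_device *dev,
			       unsigned long start, unsigned long end)
{
	/* Issue the invalidation command to the hardware here. */
}

static void my_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	struct my_device *dev = container_of(mn, struct my_device, mn);

	/*
	 * Called when the CPU flushes its TLBs: the pages in [start, end)
	 * are unmapped but not yet freed, so flushing the device TLB here
	 * closes the window described above.  Runs under the ptl spin-lock
	 * and must not sleep.
	 */
	my_dev_flush_iotlb(dev, start, end);
}

static const struct mmu_notifier_ops my_mn_ops = {
	.invalidate_range	= my_invalidate_range,
};

The important property is that the device TLB is flushed
while the pages are still allocated, so the hardware can
never walk or cache entries for already-freed memory.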
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
include/linux/mmu_notifier.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 88787bb..1790790 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -242,6 +242,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
__mmu_notifier_invalidate_range_end(mm, start, end);
}
+static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
mm->mmu_notifier_mm = NULL;
@@ -342,6 +347,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
{
}
+static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}
--
1.8.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
[not found] ` <1414516440-910-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2014-10-28 17:13 ` [PATCH 1/3] mmu_notifier: Add mmu_notifier_invalidate_range() Joerg Roedel
@ 2014-10-28 17:13 ` Joerg Roedel
2014-10-28 17:14 ` [PATCH 3/3] mmu_notifier: Add the call-back for mmu_notifier_invalidate_range() Joerg Roedel
2 siblings, 0 replies; 8+ messages in thread
From: Joerg Roedel @ 2014-10-28 17:13 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jay.Cornwall-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo,
jroedel-l3A5Bk7waGM, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Jerome Glisse,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Jesse Barnes,
David Woodhouse, ben.sander-5C7GfCeVMHo
From: Joerg Roedel <jroedel@suse.de>
Add calls to the new mmu_notifier_invalidate_range()
function to all places in the VMM that need it.
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
include/linux/mmu_notifier.h | 41 +++++++++++++++++++++++++++++++++++++++++
kernel/events/uprobes.c | 2 +-
mm/fremap.c | 2 +-
mm/huge_memory.c | 9 +++++----
mm/hugetlb.c | 7 ++++++-
mm/ksm.c | 4 ++--
mm/memory.c | 3 ++-
mm/migrate.c | 3 ++-
mm/rmap.c | 2 +-
9 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1790790..966da2b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -284,6 +284,44 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__young; \
})
+#define ptep_clear_flush_notify(__vma, __address, __ptep) \
+({ \
+ unsigned long ___addr = __address & PAGE_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pte_t ___pte; \
+ \
+ ___pte = ptep_clear_flush(__vma, __address, __ptep); \
+ mmu_notifier_invalidate_range(___mm, ___addr, \
+ ___addr + PAGE_SIZE); \
+ \
+ ___pte; \
+})
+
+#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pmd_t ___pmd; \
+ \
+ ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PMD_SIZE); \
+ \
+ ___pmd; \
+})
+
+#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
+ pmd_t ___pmd; \
+ \
+ ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd); \
+ mmu_notifier_invalidate_range(__mm, ___haddr, \
+ ___haddr + HPAGE_PMD_SIZE); \
+ \
+ ___pmd; \
+})
+
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -362,6 +400,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#define ptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
+#define pmdp_get_and_clear_notify pmdp_get_and_clear
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1d0af8a..bc143cf 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -193,7 +193,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 72b8fa3..9129013 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_present(pte)) {
flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
+ pte = ptep_clear_flush_notify(vma, addr, ptep);
page = vm_normal_page(vma, addr, pte);
if (page) {
if (pte_dirty(pte))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74c78aa..ef320af 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1036,7 +1036,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
goto out_free_pages;
VM_BUG_ON_PAGE(!PageHead(page), page);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1179,7 +1179,7 @@ alloc:
pmd_t entry;
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
@@ -1512,7 +1512,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
pmd_t entry;
ret = 1;
if (!prot_numa) {
- entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmdp_get_and_clear_notify(mm, addr, pmd);
if (pmd_numa(entry))
entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
@@ -1644,6 +1644,7 @@ static int __split_huge_page_splitting(struct page *page,
* serialize against split_huge_page*.
*/
pmdp_splitting_flush(vma, address, pmd);
+
ret = 1;
spin_unlock(ptl);
}
@@ -2833,7 +2834,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
pmd_t _pmd;
int i;
- pmdp_clear_flush(vma, haddr, pmd);
+ pmdp_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9fd7227..2e6add0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2598,8 +2598,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_pte_at(dst, addr, dst_pte, entry);
} else {
- if (cow)
+ if (cow) {
huge_ptep_set_wrprotect(src, addr, src_pte);
+ mmu_notifier_invalidate_range(src, mmun_start,
+ mmun_end);
+ }
entry = huge_ptep_get(src_pte);
ptepage = pte_page(entry);
get_page(ptepage);
@@ -2899,6 +2902,7 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page);
@@ -3374,6 +3378,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* and that page table be reused and filled with junk.
*/
flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
mmu_notifier_invalidate_range_end(mm, start, end);
diff --git a/mm/ksm.c b/mm/ksm.c
index 6b2e337..d247efa 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* this assure us that no O_DIRECT can happen after the check
* or in the middle of the check.
*/
- entry = ptep_clear_flush(vma, addr, ptep);
+ entry = ptep_clear_flush_notify(vma, addr, ptep);
/*
* Check that no O_DIRECT or similar I/O is in progress on the
* page
@@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
page_add_anon_rmap(kpage, vma, addr);
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
diff --git a/mm/memory.c b/mm/memory.c
index 1cc6bfb..c287d4c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -238,6 +238,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
tlb->need_flush = 0;
tlb_flush(tlb);
+ mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
@@ -2233,7 +2234,7 @@ gotten:
* seen in the presence of one thread doing SMC and another
* thread doing COW.
*/
- ptep_clear_flush(vma, address, page_table);
+ ptep_clear_flush_notify(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
diff --git a/mm/migrate.c b/mm/migrate.c
index 0143995..41945cb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1854,7 +1854,7 @@ fail_putback:
*/
flush_cache_range(vma, mmun_start, mmun_end);
page_add_anon_rmap(new_page, vma, mmun_start);
- pmdp_clear_flush(vma, mmun_start, pmd);
+ pmdp_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
@@ -1862,6 +1862,7 @@ fail_putback:
if (page_count(page) != 2) {
set_pmd_at(mm, mmun_start, pmd, orig_entry);
flush_tlb_range(vma, mmun_start, mmun_end);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
page_remove_rmap(new_page);
goto fail_putback;
diff --git a/mm/rmap.c b/mm/rmap.c
index 116a505..fdb8055 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1364,7 +1364,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address)) {
--
1.8.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/3] mmu_notifier: Add the call-back for mmu_notifier_invalidate_range()
[not found] ` <1414516440-910-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2014-10-28 17:13 ` [PATCH 1/3] mmu_notifier: Add mmu_notifier_invalidate_range() Joerg Roedel
2014-10-28 17:13 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel
@ 2014-10-28 17:14 ` Joerg Roedel
2 siblings, 0 replies; 8+ messages in thread
From: Joerg Roedel @ 2014-10-28 17:14 UTC (permalink / raw)
To: Andrew Morton, Andrea Arcangeli, Peter Zijlstra, Rik van Riel,
Hugh Dickins, Mel Gorman, Johannes Weiner
Cc: Jay.Cornwall-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo,
jroedel-l3A5Bk7waGM, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Jerome Glisse,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Jesse Barnes,
David Woodhouse, ben.sander-5C7GfCeVMHo
From: Joerg Roedel <jroedel@suse.de>
Now that the mmu_notifier_invalidate_range() calls are in
place, add the call-back to allow subsystems to register
against it.
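As a usage sketch (hypothetical, not part of the patch): a
subsystem that only needs the new call-back can register just
->invalidate_range and leave invalidate_range_start()/end()
unimplemented, because __mmu_notifier_invalidate_range_end()
below also invokes it. The my_bind_mm() helper and the
ops/notifier names are placeholders.

#include <linux/mmu_notifier.h>
#include <linux/mm_types.h>

static void my_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	/* Flush the external TLB for [start, end) here; must not sleep. */
}

static const struct mmu_notifier_ops my_mn_ops = {
	.invalidate_range	= my_invalidate_range,
};

static struct mmu_notifier my_mn = {
	.ops = &my_mn_ops,
};

/* Bind the notifier to a process address space, e.g. current->mm. */
static int my_bind_mm(struct mm_struct *mm)
{
	return mmu_notifier_register(&my_mn, mm);
}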
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
include/linux/mmu_notifier.h | 37 ++++++++++++++++++++++++++++++++-----
mm/mmu_notifier.c | 25 +++++++++++++++++++++++++
2 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 966da2b..94d19f6 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -98,11 +98,11 @@ struct mmu_notifier_ops {
/*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_sem and/or the
- * locks protecting the reverse maps are held. The subsystem
- * must guarantee that no additional references are taken to
- * the pages in the range established between the call to
- * invalidate_range_start() and the matching call to
- * invalidate_range_end().
+ * locks protecting the reverse maps are held. If the subsystem
+ * can't guarantee that no additional references are taken to
+ * the pages in the range, it has to implement the
+ * invalidate_range() notifier to remove any references taken
+ * after invalidate_range_start().
*
* Invalidation of multiple concurrent ranges may be
* optionally permitted by the driver. Either way the
@@ -144,6 +144,29 @@ struct mmu_notifier_ops {
void (*invalidate_range_end)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end);
+
+ /*
+ * invalidate_range() is either called between
+ * invalidate_range_start() and invalidate_range_end() when the
+ * VM has to free pages that were unmapped, but before the
+ * pages are actually freed, or outside of _start()/_end() when
+ * a (remote) TLB flush is necessary.
+ *
+ * If invalidate_range() is used to manage a non-CPU TLB with
+ * shared page-tables, it is not necessary to implement the
+ * invalidate_range_start()/end() notifiers, as
+ * invalidate_range() already catches the points in time when an
+ * external TLB range needs to be flushed.
+ *
+ * The invalidate_range() function is called under the ptl
+ * spin-lock and not allowed to sleep.
+ *
+ * Note that this function might be called with just a sub-range
+ * of what was passed to invalidate_range_start()/end(), if
+ * called between those functions.
+ */
+ void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long start, unsigned long end);
};
/*
@@ -190,6 +213,8 @@ extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end);
+extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end);
static inline void mmu_notifier_release(struct mm_struct *mm)
{
@@ -245,6 +270,8 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
+ if (mm_has_notifiers(mm))
+ __mmu_notifier_invalidate_range(mm, start, end);
}
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 2c8da98..3b9b3d0 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -193,6 +193,16 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
+ /*
+ * Call invalidate_range here too to avoid the need for the
+ * subsystem to register an invalidate_range_end
+ * call-back when there is invalidate_range already. Usually a
+ * subsystem registers either invalidate_range_start()/end() or
+ * invalidate_range(), so this will be no additional overhead
+ * (besides the pointer check).
+ */
+ if (mn->ops->invalidate_range)
+ mn->ops->invalidate_range(mn, mm, start, end);
if (mn->ops->invalidate_range_end)
mn->ops->invalidate_range_end(mn, mm, start, end);
}
@@ -200,6 +210,21 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);
+void __mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct mmu_notifier *mn;
+ int id;
+
+ id = srcu_read_lock(&srcu);
+ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->invalidate_range)
+ mn->ops->invalidate_range(mn, mm, start, end);
+ }
+ srcu_read_unlock(&srcu, id);
+}
+EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);
+
static int do_mmu_notifier_register(struct mmu_notifier *mn,
struct mm_struct *mm,
int take_mmap_sem)
--
1.8.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2014-10-28 17:14 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-28 17:13 [PATCH 0/3 v4] mmu_notifier: Allow to manage CPU external TLBs Joerg Roedel
[not found] ` <1414516440-910-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2014-10-28 17:13 ` [PATCH 1/3] mmu_notifier: Add mmu_notifier_invalidate_range() Joerg Roedel
2014-10-28 17:13 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel
2014-10-28 17:14 ` [PATCH 3/3] mmu_notifier: Add the call-back for mmu_notifier_invalidate_range() Joerg Roedel
-- strict thread matches above, loose matches on Subject: below --
2014-09-09 15:43 [PATCH 0/3 v3] mmu_notifier: Allow to manage CPU external TLBs Joerg Roedel
[not found] ` <1410277434-3087-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2014-09-09 15:43 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel
2014-07-29 16:18 [PATCH 0/3 v2] mmu_notifier: Allow to manage CPU external TLBs Joerg Roedel
2014-07-29 16:18 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel
2014-08-16 12:55 ` Oded Gabbay
2014-07-24 14:35 [PATCH 0/3] mmu_notifier: Allow to manage CPU external TLBs Joerg Roedel
[not found] ` <1406212541-25975-1-git-send-email-joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2014-07-24 14:35 ` [PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM Joerg Roedel