LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6 09/11] hugetlb: Introduce generic version of huge_ptep_set_wrprotect
From: Alexandre Ghiti @ 2018-08-06 17:57 UTC (permalink / raw)
  To: linux-mm, mike.kravetz, linux, catalin.marinas, will.deacon,
	tony.luck, fenghua.yu, ralf, paul.burton, jhogan, jejb, deller,
	benh, paulus, mpe, ysato, dalias, davem, tglx, mingo, hpa, x86,
	arnd, linux-arm-kernel, linux-kernel, linux-ia64, linux-mips,
	linux-parisc, linuxppc-dev, linux-sh, sparclinux, linux-arch
  Cc: Alexandre Ghiti
In-Reply-To: <20180806175711.24438-1-alex@ghiti.fr>

arm, ia64, mips, powerpc, sh, x86 architectures use the same version
of huge_ptep_set_wrprotect, so move this generic implementation into
asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 arch/arm/include/asm/hugetlb-3level.h        | 6 ------
 arch/arm64/include/asm/hugetlb.h             | 1 +
 arch/ia64/include/asm/hugetlb.h              | 6 ------
 arch/mips/include/asm/hugetlb.h              | 6 ------
 arch/parisc/include/asm/hugetlb.h            | 1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h | 6 ------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 1 +
 arch/powerpc/include/asm/nohash/32/pgtable.h | 6 ------
 arch/powerpc/include/asm/nohash/64/pgtable.h | 1 +
 arch/sh/include/asm/hugetlb.h                | 6 ------
 arch/sparc/include/asm/hugetlb.h             | 1 +
 arch/x86/include/asm/hugetlb.h               | 6 ------
 include/asm-generic/hugetlb.h                | 8 ++++++++
 13 files changed, 13 insertions(+), 42 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
index b897541520ef..8247cd6a2ac6 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -37,12 +37,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
 	return retval;
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 3e7f6e69b28d..f4f69ae5466e 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -48,6 +48,7 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 				     unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
 				    unsigned long addr, pte_t *ptep);
 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index cbe296271030..49d1f7949f3a 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -27,12 +27,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 6ff2531cfb1d..3dcf5debf8c4 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -63,12 +63,6 @@ static inline int huge_pte_none(pte_t pte)
 	return !val || (val == (unsigned long)invalid_pte_table);
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr,
 					     pte_t *ptep, pte_t pte,
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
index fb7e0fd858a3..9c3950ca2974 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -39,6 +39,7 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep);
 
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 02f5acd7ccc4..fc1511ce33a4 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -228,12 +228,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 {
 	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
 }
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 42aafba7a308..7d957f7c47cd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -451,6 +451,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 7c46a98cc7f4..6cabbd04a6fa 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -249,12 +249,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 {
 	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
 }
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index dd0c7236208f..69fbf7e9b4db 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -238,6 +238,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index f1bbd255ee43..8df4004977b9 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -32,12 +32,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 2101ea217f33..c41754a113f3 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -32,6 +32,7 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 59c056adb3c9..a3f781f7a264 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -13,12 +13,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 6c0c8b0c71e0..9b9039845278 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -102,4 +102,12 @@ static inline int prepare_hugepage_range(struct file *file,
 }
 #endif
 
+#ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+#endif
+
 #endif /* _ASM_GENERIC_HUGETLB_H */
-- 
2.16.2

^ permalink raw reply related

* [PATCH v6 10/11] hugetlb: Introduce generic version of huge_ptep_set_access_flags
From: Alexandre Ghiti @ 2018-08-06 17:57 UTC (permalink / raw)
  To: linux-mm, mike.kravetz, linux, catalin.marinas, will.deacon,
	tony.luck, fenghua.yu, ralf, paul.burton, jhogan, jejb, deller,
	benh, paulus, mpe, ysato, dalias, davem, tglx, mingo, hpa, x86,
	arnd, linux-arm-kernel, linux-kernel, linux-ia64, linux-mips,
	linux-parisc, linuxppc-dev, linux-sh, sparclinux, linux-arch
  Cc: Alexandre Ghiti
In-Reply-To: <20180806175711.24438-1-alex@ghiti.fr>

arm, ia64, sh, x86 architectures use the same version
of huge_ptep_set_access_flags, so move this generic implementation
into asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 arch/arm/include/asm/hugetlb-3level.h | 7 -------
 arch/arm64/include/asm/hugetlb.h      | 1 +
 arch/ia64/include/asm/hugetlb.h       | 7 -------
 arch/mips/include/asm/hugetlb.h       | 1 +
 arch/parisc/include/asm/hugetlb.h     | 1 +
 arch/powerpc/include/asm/hugetlb.h    | 1 +
 arch/sh/include/asm/hugetlb.h         | 7 -------
 arch/sparc/include/asm/hugetlb.h      | 1 +
 arch/x86/include/asm/hugetlb.h        | 7 -------
 include/asm-generic/hugetlb.h         | 9 +++++++++
 10 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
index 8247cd6a2ac6..54e4b097b1f5 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -37,11 +37,4 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
 	return retval;
 }
 
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
 #endif /* _ASM_ARM_HUGETLB_3LEVEL_H */
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index f4f69ae5466e..80887abcef7f 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -42,6 +42,7 @@ extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, pte_t pte);
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 				      unsigned long addr, pte_t *ptep,
 				      pte_t pte, int dirty);
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index 49d1f7949f3a..e9b42750fdf5 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -27,13 +27,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 	return *ptep;
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 3dcf5debf8c4..120adc3b2ffd 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -63,6 +63,7 @@ static inline int huge_pte_none(pte_t pte)
 	return !val || (val == (unsigned long)invalid_pte_table);
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr,
 					     pte_t *ptep, pte_t pte,
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
index 9c3950ca2974..165b4e5a6f32 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -43,6 +43,7 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep);
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty);
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 69c14ecac133..658bf7136a3c 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -137,6 +137,7 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 	flush_hugetlb_page(vma, addr);
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 				      unsigned long addr, pte_t *ptep,
 				      pte_t pte, int dirty);
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 8df4004977b9..c87195ae0cfa 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -32,13 +32,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 	return *ptep;
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index c41754a113f3..028a1465fbe7 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -40,6 +40,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index a3f781f7a264..574d42eb081e 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -13,13 +13,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 }
 
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 	return *ptep;
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 9b9039845278..f3c99a03ee83 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -110,4 +110,13 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 }
 #endif
 
+#ifndef __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep,
+		pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+#endif
+
 #endif /* _ASM_GENERIC_HUGETLB_H */
-- 
2.16.2

^ permalink raw reply related

* [PATCH v6 11/11] hugetlb: Introduce generic version of huge_ptep_get
From: Alexandre Ghiti @ 2018-08-06 17:57 UTC (permalink / raw)
  To: linux-mm, mike.kravetz, linux, catalin.marinas, will.deacon,
	tony.luck, fenghua.yu, ralf, paul.burton, jhogan, jejb, deller,
	benh, paulus, mpe, ysato, dalias, davem, tglx, mingo, hpa, x86,
	arnd, linux-arm-kernel, linux-kernel, linux-ia64, linux-mips,
	linux-parisc, linuxppc-dev, linux-sh, sparclinux, linux-arch
  Cc: Alexandre Ghiti
In-Reply-To: <20180806175711.24438-1-alex@ghiti.fr>

ia64, mips, parisc, powerpc, sh, sparc, x86 architectures use the
same version of huge_ptep_get, so move this generic implementation into
asm-generic/hugetlb.h.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 arch/arm/include/asm/hugetlb-3level.h | 1 +
 arch/arm64/include/asm/hugetlb.h      | 1 +
 arch/ia64/include/asm/hugetlb.h       | 5 -----
 arch/mips/include/asm/hugetlb.h       | 5 -----
 arch/parisc/include/asm/hugetlb.h     | 5 -----
 arch/powerpc/include/asm/hugetlb.h    | 5 -----
 arch/sh/include/asm/hugetlb.h         | 5 -----
 arch/sparc/include/asm/hugetlb.h      | 5 -----
 arch/x86/include/asm/hugetlb.h        | 5 -----
 include/asm-generic/hugetlb.h         | 7 +++++++
 10 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
index 54e4b097b1f5..0d9f3918fa7e 100644
--- a/arch/arm/include/asm/hugetlb-3level.h
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -29,6 +29,7 @@
  * ptes.
  * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
  */
+#define __HAVE_ARCH_HUGE_PTEP_GET
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 	pte_t retval = *ptep;
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 80887abcef7f..fb6609875455 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -20,6 +20,7 @@
 
 #include <asm/page.h>
 
+#define __HAVE_ARCH_HUGE_PTEP_GET
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 	return READ_ONCE(*ptep);
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index e9b42750fdf5..36cc0396b214 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -27,11 +27,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 120adc3b2ffd..425bb6fc3bda 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -82,11 +82,6 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 }
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
index 165b4e5a6f32..7cb595dcb7d7 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -48,11 +48,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty);
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 658bf7136a3c..33a2d9e3ea9e 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -142,11 +142,6 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 				      unsigned long addr, pte_t *ptep,
 				      pte_t pte, int dirty);
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index c87195ae0cfa..6f025fe18146 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -32,11 +32,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 }
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 	clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 028a1465fbe7..3963f80d1cb3 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -53,11 +53,6 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 }
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 574d42eb081e..7469d321f072 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -13,11 +13,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 }
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index f3c99a03ee83..71d7b77eea50 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -119,4 +119,11 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 }
 #endif
 
+#ifndef __HAVE_ARCH_HUGE_PTEP_GET
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+#endif
+
 #endif /* _ASM_GENERIC_HUGETLB_H */
-- 
2.16.2

^ permalink raw reply related

* Re: [PATCH v2] selftests/powerpc: Avoid remaining process/threads
From: Breno Leitao @ 2018-08-06 18:24 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: Gustavo Romero
In-Reply-To: <87bmafoiu5.fsf@concordia.ellerman.id.au>

Hello Michael,

On 08/06/2018 08:06 AM, Michael Ellerman wrote:
> Breno Leitao <leitao@debian.org> writes:
> 
>> diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
>> index 66d31de60b9a..06c51e8d8ccb 100644
>> --- a/tools/testing/selftests/powerpc/harness.c
>> +++ b/tools/testing/selftests/powerpc/harness.c
>> @@ -85,13 +85,16 @@ int run_test(int (test_function)(void), char *name)
>>  	return status;
>>  }
>>  
>> -static void alarm_handler(int signum)
>> +static void sig_handler(int signum)
>>  {
>> -	/* Jut wake us up from waitpid */
>> +	if (signum == SIGINT)
>> +		kill(-pid, SIGTERM);
> 
> I don't think we need to do that here, if we just return then we'll pop
> out of the waitpid() and go via the normal path.

Correct, if we press ^C while the parent process is waiting at waitpid(),
then the waitpid() syscall will be interrupted (EINTR) and never restarted
(unless we set sa_flags = SA_RESTART); thus, the code will resume executing at
the next instruction when the signal handler is done, as if we had skipped
waitpid().

From a theoretical point of view, the user can press ^C before the process
executes the waitpid() syscall. In this case the process will not 'skip' the
waitpid(), which will continue to wait. We can clearly force this behavior by
putting a sleep(1) before waitpid() and pressing ^C in the very first
second: it will 'skip' the nanosleep() syscall instead of waitpid(), which
will be there, and the ^C will be ignored (thus not calling kill(-pid, SIGTERM)).

From a practical point of view, I will prepare a v3 patch. :-)

^ permalink raw reply

* Re: [PATCH v2 1/2] powerpc/pseries: Avoid blocking rtas polling handling multiple PRRN events
From: John Allen @ 2018-08-06 19:09 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, nfont
In-Reply-To: <87lg9qfddo.fsf@concordia.ellerman.id.au>

On Wed, Aug 01, 2018 at 11:02:59PM +1000, Michael Ellerman wrote:
>Hi John,
>
>I'm still not sure about this one.
>
>John Allen <jallen@linux.ibm.com> writes:
>> On Mon, Jul 23, 2018 at 11:27:56PM +1000, Michael Ellerman wrote:
>>>Hi John,
>>>
>>>I'm a bit puzzled by this one.
>>>
>>>John Allen <jallen@linux.ibm.com> writes:
>>>> When a PRRN event is being handled and another PRRN event comes in, the
>>>> second event will block rtas polling waiting on the first to complete,
>>>> preventing any further rtas events from being handled. This can be
>>>> especially problematic in case that PRRN events are continuously being
>>>> queued in which case rtas polling gets indefinitely blocked completely.
>>>>
>>>> This patch introduces a mutex that prevents any subsequent PRRN events from
>>>> running while there is a prrn event being handled, allowing rtas polling to
>>>> continue normally.
>>>>
>>>> Signed-off-by: John Allen <jallen@linux.ibm.com>
>>>> ---
>>>> v2:
>>>>   -Unlock prrn_lock when PRRN operations are complete, not after handler is
>>>>    scheduled.
>>>>   -Remove call to flush_work, the previous broken method of serializing
>>>>    PRRN events.
>>>> ---
>>>>  arch/powerpc/kernel/rtasd.c | 10 +++++++---
>>>>  1 file changed, 7 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
>>>> index 44d66c33d59d..845fc5aec178 100644
>>>> --- a/arch/powerpc/kernel/rtasd.c
>>>> +++ b/arch/powerpc/kernel/rtasd.c
>>>> @@ -284,15 +286,17 @@ static void prrn_work_fn(struct work_struct *work)
>>>>  	 */
>>>>  	pseries_devicetree_update(-prrn_update_scope);
>>>>  	numa_update_cpu_topology(false);
>>>> +	mutex_unlock(&prrn_lock);
>>>>  }
>>>>
>>>>  static DECLARE_WORK(prrn_work, prrn_work_fn);
>>>>
>>>>  static void prrn_schedule_update(u32 scope)
>>>>  {
>>>> -	flush_work(&prrn_work);
>>>
>>>This seems like it's actually the core of the change. Previously we were
>>>basically blocking on the flush before continuing.
>>
>> The idea here is to replace the blocking flush_work with a non-blocking
>> mutex. So rather than waiting on the running PRRN event to complete, we
>> bail out since a PRRN event is already running.
>
>OK, but why is it OK to bail out?
>
>The firmware sent you an error log asking you to do something, with a
>scope value that has some meaning, and now you're just going to drop
>that on the floor?
>
>Maybe it is OK to just drop these events? Or maybe you're saying that
>because the system is crashing under the load of too many events it's OK
>to drop the events in this case.

I think I see your point. If a PRRN event comes in while another is 
currently running, the new one may contain a different list of LMBs/CPUs 
and the old list becomes outdated. With the mutex, the only event that 
gets handled is the oldest and we will lose any additional changes 
beyond the initial event. Therefore, as you mentioned in your previous 
message, the behavior of the global workqueue should work just fine once 
we remove the call to flush_work.  While a prrn event is running, only 
one will remain on the workqueue, then when the first one completes, the 
newly scheduled work function should grab the latest PRRN list.

I will send a new version of the patch with just the call to flush_work 
removed.

-John

>
>> The situation this is
>> meant to address is flooding the workqueue with PRRN events, which like
>> the situation in patch 2/2, these can be queued up faster than they can
>> actually be handled.
>
>I'm not really sure why this is a problem though.
>
>The current code synchronously processes the events, so there should
>only ever be one in flight.
>
>I guess the issue is that each one can queue multiple events on the
>hotplug work queue?
>
>But still, we have terabytes of RAM, we should be able to queue a lot
>of events before it becomes a problem.
>
>So what exactly is getting flooded, what's the symptom?
>
>If the queuing of the hotplug events is the problem, then why don't we
>stop doing that? We could just process them synchronously from the PRRN
>update, that would naturally throttle them.
>
>cheers
>

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-06 19:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Michael S. Tsirkin, Will Deacon, Anshuman Khandual,
	virtualization, linux-kernel, linuxppc-dev, aik, robh, joe,
	elfring, david, jasowang, mpe, linuxram, haren, paulus, srikar,
	robin.murphy, jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180806094243.GA16032@infradead.org>

On Mon, 2018-08-06 at 02:42 -0700, Christoph Hellwig wrote:
> On Mon, Aug 06, 2018 at 07:16:47AM +1000, Benjamin Herrenschmidt wrote:
> > Who would set this bit ? qemu ? Under what circumstances ?
> 
> I don't really care who sets what.  The implementation might not even
> involve qemu.
> 
> It is your job to write a coherent interface specification that does
> not depend on the used components.  The hypervisor might be PAPR,
> Linux + qemu, VMware, Hyperv or something so secret that you'd have
> to shoot me if you had to tell me.  The guest might be Linux, FreeBSD,
> AIX, OS400 or a Hipster project of the day in Rust.  As long as we
> properly specify the interface it simply does not matter.

That's the point Christoph. The interface is today's interface. It does
NOT change. That information is not part of the interface.

It's the VM itself that is stashing away its memory in a secret place,
and thus needs to do bounce buffering. There is no change to the virtio
interface per-se.

> > What would be the effect of this bit while VIRTIO_F_IOMMU is NOT set,
> > ie, what would qemu do and what would Linux do ? I'm not sure I fully
> > understand your idea.
> 
> In a perfect world we'd just reuse VIRTIO_F_IOMMU and clarify the
> description which currently is rather vague but basically captures
> the use case.  Currently it is:
> 
> VIRTIO_F_IOMMU_PLATFORM(33)
>     This feature indicates that the device is behind an IOMMU that
>     translates bus addresses from the device into physical addresses in
>     memory. If this feature bit is set to 0, then the device emits
>     physical addresses which are not translated further, even though an
>     IOMMU may be present.
> 
> And I'd change it to something like:
> 
> VIRTIO_F_PLATFORM_DMA(33)
>     This feature indicates that the device emits platform specific
>     bus addresses that might not be identical to physical address.
>     The translation of physical to bus address is platform specific
>     and defined by the platform specification for the bus that the virtio
>     device is attached to.
>     If this feature bit is set to 0, then the device emits
>     physical addresses which are not translated further, even if
>     the platform would normally require translations for the bus that
>     the virtio device is attached to.
> 
> If we can't change the definition any more we should deprecate the
> old VIRTIO_F_IOMMU_PLATFORM bit, and require the VIRTIO_F_IOMMU_PLATFORM
> and VIRTIO_F_PLATFORM_DMA to be not set at the same time.

But this doesn't really change our problem does it ?

None of what happens in our case is part of the "interface". The
suggestion to force the iommu ON was simply that it was a "workaround"
as by doing so, we get to override the DMA ops, but that's just a
trick.

Fundamentally, what we need to solve is pretty much entirely a guest
problem.

> > I'm trying to understand because the limitation is not a device side
> > limitation, it's not a qemu limitation, it's actually more of a VM
> > limitation. It has most of its memory pages made inaccessible for
> > security reasons. The platform from a qemu/KVM perspective is almost
> > entirely normal.
> 
> Well, find a way to describe this either in the qemu specification using
> new feature bits, or by using something like the above.

But again, why do you want to involve the interface, and thus the
hypervisor for something that is essentially what the guest is doing to
itself ?

It really is something we need to solve locally to the guest, it's not
part of the interface.

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-06 19:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180806164106-mutt-send-email-mst@kernel.org>

On Mon, 2018-08-06 at 16:46 +0300, Michael S. Tsirkin wrote:
> 
> > Right, we'll need some quirk to disable balloons  in the guest I
> > suppose.
> > 
> > Passing something from libvirt is cumbersome because the end user may
> > not even need to know about secure VMs. There are use cases where the
> > security is a contract down to some special application running inside
> > the secure VM, the sysadmin knows nothing about.
> > 
> > Also there's repercussions all the way to admin tools, web UIs etc...
> > so it's fairly wide ranging.
> > 
> > So as long as we only need to quirk a couple of devices, it's much
> > better contained that way.
> 
> So just the balloon thing already means that yes management and all the
> way to the user tools must know this is going on. Otherwise
> user will try to inflate the balloon and wonder why this does not work.

There are *dozens* of management systems out there, not even all open
source, we won't ever be able to see the end of the tunnel if we need
to teach every single one of them, including end users, about platform
specific new VM flags like that.

.../...

> Here's another example: you can't migrate a secure vm to hypervisor
> which doesn't support this feature. Again management tools above libvirt
> need to know otherwise they will try.

There will have to be a new machine type for that I suppose, yes,
though it's not just the hypervisor that needs to know about the
modified migration stream, it's also the need to have a compatible
ultravisor with the right keys on the other side.

So migration is going to be special and require extra admin work in all
cases yes. But not all secure VMs are meant to be migratable.

In any case, back to the problem at hand. What a qemu flag gives us is
just a way to force iommu at VM creation time.

This is rather sub-optimal, we don't really want the iommu in the way,
so it's at best a "workaround", and it's not really solving the real
problem.

As I said replying to Christoph, we are "leaking" into the interface
something here that is really what the VM is doing to itself, which
is to stash its memory away in an inaccessible place.

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Michael S. Tsirkin @ 2018-08-06 20:35 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <ef6d5d7c7b812bd797a1c3fd6bc7a26d0074020f.camel@kernel.crashing.org>

On Tue, Aug 07, 2018 at 05:56:59AM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2018-08-06 at 16:46 +0300, Michael S. Tsirkin wrote:
> > 
> > > Right, we'll need some quirk to disable balloons  in the guest I
> > > suppose.
> > > 
> > > Passing something from libvirt is cumbersome because the end user may
> > > not even need to know about secure VMs. There are use cases where the
> > > security is a contract down to some special application running inside
> > > the secure VM, the sysadmin knows nothing about.
> > > 
> > > Also there's repercussions all the way to admin tools, web UIs etc...
> > > so it's fairly wide ranging.
> > > 
> > > So as long as we only need to quirk a couple of devices, it's much
> > > better contained that way.
> > 
> > So just the balloon thing already means that yes management and all the
> > way to the user tools must know this is going on. Otherwise
> > user will try to inflate the balloon and wonder why this does not work.
> 
> There is *dozens* of management systems out there, not even all open
> source, we won't ever be able to see the end of the tunnel if we need
> to teach every single of them, including end users, about platform
> specific new VM flags like that.
> 
> .../...

In the end I suspect you will find you have to.

> > Here's another example: you can't migrate a secure vm to hypervisor
> > which doesn't support this feature. Again management tools above libvirt
> > need to know otherwise they will try.
> 
> There will have to be a new machine type for that I suppose, yes,
> though it's not just the hypervisor that needs to know about the
> modified migration stream, it's also the need to have a compatible
> ultravisor with the right keys on the other side.
> 
> So migration is going to be special and require extra admin work in all
> cases yes. But not all secure VMs are meant to be migratable.
> 
> In any case, back to the problem at hand. What a qemu flag gives us is
> just a way to force iommu at VM creation time.

I don't think a qemu flag is strictly required for a problem at hand.

> This is rather sub-optimal, we don't really want the iommu in the way,
> so it's at best a "workaround", and it's not really solving the real
> problem.

This specific problem, I think I agree.

> As I said replying to Christoph, we are "leaking" into the interface
> something here that is really what's the VM is doing to itself, which
> is to stash its memory away in an inaccessible place.
> 
> Cheers,
> Ben.

I think Christoph merely objects to the specific implementation.  If
instead you do something like tweak dev->bus_dma_mask for the virtio
device I think he won't object.

-- 
MST

^ permalink raw reply

* [PATCH v2 1/2] powerpc/fadump: handle crash memory ranges array index overflow
From: Hari Bathini @ 2018-08-06 20:42 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Mahesh Salgaonkar, Mahesh J Salgaonkar, stable, linuxppc-dev

Crash memory ranges is an array of memory ranges of the crashing kernel
to be exported as a dump via /proc/vmcore file. The size of the array
is set based on INIT_MEMBLOCK_REGIONS, which works alright in most cases
where memblock memory regions count is less than INIT_MEMBLOCK_REGIONS
value. But this count can grow beyond INIT_MEMBLOCK_REGIONS value since
commit 142b45a72e22 ("memblock: Add array resizing support").

On large memory systems with a few DLPAR operations, the memblock memory
regions count could be larger than INIT_MEMBLOCK_REGIONS value. On such
systems, registering fadump results in crash or other system failures
like below:

  task: c00007f39a290010 ti: c00000000b738000 task.ti: c00000000b738000
  NIP: c000000000047df4 LR: c0000000000f9e58 CTR: c00000000010f180
  REGS: c00000000b73b570 TRAP: 0300   Tainted: G          L   X  (4.4.140+)
  MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 22004484  XER: 20000000
  CFAR: c000000000008500 DAR: 000007a450000000 DSISR: 40000000 SOFTE: 0
  GPR00: c0000000000f9e58 c00000000b73b7f0 c000000000f09a00 000000000000001a
  GPR04: c00007f3bf774c90 0000000000000004 c000000000eb9a00 0000000000000800
  GPR08: 0000000000000804 000007a450000000 c000000000fa9a00 c00007ffb169ca20
  GPR12: 0000000022004482 c00000000fa12c00 c00007f3a0ea97a8 0000000000000000
  GPR16: c00007f3a0ea9a50 c00000000b73bd60 0000000000000118 000000000001fe80
  GPR20: 0000000000000118 0000000000000000 c000000000b8c980 00000000000000d0
  GPR24: 000007ffb0b10000 c00007ffb169c980 0000000000000000 c000000000b8c980
  GPR28: 0000000000000004 c00007ffb169c980 000000000000001a c00007ffb169c980
  NIP [c000000000047df4] smp_send_reschedule+0x24/0x80
  LR [c0000000000f9e58] resched_curr+0x138/0x160
  Call Trace:
  [c00000000b73b7f0] [c0000000000f9e58] resched_curr+0x138/0x160 (unreliable)
  [c00000000b73b820] [c0000000000fb538] check_preempt_curr+0xc8/0xf0
  [c00000000b73b850] [c0000000000fb598] ttwu_do_wakeup+0x38/0x150
  [c00000000b73b890] [c0000000000fc9c4] try_to_wake_up+0x224/0x4d0
  [c00000000b73b900] [c00000000011ef34] __wake_up_common+0x94/0x100
  [c00000000b73b960] [c00000000034a78c] ep_poll_callback+0xac/0x1c0
  [c00000000b73b9b0] [c00000000011ef34] __wake_up_common+0x94/0x100
  [c00000000b73ba10] [c00000000011f810] __wake_up_sync_key+0x70/0xa0
  [c00000000b73ba60] [c00000000067c3e8] sock_def_readable+0x58/0xa0
  [c00000000b73ba90] [c0000000007848ac] unix_stream_sendmsg+0x2dc/0x4c0
  [c00000000b73bb70] [c000000000675a38] sock_sendmsg+0x68/0xa0
  [c00000000b73bba0] [c00000000067673c] ___sys_sendmsg+0x2cc/0x2e0
  [c00000000b73bd30] [c000000000677dbc] __sys_sendmsg+0x5c/0xc0
  [c00000000b73bdd0] [c0000000006789bc] SyS_socketcall+0x36c/0x3f0
  [c00000000b73be30] [c000000000009488] system_call+0x3c/0x100
  Instruction dump:
  4e800020 60000000 60420000 3c4c00ec 38421c30 7c0802a6 f8010010 60000000
  3d42000a e92ab420 2fa90000 4dde0020 <e9290000> 2fa90000 419e0044 7c0802a6
  ---[ end trace a6d1dd4bab5f8253 ]---

as array index overflow is not checked for while setting up crash memory
ranges causing memory corruption. To resolve this issue, dynamically
allocate memory for crash memory ranges and resize it incrementally,
in units of pagesize, on hitting array size limit.

Fixes: 2df173d9e85d ("fadump: Initialize elfcore header and add PT_LOAD program headers.")
Cc: stable@vger.kernel.org
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v2:
* Allocating memory for crash ranges in pagesize unit.
* freeing memory allocated while cleaning up.
* Moved the changes to coalesce memory ranges into patch 2/2.


 arch/powerpc/include/asm/fadump.h |    4 +-
 arch/powerpc/kernel/fadump.c      |   91 +++++++++++++++++++++++++++++++------
 2 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 5a23010..3abc738 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -195,8 +195,8 @@ struct fadump_crash_info_header {
 	struct cpumask	online_mask;
 };
 
-/* Crash memory ranges */
-#define INIT_CRASHMEM_RANGES	(INIT_MEMBLOCK_REGIONS + 2)
+/* Crash memory ranges size unit (pagesize) */
+#define CRASHMEM_RANGES_ALLOC_SIZE		PAGE_SIZE
 
 struct fad_crash_memory_ranges {
 	unsigned long long	base;
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 07e8396..2ec5704 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -47,8 +47,10 @@ static struct fadump_mem_struct fdm;
 static const struct fadump_mem_struct *fdm_active;
 
 static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+struct fad_crash_memory_ranges *crash_memory_ranges;
+int crash_memory_ranges_size;
 int crash_mem_ranges;
+int max_crash_mem_ranges;
 
 /* Scan the Firmware Assisted dump configuration details. */
 int __init early_init_dt_scan_fw_dump(unsigned long node,
@@ -868,22 +870,67 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
 	return 0;
 }
 
-static inline void fadump_add_crash_memory(unsigned long long base,
-					unsigned long long end)
+static void free_crash_memory_ranges(void)
+{
+	kfree(crash_memory_ranges);
+	crash_memory_ranges = NULL;
+	crash_memory_ranges_size = 0;
+	max_crash_mem_ranges = 0;
+}
+
+/*
+ * Allocate or reallocate crash memory ranges array in incremental units
+ * of CRASHMEM_RANGES_ALLOC_SIZE.
+ */
+static int allocate_crash_memory_ranges(void)
+{
+	u64 new_size;
+	struct fad_crash_memory_ranges *new_array;
+
+	new_size = crash_memory_ranges_size + CRASHMEM_RANGES_ALLOC_SIZE;
+	pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
+		 new_size);
+
+	new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+	if (new_array == NULL) {
+		pr_err("Insufficient memory for setting up crash memory ranges\n");
+		free_crash_memory_ranges();
+		return -ENOMEM;
+	}
+
+	crash_memory_ranges = new_array;
+	crash_memory_ranges_size = new_size;
+	max_crash_mem_ranges = (new_size /
+				sizeof(struct fad_crash_memory_ranges));
+	return 0;
+}
+
+static inline int fadump_add_crash_memory(unsigned long long base,
+					  unsigned long long end)
 {
 	if (base == end)
-		return;
+		return 0;
+
+	if (crash_mem_ranges == max_crash_mem_ranges) {
+		int ret;
+
+		ret = allocate_crash_memory_ranges();
+		if (ret)
+			return ret;
+	}
 
 	pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
 		crash_mem_ranges, base, end - 1, (end - base));
 	crash_memory_ranges[crash_mem_ranges].base = base;
 	crash_memory_ranges[crash_mem_ranges].size = end - base;
 	crash_mem_ranges++;
+	return 0;
 }
 
-static void fadump_exclude_reserved_area(unsigned long long start,
+static int fadump_exclude_reserved_area(unsigned long long start,
 					unsigned long long end)
 {
+	int ret = 0;
 	unsigned long long ra_start, ra_end;
 
 	ra_start = fw_dump.reserve_dump_area_start;
@@ -891,15 +938,20 @@ static void fadump_exclude_reserved_area(unsigned long long start,
 
 	if ((ra_start < end) && (ra_end > start)) {
 		if ((start < ra_start) && (end > ra_end)) {
-			fadump_add_crash_memory(start, ra_start);
-			fadump_add_crash_memory(ra_end, end);
+			ret = fadump_add_crash_memory(start, ra_start);
+			if (ret)
+				return ret;
+
+			ret = fadump_add_crash_memory(ra_end, end);
 		} else if (start < ra_start) {
-			fadump_add_crash_memory(start, ra_start);
+			ret = fadump_add_crash_memory(start, ra_start);
 		} else if (ra_end < end) {
-			fadump_add_crash_memory(ra_end, end);
+			ret = fadump_add_crash_memory(ra_end, end);
 		}
 	} else
-		fadump_add_crash_memory(start, end);
+		ret = fadump_add_crash_memory(start, end);
+
+	return ret;
 }
 
 static int fadump_init_elfcore_header(char *bufp)
@@ -939,8 +991,9 @@ static int fadump_init_elfcore_header(char *bufp)
  * Traverse through memblock structure and setup crash memory ranges. These
  * ranges will be used create PT_LOAD program headers in elfcore header.
  */
-static void fadump_setup_crash_memory_ranges(void)
+static int fadump_setup_crash_memory_ranges(void)
 {
+	int ret;
 	struct memblock_region *reg;
 	unsigned long long start, end;
 
@@ -953,7 +1006,9 @@ static void fadump_setup_crash_memory_ranges(void)
 	 * specified during fadump registration. We need to create a separate
 	 * program header for this chunk with the correct offset.
 	 */
-	fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+	ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+	if (ret)
+		return ret;
 
 	for_each_memblock(memory, reg) {
 		start = (unsigned long long)reg->base;
@@ -973,8 +1028,12 @@ static void fadump_setup_crash_memory_ranges(void)
 		}
 
 		/* add this range excluding the reserved dump area. */
-		fadump_exclude_reserved_area(start, end);
+		ret = fadump_exclude_reserved_area(start, end);
+		if (ret)
+			return ret;
 	}
+
+	return 0;
 }
 
 /*
@@ -1095,6 +1154,7 @@ static unsigned long init_fadump_header(unsigned long addr)
 
 static int register_fadump(void)
 {
+	int ret;
 	unsigned long addr;
 	void *vaddr;
 
@@ -1105,7 +1165,9 @@ static int register_fadump(void)
 	if (!fw_dump.reserve_dump_area_size)
 		return -ENODEV;
 
-	fadump_setup_crash_memory_ranges();
+	ret = fadump_setup_crash_memory_ranges();
+	if (ret)
+		return ret;
 
 	addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
 	/* Initialize fadump crash info header. */
@@ -1183,6 +1245,7 @@ void fadump_cleanup(void)
 	} else if (fw_dump.dump_registered) {
 		/* Un-register Firmware-assisted dump if it was registered. */
 		fadump_unregister_dump(&fdm);
+		free_crash_memory_ranges();
 	}
 }
 

^ permalink raw reply related

* [PATCH v2 2/2] powerpc/fadump: merge adjacent memory ranges to reduce PT_LOAD segements
From: Hari Bathini @ 2018-08-06 20:42 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Mahesh J Salgaonkar
In-Reply-To: <153358813908.15150.18359359970445648733.stgit@hbathini.in.ibm.com>

With dynamic memory allocation support for crash memory ranges array,
there is no hard limit on the no. of crash memory ranges kernel could
export, but program headers count could overflow in the /proc/vmcore
ELF file while exporting each memory range as PT_LOAD segment. Reduce
the likelihood of such a scenario by folding adjacent crash memory
ranges which minimizes the total number of PT_LOAD segments.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/kernel/fadump.c |   45 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2ec5704..cd0c555 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -908,22 +908,41 @@ static int allocate_crash_memory_ranges(void)
 static inline int fadump_add_crash_memory(unsigned long long base,
 					  unsigned long long end)
 {
+	u64  start, size;
+	bool is_adjacent = false;
+
 	if (base == end)
 		return 0;
 
-	if (crash_mem_ranges == max_crash_mem_ranges) {
-		int ret;
+	/*
+	 * Fold adjacent memory ranges to bring down the memory ranges/
+	 * PT_LOAD segments count.
+	 */
+	if (crash_mem_ranges) {
+		start = crash_memory_ranges[crash_mem_ranges-1].base;
+		size = crash_memory_ranges[crash_mem_ranges-1].size;
 
-		ret = allocate_crash_memory_ranges();
-		if (ret)
-			return ret;
+		if ((start + size) == base)
+			is_adjacent = true;
+	}
+	if (!is_adjacent) {
+		/* resize the array on reaching the limit */
+		if (crash_mem_ranges == max_crash_mem_ranges) {
+			int ret;
+
+			ret = allocate_crash_memory_ranges();
+			if (ret)
+				return ret;
+		}
+
+		start = base;
+		crash_memory_ranges[crash_mem_ranges].base = start;
+		crash_mem_ranges++;
 	}
 
+	crash_memory_ranges[crash_mem_ranges-1].size = (end - start);
 	pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
-		crash_mem_ranges, base, end - 1, (end - base));
-	crash_memory_ranges[crash_mem_ranges].base = base;
-	crash_memory_ranges[crash_mem_ranges].size = end - base;
-	crash_mem_ranges++;
+		(crash_mem_ranges - 1), start, end - 1, (end - start));
 	return 0;
 }
 
@@ -999,6 +1018,14 @@ static int fadump_setup_crash_memory_ranges(void)
 
 	pr_debug("Setup crash memory ranges.\n");
 	crash_mem_ranges = 0;
+
+	/* allocate memory for crash memory ranges for the first time */
+	if (!max_crash_mem_ranges) {
+		ret = allocate_crash_memory_ranges();
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * add the first memory chunk (RMA_START through boot_memory_size) as
 	 * a separate memory chunk. The reason is, at the time crash firmware

^ permalink raw reply related

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-06 21:26 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180806233024-mutt-send-email-mst@kernel.org>

On Mon, 2018-08-06 at 23:35 +0300, Michael S. Tsirkin wrote:
> > As I said replying to Christoph, we are "leaking" into the interface
> > something here that is really what's the VM is doing to itself, which
> > is to stash its memory away in an inaccessible place.
> > 
> > Cheers,
> > Ben.
> 
> I think Christoph merely objects to the specific implementation.  If
> instead you do something like tweak dev->bus_dma_mask for the virtio
> device I think he won't object.

Well, we don't have "bus_dma_mask" yet ..or you mean dma_mask ?

So, something like that would be a possibility, but the problem is that
the current virtio (guest side) implementation doesn't honor this when
not using dma ops and will not use dma ops if not using iommu, so back
to square one.

Christoph seems to be wanting to use a flag in the interface to make
the guest use dma_ops which is what I don't understand.

What would be needed then would be something along the lines of virtio
noticing that dma_mask isn't big enough to cover all of memory (which
isn't something generic code can easily do here for various reasons I
can elaborate if you want, but that specific test more/less has to be
arch specific), and in that case, force itself to use DMA ops routed to
swiotlb.

I'd rather have arch code do the bulk of that work, don't you think ?

Which brings me back to this option, which may be the simplest and
avoids the overhead of the proposed series (I found the series to be a
nice cleanup but retpoline does kick us in the nuts here).

So what about this ?

--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -155,7 +155,7 @@ static bool vring_use_dma_api(struct virtio_device
*vdev)
         * the DMA API if we're a Xen guest, which at least allows
         * all of the sensible Xen configurations to work correctly.
         */
-       if (xen_domain())
+       if (xen_domain() || arch_virtio_direct_dma_ops(&vdev->dev))
                return true;
 
        return false;

(Passing the dev allows the arch to know this is a virtio device in
"direct" mode or whatever we want to call the !iommu case, and
construct appropriate DMA ops for it, which aren't the same as the DMA
ops of any other PCI device who *do* use the iommu).

Otherwise, the harder option would be for us to hack so that
xen_domain() returns true in our setup (gross), and have the arch code,
when it sets up PCI device DMA ops, have a gross hack to identify
virtio PCI devices, checks their F_IOMMU flag itself, and sets up the
different ops at that point.

As for those "special" ops, they are of course just normal swiotlb ops,
there's nothing "special" other than they aren't the ops that other PCI
devices on that bus use.

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Michael S. Tsirkin @ 2018-08-06 21:46 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <0967fc30001323e6e38ed12c8dba8ee3d1aa13f5.camel@kernel.crashing.org>

On Tue, Aug 07, 2018 at 07:26:35AM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2018-08-06 at 23:35 +0300, Michael S. Tsirkin wrote:
> > > As I said replying to Christoph, we are "leaking" into the interface
> > > something here that is really what's the VM is doing to itself, which
> > > is to stash its memory away in an inaccessible place.
> > > 
> > > Cheers,
> > > Ben.
> > 
> > I think Christoph merely objects to the specific implementation.  If
> > instead you do something like tweak dev->bus_dma_mask for the virtio
> > device I think he won't object.
> 
> Well, we don't have "bus_dma_mask" yet ..or you mean dma_mask ?
> 
> So, something like that would be a possibility, but the problem is that
> the current virtio (guest side) implementation doesn't honor this when
> not using dma ops and will not use dma ops if not using iommu, so back
> to square one.

Well we have the RFC for that - the switch to using DMA ops unconditionally isn't
problematic itself IMHO; for now that RFC is blocked
by its performance overhead, but Christoph says
he's trying to remove that for direct mappings,
so we should hopefully be able to get there in X weeks.

> Christoph seems to be wanting to use a flag in the interface to make
> the guest use dma_ops which is what I don't understand.
> 
> What would be needed then would be something along the lines of virtio
> noticing that dma_mask isn't big enough to cover all of memory (which
> isn't something generic code can easily do here for various reasons I
> can elaborate if you want, but that specific test more/less has to be
> arch specific), and in that case, force itself to use DMA ops routed to
> swiotlb.
> 
> I'd rather have arch code do the bulk of that work, don't you think ?
> 
> Which brings me back to this option, which may be the simplest and
> avoids the overhead of the proposed series (I found the series to be a
> nice cleanup but retpoline does kick us in the nuts here).
> 
> So what about this ?
> 
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -155,7 +155,7 @@ static bool vring_use_dma_api(struct virtio_device
> *vdev)
>          * the DMA API if we're a Xen guest, which at least allows
>          * all of the sensible Xen configurations to work correctly.
>          */
> -       if (xen_domain())
> +       if (xen_domain() || arch_virtio_direct_dma_ops(&vdev->dev))
>                 return true;
>  
>         return false;

Right but can't we fix the retpoline overhead such that
vring_use_dma_api will not be called on data path any longer, making
this a setup time check?


> (Passing the dev allows the arch to know this is a virtio device in
> "direct" mode or whatever we want to call the !iommu case, and
> construct appropriate DMA ops for it, which aren't the same as the DMA
> ops of any other PCI device who *do* use the iommu).

I think that's where Christoph might have specific ideas about it.

> Otherwise, the harder option would be for us to hack so that
> xen_domain() returns true in our setup (gross), and have the arch code,
> when it sets up PCI device DMA ops, have a gross hack to identify
> virtio PCI devices, checks their F_IOMMU flag itself, and sets up the
> different ops at that point.
> 
> As for those "special" ops, they are of course just normal swiotlb ops,
> there's nothing "special" other that they aren't the ops that other PCI
> device on that bus use.
> 
> Cheers,
> Ben.

-- 
MST

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-06 22:13 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180807002857-mutt-send-email-mst@kernel.org>

On Tue, 2018-08-07 at 00:46 +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 07, 2018 at 07:26:35AM +1000, Benjamin Herrenschmidt wrote:
> > On Mon, 2018-08-06 at 23:35 +0300, Michael S. Tsirkin wrote:
> > > > As I said replying to Christoph, we are "leaking" into the interface
> > > > something here that is really what's the VM is doing to itself, which
> > > > is to stash its memory away in an inaccessible place.
> > > > 
> > > > Cheers,
> > > > Ben.
> > > 
> > > I think Christoph merely objects to the specific implementation.  If
> > > instead you do something like tweak dev->bus_dma_mask for the virtio
> > > device I think he won't object.
> > 
> > Well, we don't have "bus_dma_mask" yet ..or you mean dma_mask ?
> > 
> > So, something like that would be a possibility, but the problem is that
> > the current virtio (guest side) implementation doesn't honor this when
> > not using dma ops and will not use dma ops if not using iommu, so back
> > to square one.
> 
> Well we have the RFC for that - the switch to using DMA ops unconditionally isn't
> problematic itself IMHO, for now that RFC is blocked
> by its perfromance overhead for now but Christoph says
> he's trying to remove that for direct mappings,
> so we should hopefully be able to get there in X weeks.

That would be good yes.

 ../..

> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -155,7 +155,7 @@ static bool vring_use_dma_api(struct virtio_device
> > *vdev)
> >          * the DMA API if we're a Xen guest, which at least allows
> >          * all of the sensible Xen configurations to work correctly.
> >          */
> > -       if (xen_domain())
> > +       if (xen_domain() || arch_virtio_direct_dma_ops(&vdev->dev))
> >                 return true;
> >  
> >         return false;
> 
> Right but can't we fix the retpoline overhead such that
> vring_use_dma_api will not be called on data path any longer, making
> this a setup time check?

Yes it needs to be a setup time check regardless actually !

The above is broken, sorry I was a bit quick here (too early in the
morning... ugh). We don't want the arch to go override the dma ops
every time that is called.

But yes, if we can fix the overhead, it becomes just a matter of
setting up the "right" ops automatically.

> > (Passing the dev allows the arch to know this is a virtio device in
> > "direct" mode or whatever we want to call the !iommu case, and
> > construct appropriate DMA ops for it, which aren't the same as the DMA
> > ops of any other PCI device who *do* use the iommu).
> 
> I think that's where Christoph might have specific ideas about it.

OK well, assuming Christoph can solve the direct case in a way that
also works for the virtio !iommu case, we still want some bit of logic
somewhere that will "switch" to swiotlb based ops if the DMA mask is
limited.

You mentioned an RFC for that ? Do you happen to have a link ?

It would be indeed ideal if all we had to do was setup some kind of
bus_dma_mask on all PCI devices and have virtio automagically insert
swiotlb when necessary.

Cheers,
Ben.

^ permalink raw reply

* Re: Build regressions/improvements in v4.17-rc1
From: Andrew Morton @ 2018-08-06 22:54 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Linux Kernel Mailing List, Dan Williams, Michael Ellerman,
	linuxppc-dev
In-Reply-To: <CAMuHMdXxV1nWGWAAXSkBWm-9VWHVc57nhFLfQ4BijbKGh-vaUQ@mail.gmail.com>

On Mon, 6 Aug 2018 12:39:21 +0200 Geert Uytterhoeven <geert@linux-m68k.org> wrote:

> CC Dan, Michael, AKPM, powerpc
> 
> On Mon, Apr 16, 2018 at 3:10 PM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> > Below is the list of build error/warning regressions/improvements in
> > v4.17-rc1[1] compared to v4.16[2].
> 
> I'd like to point your attention to:
> 
> >   + warning: vmlinux.o(.text+0x376518): Section mismatch in reference from the function .devm_memremap_pages() to the function .meminit.text:.arch_add_memory():  => N/A
> >   + warning: vmlinux.o(.text+0x376d64): Section mismatch in reference from the function .devm_memremap_pages_release() to the function .meminit.text:.arch_remove_memory():  => N/A

hm.  Dan isn't around at present so we're on our own with this one.

x86 doesn't put arch_add_memory and arch_remove_memory into __meminit. 
x86 does

#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
		bool want_memblock)
{
	...


So I guess powerpc should do that as well?

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-06 23:16 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <93518075238a07e9f011774d89bdc652c083f1ba.camel@kernel.crashing.org>

On Tue, 2018-08-07 at 08:13 +1000, Benjamin Herrenschmidt wrote:
> 
> OK well, assuming Christoph can solve the direct case in a way that
> also work for the virtio !iommu case, we still want some bit of logic
> somewhere that will "switch" to swiotlb based ops if the DMA mask is
> limited.
> 
> You mentioned an RFC for that ? Do you happen to have a link ?
> 
> It would be indeed ideal if all we had to do was setup some kind of
> bus_dma_mask on all PCI devices and have virtio automagically insert
> swiotlb when necessary.

Actually... I can think of a simpler option (Anshuman, didn't you
prototype this earlier ?):

Since that limitation of requiring bounce buffering via swiotlb is true
of any device in a secure VM, whether it goes through the iommu or not,
the iommu remapping is essentially pointless.

Thus, we could ensure that the iommu maps 1:1 the swiotlb bounce buffer
(either that or we configure it as "disabled" which is equivalent in
this case).

That way, we can now use the basic swiotlb ops everywhere, the same
dma_ops (swiotlb) will work whether a device uses the iommu or not.

Which boils down now to only making virtio use dma ops, there is no
need to override the dma_ops.

Which means all we have to do is either make xen_domain() return true
(yuck) or replace that one test with arch_virtio_force_dma_api() which
resolves to xen_domain() on x86 and can do something else for us.

As to using a virtio feature flag for that, which is what Christoph
proposes, I'm not a big fan of it because this means effectively exposing
this to the peer, i.e. the interface. I don't think it belongs there. The
interface, from the hypervisor perspective, whether it's qemu, vmware,
hyperz etc... have no business knowing how the guest manages its dma
operations, and may not even be aware of the access limitations (in our
case they are somewhat guest self-imposed).

Now, if this flag really is what we have to do, then we'd probably need
a qemu hack which will go set that flag on all virtio devices when it
detects that the VM is going secure.

But I don't think that's where that information "need to use the dma
API even for direct mode" belongs.

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-06 23:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180806233024-mutt-send-email-mst@kernel.org>

On Mon, 2018-08-06 at 23:35 +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 07, 2018 at 05:56:59AM +1000, Benjamin Herrenschmidt wrote:
> > On Mon, 2018-08-06 at 16:46 +0300, Michael S. Tsirkin wrote:
> > > 
> > > > Right, we'll need some quirk to disable balloons  in the guest I
> > > > suppose.
> > > > 
> > > > Passing something from libvirt is cumbersome because the end user may
> > > > not even need to know about secure VMs. There are use cases where the
> > > > security is a contract down to some special application running inside
> > > > the secure VM, the sysadmin knows nothing about.
> > > > 
> > > > Also there's repercussions all the way to admin tools, web UIs etc...
> > > > so it's fairly wide ranging.
> > > > 
> > > > So as long as we only need to quirk a couple of devices, it's much
> > > > better contained that way.
> > > 
> > > So just the balloon thing already means that yes management and all the
> > > way to the user tools must know this is going on. Otherwise
> > > user will try to inflate the balloon and wonder why this does not work.
> > 
> > There are *dozens* of management systems out there, not even all open
> > source, we won't ever be able to see the end of the tunnel if we need
> > to teach every single one of them, including end users, about platform
> > specific new VM flags like that.
> > 
> > .../...
> 
> In the end I suspect you will find you have to.

Maybe... we'll tackle this if/when we have to.

For balloon I suspect it's not such a big deal because once secure, all
the guest memory goes into the secure memory which isn't visible or
accounted by the hypervisor, so there's nothing to steal but the guest
is also using no HV memory (other than the few "non-secure" pages used
for swiotlb and a couple of other kernel things).

Future versions of our secure architecture might allow to turn
arbitrary pages of memory secure/non-secure rather than relying on a
separate physical pool, in which case, the balloon will be able to work
normally.

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Michael S. Tsirkin @ 2018-08-06 23:45 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <93518075238a07e9f011774d89bdc652c083f1ba.camel@kernel.crashing.org>

On Tue, Aug 07, 2018 at 08:13:56AM +1000, Benjamin Herrenschmidt wrote:
> On Tue, 2018-08-07 at 00:46 +0300, Michael S. Tsirkin wrote:
> > On Tue, Aug 07, 2018 at 07:26:35AM +1000, Benjamin Herrenschmidt wrote:
> > > On Mon, 2018-08-06 at 23:35 +0300, Michael S. Tsirkin wrote:
> > > > > As I said replying to Christoph, we are "leaking" into the interface
> > > > > something here that is really what's the VM is doing to itself, which
> > > > > is to stash its memory away in an inaccessible place.
> > > > > 
> > > > > Cheers,
> > > > > Ben.
> > > > 
> > > > I think Christoph merely objects to the specific implementation.  If
> > > > instead you do something like tweak dev->bus_dma_mask for the virtio
> > > > device I think he won't object.
> > > 
> > > Well, we don't have "bus_dma_mask" yet ..or you mean dma_mask ?
> > > 
> > > So, something like that would be a possibility, but the problem is that
> > > the current virtio (guest side) implementation doesn't honor this when
> > > not using dma ops and will not use dma ops if not using iommu, so back
> > > to square one.
> > 
> > Well we have the RFC for that - the switch to using DMA ops unconditionally isn't
> > problematic itself IMHO, for now that RFC is blocked
> > by its performance overhead for now but Christoph says
> > he's trying to remove that for direct mappings,
> > so we should hopefully be able to get there in X weeks.
> 
> That would be good yes.
> 
>  ../..
> 
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -155,7 +155,7 @@ static bool vring_use_dma_api(struct virtio_device
> > > *vdev)
> > >          * the DMA API if we're a Xen guest, which at least allows
> > >          * all of the sensible Xen configurations to work correctly.
> > >          */
> > > -       if (xen_domain())
> > > +       if (xen_domain() || arch_virtio_direct_dma_ops(&vdev->dev))
> > >                 return true;
> > >  
> > >         return false;
> > 
> > Right but can't we fix the retpoline overhead such that
> > vring_use_dma_api will not be called on data path any longer, making
> > this a setup time check?
> 
> Yes it needs to be a setup time check regardless actually !
> 
> The above is broken, sorry I was a bit quick here (too early in the
> morning... ugh). We don't want the arch to go override the dma ops
> every time that is called.
> 
> But yes, if we can fix the overhead, it becomes just a matter of
> setting up the "right" ops automatically.
> 
> > > (Passing the dev allows the arch to know this is a virtio device in
> > > "direct" mode or whatever we want to call the !iommu case, and
> > > construct appropriate DMA ops for it, which aren't the same as the DMA
> > > ops of any other PCI device who *do* use the iommu).
> > 
> > I think that's where Christoph might have specific ideas about it.
> 
> OK well, assuming Christoph can solve the direct case in a way that
> also work for the virtio !iommu case, we still want some bit of logic
> somewhere that will "switch" to swiotlb based ops if the DMA mask is
> limited.
> 
> You mentioned an RFC for that ? Do you happen to have a link ?

No but Christoph did I think.

> It would be indeed ideal if all we had to do was setup some kind of
> bus_dma_mask on all PCI devices and have virtio automagically insert
> swiotlb when necessary.
> 
> Cheers,
> Ben.
> 

^ permalink raw reply

* Re: powerpc/e200: Skip tlb1 entries used for kernel mapping
From: Scott Wood @ 2018-08-06 23:57 UTC (permalink / raw)
  To: Bharat Bhushan; +Cc: benh, paulus, mpe, linuxppc-dev, linux-kernel
In-Reply-To: <1532411985-17725-1-git-send-email-Bharat.Bhushan@nxp.com>

On Tue, Jul 24, 2018 at 11:29:45AM +0530, Bharat Bhushan wrote:
> E200 have TLB1 only and it does not have TLB0.
> So TLB1 are used for mapping kernel and user-space both.
> TLB miss handler for E200 does not consider skipping TLBs
> used for kernel mapping. This patch ensures that we skip
> tlb1 entries used for kernel mapping (tlbcam_index).

How much more is needed to get e200 working?  What was this tested on?

> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
> ---
>  arch/powerpc/kernel/head_fsl_booke.S | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
> index bf4c602..951fb96 100644
> --- a/arch/powerpc/kernel/head_fsl_booke.S
> +++ b/arch/powerpc/kernel/head_fsl_booke.S
> @@ -801,12 +801,28 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
>  	/* Round robin TLB1 entries assignment */
>  	mfspr	r12, SPRN_MAS0
>  
> +	/* Get first free tlbcam entry */
> +	lis	r11, tlbcam_index@ha
> +	lwz	r11, tlbcam_index@l(r11)

The existing handler already loads tlbcam_index and uses that when
wrapping.  What specifically is causing that to not work (perhaps it's
just a matter of initializing NV when tlbcam_index changes?), and why
does this patch leave that code in place?

> +
> +	/* Extract MAS0(NV) */
> +	andi.	r13, r12, 0xfff
> +	cmpw	0, r13, r11
> +	blt	0, 5f
> +	b	6f
> +5:

Why these two instructions instead of "bge 6f"?  If it's for branch
prediction, does e200 pay attention to static hints?  If it doesn't,
you could move the wrap code out-of-line.

> +	/* When NV is less than first free tlbcam entry, use first free
> +	 * tlbcam entry for ESEL and set NV */
> +	rlwimi	r12, r11, 16, 4, 15
> +	addi	r11, r11, 1
> +	rlwimi	r12, r11, 0, 20, 31
> +	b	7f

The 4-argument form of rlwimi is easier to read.

BTW, The TLB miss handler would be simpler/faster if you reserve the
upper entries rather than the lower entries.  Then you would just have
one value to check (instead of using TLB1CFG[NENTRY]) to see if you wrap
back to zero.

-Scott

^ permalink raw reply

* Re: [PATCH v2 6/6] fsl_pmc: update device bindings
From: Scott Wood @ 2018-08-07  0:13 UTC (permalink / raw)
  To: Rob Herring, Ran Wang
  Cc: Mark Rutland, devicetree, Zhao Chenhui, Li Yang, Paul Mackerras,
	linux-arm-kernel, Li Yang, linuxppc-dev, linux-kernel
In-Reply-To: <20180416151356.r7si6higfrovl53l@rob-hp-laptop>

On Mon, 2018-04-16 at 10:13 -0500, Rob Herring wrote:
> On Wed, Apr 11, 2018 at 02:35:51PM +0800, Ran Wang wrote:
> > From: Li Yang <leoli@freescale.com>
> 
> Needs a commit msg and the subject should give some indication of what 
> the update is. And also start with "dt-bindings: ..."

This patch should also come before the patches that use the new binding.

> > -  fsl,mpc8536-pmc: Sleep specifiers consist of three cells, the third of
> > -  which will be ORed into PMCDR upon suspend, and cleared from PMCDR
> > -  upon resume.  The first two cells are as described for fsl,mpc8578-pmc.
> > -  This sleep controller only supports disabling devices during system
> > -  sleep, or permanently.
> > -
> > -  fsl,mpc8548-pmc: Sleep specifiers consist of one or two cells, the
> > -  first of which will be ORed into DEVDISR (and the second into
> > -  DEVDISR2, if present -- this cell should be zero or absent if the
> > -  hardware does not have DEVDISR2) upon a request for permanent device
> > -  disabling.  This sleep controller does not support configuring devices
> > -  to disable during system sleep (unless supported by another compatible
> > -  match), or dynamically.
> 
> You seem to be breaking backwards compatibility with this change. I 
> doubt that is okay on these platforms.

I don't think the sleep specifier stuff ever got used.

-Scott

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Benjamin Herrenschmidt @ 2018-08-07  0:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christoph Hellwig, Will Deacon, Anshuman Khandual, virtualization,
	linux-kernel, linuxppc-dev, aik, robh, joe, elfring, david,
	jasowang, mpe, linuxram, haren, paulus, srikar, robin.murphy,
	jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180807024503-mutt-send-email-mst@kernel.org>

On Tue, 2018-08-07 at 02:45 +0300, Michael S. Tsirkin wrote:
> > OK well, assuming Christoph can solve the direct case in a way that
> > also work for the virtio !iommu case, we still want some bit of logic
> > somewhere that will "switch" to swiotlb based ops if the DMA mask is
> > limited.
> > 
> > You mentioned an RFC for that ? Do you happen to have a link ?
> 
> No but Christoph did I think.

Ok I missed that, sorry, I'll dig it out. Thanks.

Cheers,
Ben.

^ permalink raw reply

* [PATCH] powerpc/tm: Print 64-bits MSR
From: Breno Leitao @ 2018-08-07  0:32 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mikey, Breno Leitao

On a kernel TM Bad thing program exception, the MSR is not being properly
displayed, since it dumps a 32-bits value. MSR is a 64 bits register for
all platforms that have HTM enabled.

This patch dumps the MSR value as 64-bits instead of 32 bits.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 arch/powerpc/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0e17dcb48720..cd561fd89532 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1402,7 +1402,7 @@ void program_check_exception(struct pt_regs *regs)
 			goto bail;
 		} else {
 			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
-			       "at %lx (msr 0x%x)\n", regs->nip, reason);
+			       "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
 			die("Unrecoverable exception", regs, SIGABRT);
 		}
 	}
-- 
2.16.3

^ permalink raw reply related

* Re: [PATCH] powerpc/tm: Print 64-bits MSR
From: Michael Neuling @ 2018-08-07  1:17 UTC (permalink / raw)
  To: Breno Leitao, linuxppc-dev
In-Reply-To: <1533601964-20157-1-git-send-email-leitao@debian.org>

On Mon, 2018-08-06 at 21:32 -0300, Breno Leitao wrote:
> On a kernel TM Bad thing program exception, the MSR is not being properly
> displayed, since it dumps a 32-bits value. MSR is a 64 bits register for
> all platforms that have HTM enabled.
>
> This patch dumps the MSR value as 64-bits instead of 32 bits.

(sorry I was distracted when you asked me about this before offline...)

I think you might need to clear up in the description why you are changing
  reason -> msr.

Mikey

> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
>  arch/powerpc/kernel/traps.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 0e17dcb48720..cd561fd89532 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1402,7 +1402,7 @@ void program_check_exception(struct pt_regs *regs)
>  			goto bail;
>  		} else {
>  			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
> -			       "at %lx (msr 0x%x)\n", regs->nip, reason);
> +			       "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
>
>  			die("Unrecoverable exception", regs, SIGABRT);
>  		}
>  	}

^ permalink raw reply

* Re: [PATCH v5 2/2] powerpc: Use cpu_smallcore_sibling_mask at SMT level on bigcores
From: kbuild test robot @ 2018-08-07  2:44 UTC (permalink / raw)
  To: Gautham R. Shenoy
  Cc: kbuild-all, Michael Ellerman, Benjamin Herrenschmidt,
	Michael Neuling, Vaidyanathan Srinivasan, Akshay Adiga,
	Shilpasri G Bhat, Oliver O'Halloran, Nicholas Piggin,
	Murilo Opsfelder Araujo, Anton Blanchard, linuxppc-dev,
	linux-kernel, Gautham R. Shenoy
In-Reply-To: <1533572565-17357-3-git-send-email-ego@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1528 bytes --]

Hi Gautham,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.18-rc8 next-20180806]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Gautham-R-Shenoy/powerpc-Detect-the-presence-of-big-cores-via-ibm-thread-groups/20180807-075133
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-pasemi_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

>> arch/powerpc/kernel/smp.c:1178:30: error: 'smallcore_smt_mask' defined but not used [-Werror=unused-function]
    static const struct cpumask *smallcore_smt_mask(int cpu)
                                 ^~~~~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors

vim +/smallcore_smt_mask +1178 arch/powerpc/kernel/smp.c

  1177	
> 1178	static const struct cpumask *smallcore_smt_mask(int cpu)
  1179	{
  1180		return cpu_smallcore_sibling_mask(cpu);
  1181	}
  1182	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 20225 bytes --]

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Christoph Hellwig @ 2018-08-07  6:16 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael S. Tsirkin, Christoph Hellwig, Will Deacon,
	Anshuman Khandual, virtualization, linux-kernel, linuxppc-dev,
	aik, robh, joe, elfring, david, jasowang, mpe, linuxram, haren,
	paulus, srikar, robin.murphy, jean-philippe.brucker, marc.zyngier
In-Reply-To: <0967fc30001323e6e38ed12c8dba8ee3d1aa13f5.camel@kernel.crashing.org>

On Tue, Aug 07, 2018 at 07:26:35AM +1000, Benjamin Herrenschmidt wrote:
> > I think Christoph merely objects to the specific implementation.  If
> > instead you do something like tweak dev->bus_dma_mask for the virtio
> > device I think he won't object.
> 
> Well, we don't have "bus_dma_mask" yet ..or you mean dma_mask ?

It will be new in 4.19:

http://git.infradead.org/users/hch/dma-mapping.git/commitdiff/f07d141fe9430cdf9f8a65a87c41

> So, something like that would be a possibility, but the problem is that
> the current virtio (guest side) implementation doesn't honor this when
> not using dma ops and will not use dma ops if not using iommu, so back
> to square one.
> 
> Christoph seems to be wanting to use a flag in the interface to make
> the guest use dma_ops which is what I don't understand.

As-is virtio devices are very clearly and explicitly defined to use
physical addresses in the spec.  dma ops will often do platform
based translations (iommu, offsets), so we can't just use the platform
default dma ops and will need to opt into them.

> What would be needed then would be something along the lines of virtio
> noticing that dma_mask isn't big enough to cover all of memory (which
> isn't something generic code can easily do here for various reasons I
> can elaborate if you want, but that specific test more/less has to be
> arch specific), and in that case, force itself to use DMA ops routed to
> swiotlb.
> 
> I'd rather have arch code do the bulk of that work, don't you think ?

There is nothing architecture specific about that.  I've been working
hard to remove all the bullshit architectures have done in their DMA
ops and consolidating them into common code based on rules.  The last
thing I want is another vector for weird underspecified arch
interaction with DMA ops, which is exactly what your patch below
does.

^ permalink raw reply

* Re: [RFC 0/4] Virtio uses DMA API for all devices
From: Christoph Hellwig @ 2018-08-07  6:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Benjamin Herrenschmidt, Christoph Hellwig, Will Deacon,
	Anshuman Khandual, virtualization, linux-kernel, linuxppc-dev,
	aik, robh, joe, elfring, david, jasowang, mpe, linuxram, haren,
	paulus, srikar, robin.murphy, jean-philippe.brucker, marc.zyngier
In-Reply-To: <20180807002857-mutt-send-email-mst@kernel.org>

On Tue, Aug 07, 2018 at 12:46:34AM +0300, Michael S. Tsirkin wrote:
> Well we have the RFC for that - the switch to using DMA ops unconditionally isn't
> problematic itself IMHO, for now that RFC is blocked
> by its performance overhead for now but Christoph says
> he's trying to remove that for direct mappings,
> so we should hopefully be able to get there in X weeks.

The direct calls to dma_direct_ops aren't going to help you with legacy
virtio, given that virtio is specified to deal with physical addresses,
while dma-direct is not in many cases.

It would however help with the case where qemu always sets the platform
dma flag, as we'd avoid the indirect calls for that.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox