* [PATCH v3 2/2] powerpc/64s/hash: add torture_hpt kernel boot option to increase hash faults
From: Nicholas Piggin @ 2020-05-11 9:20 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20200511092040.1339667-1-npiggin@gmail.com>
This option increases the number of hash misses by limiting the number of
kernel HPT entries, by accessing the address immediately after installing
the PTE, then removing it again (except in the case of CI entries that
must not be accessed, these are removed on the next hash fault).
This helps stress test difficult to hit paths in the kernel.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v3:
- Remove the dead code that didn't work on QEMU, and comment it instead.
.../admin-guide/kernel-parameters.txt | 9 ++++
arch/powerpc/include/asm/book3s/64/mmu-hash.h | 11 ++++
arch/powerpc/mm/book3s64/hash_4k.c | 3 ++
arch/powerpc/mm/book3s64/hash_64k.c | 8 +++
arch/powerpc/mm/book3s64/hash_utils.c | 54 ++++++++++++++++++-
5 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8dd4260746dc..f51ed836954f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -876,6 +876,15 @@
them frequently to increase the rate of SLB faults
on kernel addresses.
+ torture_hpt [PPC]
+ Limits the number of kernel HPT entries in the hash
+ page table to increase the rate of hash page table
+ faults on kernel addresses.
+
+ This may hang when run on processors / emulators which
+ do not have a TLB, or flush it more often than
+ required, QEMU seems to have problems.
+
disable= [IPV6]
See Documentation/networking/ipv6.txt.
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 758de1e0f676..3d50ab629bde 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -8,6 +8,7 @@
* PPC64 rework.
*/
+#include <linux/jump_label.h>
#include <asm/page.h>
#include <asm/bug.h>
#include <asm/asm-const.h>
@@ -324,6 +325,16 @@ static inline bool torture_slb(void)
return static_branch_unlikely(&torture_slb_key);
}
+extern bool torture_hpt_enabled;
+DECLARE_STATIC_KEY_FALSE(torture_hpt_key);
+static inline bool torture_hpt(void)
+{
+ return static_branch_unlikely(&torture_hpt_key);
+}
+
+void hpt_do_torture(unsigned long ea, unsigned long access,
+ unsigned long rflags, unsigned long hpte_group);
+
/*
* This computes the AVPN and B fields of the first dword of a HPTE,
* for use when we want to match an existing PTE. The bottom 7 bits
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 22e787123cdf..54e4ff8c558d 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -118,6 +118,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+ if (torture_hpt())
+ hpt_do_torture(ea, access, rflags, hpte_group);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 7084ce2951e6..19ea0fc145a9 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -216,6 +216,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
+ if (torture_hpt())
+ hpt_do_torture(ea, access, rflags, hpte_group);
+
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
@@ -327,7 +330,12 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+ if (torture_hpt())
+ hpt_do_torture(ea, access, rflags, hpte_group);
}
+
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9c487b5782ef..ffe364269b8a 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -353,8 +353,12 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
return ret;
}
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __read_mostly = false;
bool torture_slb_enabled __read_mostly = false;
+bool torture_hpt_enabled __read_mostly = false;
+
+/* per-CPU array allocated if we enable torture_hpt. */
+static unsigned long *torture_hpt_last_group;
static int __init parse_disable_1tb_segments(char *p)
{
@@ -370,6 +374,13 @@ static int __init parse_torture_slb(char *p)
}
early_param("torture_slb", parse_torture_slb);
+static int __init parse_torture_hpt(char *p)
+{
+ torture_hpt_enabled = true;
+ return 0;
+}
+early_param("torture_hpt", parse_torture_hpt);
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -863,6 +874,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
}
DEFINE_STATIC_KEY_FALSE(torture_slb_key);
+DEFINE_STATIC_KEY_FALSE(torture_hpt_key);
static void __init htab_initialize(void)
{
@@ -882,6 +894,15 @@ static void __init htab_initialize(void)
if (torture_slb_enabled)
static_branch_enable(&torture_slb_key);
+ if (torture_hpt_enabled) {
+ unsigned long tmp;
+ static_branch_enable(&torture_hpt_key);
+ tmp = memblock_phys_alloc_range(sizeof(unsigned long) * NR_CPUS,
+ 0,
+ 0, MEMBLOCK_ALLOC_ANYWHERE);
+ memset((void *)tmp, 0xff, sizeof(unsigned long) * NR_CPUS);
+ torture_hpt_last_group = __va(tmp);
+ }
/*
* Calculate the required size of the htab. We want the number of
@@ -1901,6 +1922,37 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
return slot;
}
+void hpt_do_torture(unsigned long ea, unsigned long access,
+ unsigned long rflags, unsigned long hpte_group)
+{
+ unsigned long last_group;
+ int cpu = raw_smp_processor_id();
+
+ last_group = torture_hpt_last_group[cpu];
+ if (last_group != -1UL) {
+ while (mmu_hash_ops.hpte_remove(last_group) != -1)
+ ;
+ torture_hpt_last_group[cpu] = -1UL;
+ }
+
+ if (ea >= PAGE_OFFSET) {
+ /*
+ * We would really like to prefetch here to get the TLB loaded,
+ * then remove the PTE before returning to userspace, to
+ * increase the hash fault rate.
+ *
+ * Unfortunately QEMU TCG does not model the TLB in a way that
+ * makes this possible, and systemsim (mambo) emulator does not
+ * bring in TLBs with prefetches (although loads/stores do
+ * work for non-CI PTEs).
+ *
+ * So remember this PTE and clear it on the next hash fault.
+ */
+ torture_hpt_last_group[cpu] = hpte_group;
+ }
+}
+
+
#ifdef CONFIG_DEBUG_PAGEALLOC
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v3 1/2] powerpc/64s/hash: add torture_slb kernel boot option to increase SLB faults
From: Nicholas Piggin @ 2020-05-11 9:20 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
This option increases the number of SLB misses by limiting the number of
kernel SLB entries, and increased flushing of cached lookaside information.
This helps stress test difficult to hit paths in the kernel.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v3:
- Fix compile error found by ktr
.../admin-guide/kernel-parameters.txt | 5 +
arch/powerpc/include/asm/book3s/64/mmu-hash.h | 7 +
arch/powerpc/mm/book3s64/hash_utils.c | 13 ++
arch/powerpc/mm/book3s64/slb.c | 152 ++++++++++++------
4 files changed, 132 insertions(+), 45 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7bc83f3d9bdf..8dd4260746dc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -871,6 +871,11 @@
can be useful when debugging issues that require an SLB
miss to occur.
+ torture_slb [PPC]
+ Limits the number of kernel SLB entries, and flushes
+ them frequently to increase the rate of SLB faults
+ on kernel addresses.
+
disable= [IPV6]
See Documentation/networking/ipv6.txt.
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 3fa1b962dc27..758de1e0f676 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -317,6 +317,13 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
*/
extern int mmu_ci_restrictions;
+extern bool torture_slb_enabled;
+DECLARE_STATIC_KEY_FALSE(torture_slb_key);
+static inline bool torture_slb(void)
+{
+ return static_branch_unlikely(&torture_slb_key);
+}
+
/*
* This computes the AVPN and B fields of the first dword of a HPTE,
* for use when we want to match an existing PTE. The bottom 7 bits
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 8ed2411c3f39..9c487b5782ef 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -354,6 +354,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
}
static bool disable_1tb_segments = false;
+bool torture_slb_enabled __read_mostly = false;
static int __init parse_disable_1tb_segments(char *p)
{
@@ -362,6 +363,13 @@ static int __init parse_disable_1tb_segments(char *p)
}
early_param("disable_1tb_segments", parse_disable_1tb_segments);
+static int __init parse_torture_slb(char *p)
+{
+ torture_slb_enabled = true;
+ return 0;
+}
+early_param("torture_slb", parse_torture_slb);
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -854,6 +862,8 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
pr_info("Partition table %p\n", partition_tb);
}
+DEFINE_STATIC_KEY_FALSE(torture_slb_key);
+
static void __init htab_initialize(void)
{
unsigned long table;
@@ -870,6 +880,9 @@ static void __init htab_initialize(void)
printk(KERN_INFO "Using 1TB segments\n");
}
+ if (torture_slb_enabled)
+ static_branch_enable(&torture_slb_key);
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 716204aee3da..cf41c4b63d95 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -68,7 +68,7 @@ static void assert_slb_presence(bool present, unsigned long ea)
* slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
* ignores all other bits from 0-27, so just clear them all.
*/
- ea &= ~((1UL << 28) - 1);
+ ea &= ~((1UL << SID_SHIFT) - 1);
asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
WARN_ON(present == (tmp == 0));
@@ -153,14 +153,42 @@ void slb_flush_all_realmode(void)
asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}
+static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
+{
+ struct slb_shadow *p = get_slb_shadow();
+ unsigned long ksp_esid_data, ksp_vsid_data;
+ u32 ih;
+
+ /*
+ * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
+ * information created with Class=0 entries, which we use for kernel
+ * SLB entries (the SLB entries themselves are still invalidated).
+ *
+ * Older processors will ignore this optimisation. Over-invalidation
+ * is fine because we never rely on lookaside information existing.
+ */
+ if (preserve_kernel_lookaside)
+ ih = 1;
+ else
+ ih = 0;
+
+ ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+ ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+ asm volatile(PPC_SLBIA(%0)" \n"
+ "slbmte %1, %2 \n"
+ :: "i" (ih),
+ "r" (ksp_vsid_data),
+ "r" (ksp_esid_data)
+ : "memory");
+}
+
/*
* This flushes non-bolted entries, it can be run in virtual mode. Must
* be called with interrupts disabled.
*/
void slb_flush_and_restore_bolted(void)
{
- struct slb_shadow *p = get_slb_shadow();
-
BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
WARN_ON(!irqs_disabled());
@@ -171,13 +199,10 @@ void slb_flush_and_restore_bolted(void)
*/
hard_irq_disable();
- asm volatile("isync\n"
- "slbia\n"
- "slbmte %0, %1\n"
- "isync\n"
- :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
- "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
- : "memory");
+ isync();
+ __slb_flush_and_restore_bolted(false);
+ isync();
+
assert_slb_presence(true, get_paca()->kstack);
get_paca()->slb_cache_ptr = 0;
@@ -400,6 +425,30 @@ void preload_new_slb_context(unsigned long start, unsigned long sp)
local_irq_enable();
}
+static void slb_cache_slbie_kernel(unsigned int index)
+{
+ unsigned long slbie_data = get_paca()->slb_cache[index];
+ unsigned long ksp = get_paca()->kstack;
+
+ slbie_data <<= SID_SHIFT;
+ slbie_data |= 0xc000000000000000ULL;
+ if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
+ return;
+ slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
+
+ asm volatile("slbie %0" : : "r" (slbie_data));
+}
+
+static void slb_cache_slbie_user(unsigned int index)
+{
+ unsigned long slbie_data = get_paca()->slb_cache[index];
+
+ slbie_data <<= SID_SHIFT;
+ slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
+ slbie_data |= SLBIE_C; /* user slbs have C=1 */
+
+ asm volatile("slbie %0" : : "r" (slbie_data));
+}
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
@@ -414,8 +463,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
*/
hard_irq_disable();
- asm volatile("isync" : : : "memory");
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ isync();
+ if (torture_slb()) {
+ __slb_flush_and_restore_bolted(false);
+ isync();
+ get_paca()->slb_cache_ptr = 0;
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
/*
* SLBIA IH=3 invalidates all Class=1 SLBEs and their
* associated lookaside structures, which matches what
@@ -423,47 +478,29 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
* cache.
*/
asm volatile(PPC_SLBIA(3));
+
} else {
unsigned long offset = get_paca()->slb_cache_ptr;
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
offset <= SLB_CACHE_ENTRIES) {
- unsigned long slbie_data = 0;
-
- for (i = 0; i < offset; i++) {
- unsigned long ea;
-
- ea = (unsigned long)
- get_paca()->slb_cache[i] << SID_SHIFT;
- /*
- * Could assert_slb_presence(true) here, but
- * hypervisor or machine check could have come
- * in and removed the entry at this point.
- */
-
- slbie_data = ea;
- slbie_data |= user_segment_size(slbie_data)
- << SLBIE_SSIZE_SHIFT;
- slbie_data |= SLBIE_C; /* user slbs have C=1 */
- asm volatile("slbie %0" : : "r" (slbie_data));
- }
+ /*
+ * Could assert_slb_presence(true) here, but
+ * hypervisor or machine check could have come
+ * in and removed the entry at this point.
+ */
+
+ for (i = 0; i < offset; i++)
+ slb_cache_slbie_user(i);
/* Workaround POWER5 < DD2.1 issue */
if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
- asm volatile("slbie %0" : : "r" (slbie_data));
+ slb_cache_slbie_user(0);
} else {
- struct slb_shadow *p = get_slb_shadow();
- unsigned long ksp_esid_data =
- be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
- unsigned long ksp_vsid_data =
- be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
-
- asm volatile(PPC_SLBIA(1) "\n"
- "slbmte %0,%1\n"
- "isync"
- :: "r"(ksp_vsid_data),
- "r"(ksp_esid_data));
+ /* Flush but retain kernel lookaside information */
+ __slb_flush_and_restore_bolted(true);
+ isync();
get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
}
@@ -503,7 +540,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
* address accesses by the kernel (user mode won't happen until
* rfid, which is safe).
*/
- asm volatile("isync" : : : "memory");
+ isync();
}
void slb_set_size(u16 size)
@@ -571,6 +608,9 @@ static void slb_cache_update(unsigned long esid_data)
if (cpu_has_feature(CPU_FTR_ARCH_300))
return; /* ISAv3.0B and later does not use slb_cache */
+ if (torture_slb())
+ return;
+
/*
* Now update slb cache entries
*/
@@ -580,7 +620,7 @@ static void slb_cache_update(unsigned long esid_data)
* We have space in slb cache for optimized switch_slb().
* Top 36 bits from esid_data as per ISA
*/
- local_paca->slb_cache[slb_cache_index++] = esid_data >> 28;
+ local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
local_paca->slb_cache_ptr++;
} else {
/*
@@ -671,6 +711,28 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
* accesses user memory before it returns to userspace with rfid.
*/
assert_slb_presence(false, ea);
+ if (torture_slb()) {
+ int slb_cache_index = local_paca->slb_cache_ptr;
+
+ /*
+ * torture_slb() does not use slb cache, repurpose as a
+ * cache of inserted (non-bolted) kernel SLB entries. All
+ * non-bolted kernel entries are flushed on any user fault,
+ * or if there are already 3 non-boled kernel entries.
+ */
+ BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
+ if (!kernel || slb_cache_index == 3) {
+ int i;
+
+ for (i = 0; i < slb_cache_index; i++)
+ slb_cache_slbie_kernel(i);
+ slb_cache_index = 0;
+ }
+
+ if (kernel)
+ local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
+ local_paca->slb_cache_ptr = slb_cache_index;
+ }
asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
barrier();
--
2.23.0
^ permalink raw reply related
* [powerpc:merge] BUILD SUCCESS 78263190ec9727216ca715bfc0ee8b58b657d1ea
From: kbuild test robot @ 2020-05-11 8:25 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 78263190ec9727216ca715bfc0ee8b58b657d1ea Automatic merge of 'master', 'next' and 'fixes' (2020-05-07 22:25)
elapsed time: 5480m
configs tested: 118
configs skipped: 26
The following configs have been built successfully.
More configs may be tested in the coming days.
arm defconfig
arm allyesconfig
arm allmodconfig
arm allnoconfig
arm64 allyesconfig
arm64 defconfig
arm64 allmodconfig
arm64 allnoconfig
sparc allyesconfig
m68k allyesconfig
m68k allmodconfig
i386 allyesconfig
s390 allmodconfig
m68k allnoconfig
um allnoconfig
m68k defconfig
i386 allnoconfig
i386 defconfig
i386 debian-10.3
ia64 allmodconfig
ia64 defconfig
ia64 allnoconfig
ia64 allyesconfig
m68k sun3_defconfig
nds32 defconfig
nds32 allnoconfig
csky allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
h8300 allmodconfig
xtensa defconfig
arc defconfig
arc allyesconfig
sh allmodconfig
sh allnoconfig
microblaze allnoconfig
nios2 defconfig
nios2 allyesconfig
openrisc defconfig
c6x allyesconfig
c6x allnoconfig
openrisc allyesconfig
mips allyesconfig
mips allnoconfig
mips allmodconfig
parisc allnoconfig
parisc defconfig
parisc allyesconfig
parisc allmodconfig
powerpc defconfig
powerpc allyesconfig
powerpc rhel-kconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a006-20200511
i386 randconfig-a005-20200511
i386 randconfig-a003-20200511
i386 randconfig-a001-20200511
i386 randconfig-a004-20200511
i386 randconfig-a002-20200511
i386 randconfig-a005-20200507
i386 randconfig-a004-20200507
i386 randconfig-a001-20200507
i386 randconfig-a002-20200507
i386 randconfig-a003-20200507
i386 randconfig-a006-20200507
x86_64 randconfig-a005-20200511
x86_64 randconfig-a003-20200511
x86_64 randconfig-a006-20200511
x86_64 randconfig-a004-20200511
x86_64 randconfig-a001-20200511
x86_64 randconfig-a002-20200511
i386 randconfig-a012-20200510
i386 randconfig-a016-20200510
i386 randconfig-a014-20200510
i386 randconfig-a011-20200510
i386 randconfig-a013-20200510
i386 randconfig-a015-20200510
i386 randconfig-a012-20200507
i386 randconfig-a016-20200507
i386 randconfig-a014-20200507
i386 randconfig-a011-20200507
i386 randconfig-a015-20200507
i386 randconfig-a013-20200507
i386 randconfig-a012-20200511
i386 randconfig-a016-20200511
i386 randconfig-a014-20200511
i386 randconfig-a011-20200511
i386 randconfig-a013-20200511
i386 randconfig-a015-20200511
x86_64 randconfig-a004-20200507
x86_64 randconfig-a006-20200507
x86_64 randconfig-a002-20200507
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
s390 allyesconfig
s390 allnoconfig
s390 defconfig
sparc defconfig
sparc64 defconfig
sparc64 allnoconfig
sparc64 allyesconfig
sparc64 allmodconfig
um allmodconfig
um allyesconfig
um defconfig
x86_64 rhel
x86_64 rhel-7.6
x86_64 rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64 lkp
x86_64 fedora-25
x86_64 kexec
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:fixes] BUILD SUCCESS c44dc6323cd49d8d742c37e234b952e822c35de4
From: kbuild test robot @ 2020-05-11 8:25 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes
branch HEAD: c44dc6323cd49d8d742c37e234b952e822c35de4 powerpc/64s/kuap: Restore AMR in fast_interrupt_return
elapsed time: 5481m
configs tested: 160
configs skipped: 38
The following configs have been built successfully.
More configs may be tested in the coming days.
arm defconfig
arm allyesconfig
arm allmodconfig
arm allnoconfig
arm64 allyesconfig
arm64 defconfig
arm64 allmodconfig
arm64 allnoconfig
sparc allyesconfig
m68k allyesconfig
m68k allmodconfig
um defconfig
i386 allyesconfig
alpha allyesconfig
mips allmodconfig
s390 allmodconfig
m68k allnoconfig
m68k defconfig
um allnoconfig
s390 allnoconfig
riscv allnoconfig
riscv allmodconfig
parisc allyesconfig
i386 allnoconfig
i386 defconfig
i386 debian-10.3
ia64 allmodconfig
ia64 defconfig
ia64 allnoconfig
ia64 allyesconfig
m68k sun3_defconfig
nios2 defconfig
nios2 allyesconfig
openrisc defconfig
c6x allyesconfig
c6x allnoconfig
openrisc allyesconfig
nds32 defconfig
nds32 allnoconfig
csky allyesconfig
csky defconfig
alpha defconfig
xtensa allyesconfig
h8300 allyesconfig
h8300 allmodconfig
xtensa defconfig
arc defconfig
arc allyesconfig
sh allmodconfig
sh allnoconfig
microblaze allnoconfig
mips allyesconfig
mips allnoconfig
parisc allnoconfig
parisc defconfig
parisc allmodconfig
powerpc defconfig
powerpc allyesconfig
powerpc rhel-kconfig
powerpc allmodconfig
powerpc allnoconfig
x86_64 randconfig-a005-20200508
x86_64 randconfig-a003-20200508
x86_64 randconfig-a001-20200508
x86_64 randconfig-a006-20200508
x86_64 randconfig-a004-20200508
x86_64 randconfig-a002-20200508
i386 randconfig-a006-20200511
i386 randconfig-a005-20200511
i386 randconfig-a003-20200511
i386 randconfig-a001-20200511
i386 randconfig-a004-20200511
i386 randconfig-a002-20200511
i386 randconfig-a005-20200509
i386 randconfig-a004-20200509
i386 randconfig-a003-20200509
i386 randconfig-a002-20200509
i386 randconfig-a001-20200509
i386 randconfig-a006-20200509
i386 randconfig-a005-20200507
i386 randconfig-a004-20200507
i386 randconfig-a001-20200507
i386 randconfig-a002-20200507
i386 randconfig-a003-20200507
i386 randconfig-a006-20200507
i386 randconfig-a005-20200508
i386 randconfig-a004-20200508
i386 randconfig-a003-20200508
i386 randconfig-a002-20200508
i386 randconfig-a001-20200508
i386 randconfig-a006-20200508
x86_64 randconfig-a005-20200511
x86_64 randconfig-a003-20200511
x86_64 randconfig-a006-20200511
x86_64 randconfig-a004-20200511
x86_64 randconfig-a001-20200511
x86_64 randconfig-a002-20200511
x86_64 randconfig-a015-20200507
x86_64 randconfig-a014-20200507
x86_64 randconfig-a012-20200507
x86_64 randconfig-a013-20200507
x86_64 randconfig-a011-20200507
x86_64 randconfig-a016-20200507
x86_64 randconfig-a015-20200509
x86_64 randconfig-a014-20200509
x86_64 randconfig-a011-20200509
x86_64 randconfig-a013-20200509
x86_64 randconfig-a012-20200509
x86_64 randconfig-a016-20200509
x86_64 randconfig-a014-20200508
x86_64 randconfig-a012-20200508
x86_64 randconfig-a016-20200508
x86_64 randconfig-a016-20200511
x86_64 randconfig-a012-20200511
x86_64 randconfig-a014-20200511
x86_64 randconfig-a004-20200507
x86_64 randconfig-a006-20200507
x86_64 randconfig-a002-20200507
i386 randconfig-a012-20200511
i386 randconfig-a016-20200511
i386 randconfig-a014-20200511
i386 randconfig-a011-20200511
i386 randconfig-a013-20200511
i386 randconfig-a015-20200511
i386 randconfig-a012-20200510
i386 randconfig-a016-20200510
i386 randconfig-a014-20200510
i386 randconfig-a011-20200510
i386 randconfig-a013-20200510
i386 randconfig-a015-20200510
i386 randconfig-a012-20200507
i386 randconfig-a016-20200507
i386 randconfig-a014-20200507
i386 randconfig-a011-20200507
i386 randconfig-a015-20200507
i386 randconfig-a013-20200507
i386 randconfig-a012-20200508
i386 randconfig-a014-20200508
i386 randconfig-a016-20200508
i386 randconfig-a011-20200508
i386 randconfig-a013-20200508
i386 randconfig-a015-20200508
riscv allyesconfig
riscv defconfig
s390 allyesconfig
s390 defconfig
sparc defconfig
sparc64 defconfig
sparc64 allnoconfig
sparc64 allyesconfig
sparc64 allmodconfig
um allmodconfig
um allyesconfig
x86_64 rhel
x86_64 rhel-7.6
x86_64 rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64 lkp
x86_64 fedora-25
x86_64 kexec
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:topic/ppc-kvm] BUILD SUCCESS b1f9be9392f090f08e4ad9e2c68963aeff03bd67
From: kbuild test robot @ 2020-05-11 8:25 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git topic/ppc-kvm
branch HEAD: b1f9be9392f090f08e4ad9e2c68963aeff03bd67 powerpc/xive: Enforce load-after-store ordering when StoreEOI is active
elapsed time: 484m
configs tested: 95
configs skipped: 71
The following configs have been built successfully.
More configs may be tested in the coming days.
arm defconfig
arm allyesconfig
arm allmodconfig
arm allnoconfig
arm64 allyesconfig
arm64 defconfig
arm64 allmodconfig
arm64 allnoconfig
sparc allyesconfig
m68k allyesconfig
parisc allyesconfig
i386 allnoconfig
i386 allyesconfig
i386 defconfig
i386 debian-10.3
ia64 allmodconfig
ia64 defconfig
ia64 allnoconfig
ia64 allyesconfig
nios2 defconfig
nios2 allyesconfig
openrisc defconfig
c6x allyesconfig
c6x allnoconfig
openrisc allyesconfig
nds32 defconfig
nds32 allnoconfig
csky allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
h8300 allmodconfig
xtensa defconfig
arc defconfig
arc allyesconfig
sh allmodconfig
sh allnoconfig
microblaze allnoconfig
mips allyesconfig
mips allnoconfig
mips allmodconfig
parisc allnoconfig
parisc defconfig
parisc allmodconfig
powerpc defconfig
powerpc allyesconfig
powerpc rhel-kconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a006-20200511
i386 randconfig-a005-20200511
i386 randconfig-a003-20200511
i386 randconfig-a001-20200511
i386 randconfig-a004-20200511
i386 randconfig-a002-20200511
i386 randconfig-a012-20200510
i386 randconfig-a016-20200510
i386 randconfig-a014-20200510
i386 randconfig-a011-20200510
i386 randconfig-a013-20200510
i386 randconfig-a015-20200510
i386 randconfig-a012-20200511
i386 randconfig-a016-20200511
i386 randconfig-a014-20200511
i386 randconfig-a011-20200511
i386 randconfig-a013-20200511
i386 randconfig-a015-20200511
x86_64 randconfig-a005-20200511
x86_64 randconfig-a003-20200511
x86_64 randconfig-a006-20200511
x86_64 randconfig-a004-20200511
x86_64 randconfig-a001-20200511
x86_64 randconfig-a002-20200511
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
s390 allyesconfig
s390 allnoconfig
s390 allmodconfig
s390 defconfig
sparc64 defconfig
sparc64 allnoconfig
sparc64 allyesconfig
sparc64 allmodconfig
sparc defconfig
x86_64 rhel
x86_64 rhel-7.6
x86_64 rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64 lkp
x86_64 fedora-25
x86_64 kexec
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:fixes-test] BUILD SUCCESS 25107dc0610d8687ebbf4fc56323babf87149cb4
From: kbuild test robot @ 2020-05-11 8:24 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 25107dc0610d8687ebbf4fc56323babf87149cb4 powerpc/64s/kuap: Restore AMR in fast_interrupt_return
elapsed time: 8440m
configs tested: 111
configs skipped: 81
The following configs have been built successfully.
More configs may be tested in the coming days.
arm64 allyesconfig
arm allyesconfig
arm64 allmodconfig
arm allmodconfig
arm64 allnoconfig
arm allnoconfig
arm defconfig
arm64 defconfig
sparc allyesconfig
m68k allyesconfig
mips allmodconfig
m68k sun3_defconfig
riscv defconfig
xtensa allyesconfig
nds32 allnoconfig
powerpc allnoconfig
parisc allyesconfig
i386 allnoconfig
i386 allyesconfig
i386 defconfig
i386 debian-10.3
ia64 allmodconfig
ia64 defconfig
ia64 allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k allnoconfig
m68k defconfig
nios2 defconfig
nios2 allyesconfig
openrisc defconfig
c6x allyesconfig
c6x allnoconfig
openrisc allyesconfig
nds32 defconfig
csky allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
h8300 allyesconfig
h8300 allmodconfig
xtensa defconfig
arc defconfig
arc allyesconfig
sh allmodconfig
sh allnoconfig
microblaze allnoconfig
mips allyesconfig
mips allnoconfig
parisc allnoconfig
parisc allmodconfig
parisc defconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc defconfig
powerpc rhel-kconfig
i386 randconfig-a006-20200511
i386 randconfig-a005-20200511
i386 randconfig-a003-20200511
i386 randconfig-a001-20200511
i386 randconfig-a004-20200511
i386 randconfig-a002-20200511
x86_64 randconfig-a015-20200507
x86_64 randconfig-a014-20200507
x86_64 randconfig-a012-20200507
x86_64 randconfig-a013-20200507
x86_64 randconfig-a011-20200507
x86_64 randconfig-a016-20200507
i386 randconfig-a012-20200511
i386 randconfig-a016-20200511
i386 randconfig-a014-20200511
i386 randconfig-a011-20200511
i386 randconfig-a013-20200511
i386 randconfig-a015-20200511
i386 randconfig-a012-20200510
i386 randconfig-a016-20200510
i386 randconfig-a014-20200510
i386 randconfig-a011-20200510
i386 randconfig-a013-20200510
i386 randconfig-a015-20200510
x86_64 randconfig-a003-20200505
x86_64 randconfig-a001-20200505
i386 randconfig-a001-20200505
i386 randconfig-a003-20200505
i386 randconfig-a002-20200505
x86_64 randconfig-a005-20200511
x86_64 randconfig-a003-20200511
x86_64 randconfig-a006-20200511
x86_64 randconfig-a004-20200511
x86_64 randconfig-a001-20200511
x86_64 randconfig-a002-20200511
riscv allyesconfig
riscv allnoconfig
riscv allmodconfig
s390 allyesconfig
s390 allnoconfig
s390 allmodconfig
s390 defconfig
sparc defconfig
sparc64 defconfig
sparc64 allnoconfig
sparc64 allyesconfig
sparc64 allmodconfig
um defconfig
x86_64 rhel
x86_64 rhel-7.6
x86_64 rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64 lkp
x86_64 fedora-25
x86_64 kexec
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* Re: [PATCH 02/31] arm64: fix the flush_icache_range arguments in machine_kexec
From: Will Deacon @ 2020-05-11 7:51 UTC (permalink / raw)
To: Christoph Hellwig, james.morse, catalin.marinas
Cc: linux-ia64, linux-sh, Roman Zippel, linux-mips, linux-mm,
sparclinux, linux-riscv, linux-arch, linux-c6x-dev, linux-hexagon,
x86, linux-xtensa, Arnd Bergmann, Jessica Yu, linux-um,
linux-m68k, openrisc, linux-arm-kernel, Michal Simek,
linux-kernel, linux-alpha, linux-fsdevel, Andrew Morton,
linuxppc-dev
In-Reply-To: <20200510075510.987823-3-hch@lst.de>
[+James and Catalin]
On Sun, May 10, 2020 at 09:54:41AM +0200, Christoph Hellwig wrote:
> The second argument is the end "pointer", not the length.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> arch/arm64/kernel/machine_kexec.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> index 8e9c924423b4e..a0b144cfaea71 100644
> --- a/arch/arm64/kernel/machine_kexec.c
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -177,6 +177,7 @@ void machine_kexec(struct kimage *kimage)
> * the offline CPUs. Therefore, we must use the __* variant here.
> */
> __flush_icache_range((uintptr_t)reboot_code_buffer,
> + (uintptr_t)reboot_code_buffer +
> arm64_relocate_new_kernel_size);
Urgh, well spotted. It's annoyingly different from __flush_dcache_area().
But now I'm wondering what this code actually does... the loop condition
in invalidate_icache_by_line works with 64-bit arithmetic, so we could
spend a /very/ long time here afaict. It's also a bit annoying that we
do a bunch of redundant D-cache maintenance too.
Should we use invalidate_icache_range() here instead? (and why does that
thing need to toggle uaccess)? Argh, too many questions!
Will
^ permalink raw reply
* Re: sort out the flush_icache_range mess
From: Geert Uytterhoeven @ 2020-05-11 7:46 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-ia64@vger.kernel.org, Linux-sh list, Roman Zippel,
open list:BROADCOM NVRAM DRIVER, Linux MM, sparclinux,
linux-riscv, Linux-Arch, linux-c6x-dev,
open list:QUALCOMM HEXAGON..., the arch/x86 maintainers,
open list:TENSILICA XTENSA PORT (xtensa), Arnd Bergmann,
Jessica Yu, linux-um, linux-m68k, Openrisc, Linux ARM,
Michal Simek, Linux Kernel Mailing List, alpha, Linux FS Devel,
Andrew Morton, linuxppc-dev
In-Reply-To: <20200510075510.987823-1-hch@lst.de>
Hi Christoph,
On Sun, May 10, 2020 at 9:55 AM Christoph Hellwig <hch@lst.de> wrote:
> none of which really are used by a typical MMU enabled kernel, as a.out can
> only be build for alpha and m68k to start with.
Quoting myself:
"I think it's safe to assume no one still runs a.out binaries on m68k."
http://lore.kernel.org/r/CAMuHMdW+m0Q+j3rsQdMXnrEPm+XB5Y2AQrxW5sD1mZAKgmEqoA@mail.gmail.com
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: Greg KH @ 2020-05-11 7:41 UTC (permalink / raw)
To: rananta; +Cc: andrew, linuxppc-dev, linux-kernel, jslaby
In-Reply-To: <a033c31f8d8bf121e2cfdabbca138c1a@codeaurora.org>
On Mon, May 11, 2020 at 12:34:44AM -0700, rananta@codeaurora.org wrote:
> On 2020-05-11 00:23, rananta@codeaurora.org wrote:
> > On 2020-05-09 23:48, Greg KH wrote:
> > > On Sat, May 09, 2020 at 06:30:56PM -0700, rananta@codeaurora.org
> > > wrote:
> > > > On 2020-05-06 02:48, Greg KH wrote:
> > > > > On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
> > > > > > Potentially, hvc_open() can be called in parallel when two tasks calls
> > > > > > open() on /dev/hvcX. In such a scenario, if the
> > > > > > hp->ops->notifier_add()
> > > > > > callback in the function fails, where it sets the tty->driver_data to
> > > > > > NULL, the parallel hvc_open() can see this NULL and cause a memory
> > > > > > abort.
> > > > > > Hence, serialize hvc_open and check if tty->private_data is NULL
> > > > > > before
> > > > > > proceeding ahead.
> > > > > >
> > > > > > The issue can be easily reproduced by launching two tasks
> > > > > > simultaneously
> > > > > > that does nothing but open() and close() on /dev/hvcX.
> > > > > > For example:
> > > > > > $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
> > > > > >
> > > > > > Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
> > > > > > ---
> > > > > > drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
> > > > > > 1 file changed, 14 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/tty/hvc/hvc_console.c
> > > > > > b/drivers/tty/hvc/hvc_console.c
> > > > > > index 436cc51c92c3..ebe26fe5ac09 100644
> > > > > > --- a/drivers/tty/hvc/hvc_console.c
> > > > > > +++ b/drivers/tty/hvc/hvc_console.c
> > > > > > @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
> > > > > > */
> > > > > > static DEFINE_MUTEX(hvc_structs_mutex);
> > > > > >
> > > > > > +/* Mutex to serialize hvc_open */
> > > > > > +static DEFINE_MUTEX(hvc_open_mutex);
> > > > > > /*
> > > > > > * This value is used to assign a tty->index value to a hvc_struct
> > > > > > based
> > > > > > * upon order of exposure via hvc_probe(), when we can not match it
> > > > > > to
> > > > > > @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver
> > > > > > *driver, struct tty_struct *tty)
> > > > > > */
> > > > > > static int hvc_open(struct tty_struct *tty, struct file * filp)
> > > > > > {
> > > > > > - struct hvc_struct *hp = tty->driver_data;
> > > > > > + struct hvc_struct *hp;
> > > > > > unsigned long flags;
> > > > > > int rc = 0;
> > > > > >
> > > > > > + mutex_lock(&hvc_open_mutex);
> > > > > > +
> > > > > > + hp = tty->driver_data;
> > > > > > + if (!hp) {
> > > > > > + rc = -EIO;
> > > > > > + goto out;
> > > > > > + }
> > > > > > +
> > > > > > spin_lock_irqsave(&hp->port.lock, flags);
> > > > > > /* Check and then increment for fast path open. */
> > > > > > if (hp->port.count++ > 0) {
> > > > > > spin_unlock_irqrestore(&hp->port.lock, flags);
> > > > > > hvc_kick();
> > > > > > - return 0;
> > > > > > + goto out;
> > > > > > } /* else count == 0 */
> > > > > > spin_unlock_irqrestore(&hp->port.lock, flags);
> > > > >
> > > > > Wait, why isn't this driver just calling tty_port_open() instead of
> > > > > trying to open-code all of this?
> > > > >
> > > > > Keeping a single mutext for open will not protect it from close, it will
> > > > > just slow things down a bit. There should already be a tty lock held by
> > > > > the tty core for open() to keep it from racing things, right?
> > > > The tty lock should have been held, but not likely across
> > > > ->install() and
> > > > ->open() callbacks, thus resulting in a race between
> > > > hvc_install() and
> > > > hvc_open(),
> > >
> > > How? The tty lock is held in install, and should not conflict with
> > > open(), otherwise, we would be seeing this happen in all tty drivers,
> > > right?
> > >
> > Well, I was expecting the same, but IIRC, I see that the open() was
> > being
> > called in parallel for the same device node.
> >
> > Is it expected that the tty core would allow only one thread to
> > access the dev-node, while blocking the other, or is it the client
> > driver's responsibility to handle the exclusiveness?
> Or is there any optimization going on where the second call doesn't go
> through
> install(), but calls open() directly as the file was already opened by the
> first
> thread?
Yes, it should only happen once, look at the logic in tty_kopen().
greg k-h
^ permalink raw reply
* Re: [PATCH 31/31] module: move the set_fs hack for flush_icache_range to m68k
From: Geert Uytterhoeven @ 2020-05-11 7:40 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-ia64@vger.kernel.org, Linux-sh list, Roman Zippel,
open list:BROADCOM NVRAM DRIVER, Linux MM, sparclinux,
linux-riscv, Linux-Arch, linux-c6x-dev,
open list:QUALCOMM HEXAGON..., the arch/x86 maintainers,
open list:TENSILICA XTENSA PORT (xtensa), Arnd Bergmann,
Jessica Yu, linux-um, linux-m68k, Openrisc, Linux ARM,
Michal Simek, Linux Kernel Mailing List, alpha, Linux FS Devel,
Andrew Morton, linuxppc-dev
In-Reply-To: <20200510075510.987823-32-hch@lst.de>
On Sun, May 10, 2020 at 9:57 AM Christoph Hellwig <hch@lst.de> wrote:
>
> flush_icache_range generally operates on kernel addresses, but for some
> reason m68k needed a set_fs override. Move that into the m68k code
> insted of keeping it in the module loader.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: Greg KH @ 2020-05-11 7:39 UTC (permalink / raw)
To: rananta; +Cc: andrew, linuxppc-dev, linux-kernel, jslaby
In-Reply-To: <77d889be4e0cb0e6e30f96199e2d843d@codeaurora.org>
On Mon, May 11, 2020 at 12:23:58AM -0700, rananta@codeaurora.org wrote:
> On 2020-05-09 23:48, Greg KH wrote:
> > On Sat, May 09, 2020 at 06:30:56PM -0700, rananta@codeaurora.org wrote:
> > > On 2020-05-06 02:48, Greg KH wrote:
> > > > On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
> > > > > Potentially, hvc_open() can be called in parallel when two tasks calls
> > > > > open() on /dev/hvcX. In such a scenario, if the
> > > > > hp->ops->notifier_add()
> > > > > callback in the function fails, where it sets the tty->driver_data to
> > > > > NULL, the parallel hvc_open() can see this NULL and cause a memory
> > > > > abort.
> > > > > Hence, serialize hvc_open and check if tty->private_data is NULL
> > > > > before
> > > > > proceeding ahead.
> > > > >
> > > > > The issue can be easily reproduced by launching two tasks
> > > > > simultaneously
> > > > > that does nothing but open() and close() on /dev/hvcX.
> > > > > For example:
> > > > > $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
> > > > >
> > > > > Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
> > > > > ---
> > > > > drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
> > > > > 1 file changed, 14 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/tty/hvc/hvc_console.c
> > > > > b/drivers/tty/hvc/hvc_console.c
> > > > > index 436cc51c92c3..ebe26fe5ac09 100644
> > > > > --- a/drivers/tty/hvc/hvc_console.c
> > > > > +++ b/drivers/tty/hvc/hvc_console.c
> > > > > @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
> > > > > */
> > > > > static DEFINE_MUTEX(hvc_structs_mutex);
> > > > >
> > > > > +/* Mutex to serialize hvc_open */
> > > > > +static DEFINE_MUTEX(hvc_open_mutex);
> > > > > /*
> > > > > * This value is used to assign a tty->index value to a hvc_struct
> > > > > based
> > > > > * upon order of exposure via hvc_probe(), when we can not match it
> > > > > to
> > > > > @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver
> > > > > *driver, struct tty_struct *tty)
> > > > > */
> > > > > static int hvc_open(struct tty_struct *tty, struct file * filp)
> > > > > {
> > > > > - struct hvc_struct *hp = tty->driver_data;
> > > > > + struct hvc_struct *hp;
> > > > > unsigned long flags;
> > > > > int rc = 0;
> > > > >
> > > > > + mutex_lock(&hvc_open_mutex);
> > > > > +
> > > > > + hp = tty->driver_data;
> > > > > + if (!hp) {
> > > > > + rc = -EIO;
> > > > > + goto out;
> > > > > + }
> > > > > +
> > > > > spin_lock_irqsave(&hp->port.lock, flags);
> > > > > /* Check and then increment for fast path open. */
> > > > > if (hp->port.count++ > 0) {
> > > > > spin_unlock_irqrestore(&hp->port.lock, flags);
> > > > > hvc_kick();
> > > > > - return 0;
> > > > > + goto out;
> > > > > } /* else count == 0 */
> > > > > spin_unlock_irqrestore(&hp->port.lock, flags);
> > > >
> > > > Wait, why isn't this driver just calling tty_port_open() instead of
> > > > trying to open-code all of this?
> > > >
> > > > Keeping a single mutext for open will not protect it from close, it will
> > > > just slow things down a bit. There should already be a tty lock held by
> > > > the tty core for open() to keep it from racing things, right?
> > > The tty lock should have been held, but not likely across
> > > ->install() and
> > > ->open() callbacks, thus resulting in a race between hvc_install() and
> > > hvc_open(),
> >
> > How? The tty lock is held in install, and should not conflict with
> > open(), otherwise, we would be seeing this happen in all tty drivers,
> > right?
> >
> Well, I was expecting the same, but IIRC, I see that the open() was being
> called in parallel for the same device node.
So open and install are happening at the same time? And the tty_lock()
does not protect the needed fields from being protected properly? If
not, what fields are being touched without the lock?
> Is it expected that the tty core would allow only one thread to
> access the dev-node, while blocking the other, or is it the client
> driver's responsibility to handle the exclusiveness?
The tty core should handle this correctly, for things that can mess
stuff up (like install and open at the same time). A driver should not
have to worry about that.
> > > where hvc_install() sets a data and the hvc_open() clears it.
> > > hvc_open()
> > > doesn't
> > > check if the data was set to NULL and proceeds.
> >
> > What data is being set that hvc_open is checking?
> hvc_install sets tty->private_data to hp, while hvc_open sets it to NULL (in
> one of the paths).
I see no use of private_data in drivers/tty/hvc/ so what exactly are you
referring to? The file private_data or the port private_data or
something else?
> > And you are not grabbing a lock in your install callback, you are only
> > serializing your open call here, I don't see how this is fixing anything
> > other than perhaps slowing down your codepaths.
> Basically, my intention was to add a NULL check before accessing *hp in
> open().
> The intention of the lock was to protect against this check.
> If the tty layer would have taken care of this, then perhaps there won't be
> a
> need to check for NULL.
Ah, driver_data is what you are referring to, not private_data.
Look at hvc_close(), no locking is done there to test for private_data,
right? Why not? The only thing setting driver_data is in install, and
your lock is not touching that.
And again, install and open should not race, if so, the tty core needs
to be fixed.
> > As an arument why this isn't correct, can you answer why this same type
> > of change wouldn't be required for all tty drivers in the tree?
> >
> I agree, that if it's already taken care by the tty-core, we don't need it
> here.
> Correct me if I'm wrong, but looks like the tty layer is allowing parallel
> accesses
> to open(),
I do not think that happens, try counting the calls to open(), there
should only be one. If not, that's a bug somewhere else.
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH 26/31] m68k: implement flush_icache_user_range
From: Geert Uytterhoeven @ 2020-05-11 7:38 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-ia64@vger.kernel.org, Linux-sh list, Roman Zippel,
open list:BROADCOM NVRAM DRIVER, Linux MM, sparclinux,
linux-riscv, Linux-Arch, linux-c6x-dev,
open list:QUALCOMM HEXAGON..., the arch/x86 maintainers,
open list:TENSILICA XTENSA PORT (xtensa), Arnd Bergmann,
Jessica Yu, linux-um, linux-m68k, Openrisc, Linux ARM,
Michal Simek, Linux Kernel Mailing List, alpha, Linux FS Devel,
Andrew Morton, linuxppc-dev
In-Reply-To: <20200510075510.987823-27-hch@lst.de>
On Sun, May 10, 2020 at 9:57 AM Christoph Hellwig <hch@lst.de> wrote:
> Rename the current flush_icache_range to flush_icache_user_range as
> per commit ae92ef8a4424 ("PATCH] flush icache in correct context") there
> seems to be an assumption that it operates on user addresses. Add a
> flush_icache_range around it that for now is a no-op.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH 21/31] mm: rename flush_icache_user_range to flush_icache_user_page
From: Geert Uytterhoeven @ 2020-05-11 7:36 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-ia64@vger.kernel.org, Linux-sh list, Roman Zippel,
open list:BROADCOM NVRAM DRIVER, Linux MM, sparclinux,
linux-riscv, Linux-Arch, linux-c6x-dev,
open list:QUALCOMM HEXAGON..., the arch/x86 maintainers,
open list:TENSILICA XTENSA PORT (xtensa), Arnd Bergmann,
Jessica Yu, linux-um, linux-m68k, Openrisc, Linux ARM,
Michal Simek, Linux Kernel Mailing List, alpha, Linux FS Devel,
Andrew Morton, linuxppc-dev
In-Reply-To: <20200510075510.987823-22-hch@lst.de>
On Sun, May 10, 2020 at 9:57 AM Christoph Hellwig <hch@lst.de> wrote:
> The function currently known as flush_icache_user_range only operates
> on a single page. Rename it to flush_icache_user_page as we'll need
> the name flush_icache_user_range for something else soon.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> arch/m68k/include/asm/cacheflush_mm.h | 4 ++--
> arch/m68k/mm/cache.c | 2 +-
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: rananta @ 2020-05-11 7:34 UTC (permalink / raw)
To: Greg KH; +Cc: andrew, linuxppc-dev, linux-kernel, jslaby
In-Reply-To: <77d889be4e0cb0e6e30f96199e2d843d@codeaurora.org>
On 2020-05-11 00:23, rananta@codeaurora.org wrote:
> On 2020-05-09 23:48, Greg KH wrote:
>> On Sat, May 09, 2020 at 06:30:56PM -0700, rananta@codeaurora.org
>> wrote:
>>> On 2020-05-06 02:48, Greg KH wrote:
>>> > On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
>>> > > Potentially, hvc_open() can be called in parallel when two tasks calls
>>> > > open() on /dev/hvcX. In such a scenario, if the
>>> > > hp->ops->notifier_add()
>>> > > callback in the function fails, where it sets the tty->driver_data to
>>> > > NULL, the parallel hvc_open() can see this NULL and cause a memory
>>> > > abort.
>>> > > Hence, serialize hvc_open and check if tty->private_data is NULL
>>> > > before
>>> > > proceeding ahead.
>>> > >
>>> > > The issue can be easily reproduced by launching two tasks
>>> > > simultaneously
>>> > > that does nothing but open() and close() on /dev/hvcX.
>>> > > For example:
>>> > > $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
>>> > >
>>> > > Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
>>> > > ---
>>> > > drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
>>> > > 1 file changed, 14 insertions(+), 2 deletions(-)
>>> > >
>>> > > diff --git a/drivers/tty/hvc/hvc_console.c
>>> > > b/drivers/tty/hvc/hvc_console.c
>>> > > index 436cc51c92c3..ebe26fe5ac09 100644
>>> > > --- a/drivers/tty/hvc/hvc_console.c
>>> > > +++ b/drivers/tty/hvc/hvc_console.c
>>> > > @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
>>> > > */
>>> > > static DEFINE_MUTEX(hvc_structs_mutex);
>>> > >
>>> > > +/* Mutex to serialize hvc_open */
>>> > > +static DEFINE_MUTEX(hvc_open_mutex);
>>> > > /*
>>> > > * This value is used to assign a tty->index value to a hvc_struct
>>> > > based
>>> > > * upon order of exposure via hvc_probe(), when we can not match it
>>> > > to
>>> > > @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver
>>> > > *driver, struct tty_struct *tty)
>>> > > */
>>> > > static int hvc_open(struct tty_struct *tty, struct file * filp)
>>> > > {
>>> > > - struct hvc_struct *hp = tty->driver_data;
>>> > > + struct hvc_struct *hp;
>>> > > unsigned long flags;
>>> > > int rc = 0;
>>> > >
>>> > > + mutex_lock(&hvc_open_mutex);
>>> > > +
>>> > > + hp = tty->driver_data;
>>> > > + if (!hp) {
>>> > > + rc = -EIO;
>>> > > + goto out;
>>> > > + }
>>> > > +
>>> > > spin_lock_irqsave(&hp->port.lock, flags);
>>> > > /* Check and then increment for fast path open. */
>>> > > if (hp->port.count++ > 0) {
>>> > > spin_unlock_irqrestore(&hp->port.lock, flags);
>>> > > hvc_kick();
>>> > > - return 0;
>>> > > + goto out;
>>> > > } /* else count == 0 */
>>> > > spin_unlock_irqrestore(&hp->port.lock, flags);
>>> >
>>> > Wait, why isn't this driver just calling tty_port_open() instead of
>>> > trying to open-code all of this?
>>> >
>>> > Keeping a single mutext for open will not protect it from close, it will
>>> > just slow things down a bit. There should already be a tty lock held by
>>> > the tty core for open() to keep it from racing things, right?
>>> The tty lock should have been held, but not likely across ->install()
>>> and
>>> ->open() callbacks, thus resulting in a race between hvc_install()
>>> and
>>> hvc_open(),
>>
>> How? The tty lock is held in install, and should not conflict with
>> open(), otherwise, we would be seeing this happen in all tty drivers,
>> right?
>>
> Well, I was expecting the same, but IIRC, I see that the open() was
> being
> called in parallel for the same device node.
>
> Is it expected that the tty core would allow only one thread to
> access the dev-node, while blocking the other, or is it the client
> driver's responsibility to handle the exclusiveness?
Or is there any optimization going on where the second call doesn't go
through
install(), but calls open() directly as the file was already opened by
the first
thread?
>>> where hvc_install() sets a data and the hvc_open() clears it.
>>> hvc_open()
>>> doesn't
>>> check if the data was set to NULL and proceeds.
>>
>> What data is being set that hvc_open is checking?
> hvc_install sets tty->private_data to hp, while hvc_open sets it to
> NULL (in one of the paths).
>>
>> And you are not grabbing a lock in your install callback, you are only
>> serializing your open call here, I don't see how this is fixing
>> anything
>> other than perhaps slowing down your codepaths.
> Basically, my intention was to add a NULL check before accessing *hp in
> open().
> The intention of the lock was to protect against this check.
> If the tty layer would have taken care of this, then perhaps there
> won't be a
> need to check for NULL.
>>
>> As an arument why this isn't correct, can you answer why this same
>> type
>> of change wouldn't be required for all tty drivers in the tree?
>>
> I agree, that if it's already taken care by the tty-core, we don't need
> it here.
> Correct me if I'm wrong, but looks like the tty layer is allowing
> parallel accesses
> to open(),
>> thanks,
>>
>> greg k-h
^ permalink raw reply
* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: rananta @ 2020-05-11 7:23 UTC (permalink / raw)
To: Greg KH; +Cc: andrew, linuxppc-dev, linux-kernel, jslaby
In-Reply-To: <20200510064819.GB3400311@kroah.com>
On 2020-05-09 23:48, Greg KH wrote:
> On Sat, May 09, 2020 at 06:30:56PM -0700, rananta@codeaurora.org wrote:
>> On 2020-05-06 02:48, Greg KH wrote:
>> > On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
>> > > Potentially, hvc_open() can be called in parallel when two tasks calls
>> > > open() on /dev/hvcX. In such a scenario, if the
>> > > hp->ops->notifier_add()
>> > > callback in the function fails, where it sets the tty->driver_data to
>> > > NULL, the parallel hvc_open() can see this NULL and cause a memory
>> > > abort.
>> > > Hence, serialize hvc_open and check if tty->private_data is NULL
>> > > before
>> > > proceeding ahead.
>> > >
>> > > The issue can be easily reproduced by launching two tasks
>> > > simultaneously
>> > > that does nothing but open() and close() on /dev/hvcX.
>> > > For example:
>> > > $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
>> > >
>> > > Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
>> > > ---
>> > > drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
>> > > 1 file changed, 14 insertions(+), 2 deletions(-)
>> > >
>> > > diff --git a/drivers/tty/hvc/hvc_console.c
>> > > b/drivers/tty/hvc/hvc_console.c
>> > > index 436cc51c92c3..ebe26fe5ac09 100644
>> > > --- a/drivers/tty/hvc/hvc_console.c
>> > > +++ b/drivers/tty/hvc/hvc_console.c
>> > > @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
>> > > */
>> > > static DEFINE_MUTEX(hvc_structs_mutex);
>> > >
>> > > +/* Mutex to serialize hvc_open */
>> > > +static DEFINE_MUTEX(hvc_open_mutex);
>> > > /*
>> > > * This value is used to assign a tty->index value to a hvc_struct
>> > > based
>> > > * upon order of exposure via hvc_probe(), when we can not match it
>> > > to
>> > > @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver
>> > > *driver, struct tty_struct *tty)
>> > > */
>> > > static int hvc_open(struct tty_struct *tty, struct file * filp)
>> > > {
>> > > - struct hvc_struct *hp = tty->driver_data;
>> > > + struct hvc_struct *hp;
>> > > unsigned long flags;
>> > > int rc = 0;
>> > >
>> > > + mutex_lock(&hvc_open_mutex);
>> > > +
>> > > + hp = tty->driver_data;
>> > > + if (!hp) {
>> > > + rc = -EIO;
>> > > + goto out;
>> > > + }
>> > > +
>> > > spin_lock_irqsave(&hp->port.lock, flags);
>> > > /* Check and then increment for fast path open. */
>> > > if (hp->port.count++ > 0) {
>> > > spin_unlock_irqrestore(&hp->port.lock, flags);
>> > > hvc_kick();
>> > > - return 0;
>> > > + goto out;
>> > > } /* else count == 0 */
>> > > spin_unlock_irqrestore(&hp->port.lock, flags);
>> >
>> > Wait, why isn't this driver just calling tty_port_open() instead of
>> > trying to open-code all of this?
>> >
>> > Keeping a single mutext for open will not protect it from close, it will
>> > just slow things down a bit. There should already be a tty lock held by
>> > the tty core for open() to keep it from racing things, right?
>> The tty lock should have been held, but not likely across ->install()
>> and
>> ->open() callbacks, thus resulting in a race between hvc_install() and
>> hvc_open(),
>
> How? The tty lock is held in install, and should not conflict with
> open(), otherwise, we would be seeing this happen in all tty drivers,
> right?
>
Well, I was expecting the same, but IIRC, I see that the open() was
being
called in parallel for the same device node.
Is it expected that the tty core would allow only one thread to
access the dev-node, while blocking the other, or is it the client
driver's responsibility to handle the exclusiveness?
>> where hvc_install() sets a data and the hvc_open() clears it.
>> hvc_open()
>> doesn't
>> check if the data was set to NULL and proceeds.
>
> What data is being set that hvc_open is checking?
hvc_install sets tty->private_data to hp, while hvc_open sets it to NULL
(in one of the paths).
>
> And you are not grabbing a lock in your install callback, you are only
> serializing your open call here, I don't see how this is fixing
> anything
> other than perhaps slowing down your codepaths.
Basically, my intention was to add a NULL check before accessing *hp in
open().
The intention of the lock was to protect against this check.
If the tty layer would have taken care of this, then perhaps there won't
be a
need to check for NULL.
>
> As an arument why this isn't correct, can you answer why this same type
> of change wouldn't be required for all tty drivers in the tree?
>
I agree, that if it's already taken care by the tty-core, we don't need
it here.
Correct me if I'm wrong, but looks like the tty layer is allowing
parallel accesses
to open(),
> thanks,
>
> greg k-h
^ permalink raw reply
* Re: [PATCH RFC 3/4] powerpc/microwatt: Add early debug UART support for Microwatt
From: Segher Boessenkool @ 2020-05-11 7:07 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev, Michael Neuling, Benjamin Herrenschmidt
In-Reply-To: <20200509050340.GD1464954@thinks.paulus.ozlabs.org>
Hi!
On Sat, May 09, 2020 at 03:03:40PM +1000, Paul Mackerras wrote:
> + __asm__ volatile("mtmsrd %3,0; ldcix %0,%1,%2; mtmsrd %4,0"
> + : "=r" (val) : "b" (potato_uart_base), "r" (offset),
> + "r" (msr & ~MSR_DR), "r" (msr));
That should be "=&r"(val) (an earlyclobber), because when %0 is
written, %4 will still be used later.
Looks fine otherwise.
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Segher
^ permalink raw reply
* Re: [PATCH v4 02/14] arm: add support for folded p4d page tables
From: Marek Szyprowski @ 2020-05-11 6:36 UTC (permalink / raw)
To: Mike Rapoport
Cc: Rich Felker, linux-ia64, Geert Uytterhoeven, Fenghua Yu, linux-mm,
Paul Mackerras, Will Deacon, kvmarm, Jonas Bonn, Brian Cain,
linux-hexagon, linux-sh, Russell King, Ley Foon Tan,
Catalin Marinas, uclinux-h8-devel, linux-arch, Arnd Bergmann,
Bartlomiej Zolnierkiewicz, Łukasz Stelmach, kvm-ppc,
Stefan Kristiansson, openrisc, Stafford Horne, Guan Xuetao,
linux-arm-kernel, Tony Luck, Yoshinori Sato, linux-kernel,
Marc Zyngier, nios2-dev, Andrew Morton, linuxppc-dev,
Mike Rapoport
In-Reply-To: <20200508174232.GA759899@linux.ibm.com>
Hi Mike,
On 08.05.2020 19:42, Mike Rapoport wrote:
> On Fri, May 08, 2020 at 08:53:27AM +0200, Marek Szyprowski wrote:
>> On 07.05.2020 18:11, Mike Rapoport wrote:
>>> On Thu, May 07, 2020 at 02:16:56PM +0200, Marek Szyprowski wrote:
>>>> On 14.04.2020 17:34, Mike Rapoport wrote:
>>>>> From: Mike Rapoport <rppt@linux.ibm.com>
>>>>>
>>>>> Implement primitives necessary for the 4th level folding, add walks of p4d
>>>>> level where appropriate, and remove __ARCH_USE_5LEVEL_HACK.
>>>>>
>>>>> Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
>>>> Today I've noticed that kexec is broken on ARM 32bit. Bisecting between
>>>> current linux-next and v5.7-rc1 pointed to this commit. I've tested this
>>>> on Odroid XU4 and Raspberry Pi4 boards. Here is the relevant log:
>>>>
>>>> # kexec --kexec-syscall -l zImage --append "$(cat /proc/cmdline)"
>>>> memory_range[0]:0x40000000..0xbe9fffff
>>>> memory_range[0]:0x40000000..0xbe9fffff
>>>> # kexec -e
>>>> kexec_core: Starting new kernel
>>>> 8<--- cut here ---
>>>> Unable to handle kernel paging request at virtual address c010f1f4
>>>> pgd = c6817793
>>>> [c010f1f4] *pgd=4000041e(bad)
>>>> Internal error: Oops: 80d [#1] PREEMPT ARM
>>>> Modules linked in:
>>>> CPU: 0 PID: 1329 Comm: kexec Tainted: G W
>>>> 5.7.0-rc3-00127-g6cba81ed0f62 #611
>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>> PC is at machine_kexec+0x40/0xfc
>>> Any chance you have the debug info in this kernel?
>>> scripts/faddr2line would come handy here.
>> # ./scripts/faddr2line --list vmlinux machine_kexec+0x40
>> machine_kexec+0x40/0xf8:
>>
>> machine_kexec at arch/arm/kernel/machine_kexec.c:182
>> 177 reboot_code_buffer =
>> page_address(image->control_code_page);
>> 178
>> 179 /* Prepare parameters for reboot_code_buffer*/
>> 180 set_kernel_text_rw();
>> 181 kexec_start_address = image->start;
>> >182< kexec_indirection_page = page_list;
>> 183 kexec_mach_type = machine_arch_type;
>> 184 kexec_boot_atags = image->arch.kernel_r2;
>> 185
>> 186 /* copy our kernel relocation code to the control code
>> page */
>> 187 reboot_entry = fncpy(reboot_code_buffer,
> Can you please try the patch below:
>
> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> index 963b5284d284..f86b3d17928e 100644
> --- a/arch/arm/mm/init.c
> +++ b/arch/arm/mm/init.c
> @@ -571,7 +571,7 @@ static inline void section_update(unsigned long addr, pmdval_t mask,
> {
> pmd_t *pmd;
>
> - pmd = pmd_off_k(addr);
> + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr);
>
> #ifdef CONFIG_ARM_LPAE
> pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
This fixes kexec issue! Thanks!
Feel free to add:
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Fixes: 218f1c390557 ("arm: add support for folded p4d page tables")
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
^ permalink raw reply
* Re: [PATCH V3 2/3] mm/hugetlb: Define a generic fallback for is_hugepage_only_range()
From: Anshuman Khandual @ 2020-05-11 3:14 UTC (permalink / raw)
To: Mike Kravetz, linux-mm, akpm
Cc: Rich Felker, linux-ia64, linux-sh, Catalin Marinas,
Heiko Carstens, linux-kernel, James E.J. Bottomley,
Paul Mackerras, H. Peter Anvin, sparclinux, linux-riscv,
Will Deacon, linux-arch, linux-s390, Yoshinori Sato, Helge Deller,
x86, Russell King, Christian Borntraeger, Ingo Molnar, Fenghua Yu,
Vasily Gorbik, Thomas Bogendoerfer, Borislav Petkov,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, Tony Luck,
linux-parisc, linux-mips, Palmer Dabbelt, linuxppc-dev,
David S. Miller
In-Reply-To: <9fc622e1-45ff-b79f-ebe0-35614837456c@oracle.com>
On 05/09/2020 03:52 AM, Mike Kravetz wrote:
> On 5/7/20 8:07 PM, Anshuman Khandual wrote:
>> There are multiple similar definitions for is_hugepage_only_range() on
>> various platforms. Lets just add it's generic fallback definition for
>> platforms that do not override. This help reduce code duplication.
>>
>> Cc: Russell King <linux@armlinux.org.uk>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Tony Luck <tony.luck@intel.com>
>> Cc: Fenghua Yu <fenghua.yu@intel.com>
>> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
>> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
>> Cc: Helge Deller <deller@gmx.de>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Paul Walmsley <paul.walmsley@sifive.com>
>> Cc: Palmer Dabbelt <palmer@dabbelt.com>
>> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
>> Cc: Vasily Gorbik <gor@linux.ibm.com>
>> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
>> Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
>> Cc: Rich Felker <dalias@libc.org>
>> Cc: "David S. Miller" <davem@davemloft.net>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Borislav Petkov <bp@alien8.de>
>> Cc: "H. Peter Anvin" <hpa@zytor.com>
>> Cc: Mike Kravetz <mike.kravetz@oracle.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: x86@kernel.org
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-ia64@vger.kernel.org
>> Cc: linux-mips@vger.kernel.org
>> Cc: linux-parisc@vger.kernel.org
>> Cc: linuxppc-dev@lists.ozlabs.org
>> Cc: linux-riscv@lists.infradead.org
>> Cc: linux-s390@vger.kernel.org
>> Cc: linux-sh@vger.kernel.org
>> Cc: sparclinux@vger.kernel.org
>> Cc: linux-mm@kvack.org
>> Cc: linux-arch@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>> arch/arm/include/asm/hugetlb.h | 6 ------
>> arch/arm64/include/asm/hugetlb.h | 6 ------
>> arch/ia64/include/asm/hugetlb.h | 1 +
>> arch/mips/include/asm/hugetlb.h | 7 -------
>> arch/parisc/include/asm/hugetlb.h | 6 ------
>> arch/powerpc/include/asm/hugetlb.h | 1 +
>> arch/riscv/include/asm/hugetlb.h | 6 ------
>> arch/s390/include/asm/hugetlb.h | 7 -------
>> arch/sh/include/asm/hugetlb.h | 6 ------
>> arch/sparc/include/asm/hugetlb.h | 6 ------
>> arch/x86/include/asm/hugetlb.h | 6 ------
>> include/linux/hugetlb.h | 9 +++++++++
>> 12 files changed, 11 insertions(+), 56 deletions(-)
>>
> <snip>
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 43a1cef8f0f1..c01c0c6f7fd4 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -591,6 +591,15 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
>>
>> #include <asm/hugetlb.h>
>>
>> +#ifndef is_hugepage_only_range
>> +static inline int is_hugepage_only_range(struct mm_struct *mm,
>> + unsigned long addr, unsigned long len)
>> +{
>> + return 0;
>> +}
>> +#define is_hugepage_only_range is_hugepage_only_range
>> +#endif
>> +
>> #ifndef arch_make_huge_pte
>> static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
>> struct page *page, int writable)
>>
>
> Did you try building without CONFIG_HUGETLB_PAGE defined? I'm guessing
Yes I did for multiple platforms (s390, arm64, ia64, x86, powerpc etc).
> that you need a stub for is_hugepage_only_range(). Or, perhaps add this
> to asm-generic/hugetlb.h?
>
There is already a stub (include/linux/hugetlb.h) when !CONFIG_HUGETLB_PAGE.
^ permalink raw reply
* [PATCH v5 16/16] powerpc/watchpoint/xmon: Support 2nd DAWR
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
Add support for 2nd DAWR in xmon. With this, we can have two
simultaneous breakpoints from xmon.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/xmon/xmon.c | 101 ++++++++++++++++++++++++++-------------
1 file changed, 69 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 99e9138661e4..01da49b666db 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -111,7 +111,7 @@ struct bpt {
#define NBPTS 256
static struct bpt bpts[NBPTS];
-static struct bpt dabr;
+static struct bpt dabr[HBP_NUM_MAX];
static struct bpt *iabr;
static unsigned bpinstr = 0x7fe00008; /* trap */
@@ -787,10 +787,17 @@ static int xmon_sstep(struct pt_regs *regs)
static int xmon_break_match(struct pt_regs *regs)
{
+ int i;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
- if (dabr.enabled == 0)
- return 0;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (dabr[i].enabled)
+ goto found;
+ }
+ return 0;
+
+found:
xmon_core(regs, 0);
return 1;
}
@@ -929,13 +936,16 @@ static void insert_bpts(void)
static void insert_cpu_bpts(void)
{
+ int i;
struct arch_hw_breakpoint brk;
- if (dabr.enabled) {
- brk.address = dabr.address;
- brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- brk.len = DABR_MAX_LEN;
- __set_breakpoint(0, &brk);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (dabr[i].enabled) {
+ brk.address = dabr[i].address;
+ brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+ brk.len = 8;
+ __set_breakpoint(i, &brk);
+ }
}
if (iabr)
@@ -1349,6 +1359,35 @@ static long check_bp_loc(unsigned long addr)
return 1;
}
+static int find_free_data_bpt(void)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!dabr[i].enabled)
+ return i;
+ }
+ printf("Couldn't find free breakpoint register\n");
+ return -1;
+}
+
+static void print_data_bpts(void)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!dabr[i].enabled)
+ continue;
+
+ printf(" data "REG" [", dabr[i].address);
+ if (dabr[i].enabled & 1)
+ printf("r");
+ if (dabr[i].enabled & 2)
+ printf("w");
+ printf("]\n");
+ }
+}
+
static char *breakpoint_help_string =
"Breakpoint command usage:\n"
"b show breakpoints\n"
@@ -1382,10 +1421,9 @@ bpt_cmds(void)
printf("Hardware data breakpoint not supported on this cpu\n");
break;
}
- if (dabr.enabled) {
- printf("Couldn't find free breakpoint register\n");
+ i = find_free_data_bpt();
+ if (i < 0)
break;
- }
mode = 7;
cmd = inchar();
if (cmd == 'r')
@@ -1394,15 +1432,15 @@ bpt_cmds(void)
mode = 6;
else
termch = cmd;
- dabr.address = 0;
- dabr.enabled = 0;
- if (scanhex(&dabr.address)) {
- if (!is_kernel_addr(dabr.address)) {
+ dabr[i].address = 0;
+ dabr[i].enabled = 0;
+ if (scanhex(&dabr[i].address)) {
+ if (!is_kernel_addr(dabr[i].address)) {
printf(badaddr);
break;
}
- dabr.address &= ~HW_BRK_TYPE_DABR;
- dabr.enabled = mode | BP_DABR;
+ dabr[i].address &= ~HW_BRK_TYPE_DABR;
+ dabr[i].enabled = mode | BP_DABR;
}
force_enable_xmon();
@@ -1441,7 +1479,9 @@ bpt_cmds(void)
for (i = 0; i < NBPTS; ++i)
bpts[i].enabled = 0;
iabr = NULL;
- dabr.enabled = 0;
+ for (i = 0; i < nr_wp_slots(); i++)
+ dabr[i].enabled = 0;
+
printf("All breakpoints cleared\n");
break;
}
@@ -1475,14 +1515,7 @@ bpt_cmds(void)
if (xmon_is_ro || !scanhex(&a)) {
/* print all breakpoints */
printf(" type address\n");
- if (dabr.enabled) {
- printf(" data "REG" [", dabr.address);
- if (dabr.enabled & 1)
- printf("r");
- if (dabr.enabled & 2)
- printf("w");
- printf("]\n");
- }
+ print_data_bpts();
for (bp = bpts; bp < &bpts[NBPTS]; ++bp) {
if (!bp->enabled)
continue;
@@ -1942,8 +1975,13 @@ static void dump_207_sprs(void)
printf("hfscr = %.16lx dhdes = %.16lx rpr = %.16lx\n",
mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR));
- printf("dawr = %.16lx dawrx = %.16lx ciabr = %.16lx\n",
- mfspr(SPRN_DAWR0), mfspr(SPRN_DAWRX0), mfspr(SPRN_CIABR));
+ printf("dawr0 = %.16lx dawrx0 = %.16lx\n",
+ mfspr(SPRN_DAWR0), mfspr(SPRN_DAWRX0));
+ if (nr_wp_slots() > 1) {
+ printf("dawr1 = %.16lx dawrx1 = %.16lx\n",
+ mfspr(SPRN_DAWR1), mfspr(SPRN_DAWRX1));
+ }
+ printf("ciabr = %.16lx\n", mfspr(SPRN_CIABR));
#endif
}
@@ -3873,10 +3911,9 @@ static void clear_all_bpt(void)
bpts[i].enabled = 0;
/* Clear any data or iabr breakpoints */
- if (iabr || dabr.enabled) {
- iabr = NULL;
- dabr.enabled = 0;
- }
+ iabr = NULL;
+ for (i = 0; i < nr_wp_slots(); i++)
+ dabr[i].enabled = 0;
}
#ifdef CONFIG_DEBUG_FS
--
2.21.1
^ permalink raw reply related
* [PATCH v5 15/16] powerpc/watchpoint/xmon: Don't allow breakpoint overwriting
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
Xmon allows overwriting breakpoints because it's supported by only
one DAWR. But with multiple DAWRs, overwriting becomes ambiguous
or unnecessary complicated. So let's not allow it.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/xmon/xmon.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d8c0f01e4b24..99e9138661e4 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1382,6 +1382,10 @@ bpt_cmds(void)
printf("Hardware data breakpoint not supported on this cpu\n");
break;
}
+ if (dabr.enabled) {
+ printf("Couldn't find free breakpoint register\n");
+ break;
+ }
mode = 7;
cmd = inchar();
if (cmd == 'r')
--
2.21.1
^ permalink raw reply related
* [PATCH v5 14/16] powerpc/watchpoint: Don't allow concurrent perf and ptrace events
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
With Book3s DAWR, ptrace and perf watchpoints on powerpc behaves
differently. Ptrace watchpoint works in one-shot mode and generates
signal before executing instruction. It's ptrace user's job to
single-step the instruction and re-enable the watchpoint. OTOH, in
case of perf watchpoint, kernel emulates/single-steps the instruction
and then generates event. If perf and ptrace creates two events with
same or overlapping address ranges, it's ambiguous to decide who
should single-step the instruction. Because of this issue, don't
allow perf and ptrace watchpoint at the same time if their address
range overlaps.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/include/asm/hw_breakpoint.h | 2 +
arch/powerpc/kernel/hw_breakpoint.c | 221 +++++++++++++++++++++++
kernel/events/hw_breakpoint.c | 16 ++
3 files changed, 239 insertions(+)
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index add5aa076919..f42a55eb77d2 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -70,6 +70,8 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
int arch_install_hw_breakpoint(struct perf_event *bp);
void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+int arch_reserve_bp_slot(struct perf_event *bp);
+void arch_release_bp_slot(struct perf_event *bp);
void arch_unregister_hw_breakpoint(struct perf_event *bp);
void hw_breakpoint_pmu_read(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 28d57d841642..c8623708c9c7 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -123,6 +123,227 @@ static bool is_ptrace_bp(struct perf_event *bp)
return bp->overflow_handler == ptrace_triggered;
}
+struct breakpoint {
+ struct list_head list;
+ struct perf_event *bp;
+ bool ptrace_bp;
+};
+
+static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
+static LIST_HEAD(task_bps);
+
+static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return ERR_PTR(-ENOMEM);
+ tmp->bp = bp;
+ tmp->ptrace_bp = is_ptrace_bp(bp);
+ return tmp;
+}
+
+static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
+{
+ __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
+
+ bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
+ bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
+ bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
+ bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);
+
+ return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
+}
+
+static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
+{
+ return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
+}
+
+static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
+{
+ return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
+}
+
+static int task_bps_add(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ tmp = alloc_breakpoint(bp);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ list_add(&tmp->list, &task_bps);
+ return 0;
+}
+
+static void task_bps_remove(struct perf_event *bp)
+{
+ struct list_head *pos, *q;
+
+ list_for_each_safe(pos, q, &task_bps) {
+ struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
+
+ if (tmp->bp == bp) {
+ list_del(&tmp->list);
+ kfree(tmp);
+ break;
+ }
+ }
+}
+
+/*
+ * If any task has breakpoint from alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool all_task_bps_check(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ list_for_each_entry(tmp, &task_bps, list) {
+ if (!can_co_exist(tmp, bp))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * If same task has breakpoint from alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool same_task_bps_check(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ list_for_each_entry(tmp, &task_bps, list) {
+ if (tmp->bp->hw.target == bp->hw.target &&
+ !can_co_exist(tmp, bp))
+ return true;
+ }
+ return false;
+}
+
+static int cpu_bps_add(struct perf_event *bp)
+{
+ struct breakpoint **cpu_bp;
+ struct breakpoint *tmp;
+ int i = 0;
+
+ tmp = alloc_breakpoint(bp);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!cpu_bp[i]) {
+ cpu_bp[i] = tmp;
+ break;
+ }
+ }
+ return 0;
+}
+
+static void cpu_bps_remove(struct perf_event *bp)
+{
+ struct breakpoint **cpu_bp;
+ int i = 0;
+
+ cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!cpu_bp[i])
+ continue;
+
+ if (cpu_bp[i]->bp == bp) {
+ kfree(cpu_bp[i]);
+ cpu_bp[i] = NULL;
+ break;
+ }
+ }
+}
+
+static bool cpu_bps_check(int cpu, struct perf_event *bp)
+{
+ struct breakpoint **cpu_bp;
+ int i;
+
+ cpu_bp = per_cpu_ptr(cpu_bps, cpu);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp))
+ return true;
+ }
+ return false;
+}
+
+static bool all_cpu_bps_check(struct perf_event *bp)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu_bps_check(cpu, bp))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * We don't use any locks to serialize accesses to cpu_bps or task_bps
+ * because are already inside nr_bp_mutex.
+ */
+int arch_reserve_bp_slot(struct perf_event *bp)
+{
+ int ret;
+
+ /* ptrace breakpoint */
+ if (is_ptrace_bp(bp)) {
+ if (all_cpu_bps_check(bp))
+ return -ENOSPC;
+
+ if (same_task_bps_check(bp))
+ return -ENOSPC;
+
+ return task_bps_add(bp);
+ }
+
+ /* perf breakpoint */
+ if (is_kernel_addr(bp->attr.bp_addr))
+ return 0;
+
+ if (bp->hw.target && bp->cpu == -1) {
+ if (same_task_bps_check(bp))
+ return -ENOSPC;
+
+ return task_bps_add(bp);
+ } else if (!bp->hw.target && bp->cpu != -1) {
+ if (all_task_bps_check(bp))
+ return -ENOSPC;
+
+ return cpu_bps_add(bp);
+ }
+
+ if (same_task_bps_check(bp))
+ return -ENOSPC;
+
+ ret = cpu_bps_add(bp);
+ if (ret)
+ return ret;
+ ret = task_bps_add(bp);
+ if (ret)
+ cpu_bps_remove(bp);
+
+ return ret;
+}
+
+void arch_release_bp_slot(struct perf_event *bp)
+{
+ if (!is_kernel_addr(bp->attr.bp_addr)) {
+ if (bp->hw.target)
+ task_bps_remove(bp);
+ if (bp->cpu != -1)
+ cpu_bps_remove(bp);
+ }
+}
+
/*
* Perform cleanup of arch-specific counters during unregistration
* of the perf-event
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 3cc8416ec844..b48d7039a015 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -213,6 +213,15 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
list_del(&bp->hw.bp_list);
}
+__weak int arch_reserve_bp_slot(struct perf_event *bp)
+{
+ return 0;
+}
+
+__weak void arch_release_bp_slot(struct perf_event *bp)
+{
+}
+
/*
* Function to perform processor-specific cleanup during unregistration
*/
@@ -270,6 +279,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
struct bp_busy_slots slots = {0};
enum bp_type_idx type;
int weight;
+ int ret;
/* We couldn't initialize breakpoint constraints on boot */
if (!constraints_initialized)
@@ -294,6 +304,10 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
if (slots.pinned + (!!slots.flexible) > nr_slots[type])
return -ENOSPC;
+ ret = arch_reserve_bp_slot(bp);
+ if (ret)
+ return ret;
+
toggle_bp_slot(bp, true, type, weight);
return 0;
@@ -317,6 +331,8 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
enum bp_type_idx type;
int weight;
+ arch_release_bp_slot(bp);
+
type = find_slot_idx(bp_type);
weight = hw_breakpoint_weight(bp);
toggle_bp_slot(bp, false, type, weight);
--
2.21.1
^ permalink raw reply related
* [PATCH v5 13/16] powerpc/watchpoint: Prepare handler to handle more than one watcnhpoint
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
Currently we assume that we have only one watchpoint supported by hw.
Get rid of that assumption and use dynamic loop instead. This should
make supporting more watchpoints very easy.
With more than one watchpoint, exception handler needs to know which
DAWR caused the exception, and hw currently does not provide it. So
we need sw logic for the same. To figure out which DAWR caused the
exception, check all different combinations of user specified range,
DAWR address range, actual access range and DAWRX constrains. For ex,
if user specified range and actual access range overlaps but DAWRX is
configured for readonly watchpoint and the instruction is store, this
DAWR must not have caused exception.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/include/asm/processor.h | 2 +-
arch/powerpc/include/asm/sstep.h | 2 +
arch/powerpc/kernel/hw_breakpoint.c | 400 +++++++++++++++++++++------
arch/powerpc/kernel/process.c | 3 -
4 files changed, 315 insertions(+), 92 deletions(-)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 668c02c67b61..251f50eec9fa 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -185,7 +185,7 @@ struct thread_struct {
* Helps identify source of single-step exception and subsequent
* hw-breakpoint enablement
*/
- struct perf_event *last_hit_ubp;
+ struct perf_event *last_hit_ubp[HBP_NUM_MAX];
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
unsigned long trap_nr; /* last trap # on this thread */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 769f055509c9..38919b27a6fa 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -48,6 +48,8 @@ enum instruction_type {
#define INSTR_TYPE_MASK 0x1f
+#define OP_IS_LOAD(type) ((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX)
+#define OP_IS_STORE(type) ((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX)
#define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX)
/* Compute flags, ORed in with type */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index ab0dd22fed5f..28d57d841642 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -30,7 +30,7 @@
* Stores the breakpoints currently in use on each breakpoint address
* register for every cpu
*/
-static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
/*
* Returns total number of data or instruction breakpoints available.
@@ -42,6 +42,17 @@ int hw_breakpoint_slots(int type)
return 0; /* no instruction breakpoints available */
}
+static bool single_step_pending(void)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (current->thread.last_hit_ubp[i])
+ return true;
+ }
+ return false;
+}
+
/*
* Install a perf counter breakpoint.
*
@@ -54,16 +65,26 @@ int hw_breakpoint_slots(int type)
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
- struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
- *slot = bp;
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+ return -EBUSY;
/*
* Do not install DABR values if the instruction must be single-stepped.
* If so, DABR will be populated in single_step_dabr_instruction().
*/
- if (current->thread.last_hit_ubp != bp)
- __set_breakpoint(0, info);
+ if (!single_step_pending())
+ __set_breakpoint(i, info);
return 0;
}
@@ -79,15 +100,22 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
- struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+ struct arch_hw_breakpoint null_brk = {0};
+ struct perf_event **slot;
+ int i;
- if (*slot != bp) {
- WARN_ONCE(1, "Can't find the breakpoint");
- return;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
}
- *slot = NULL;
- hw_breakpoint_disable();
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+ return;
+
+ __set_breakpoint(i, &null_brk);
}
static bool is_ptrace_bp(struct perf_event *bp)
@@ -107,8 +135,14 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* restoration variables to prevent dangling pointers.
* FIXME, this should not be using bp->ctx at all! Sayeth peterz.
*/
- if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
- bp->ctx->task->thread.last_hit_ubp = NULL;
+ if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) {
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (bp->ctx->task->thread.last_hit_ubp[i] == bp)
+ bp->ctx->task->thread.last_hit_ubp[i] = NULL;
+ }
+ }
}
/*
@@ -220,90 +254,215 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
{
struct arch_hw_breakpoint *info;
+ int i;
- if (likely(!tsk->thread.last_hit_ubp))
- return;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (unlikely(tsk->thread.last_hit_ubp[i]))
+ goto reset;
+ }
+ return;
- info = counter_arch_bp(tsk->thread.last_hit_ubp);
+reset:
regs->msr &= ~MSR_SE;
- __set_breakpoint(0, info);
- tsk->thread.last_hit_ubp = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
+ __set_breakpoint(i, info);
+ tsk->thread.last_hit_ubp[i] = NULL;
+ }
}
-static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
+static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
{
return ((info->address <= dar) && (dar - info->address < info->len));
}
-static bool
-dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+static bool dar_user_range_overlaps(unsigned long dar, int size,
+ struct arch_hw_breakpoint *info)
{
- return ((dar <= info->address + info->len - 1) &&
- (dar + size - 1 >= info->address));
+ return ((dar < info->address + info->len) &&
+ (dar + size > info->address));
+}
+
+static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+
+ hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ return ((hw_start_addr <= dar) && (hw_end_addr > dar));
+}
+
+static bool dar_hw_range_overlaps(unsigned long dar, int size,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+
+ hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ return ((dar < hw_end_addr) && (dar + size > hw_start_addr));
}
/*
- * Handle debug exception notifications.
+ * If hw has multiple DAWR registers, we also need to check all
+ * dawrx constraint bits to confirm this is _really_ a valid event.
*/
-static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
- struct arch_hw_breakpoint *info)
+static bool check_dawrx_constraints(struct pt_regs *regs, int type,
+ struct arch_hw_breakpoint *info)
{
- unsigned int instr = 0;
- int ret, type, size;
- struct instruction_op op;
- unsigned long addr = info->address;
+ if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
+ return false;
- if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
- goto fail;
+ if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE))
+ return false;
- ret = analyse_instr(&op, regs, instr);
- type = GETTYPE(op.type);
- size = GETSIZE(op.type);
+ if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
+ return false;
- if (!ret && (type == LARX || type == STCX)) {
- printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
- " Breakpoint at 0x%lx will be disabled.\n", addr);
- goto disable;
- }
+ if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER))
+ return false;
+
+ return true;
+}
+
+/*
+ * Return true if the event is valid wrt dawr configuration,
+ * including extraneous exception. Otherwise return false.
+ */
+static bool check_constraints(struct pt_regs *regs, unsigned int instr,
+ int type, int size,
+ struct arch_hw_breakpoint *info)
+{
+ bool in_user_range = dar_in_user_range(regs->dar, info);
+ bool dawrx_constraints;
/*
- * If it's extraneous event, we still need to emulate/single-
- * step the instruction, but we don't generate an event.
+ * 8xx supports only one breakpoint and thus we can
+ * unconditionally return true.
*/
- if (size && !dar_range_overlaps(regs->dar, size, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (!in_user_range)
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ if (unlikely(instr == -1)) {
+ if (in_user_range)
+ return true;
+
+ if (dar_in_hw_range(regs->dar, info)) {
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
return false;
}
- if (!emulate_step(regs, instr))
- goto fail;
+ dawrx_constraints = check_dawrx_constraints(regs, type, info);
- return true;
+ if (dar_user_range_overlaps(regs->dar, size, info))
+ return dawrx_constraints;
+
+ if (dar_hw_range_overlaps(regs->dar, size, info)) {
+ if (dawrx_constraints) {
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+ }
+ return false;
+}
+
+static int get_instr_detail(struct pt_regs *regs, int *type, int *size,
+ bool *larx_stcx)
+{
+ unsigned int instr = 0;
+ struct instruction_op op;
+
+ if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+ return -1;
+
+ analyse_instr(&op, regs, instr);
-fail:
/*
- * We've failed in reliably handling the hw-breakpoint. Unregister
- * it and throw a warning message to let the user know about it.
+ * Set size = 8 if analyse_instr() fails. If it's a userspace
+ * watchpoint(valid or extraneous), we can notify user about it.
+ * If it's a kernel watchpoint, instruction emulation will fail
+ * in stepping_handler() and watchpoint will be disabled.
*/
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", addr);
+ *type = GETTYPE(op.type);
+ *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8;
+ *larx_stcx = (*type == LARX || *type == STCX);
-disable:
+ return instr;
+}
+
+/*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info)
+{
+ WARN(1, "Unable to handle hardware breakpoint."
+ "Breakpoint at 0x%lx will be disabled.",
+ info->address);
+ perf_event_disable_inatomic(bp);
+}
+
+static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info)
+{
+ printk_ratelimited("Breakpoint hit on instruction that can't "
+ "be emulated. Breakpoint at 0x%lx will be "
+ "disabled.\n", info->address);
perf_event_disable_inatomic(bp);
- return false;
+}
+
+static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
+ struct arch_hw_breakpoint **info, int *hit,
+ unsigned int instr)
+{
+ int i;
+ int stepped;
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ current->thread.last_hit_ubp[i] = bp[i];
+ info[i] = NULL;
+ }
+ regs->msr |= MSR_SE;
+ return false;
+ }
+
+ stepped = emulate_step(regs, instr);
+ if (!stepped) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ handler_error(bp[i], info[i]);
+ info[i] = NULL;
+ }
+ return false;
+ }
+ return true;
}
int hw_breakpoint_handler(struct die_args *args)
{
+ bool err = false;
int rc = NOTIFY_STOP;
- struct perf_event *bp;
+ struct perf_event *bp[HBP_NUM_MAX] = {0};
struct pt_regs *regs = args->regs;
- struct arch_hw_breakpoint *info;
+ struct arch_hw_breakpoint *info[HBP_NUM_MAX] = {0};
+ int i;
+ int hit[HBP_NUM_MAX] = {0};
+ int nr_hit = 0;
+ bool ptrace_bp = false;
+ unsigned int instr = 0;
+ int type = 0;
+ int size = 0;
+ bool larx_stcx = false;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -316,12 +475,39 @@ int hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();
- bp = __this_cpu_read(bp_per_reg);
- if (!bp) {
+ if (!IS_ENABLED(CONFIG_PPC_8xx))
+ instr = get_instr_detail(regs, &type, &size, &larx_stcx);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ bp[i] = __this_cpu_read(bp_per_reg[i]);
+ if (!bp[i])
+ continue;
+
+ info[i] = counter_arch_bp(bp[i]);
+ info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ if (check_constraints(regs, instr, type, size, info[i])) {
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && instr == -1) {
+ handler_error(bp[i], info[i]);
+ info[i] = NULL;
+ err = 1;
+ continue;
+ }
+
+ if (is_ptrace_bp(bp[i]))
+ ptrace_bp = true;
+ hit[i] = 1;
+ nr_hit++;
+ }
+ }
+
+ if (err)
+ goto reset;
+
+ if (!nr_hit) {
rc = NOTIFY_DONE;
goto out;
}
- info = counter_arch_bp(bp);
/*
* Return early after invoking user-callback function without restoring
@@ -329,29 +515,50 @@ int hw_breakpoint_handler(struct die_args *args)
* one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
* generated in do_dabr().
*/
- if (is_ptrace_bp(bp)) {
- perf_bp_event(bp, regs);
+ if (ptrace_bp) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ perf_bp_event(bp[i], regs);
+ info[i] = NULL;
+ }
rc = NOTIFY_DONE;
- goto out;
+ goto reset;
}
- info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (IS_ENABLED(CONFIG_PPC_8xx)) {
- if (!dar_within_range(regs->dar, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
- } else {
- if (!stepping_handler(regs, bp, info))
- goto out;
+ if (!IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (larx_stcx) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ larx_stcx_err(bp[i], info[i]);
+ info[i] = NULL;
+ }
+ goto reset;
+ }
+
+ if (!stepping_handler(regs, bp, info, hit, instr))
+ goto reset;
}
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp[i], regs);
+ }
+
+reset:
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!info[i])
+ continue;
+ __set_breakpoint(i, info[i]);
+ }
- __set_breakpoint(0, info);
out:
rcu_read_unlock();
return rc;
@@ -366,26 +573,43 @@ static int single_step_dabr_instruction(struct die_args *args)
struct pt_regs *regs = args->regs;
struct perf_event *bp = NULL;
struct arch_hw_breakpoint *info;
+ int i;
+ bool found = false;
- bp = current->thread.last_hit_ubp;
/*
* Check if we are single-stepping as a result of a
* previous HW Breakpoint exception
*/
- if (!bp)
- return NOTIFY_DONE;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ bp = current->thread.last_hit_ubp[i];
+
+ if (!bp)
+ continue;
+
+ found = true;
+ info = counter_arch_bp(bp);
+
+ /*
+ * We shall invoke the user-defined callback function in the
+ * single stepping handler to confirm to 'trigger-after-execute'
+ * semantics
+ */
+ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp, regs);
+ current->thread.last_hit_ubp[i] = NULL;
+ }
- info = counter_arch_bp(bp);
+ if (!found)
+ return NOTIFY_DONE;
- /*
- * We shall invoke the user-defined callback function in the single
- * stepping handler to confirm to 'trigger-after-execute' semantics
- */
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ bp = __this_cpu_read(bp_per_reg[i]);
+ if (!bp)
+ continue;
- __set_breakpoint(0, info);
- current->thread.last_hit_ubp = NULL;
+ info = counter_arch_bp(bp);
+ __set_breakpoint(i, info);
+ }
/*
* If the process was being single-stepped by ptrace, let the
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dcf9c5b4ac59..98bfd0153291 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -629,9 +629,6 @@ void do_break (struct pt_regs *regs, unsigned long address,
if (debugger_break_match(regs))
return;
- /* Clear the breakpoint */
- hw_breakpoint_disable();
-
/* Deliver the signal to userspace */
force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
}
--
2.21.1
^ permalink raw reply related
* [PATCH v5 12/16] powerpc/watchpoint: Use builtin ALIGN*() macros
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
Currently we calculate hw aligned start and end addresses manually.
Replace them with builtin ALIGN_DOWN() and ALIGN() macros.
So far end_addr was inclusive but this patch makes it exclusive (by
avoiding -1) for better readability.
Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/include/asm/hw_breakpoint.h | 5 +++--
arch/powerpc/kernel/hw_breakpoint.c | 12 ++++++------
arch/powerpc/kernel/process.c | 8 ++++----
arch/powerpc/kernel/ptrace/ptrace-noadv.c | 2 +-
4 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index d472b2eb757e..add5aa076919 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -34,10 +34,11 @@ struct arch_hw_breakpoint {
#define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
HW_BRK_TYPE_HYP)
+/* Minimum granularity */
#ifdef CONFIG_PPC_8xx
-#define HW_BREAKPOINT_ALIGN 0x3
+#define HW_BREAKPOINT_SIZE 0x4
#else
-#define HW_BREAKPOINT_ALIGN 0x7
+#define HW_BREAKPOINT_SIZE 0x8
#endif
#define DABR_MAX_LEN 8
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 319a761b7412..ab0dd22fed5f 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -145,10 +145,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
* <---8 bytes--->
*
* In this case, we should configure hw as:
- * start_addr = address & ~HW_BREAKPOINT_ALIGN
+ * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1)
* len = 16 bytes
*
- * @start_addr and @end_addr are inclusive.
+ * @start_addr is inclusive but @end_addr is exclusive.
*/
static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
{
@@ -156,14 +156,14 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
u16 hw_len;
unsigned long start_addr, end_addr;
- start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
- end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
- hw_len = end_addr - start_addr + 1;
+ start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
+ end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
+ hw_len = end_addr - start_addr;
if (dawr_enabled()) {
max_len = DAWR_MAX_LEN;
/* DAWR region can't cross 512 bytes boundary */
- if ((start_addr >> 9) != (end_addr >> 9))
+ if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M))
return -EINVAL;
} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
/* 8xx can setup a range without limitation */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 41a59a37383b..dcf9c5b4ac59 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -800,12 +800,12 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
LCTRL1_CRWF_RW;
unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
- unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
- unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
+ unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE);
+ unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE);
if (start_addr == 0)
lctrl2 |= LCTRL2_LW0LA_F;
- else if (end_addr == ~0U)
+ else if (end_addr == 0)
lctrl2 |= LCTRL2_LW0LA_E;
else
lctrl2 |= LCTRL2_LW0LA_EandF;
@@ -821,7 +821,7 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
mtspr(SPRN_CMPE, start_addr - 1);
- mtspr(SPRN_CMPF, end_addr + 1);
+ mtspr(SPRN_CMPF, end_addr);
mtspr(SPRN_LCTRL1, lctrl1);
mtspr(SPRN_LCTRL2, lctrl2);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index 08cb8c1b504c..697c7e4b5877 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -216,7 +216,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
if ((unsigned long)bp_info->addr >= TASK_SIZE)
return -EIO;
- brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+ brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE);
brk.type = HW_BRK_TYPE_TRANSLATE;
brk.len = DABR_MAX_LEN;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
--
2.21.1
^ permalink raw reply related
* [PATCH v5 11/16] powerpc/watchpoint: Introduce is_ptrace_bp() function
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
Introduce is_ptrace_bp() function and move the check inside the
function. It will be utilize more in later set of patches.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/kernel/hw_breakpoint.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 772b2c953220..319a761b7412 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -90,6 +90,11 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
hw_breakpoint_disable();
}
+static bool is_ptrace_bp(struct perf_event *bp)
+{
+ return bp->overflow_handler == ptrace_triggered;
+}
+
/*
* Perform cleanup of arch-specific counters during unregistration
* of the perf-event
@@ -324,7 +329,7 @@ int hw_breakpoint_handler(struct die_args *args)
* one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
* generated in do_dabr().
*/
- if (bp->overflow_handler == ptrace_triggered) {
+ if (is_ptrace_bp(bp)) {
perf_bp_event(bp, regs);
rc = NOTIFY_DONE;
goto out;
--
2.21.1
^ permalink raw reply related
* [PATCH v5 10/16] powerpc/watchpoint: Use loop for thread_struct->ptrace_bps
From: Ravi Bangoria @ 2020-05-11 2:59 UTC (permalink / raw)
To: mpe, mikey
Cc: apopple, ravi.bangoria, peterz, fweisbec, oleg, npiggin,
linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200511025911.212827-1-ravi.bangoria@linux.ibm.com>
ptrace_bps is already an array of size HBP_NUM_MAX. But we use
hardcoded index 0 while fetching/updating it. Convert such code
to loop over array.
ptrace interface to use multiple watchpoint remains same. eg:
two PPC_PTRACE_SETHWDEBUG calls will create two watchpoint if
underneath hw supports it.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/kernel/hw_breakpoint.c | 7 ++++--
arch/powerpc/kernel/process.c | 6 ++++-
arch/powerpc/kernel/ptrace/ptrace-noadv.c | 28 +++++++++++++++++------
3 files changed, 31 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 5826f1f2cab9..772b2c953220 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -419,10 +419,13 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
*/
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
+ int i;
struct thread_struct *t = &tsk->thread;
- unregister_hw_breakpoint(t->ptrace_bps[0]);
- t->ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
}
void hw_breakpoint_pmu_read(struct perf_event *bp)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6d1b7cede900..41a59a37383b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1604,6 +1604,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int i;
+#endif
klp_init_thread_info(p);
@@ -1663,7 +1666,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
p->thread.ksp_limit = (unsigned long)end_of_stack(p);
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- p->thread.ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++)
+ p->thread.ptrace_bps[i] = NULL;
#endif
p->thread.fp_save_area = NULL;
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index 0dbb35392dd2..08cb8c1b504c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -168,6 +168,19 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l
return 0;
}
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static int find_empty_ptrace_bp(struct thread_struct *thread)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!thread->ptrace_bps[i])
+ return i;
+ }
+ return -1;
+}
+#endif
+
static int find_empty_hw_brk(struct thread_struct *thread)
{
int i;
@@ -217,8 +230,9 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
len = 1;
else
return -EINVAL;
- bp = thread->ptrace_bps[0];
- if (bp)
+
+ i = find_empty_ptrace_bp(thread);
+ if (i < 0)
return -ENOSPC;
/* Create a new breakpoint request if one doesn't exist already */
@@ -228,13 +242,13 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
arch_bp_generic_fields(brk.type, &attr.bp_type);
bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
- thread->ptrace_bps[0] = bp;
+ thread->ptrace_bps[i] = bp;
if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
+ thread->ptrace_bps[i] = NULL;
return PTR_ERR(bp);
}
- return 1;
+ return i + 1;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
@@ -263,10 +277,10 @@ long ppc_del_hwdebug(struct task_struct *child, long data)
return -EINVAL;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
+ bp = thread->ptrace_bps[data - 1];
if (bp) {
unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
+ thread->ptrace_bps[data - 1] = NULL;
} else {
ret = -ENOENT;
}
--
2.21.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox