* [PATCH v2 4/5] powerpc/mm: Remove custom stack expansion checking
From: Michael Ellerman @ 2020-07-24 9:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-kernel, dja
In-Reply-To: <20200724092528.1578671-1-mpe@ellerman.id.au>
We have powerpc specific logic in our page fault handling to decide if
an access to an unmapped address below the stack pointer should expand
the stack VMA.
The logic aims to prevent userspace from doing bad accesses below the
stack pointer. However as long as the stack is < 1MB in size, we allow
all accesses without further checks. Adding some debug I see that I
can do a full kernel build and LTP run, and not a single process has
used more than 1MB of stack. So for the majority of processes the
logic never even fires.
We also recently found a nasty bug in this code which could cause
userspace programs to be killed during signal delivery. It went
unnoticed presumably because most processes use < 1MB of stack.
The generic mm code has also grown support for stack guard pages since
this code was originally written, so the most heinous case of the
stack expanding into other mappings is now handled for us.
Finally although some other arches have special logic in this path,
from what I can tell none of x86, arm64, arm and s390 impose any extra
checks other than those in expand_stack().
So drop our complicated logic and like other architectures just let
the stack expand as long as it's within the rlimit.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/mm/fault.c | 109 ++--------------------------------------
1 file changed, 5 insertions(+), 104 deletions(-)
v2: no change just rebased.
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3ebb1792e636..925a7231abb3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,39 +42,7 @@
#include <asm/kup.h>
#include <asm/inst.h>
-/*
- * Check whether the instruction inst is a store using
- * an update addressing form which will update r1.
- */
-static bool store_updates_sp(struct ppc_inst inst)
-{
- /* check for 1 in the rA field */
- if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1)
- return false;
- /* check major opcode */
- switch (ppc_inst_primary_opcode(inst)) {
- case OP_STWU:
- case OP_STBU:
- case OP_STHU:
- case OP_STFSU:
- case OP_STFDU:
- return true;
- case OP_STD: /* std or stdu */
- return (ppc_inst_val(inst) & 3) == 1;
- case OP_31:
- /* check minor opcode */
- switch ((ppc_inst_val(inst) >> 1) & 0x3ff) {
- case OP_31_XOP_STDUX:
- case OP_31_XOP_STWUX:
- case OP_31_XOP_STBUX:
- case OP_31_XOP_STHUX:
- case OP_31_XOP_STFSUX:
- case OP_31_XOP_STFDUX:
- return true;
- }
- }
- return false;
-}
+
/*
* do_page_fault error handling helpers
*/
@@ -267,57 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
return false;
}
-// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
-#define SIGFRAME_MAX_SIZE (4096 + 128)
-
-static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
- struct vm_area_struct *vma, unsigned int flags,
- bool *must_retry)
-{
- /*
- * N.B. The POWER/Open ABI allows programs to access up to
- * 288 bytes below the stack pointer.
- * The kernel signal delivery code writes a bit over 4KB
- * below the stack pointer (r1) before decrementing it.
- * The exec code can write slightly over 640kB to the stack
- * before setting the user r1. Thus we allow the stack to
- * expand to 1MB without further checks.
- */
- if (address + 0x100000 < vma->vm_end) {
- struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip;
- /* get user regs even if this fault is in kernel mode */
- struct pt_regs *uregs = current->thread.regs;
- if (uregs == NULL)
- return true;
-
- /*
- * A user-mode access to an address a long way below
- * the stack pointer is only valid if the instruction
- * is one which would update the stack pointer to the
- * address accessed if the instruction completed,
- * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
- * (or the byte, halfword, float or double forms).
- *
- * If we don't check this then any write to the area
- * between the last mapped region and the stack will
- * expand the stack rather than segfaulting.
- */
- if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
- return false;
-
- if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
- access_ok(nip, sizeof(*nip))) {
- struct ppc_inst inst;
-
- if (!probe_user_read_inst(&inst, nip))
- return !store_updates_sp(inst);
- *must_retry = true;
- }
- return true;
- }
- return false;
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
struct vm_area_struct *vma)
@@ -483,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_user = user_mode(regs);
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
- bool must_retry = false;
bool kprobe_fault = kprobe_page_fault(regs, 11);
if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
@@ -572,30 +488,15 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
vma = find_vma(mm, address);
if (unlikely(!vma))
return bad_area(regs, address);
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- return bad_area(regs, address);
- /* The stack is being expanded, check if it's valid */
- if (unlikely(bad_stack_expansion(regs, address, vma, flags,
- &must_retry))) {
- if (!must_retry)
+ if (unlikely(vma->vm_start > address)) {
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
return bad_area(regs, address);
- mmap_read_unlock(mm);
- if (fault_in_pages_readable((const char __user *)regs->nip,
- sizeof(unsigned int)))
- return bad_area_nosemaphore(regs, address);
- goto retry;
+ if (unlikely(expand_stack(vma, address)))
+ return bad_area(regs, address);
}
- /* Try to expand it */
- if (unlikely(expand_stack(vma, address)))
- return bad_area(regs, address);
-
-good_area:
-
#ifdef CONFIG_PPC_MEM_KEYS
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma)))
--
2.25.1
^ permalink raw reply related
* [PATCH v2 5/5] selftests/powerpc: Remove powerpc special cases from stack expansion test
From: Michael Ellerman @ 2020-07-24 9:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-kernel, dja
In-Reply-To: <20200724092528.1578671-1-mpe@ellerman.id.au>
Now that the powerpc code behaves the same as other architectures we
can drop the special cases we had.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
.../powerpc/mm/stack_expansion_ldst.c | 41 +++----------------
1 file changed, 5 insertions(+), 36 deletions(-)
v2: no change just rebased.
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
index 8dbfb51acf0f..ed9143990888 100644
--- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -56,13 +56,7 @@ int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta,
#else
asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
#endif
-
- // Kludge, delta < 0 indicates relative to SP
- if (delta < 0)
- target = stack_top_sp + delta;
- else
- target = stack_high - delta + 1;
-
+ target = stack_high - delta + 1;
volatile char *p = (char *)target;
if (type == STORE)
@@ -162,41 +156,16 @@ static int test_one(unsigned int stack_used, int delta, enum access_type type)
static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
{
- assert(test_one(DEFAULT_SIZE, 512 * _KB, type) == 0);
+ unsigned long delta;
- // powerpc has a special case to allow up to 1MB
- assert(test_one(DEFAULT_SIZE, 1 * _MB, type) == 0);
-
-#ifdef __powerpc__
- // This fails on powerpc because it's > 1MB and is not a stdu &
- // not close to r1
- assert(test_one(DEFAULT_SIZE, 1 * _MB + 8, type) != 0);
-#else
- assert(test_one(DEFAULT_SIZE, 1 * _MB + 8, type) == 0);
-#endif
-
-#ifdef __powerpc__
- // Accessing way past the stack pointer is not allowed on powerpc
- assert(test_one(DEFAULT_SIZE, rlim_cur, type) != 0);
-#else
// We should be able to access anywhere within the rlimit
+ for (delta = page_size; delta <= rlim_cur; delta += page_size)
+ assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
-#endif
// But if we go past the rlimit it should fail
assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
-
- // Above 1MB powerpc only allows accesses within 4224 bytes of
- // r1 for accesses that aren't stdu
- assert(test_one(1 * _MB + page_size - 128, -4224, type) == 0);
-#ifdef __powerpc__
- assert(test_one(1 * _MB + page_size - 128, -4225, type) != 0);
-#else
- assert(test_one(1 * _MB + page_size - 128, -4225, type) == 0);
-#endif
-
- // By consuming 2MB of stack we test the stdu case
- assert(test_one(2 * _MB + page_size - 128, -4224, type) == 0);
}
static int test(void)
--
2.25.1
^ permalink raw reply related
* Re: [PATCH 2/2] powerpc/64s: system call support for scv/rfscv instructions
From: Michael Ellerman @ 2020-07-24 10:45 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linux-api, musl, linuxppc-dev, Nicholas Piggin, libc-dev
In-Reply-To: <20200723184814.Horde.pk5BO9iFqyGX5D4TW5wqmg1@messagerie.si.c-s.fr>
Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Michael Ellerman <mpe@ellerman.id.au> a écrit :
>
>> Nicholas Piggin <npiggin@gmail.com> writes:
>>> diff --git a/arch/powerpc/include/asm/ppc-opcode.h
>>> b/arch/powerpc/include/asm/ppc-opcode.h
>>> index 2a39c716c343..b2bdc4de1292 100644
>>> --- a/arch/powerpc/include/asm/ppc-opcode.h
>>> +++ b/arch/powerpc/include/asm/ppc-opcode.h
>>> @@ -257,6 +257,7 @@
>>> #define PPC_INST_MFVSRD 0x7c000066
>>> #define PPC_INST_MTVSRD 0x7c000166
>>> #define PPC_INST_SC 0x44000002
>>> +#define PPC_INST_SCV 0x44000001
>> ...
>>> @@ -411,6 +412,7 @@
>> ...
>>> +#define __PPC_LEV(l) (((l) & 0x7f) << 5)
>>
>> These conflicted and didn't seem to be used so I dropped them.
>>
>>> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
>>> index 5abe98216dc2..161bfccbc309 100644
>>> --- a/arch/powerpc/lib/sstep.c
>>> +++ b/arch/powerpc/lib/sstep.c
>>> @@ -3378,6 +3382,16 @@ int emulate_step(struct pt_regs *regs,
>>> struct ppc_inst instr)
>>> regs->msr = MSR_KERNEL;
>>> return 1;
>>>
>>> + case SYSCALL_VECTORED_0: /* scv 0 */
>>> + regs->gpr[9] = regs->gpr[13];
>>> + regs->gpr[10] = MSR_KERNEL;
>>> + regs->gpr[11] = regs->nip + 4;
>>> + regs->gpr[12] = regs->msr & MSR_MASK;
>>> + regs->gpr[13] = (unsigned long) get_paca();
>>> + regs->nip = (unsigned long) &system_call_vectored_emulate;
>>> + regs->msr = MSR_KERNEL;
>>> + return 1;
>>> +
>>
>> This broke the ppc64e build:
>>
>> ld: arch/powerpc/lib/sstep.o:(.toc+0x0): undefined reference to
>> `system_call_vectored_emulate'
>> make[1]: *** [/home/michael/linux/Makefile:1139: vmlinux] Error 1
>>
>> I wrapped it in #ifdef CONFIG_PPC64_BOOK3S.
>
> You mean CONFIG_PPC_BOOK3S_64 ?
I hope so ...
#### ## ####.
Will send a fixup. Thanks for noticing.
cheers
^ permalink raw reply
* Re: [PATCH v 1/1] powerpc/64s: allow for clang's objdump differences
From: Michael Ellerman @ 2020-07-24 10:48 UTC (permalink / raw)
To: Bill Wendling, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, Bill Wendling
In-Reply-To: <20200724001605.3718561-1-morbo@google.com>
Hi Bill,
Bill Wendling <morbo@google.com> writes:
> Clang's objdump emits slightly different output from GNU's objdump,
> causing a list of warnings to be emitted during relocatable builds.
> E.g., clang's objdump emits this:
>
> c000000000000004: 2c 00 00 48 b 0xc000000000000030
> ...
> c000000000005c6c: 10 00 82 40 bf 2, 0xc000000000005c7c
>
> while GNU objdump emits:
>
> c000000000000004: 2c 00 00 48 b c000000000000030 <__start+0x30>
> ...
> c000000000005c6c: 10 00 82 40 bne c000000000005c7c <masked_interrupt+0x3c>
>
> Adjust llvm-objdump's output to remove the extraneous '0x' and convert
> 'bf' and 'bt' to 'bne' and 'beq' resp. to more closely match GNU
> objdump's output.
>
> Note that clang's objdump doesn't yet output the relocation symbols on
> PPC.
>
> Signed-off-by: Bill Wendling <morbo@google.com>
> ---
> arch/powerpc/tools/unrel_branch_check.sh | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
> index 77114755dc6f..71ce86b68d18 100755
> --- a/arch/powerpc/tools/unrel_branch_check.sh
> +++ b/arch/powerpc/tools/unrel_branch_check.sh
> @@ -31,6 +31,9 @@ grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]
> grep -v '\<__start_initialization_multiplatform>' |
> grep -v -e 'b.\?.\?ctr' |
> grep -v -e 'b.\?.\?lr' |
> +sed 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' |
> +sed 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' |
> +sed 's/[[:space:]]0x/ /' |
> sed 's/://' |
I know you followed the example in the script of just doing everything
as a separate entry in the pipeline, but I think we could consolidate
all the seds into one?
eg:
sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \
-e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \
-e 's/[[:space:]]0x/ /' \
-e 's/://' |
Does that work?
cheers
^ permalink raw reply
* [PATCH v2] powerpc/numa: Limit possible nodes to within num_possible_nodes
From: Srikar Dronamraju @ 2020-07-24 10:58 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Tyrel Datwyler, Srikar Dronamraju, Nicholas Piggin,
linuxppc-dev
MAX_NUMNODES is a theoretical maximum number of nodes that is supported
by the kernel. Device tree properties expose the number of possible
nodes on the current platform. The kernel would detect this and would
use it for most of its resource allocations. If the platform now
increases the nodes to over what was already exposed, then it may lead
to inconsistencies. Hence limit it to the already exposed nodes.
Suggested-by: Nathan Lynch <nathanl@linux.ibm.com>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Changelog v1 -> v2:
v1: https://lore.kernel.org/linuxppc-dev/20200715120534.3673-1-srikar@linux.vnet.ibm.com/t/#u
Use nr_node_ids instead of num_possible_nodes(). When nodes are
sparse like in PowerNV, nr_node_ids gets the right value, unlike
num_possible_nodes().
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
arch/powerpc/mm/numa.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index e437a9ac4956..383359272270 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -221,7 +221,7 @@ static void initialize_distance_lookup_table(int nid,
}
}
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/* Returns nid in the range [0..nr_node_ids-1], or -1 if no useful numa
* info is found.
*/
static int associativity_to_nid(const __be32 *associativity)
@@ -235,7 +235,7 @@ static int associativity_to_nid(const __be32 *associativity)
nid = of_read_number(&associativity[min_common_depth], 1);
/* POWER4 LPAR uses 0xffff as invalid node */
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = NUMA_NO_NODE;
if (nid > 0 &&
@@ -448,7 +448,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = default_nid;
if (nid > 0) {
--
2.17.1
^ permalink raw reply related
* Re: [v3 13/15] tools/perf: Add perf tools support for extended register capability in powerpc
From: Ravi Bangoria @ 2020-07-24 11:02 UTC (permalink / raw)
To: Athira Rajeev
Cc: ego, mikey, maddy, kvm, Ravi Bangoria, kvm-ppc, svaidyan, acme,
jolsa, linuxppc-dev
In-Reply-To: <1594996707-3727-14-git-send-email-atrajeev@linux.vnet.ibm.com>
Hi Athira,
On 7/17/20 8:08 PM, Athira Rajeev wrote:
> From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
>
> Add extended regs to sample_reg_mask in the tool side to use
> with `-I?` option. Perf tools side uses extended mask to display
> the platform supported register names (with -I? option) to the user
> and also send this mask to the kernel to capture the extended registers
> in each sample. Hence decide the mask value based on the processor
> version.
>
> Currently definitions for `mfspr`, `SPRN_PVR` are part of
> `arch/powerpc/util/header.c`. Move this to a header file so that
> these definitions can be re-used in other source files as well.
It seems this patch has a regression.
Without this patch:
$ sudo ./perf record -I
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.458 MB perf.data (318 samples) ]
With this patch:
$ sudo ./perf record -I
Error:
dummy:HG: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'
Ravi
^ permalink raw reply
* RE: [RFC PATCH] powerpc/pseries/svm: capture instruction faulting on MMIO access, in sprg0 register
From: Michael Ellerman @ 2020-07-24 11:49 UTC (permalink / raw)
To: Ram Pai
Cc: ldufour, aik, kvm-ppc, bharata, sathnaga, sukadev, linuxppc-dev,
bauerman, david
In-Reply-To: <20200722074929.GI7339@oc0525413822.ibm.com>
Ram Pai <linuxram@us.ibm.com> writes:
> On Wed, Jul 22, 2020 at 12:06:06PM +1000, Michael Ellerman wrote:
>> Ram Pai <linuxram@us.ibm.com> writes:
>> > An instruction accessing a mmio address, generates a HDSI fault. This fault is
>> > appropriately handled by the Hypervisor. However in the case of secureVMs, the
>> > fault is delivered to the ultravisor.
>> >
>> > Unfortunately the Ultravisor has no correct-way to fetch the faulting
>> > instruction. The PEF architecture does not allow Ultravisor to enable MMU
>> > translation. Walking the two level page table to read the instruction can race
>> > with other vcpus modifying the SVM's process scoped page table.
>>
>> You're trying to read the guest's kernel text IIUC, that mapping should
>> be stable. Possibly permissions on it could change over time, but the
>> virtual -> real mapping should not.
>
> Actually the code does not capture the address of the instruction in the
> sprg0 register. It captures the instruction itself. So should the mapping
> matter?
>>
>> > This problem can be correctly solved with some help from the kernel.
>> >
>> > Capture the faulting instruction in SPRG0 register, before executing the
>> > faulting instruction. This enables the ultravisor to easily procure the
>> > faulting instruction and emulate it.
>>
>> This is not something I'm going to merge. Sorry.
>
> Ok. Will consider other approaches.
To elaborate ...
You've basically invented a custom ucall ABI. But a really strange one
which takes an instruction as its first parameter in SPRG0, and then
subsequent parameters in any GPR depending on what the instruction was.
The UV should either emulate the instruction, which means the guest
should not be expected to do anything other than execute the
instruction. Or it should be done with a proper ucall that the guest
explicitly makes with a well defined ABI.
cheers
^ permalink raw reply
* Re: [PATCH v3 0/4] powerpc/mm/radix: Memory unplug fixes
From: Michael Ellerman @ 2020-07-24 11:52 UTC (permalink / raw)
To: bharata; +Cc: Nathan Lynch, Aneesh Kumar K.V, linuxppc-dev, david
In-Reply-To: <20200722060506.GO7902@in.ibm.com>
Bharata B Rao <bharata@linux.ibm.com> writes:
> On Tue, Jul 21, 2020 at 10:25:58PM +1000, Michael Ellerman wrote:
>> Bharata B Rao <bharata@linux.ibm.com> writes:
>> > On Tue, Jul 21, 2020 at 11:45:20AM +1000, Michael Ellerman wrote:
>> >> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> >> > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> >> >> This is the next version of the fixes for memory unplug on radix.
>> >> >> The issues and the fix are described in the actual patches.
>> >> >
>> >> > I guess this isn't actually causing problems at runtime right now, but I
>> >> > notice calls to resize_hpt_for_hotplug() from arch_add_memory() and
>> >> > arch_remove_memory(), which ought to be mmu-agnostic:
>> >> >
>> >> > int __ref arch_add_memory(int nid, u64 start, u64 size,
>> >> > struct mhp_params *params)
>> >> > {
>> >> > unsigned long start_pfn = start >> PAGE_SHIFT;
>> >> > unsigned long nr_pages = size >> PAGE_SHIFT;
>> >> > int rc;
>> >> >
>> >> > resize_hpt_for_hotplug(memblock_phys_mem_size());
>> >> >
>> >> > start = (unsigned long)__va(start);
>> >> > rc = create_section_mapping(start, start + size, nid,
>> >> > params->pgprot);
>> >> > ...
>> >>
>> >> Hmm well spotted.
>> >>
>> >> That does return early if the ops are not setup:
>> >>
>> >> int resize_hpt_for_hotplug(unsigned long new_mem_size)
>> >> {
>> >> unsigned target_hpt_shift;
>> >>
>> >> if (!mmu_hash_ops.resize_hpt)
>> >> return 0;
>> >>
>> >>
>> >> And:
>> >>
>> >> void __init hpte_init_pseries(void)
>> >> {
>> >> ...
>> >> if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
>> >> mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
>> >>
>> >> And that comes in via ibm,hypertas-functions:
>> >>
>> >> {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"},
>> >>
>> >>
>> >> But firmware is not necessarily going to add/remove that call based on
>> >> whether we're using hash/radix.
>> >
>> > Correct but hpte_init_pseries() will not be called for radix guests.
>>
>> Yeah, duh. You'd think the function name would have been a sufficient
>> clue for me :)
>>
>> >> So I think a follow-up patch is needed to make this more robust.
>> >>
>> >> Aneesh/Bharata what platform did you test this series on? I'm curious
>> >> how this didn't break.
>> >
>> > I have tested memory hotplug/unplug for radix guest on zz platform and
>> > sanity-tested this for hash guest on P8.
>> >
>> > As noted above, mmu_hash_ops.resize_hpt will not be set for radix
>> > guest and hence we won't see any breakage.
>>
>> OK.
>>
>> That's probably fine as it is then. Or maybe just a comment in
>> resize_hpt_for_hotplug() pointing out that resize_hpt will be NULL if
>> we're using radix.
>
> Or we could move these calls to hpt-only routines like below?
That looks like it would be equivalent, and would nicely isolate those
calls in hash specific code. So yeah I think that's worth sending as a
proper patch, even better if you can test it.
> David - Do you remember if there was any particular reason to have
> these two hpt-resize calls within powerpc-generic memory hotplug code?
I think the HPT resizing was developed before or concurrently with the
radix support, so I would guess it was just not something we thought
about at the time.
cheers
> diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
> index c89b32443cff..1e6fa371cc38 100644
> --- a/arch/powerpc/include/asm/sparsemem.h
> +++ b/arch/powerpc/include/asm/sparsemem.h
> @@ -17,12 +17,6 @@ extern int create_section_mapping(unsigned long start, unsigned long end,
> int nid, pgprot_t prot);
> extern int remove_section_mapping(unsigned long start, unsigned long end);
>
> -#ifdef CONFIG_PPC_BOOK3S_64
> -extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
> -#else
> -static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
> -#endif
> -
> #ifdef CONFIG_NUMA
> extern int hot_add_scn_to_nid(unsigned long scn_addr);
> #else
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index eec6f4e5e481..5daf53ec7600 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -787,7 +787,7 @@ static unsigned long __init htab_get_table_size(void)
> }
>
> #ifdef CONFIG_MEMORY_HOTPLUG
> -int resize_hpt_for_hotplug(unsigned long new_mem_size)
> +static int resize_hpt_for_hotplug(unsigned long new_mem_size)
> {
> unsigned target_hpt_shift;
>
> @@ -821,6 +821,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end,
> return -1;
> }
>
> + resize_hpt_for_hotplug(memblock_phys_mem_size());
> +
> rc = htab_bolt_mapping(start, end, __pa(start),
> pgprot_val(prot), mmu_linear_psize,
> mmu_kernel_ssize);
> @@ -838,6 +840,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
> int rc = htab_remove_mapping(start, end, mmu_linear_psize,
> mmu_kernel_ssize);
> WARN_ON(rc < 0);
> +
> + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
> + pr_warn("Hash collision while resizing HPT\n");
> +
> return rc;
> }
> #endif /* CONFIG_MEMORY_HOTPLUG */
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index c2c11eb8dcfc..9dafc636588f 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -127,8 +127,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
> unsigned long nr_pages = size >> PAGE_SHIFT;
> int rc;
>
> - resize_hpt_for_hotplug(memblock_phys_mem_size());
> -
> start = (unsigned long)__va(start);
> rc = create_section_mapping(start, start + size, nid,
> params->pgprot);
> @@ -161,9 +159,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
> * hit that section of memory
> */
> vm_unmap_aliases();
> -
> - if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
> - pr_warn("Hash collision while resizing HPT\n");
> }
> #endif
>
> --
> 2.26.2
^ permalink raw reply
* Re: [PATCH v3 0/4] powerpc/mm/radix: Memory unplug fixes
From: Bharata B Rao @ 2020-07-24 12:17 UTC (permalink / raw)
To: Michael Ellerman; +Cc: Nathan Lynch, Aneesh Kumar K.V, linuxppc-dev, david
In-Reply-To: <87mu3pp1u9.fsf@mpe.ellerman.id.au>
On Fri, Jul 24, 2020 at 09:52:14PM +1000, Michael Ellerman wrote:
> Bharata B Rao <bharata@linux.ibm.com> writes:
> > On Tue, Jul 21, 2020 at 10:25:58PM +1000, Michael Ellerman wrote:
> >> Bharata B Rao <bharata@linux.ibm.com> writes:
> >> > On Tue, Jul 21, 2020 at 11:45:20AM +1000, Michael Ellerman wrote:
> >> >> Nathan Lynch <nathanl@linux.ibm.com> writes:
> >> >> > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> >> >> >> This is the next version of the fixes for memory unplug on radix.
> >> >> >> The issues and the fix are described in the actual patches.
> >> >> >
> >> >> > I guess this isn't actually causing problems at runtime right now, but I
> >> >> > notice calls to resize_hpt_for_hotplug() from arch_add_memory() and
> >> >> > arch_remove_memory(), which ought to be mmu-agnostic:
> >> >> >
> >> >> > int __ref arch_add_memory(int nid, u64 start, u64 size,
> >> >> > struct mhp_params *params)
> >> >> > {
> >> >> > unsigned long start_pfn = start >> PAGE_SHIFT;
> >> >> > unsigned long nr_pages = size >> PAGE_SHIFT;
> >> >> > int rc;
> >> >> >
> >> >> > resize_hpt_for_hotplug(memblock_phys_mem_size());
> >> >> >
> >> >> > start = (unsigned long)__va(start);
> >> >> > rc = create_section_mapping(start, start + size, nid,
> >> >> > params->pgprot);
> >> >> > ...
> >> >>
> >> >> Hmm well spotted.
> >> >>
> >> >> That does return early if the ops are not setup:
> >> >>
> >> >> int resize_hpt_for_hotplug(unsigned long new_mem_size)
> >> >> {
> >> >> unsigned target_hpt_shift;
> >> >>
> >> >> if (!mmu_hash_ops.resize_hpt)
> >> >> return 0;
> >> >>
> >> >>
> >> >> And:
> >> >>
> >> >> void __init hpte_init_pseries(void)
> >> >> {
> >> >> ...
> >> >> if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
> >> >> mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
> >> >>
> >> >> And that comes in via ibm,hypertas-functions:
> >> >>
> >> >> {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"},
> >> >>
> >> >>
> >> >> But firmware is not necessarily going to add/remove that call based on
> >> >> whether we're using hash/radix.
> >> >
> >> > Correct but hpte_init_pseries() will not be called for radix guests.
> >>
> >> Yeah, duh. You'd think the function name would have been a sufficient
> >> clue for me :)
> >>
> >> >> So I think a follow-up patch is needed to make this more robust.
> >> >>
> >> >> Aneesh/Bharata what platform did you test this series on? I'm curious
> >> >> how this didn't break.
> >> >
> >> > I have tested memory hotplug/unplug for radix guest on zz platform and
> >> > sanity-tested this for hash guest on P8.
> >> >
> >> > As noted above, mmu_hash_ops.resize_hpt will not be set for radix
> >> > guest and hence we won't see any breakage.
> >>
> >> OK.
> >>
> >> That's probably fine as it is then. Or maybe just a comment in
> >> resize_hpt_for_hotplug() pointing out that resize_hpt will be NULL if
> >> we're using radix.
> >
> > Or we could move these calls to hpt-only routines like below?
>
> That looks like it would be equivalent, and would nicely isolate those
> calls in hash specific code. So yeah I think that's worth sending as a
> proper patch, even better if you can test it.
Sure I will send it as a proper patch. I did test minimal hotplug/unplug
for hash guest with that patch, will do more extensive test and resend.
>
> > David - Do you remember if there was any particular reason to have
> > these two hpt-resize calls within powerpc-generic memory hotplug code?
>
> I think the HPT resizing was developed before or concurrently with the
> radix support, so I would guess it was just not something we thought
> about at the time.
Right.
Regards,
Bharata.
^ permalink raw reply
* Re: [PATCHv3 2/2] powerpc/pseries: update device tree before ejecting hotplug uevents
From: Pingfan Liu @ 2020-07-24 12:24 UTC (permalink / raw)
To: Nathan Lynch
Cc: cheloha, Kexec Mailing List, ldufour, linuxppc-dev, Hari Bathini
In-Reply-To: <87imee1hvt.fsf@linux.ibm.com>
On Thu, Jul 23, 2020 at 9:27 PM Nathan Lynch <nathanl@linux.ibm.com> wrote:
>
> Pingfan Liu <kernelfans@gmail.com> writes:
> > A bug is observed on pseries by taking the following steps on rhel:
> > -1. drmgr -c mem -r -q 5
> > -2. echo c > /proc/sysrq-trigger
> >
> > And then, the failure looks like:
> > kdump: saving to /sysroot//var/crash/127.0.0.1-2020-01-16-02:06:14/
> > kdump: saving vmcore-dmesg.txt
> > kdump: saving vmcore-dmesg.txt complete
> > kdump: saving vmcore
> > Checking for memory holes : [ 0.0 %] / Checking for memory holes : [100.0 %] | Excluding unnecessary pages : [100.0 %] \ Copying data : [ 0.3 %] - eta: 38s[ 44.337636] hash-mmu: mm: Hashing failure ! EA=0x7fffba400000 access=0x8000000000000004 current=makedumpfile
> > [ 44.337663] hash-mmu: trap=0x300 vsid=0x13a109c ssize=1 base psize=2 psize 2 pte=0xc000000050000504
> > [ 44.337677] hash-mmu: mm: Hashing failure ! EA=0x7fffba400000 access=0x8000000000000004 current=makedumpfile
> > [ 44.337692] hash-mmu: trap=0x300 vsid=0x13a109c ssize=1 base psize=2 psize 2 pte=0xc000000050000504
> > [ 44.337708] makedumpfile[469]: unhandled signal 7 at 00007fffba400000 nip 00007fffbbc4d7fc lr 000000011356ca3c code 2
> > [ 44.338548] Core dump to |/bin/false pipe failed
> > /lib/kdump-lib-initramfs.sh: line 98: 469 Bus error $CORE_COLLECTOR /proc/vmcore $_mp/$KDUMP_PATH/$HOST_IP-$DATEDIR/vmcore-incomplete
> > kdump: saving vmcore failed
> >
> > * Root cause *
> > After analyzing, it turns out that in the current implementation,
> > when hot-removing lmb, the KOBJ_REMOVE event ejects before the dt updating as
> > the code __remove_memory() comes before drmem_update_dt().
> > So in kdump kernel, when read_from_oldmem() resorts to
> > pSeries_lpar_hpte_insert() to install hpte, but fails with -2 due to
> > non-exist pfn. And finally, low_hash_fault() raise SIGBUS to process, as it
> > can be observed "Bus error"
> >
> > From a viewpoint of listener and publisher, the publisher notifies the
> > listener before data is ready. This introduces a problem where udev
> > launches kexec-tools (due to KOBJ_REMOVE) and loads a stale dt before
> > updating. And in capture kernel, makedumpfile will access the memory based
> > on the stale dt info, and hit a SIGBUS error due to an un-existed lmb.
> >
> > * Fix *
> > In order to fix this issue, update dt before __remove_memory(), and
> > accordingly the same rule in hot-add path.
> >
> > This will introduce extra dt updating payload for each involved lmb when hotplug.
> > But it should be fine since drmem_update_dt() is memory based operation and
> > hotplug is not a hot path.
>
> This is great analysis but the performance implications of the change
> are grave. The add/remove paths here are already O(n) where n is the
> quantity of memory assigned to the LP, this change would make it O(n^2):
>
> dlpar_memory_add_by_count
> for_each_drmem_lmb <--
> dlpar_add_lmb
> drmem_update_dt(_v1|_v2)
> for_each_drmem_lmb <--
>
> Memory add/remove isn't a hot path but quadratic runtime complexity
> isn't acceptable. Its current performance is bad enough that I have
Yes, the quadratic runtime complexity sounds terrible.
And I am curious about the bug. Does the system have thousands of lmb?
> internal bugs open on it.
>
> Not to mention we leak memory every time drmem_update_dt is called
> because we can't safely free device tree properties :-(
Do you know what blocks us from freeing it?
>
> Also note that this sort of reverts (fixes?) 063b8b1251fd
> ("powerpc/pseries/memory-hotplug: Only update DT once per memory DLPAR
> request").
Yes. And now, I think I need to bring up another method to fix it.
Thanks,
Pingfan
^ permalink raw reply
* Re: [v3 12/15] powerpc/perf: Add support for outputting extended regs in perf intr_regs
From: Ravi Bangoria @ 2020-07-24 12:26 UTC (permalink / raw)
To: Athira Rajeev
Cc: ego, mikey, maddy, kvm, Ravi Bangoria, kvm-ppc, svaidyan, acme,
jolsa, linuxppc-dev
In-Reply-To: <1594996707-3727-13-git-send-email-atrajeev@linux.vnet.ibm.com>
Hi Athira,
> +/* Function to return the extended register values */
> +static u64 get_ext_regs_value(int idx)
> +{
> + switch (idx) {
> + case PERF_REG_POWERPC_MMCR0:
> + return mfspr(SPRN_MMCR0);
> + case PERF_REG_POWERPC_MMCR1:
> + return mfspr(SPRN_MMCR1);
> + case PERF_REG_POWERPC_MMCR2:
> + return mfspr(SPRN_MMCR2);
> + default: return 0;
> + }
> +}
> +
> u64 perf_reg_value(struct pt_regs *regs, int idx)
> {
> - if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
> - return 0;
> + u64 PERF_REG_EXTENDED_MAX;
PERF_REG_EXTENDED_MAX should be initialized. otherwise ...
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_300))
> + PERF_REG_EXTENDED_MAX = PERF_REG_MAX_ISA_300;
>
> if (idx == PERF_REG_POWERPC_SIER &&
> (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
> @@ -85,6 +103,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
> IS_ENABLED(CONFIG_PPC32)))
> return 0;
>
> + if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX)
> + return get_ext_regs_value(idx);
On non p9/p10 machines, PERF_REG_EXTENDED_MAX may contain a random value, which will
allow the user to pass this if condition unintentionally.
Nit: PERF_REG_EXTENDED_MAX is a local variable so it should be in lowercase.
Any specific reason to define it in capitals?
Ravi
^ permalink raw reply
* [PATCH v4 0/6] powerpc: queued spinlocks and rwlocks
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
Updated with everybody's feedback (thanks all), and more performance
results.
What I've found is I might have been measuring the worst load point for
the paravirt case, and by looking at a range of loads it's clear that
queued spinlocks are overall better even on PV, doubly so when you look
at the generally much improved worst case latencies.
I have defaulted it to N even though I'm less concerned about the PV
numbers now, just because I think it needs more stress testing. But
it's very nicely selectable so should be low risk to include.
All in all this is a very cool technology and great results especially
on the big systems but even on smaller ones there are nice gains. Thanks
Waiman and everyone who developed it.
Thanks,
Nick
Nicholas Piggin (6):
powerpc/pseries: move some PAPR paravirt functions to their own file
powerpc: move spinlock implementation to simple_spinlock
powerpc/64s: implement queued spinlocks and rwlocks
powerpc/pseries: implement paravirt qspinlocks for SPLPAR
powerpc/qspinlock: optimised atomic_try_cmpxchg_lock that adds the
lock hint
powerpc: implement smp_cond_load_relaxed
arch/powerpc/Kconfig | 15 +
arch/powerpc/include/asm/Kbuild | 1 +
arch/powerpc/include/asm/atomic.h | 28 ++
arch/powerpc/include/asm/barrier.h | 14 +
arch/powerpc/include/asm/paravirt.h | 87 +++++
arch/powerpc/include/asm/qspinlock.h | 91 ++++++
arch/powerpc/include/asm/qspinlock_paravirt.h | 7 +
arch/powerpc/include/asm/simple_spinlock.h | 288 ++++++++++++++++
.../include/asm/simple_spinlock_types.h | 21 ++
arch/powerpc/include/asm/spinlock.h | 308 +-----------------
arch/powerpc/include/asm/spinlock_types.h | 17 +-
arch/powerpc/lib/Makefile | 3 +
arch/powerpc/lib/locks.c | 12 +-
arch/powerpc/platforms/pseries/Kconfig | 9 +-
arch/powerpc/platforms/pseries/setup.c | 4 +-
include/asm-generic/qspinlock.h | 4 +
16 files changed, 588 insertions(+), 321 deletions(-)
create mode 100644 arch/powerpc/include/asm/paravirt.h
create mode 100644 arch/powerpc/include/asm/qspinlock.h
create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
create mode 100644 arch/powerpc/include/asm/simple_spinlock.h
create mode 100644 arch/powerpc/include/asm/simple_spinlock_types.h
--
2.23.0
^ permalink raw reply
* [PATCH v4 1/6] powerpc/pseries: move some PAPR paravirt functions to their own file
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
In-Reply-To: <20200724131423.1362108-1-npiggin@gmail.com>
These functions will be used by queued spinlock implementation,
and may be useful elsewhere too, so move them out of spinlock.h.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/paravirt.h | 59 +++++++++++++++++++++++++++++
arch/powerpc/include/asm/spinlock.h | 24 +-----------
arch/powerpc/lib/locks.c | 12 +++---
3 files changed, 66 insertions(+), 29 deletions(-)
create mode 100644 arch/powerpc/include/asm/paravirt.h
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
new file mode 100644
index 000000000000..339e8533464b
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PARAVIRT_H
+#define _ASM_POWERPC_PARAVIRT_H
+
+#include <linux/jump_label.h>
+#include <asm/smp.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/hvcall.h>
+#endif
+
+#ifdef CONFIG_PPC_SPLPAR
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
+static inline bool is_shared_processor(void)
+{
+ return static_branch_unlikely(&shared_processor);
+}
+
+/* If bit 0 is set, the cpu has been preempted */
+static inline u32 yield_count_of(int cpu)
+{
+ __be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count);
+ return be32_to_cpu(yield_count);
+}
+
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+ plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+}
+#else
+static inline bool is_shared_processor(void)
+{
+ return false;
+}
+
+static inline u32 yield_count_of(int cpu)
+{
+ return 0;
+}
+
+extern void ___bad_yield_to_preempted(void);
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+ ___bad_yield_to_preempted(); /* This would be a bug */
+}
+#endif
+
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ if (!is_shared_processor())
+ return false;
+ if (yield_count_of(cpu) & 1)
+ return true;
+ return false;
+}
+
+#endif /* _ASM_POWERPC_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 2d620896cdae..79be9bb10bbb 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -15,11 +15,10 @@
*
* (the type definitions are in asm/spinlock_types.h)
*/
-#include <linux/jump_label.h>
#include <linux/irqflags.h>
+#include <asm/paravirt.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
-#include <asm/hvcall.h>
#endif
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
@@ -35,18 +34,6 @@
#define LOCK_TOKEN 1
#endif
-#ifdef CONFIG_PPC_PSERIES
-DECLARE_STATIC_KEY_FALSE(shared_processor);
-
-#define vcpu_is_preempted vcpu_is_preempted
-static inline bool vcpu_is_preempted(int cpu)
-{
- if (!static_branch_unlikely(&shared_processor))
- return false;
- return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
-}
-#endif
-
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
@@ -110,15 +97,6 @@ static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
#endif
-static inline bool is_shared_processor(void)
-{
-#ifdef CONFIG_PPC_SPLPAR
- return static_branch_unlikely(&shared_processor);
-#else
- return false;
-#endif
-}
-
static inline void spin_yield(arch_spinlock_t *lock)
{
if (is_shared_processor())
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6440d5943c00..04165b7a163f 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock)
return;
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+ yield_count = yield_count_of(holder_cpu);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (lock->slock != lock_value)
return; /* something has changed */
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
+ yield_to_preempted(holder_cpu, yield_count);
}
EXPORT_SYMBOL_GPL(splpar_spin_yield);
@@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw)
return; /* no write lock at present */
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+ yield_count = yield_count_of(holder_cpu);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (rw->lock != lock_value)
return; /* something has changed */
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
+ yield_to_preempted(holder_cpu, yield_count);
}
#endif
--
2.23.0
^ permalink raw reply related
* [PATCH v4 2/6] powerpc: move spinlock implementation to simple_spinlock
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
In-Reply-To: <20200724131423.1362108-1-npiggin@gmail.com>
This prepares for queued spinlocks. It is a simple rename, except for updating
the preprocessor guard name and a file reference.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/simple_spinlock.h | 288 ++++++++++++++++++
.../include/asm/simple_spinlock_types.h | 21 ++
arch/powerpc/include/asm/spinlock.h | 285 +----------------
arch/powerpc/include/asm/spinlock_types.h | 12 +-
4 files changed, 311 insertions(+), 295 deletions(-)
create mode 100644 arch/powerpc/include/asm/simple_spinlock.h
create mode 100644 arch/powerpc/include/asm/simple_spinlock_types.h
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
new file mode 100644
index 000000000000..fe6cff7df48e
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_H
+
+/*
+ * Simple spin lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ * Rework to support virtual processors
+ *
+ * Type of int is used as a full 64b word is not necessary.
+ *
+ * (the type definitions are in asm/simple_spinlock_types.h)
+ */
+#include <linux/irqflags.h>
+#include <asm/paravirt.h>
+#include <asm/paca.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC64
+/* use 0x800000yy when locked, where yy == CPU number */
+#ifdef __BIG_ENDIAN__
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+#else
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
+#endif
+#else
+#define LOCK_TOKEN 1
+#endif
+
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.slock == 0;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ smp_mb();
+ return !arch_spin_value_unlocked(*lock);
+}
+
+/*
+ * This returns the old value in the lock, so we succeeded
+ * in getting the lock if the return value is 0.
+ */
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+{
+ unsigned long tmp, token;
+
+ token = LOCK_TOKEN;
+ __asm__ __volatile__(
+"1: " PPC_LWARX(%0,0,%2,1) "\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n\
+ stwcx. %1,0,%2\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (tmp)
+ : "r" (token), "r" (&lock->slock)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ return __arch_spin_trylock(lock) == 0;
+}
+
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor. Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held. Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+void splpar_spin_yield(arch_spinlock_t *lock);
+void splpar_rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
+#endif
+
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ else
+ barrier();
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_rw_yield(lock);
+ else
+ barrier();
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ while (1) {
+ if (likely(__arch_spin_trylock(lock) == 0))
+ break;
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ } while (unlikely(lock->slock != 0));
+ HMT_medium();
+ }
+}
+
+static inline
+void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+{
+ unsigned long flags_dis;
+
+ while (1) {
+ if (likely(__arch_spin_trylock(lock) == 0))
+ break;
+ local_save_flags(flags_dis);
+ local_irq_restore(flags);
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ } while (unlikely(lock->slock != 0));
+ HMT_medium();
+ local_irq_restore(flags_dis);
+ }
+}
+#define arch_spin_lock_flags arch_spin_lock_flags
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ __asm__ __volatile__("# arch_spin_unlock\n\t"
+ PPC_RELEASE_BARRIER: : :"memory");
+ lock->slock = 0;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#ifdef CONFIG_PPC64
+#define __DO_SIGN_EXTEND "extsw %0,%0\n"
+#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
+#else
+#define __DO_SIGN_EXTEND
+#define WRLOCK_TOKEN (-1)
+#endif
+
+/*
+ * This returns the old value in the lock + 1,
+ * so we got a read lock if the return value is > 0.
+ */
+static inline long __arch_read_trylock(arch_rwlock_t *rw)
+{
+ long tmp;
+
+ __asm__ __volatile__(
+"1: " PPC_LWARX(%0,0,%1,1) "\n"
+ __DO_SIGN_EXTEND
+" addic. %0,%0,1\n\
+ ble- 2f\n"
+" stwcx. %0,0,%1\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:" : "=&r" (tmp)
+ : "r" (&rw->lock)
+ : "cr0", "xer", "memory");
+
+ return tmp;
+}
+
+/*
+ * This returns the old value in the lock,
+ * so we got the write lock if the return value is 0.
+ */
+static inline long __arch_write_trylock(arch_rwlock_t *rw)
+{
+ long tmp, token;
+
+ token = WRLOCK_TOKEN;
+ __asm__ __volatile__(
+"1: " PPC_LWARX(%0,0,%2,1) "\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n"
+" stwcx. %1,0,%2\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:" : "=&r" (tmp)
+ : "r" (token), "r" (&rw->lock)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ while (1) {
+ if (likely(__arch_read_trylock(rw) > 0))
+ break;
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
+ } while (unlikely(rw->lock < 0));
+ HMT_medium();
+ }
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ while (1) {
+ if (likely(__arch_write_trylock(rw) == 0))
+ break;
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
+ } while (unlikely(rw->lock != 0));
+ HMT_medium();
+ }
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ return __arch_read_trylock(rw) > 0;
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ return __arch_write_trylock(rw) == 0;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ long tmp;
+
+ __asm__ __volatile__(
+ "# read_unlock\n\t"
+ PPC_RELEASE_BARRIER
+"1: lwarx %0,0,%1\n\
+ addic %0,%0,-1\n"
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r"(tmp)
+ : "r"(&rw->lock)
+ : "cr0", "xer", "memory");
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ __asm__ __volatile__("# write_unlock\n\t"
+ PPC_RELEASE_BARRIER: : :"memory");
+ rw->lock = 0;
+}
+
+#define arch_spin_relax(lock) spin_yield(lock)
+#define arch_read_relax(lock) rw_yield(lock)
+#define arch_write_relax(lock) rw_yield(lock)
+
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
new file mode 100644
index 000000000000..0f3cdd8faa95
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+ volatile unsigned int slock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+
+typedef struct {
+ volatile signed int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 79be9bb10bbb..21357fe05fe0 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -3,290 +3,7 @@
#define __ASM_SPINLOCK_H
#ifdef __KERNEL__
-/*
- * Simple spin lock operations.
- *
- * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
- * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
- * Rework to support virtual processors
- *
- * Type of int is used as a full 64b word is not necessary.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-#include <linux/irqflags.h>
-#include <asm/paravirt.h>
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#endif
-#include <asm/synch.h>
-#include <asm/ppc-opcode.h>
-
-#ifdef CONFIG_PPC64
-/* use 0x800000yy when locked, where yy == CPU number */
-#ifdef __BIG_ENDIAN__
-#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
-#else
-#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
-#endif
-#else
-#define LOCK_TOKEN 1
-#endif
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
- return lock.slock == 0;
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
- smp_mb();
- return !arch_spin_value_unlocked(*lock);
-}
-
-/*
- * This returns the old value in the lock, so we succeeded
- * in getting the lock if the return value is 0.
- */
-static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
-{
- unsigned long tmp, token;
-
- token = LOCK_TOKEN;
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0,0,%2,1) "\n\
- cmpwi 0,%0,0\n\
- bne- 2f\n\
- stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ACQUIRE_BARRIER
-"2:"
- : "=&r" (tmp)
- : "r" (token), "r" (&lock->slock)
- : "cr0", "memory");
-
- return tmp;
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- return __arch_spin_trylock(lock) == 0;
-}
-
-/*
- * On a system with shared processors (that is, where a physical
- * processor is multiplexed between several virtual processors),
- * there is no point spinning on a lock if the holder of the lock
- * isn't currently scheduled on a physical processor. Instead
- * we detect this situation and ask the hypervisor to give the
- * rest of our timeslice to the lock holder.
- *
- * So that we can tell which virtual processor is holding a lock,
- * we put 0x80000000 | smp_processor_id() in the lock when it is
- * held. Conveniently, we have a word in the paca that holds this
- * value.
- */
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-void splpar_spin_yield(arch_spinlock_t *lock);
-void splpar_rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
-static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
-#endif
-
-static inline void spin_yield(arch_spinlock_t *lock)
-{
- if (is_shared_processor())
- splpar_spin_yield(lock);
- else
- barrier();
-}
-
-static inline void rw_yield(arch_rwlock_t *lock)
-{
- if (is_shared_processor())
- splpar_rw_yield(lock);
- else
- barrier();
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- while (1) {
- if (likely(__arch_spin_trylock(lock) == 0))
- break;
- do {
- HMT_low();
- if (is_shared_processor())
- splpar_spin_yield(lock);
- } while (unlikely(lock->slock != 0));
- HMT_medium();
- }
-}
-
-static inline
-void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
- unsigned long flags_dis;
-
- while (1) {
- if (likely(__arch_spin_trylock(lock) == 0))
- break;
- local_save_flags(flags_dis);
- local_irq_restore(flags);
- do {
- HMT_low();
- if (is_shared_processor())
- splpar_spin_yield(lock);
- } while (unlikely(lock->slock != 0));
- HMT_medium();
- local_irq_restore(flags_dis);
- }
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- __asm__ __volatile__("# arch_spin_unlock\n\t"
- PPC_RELEASE_BARRIER: : :"memory");
- lock->slock = 0;
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-#ifdef CONFIG_PPC64
-#define __DO_SIGN_EXTEND "extsw %0,%0\n"
-#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
-#else
-#define __DO_SIGN_EXTEND
-#define WRLOCK_TOKEN (-1)
-#endif
-
-/*
- * This returns the old value in the lock + 1,
- * so we got a read lock if the return value is > 0.
- */
-static inline long __arch_read_trylock(arch_rwlock_t *rw)
-{
- long tmp;
-
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0,0,%1,1) "\n"
- __DO_SIGN_EXTEND
-" addic. %0,%0,1\n\
- ble- 2f\n"
-" stwcx. %0,0,%1\n\
- bne- 1b\n"
- PPC_ACQUIRE_BARRIER
-"2:" : "=&r" (tmp)
- : "r" (&rw->lock)
- : "cr0", "xer", "memory");
-
- return tmp;
-}
-
-/*
- * This returns the old value in the lock,
- * so we got the write lock if the return value is 0.
- */
-static inline long __arch_write_trylock(arch_rwlock_t *rw)
-{
- long tmp, token;
-
- token = WRLOCK_TOKEN;
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0,0,%2,1) "\n\
- cmpwi 0,%0,0\n\
- bne- 2f\n"
-" stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ACQUIRE_BARRIER
-"2:" : "=&r" (tmp)
- : "r" (token), "r" (&rw->lock)
- : "cr0", "memory");
-
- return tmp;
-}
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
- while (1) {
- if (likely(__arch_read_trylock(rw) > 0))
- break;
- do {
- HMT_low();
- if (is_shared_processor())
- splpar_rw_yield(rw);
- } while (unlikely(rw->lock < 0));
- HMT_medium();
- }
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
- while (1) {
- if (likely(__arch_write_trylock(rw) == 0))
- break;
- do {
- HMT_low();
- if (is_shared_processor())
- splpar_rw_yield(rw);
- } while (unlikely(rw->lock != 0));
- HMT_medium();
- }
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
- return __arch_read_trylock(rw) > 0;
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
- return __arch_write_trylock(rw) == 0;
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
- long tmp;
-
- __asm__ __volatile__(
- "# read_unlock\n\t"
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%1\n\
- addic %0,%0,-1\n"
-" stwcx. %0,0,%1\n\
- bne- 1b"
- : "=&r"(tmp)
- : "r"(&rw->lock)
- : "cr0", "xer", "memory");
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
- __asm__ __volatile__("# write_unlock\n\t"
- PPC_RELEASE_BARRIER: : :"memory");
- rw->lock = 0;
-}
-
-#define arch_spin_relax(lock) spin_yield(lock)
-#define arch_read_relax(lock) rw_yield(lock)
-#define arch_write_relax(lock) rw_yield(lock)
-
-/* See include/linux/spinlock.h */
-#define smp_mb__after_spinlock() smp_mb()
+#include <asm/simple_spinlock.h>
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 87adaf13b7e8..3906f52dae65 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -6,16 +6,6 @@
# error "please don't include this file directly"
#endif
-typedef struct {
- volatile unsigned int slock;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
-
-typedef struct {
- volatile signed int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+#include <asm/simple_spinlock_types.h>
#endif
--
2.23.0
^ permalink raw reply related
* [PATCH v4 3/6] powerpc/64s: implement queued spinlocks and rwlocks
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
In-Reply-To: <20200724131423.1362108-1-npiggin@gmail.com>
These have shown significantly improved performance and fairness when
spinlock contention is moderate to high on very large systems.
With this series including subsequent patches, on a 16 socket 1536 thread
POWER9, a stress test such as same-file open/close from all CPUs gets big
speedups, 11620op/s aggregate with simple spinlocks vs 384158op/s (33x
faster), where the difference in throughput between the fastest and slowest
thread goes from 7x to 1.4x.
Thanks to the fast path being identical in terms of atomics and barriers
(after a subsequent optimisation patch), single threaded performance is not
changed (no measurable difference).
On smaller systems, performance and fairness seem to be generally improved.
Using dbench on tmpfs as a test (that starts to run into kernel spinlock
contention), a 2-socket OpenPOWER POWER9 system was tested with bare metal
and KVM guest configurations. Results can be found here:
https://github.com/linuxppc/issues/issues/305#issuecomment-663487453
Observations are:
- Queued spinlocks are equal when contention is insignificant, as expected
and as measured with microbenchmarks.
- When there is contention, on bare metal queued spinlocks have better
throughput and max latency at all points.
- When virtualised, queued spinlocks are slightly worse approaching peak
throughput, but significantly better throughput and max latency at all
points beyond peak, until queued spinlock maximum latency rises when
clients are 2x vCPUs.
The regressions haven't been analysed very well yet, there are a lot of
things that can be tuned, particularly the paravirtualised locking, but the
numbers already look like a good net win even on relatively small systems.
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 15 ++++++++++++++
arch/powerpc/include/asm/Kbuild | 1 +
arch/powerpc/include/asm/qspinlock.h | 25 +++++++++++++++++++++++
arch/powerpc/include/asm/spinlock.h | 5 +++++
arch/powerpc/include/asm/spinlock_types.h | 5 +++++
arch/powerpc/lib/Makefile | 3 +++
include/asm-generic/qspinlock.h | 2 ++
7 files changed, 56 insertions(+)
create mode 100644 arch/powerpc/include/asm/qspinlock.h
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..641946052d67 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -145,6 +145,8 @@ config PPC
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
+ select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS
+ select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
@@ -490,6 +492,19 @@ config HOTPLUG_CPU
Say N if you are unsure.
+config PPC_QUEUED_SPINLOCKS
+ bool "Queued spinlocks"
+ depends on SMP
+ help
+ Say Y here to use queued spinlocks which give better
+ scalability and fairness on large SMP and NUMA systems without
+ harming single threaded performance.
+
+ This option is currently experimental, the code is more complex
+ and less tested so it defaults to "N" for the moment.
+
+ If unsure, say "N".
+
config ARCH_CPU_PROBE_RELEASE
def_bool y
depends on HOTPLUG_CPU
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index dadbcf3a0b1e..27c2268dfd6c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -6,5 +6,6 @@ generated-y += syscall_table_spu.h
generic-y += export.h
generic-y += local64.h
generic-y += mcs_spinlock.h
+generic-y += qrwlock.h
generic-y += vtime.h
generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 000000000000..c49e33e24edd
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */
+
+#define smp_mb__after_spinlock() smp_mb()
+
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+ /*
+ * This barrier was added to simple spinlocks by commit 51d7d5205d338,
+ * but it should now be possible to remove it, as arm64 has done with
+ * commit c6f5d02b6a0f.
+ */
+ smp_mb();
+ return atomic_read(&lock->val);
+}
+#define queued_spin_is_locked queued_spin_is_locked
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 21357fe05fe0..434615f1d761 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -3,7 +3,12 @@
#define __ASM_SPINLOCK_H
#ifdef __KERNEL__
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
+#else
#include <asm/simple_spinlock.h>
+#endif
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 3906f52dae65..c5d742f18021 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -6,6 +6,11 @@
# error "please don't include this file directly"
#endif
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+#else
#include <asm/simple_spinlock_types.h>
+#endif
#endif
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5e994cda8e40..d66a645503eb 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -41,7 +41,10 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o memcpy_mcsafe_64.o
+ifndef CONFIG_PPC_QUEUED_SPINLOCKS
obj64-$(CONFIG_SMP) += locks.o
+endif
+
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
test_emulate_step_exec_instr.o
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index fde943d180e0..fb0a814d4395 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -12,6 +12,7 @@
#include <asm-generic/qspinlock_types.h>
+#ifndef queued_spin_is_locked
/**
* queued_spin_is_locked - is the spinlock locked?
* @lock: Pointer to queued spinlock structure
@@ -25,6 +26,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
*/
return atomic_read(&lock->val);
}
+#endif
/**
* queued_spin_value_unlocked - is the spinlock structure unlocked?
--
2.23.0
^ permalink raw reply related
* [PATCH v4 4/6] powerpc/pseries: implement paravirt qspinlocks for SPLPAR
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
In-Reply-To: <20200724131423.1362108-1-npiggin@gmail.com>
This implements the generic paravirt qspinlocks using H_PROD and H_CONFER to
kick and wait.
This uses an un-directed yield to any CPU rather than the directed yield to
a pre-empted lock holder that paravirtualised simple spinlocks use, that
requires no kick hcall. This is something that could be investigated and
improved in future.
Performance results can be found in the commit which added queued spinlocks.
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/paravirt.h | 28 ++++++++
arch/powerpc/include/asm/qspinlock.h | 66 +++++++++++++++++++
arch/powerpc/include/asm/qspinlock_paravirt.h | 7 ++
arch/powerpc/include/asm/spinlock.h | 4 ++
arch/powerpc/platforms/pseries/Kconfig | 9 ++-
arch/powerpc/platforms/pseries/setup.c | 4 +-
include/asm-generic/qspinlock.h | 2 +
7 files changed, 118 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 339e8533464b..21e5f29ca251 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -28,6 +28,16 @@ static inline void yield_to_preempted(int cpu, u32 yield_count)
{
plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
}
+
+static inline void prod_cpu(int cpu)
+{
+ plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+}
+
+static inline void yield_to_any(void)
+{
+ plpar_hcall_norets(H_CONFER, -1, 0);
+}
#else
static inline bool is_shared_processor(void)
{
@@ -44,6 +54,19 @@ static inline void yield_to_preempted(int cpu, u32 yield_count)
{
___bad_yield_to_preempted(); /* This would be a bug */
}
+
+extern void ___bad_yield_to_any(void);
+static inline void yield_to_any(void)
+{
+ ___bad_yield_to_any(); /* This would be a bug */
+}
+
+extern void ___bad_prod_cpu(void);
+static inline void prod_cpu(int cpu)
+{
+ ___bad_prod_cpu(); /* This would be a bug */
+}
+
#endif
#define vcpu_is_preempted vcpu_is_preempted
@@ -56,4 +79,9 @@ static inline bool vcpu_is_preempted(int cpu)
return false;
}
+static inline bool pv_is_native_spin_unlock(void)
+{
+ return !is_shared_processor();
+}
+
#endif /* _ASM_POWERPC_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index c49e33e24edd..f5066f00a08c 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -3,9 +3,47 @@
#define _ASM_POWERPC_QSPINLOCK_H
#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+ if (!is_shared_processor())
+ native_queued_spin_lock_slowpath(lock, val);
+ else
+ __pv_queued_spin_lock_slowpath(lock, val);
+}
+
+#define queued_spin_unlock queued_spin_unlock
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ if (!is_shared_processor())
+ smp_store_release(&lock->locked, 0);
+ else
+ __pv_queued_spin_unlock(lock);
+}
+
+#else
+extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+#endif
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
+{
+ u32 val = 0;
+
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ return;
+
+ queued_spin_lock_slowpath(lock, val);
+}
+#define queued_spin_lock queued_spin_lock
+
#define smp_mb__after_spinlock() smp_mb()
static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
@@ -20,6 +58,34 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
}
#define queued_spin_is_locked queued_spin_is_locked
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define SPIN_THRESHOLD (1<<15) /* not tuned */
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+ if (*ptr != val)
+ return;
+ yield_to_any();
+ /*
+ * We could pass in a CPU here if waiting in the queue and yield to
+ * the previous CPU in the queue.
+ */
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+ prod_cpu(cpu);
+}
+
+extern void __pv_init_lock_hash(void);
+
+static inline void pv_spinlocks_init(void)
+{
+ __pv_init_lock_hash();
+}
+
+#endif
+
#include <asm-generic/qspinlock.h>
#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000000000000..6b60e7736a47
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
+#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
+
+EXPORT_SYMBOL(__pv_queued_spin_unlock);
+
+#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 434615f1d761..6ec72282888d 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -10,5 +10,9 @@
#include <asm/simple_spinlock.h>
#endif
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void pv_spinlocks_init(void) { }
+#endif
+
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 24c18362e5ea..5e037df2a3a1 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -25,15 +25,22 @@ config PPC_PSERIES
select SWIOTLB
default y
+config PARAVIRT_SPINLOCKS
+ bool
+
config PPC_SPLPAR
- depends on PPC_PSERIES
bool "Support for shared-processor logical partitions"
+ depends on PPC_PSERIES
+ select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+ default y
help
Enabling this option will make the kernel run more efficiently
on logically-partitioned pSeries systems which use shared
processors, that is, which share physical processors between
two or more partitions.
+ Say Y if you are unsure.
+
config DTL
bool "Dispatch Trace Log"
depends on PPC_SPLPAR && DEBUG_FS
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2db8469e475f..fa847d1f9d54 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -771,8 +771,10 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
- if (lppaca_shared_proc(get_lppaca()))
+ if (lppaca_shared_proc(get_lppaca())) {
static_branch_enable(&shared_processor);
+ pv_spinlocks_init();
+ }
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index fb0a814d4395..38ca14e79a86 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -69,6 +69,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+#ifndef queued_spin_lock
/**
* queued_spin_lock - acquire a queued spinlock
* @lock: Pointer to queued spinlock structure
@@ -82,6 +83,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
queued_spin_lock_slowpath(lock, val);
}
+#endif
#ifndef queued_spin_unlock
/**
--
2.23.0
^ permalink raw reply related
* [PATCH v4 5/6] powerpc/qspinlock: optimised atomic_try_cmpxchg_lock that adds the lock hint
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
In-Reply-To: <20200724131423.1362108-1-npiggin@gmail.com>
This brings the behaviour of the uncontended fast path back to roughly
equivalent to simple spinlocks -- a single atomic op with lock hint.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/atomic.h | 28 ++++++++++++++++++++++++++++
arch/powerpc/include/asm/qspinlock.h | 2 +-
2 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 498785ffc25f..f6a3d145ffb7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -193,6 +193,34 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+/*
+ * Don't want to override the generic atomic_try_cmpxchg_acquire, because
+ * we add a lock hint to the lwarx, which may not be wanted for the
+ * _acquire case (and is not used by the other _acquire variants so it
+ * would be a surprise).
+ */
+static __always_inline bool
+atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
+{
+ int r, o = *old;
+
+ __asm__ __volatile__ (
+"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n"
+" cmpw 0,%0,%3 \n"
+" bne- 2f \n"
+" stwcx. %4,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"2: \n"
+ : "=&r" (r), "+m" (v->counter)
+ : "r" (&v->counter), "r" (o), "r" (new)
+ : "cr0", "memory");
+
+ if (unlikely(r != o))
+ *old = r;
+ return likely(r == o);
+}
+
/**
* atomic_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic_t
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index f5066f00a08c..b752d34517b3 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -37,7 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val = 0;
- if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
return;
queued_spin_lock_slowpath(lock, val);
--
2.23.0
^ permalink raw reply related
* [PATCH v4 6/6] powerpc: implement smp_cond_load_relaxed
From: Nicholas Piggin @ 2020-07-24 13:14 UTC (permalink / raw)
To: linuxppc-dev
Cc: linux-arch, Michal Suchánek, Peter Zijlstra, Boqun Feng,
linux-kernel, Nicholas Piggin, virtualization, Ingo Molnar,
kvm-ppc, Waiman Long, Will Deacon
In-Reply-To: <20200724131423.1362108-1-npiggin@gmail.com>
This implements smp_cond_load_relaxed with the slowpath busy loop using the
preferred SMT priority pattern.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/barrier.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 123adcefd40f..9b4671d38674 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -76,6 +76,20 @@ do { \
___p1; \
})
+#define smp_cond_load_relaxed(ptr, cond_expr) ({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ VAL = READ_ONCE(*__PTR); \
+ if (unlikely(!(cond_expr))) { \
+ spin_begin(); \
+ do { \
+ VAL = READ_ONCE(*__PTR); \
+ } while (!(cond_expr)); \
+ spin_end(); \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
#ifdef CONFIG_PPC_BOOK3S_64
#define NOSPEC_BARRIER_SLOT nop
#elif defined(CONFIG_PPC_FSL_BOOK3E)
--
2.23.0
^ permalink raw reply related
* [PATCH] powerpc/sstep: Fix incorrect CONFIG symbol in scv handling
From: Michael Ellerman @ 2020-07-24 13:16 UTC (permalink / raw)
To: linuxppc-dev; +Cc: christophe.leroy
When I "fixed" the ppc64e build in Nick's recent patch, I typoed the
CONFIG symbol, resulting in one that doesn't exist. Fix it to use the
correct symbol.
Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/lib/sstep.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 4194119eff82..c58ea9e787cb 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -3382,7 +3382,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
regs->msr = MSR_KERNEL;
return 1;
-#ifdef CONFIG_PPC64_BOOK3S
+#ifdef CONFIG_PPC_BOOK3S_64
case SYSCALL_VECTORED_0: /* scv 0 */
regs->gpr[9] = regs->gpr[13];
regs->gpr[10] = MSR_KERNEL;
--
2.25.1
^ permalink raw reply related
* [PATCH 1/9] powerpc/configs: Drop old symbols from ppc6xx_defconfig
From: Michael Ellerman @ 2020-07-24 13:17 UTC (permalink / raw)
To: linuxppc-dev
ppc6xx_defconfig refers to quite a few symbols that no longer exist,
as reported by scripts/checkkconfigsymbols.py. Remove them.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/configs/ppc6xx_defconfig | 39 ---------------------------
1 file changed, 39 deletions(-)
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index feb5d47d8d1e..5e6f92ba3210 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -53,7 +53,6 @@ CONFIG_MPC836x_MDS=y
CONFIG_MPC836x_RDK=y
CONFIG_MPC837x_MDS=y
CONFIG_MPC837x_RDB=y
-CONFIG_SBC834x=y
CONFIG_ASP834x=y
CONFIG_PPC_86xx=y
CONFIG_MPC8641_HPCN=y
@@ -187,7 +186,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -203,7 +201,6 @@ CONFIG_IP_NF_SECURITY=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -241,7 +238,6 @@ CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
CONFIG_IP_DCCP=m
-CONFIG_NET_DCCPPROBE=m
CONFIG_TIPC=m
CONFIG_ATM=m
CONFIG_ATM_CLIP=m
@@ -251,7 +247,6 @@ CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_DECNET=m
CONFIG_DECNET_ROUTER=y
-CONFIG_IPX=m
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
CONFIG_IPDDP=m
@@ -297,26 +292,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_IRDA=m
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-CONFIG_IRTTY_SIR=m
-CONFIG_KINGSUN_DONGLE=m
-CONFIG_KSDAZZLE_DONGLE=m
-CONFIG_KS959_DONGLE=m
-CONFIG_USB_IRDA=m
-CONFIG_SIGMATEL_FIR=m
-CONFIG_NSC_FIR=m
-CONFIG_WINBOND_FIR=m
-CONFIG_TOSHIBA_FIR=m
-CONFIG_SMC_IRCC_FIR=m
-CONFIG_ALI_FIR=m
-CONFIG_VLSI_FIR=m
-CONFIG_VIA_FIR=m
-CONFIG_MCS_FIR=m
CONFIG_BT=m
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
@@ -332,7 +307,6 @@ CONFIG_BT_HCIBFUSB=m
CONFIG_BT_HCIDTL1=m
CONFIG_BT_HCIBT3C=m
CONFIG_BT_HCIBLUECARD=m
-CONFIG_BT_HCIBTUART=m
CONFIG_BT_HCIVHCI=m
CONFIG_CFG80211=m
CONFIG_MAC80211=m
@@ -366,7 +340,6 @@ CONFIG_EEPROM_93CX6=m
CONFIG_RAID_ATTRS=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=m
@@ -663,7 +636,6 @@ CONFIG_I2C_MPC=m
CONFIG_I2C_PCA_PLATFORM=m
CONFIG_I2C_SIMTEC=m
CONFIG_I2C_PARPORT=m
-CONFIG_I2C_PARPORT_LIGHT=m
CONFIG_I2C_TINY_USB=m
CONFIG_I2C_PCA_ISA=m
CONFIG_I2C_STUB=m
@@ -676,7 +648,6 @@ CONFIG_W1_SLAVE_THERM=m
CONFIG_W1_SLAVE_SMEM=m
CONFIG_W1_SLAVE_DS2433=m
CONFIG_W1_SLAVE_DS2433_CRC=y
-CONFIG_W1_SLAVE_DS2760=m
CONFIG_APM_POWER=m
CONFIG_BATTERY_PMU=m
CONFIG_HWMON=m
@@ -1065,15 +1036,6 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
CONFIG_CIFS_DFS_UPCALL=y
-CONFIG_NCP_FS=m
-CONFIG_NCPFS_PACKET_SIGNING=y
-CONFIG_NCPFS_IOCTL_LOCKING=y
-CONFIG_NCPFS_STRONG=y
-CONFIG_NCPFS_NFS_NS=y
-CONFIG_NCPFS_OS2_NS=y
-CONFIG_NCPFS_SMALLDOS=y
-CONFIG_NCPFS_NLS=y
-CONFIG_NCPFS_EXTRAS=y
CONFIG_CODA_FS=m
CONFIG_9P_FS=m
CONFIG_NLS_DEFAULT="utf8"
@@ -1117,7 +1079,6 @@ CONFIG_NLS_KOI8_U=m
CONFIG_DEBUG_INFO=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_OBJECTS=y
--
2.25.1
^ permalink raw reply related
* [PATCH 2/9] powerpc/configs: Remove dead symbols
From: Michael Ellerman @ 2020-07-24 13:17 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20200724131728.1643966-1-mpe@ellerman.id.au>
Remove references to symbols that no longer exist as reported by
scripts/checkkconfigsymbols.py.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/configs/44x/akebono_defconfig | 1 -
arch/powerpc/configs/85xx/xes_mpc85xx_defconfig | 3 ---
arch/powerpc/configs/86xx-hw.config | 2 --
arch/powerpc/configs/fsl-emb-nonhw.config | 1 -
arch/powerpc/configs/g5_defconfig | 1 -
arch/powerpc/configs/linkstation_defconfig | 1 -
arch/powerpc/configs/mpc512x_defconfig | 1 -
arch/powerpc/configs/mpc83xx_defconfig | 1 -
arch/powerpc/configs/mvme5100_defconfig | 1 -
arch/powerpc/configs/pasemi_defconfig | 1 -
arch/powerpc/configs/pmac32_defconfig | 8 --------
arch/powerpc/configs/powernv_defconfig | 1 -
arch/powerpc/configs/ppc40x_defconfig | 3 ---
arch/powerpc/configs/ppc64_defconfig | 1 -
arch/powerpc/configs/pseries_defconfig | 1 -
15 files changed, 27 deletions(-)
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 60d5fa2c3b93..3894ba8f8ffc 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -56,7 +56,6 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
CONFIG_IBM_EMAC=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index d50aca608736..3a6381aa9fdc 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -51,9 +51,6 @@ CONFIG_NET_IPIP=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_MTD=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config
index 151164cf8cb3..0cb24b33c88e 100644
--- a/arch/powerpc/configs/86xx-hw.config
+++ b/arch/powerpc/configs/86xx-hw.config
@@ -32,8 +32,6 @@ CONFIG_HW_RANDOM=y
CONFIG_HZ_1000=y
CONFIG_I2C_MPC=y
CONFIG_I2C=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
CONFIG_INPUT_FF_MEMLESS=m
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSEDEV is not set
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
index 3c7dad19a691..df37efed0aec 100644
--- a/arch/powerpc/configs/fsl-emb-nonhw.config
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -56,7 +56,6 @@ CONFIG_IKCONFIG=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MROUTE=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index a68c7f3af10e..1c674c4c1d86 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -51,7 +51,6 @@ CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_IRC=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index ea59f3d146df..d4be64f190ff 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -37,7 +37,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index e39346b3dc3b..e75d3f3060c9 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -47,7 +47,6 @@ CONFIG_MTD_UBI=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=1
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_BLK_DEV_RAM_DAX=y
CONFIG_EEPROM_AT24=y
CONFIG_EEPROM_AT25=y
CONFIG_SCSI=y
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index be125729635c..95d43f8a3869 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -19,7 +19,6 @@ CONFIG_MPC836x_MDS=y
CONFIG_MPC836x_RDK=y
CONFIG_MPC837x_MDS=y
CONFIG_MPC837x_RDB=y
-CONFIG_SBC834x=y
CONFIG_ASP834x=y
CONFIG_QE_GPIO=y
CONFIG_MATH_EMULATION=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 3d53d69ed36c..1fed6be95d53 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -45,7 +45,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 08b7f4cef243..af9af03059e4 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -58,7 +58,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_EEPROM_LEGACY=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
-CONFIG_CHR_DEV_OSST=y
CONFIG_BLK_DEV_SR=y
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 05e325ca3fbd..665a8d7cded0 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -75,7 +75,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -90,13 +89,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_IP_DCCP=m
-CONFIG_IRDA=m
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-CONFIG_IRTTY_SIR=m
CONFIG_BT=m
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index afc0dd73a1e6..cf30fc24413b 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -245,7 +245,6 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_CXGB3=m
CONFIG_INFINIBAND_CXGB4=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_INFINIBAND_IPOIB=m
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 25f6c91e843a..7e48693775f4 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 8d7e3e98856d..48759656a067 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -268,7 +268,6 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_CXGB3=m
CONFIG_INFINIBAND_CXGB4=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_INFINIBAND_IPOIB=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 894e8d85fb48..efd5398928d5 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -223,7 +223,6 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_CXGB3=m
CONFIG_INFINIBAND_CXGB4=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_INFINIBAND_IPOIB=m
--
2.25.1
^ permalink raw reply related
* [PATCH 4/9] powerpc/64e: Drop dead BOOK3E_MMU_TLB_STATS code
From: Michael Ellerman @ 2020-07-24 13:17 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20200724131728.1643966-1-mpe@ellerman.id.au>
This code was merged 11 years ago in commit 13363ab9b9d0 ("powerpc:
Add definitions used by exception handling on 64-bit Book3E") but was
never able to be built because CONFIG_BOOK3E_MMU_TLB_STATS never
existed. Remove it.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/include/asm/exception-64e.h | 53 +-----------------------
arch/powerpc/mm/nohash/tlb_low_64e.S | 47 ++-------------------
2 files changed, 4 insertions(+), 96 deletions(-)
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index 72b6657acd2d..40cdcb2fb057 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -66,14 +66,7 @@
#define EX_TLB_SRR0 (10 * 8)
#define EX_TLB_SRR1 (11 * 8)
#define EX_TLB_R7 (12 * 8)
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define EX_TLB_R8 (13 * 8)
-#define EX_TLB_R9 (14 * 8)
-#define EX_TLB_LR (15 * 8)
-#define EX_TLB_SIZE (16 * 8)
-#else
#define EX_TLB_SIZE (13 * 8)
-#endif
#define START_EXCEPTION(label) \
.globl exc_##label##_book3e; \
@@ -110,8 +103,7 @@ exc_##label##_book3e:
std r11,EX_TLB_R12(r12); \
mtspr SPRN_SPRG_TLB_EXFRAME,r14; \
std r15,EX_TLB_SRR1(r12); \
- std r16,EX_TLB_SRR0(r12); \
- TLB_MISS_PROLOG_STATS
+ std r16,EX_TLB_SRR0(r12);
/* And these are the matching epilogs that restores things
*
@@ -143,7 +135,6 @@ exc_##label##_book3e:
mtspr SPRN_SRR0,r15; \
ld r15,EX_TLB_R15(r12); \
mtspr SPRN_SRR1,r16; \
- TLB_MISS_RESTORE_STATS \
ld r16,EX_TLB_R16(r12); \
ld r12,EX_TLB_R12(r12); \
@@ -158,48 +149,6 @@ exc_##label##_book3e:
addi r11,r13,PACA_EXTLB; \
TLB_MISS_RESTORE(r11)
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define TLB_MISS_PROLOG_STATS \
- mflr r10; \
- std r8,EX_TLB_R8(r12); \
- std r9,EX_TLB_R9(r12); \
- std r10,EX_TLB_LR(r12);
-#define TLB_MISS_RESTORE_STATS \
- ld r16,EX_TLB_LR(r12); \
- ld r9,EX_TLB_R9(r12); \
- ld r8,EX_TLB_R8(r12); \
- mtlr r16;
-#define TLB_MISS_STATS_D(name) \
- addi r9,r13,MMSTAT_DSTATS+name; \
- bl tlb_stat_inc;
-#define TLB_MISS_STATS_I(name) \
- addi r9,r13,MMSTAT_ISTATS+name; \
- bl tlb_stat_inc;
-#define TLB_MISS_STATS_X(name) \
- ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \
- cmpdi cr2,r8,-1; \
- beq cr2,61f; \
- addi r9,r13,MMSTAT_DSTATS+name; \
- b 62f; \
-61: addi r9,r13,MMSTAT_ISTATS+name; \
-62: bl tlb_stat_inc;
-#define TLB_MISS_STATS_SAVE_INFO \
- std r14,EX_TLB_ESR(r12); /* save ESR */
-#define TLB_MISS_STATS_SAVE_INFO_BOLTED \
- std r14,PACA_EXTLB+EX_TLB_ESR(r13); /* save ESR */
-#else
-#define TLB_MISS_PROLOG_STATS
-#define TLB_MISS_RESTORE_STATS
-#define TLB_MISS_PROLOG_STATS_BOLTED
-#define TLB_MISS_RESTORE_STATS_BOLTED
-#define TLB_MISS_STATS_D(name)
-#define TLB_MISS_STATS_I(name)
-#define TLB_MISS_STATS_X(name)
-#define TLB_MISS_STATS_Y(name)
-#define TLB_MISS_STATS_SAVE_INFO
-#define TLB_MISS_STATS_SAVE_INFO_BOLTED
-#endif
-
#define SET_IVOR(vector_number, vector_offset) \
LOAD_REG_ADDR(r3,interrupt_base_book3e);\
ori r3,r3,vector_offset@l; \
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index d5e2704d0096..bf24451f3e71 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -71,7 +71,6 @@ START_BTB_FLUSH_SECTION
END_BTB_FLUSH_SECTION
std r7,EX_TLB_R7(r12)
#endif
- TLB_MISS_PROLOG_STATS
.endm
.macro tlb_epilog_bolted
@@ -85,7 +84,6 @@ END_BTB_FLUSH_SECTION
mtcr r14
ld r14,EX_TLB_R14(r12)
ld r15,EX_TLB_R15(r12)
- TLB_MISS_RESTORE_STATS
ld r16,EX_TLB_R16(r12)
mfspr r12,SPRN_SPRG_GEN_SCRATCH
.endm
@@ -128,7 +126,6 @@ END_BTB_FLUSH_SECTION
ori r10,r10,_PAGE_PRESENT
oris r11,r10,_PAGE_ACCESSED@h
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_bolted
tlb_miss_common_bolted:
@@ -209,7 +206,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
tlbwe
tlb_miss_done_bolted:
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
@@ -229,11 +225,9 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
bne itlb_miss_fault_bolted
dtlb_miss_fault_bolted:
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_data_storage_book3e
itlb_miss_fault_bolted:
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_instruction_storage_book3e
@@ -243,7 +237,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
srdi r15,r16,60 /* get region */
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne- itlb_miss_fault_bolted
li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
@@ -276,7 +269,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
srdi. r15,r16,60 /* get region */
ori r16,r16,1
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_e6500 /* user/kernel test */
b tlb_miss_common_e6500
@@ -288,7 +280,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
srdi. r15,r16,60 /* get region */
rldicr r16,r16,0,62
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_e6500 /* user vs kernel check */
/*
@@ -460,7 +451,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
.endm
tlb_unlock_e6500
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
@@ -519,11 +509,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
andi. r16,r16,1
bne itlb_miss_fault_e6500
dtlb_miss_fault_e6500:
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_data_storage_book3e
itlb_miss_fault_e6500:
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_instruction_storage_book3e
#endif /* CONFIG_PPC_FSL_BOOK3E */
@@ -548,7 +536,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
mfspr r16,SPRN_DEAR /* get faulting address */
srdi r15,r16,60 /* get region */
cmpldi cr0,r15,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* The page tables are mapped virtually linear. At this point, though,
@@ -600,7 +587,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
/* We got a crappy address, just fault with whatever DEAR and ESR
* are here
*/
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
@@ -624,7 +610,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
*/
srdi r15,r16,60 /* get region */
cmpldi cr0,r15,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* We do the user/kernel test for the PID here along with the RW test
@@ -646,7 +631,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
beq+ normal_tlb_miss
/* We got a crappy address, just fault */
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
@@ -745,7 +729,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
* level 0 and just going back to userland. They are only needed
* if you are going to take an access fault
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -757,11 +740,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
ld r15,EX_TLB_ESR(r12)
mtspr SPRN_DEAR,r14
mtspr SPRN_ESR,r15
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
@@ -899,7 +880,6 @@ BEGIN_MMU_FTR_SECTION
1:
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
/* Return to caller, normal case */
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -935,18 +915,15 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
beq 1f
mtspr SPRN_DEAR,r15
mtspr SPRN_ESR,r16
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
virt_page_table_tlb_miss_whacko_fault:
/* The linear fault will restart everything so ESR and DEAR will
* not have been clobbered, let's just fault with what we have
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
@@ -971,7 +948,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
mfspr r16,SPRN_DEAR /* get faulting address */
srdi r11,r16,60 /* get region */
cmpldi cr0,r11,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* We do the user/kernel test for the PID here along with the RW test
@@ -991,7 +967,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
/* We got a crappy address, just fault with whatever DEAR and ESR
* are here
*/
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
@@ -1015,7 +990,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
*/
srdi r11,r16,60 /* get region */
cmpldi cr0,r11,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* We do the user/kernel test for the PID here along with the RW test
@@ -1033,7 +1007,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
beq+ htw_tlb_miss
/* We got a crappy address, just fault */
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
@@ -1130,7 +1103,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
* level 0 and just going back to userland. They are only needed
* if you are going to take an access fault
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -1142,11 +1114,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
beq 1f
mtspr SPRN_DEAR,r16
mtspr SPRN_ESR,r14
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
/*
@@ -1221,7 +1191,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
* We do that because we can't resume a fault within a TLB
* miss handler, due to MAS and TLB reservation being clobbered.
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
TLB_MISS_EPILOG_ERROR
rfi
@@ -1233,13 +1202,3 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
b exc_data_storage_book3e
1: TLB_MISS_EPILOG_ERROR_SPECIAL
b exc_instruction_storage_book3e
-
-
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-.tlb_stat_inc:
-1: ldarx r8,0,r9
- addi r8,r8,1
- stdcx. r8,0,r9
- bne- 1b
- blr
-#endif
--
2.25.1
^ permalink raw reply related
* [PATCH 3/9] powerpc/52xx: Fix comment about CONFIG_BDI*
From: Michael Ellerman @ 2020-07-24 13:17 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20200724131728.1643966-1-mpe@ellerman.id.au>
There's a comment in lite5200_sleep.S that refers to "CONFIG_BDI*".
This confuses scripts/checkkconfigsymbols.py, which thinks it should
be able to find CONFIG_BDI.
Change the comment to refer to CONFIG_BDI_SWITCH, which is presumably
roughly what it was referring to. AFAICS there never has been a
CONFIG_BDI.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
If anyone has a better idea what it means feel free to reply.
---
arch/powerpc/platforms/52xx/lite5200_sleep.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 70083649c9ea..11475c58ea43 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -56,7 +56,7 @@
/*
* save stuff BDI overwrites
* 0xf0 (0xe0->0x100 gets overwritten when BDI connected;
- * even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?))
+ * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?))
* WARNING: self-refresh doesn't seem to work when BDI2000 is connected,
* possibly because BDI sets SDRAM registers before wakeup code does
*/
--
2.25.1
^ permalink raw reply related
* [PATCH 6/9] powerpc/32s: Remove TAUException wart in traps.c
From: Michael Ellerman @ 2020-07-24 13:17 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20200724131728.1643966-1-mpe@ellerman.id.au>
All 32-bit and 64-bit builds that don't have CONFIG_TAU_INT enabled (all
of them) get a definition of TAUException() in traps.c.
On 64-bit it's completely useless, and just wastes ~120 bytes of text.
On 32-bit it allows the kernel to link because head_32.S calls it
unconditionally.
Instead follow the example of altivec_assist_exception(), and if
CONFIG_TAU_INT is not enabled just point it at unknown_exception using
the preprocessor.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
Can we just remove TAU_INT entirely? It's in zero defconfigs and
doesn't sound like something anyone really wants to enable:
However, on some cpus it appears that the TAU interrupt hardware
is buggy and can cause a situation which would lead to unexplained hard
lockups.
Unless you are extending the TAU driver, or enjoy kernel/hardware
debugging, leave this option off.
---
arch/powerpc/kernel/head_32.S | 4 ++++
arch/powerpc/kernel/traps.c | 8 --------
2 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 705c042309d8..dcfb7dceb6d6 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -671,6 +671,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#ifndef CONFIG_ALTIVEC
#define altivec_assist_exception unknown_exception
+#endif
+
+#ifndef CONFIG_TAU_INT
+#define TAUException unknown_exception
#endif
EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 97413a385720..d1ebe152f210 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2060,14 +2060,6 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
NOKPROBE_SYMBOL(DebugException);
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-#if !defined(CONFIG_TAU_INT)
-void TAUException(struct pt_regs *regs)
-{
- printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n",
- regs->nip, regs->msr, regs->trap, print_tainted());
-}
-#endif /* CONFIG_INT_TAU */
-
#ifdef CONFIG_ALTIVEC
void altivec_assist_exception(struct pt_regs *regs)
{
--
2.25.1
^ permalink raw reply related
* [PATCH 5/9] powerpc/32s: Fix CONFIG_BOOK3S_601 uses
From: Michael Ellerman @ 2020-07-24 13:17 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20200724131728.1643966-1-mpe@ellerman.id.au>
We have two uses of CONFIG_BOOK3S_601, which doesn't exist. Fix them
to use CONFIG_PPC_BOOK3S_601 which is the correct symbol.
Fixes: 12c3f1fd87bf ("powerpc/32s: get rid of CPU_FTR_601 feature")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
I think the bug in get_cycles() at least demonstrates that no one has
booted a 601 since v5.4. Time to drop 601?
---
arch/powerpc/include/asm/ptrace.h | 2 +-
arch/powerpc/include/asm/timex.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index f194339cef3b..155a197c0aa1 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -243,7 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs)
}
#define arch_has_single_step() (1)
-#ifndef CONFIG_BOOK3S_601
+#ifndef CONFIG_PPC_BOOK3S_601
#define arch_has_block_step() (true)
#else
#define arch_has_block_step() (false)
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index d2d2c4bd8435..6047402b0a4d 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -17,7 +17,7 @@ typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
{
- if (IS_ENABLED(CONFIG_BOOK3S_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return 0;
return mftb();
--
2.25.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox