* Re: [PATCH kernel v3] powerpc/pci: Remove LSI mappings on device teardown
From: Cédric Le Goater @ 2020-12-02 14:47 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201202005222.5477-1-aik@ozlabs.ru>
On 12/2/20 1:52 AM, Alexey Kardashevskiy wrote:
> From: Oliver O'Halloran <oohall@gmail.com>
>
> When a passthrough IO adapter is removed from a pseries machine using hash
> MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS
> to clear all page table entries related to the adapter. If some are still
> present, the RTAS call which isolates the PCI slot returns error 9001
> "valid outstanding translations" and the removal of the IO adapter fails.
> This is because when the PHBs are scanned, Linux maps automatically the
> INTx interrupts in the Linux interrupt number space but these are never
> removed.
>
> This problem can be fixed by adding the corresponding unmap operation when
> the device is removed. There's no pcibios_* hook for the remove case, but
> the same effect can be achieved using a bus notifier.
>
> Because INTx are shared among PHBs (and potentially across the system),
> this adds tracking of virq to unmap them only when the last user is gone.
>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> [aik: added refcounter]
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
I did some PHB hotplug tests on a KVM guest and an LPAR using only LSIs.
Tested-by: Cédric Le Goater <clg@kaod.org>
Thanks Alexey,
C.
> ---
> Changes:
> v3:
> * free @vi on error path
>
> v2:
> * added refcounter
> ---
> arch/powerpc/kernel/pci-common.c | 82 ++++++++++++++++++++++++++++++--
> 1 file changed, 78 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index be108616a721..2b555997b295 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
> return NULL;
> }
>
> +struct pci_intx_virq {
> + int virq;
> + struct kref kref;
> + struct list_head list_node;
> +};
> +
> +static LIST_HEAD(intx_list);
> +static DEFINE_MUTEX(intx_mutex);
> +
> +static void ppc_pci_intx_release(struct kref *kref)
> +{
> + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref);
> +
> + list_del(&vi->list_node);
> + irq_dispose_mapping(vi->virq);
> + kfree(vi);
> +}
> +
> +static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
> + unsigned long action, void *data)
> +{
> + struct pci_dev *pdev = to_pci_dev(data);
> +
> + if (action == BUS_NOTIFY_DEL_DEVICE) {
> + struct pci_intx_virq *vi;
> +
> + mutex_lock(&intx_mutex);
> + list_for_each_entry(vi, &intx_list, list_node) {
> + if (vi->virq == pdev->irq) {
> + kref_put(&vi->kref, ppc_pci_intx_release);
> + break;
> + }
> + }
> + mutex_unlock(&intx_mutex);
> + }
> +
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block ppc_pci_unmap_irq_notifier = {
> + .notifier_call = ppc_pci_unmap_irq_line,
> +};
> +
> +static int ppc_pci_register_irq_notifier(void)
> +{
> + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
> +}
> +arch_initcall(ppc_pci_register_irq_notifier);
> +
> /*
> * Reads the interrupt pin to determine if interrupt is use by card.
> * If the interrupt is used, then gets the interrupt line from the
> @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
> static int pci_read_irq_line(struct pci_dev *pci_dev)
> {
> int virq;
> + struct pci_intx_virq *vi, *vitmp;
> +
> + /* Preallocate vi as rewind is complex if this fails after mapping */
> + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL);
> + if (!vi)
> + return -1;
>
> pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
>
> @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
> * function.
> */
> if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
> - return -1;
> + goto error_exit;
> if (pin == 0)
> - return -1;
> + goto error_exit;
> if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
> line == 0xff || line == 0) {
> - return -1;
> + goto error_exit;
> }
> pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
> line, pin);
> @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
>
> if (!virq) {
> pr_debug(" Failed to map !\n");
> - return -1;
> + goto error_exit;
> }
>
> pr_debug(" Mapped to linux irq %d\n", virq);
>
> pci_dev->irq = virq;
>
> + mutex_lock(&intx_mutex);
> + list_for_each_entry(vitmp, &intx_list, list_node) {
> + if (vitmp->virq == virq) {
> + kref_get(&vitmp->kref);
> + kfree(vi);
> + vi = NULL;
> + break;
> + }
> + }
> + if (vi) {
> + vi->virq = virq;
> + kref_init(&vi->kref);
> + list_add_tail(&vi->list_node, &intx_list);
> + }
> + mutex_unlock(&intx_mutex);
> +
> return 0;
> +error_exit:
> + kfree(vi);
> + return -1;
> }
>
> /*
>
^ permalink raw reply
* Re: powerpc 5.10-rcN boot failures with RCU_SCALE_TEST=m
From: Uladzislau Rezki @ 2020-12-02 14:39 UTC (permalink / raw)
To: Michael Ellerman, Paul E . McKenney
Cc: rcu, linuxppc-dev, Paul E . McKenney, Daniel Axtens
In-Reply-To: <87v9dkuwy3.fsf@mpe.ellerman.id.au>
On Thu, Dec 03, 2020 at 01:03:32AM +1100, Michael Ellerman wrote:
> Daniel Axtens <dja@axtens.net> writes:
> > Hi all,
> >
> > I'm having some difficulty tracking down a bug.
> >
> > Some configurations of the powerpc kernel since somewhere in the 5.10
> > merge window fail to boot on some ppc64 systems. They hang while trying
> > to bring up SMP. It seems to depend on the RCU_SCALE/PERF_TEST option.
> > (It was renamed in the 5.10 merge window.)
> >
> > I can reproduce it as follows with qemu tcg:
> >
> > make -j64 pseries_le_defconfig
> > scripts/config -m RCU_SCALE_TEST
> > scripts/config -m RCU_PERF_TEST
> > make -j 64 vmlinux CC="ccache gcc"
> >
> > qemu-system-ppc64 -cpu power9 -M pseries -m 1G -nographic -vga none -smp 4 -kernel vmlinux
> >
> > ...
> > [ 0.036284][ T0] Mount-cache hash table entries: 8192 (order: 0, 65536 bytes, linear)
> > [ 0.036481][ T0] Mountpoint-cache hash table entries: 8192 (order: 0, 65536 bytes, linear)
> > [ 0.148168][ T1] POWER9 performance monitor hardware support registered
> > [ 0.151118][ T1] rcu: Hierarchical SRCU implementation.
> > [ 0.186660][ T1] smp: Bringing up secondary CPUs ...
> > <hangs>
>
> One does not simply hang :)
>
> > I have no idea why RCU_SCALE/PERF_TEST would be causing this, but that
> > seems to be what does it: if I don't set that, the kernel boots fine.
>
> It seems to be TASKS_RCU that is the key.
>
> I don't need RCU_SCALE_TEST enabled, I can trigger it just with the
> following applied:
>
> diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
> index 0ebe15a84985..f3500c95d6a1 100644
> --- a/kernel/rcu/Kconfig
> +++ b/kernel/rcu/Kconfig
> @@ -78,7 +78,7 @@ config TASKS_RCU_GENERIC
> task-based RCU implementations. Not for manual selection.
>
> config TASKS_RCU
> - def_bool PREEMPTION
> + def_bool y
> help
> This option enables a task-based RCU implementation that uses
> only voluntary context switch (not preemption!), idle, and
>
>
> And bisect points to:
> 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
>
> Which moved init_kprobes() prior to SMP bringup.
>
>
> For some reason when it gets stuck sysrq doesn't work, but I was able to
> get it into gdb and manually call handle_sysrq('t') to get the output
> below.
>
> The SMP bringup stalls because _cpu_up() is blocked trying to take
> cpu_hotplug_lock for writing:
>
> [ 401.403132][ T0] task:swapper/0 state:D stack:12512 pid: 1 ppid: 0 flags:0x00000800
> [ 401.403502][ T0] Call Trace:
> [ 401.403907][ T0] [c0000000062c37d0] [c0000000062c3830] 0xc0000000062c3830 (unreliable)
> [ 401.404068][ T0] [c0000000062c39b0] [c000000000019d70] __switch_to+0x2e0/0x4a0
> [ 401.404189][ T0] [c0000000062c3a10] [c000000000b87228] __schedule+0x288/0x9b0
> [ 401.404257][ T0] [c0000000062c3ad0] [c000000000b879b8] schedule+0x68/0x120
> [ 401.404324][ T0] [c0000000062c3b00] [c000000000184ad4] percpu_down_write+0x164/0x170
> [ 401.404390][ T0] [c0000000062c3b50] [c000000000116b68] _cpu_up+0x68/0x280
> [ 401.404475][ T0] [c0000000062c3bb0] [c000000000116e70] cpu_up+0xf0/0x140
> [ 401.404546][ T0] [c0000000062c3c30] [c00000000011776c] bringup_nonboot_cpus+0xac/0xf0
> [ 401.404643][ T0] [c0000000062c3c80] [c000000000eea1b8] smp_init+0x40/0xcc
> [ 401.404727][ T0] [c0000000062c3ce0] [c000000000ec43dc] kernel_init_freeable+0x1e0/0x3a0
> [ 401.404799][ T0] [c0000000062c3db0] [c000000000011ec4] kernel_init+0x24/0x150
> [ 401.404958][ T0] [c0000000062c3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
>
> It can't get it because kprobe_optimizer() has taken it for read and is now
> blocked waiting for synchronize_rcu_tasks():
>
> [ 401.418808][ T0] task:kworker/0:1 state:D stack:13392 pid: 12 ppid: 2 flags:0x00000800
> [ 401.418951][ T0] Workqueue: events kprobe_optimizer
> [ 401.419078][ T0] Call Trace:
> [ 401.419121][ T0] [c0000000062ef650] [c0000000062ef710] 0xc0000000062ef710 (unreliable)
> [ 401.419213][ T0] [c0000000062ef830] [c000000000019d70] __switch_to+0x2e0/0x4a0
> [ 401.419281][ T0] [c0000000062ef890] [c000000000b87228] __schedule+0x288/0x9b0
> [ 401.419347][ T0] [c0000000062ef950] [c000000000b879b8] schedule+0x68/0x120
> [ 401.419415][ T0] [c0000000062ef980] [c000000000b8e664] schedule_timeout+0x2a4/0x340
> [ 401.419484][ T0] [c0000000062efa80] [c000000000b894ec] wait_for_completion+0x9c/0x170
> [ 401.419552][ T0] [c0000000062efae0] [c0000000001ac85c] __wait_rcu_gp+0x19c/0x210
> [ 401.419619][ T0] [c0000000062efb40] [c0000000001ac90c] synchronize_rcu_tasks_generic+0x3c/0x70
> [ 401.419690][ T0] [c0000000062efbe0] [c00000000022a3dc] kprobe_optimizer+0x1dc/0x470
> [ 401.419757][ T0] [c0000000062efc60] [c000000000136684] process_one_work+0x2f4/0x530
> [ 401.419823][ T0] [c0000000062efd20] [c000000000138d28] worker_thread+0x78/0x570
> [ 401.419891][ T0] [c0000000062efdb0] [c000000000142424] kthread+0x194/0x1a0
> [ 401.419976][ T0] [c0000000062efe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
>
> But why is the synchronize_rcu_tasks() not completing?
>
I think that it is because RCU is not fully initialized by that time.
The 36dadef23fcc ("kprobes: Init kprobes in early_initcall") patch
switches to early_initcall(), which runs earlier in the boot sequence than the
core_initcall() that is used to complete RCU setup in rcu_set_runtime_mode().
--
Vlad Rezki
^ permalink raw reply
* Re: [PATCH 6/8] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Andy Lutomirski @ 2020-12-02 14:38 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-arch, Arnd Bergmann, x86, linux-kernel, Nicholas Piggin,
linux-mm, Mathieu Desnoyers, linuxppc-dev
In-Reply-To: <20201202141957.GJ3021@hirez.programming.kicks-ass.net>
> On Dec 2, 2020, at 6:20 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Sun, Nov 29, 2020 at 02:01:39AM +1000, Nicholas Piggin wrote:
>> + * - A delayed freeing and RCU-like quiescing sequence based on
>> + * mm switching to avoid IPIs completely.
>
> That one's interesting too. so basically you want to count switch_mm()
> invocations on each CPU. Then, periodically snapshot the counter on each
> CPU, and when they've all changed, increment a global counter.
>
> Then, you snapshot the global counter and wait for it to increment
> (twice I think, the first increment might already be in progress).
>
> The only question here is what should drive this machinery.. the tick
> probably.
>
> This shouldn't be too hard to do I think.
>
> Something a little like so perhaps?
I don’t think this will work. A CPU can go idle with lazy mm and nohz forever. This could lead to unbounded memory use on a lightly loaded system.
^ permalink raw reply
* Re: [PATCH 6/8] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Peter Zijlstra @ 2020-12-02 14:19 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Arnd Bergmann, x86, linux-kernel, linux-mm,
Mathieu Desnoyers, linuxppc-dev
In-Reply-To: <20201128160141.1003903-7-npiggin@gmail.com>
On Sun, Nov 29, 2020 at 02:01:39AM +1000, Nicholas Piggin wrote:
> + * - A delayed freeing and RCU-like quiescing sequence based on
> + * mm switching to avoid IPIs completely.
That one's interesting too. So basically you want to count switch_mm()
invocations on each CPU. Then, periodically snapshot the counter on each
CPU, and when they've all changed, increment a global counter.
Then, you snapshot the global counter and wait for it to increment
(twice I think, the first increment might already be in progress).
The only question here is what should drive this machinery.. the tick
probably.
This shouldn't be too hard to do I think.
Something a little like so perhaps?
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41404afb7f4c..27b64a60a468 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4525,6 +4525,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* finish_task_switch()'s mmdrop().
*/
switch_mm_irqs_off(prev->active_mm, next->mm, next);
+ rq->nr_mm_switches++;
if (!prev->mm) { // from kernel
/* will mmdrop() in finish_task_switch(). */
@@ -4739,6 +4740,80 @@ unsigned long long task_sched_runtime(struct task_struct *p)
return ns;
}
+static DEFINE_PER_CPU(unsigned long[2], mm_switches);
+
+static struct {
+ unsigned long __percpu *switches[2];
+ unsigned long generation;
+ atomic_t complete;
+ struct wait_queue_dead wait;
+} mm_foo = {
+ .switches = &mm_switches,
+ .generation = 0,
+ .complete = -1, // XXX bootstrap, hotplug
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(mm_foo.wait),
+};
+
+static void mm_gen_tick(int cpu, struct rq *rq)
+{
+ unsigned long prev, curr, switches = rq->nr_mm_switches;
+ int idx = READ_ONCE(mm_foo.generation) & 1;
+
+ /* DATA-DEP on mm_foo.generation */
+
+ prev = __this_cpu_read(mm_foo.switches[idx^1]);
+ curr = __this_cpu_read(mm_foo.switches[idx]);
+
+ /* we haven't switched since the last generation */
+ if (prev == switches)
+ return false;
+
+ __this_cpu_write(mm_foo.switches[idx], switches);
+
+ /*
+ * If @curr is less than @prev, this is the first update of
+ * this generation, per the above, switches has also increased since,
+ * so mark out CPU complete.
+ */
+ if ((long)(curr - prev) < 0 && atomic_dec_and_test(&mm_foo.complete)) {
+ /*
+ * All CPUs are complete, IOW they all switched at least once
+ * since the last generation. Reset the completion counter and
+ * increment the generation.
+ */
+ atomic_set(&mm_foo.complete, nr_online_cpus());
+ /*
+ * Matches the address dependency above:
+ *
+ * idx = gen & 1 complete = nr_cpus
+ * <DATA-DEP> <WMB>
+ * curr = sw[idx] generation++;
+ * prev = sw[idx^1]
+ * if (curr < prev)
+ * complete--
+ *
+ * If we don't observe the new generation; we'll not decrement. If we
+ * do see the new generation, we must also see the new completion count.
+ */
+ smp_wmb();
+ mm_foo.generation++;
+ return true;
+ }
+
+ return false;
+}
+
+static void mm_gen_wake(void)
+{
+ wake_up_all(&mm_foo.wait);
+}
+
+static void mm_gen_wait(void)
+{
+ unsigned int gen = READ_ONCE(mm_foo.generation);
+ wait_event(&mm_foo.wait, READ_ONCE(mm_foo.generation) - gen > 1);
+}
+
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -4750,6 +4825,7 @@ void scheduler_tick(void)
struct task_struct *curr = rq->curr;
struct rq_flags rf;
unsigned long thermal_pressure;
+ bool wake_mm_gen;
arch_scale_freq_tick();
sched_clock_tick();
@@ -4763,8 +4839,13 @@ void scheduler_tick(void)
calc_global_load_tick(rq);
psi_task_tick(rq);
+ wake_mm_gen = mm_gen_tick(cpu, rq);
+
rq_unlock(rq, &rf);
+ if (wake_mm_gen)
+ mm_gen_wake();
+
perf_event_task_tick();
#ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bf9d8da7d35e..62fb685db8d0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -927,6 +927,7 @@ struct rq {
unsigned int ttwu_pending;
#endif
u64 nr_switches;
+ u64 nr_mm_switches;
#ifdef CONFIG_UCLAMP_TASK
/* Utilization clamp values based on CPU's RUNNABLE tasks */
^ permalink raw reply related
* Re: [PATCH kernel v3] powerpc/pci: Remove LSI mappings on device teardown
From: Frederic Barrat @ 2020-12-02 14:17 UTC (permalink / raw)
To: Alexey Kardashevskiy, linuxppc-dev
Cc: Oliver O'Halloran, Cédric Le Goater
In-Reply-To: <20201202005222.5477-1-aik@ozlabs.ru>
On 02/12/2020 01:52, Alexey Kardashevskiy wrote:
> From: Oliver O'Halloran <oohall@gmail.com>
>
> When a passthrough IO adapter is removed from a pseries machine using hash
> MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS
> to clear all page table entries related to the adapter. If some are still
> present, the RTAS call which isolates the PCI slot returns error 9001
> "valid outstanding translations" and the removal of the IO adapter fails.
> This is because when the PHBs are scanned, Linux maps automatically the
> INTx interrupts in the Linux interrupt number space but these are never
> removed.
>
> This problem can be fixed by adding the corresponding unmap operation when
> the device is removed. There's no pcibios_* hook for the remove case, but
> the same effect can be achieved using a bus notifier.
>
> Because INTx are shared among PHBs (and potentially across the system),
> this adds tracking of virq to unmap them only when the last user is gone.
>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> [aik: added refcounter]
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
Looks ok to me.
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
> Changes:
> v3:
> * free @vi on error path
>
> v2:
> * added refcounter
> ---
> arch/powerpc/kernel/pci-common.c | 82 ++++++++++++++++++++++++++++++--
> 1 file changed, 78 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index be108616a721..2b555997b295 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
> return NULL;
> }
>
> +struct pci_intx_virq {
> + int virq;
> + struct kref kref;
> + struct list_head list_node;
> +};
> +
> +static LIST_HEAD(intx_list);
> +static DEFINE_MUTEX(intx_mutex);
> +
> +static void ppc_pci_intx_release(struct kref *kref)
> +{
> + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref);
> +
> + list_del(&vi->list_node);
> + irq_dispose_mapping(vi->virq);
> + kfree(vi);
> +}
> +
> +static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
> + unsigned long action, void *data)
> +{
> + struct pci_dev *pdev = to_pci_dev(data);
> +
> + if (action == BUS_NOTIFY_DEL_DEVICE) {
> + struct pci_intx_virq *vi;
> +
> + mutex_lock(&intx_mutex);
> + list_for_each_entry(vi, &intx_list, list_node) {
> + if (vi->virq == pdev->irq) {
> + kref_put(&vi->kref, ppc_pci_intx_release);
> + break;
> + }
> + }
> + mutex_unlock(&intx_mutex);
> + }
> +
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block ppc_pci_unmap_irq_notifier = {
> + .notifier_call = ppc_pci_unmap_irq_line,
> +};
> +
> +static int ppc_pci_register_irq_notifier(void)
> +{
> + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
> +}
> +arch_initcall(ppc_pci_register_irq_notifier);
> +
> /*
> * Reads the interrupt pin to determine if interrupt is use by card.
> * If the interrupt is used, then gets the interrupt line from the
> @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
> static int pci_read_irq_line(struct pci_dev *pci_dev)
> {
> int virq;
> + struct pci_intx_virq *vi, *vitmp;
> +
> + /* Preallocate vi as rewind is complex if this fails after mapping */
> + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL);
> + if (!vi)
> + return -1;
>
> pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
>
> @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
> * function.
> */
> if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
> - return -1;
> + goto error_exit;
> if (pin == 0)
> - return -1;
> + goto error_exit;
> if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
> line == 0xff || line == 0) {
> - return -1;
> + goto error_exit;
> }
> pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
> line, pin);
> @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
>
> if (!virq) {
> pr_debug(" Failed to map !\n");
> - return -1;
> + goto error_exit;
> }
>
> pr_debug(" Mapped to linux irq %d\n", virq);
>
> pci_dev->irq = virq;
>
> + mutex_lock(&intx_mutex);
> + list_for_each_entry(vitmp, &intx_list, list_node) {
> + if (vitmp->virq == virq) {
> + kref_get(&vitmp->kref);
> + kfree(vi);
> + vi = NULL;
> + break;
> + }
> + }
> + if (vi) {
> + vi->virq = virq;
> + kref_init(&vi->kref);
> + list_add_tail(&vi->list_node, &intx_list);
> + }
> + mutex_unlock(&intx_mutex);
> +
> return 0;
> +error_exit:
> + kfree(vi);
> + return -1;
> }
>
> /*
>
^ permalink raw reply
* Re: powerpc 5.10-rcN boot failures with RCU_SCALE_TEST=m
From: Michael Ellerman @ 2020-12-02 14:03 UTC (permalink / raw)
To: Daniel Axtens, rcu, linuxppc-dev, Paul E . McKenney
In-Reply-To: <87eekfh80a.fsf@dja-thinkpad.axtens.net>
Daniel Axtens <dja@axtens.net> writes:
> Hi all,
>
> I'm having some difficulty tracking down a bug.
>
> Some configurations of the powerpc kernel since somewhere in the 5.10
> merge window fail to boot on some ppc64 systems. They hang while trying
> to bring up SMP. It seems to depend on the RCU_SCALE/PERF_TEST option.
> (It was renamed in the 5.10 merge window.)
>
> I can reproduce it as follows with qemu tcg:
>
> make -j64 pseries_le_defconfig
> scripts/config -m RCU_SCALE_TEST
> scripts/config -m RCU_PERF_TEST
> make -j 64 vmlinux CC="ccache gcc"
>
> qemu-system-ppc64 -cpu power9 -M pseries -m 1G -nographic -vga none -smp 4 -kernel vmlinux
>
> ...
> [ 0.036284][ T0] Mount-cache hash table entries: 8192 (order: 0, 65536 bytes, linear)
> [ 0.036481][ T0] Mountpoint-cache hash table entries: 8192 (order: 0, 65536 bytes, linear)
> [ 0.148168][ T1] POWER9 performance monitor hardware support registered
> [ 0.151118][ T1] rcu: Hierarchical SRCU implementation.
> [ 0.186660][ T1] smp: Bringing up secondary CPUs ...
> <hangs>
One does not simply hang :)
> I have no idea why RCU_SCALE/PERF_TEST would be causing this, but that
> seems to be what does it: if I don't set that, the kernel boots fine.
It seems to be TASKS_RCU that is the key.
I don't need RCU_SCALE_TEST enabled, I can trigger it just with the
following applied:
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 0ebe15a84985..f3500c95d6a1 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -78,7 +78,7 @@ config TASKS_RCU_GENERIC
task-based RCU implementations. Not for manual selection.
config TASKS_RCU
- def_bool PREEMPTION
+ def_bool y
help
This option enables a task-based RCU implementation that uses
only voluntary context switch (not preemption!), idle, and
And bisect points to:
36dadef23fcc ("kprobes: Init kprobes in early_initcall")
Which moved init_kprobes() prior to SMP bringup.
For some reason when it gets stuck sysrq doesn't work, but I was able to
get it into gdb and manually call handle_sysrq('t') to get the output
below.
The SMP bringup stalls because _cpu_up() is blocked trying to take
cpu_hotplug_lock for writing:
[ 401.403132][ T0] task:swapper/0 state:D stack:12512 pid: 1 ppid: 0 flags:0x00000800
[ 401.403502][ T0] Call Trace:
[ 401.403907][ T0] [c0000000062c37d0] [c0000000062c3830] 0xc0000000062c3830 (unreliable)
[ 401.404068][ T0] [c0000000062c39b0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.404189][ T0] [c0000000062c3a10] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.404257][ T0] [c0000000062c3ad0] [c000000000b879b8] schedule+0x68/0x120
[ 401.404324][ T0] [c0000000062c3b00] [c000000000184ad4] percpu_down_write+0x164/0x170
[ 401.404390][ T0] [c0000000062c3b50] [c000000000116b68] _cpu_up+0x68/0x280
[ 401.404475][ T0] [c0000000062c3bb0] [c000000000116e70] cpu_up+0xf0/0x140
[ 401.404546][ T0] [c0000000062c3c30] [c00000000011776c] bringup_nonboot_cpus+0xac/0xf0
[ 401.404643][ T0] [c0000000062c3c80] [c000000000eea1b8] smp_init+0x40/0xcc
[ 401.404727][ T0] [c0000000062c3ce0] [c000000000ec43dc] kernel_init_freeable+0x1e0/0x3a0
[ 401.404799][ T0] [c0000000062c3db0] [c000000000011ec4] kernel_init+0x24/0x150
[ 401.404958][ T0] [c0000000062c3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
It can't get it because kprobe_optimizer() has taken it for read and is now
blocked waiting for synchronize_rcu_tasks():
[ 401.418808][ T0] task:kworker/0:1 state:D stack:13392 pid: 12 ppid: 2 flags:0x00000800
[ 401.418951][ T0] Workqueue: events kprobe_optimizer
[ 401.419078][ T0] Call Trace:
[ 401.419121][ T0] [c0000000062ef650] [c0000000062ef710] 0xc0000000062ef710 (unreliable)
[ 401.419213][ T0] [c0000000062ef830] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.419281][ T0] [c0000000062ef890] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.419347][ T0] [c0000000062ef950] [c000000000b879b8] schedule+0x68/0x120
[ 401.419415][ T0] [c0000000062ef980] [c000000000b8e664] schedule_timeout+0x2a4/0x340
[ 401.419484][ T0] [c0000000062efa80] [c000000000b894ec] wait_for_completion+0x9c/0x170
[ 401.419552][ T0] [c0000000062efae0] [c0000000001ac85c] __wait_rcu_gp+0x19c/0x210
[ 401.419619][ T0] [c0000000062efb40] [c0000000001ac90c] synchronize_rcu_tasks_generic+0x3c/0x70
[ 401.419690][ T0] [c0000000062efbe0] [c00000000022a3dc] kprobe_optimizer+0x1dc/0x470
[ 401.419757][ T0] [c0000000062efc60] [c000000000136684] process_one_work+0x2f4/0x530
[ 401.419823][ T0] [c0000000062efd20] [c000000000138d28] worker_thread+0x78/0x570
[ 401.419891][ T0] [c0000000062efdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.419976][ T0] [c0000000062efe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
But why is the synchronize_rcu_tasks() not completing?
Hopefully Paul can help there, otherwise I'll try and work out how to
dump some RCU state when it gets stuck.
Full sysrq-t output below.
cheers
[ 401.402512][ T0] sysrq: Show State
[ 401.403132][ T0] task:swapper/0 state:D stack:12512 pid: 1 ppid: 0 flags:0x00000800
[ 401.403502][ T0] Call Trace:
[ 401.403907][ T0] [c0000000062c37d0] [c0000000062c3830] 0xc0000000062c3830 (unreliable)
[ 401.404068][ T0] [c0000000062c39b0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.404189][ T0] [c0000000062c3a10] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.404257][ T0] [c0000000062c3ad0] [c000000000b879b8] schedule+0x68/0x120
[ 401.404324][ T0] [c0000000062c3b00] [c000000000184ad4] percpu_down_write+0x164/0x170
[ 401.404390][ T0] [c0000000062c3b50] [c000000000116b68] _cpu_up+0x68/0x280
[ 401.404475][ T0] [c0000000062c3bb0] [c000000000116e70] cpu_up+0xf0/0x140
[ 401.404546][ T0] [c0000000062c3c30] [c00000000011776c] bringup_nonboot_cpus+0xac/0xf0
[ 401.404643][ T0] [c0000000062c3c80] [c000000000eea1b8] smp_init+0x40/0xcc
[ 401.404727][ T0] [c0000000062c3ce0] [c000000000ec43dc] kernel_init_freeable+0x1e0/0x3a0
[ 401.404799][ T0] [c0000000062c3db0] [c000000000011ec4] kernel_init+0x24/0x150
[ 401.404958][ T0] [c0000000062c3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.405221][ T0] task:kthreadd state:S stack:13712 pid: 2 ppid: 0 flags:0x00000800
[ 401.405326][ T0] Call Trace:
[ 401.405380][ T0] [c0000000062c7a60] [c0000000062c7ac0] 0xc0000000062c7ac0 (unreliable)
[ 401.405473][ T0] [c0000000062c7c40] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.405565][ T0] [c0000000062c7ca0] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.405639][ T0] [c0000000062c7d60] [c000000000b879b8] schedule+0x68/0x120
[ 401.405720][ T0] [c0000000062c7d90] [c000000000143508] kthreadd+0x278/0x2f0
[ 401.405798][ T0] [c0000000062c7e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.405908][ T0] task:rcu_gp state:I stack:14576 pid: 3 ppid: 2 flags:0x00000800
[ 401.407471][ T0] Call Trace:
[ 401.407690][ T0] [c0000000062cba00] [c0000000062cba60] 0xc0000000062cba60 (unreliable)
[ 401.407851][ T0] [c0000000062cbbe0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.407952][ T0] [c0000000062cbc40] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.408037][ T0] [c0000000062cbd00] [c000000000b879b8] schedule+0x68/0x120
[ 401.408123][ T0] [c0000000062cbd30] [c000000000136ed4] rescuer_thread+0x2c4/0x3f0
[ 401.408268][ T0] [c0000000062cbdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.408351][ T0] [c0000000062cbe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.408463][ T0] task:rcu_par_gp state:I stack:14624 pid: 4 ppid: 2 flags:0x00000800
[ 401.408629][ T0] Call Trace:
[ 401.408725][ T0] [c0000000062cfa00] [c0000000062cfa60] 0xc0000000062cfa60 (unreliable)
[ 401.408830][ T0] [c0000000062cfbe0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.408927][ T0] [c0000000062cfc40] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.409030][ T0] [c0000000062cfd00] [c000000000b879b8] schedule+0x68/0x120
[ 401.409143][ T0] [c0000000062cfd30] [c000000000136ed4] rescuer_thread+0x2c4/0x3f0
[ 401.409256][ T0] [c0000000062cfdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.409349][ T0] [c0000000062cfe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.409458][ T0] task:kworker/0:0 state:I stack:13888 pid: 5 ppid: 2 flags:0x00000800
[ 401.409749][ T0] Workqueue: 0x0 (events)
[ 401.409923][ T0] Call Trace:
[ 401.409986][ T0] [c0000000062d39f0] [c0000000062d3a50] 0xc0000000062d3a50 (unreliable)
[ 401.410125][ T0] [c0000000062d3bd0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.410263][ T0] [c0000000062d3c30] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.410371][ T0] [c0000000062d3cf0] [c000000000b879b8] schedule+0x68/0x120
[ 401.410450][ T0] [c0000000062d3d20] [c000000000138dac] worker_thread+0xfc/0x570
[ 401.410567][ T0] [c0000000062d3db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.410671][ T0] [c0000000062d3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.410795][ T0] task:kworker/0:0H state:I stack:14624 pid: 6 ppid: 2 flags:0x00000800
[ 401.411024][ T0] Call Trace:
[ 401.411117][ T0] [c0000000062d79f0] [c0000000062d7a50] 0xc0000000062d7a50 (unreliable)
[ 401.411267][ T0] [c0000000062d7bd0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.411401][ T0] [c0000000062d7c30] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.411484][ T0] [c0000000062d7cf0] [c000000000b879b8] schedule+0x68/0x120
[ 401.411575][ T0] [c0000000062d7d20] [c000000000138dac] worker_thread+0xfc/0x570
[ 401.411666][ T0] [c0000000062d7db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.411722][ T0] [c0000000062d7e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.411809][ T0] task:kworker/u8:0 state:I stack:14624 pid: 7 ppid: 2 flags:0x00000800
[ 401.411923][ T0] Call Trace:
[ 401.411969][ T0] [c0000000062db9f0] [c0000000062dba50] 0xc0000000062dba50 (unreliable)
[ 401.412045][ T0] [c0000000062dbbd0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.412143][ T0] [c0000000062dbc30] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.413324][ T0] [c0000000062dbcf0] [c000000000b879b8] schedule+0x68/0x120
[ 401.413402][ T0] [c0000000062dbd20] [c000000000138dac] worker_thread+0xfc/0x570
[ 401.413468][ T0] [c0000000062dbdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.413522][ T0] [c0000000062dbe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.413595][ T0] task:mm_percpu_wq state:I stack:14624 pid: 8 ppid: 2 flags:0x00000800
[ 401.413699][ T0] Call Trace:
[ 401.413745][ T0] [c0000000062dfa00] [c0000000062dfa60] 0xc0000000062dfa60 (unreliable)
[ 401.413826][ T0] [c0000000062dfbe0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.413894][ T0] [c0000000062dfc40] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.413960][ T0] [c0000000062dfd00] [c000000000b879b8] schedule+0x68/0x120
[ 401.414025][ T0] [c0000000062dfd30] [c000000000136ed4] rescuer_thread+0x2c4/0x3f0
[ 401.414105][ T0] [c0000000062dfdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.414185][ T0] [c0000000062dfe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.414275][ T0] task:ksoftirqd/0 state:S stack:14544 pid: 9 ppid: 2 flags:0x00000800
[ 401.414506][ T0] Call Trace:
[ 401.414729][ T0] [c0000000062e3a20] [c0000000062e3a80] 0xc0000000062e3a80 (unreliable)
[ 401.415109][ T0] [c0000000062e3c00] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.415651][ T0] [c0000000062e3c60] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.415944][ T0] [c0000000062e3d20] [c000000000b879b8] schedule+0x68/0x120
[ 401.416044][ T0] [c0000000062e3d50] [c000000000148774] smpboot_thread_fn+0x254/0x260
[ 401.416104][ T0] [c0000000062e3db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.416177][ T0] [c0000000062e3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.416261][ T0] task:rcu_sched state:I stack:12928 pid: 10 ppid: 2 flags:0x00000800
[ 401.416378][ T0] Call Trace:
[ 401.416423][ T0] [c0000000062e7990] [c0000000062e7a50] 0xc0000000062e7a50 (unreliable)
[ 401.416501][ T0] [c0000000062e7b70] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.416569][ T0] [c0000000062e7bd0] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.416633][ T0] [c0000000062e7c90] [c000000000b879b8] schedule+0x68/0x120
[ 401.416705][ T0] [c0000000062e7cc0] [c0000000001b7b54] rcu_gp_kthread+0xa94/0xc00
[ 401.416798][ T0] [c0000000062e7db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.416871][ T0] [c0000000062e7e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.416965][ T0] task:migration/0 state:S stack:14496 pid: 11 ppid: 2 flags:0x00000800
[ 401.417050][ T0] Call Trace:
[ 401.417092][ T0] [c0000000062eba20] [c0000000062ebaa0] 0xc0000000062ebaa0 (unreliable)
[ 401.417206][ T0] [c0000000062ebc00] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.417397][ T0] [c0000000062ebc60] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.417631][ T0] [c0000000062ebd20] [c000000000b879b8] schedule+0x68/0x120
[ 401.417930][ T0] [c0000000062ebd50] [c000000000148774] smpboot_thread_fn+0x254/0x260
[ 401.418251][ T0] [c0000000062ebdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.418520][ T0] [c0000000062ebe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.418808][ T0] task:kworker/0:1 state:D stack:13392 pid: 12 ppid: 2 flags:0x00000800
[ 401.418951][ T0] Workqueue: events kprobe_optimizer
[ 401.419078][ T0] Call Trace:
[ 401.419121][ T0] [c0000000062ef650] [c0000000062ef710] 0xc0000000062ef710 (unreliable)
[ 401.419213][ T0] [c0000000062ef830] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.419281][ T0] [c0000000062ef890] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.419347][ T0] [c0000000062ef950] [c000000000b879b8] schedule+0x68/0x120
[ 401.419415][ T0] [c0000000062ef980] [c000000000b8e664] schedule_timeout+0x2a4/0x340
[ 401.419484][ T0] [c0000000062efa80] [c000000000b894ec] wait_for_completion+0x9c/0x170
[ 401.419552][ T0] [c0000000062efae0] [c0000000001ac85c] __wait_rcu_gp+0x19c/0x210
[ 401.419619][ T0] [c0000000062efb40] [c0000000001ac90c] synchronize_rcu_tasks_generic+0x3c/0x70
[ 401.419690][ T0] [c0000000062efbe0] [c00000000022a3dc] kprobe_optimizer+0x1dc/0x470
[ 401.419757][ T0] [c0000000062efc60] [c000000000136684] process_one_work+0x2f4/0x530
[ 401.419823][ T0] [c0000000062efd20] [c000000000138d28] worker_thread+0x78/0x570
[ 401.419891][ T0] [c0000000062efdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.419976][ T0] [c0000000062efe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.420051][ T0] task:cpuhp/0 state:S stack:14544 pid: 13 ppid: 2 flags:0x00000800
[ 401.420136][ T0] Call Trace:
[ 401.420197][ T0] [c0000000062ffa20] [c0000000062ffa80] 0xc0000000062ffa80 (unreliable)
[ 401.420342][ T0] [c0000000062ffc00] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.420519][ T0] [c0000000062ffc60] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.420704][ T0] [c0000000062ffd20] [c000000000b879b8] schedule+0x68/0x120
[ 401.420904][ T0] [c0000000062ffd50] [c000000000148774] smpboot_thread_fn+0x254/0x260
[ 401.421134][ T0] [c0000000062ffdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.421487][ T0] [c0000000062ffe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.421834][ T0] task:cpuhp/1 state:S stack:13584 pid: 14 ppid: 2 flags:0x00000800
[ 401.422146][ T0] Call Trace:
[ 401.422233][ T0] [c0000000063c3a20] [c0000000063c3a80] 0xc0000000063c3a80 (unreliable)
[ 401.422314][ T0] [c0000000063c3c00] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.422378][ T0] [c0000000063c3c60] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.422444][ T0] [c0000000063c3d20] [c000000000b879b8] schedule+0x68/0x120
[ 401.422511][ T0] [c0000000063c3d50] [c000000000148774] smpboot_thread_fn+0x254/0x260
[ 401.422575][ T0] [c0000000063c3db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.422658][ T0] [c0000000063c3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.422742][ T0] task:migration/1 state:S stack:13472 pid: 15 ppid: 2 flags:0x00000800
[ 401.422826][ T0] Call Trace:
[ 401.422873][ T0] [c0000000063c7a20] [c0000000063c7aa0] 0xc0000000063c7aa0 (unreliable)
[ 401.423195][ T0] [c0000000063c7c00] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.423285][ T0] [c0000000063c7c60] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.423354][ T0] [c0000000063c7d20] [c000000000b879b8] schedule+0x68/0x120
[ 401.423421][ T0] [c0000000063c7d50] [c000000000148774] smpboot_thread_fn+0x254/0x260
[ 401.423486][ T0] [c0000000063c7db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.423576][ T0] [c0000000063c7e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.423783][ T0] task:ksoftirqd/1 state:S stack:14544 pid: 16 ppid: 2 flags:0x00000800
[ 401.424112][ T0] Call Trace:
[ 401.424410][ T0] [c0000000063cba20] [c0000000063cba80] 0xc0000000063cba80 (unreliable)
[ 401.424775][ T0] [c0000000063cbc00] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.425005][ T0] [c0000000063cbc60] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.425124][ T0] [c0000000063cbd20] [c000000000b879b8] schedule+0x68/0x120
[ 401.425197][ T0] [c0000000063cbd50] [c000000000148774] smpboot_thread_fn+0x254/0x260
[ 401.425299][ T0] [c0000000063cbdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.425398][ T0] [c0000000063cbe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.425504][ T0] task:kworker/1:0 state:I stack:14624 pid: 17 ppid: 2 flags:0x00000800
[ 401.425684][ T0] Call Trace:
[ 401.425748][ T0] [c0000000063cf9f0] [c0000000063cfa50] 0xc0000000063cfa50 (unreliable)
[ 401.425845][ T0] [c0000000063cfbd0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.425916][ T0] [c0000000063cfc30] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.425983][ T0] [c0000000063cfcf0] [c000000000b879b8] schedule+0x68/0x120
[ 401.426050][ T0] [c0000000063cfd20] [c000000000138dac] worker_thread+0xfc/0x570
[ 401.426123][ T0] [c0000000063cfdb0] [c000000000142424] kthread+0x194/0x1a0
[ 401.426229][ T0] [c0000000063cfe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.426327][ T0] task:kworker/1:0H state:I stack:14320 pid: 18 ppid: 2 flags:0x00000800
[ 401.426494][ T0] Call Trace:
[ 401.426577][ T0] [c0000000063d39f0] [c0000000063d3ab0] 0xc0000000063d3ab0 (unreliable)
[ 401.426685][ T0] [c0000000063d3bd0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.426772][ T0] [c0000000063d3c30] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.426868][ T0] [c0000000063d3cf0] [c000000000b879b8] schedule+0x68/0x120
[ 401.426969][ T0] [c0000000063d3d20] [c000000000138dac] worker_thread+0xfc/0x570
[ 401.427082][ T0] [c0000000063d3db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.427244][ T0] [c0000000063d3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.427403][ T0] task:kworker/0:2 state:I stack:14320 pid: 19 ppid: 2 flags:0x00000800
[ 401.427624][ T0] Workqueue: 0x0 (events)
[ 401.427768][ T0] Call Trace:
[ 401.427840][ T0] [c0000000063d79f0] [c0000000063d7ab0] 0xc0000000063d7ab0 (unreliable)
[ 401.427981][ T0] [c0000000063d7bd0] [c000000000019d70] __switch_to+0x2e0/0x4a0
[ 401.428096][ T0] [c0000000063d7c30] [c000000000b87228] __schedule+0x288/0x9b0
[ 401.428303][ T0] [c0000000063d7cf0] [c000000000b879b8] schedule+0x68/0x120
[ 401.428394][ T0] [c0000000063d7d20] [c000000000138dac] worker_thread+0xfc/0x570
[ 401.428470][ T0] [c0000000063d7db0] [c000000000142424] kthread+0x194/0x1a0
[ 401.428575][ T0] [c0000000063d7e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
[ 401.429454][ T0] Sched Debug Version: v0.11, 5.10.0-rc6-gcc-8.2.0-01356-ga1aeabd25a36-dirty #563
[ 401.429604][ T0] ktime : 383770.000000
[ 401.429683][ T0] sched_clk : 401429.227980
[ 401.429744][ T0] cpu_clk : 401429.232778
[ 401.429799][ T0] jiffies : 4294975673
[ 401.429926][ T0]
[ 401.430003][ T0] sysctl_sched
[ 401.430066][ T0] .sysctl_sched_latency : 12.000000
[ 401.430152][ T0] .sysctl_sched_min_granularity : 1.500000
[ 401.430339][ T0] .sysctl_sched_wakeup_granularity : 2.000000
[ 401.430524][ T0] .sysctl_sched_child_runs_first : 0
[ 401.430688][ T0] .sysctl_sched_features : 4139835
[ 401.430900][ T0] .sysctl_sched_tunable_scaling : 1 (logarithmic)
[ 401.431124][ T0]
[ 401.431697][ T0] cpu#0
[ 401.431766][ T0] .nr_running : 0
[ 401.431813][ T0] .nr_switches : 1055
[ 401.431865][ T0] .nr_uninterruptible : 2
[ 401.432042][ T0] .next_balance : 4294.937296
[ 401.432103][ T0] .curr->pid : 0
[ 401.432195][ T0] .clock : 401423.022270
[ 401.432313][ T0] .clock_task : 401423.022270
[ 401.432415][ T0] .avg_idle : 1000000
[ 401.432488][ T0] .max_idle_balance_cost : 500000
[ 401.432817][ T0]
[ 401.433054][ T0] cfs_rq[0]:/
[ 401.433196][ T0] .exec_clock : 0.000000
[ 401.433386][ T0] .MIN_vruntime : 0.000001
[ 401.433503][ T0] .min_vruntime : 278.095255
[ 401.433596][ T0] .max_vruntime : 0.000001
[ 401.433691][ T0] .spread : 0.000000
[ 401.433784][ T0] .spread0 : 0.000000
[ 401.433886][ T0] .nr_spread_over : 0
[ 401.433954][ T0] .nr_running : 0
[ 401.434039][ T0] .load : 0
[ 401.434127][ T0] .load_avg : 0
[ 401.434235][ T0] .runnable_avg : 0
[ 401.434341][ T0] .util_avg : 0
[ 401.434451][ T0] .util_est_enqueued : 0
[ 401.434540][ T0] .removed.load_avg : 0
[ 401.434611][ T0] .removed.util_avg : 0
[ 401.434697][ T0] .removed.runnable_avg : 0
[ 401.434811][ T0] .tg_load_avg_contrib : 0
[ 401.434902][ T0] .tg_load_avg : 0
[ 401.435203][ T0]
[ 401.435308][ T0] rt_rq[0]:
[ 401.435394][ T0] .rt_nr_running : 0
[ 401.435481][ T0] .rt_nr_migratory : 0
[ 401.435569][ T0] .rt_throttled : 0
[ 401.435678][ T0] .rt_time : 0.000000
[ 401.435772][ T0] .rt_runtime : 950.000000
[ 401.435942][ T0]
[ 401.436017][ T0] dl_rq[0]:
[ 401.436116][ T0] .dl_nr_running : 0
[ 401.436212][ T0] .dl_nr_migratory : 0
[ 401.436301][ T0] .dl_bw->bw : 996147
[ 401.436386][ T0] .dl_bw->total_bw : 0
[ 401.436476][ T0]
[ 401.436560][ T0] runnable tasks:
[ 401.436614][ T0] S task PID tree-key switches prio wait-time sum-exec sum-sleep
[ 401.436687][ T0] -------------------------------------------------------------------------------------------------------------
[ 401.436875][ T0] D swapper/0 1 84.404220 26 120 0.000000 69.398526 0.000000 0 0 /
[ 401.437357][ T0] S kthreadd 2 80.816484 18 120 0.000000 24.915098 0.000000 0 0 /
[ 401.437554][ T0] I rcu_gp 3 26.218815 2 100 0.000000 1.771584 0.000000 0 0 /
[ 401.437698][ T0] I rcu_par_gp 4 28.434004 2 100 0.000000 0.138216 0.000000 0 0 /
[ 401.437853][ T0] I kworker/0:0 5 86.357041 8 120 0.000000 7.010072 0.000000 0 0 /
[ 401.438002][ T0] I kworker/0:0H 6 32.481348 2 100 0.000000 0.097112 0.000000 0 0 /
[ 401.438144][ T0] I kworker/u8:0 7 32.635000 2 120 0.000000 0.086604 0.000000 0 0 /
[ 401.438368][ T0] I mm_percpu_wq 8 34.185643 2 100 0.000000 0.118036 0.000000 0 0 /
[ 401.438544][ T0] S ksoftirqd/0 9 36.753489 3 120 0.000000 0.617720 0.000000 0 0 /
[ 401.438686][ T0] I rcu_sched 10 79.402224 7 120 0.000000 9.592868 0.000000 0 0 /
[ 401.438890][ T0] S migration/0 11 0.100901 98 0 0.000000 40.445210 0.000000 0 0 /
[ 401.439041][ T0] D kworker/0:1 12 83.770462 4 120 0.000000 4.564404 0.000000 0 0 /
[ 401.439230][ T0] S cpuhp/0 13 54.369911 3 120 0.000000 1.278230 0.000000 0 0 /
[ 401.439412][ T0] I kworker/0:2 19 278.095255 384 120 0.000000 187.691038 0.000000 0 0 /
[ 401.439939][ T0]
[ 401.440140][ T0] cpu#1
[ 401.440250][ T0] .nr_running : 0
[ 401.440331][ T0] .nr_switches : 196
[ 401.440434][ T0] .nr_uninterruptible : 0
[ 401.440500][ T0] .next_balance : 4294.937296
[ 401.440552][ T0] .curr->pid : 0
[ 401.440631][ T0] .clock : 401422.799786
[ 401.440689][ T0] .clock_task : 401422.799786
[ 401.440777][ T0] .avg_idle : 1000000
[ 401.440865][ T0] .max_idle_balance_cost : 500000
[ 401.440945][ T0]
[ 401.441027][ T0] rt_rq[1]:
[ 401.441076][ T0] .rt_nr_running : 0
[ 401.441127][ T0] .rt_nr_migratory : 0
[ 401.441197][ T0] .rt_throttled : 0
[ 401.441255][ T0] .rt_time : 0.000000
[ 401.441315][ T0] .rt_runtime : 950.000000
[ 401.441395][ T0]
[ 401.441445][ T0] dl_rq[1]:
[ 401.441497][ T0] .dl_nr_running : 0
[ 401.441555][ T0] .dl_nr_migratory : 0
[ 401.441609][ T0] .dl_bw->bw : 996147
[ 401.441665][ T0] .dl_bw->total_bw : 0
[ 401.441717][ T0]
[ 401.441755][ T0] runnable tasks:
[ 401.441817][ T0] S task PID tree-key switches prio wait-time sum-exec sum-sleep
[ 401.441888][ T0] -------------------------------------------------------------------------------------------------------------
[ 401.441995][ T0] S cpuhp/1 14 7.177520 3 120 0.000000 11.790932 0.000000 0 0 /
[ 401.442211][ T0] S migration/1 15 0.000000 98 0 0.000000 39.188082 0.000000 0 0 /
[ 401.442383][ T0] S ksoftirqd/1 16 0.312838 3 120 0.000000 3.826106 0.000000 0 0 /
[ 401.442615][ T0] I kworker/1:0 17 -3.720346 3 120 0.000000 0.592222 0.000000 0 0 /
[ 401.442879][ T0] I kworker/1:0H 18 -4.047847 3 100 0.000000 0.211754 0.000000 0 0 /
[ 401.443037][ T0]
[ 401.443407][ T0]
[ 401.443407][ T0] Showing all locks held in the system:
[ 401.443722][ T0] 2 locks held by swapper/0/1:
[ 401.443859][ T0] #0: c000000000f6be60 (cpu_add_remove_lock){....}-{3:3}, at: cpu_up+0xcc/0x140
[ 401.444836][ T0] #1: c000000000f6bdd0 (cpu_hotplug_lock){....}-{0:0}, at: _cpu_up+0x68/0x280
[ 401.445096][ T0] 5 locks held by kworker/0:1/12:
[ 401.445223][ T0] #0: c000000006070138 ((wq_completion)events){....}-{0:0}, at: process_one_work+0x278/0x530
[ 401.445408][ T0] #1: c0000000062efcc0 ((optimizing_work).work){....}-{0:0}, at: process_one_work+0x278/0x530
[ 401.445528][ T0] #2: c00000000107de60 (kprobe_mutex){....}-{3:3}, at: kprobe_optimizer+0x50/0x470
[ 401.445610][ T0] #3: c000000000f6bdd0 (cpu_hotplug_lock){....}-{0:0}, at: kprobe_optimizer+0x58/0x470
[ 401.445746][ T0] #4: c000000000f6d018 (text_mutex){....}-{3:3}, at: kprobe_optimizer+0x70/0x470
[ 401.445895][ T0]
[ 401.445934][ T0] =============================================
[ 401.445934][ T0]
[ 401.446043][ T0] Showing busy workqueues and worker pools:
[ 401.446139][ T0] workqueue events: flags=0x0
[ 401.446275][ T0] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
[ 401.446602][ T0] in-flight: 12:kprobe_optimizer
[ 401.447083][ T0] pool 0: cpus=0 node=0 flags=0x0 nice=0 hung=0s workers=3 idle: 19 5
^ permalink raw reply related
* Re: [PATCH 5/8] powerpc/64s/powernv: ratelimit harmless HMI error printing
From: Michael Ellerman @ 2020-12-02 13:00 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin, kvm-ppc, Mahesh Salgaonkar
In-Reply-To: <20201128070728.825934-6-npiggin@gmail.com>
Nicholas Piggin <npiggin@gmail.com> writes:
> Harmless HMI errors can be triggered by guests in some cases, and don't
> contain much useful information anyway. Ratelimit these to avoid
> flooding the console/logs.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/platforms/powernv/opal-hmi.c | 27 +++++++++++++----------
> 1 file changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
> index 3e1f064a18db..959da6df0227 100644
> --- a/arch/powerpc/platforms/powernv/opal-hmi.c
> +++ b/arch/powerpc/platforms/powernv/opal-hmi.c
> @@ -240,19 +240,22 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
> break;
> }
>
> - printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
> - level, sevstr,
> - hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
> - "Recovered" : "Not recovered");
> - error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
> - hmi_error_types[hmi_evt->type]
> - : "Unknown";
> - printk("%s Error detail: %s\n", level, error_info);
> - printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
> - if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
> - (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
> - printk("%s TFMR: %016llx\n", level,
> + if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || printk_ratelimit()) {
> + printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
> + level, sevstr,
> + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
> + "Recovered" : "Not recovered");
> + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
> + hmi_error_types[hmi_evt->type]
> + : "Unknown";
> + printk("%s Error detail: %s\n", level, error_info);
> + printk("%s HMER: %016llx\n", level,
> + be64_to_cpu(hmi_evt->hmer));
> + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
> + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
> + printk("%s TFMR: %016llx\n", level,
> be64_to_cpu(hmi_evt->tfmr));
> + }
Same comment RE printk_ratelimit(), I folded this in:
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 959da6df0227..f0c1830deb51 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -213,6 +213,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
"A hypervisor resource error occurred",
"CAPP recovery process is in progress",
};
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
/* Print things out */
if (hmi_evt->version < OpalHMIEvt_V1) {
@@ -240,7 +242,7 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
break;
}
- if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || printk_ratelimit()) {
+ if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) {
printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
level, sevstr,
hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
cheers
^ permalink raw reply related
* Re: [PATCH 4/8] KVM: PPC: Book3S HV: Ratelimit machine check messages coming from guests
From: Michael Ellerman @ 2020-12-02 12:58 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin, kvm-ppc, Mahesh Salgaonkar
In-Reply-To: <20201128070728.825934-5-npiggin@gmail.com>
Nicholas Piggin <npiggin@gmail.com> writes:
> A number of machine check exceptions are triggerable by the guest.
> Ratelimit these to avoid a guest flooding the host console and logs.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/kvm/book3s_hv.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index e3b1839fc251..c94f9595133d 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1328,8 +1328,12 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
> r = RESUME_GUEST;
> break;
> case BOOK3S_INTERRUPT_MACHINE_CHECK:
> - /* Print the MCE event to host console. */
> - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
> + /*
> + * Print the MCE event to host console. Ratelimit so the guest
> + * can't flood the host log.
> + */
> + if (printk_ratelimit())
> + machine_check_print_event_info(&vcpu->arch.mce_evt,false, true);
You're not supposed to use printk_ratelimit(), because there's a single
rate limit state for all printks, i.e. some other noisy printk() can
cause this one to never be printed.
I folded this in:
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cbbc4f0a26fe..cfaa91b27112 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1327,12 +1327,14 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_SYSTEM_RESET:
r = RESUME_GUEST;
break;
- case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ case BOOK3S_INTERRUPT_MACHINE_CHECK: {
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
/*
* Print the MCE event to host console. Ratelimit so the guest
* can't flood the host log.
*/
- if (printk_ratelimit())
+ if (__ratelimit(&rs))
machine_check_print_event_info(&vcpu->arch.mce_evt,false, true);
/*
@@ -1361,6 +1363,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
r = RESUME_HOST;
break;
+ }
case BOOK3S_INTERRUPT_PROGRAM:
{
ulong flags;
@@ -1520,12 +1523,16 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ {
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
/* Pass the machine check to the L1 guest */
r = RESUME_HOST;
/* Print the MCE event to host console. */
- if (printk_ratelimit())
+ if (__ratelimit(&rs))
machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
break;
+ }
/*
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
cheers
^ permalink raw reply related
* Re: [PATCH 6/8] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Peter Zijlstra @ 2020-12-02 12:45 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Arnd Bergmann, x86, linux-kernel, linux-mm,
Mathieu Desnoyers, linuxppc-dev
In-Reply-To: <20201202111731.GA2414@hirez.programming.kicks-ass.net>
On Wed, Dec 02, 2020 at 12:17:31PM +0100, Peter Zijlstra wrote:
> So the obvious 'improvement' here would be something like:
>
> for_each_online_cpu(cpu) {
> p = rcu_dereference(cpu_rq(cpu)->curr;
> if (p->active_mm != mm)
> continue;
> __cpumask_set_cpu(cpu, tmpmask);
> }
> on_each_cpu_mask(tmpmask, ...);
>
> The remote CPU will never switch _to_ @mm, on account of it being quite
> dead, but it is quite prone to false negatives.
>
> Consider that __schedule() sets rq->curr *before* context_switch(), this
> means we'll see next->active_mm, even though prev->active_mm might still
> be our @mm.
>
> Now, because we'll be removing the atomic ops from context_switch()'s
> active_mm swizzling, I think we can change this to something like the
> below. The hope being that the cost of the new barrier can be offset by
> the loss of the atomics.
>
> Hmm ?
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 41404afb7f4c..2597c5c0ccb0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4509,7 +4509,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
> if (!next->mm) { // to kernel
> enter_lazy_tlb(prev->active_mm, next);
>
> - next->active_mm = prev->active_mm;
> if (prev->mm) // from user
> mmgrab(prev->active_mm);
> else
> @@ -4524,6 +4523,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
> * case 'prev->active_mm == next->mm' through
> * finish_task_switch()'s mmdrop().
> */
> + next->active_mm = next->mm;
> switch_mm_irqs_off(prev->active_mm, next->mm, next);
I think that next->active_mm store should be after switch_mm(),
otherwise we still race.
>
> if (!prev->mm) { // from kernel
> @@ -5713,11 +5713,9 @@ static void __sched notrace __schedule(bool preempt)
>
> if (likely(prev != next)) {
> rq->nr_switches++;
> - /*
> - * RCU users of rcu_dereference(rq->curr) may not see
> - * changes to task_struct made by pick_next_task().
> - */
> - RCU_INIT_POINTER(rq->curr, next);
> +
> + next->active_mm = prev->active_mm;
> + rcu_assign_pointer(rq->curr, next);
> /*
> * The membarrier system call requires each architecture
> * to have a full memory barrier after updating
^ permalink raw reply
* [PATCH] drivers: char: tpm: remove unneeded MODULE_VERSION() usage
From: Enrico Weigelt, metux IT consult @ 2020-12-02 12:15 UTC (permalink / raw)
To: linux-kernel
Cc: jgg, jarkko, paulus, linux-integrity, linuxppc-dev, peterhuewe
Remove MODULE_VERSION(), as it isn't needed at all: the only version
making sense is the kernel version.
Link: https://lkml.org/lkml/2017/11/22/480
Signed-off-by: Enrico Weigelt <info@metux.net>
---
drivers/char/tpm/st33zp24/i2c.c | 1 -
drivers/char/tpm/st33zp24/spi.c | 1 -
drivers/char/tpm/st33zp24/st33zp24.c | 1 -
drivers/char/tpm/tpm-interface.c | 1 -
drivers/char/tpm/tpm_atmel.c | 1 -
drivers/char/tpm/tpm_crb.c | 1 -
drivers/char/tpm/tpm_i2c_infineon.c | 1 -
drivers/char/tpm/tpm_ibmvtpm.c | 1 -
drivers/char/tpm/tpm_infineon.c | 1 -
drivers/char/tpm/tpm_nsc.c | 1 -
drivers/char/tpm/tpm_tis.c | 1 -
drivers/char/tpm/tpm_tis_core.c | 1 -
drivers/char/tpm/tpm_vtpm_proxy.c | 1 -
13 files changed, 13 deletions(-)
diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c
index 7c617edff4ca..7ed9829cacc4 100644
--- a/drivers/char/tpm/st33zp24/i2c.c
+++ b/drivers/char/tpm/st33zp24/i2c.c
@@ -313,5 +313,4 @@ module_i2c_driver(st33zp24_i2c_driver);
MODULE_AUTHOR("TPM support (TPMsupport@list.st.com)");
MODULE_DESCRIPTION("STM TPM 1.2 I2C ST33 Driver");
-MODULE_VERSION("1.3.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c
index a75dafd39445..147efea4eb05 100644
--- a/drivers/char/tpm/st33zp24/spi.c
+++ b/drivers/char/tpm/st33zp24/spi.c
@@ -430,5 +430,4 @@ module_spi_driver(st33zp24_spi_driver);
MODULE_AUTHOR("TPM support (TPMsupport@list.st.com)");
MODULE_DESCRIPTION("STM TPM 1.2 SPI ST33 Driver");
-MODULE_VERSION("1.3.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index 4ec10ab5e576..e0f1a5828993 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -646,5 +646,4 @@ EXPORT_SYMBOL(st33zp24_pm_resume);
MODULE_AUTHOR("TPM support (TPMsupport@list.st.com)");
MODULE_DESCRIPTION("ST33ZP24 TPM 1.2 driver");
-MODULE_VERSION("1.3.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1621ce818705..dfdc68b8bf88 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -514,5 +514,4 @@ module_exit(tpm_exit);
MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 54a6750a6757..35bf249cc95a 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -231,5 +231,4 @@ module_exit(cleanup_atmel);
MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index a9dcf31eadd2..3e72b7b99cce 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -748,5 +748,4 @@ static struct acpi_driver crb_acpi_driver = {
module_acpi_driver(crb_acpi_driver);
MODULE_AUTHOR("Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>");
MODULE_DESCRIPTION("TPM2 Driver");
-MODULE_VERSION("0.1");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index a19d32cb4e94..8920b7c19fcb 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -731,5 +731,4 @@ static struct i2c_driver tpm_tis_i2c_driver = {
module_i2c_driver(tpm_tis_i2c_driver);
MODULE_AUTHOR("Peter Huewe <peter.huewe@infineon.com>");
MODULE_DESCRIPTION("TPM TIS I2C Infineon Driver");
-MODULE_VERSION("2.2.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 994385bf37c0..5b04d113f634 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -750,5 +750,4 @@ module_exit(ibmvtpm_module_exit);
MODULE_AUTHOR("adlai@us.ibm.com");
MODULE_DESCRIPTION("IBM vTPM Driver");
-MODULE_VERSION("1.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index 9c924a1440a9..8a58966c5c9b 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -621,5 +621,4 @@ module_pnp_driver(tpm_inf_pnp_driver);
MODULE_AUTHOR("Marcel Selhorst <tpmdd@sirrix.com>");
MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2");
-MODULE_VERSION("1.9.2");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 038701d48351..6ab2fe7e8782 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -412,5 +412,4 @@ module_exit(cleanup_nsc);
MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 4ed6e660273a..3074235b405d 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -429,5 +429,4 @@ module_init(init_tis);
module_exit(cleanup_tis);
MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 92c51c6cfd1b..20f4b2c7ea52 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1164,5 +1164,4 @@ EXPORT_SYMBOL_GPL(tpm_tis_resume);
MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 91c772e38bb5..18f14162d1c1 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -729,5 +729,4 @@ module_exit(vtpm_module_exit);
MODULE_AUTHOR("Stefan Berger (stefanb@us.ibm.com)");
MODULE_DESCRIPTION("vTPM Driver");
-MODULE_VERSION("0.1");
MODULE_LICENSE("GPL");
--
2.11.0
^ permalink raw reply related
* Re: [PATCH 2/2] powerpc/ps3: make system bus's remove and shutdown callbacks return void
From: Takashi Iwai @ 2020-12-02 12:22 UTC (permalink / raw)
To: Michael Ellerman
Cc: alsa-devel, linux-fbdev, dri-devel, Jaroslav Kysela,
Paul Mackerras, linux-scsi, Alan Stern, Uwe Kleine-König,
Jakub Kicinski, Arnd Bergmann, Bartlomiej Zolnierkiewicz,
James E.J. Bottomley, linux-block, Jens Axboe, Martin K. Petersen,
Geoff Levand, Greg Kroah-Hartman, linux-usb, Takashi Iwai,
Jim Paris, netdev, linuxppc-dev, David S. Miller
In-Reply-To: <875z5kwgkx.fsf@mpe.ellerman.id.au>
On Wed, 02 Dec 2020 13:14:06 +0100,
Michael Ellerman wrote:
>
> Uwe Kleine-König <u.kleine-koenig@pengutronix.de> writes:
> > Hello Michael,
> >
> > On Sat, Nov 28, 2020 at 09:48:30AM +0100, Takashi Iwai wrote:
> >> On Thu, 26 Nov 2020 17:59:50 +0100,
> >> Uwe Kleine-König wrote:
> >> >
> >> > The driver core ignores the return value of struct device_driver::remove
> >> > because there is only little that can be done. For the shutdown callback
> >> > it's ps3_system_bus_shutdown() which ignores the return value.
> >> >
> >> > To simplify the quest to make struct device_driver::remove return void,
> >> > let struct ps3_system_bus_driver::remove return void, too. All users
> >> > already unconditionally return 0, this commit makes it obvious that
> >> > returning an error code is a bad idea and ensures future users behave
> >> > accordingly.
> >> >
> >> > Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> >>
> >> For the sound bit:
> >> Acked-by: Takashi Iwai <tiwai@suse.de>
> >
> > assuming that you are the one who will apply this patch: Note that it
> > depends on patch 1 that Takashi already applied to his tree. So you
> > either have to wait until patch 1 appears in some tree that you merge
> > before applying, or you have to take patch 1, too. (With Takashi
> > optionally dropping it then.)
>
> Thanks. I've picked up both patches.
>
> If Takashi doesn't want to rebase his tree to drop patch 1 that's OK, it
> will just arrive in mainline via two paths, but git should handle it.
Yeah, I'd like to avoid rebasing, so let's get it merged from both
trees. git can handle such a case gracefully.
thanks,
Takashi
^ permalink raw reply
* Re: [PATCH 2/2] powerpc/ps3: make system bus's remove and shutdown callbacks return void
From: Michael Ellerman @ 2020-12-02 12:14 UTC (permalink / raw)
To: Uwe Kleine-König, Takashi Iwai
Cc: alsa-devel, linux-fbdev, dri-devel, Jaroslav Kysela,
Paul Mackerras, linux-scsi, Alan Stern, Jakub Kicinski,
Arnd Bergmann, Bartlomiej Zolnierkiewicz, James E.J. Bottomley,
linux-block, Jens Axboe, Martin K. Petersen, Geoff Levand,
Greg Kroah-Hartman, linux-usb, Takashi Iwai, Jim Paris, netdev,
linuxppc-dev, David S. Miller
In-Reply-To: <20201129173153.jbt3epcxnasbemir@pengutronix.de>
Uwe Kleine-König <u.kleine-koenig@pengutronix.de> writes:
> Hello Michael,
>
> On Sat, Nov 28, 2020 at 09:48:30AM +0100, Takashi Iwai wrote:
>> On Thu, 26 Nov 2020 17:59:50 +0100,
>> Uwe Kleine-König wrote:
>> >
>> > The driver core ignores the return value of struct device_driver::remove
>> > because there is only little that can be done. For the shutdown callback
>> > it's ps3_system_bus_shutdown() which ignores the return value.
>> >
>> > To simplify the quest to make struct device_driver::remove return void,
>> > let struct ps3_system_bus_driver::remove return void, too. All users
>> > already unconditionally return 0, this commit makes it obvious that
>> > returning an error code is a bad idea and ensures future users behave
>> > accordingly.
>> >
>> > Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
>>
>> For the sound bit:
>> Acked-by: Takashi Iwai <tiwai@suse.de>
>
> assuming that you are the one who will apply this patch: Note that it
> depends on patch 1 that Takashi already applied to his tree. So you
> either have to wait until patch 1 appears in some tree that you merge
> before applying, or you have to take patch 1, too. (With Takashi
> optionally dropping it then.)
Thanks. I've picked up both patches.
If Takashi doesn't want to rebase his tree to drop patch 1 that's OK, it
will just arrive in mainline via two paths, but git should handle it.
cheers
^ permalink raw reply
* Re: [PATCH 00/13] ibmvfc: initial MQ development
From: Hannes Reinecke @ 2020-12-02 12:03 UTC (permalink / raw)
To: Tyrel Datwyler, james.bottomley
Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <20201126014824.123831-1-tyreld@linux.ibm.com>
On 11/26/20 2:48 AM, Tyrel Datwyler wrote:
> Recent updates in pHyp Firmware and VIOS releases provide new infrastructure
> towards enabling Subordinate Command Response Queues (Sub-CRQs) such that each
> Sub-CRQ is a channel backed by an actual hardware queue in the FC stack on the
> partner VIOS. Sub-CRQs are registered with the firmware via hypercalls and then
> negotiated with the VIOS via new Management Datagrams (MADs) for channel setup.
>
> This initial implementation adds the necessary Sub-CRQ framework and implements
> the new MADs for negotiating and assigning a set of Sub-CRQs to associated VIOS
> HW backed channels. The event pool and locking still leverages the legacy single
> queue implementation, and as such lock contention is problematic when increasing
> the number of queues. However, this initial work demonstrates a 1.2x factor
> increase in IOPs when configured with two HW queues despite lock contention.
>
Why do you still hold the host lock during submission?
An initial check on the submission code path didn't reveal anything
obvious, so it _should_ be possible to drop the host lock there.
Or at least move it into the submission function itself to avoid lock
contention. Hmm?
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
^ permalink raw reply
* Re: [PATCH] EDAC, mv64x60: Fix error return code in mv64x60_pci_err_probe()
From: Borislav Petkov @ 2020-12-02 11:25 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: cj.chengjian, linux-kernel, Wang ShaoBo, james.morse,
huawei.libin, mchehab, linuxppc-dev, linux-edac
In-Reply-To: <20201124063009.1529-1-bobo.shaobowang@huawei.com>
On Tue, Nov 24, 2020 at 02:30:09PM +0800, Wang ShaoBo wrote:
> Fix to return -ENODEV error code when edac_pci_add_device() failed instead
> of 0 in mv64x60_pci_err_probe(), as done elsewhere in this function.
>
> Fixes: 4f4aeeabc061 ("drivers-edac: add marvell mv64x60 driver")
> Signed-off-by: Wang ShaoBo <bobo.shaobowang@huawei.com>
> ---
> drivers/edac/mv64x60_edac.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
> index 3c68bb525d5d..456b9ca1fe8d 100644
> --- a/drivers/edac/mv64x60_edac.c
> +++ b/drivers/edac/mv64x60_edac.c
> @@ -168,6 +168,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
>
> if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
> edac_dbg(3, "failed edac_pci_add_device()\n");
> + res = -ENODEV;
> goto err;
> }
That driver depends on MV64X60 and I don't see anything in the tree
enabling it and I can't select it AFAICT:
config MV64X60
bool
select PPC_INDIRECT_PCI
select CHECK_CACHE_COHERENCY
PPC folks, what do we do here?
If not used anymore, I'd love to have one less EDAC driver.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply
* Re: [PATCH 6/8] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Peter Zijlstra @ 2020-12-02 11:17 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Arnd Bergmann, x86, linux-kernel, linux-mm,
Mathieu Desnoyers, linuxppc-dev
In-Reply-To: <20201128160141.1003903-7-npiggin@gmail.com>
On Sun, Nov 29, 2020 at 02:01:39AM +1000, Nicholas Piggin wrote:
> +static void shoot_lazy_tlbs(struct mm_struct *mm)
> +{
> + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
> + /*
> + * IPI overheads have not found to be expensive, but they could
> + * be reduced in a number of possible ways, for example (in
> + * roughly increasing order of complexity):
> + * - A batch of mms requiring IPIs could be gathered and freed
> + * at once.
> + * - CPUs could store their active mm somewhere that can be
> + * remotely checked without a lock, to filter out
> + * false-positives in the cpumask.
> + * - After mm_users or mm_count reaches zero, switching away
> + * from the mm could clear mm_cpumask to reduce some IPIs
> + * (some batching or delaying would help).
> + * - A delayed freeing and RCU-like quiescing sequence based on
> + * mm switching to avoid IPIs completely.
> + */
> + on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
> + if (IS_ENABLED(CONFIG_DEBUG_VM))
> + on_each_cpu(do_check_lazy_tlb, (void *)mm, 1);
So the obvious 'improvement' here would be something like:
for_each_online_cpu(cpu) {
p = rcu_dereference(cpu_rq(cpu)->curr;
if (p->active_mm != mm)
continue;
__cpumask_set_cpu(cpu, tmpmask);
}
on_each_cpu_mask(tmpmask, ...);
The remote CPU will never switch _to_ @mm, on account of it being quite
dead, but it is quite prone to false negatives.
Consider that __schedule() sets rq->curr *before* context_switch(), this
means we'll see next->active_mm, even though prev->active_mm might still
be our @mm.
Now, because we'll be removing the atomic ops from context_switch()'s
active_mm swizzling, I think we can change this to something like the
below. The hope being that the cost of the new barrier can be offset by
the loss of the atomics.
Hmm ?
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41404afb7f4c..2597c5c0ccb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4509,7 +4509,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
if (!next->mm) { // to kernel
enter_lazy_tlb(prev->active_mm, next);
- next->active_mm = prev->active_mm;
if (prev->mm) // from user
mmgrab(prev->active_mm);
else
@@ -4524,6 +4523,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* case 'prev->active_mm == next->mm' through
* finish_task_switch()'s mmdrop().
*/
+ next->active_mm = next->mm;
switch_mm_irqs_off(prev->active_mm, next->mm, next);
if (!prev->mm) { // from kernel
@@ -5713,11 +5713,9 @@ static void __sched notrace __schedule(bool preempt)
if (likely(prev != next)) {
rq->nr_switches++;
- /*
- * RCU users of rcu_dereference(rq->curr) may not see
- * changes to task_struct made by pick_next_task().
- */
- RCU_INIT_POINTER(rq->curr, next);
+
+ next->active_mm = prev->active_mm;
+ rcu_assign_pointer(rq->curr, next);
/*
* The membarrier system call requires each architecture
* to have a full memory barrier after updating
^ permalink raw reply related
* Re: CONFIG_PPC_VAS depends on 64k pages...?
From: Will Springer @ 2020-12-02 7:17 UTC (permalink / raw)
To: Sukadev Bhattiprolu, Bulent Abali
Cc: Tulio Magno Quites Machado Filho, daniel, haren, linuxppc-dev,
Raphael M Zinsly
In-Reply-To: <OF66F86744.2ADAED9E-ON85258631.0047CAC6-85258631.0048F494@notes.na.collabserv.com>
On Tuesday, December 1, 2020 5:16:51 AM PST Bulent Abali wrote:
> I don't know anything about VAS page size requirements in the kernel. I
> checked the user compression library and saw that we do a sysconf to
> get the page size; so the library should be immune to page size by
> design. But it wouldn't surprise me if a 64KB constant is inadvertently
> hardcoded somewhere else in the library. Giving heads up to Tulio and
> Raphael who are owners of the github repo.
>
> https://github.com/libnxz/power-gzip/blob/master/lib/nx_zlib.c#L922
>
> If we got this wrong in the library it might manifest itself as an error
> message of the sort "excessive page faults". The library must touch
> pages ahead to make them present in the memory; occasional page faults
> is acceptable. It will retry.
Hm, good to know. As I said I haven't noticed any problems so far, over a
few different days of testing. My change is now in the Void Linux kernel
package, and is working for others as well (including the Void maintainer
Daniel/q66 who I CC'd initially).
>
> Bulent
>
>
>
>
> From: "Sukadev Bhattiprolu" <sukadev@linux.ibm.com>
> To: "Christophe Leroy" <christophe.leroy@csgroup.eu>
> Cc: "Will Springer" <skirmisher@protonmail.com>,
> linuxppc-dev@lists.ozlabs.org, daniel@octaforge.org, Bulent
> Abali/Watson/IBM@IBM, haren@linux.ibm.com
> Date: 12/01/2020 12:53 AM
> Subject: Re: CONFIG_PPC_VAS depends on 64k pages...?
>
> Christophe Leroy [christophe.leroy@csgroup.eu] wrote:
> > Hi,
> >
> > Le 19/11/2020 à 11:58, Will Springer a écrit :
> > > I learned about the POWER9 gzip accelerator a few months ago when
> > > the
> > > support hit upstream Linux 5.8. However, for some reason the Kconfig
> > > dictates that VAS depends on a 64k page size, which is problematic
> > > as I
> > > run Void Linux, which uses a 4k-page kernel.
> > >
> > > Some early poking by others indicated there wasn't an obvious page
> > > size
> > > dependency in the code, and suggested I try modifying the config to
> > > switch it on. I did so, but was stopped by a minor complaint of an
> > > "unexpected DT configuration" by the VAS code. I wasn't equipped to
> > > figure out exactly what this meant, even after finding the
> > > offending condition, so after writing a very drawn-out forum post
> > > asking for help, I dropped the subject.
> > >
> > > Fast forward to today, when I was reminded of the whole thing again,
> > > and decided to debug a bit further. Apparently the VAS platform
> > > device (derived from the DT node) has 5 resources on my 4k kernel,
> > > instead of 4 (which evidently works for others who have had success
> > > on 64k kernels). I have no idea what this means in practice (I
> > > don't know how to introspect it), but after making a tiny patch[1],
> > > everything came up smoothly and I was doing blazing-fast gzip
> > > (de)compression in no time.
> > >
> > > Everything seems to work fine on 4k pages. So, what's up? Are there
> > > pitfalls lurking around that I've yet to stumble over? More
> > > reasonably,
> > > I'm curious as to why the feature supposedly depends on 64k pages,
> > > or if there's anything else I should be concerned about.
>
> Will,
>
> The reason I put in that config check is because we were only able to
> test 64K pages at that point.
>
> It is interesting that it is working for you. Following code in skiboot
> https://github.com/open-power/skiboot/blob/master/hw/vas.c should
> restrict it to 64K pages. IIRC there is also a corresponding change in
> some NX registers that should also be configured to allow 4K pages.
Huh, that is interesting indeed. As far as the kernel code, the only thing
specific to 64k pages I could find was in [1], where
VAS_XLATE_LPCR_PAGE_SIZE is set. There is also NX_PAGE_SIZE in drivers/
crypto/nx/nx.h, which is set to 4096, but I don't know if that's related to
kernel page size at all. Without a better idea of the code base, I didn't
examine more thoroughly.
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/powerpc/platforms/powernv/vas-window.c#n293
> static int init_north_ctl(struct proc_chip *chip)
> {
> uint64_t val = 0ULL;
>
> val = SETFIELD(VAS_64K_MODE_MASK, val, true);
> val = SETFIELD(VAS_ACCEPT_PASTE_MASK, val, true);
> val = SETFIELD(VAS_ENABLE_WC_MMIO_BAR, val, true);
> val = SETFIELD(VAS_ENABLE_UWC_MMIO_BAR, val, true);
> val = SETFIELD(VAS_ENABLE_RMA_MMIO_BAR, val, true);
>
> return vas_scom_write(chip, VAS_MISC_N_CTL, val);
> }
>
> I am copying Bulent Albali and Haren Myneni who have been working with
> VAS/NX for their thoughts/experience.
Thanks for this and for your input, by the way.
>
> > Maybe ask Sukadev who did the implementation and is maintaining it ?
> >
> > > I do have to say I'm quite satisfied with the results of the NX
> > > accelerator, though. Being able to shuffle data to a RaptorCS box
> > > over gigE and get compressed data back faster than most software
> > > gzip could ever hope to achieve is no small feat, let alone the
> > > instantaneous results locally.
> > > :)
> > >
> > > Cheers,
> > > Will Springer [she/her]
> > >
> > > [1]:
> > > https://github.com/Skirmisher/void-packages/blob/vas-4k-pages/srcpkgs/linux5.9/patches/ppc-vas-on-4k.patch
> > Christophe
Will [she/her]
^ permalink raw reply
* Re: [RFC PATCH] powerpc: show registers when unwinding interrupt frames
From: Michael Ellerman @ 2020-12-02 6:03 UTC (permalink / raw)
To: Christophe Leroy, Nicholas Piggin, linuxppc-dev
In-Reply-To: <3d0fbd5d-6332-fe01-a9e3-e8f204705979@csgroup.eu>
Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 07/11/2020 à 03:33, Nicholas Piggin a écrit :
>> It's often useful to know the register state for interrupts in
>> the stack frame. In the below example (with this patch applied),
>> the important information is the state of the page fault.
>>
>> A blatant case like this probably rather should have the page
>> fault regs passed down to the warning, but quite often there are
>> less obvious cases where an interrupt shows up that might give
>> some more clues.
>>
>> The downside is longer and more complex bug output.
>
> Do we want all interrupts, including system call ?
I think we do.
> I don't find the dump of the syscall interrupt so useful, do you ?
Yes :)
Because it's consistent, ie. we always show the full chain back to
userspace.
I think it's also helpful for folks who are less familiar with how
things work to show all the pieces, rather than hiding syscalls or
treating them specially.
Also I'm pretty sure I've had occasions where I've been debugging and
wanted to see the values that came in from userspace.
cheers
> See below an (unexpected?) KUAP warning due to an expected NULL pointer dereference in
> copy_from_kernel_nofault() called from kthread_probe_data()
>
>
> [ 1117.202054] ------------[ cut here ]------------
> [ 1117.202102] Bug: fault blocked by AP register !
> [ 1117.202261] WARNING: CPU: 0 PID: 377 at arch/powerpc/include/asm/nohash/32/kup-8xx.h:66
> do_page_fault+0x4a8/0x5ec
> [ 1117.202310] Modules linked in:
> [ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: G W
> 5.10.0-rc5-s3k-dev-01340-g83f53be2de31-dirty #4175
> [ 1117.202499] NIP: c0012048 LR: c0012048 CTR: 00000000
> [ 1117.202573] REGS: cacdbb88 TRAP: 0700 Tainted: G W
> (5.10.0-rc5-s3k-dev-01340-g83f53be2de31-dirty)
> [ 1117.202625] MSR: 00021032 <ME,IR,DR,RI> CR: 24082222 XER: 20000000
> [ 1117.202899]
> [ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 00000023 c092e554 00000001 c09865e8 c092e640
> [ 1117.202899] GPR08: 00001032 00000000 00000000 00014efc 28082224 100d166a 100a0920 00000000
> [ 1117.202899] GPR16: 100cac0c 100b0000 1080c3fc 1080d685 100d0000 100d0000 00000000 100a0900
> [ 1117.202899] GPR24: 100d0000 c07892ec 00000000 c0921510 c21f4440 0000005c c0000000 cacdbc80
> [ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec
> [ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec
> [ 1117.204509] Call Trace:
> [ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable)
> [ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34
> [ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0
> [ 1117.204979] NIP: c010dbec LR: c010dbac CTR: 00000001
> [ 1117.205053] REGS: cacdbc80 TRAP: 0301 Tainted: G W
> (5.10.0-rc5-s3k-dev-01340-g83f53be2de31-dirty)
> [ 1117.205104] MSR: 00009032 <EE,ME,IR,DR,RI> CR: 28082224 XER: 00000000
> [ 1117.205416] DAR: 0000005c DSISR: c0000000
> [ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 00000001 00000017 00000017 00000027 0000000f
> [ 1117.205416] GPR08: c09926ec 00000000 00000000 3ffff000 24082224
> [ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0
> [ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0
> [ 1117.206258] --- interrupt: 301
> [ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 (unreliable)
> [ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194
> [ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168
> [ 1117.206851] [cacdbdd8] [c005a268] show_state_filter+0x70/0x100
> [ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24
> [ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0
> [ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74
> [ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114
> [ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478
> [ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128
> [ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34
> [ 1117.207938] --- interrupt: c01 at 0xfd4e784
> [ 1117.208008] NIP: 0fd4e784 LR: 0fe0f244 CTR: 10048d38
> [ 1117.208083] REGS: cacdbf48 TRAP: 0c01 Tainted: G W
> (5.10.0-rc5-s3k-dev-01340-g83f53be2de31-dirty)
> [ 1117.208134] MSR: 0000d032 <EE,PR,ME,IR,DR,RI> CR: 44002222 XER: 00000000
> [ 1117.208470]
> [ 1117.208470] GPR00: 00000004 7fc34090 77bfb4e0 00000001 1080fa40 00000002 7400000f fefefeff
> [ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 00000000
> [ 1117.209104] NIP [0fd4e784] 0xfd4e784
> [ 1117.209180] LR [0fe0f244] 0xfe0f244
> [ 1117.209236] --- interrupt: c01
> [ 1117.209274] Instruction dump:
> [ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 73c90040 4082ff60
> [ 1117.209727] 0fe00000 3c60c082 386399f4 48013b65 <0fe00000> 80010034 3860000b 7c0803a6
> [ 1117.210102] ---[ end trace 1927c0323393af3e ]---
>
> Christophe
>
>
>>
>> Bug: Write fault blocked by AMR!
>> WARNING: CPU: 0 PID: 72 at arch/powerpc/include/asm/book3s/64/kup-radix.h:164 __do_page_fault+0x880/0xa90
>> Modules linked in:
>> CPU: 0 PID: 72 Comm: systemd-gpt-aut Not tainted
>> NIP: c00000000006e2f0 LR: c00000000006e2ec CTR: 0000000000000000
>> REGS: c00000000a4f3420 TRAP: 0700
>> MSR: 8000000000021033 <SF,ME,IR,DR,RI,LE> CR: 28002840 XER: 20040000
>> CFAR: c000000000128be0 IRQMASK: 3
>> GPR00: c00000000006e2ec c00000000a4f36c0 c0000000014f0700 0000000000000020
>> GPR04: 0000000000000001 c000000001290f50 0000000000000001 c000000001290f80
>> GPR08: c000000001612b08 0000000000000000 0000000000000000 00000000ffffe0f7
>> GPR12: 0000000048002840 c0000000016e0000 c00c000000021c80 c000000000fd6f60
>> GPR16: 0000000000000000 c00000000a104698 0000000000000003 c0000000087f0000
>> GPR20: 0000000000000100 c0000000070330b8 0000000000000000 0000000000000004
>> GPR24: 0000000002000000 0000000000000300 0000000002000000 c00000000a5b0c00
>> GPR28: 0000000000000000 000000000a000000 00007fffb2a90038 c00000000a4f3820
>> NIP [c00000000006e2f0] __do_page_fault+0x880/0xa90
>> LR [c00000000006e2ec] __do_page_fault+0x87c/0xa90
>> Call Trace:
>> [c00000000a4f36c0] [c00000000006e2ec] __do_page_fault+0x87c/0xa90 (unreliable)
>> [c00000000a4f3780] [c000000000e1c034] do_page_fault+0x34/0x90
>> [c00000000a4f37b0] [c000000000008908] data_access_common_virt+0x158/0x1b0
>> --- interrupt: 300 at __copy_tofrom_user_base+0x9c/0x5a4
>> NIP: c00000000009b028 LR: c000000000802978 CTR: 0000000000000800
>> REGS: c00000000a4f3820 TRAP: 0300
>> MSR: 800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24004840 XER: 00000000
>> CFAR: c00000000009aff4 DAR: 00007fffb2a90038 DSISR: 0a000000 IRQMASK: 0
>> GPR00: 0000000000000000 c00000000a4f3ac0 c0000000014f0700 00007fffb2a90028
>> GPR04: c000000008720010 0000000000010000 0000000000000000 0000000000000000
>> GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000001
>> GPR12: 0000000000004000 c0000000016e0000 c00c000000021c80 c000000000fd6f60
>> GPR16: 0000000000000000 c00000000a104698 0000000000000003 c0000000087f0000
>> GPR20: 0000000000000100 c0000000070330b8 0000000000000000 0000000000000004
>> GPR24: c00000000a4f3c80 c000000008720000 0000000000010000 0000000000000000
>> GPR28: 0000000000010000 0000000008720000 0000000000010000 c000000001515b98
>> NIP [c00000000009b028] __copy_tofrom_user_base+0x9c/0x5a4
>> LR [c000000000802978] copyout+0x68/0xc0
>> --- interrupt: 300
>> [c00000000a4f3af0] [c0000000008074b8] copy_page_to_iter+0x188/0x540
>> [c00000000a4f3b50] [c00000000035c678] generic_file_buffered_read+0x358/0xd80
>> [c00000000a4f3c40] [c0000000004c1e90] blkdev_read_iter+0x50/0x80
>> [c00000000a4f3c60] [c00000000045733c] new_sync_read+0x12c/0x1c0
>> [c00000000a4f3d00] [c00000000045a1f0] vfs_read+0x1d0/0x240
>> [c00000000a4f3d50] [c00000000045a7f4] ksys_read+0x84/0x140
>> [c00000000a4f3da0] [c000000000033a60] system_call_exception+0x100/0x280
>> [c00000000a4f3e10] [c00000000000c508] system_call_common+0xf8/0x2f8
>> Instruction dump:
>> eae10078 3be0000b 4bfff890 60420000 792917e1 4182ff18 3c82ffab 3884a5e0
>> 3c62ffab 3863a6e8 480ba891 60000000 <0fe00000> 3be0000b 4bfff860 e93c0938
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> arch/powerpc/kernel/process.c | 20 ++++++++++++++------
>> 1 file changed, 14 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
>> index ea36a29c8b01..799f00b32f74 100644
>> --- a/arch/powerpc/kernel/process.c
>> +++ b/arch/powerpc/kernel/process.c
>> @@ -1475,12 +1475,10 @@ static void print_msr_bits(unsigned long val)
>> #define LAST_VOLATILE 12
>> #endif
>>
>> -void show_regs(struct pt_regs * regs)
>> +static void __show_regs(struct pt_regs *regs)
>> {
>> int i, trap;
>>
>> - show_regs_print_info(KERN_DEFAULT);
>> -
>> printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
>> regs->nip, regs->link, regs->ctr);
>> printk("REGS: %px TRAP: %04lx %s (%s)\n",
>> @@ -1522,6 +1520,12 @@ void show_regs(struct pt_regs * regs)
>> printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
>> printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
>> }
>> +}
>> +
>> +void show_regs(struct pt_regs *regs)
>> +{
>> + show_regs_print_info(KERN_DEFAULT);
>> + __show_regs(regs);
>> show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT);
>> if (!user_mode(regs))
>> show_instructions(regs);
>> @@ -2192,10 +2196,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack,
>> && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
>> struct pt_regs *regs = (struct pt_regs *)
>> (sp + STACK_FRAME_OVERHEAD);
>> +
>> lr = regs->link;
>> - printk("%s--- interrupt: %lx at %pS\n LR = %pS\n",
>> - loglvl, regs->trap,
>> - (void *)regs->nip, (void *)lr);
>> + printk("%s--- interrupt: %lx at %pS\n",
>> + loglvl, regs->trap, (void *)regs->nip);
>> + __show_regs(regs);
>> + printk("%s--- interrupt: %lx\n",
>> + loglvl, regs->trap);
>> +
>> firstframe = 1;
>> }
>>
>>
^ permalink raw reply
* [powerpc:next-test] BUILD SUCCESS 72e886545963b33dd5e1d92ee9c77dadb51adc4e
From: kernel test robot @ 2020-12-02 5:40 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
branch HEAD: 72e886545963b33dd5e1d92ee9c77dadb51adc4e powerpc/pseries: Define PCI bus speed for Gen4 and Gen5
elapsed time: 724m
configs tested: 147
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
arm clps711x_defconfig
sh edosk7760_defconfig
powerpc cell_defconfig
mips ath79_defconfig
openrisc alldefconfig
sh j2_defconfig
mips capcella_defconfig
arm viper_defconfig
c6x evmc6474_defconfig
arm pxa3xx_defconfig
c6x evmc6457_defconfig
m68k q40_defconfig
arc nsim_700_defconfig
arc haps_hs_defconfig
riscv nommu_virt_defconfig
mips tb0226_defconfig
arm dove_defconfig
m68k m5272c3_defconfig
mips malta_kvm_guest_defconfig
sh ecovec24-romimage_defconfig
powerpc wii_defconfig
arm integrator_defconfig
s390 zfcpdump_defconfig
xtensa generic_kc705_defconfig
ia64 defconfig
powerpc klondike_defconfig
mips loongson1c_defconfig
arc nsimosci_defconfig
mips gcw0_defconfig
xtensa virt_defconfig
c6x evmc6678_defconfig
sh shx3_defconfig
mips maltasmvp_eva_defconfig
powerpc tqm5200_defconfig
arc nsimosci_hs_smp_defconfig
s390 defconfig
sh ap325rxa_defconfig
m68k m5475evb_defconfig
c6x defconfig
powerpc ep8248e_defconfig
arm pcm027_defconfig
mips ip22_defconfig
ia64 generic_defconfig
sh dreamcast_defconfig
arm mps2_defconfig
arm s3c6400_defconfig
powerpc rainier_defconfig
powerpc taishan_defconfig
powerpc eiger_defconfig
powerpc fsp2_defconfig
powerpc ppc40x_defconfig
h8300 defconfig
powerpc ppc44x_defconfig
arm alldefconfig
arm moxart_defconfig
powerpc amigaone_defconfig
mips maltaup_defconfig
arc alldefconfig
microblaze mmu_defconfig
parisc alldefconfig
arm h3600_defconfig
mips jazz_defconfig
arm davinci_all_defconfig
powerpc mpc834x_itx_defconfig
powerpc mvme5100_defconfig
sh apsh4ad0a_defconfig
sh sh7785lcr_32bit_defconfig
arm imote2_defconfig
mips fuloong2e_defconfig
sparc sparc64_defconfig
arm vexpress_defconfig
mips xway_defconfig
xtensa audio_kc705_defconfig
powerpc canyonlands_defconfig
sh hp6xx_defconfig
powerpc skiroot_defconfig
ia64 allmodconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
i386 allyesconfig
sparc allyesconfig
sparc defconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a004-20201201
i386 randconfig-a005-20201201
i386 randconfig-a001-20201201
i386 randconfig-a002-20201201
i386 randconfig-a006-20201201
i386 randconfig-a003-20201201
x86_64 randconfig-a016-20201201
x86_64 randconfig-a012-20201201
x86_64 randconfig-a014-20201201
x86_64 randconfig-a013-20201201
x86_64 randconfig-a015-20201201
x86_64 randconfig-a011-20201201
i386 randconfig-a014-20201201
i386 randconfig-a013-20201201
i386 randconfig-a011-20201201
i386 randconfig-a015-20201201
i386 randconfig-a012-20201201
i386 randconfig-a016-20201201
riscv nommu_k210_defconfig
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 kexec
clang tested configs:
x86_64 randconfig-a004-20201201
x86_64 randconfig-a006-20201201
x86_64 randconfig-a001-20201201
x86_64 randconfig-a002-20201201
x86_64 randconfig-a005-20201201
x86_64 randconfig-a003-20201201
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:fixes-test] BUILD SUCCESS f54db39fbe40731c40aefdd3bc26e7d56d668c64
From: kernel test robot @ 2020-12-02 5:40 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: f54db39fbe40731c40aefdd3bc26e7d56d668c64 KVM: PPC: Book3S HV: XIVE: Fix vCPU id sanity check
elapsed time: 724m
configs tested: 157
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
arm clps711x_defconfig
sh edosk7760_defconfig
powerpc cell_defconfig
mips ath79_defconfig
openrisc alldefconfig
sh j2_defconfig
mips capcella_defconfig
arm viper_defconfig
c6x evmc6474_defconfig
arm pxa3xx_defconfig
c6x evmc6457_defconfig
m68k q40_defconfig
arc nsim_700_defconfig
riscv nommu_k210_defconfig
arc haps_hs_defconfig
riscv nommu_virt_defconfig
mips tb0226_defconfig
arm dove_defconfig
m68k m5272c3_defconfig
sh se7705_defconfig
m68k apollo_defconfig
powerpc mpc834x_itxgp_defconfig
arm realview_defconfig
microblaze defconfig
mips malta_kvm_guest_defconfig
sh ecovec24-romimage_defconfig
powerpc wii_defconfig
arm integrator_defconfig
s390 zfcpdump_defconfig
xtensa generic_kc705_defconfig
powerpc klondike_defconfig
mips loongson1c_defconfig
arc nsimosci_defconfig
ia64 defconfig
mips gcw0_defconfig
xtensa virt_defconfig
c6x evmc6678_defconfig
sh shx3_defconfig
mips maltasmvp_eva_defconfig
powerpc tqm5200_defconfig
arc nsimosci_hs_smp_defconfig
s390 defconfig
sh ap325rxa_defconfig
m68k m5475evb_defconfig
c6x defconfig
powerpc ep8248e_defconfig
arm pcm027_defconfig
mips ip22_defconfig
sh dreamcast_defconfig
arm mps2_defconfig
sparc defconfig
ia64 generic_defconfig
arm s3c6400_defconfig
powerpc rainier_defconfig
powerpc taishan_defconfig
powerpc eiger_defconfig
powerpc fsp2_defconfig
powerpc ppc40x_defconfig
h8300 defconfig
powerpc ppc44x_defconfig
arm alldefconfig
arm moxart_defconfig
powerpc amigaone_defconfig
mips maltaup_defconfig
arc alldefconfig
microblaze mmu_defconfig
parisc alldefconfig
arm h3600_defconfig
mips jazz_defconfig
arm davinci_all_defconfig
powerpc mpc834x_itx_defconfig
powerpc mvme5100_defconfig
sh apsh4ad0a_defconfig
sh sh7785lcr_32bit_defconfig
arm imote2_defconfig
mips fuloong2e_defconfig
sparc sparc64_defconfig
arm vexpress_defconfig
powerpc pasemi_defconfig
powerpc storcenter_defconfig
parisc generic-32bit_defconfig
mips maltaup_xpa_defconfig
mips e55_defconfig
mips xway_defconfig
xtensa audio_kc705_defconfig
powerpc canyonlands_defconfig
sh hp6xx_defconfig
powerpc skiroot_defconfig
ia64 allmodconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
i386 allyesconfig
sparc allyesconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a004-20201201
i386 randconfig-a005-20201201
i386 randconfig-a001-20201201
i386 randconfig-a002-20201201
i386 randconfig-a006-20201201
i386 randconfig-a003-20201201
x86_64 randconfig-a016-20201201
x86_64 randconfig-a012-20201201
x86_64 randconfig-a014-20201201
x86_64 randconfig-a013-20201201
x86_64 randconfig-a015-20201201
x86_64 randconfig-a011-20201201
i386 randconfig-a014-20201201
i386 randconfig-a013-20201201
i386 randconfig-a011-20201201
i386 randconfig-a015-20201201
i386 randconfig-a012-20201201
i386 randconfig-a016-20201201
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 kexec
clang tested configs:
x86_64 randconfig-a004-20201201
x86_64 randconfig-a006-20201201
x86_64 randconfig-a001-20201201
x86_64 randconfig-a002-20201201
x86_64 randconfig-a005-20201201
x86_64 randconfig-a003-20201201
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [PATCH v2 3/4] powerpc: Reintroduce is_kvm_guest in a new avatar
From: Srikar Dronamraju @ 2020-12-02 5:04 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Phil Auld, Srikar Dronamraju,
Juri Lelli, Peter Zijlstra, LKML, Nicholas Piggin, Waiman Long,
linuxppc-dev, Valentin Schneider
In-Reply-To: <20201202050456.164005-1-srikar@linux.vnet.ibm.com>
Introduce a static branch that would be set during boot if the OS
happens to be a KVM guest. Subsequent checks to see if we are on KVM
will rely on this static branch. This static branch would be used in
vcpu_is_preempted in a subsequent patch.
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Phil Auld <pauld@redhat.com>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/kvm_guest.h | 10 ++++++++++
arch/powerpc/include/asm/kvm_para.h | 2 +-
arch/powerpc/kernel/firmware.c | 2 ++
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
index ba8291e02ba9..627ba272e781 100644
--- a/arch/powerpc/include/asm/kvm_guest.h
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -7,8 +7,18 @@
#define __POWERPC_KVM_GUEST_H__
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(kvm_guest);
+
+static inline bool is_kvm_guest(void)
+{
+ return static_branch_unlikely(&kvm_guest);
+}
+
bool check_kvm_guest(void);
#else
+static inline bool is_kvm_guest(void) { return false; }
static inline bool check_kvm_guest(void) { return false; }
#endif
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 6fba06b6cfdb..abe1b5e82547 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -14,7 +14,7 @@
static inline int kvm_para_available(void)
{
- return IS_ENABLED(CONFIG_KVM_GUEST) && check_kvm_guest();
+ return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
}
static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index 0aeb6a5b1a9e..28498fc573f2 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -22,6 +22,7 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
#endif
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+DEFINE_STATIC_KEY_FALSE(kvm_guest);
bool check_kvm_guest(void)
{
struct device_node *hyper_node;
@@ -33,6 +34,7 @@ bool check_kvm_guest(void)
if (!of_device_is_compatible(hyper_node, "linux,kvm"))
return 0;
+ static_branch_enable(&kvm_guest);
return 1;
}
#endif
--
2.18.4
^ permalink raw reply related
* [PATCH v2 1/4] powerpc: Refactor is_kvm_guest declaration to new header
From: Srikar Dronamraju @ 2020-12-02 5:04 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Phil Auld, Srikar Dronamraju,
Juri Lelli, Peter Zijlstra, LKML, Nicholas Piggin, Waiman Long,
linuxppc-dev, Valentin Schneider
In-Reply-To: <20201202050456.164005-1-srikar@linux.vnet.ibm.com>
Only code/declaration movement, in anticipation of doing a kvm-aware
vcpu_is_preempted. No additional changes.
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Phil Auld <pauld@redhat.com>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
Changelog:
v1->v2:
v1: https://lore.kernel.org/linuxppc-dev/20201028123512.871051-1-srikar@linux.vnet.ibm.com/t/#u
- Moved a hunk to fix a no previous prototype warning reported by: lkp@intel.com
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org/thread/C6PTRPHWMC7VV4OTYN3ISYKDHTDQS6YI/
arch/powerpc/include/asm/firmware.h | 6 ------
arch/powerpc/include/asm/kvm_guest.h | 15 +++++++++++++++
arch/powerpc/include/asm/kvm_para.h | 2 +-
arch/powerpc/kernel/firmware.c | 1 +
arch/powerpc/platforms/pseries/smp.c | 1 +
5 files changed, 18 insertions(+), 7 deletions(-)
create mode 100644 arch/powerpc/include/asm/kvm_guest.h
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 0b295bdb201e..aa6a5ef5d483 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -134,12 +134,6 @@ extern int ibm_nmi_interlock_token;
extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup;
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
-bool is_kvm_guest(void);
-#else
-static inline bool is_kvm_guest(void) { return false; }
-#endif
-
#ifdef CONFIG_PPC_PSERIES
void pseries_probe_fw_features(void);
#else
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
new file mode 100644
index 000000000000..c0ace884a0e8
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+#ifndef __POWERPC_KVM_GUEST_H__
+#define __POWERPC_KVM_GUEST_H__
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+bool is_kvm_guest(void);
+#else
+static inline bool is_kvm_guest(void) { return false; }
+#endif
+
+#endif /* __POWERPC_KVM_GUEST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 744612054c94..abe1b5e82547 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -8,7 +8,7 @@
#ifndef __POWERPC_KVM_PARA_H__
#define __POWERPC_KVM_PARA_H__
-#include <asm/firmware.h>
+#include <asm/kvm_guest.h>
#include <uapi/asm/kvm_para.h>
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index fe48d319d490..5f48e5ad24cd 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -14,6 +14,7 @@
#include <linux/of.h>
#include <asm/firmware.h>
+#include <asm/kvm_guest.h>
#ifdef CONFIG_PPC64
unsigned long powerpc_firmware_features __read_mostly;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 92922491a81c..d578732c545d 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -42,6 +42,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/code-patching.h>
#include <asm/svm.h>
+#include <asm/kvm_guest.h>
#include "pseries.h"
--
2.18.4
^ permalink raw reply related
* [PATCH v2 2/4] powerpc: Rename is_kvm_guest to check_kvm_guest
From: Srikar Dronamraju @ 2020-12-02 5:04 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Phil Auld, Srikar Dronamraju,
Juri Lelli, Peter Zijlstra, LKML, Nicholas Piggin, Waiman Long,
linuxppc-dev, Valentin Schneider
In-Reply-To: <20201202050456.164005-1-srikar@linux.vnet.ibm.com>
is_kvm_guest() will be reused in a subsequent patch in a new avatar. Hence
rename is_kvm_guest to check_kvm_guest. No additional changes.
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Phil Auld <pauld@redhat.com>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/kvm_guest.h | 4 ++--
arch/powerpc/include/asm/kvm_para.h | 2 +-
arch/powerpc/kernel/firmware.c | 2 +-
arch/powerpc/platforms/pseries/smp.c | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
index c0ace884a0e8..ba8291e02ba9 100644
--- a/arch/powerpc/include/asm/kvm_guest.h
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -7,9 +7,9 @@
#define __POWERPC_KVM_GUEST_H__
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
-bool is_kvm_guest(void);
+bool check_kvm_guest(void);
#else
-static inline bool is_kvm_guest(void) { return false; }
+static inline bool check_kvm_guest(void) { return false; }
#endif
#endif /* __POWERPC_KVM_GUEST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index abe1b5e82547..6fba06b6cfdb 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -14,7 +14,7 @@
static inline int kvm_para_available(void)
{
- return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
+ return IS_ENABLED(CONFIG_KVM_GUEST) && check_kvm_guest();
}
static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index 5f48e5ad24cd..0aeb6a5b1a9e 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -22,7 +22,7 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
#endif
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
-bool is_kvm_guest(void)
+bool check_kvm_guest(void)
{
struct device_node *hyper_node;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index d578732c545d..c70b4be9f0a5 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -211,7 +211,7 @@ static __init void pSeries_smp_probe(void)
if (!cpu_has_feature(CPU_FTR_SMT))
return;
- if (is_kvm_guest()) {
+ if (check_kvm_guest()) {
/*
* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
* faults to the hypervisor which then reads the instruction
--
2.18.4
^ permalink raw reply related
* [PATCH v2 4/4] powerpc/paravirt: Use is_kvm_guest in vcpu_is_preempted
From: Srikar Dronamraju @ 2020-12-02 5:04 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Phil Auld, Srikar Dronamraju,
Juri Lelli, Peter Zijlstra, LKML, Nicholas Piggin, Waiman Long,
linuxppc-dev, Valentin Schneider
In-Reply-To: <20201202050456.164005-1-srikar@linux.vnet.ibm.com>
If it's a shared LPAR but not a KVM guest, then see if the vCPU is
related to the calling vCPU. On PowerVM, only cores can be preempted.
So if one vCPU is in a non-preempted state, we can infer that all other
vCPUs sharing the same core are in a non-preempted state.
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Phil Auld <pauld@redhat.com>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/paravirt.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 9362c94fe3aa..edc08f04aef7 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -10,6 +10,9 @@
#endif
#ifdef CONFIG_PPC_SPLPAR
+#include <asm/kvm_guest.h>
+#include <asm/cputhreads.h>
+
DECLARE_STATIC_KEY_FALSE(shared_processor);
static inline bool is_shared_processor(void)
@@ -74,6 +77,21 @@ static inline bool vcpu_is_preempted(int cpu)
{
if (!is_shared_processor())
return false;
+
+#ifdef CONFIG_PPC_SPLPAR
+ if (!is_kvm_guest()) {
+ int first_cpu = cpu_first_thread_sibling(smp_processor_id());
+
+ /*
+ * Preemption can only happen at core granularity. This CPU
+ * is not preempted if one of the CPU of this core is not
+ * preempted.
+ */
+ if (cpu_first_thread_sibling(cpu) == first_cpu)
+ return false;
+ }
+#endif
+
if (yield_count_of(cpu) & 1)
return true;
return false;
--
2.18.4
^ permalink raw reply related
* [PATCH v2 0/4] Powerpc: Better preemption for shared processor
From: Srikar Dronamraju @ 2020-12-02 5:04 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Phil Auld, Srikar Dronamraju,
Juri Lelli, Peter Zijlstra, LKML, Nicholas Piggin, Waiman Long,
linuxppc-dev, Valentin Schneider
Currently, vcpu_is_preempted() decides based only on the yield_count of
the target vCPU on shared-processor LPARs. On a PowerVM LPAR, PHYP
schedules at SMT8 core boundaries, i.e. all CPUs belonging to a core are
scheduled in or scheduled out as a group. This can be used to better
predict non-preempted CPUs on PowerVM shared LPARs.
perf stat -r 5 -a perf bench sched pipe -l 10000000 (lower time is better)
powerpc/next
35,107,951.20 msec cpu-clock # 255.898 CPUs utilized ( +- 0.31% )
23,655,348 context-switches # 0.674 K/sec ( +- 3.72% )
14,465 cpu-migrations # 0.000 K/sec ( +- 5.37% )
82,463 page-faults # 0.002 K/sec ( +- 8.40% )
1,127,182,328,206 cycles # 0.032 GHz ( +- 1.60% ) (66.67%)
78,587,300,622 stalled-cycles-frontend # 6.97% frontend cycles idle ( +- 0.08% ) (50.01%)
654,124,218,432 stalled-cycles-backend # 58.03% backend cycles idle ( +- 1.74% ) (50.01%)
834,013,059,242 instructions # 0.74 insn per cycle
# 0.78 stalled cycles per insn ( +- 0.73% ) (66.67%)
132,911,454,387 branches # 3.786 M/sec ( +- 0.59% ) (50.00%)
2,890,882,143 branch-misses # 2.18% of all branches ( +- 0.46% ) (50.00%)
137.195 +- 0.419 seconds time elapsed ( +- 0.31% )
powerpc/next + patchset
29,981,702.64 msec cpu-clock # 255.881 CPUs utilized ( +- 1.30% )
40,162,456 context-switches # 0.001 M/sec ( +- 0.01% )
1,110 cpu-migrations # 0.000 K/sec ( +- 5.20% )
62,616 page-faults # 0.002 K/sec ( +- 3.93% )
1,430,030,626,037 cycles # 0.048 GHz ( +- 1.41% ) (66.67%)
83,202,707,288 stalled-cycles-frontend # 5.82% frontend cycles idle ( +- 0.75% ) (50.01%)
744,556,088,520 stalled-cycles-backend # 52.07% backend cycles idle ( +- 1.39% ) (50.01%)
940,138,418,674 instructions # 0.66 insn per cycle
# 0.79 stalled cycles per insn ( +- 0.51% ) (66.67%)
146,452,852,283 branches # 4.885 M/sec ( +- 0.80% ) (50.00%)
3,237,743,996 branch-misses # 2.21% of all branches ( +- 1.18% ) (50.01%)
117.17 +- 1.52 seconds time elapsed ( +- 1.30% )
This is around a 14.6% improvement in elapsed time (137.195 s -> 117.17 s).
Changelog:
v1->v2:
v1: https://lore.kernel.org/linuxppc-dev/20201028123512.871051-1-srikar@linux.vnet.ibm.com/t/#u
- Rebased to 27th Nov linuxppc/merge tree.
- Moved a hunk to fix a no previous prototype warning reported by: lkp@intel.com
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org/thread/C6PTRPHWMC7VV4OTYN3ISYKDHTDQS6YI/
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Phil Auld <pauld@redhat.com>
Srikar Dronamraju (4):
powerpc: Refactor is_kvm_guest declaration to new header
powerpc: Rename is_kvm_guest to check_kvm_guest
powerpc: Reintroduce is_kvm_guest
powerpc/paravirt: Use is_kvm_guest in vcpu_is_preempted
arch/powerpc/include/asm/firmware.h | 6 ------
arch/powerpc/include/asm/kvm_guest.h | 25 +++++++++++++++++++++++++
arch/powerpc/include/asm/kvm_para.h | 2 +-
arch/powerpc/include/asm/paravirt.h | 18 ++++++++++++++++++
arch/powerpc/kernel/firmware.c | 5 ++++-
arch/powerpc/platforms/pseries/smp.c | 3 ++-
6 files changed, 50 insertions(+), 9 deletions(-)
create mode 100644 arch/powerpc/include/asm/kvm_guest.h
--
2.18.4
^ permalink raw reply
* [PATCH v7 updated 21/22 ] powerpc/book3s64/kup: Check max key supported before enabling kup
From: Aneesh Kumar K.V @ 2020-12-02 4:38 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Aneesh Kumar K.V
In-Reply-To: <20201127044424.40686-22-aneesh.kumar@linux.ibm.com>
Don't enable KUEP/KUAP if we support less than or equal to 3 keys.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/kup.h | 3 +++
arch/powerpc/mm/book3s64/pkeys.c | 33 ++++++++++++++++++++------------
arch/powerpc/mm/init-common.c | 4 ++--
3 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 952be0414f43..f8ec679bd2de 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -44,6 +44,9 @@
#else /* !__ASSEMBLY__ */
+extern bool disable_kuep;
+extern bool disable_kuap;
+
#include <linux/pgtable.h>
void setup_kup(void);
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 4a3aeddbe0c7..2b7ded396db4 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -185,6 +185,27 @@ void __init pkey_early_init_devtree(void)
default_uamor &= ~(0x3ul << pkeyshift(execute_only_key));
}
+ if (unlikely(num_pkey <= 3)) {
+ /*
+ * Insufficient number of keys to support
+ * KUAP/KUEP feature.
+ */
+ disable_kuep = true;
+ disable_kuap = true;
+ WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey);
+ } else {
+ /* handle key which is used by kernel for KUAP */
+ reserved_allocation_mask |= (0x1 << 3);
+ /*
+ * Mark access for kup_key in default amr so that
+ * we continue to operate with that AMR in
+ * copy_to/from_user().
+ */
+ default_amr &= ~(0x3ul << pkeyshift(3));
+ default_iamr &= ~(0x1ul << pkeyshift(3));
+ default_uamor &= ~(0x3ul << pkeyshift(3));
+ }
+
/*
* Allow access for only key 0. And prevent any other modification.
*/
@@ -205,18 +226,6 @@ void __init pkey_early_init_devtree(void)
reserved_allocation_mask |= (0x1 << 1);
default_uamor &= ~(0x3ul << pkeyshift(1));
- /* handle key which is used by kernel for KAUP */
- reserved_allocation_mask |= (0x1 << 3);
- /*
- * Mark access for KUAP key in default amr so that
- * we continue to operate with that AMR in
- * copy_to/from_user().
- */
- default_amr &= ~(0x3ul << pkeyshift(3));
- default_iamr &= ~(0x1ul << pkeyshift(3));
- default_uamor &= ~(0x3ul << pkeyshift(3));
-
-
/*
* Prevent the usage of OS reserved keys. Update UAMOR
* for those keys. Also mark the rest of the bits in the
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 8e0d792ac296..afdebb95bcae 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(kernstart_addr);
unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
EXPORT_SYMBOL_GPL(kernstart_virt_addr);
-static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
-static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
+bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
static int __init parse_nosmep(char *p)
{
--
2.28.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox