LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v3 1/2] cpuidle: Trace IPI based and timer based wakeup latency from idle states
From: Rafael J. Wysocki @ 2020-07-27 13:42 UTC (permalink / raw)
  To: Pratik Rajesh Sampat
  Cc: Gautham R. Shenoy, pratik.r.sampat, Linux PM, Daniel Lezcano,
	Rafael J. Wysocki, linuxppc-dev, Nicholas Piggin, Paul Mackerras,
	linux-kselftest, Shuah Khan, srivatsa, Linux Kernel Mailing List
In-Reply-To: <20200721124300.65615-2-psampat@linux.ibm.com>

On Tue, Jul 21, 2020 at 2:43 PM Pratik Rajesh Sampat
<psampat@linux.ibm.com> wrote:
>
> Fire directed smp_call_function_single IPIs from a specified source
> CPU to the specified target CPU to reduce the noise we have to wade
> through in the trace log.

And what's the purpose of it?

> The module is based on the idea written by Srivatsa Bhat and maintained
> by Vaidyanathan Srinivasan internally.
>
> Queue HR timer and measure jitter. Wakeup latency measurement for idle
> states using hrtimer.  Echo a value in ns to timer_test_function and
> watch trace. A HRtimer will be queued and when it fires the expected
> wakeup vs actual wakeup is computed and the delay is printed in ns.
>
> Implemented as a module which utilizes debugfs so that it can be
> integrated with selftests.
>
> To include the module, check option and include as module
> kernel hacking -> Cpuidle latency selftests
>
> [srivatsa.bhat@linux.vnet.ibm.com: Initial implementation in
>  cpuidle/sysfs]
>
> [svaidy@linux.vnet.ibm.com: wakeup latency measurements using hrtimer
>  and fix some of the time calculation]
>
> [ego@linux.vnet.ibm.com: Fix some whitespace and tab errors and
>  increase the resolution of IPI wakeup]
>
> Signed-off-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  drivers/cpuidle/Makefile               |   1 +
>  drivers/cpuidle/test-cpuidle_latency.c | 150 +++++++++++++++++++++++++
>  lib/Kconfig.debug                      |  10 ++
>  3 files changed, 161 insertions(+)
>  create mode 100644 drivers/cpuidle/test-cpuidle_latency.c
>
> diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
> index f07800cbb43f..2ae05968078c 100644
> --- a/drivers/cpuidle/Makefile
> +++ b/drivers/cpuidle/Makefile
> @@ -8,6 +8,7 @@ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
>  obj-$(CONFIG_DT_IDLE_STATES)             += dt_idle_states.o
>  obj-$(CONFIG_ARCH_HAS_CPU_RELAX)         += poll_state.o
>  obj-$(CONFIG_HALTPOLL_CPUIDLE)           += cpuidle-haltpoll.o
> +obj-$(CONFIG_IDLE_LATENCY_SELFTEST)      += test-cpuidle_latency.o
>
>  ##################################################################################
>  # ARM SoC drivers
> diff --git a/drivers/cpuidle/test-cpuidle_latency.c b/drivers/cpuidle/test-cpuidle_latency.c
> new file mode 100644
> index 000000000000..61574665e972
> --- /dev/null
> +++ b/drivers/cpuidle/test-cpuidle_latency.c
> @@ -0,0 +1,150 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Module-based API test facility for cpuidle latency using IPIs and timers

I'd like to see a more detailed description of what it does and how it
works here.

> + */
> +
> +#include <linux/debugfs.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +
> +/* IPI based wakeup latencies */
> +struct latency {
> +       unsigned int src_cpu;
> +       unsigned int dest_cpu;
> +       ktime_t time_start;
> +       ktime_t time_end;
> +       u64 latency_ns;
> +} ipi_wakeup;
> +
> +static void measure_latency(void *info)
> +{
> +       struct latency *v;
> +       ktime_t time_diff;
> +
> +       v = (struct latency *)info;
> +       v->time_end = ktime_get();
> +       time_diff = ktime_sub(v->time_end, v->time_start);
> +       v->latency_ns = ktime_to_ns(time_diff);
> +}
> +
> +void run_smp_call_function_test(unsigned int cpu)
> +{
> +       ipi_wakeup.src_cpu = smp_processor_id();
> +       ipi_wakeup.dest_cpu = cpu;
> +       ipi_wakeup.time_start = ktime_get();
> +       smp_call_function_single(cpu, measure_latency, &ipi_wakeup, 1);
> +}
> +
> +/* Timer based wakeup latencies */
> +struct timer_data {
> +       unsigned int src_cpu;
> +       u64 timeout;
> +       ktime_t time_start;
> +       ktime_t time_end;
> +       struct hrtimer timer;
> +       u64 timeout_diff_ns;
> +} timer_wakeup;
> +
> +static enum hrtimer_restart timer_called(struct hrtimer *hrtimer)
> +{
> +       struct timer_data *w;
> +       ktime_t time_diff;
> +
> +       w = container_of(hrtimer, struct timer_data, timer);
> +       w->time_end = ktime_get();
> +
> +       time_diff = ktime_sub(w->time_end, w->time_start);
> +       time_diff = ktime_sub(time_diff, ns_to_ktime(w->timeout));
> +       w->timeout_diff_ns = ktime_to_ns(time_diff);
> +       return HRTIMER_NORESTART;
> +}
> +
> +static void run_timer_test(unsigned int ns)
> +{
> +       hrtimer_init(&timer_wakeup.timer, CLOCK_MONOTONIC,
> +                    HRTIMER_MODE_REL);
> +       timer_wakeup.timer.function = timer_called;
> +       timer_wakeup.time_start = ktime_get();
> +       timer_wakeup.src_cpu = smp_processor_id();
> +       timer_wakeup.timeout = ns;
> +
> +       hrtimer_start(&timer_wakeup.timer, ns_to_ktime(ns),
> +                     HRTIMER_MODE_REL_PINNED);
> +}
> +
> +static struct dentry *dir;
> +
> +static int cpu_read_op(void *data, u64 *value)
> +{
> +       *value = ipi_wakeup.dest_cpu;
> +       return 0;
> +}
> +
> +static int cpu_write_op(void *data, u64 value)
> +{
> +       run_smp_call_function_test(value);
> +       return 0;
> +}
> +DEFINE_SIMPLE_ATTRIBUTE(ipi_ops, cpu_read_op, cpu_write_op, "%llu\n");
> +
> +static int timeout_read_op(void *data, u64 *value)
> +{
> +       *value = timer_wakeup.timeout;
> +       return 0;
> +}
> +
> +static int timeout_write_op(void *data, u64 value)
> +{
> +       run_timer_test(value);
> +       return 0;
> +}
> +DEFINE_SIMPLE_ATTRIBUTE(timeout_ops, timeout_read_op, timeout_write_op, "%llu\n");
> +
> +static int __init latency_init(void)
> +{
> +       struct dentry *temp;
> +
> +       dir = debugfs_create_dir("latency_test", 0);
> +       if (!dir) {
> +               pr_alert("latency_test: failed to create /sys/kernel/debug/latency_test\n");
> +               return -1;
> +       }
> +       temp = debugfs_create_file("ipi_cpu_dest",
> +                                  0666,
> +                                  dir,
> +                                  NULL,
> +                                  &ipi_ops);
> +       if (!temp) {
> +               pr_alert("latency_test: failed to create /sys/kernel/debug/ipi_cpu_dest\n");
> +               return -1;
> +       }
> +       debugfs_create_u64("ipi_latency_ns", 0444, dir, &ipi_wakeup.latency_ns);
> +       debugfs_create_u32("ipi_cpu_src", 0444, dir, &ipi_wakeup.src_cpu);
> +
> +       temp = debugfs_create_file("timeout_expected_ns",
> +                                  0666,
> +                                  dir,
> +                                  NULL,
> +                                  &timeout_ops);
> +       if (!temp) {
> +               pr_alert("latency_test: failed to create /sys/kernel/debug/timeout_expected_ns\n");
> +               return -1;
> +       }
> +       debugfs_create_u64("timeout_diff_ns", 0444, dir, &timer_wakeup.timeout_diff_ns);
> +       debugfs_create_u32("timeout_cpu_src", 0444, dir, &timer_wakeup.src_cpu);
> +       pr_info("Latency Test module loaded\n");
> +       return 0;
> +}
> +
> +static void __exit latency_cleanup(void)
> +{
> +       pr_info("Cleaning up Latency Test module.\n");
> +       debugfs_remove_recursive(dir);
> +}
> +
> +module_init(latency_init);
> +module_exit(latency_cleanup);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("IBM Corporation");
> +MODULE_DESCRIPTION("Measuring idle latency for IPIs and Timers");
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index d74ac0fd6b2d..e2283790245a 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -1375,6 +1375,16 @@ config DEBUG_KOBJECT
>           If you say Y here, some extra kobject debugging messages will be sent
>           to the syslog.
>
> +config IDLE_LATENCY_SELFTEST
> +       tristate "Cpuidle latency selftests"
> +       depends on CPU_IDLE
> +       help
> +         This option provides a kernel module that runs tests using the IPI and
> +         timers to measure latency.

What latency does it measure?

> +
> +         Say M if you want these self tests to build as a module.
> +         Say N if you are unsure.
> +
>  config DEBUG_KOBJECT_RELEASE
>         bool "kobject release debugging"
>         depends on DEBUG_OBJECTS_TIMERS
> --
> 2.25.4
>

^ permalink raw reply

* Re: [PATCH] lockdep: Fix TRACE_IRQFLAGS vs NMIs
From: Ingo Molnar @ 2020-07-27 13:17 UTC (permalink / raw)
  To: peterz
  Cc: linux-arch, linux-sh, jcmvbkbc, Will Deacon, x86, linux-kernel,
	npiggin, borntraeger, linuxppc-dev
In-Reply-To: <20200727124852.GK119549@hirez.programming.kicks-ass.net>


* peterz@infradead.org <peterz@infradead.org> wrote:

> 
> Prior to commit 859d069ee1dd ("lockdep: Prepare for NMI IRQ state
> tracking") IRQ state tracking was disabled in NMIs due to nmi_enter()
> doing lockdep_off() -- with the obvious requirement that NMI entry
> call nmi_enter() before trace_hardirqs_off().
> 
> [ afaict, PowerPC and SH violate this order on their NMI entry ]
> 
> However, that commit explicitly changed lockdep_hardirqs_*() to ignore
> lockdep_off() and breaks every architecture that has irq-tracing in
> its NMI entry that hasn't been fixed up (x86 being the only fixed one
> at this point).
> 
> The reason for this change is that by ignoring lockdep_off() we can:
> 
>   - get rid of 'current->lockdep_recursion' in lockdep_assert_irqs*()
>     which was going to give header-recursion issues with the
>     seqlock rework.
> 
>   - allow these lockdep_assert_*() macros to function in NMI context.
> 
> Restore the previous state of things and allow an architecture to
> opt-in to the NMI IRQ tracking support, however instead of relying on
> lockdep_off(), rely on in_nmi(), both are part of nmi_enter() and so
> over-all entry ordering doesn't need to change.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/Kconfig.debug   |    3 +++
>  kernel/locking/lockdep.c |    8 +++++++-
>  lib/Kconfig.debug        |    6 ++++++
>  3 files changed, 16 insertions(+), 1 deletion(-)

Tree management side note: to apply this I've created a new 
tip:locking/nmi branch, which is based off the existing NMI vs. IRQ 
tracing commits included in locking/core:

ed00495333cc: ("locking/lockdep: Fix TRACE_IRQFLAGS vs. NMIs")
ba1f2b2eaa2a: ("x86/entry: Fix NMI vs IRQ state tracking")
859d069ee1dd: ("lockdep: Prepare for NMI IRQ state tracking")
248591f5d257: ("kcsan: Make KCSAN compatible with new IRQ state tracking")
e1bcad609f5a: ("Merge branch 'tip/x86/entry'")
b037b09b9058: ("x86/entry: Rename idtentry_enter/exit_cond_rcu() to idtentry_enter/exit()")
dcb7fd82c75e: ("Linux 5.8-rc4")

This locking/nmi branch can then be merged into irq/entry (there's a 
bunch of conflicts between them), without coupling all of v5.9's 
locking changes to Thomas's generic entry work.

Thanks,

	Ingo

^ permalink raw reply

* [PATCH] lockdep: Fix TRACE_IRQFLAGS vs NMIs
From: peterz @ 2020-07-27 12:48 UTC (permalink / raw)
  To: Ingo Molnar, Will Deacon
  Cc: linux-arch, linux-sh, jcmvbkbc, x86, linux-kernel, npiggin,
	borntraeger, linuxppc-dev


Prior to commit 859d069ee1dd ("lockdep: Prepare for NMI IRQ state
tracking") IRQ state tracking was disabled in NMIs due to nmi_enter()
doing lockdep_off() -- with the obvious requirement that NMI entry
call nmi_enter() before trace_hardirqs_off().

[ afaict, PowerPC and SH violate this order on their NMI entry ]

However, that commit explicitly changed lockdep_hardirqs_*() to ignore
lockdep_off() and breaks every architecture that has irq-tracing in
its NMI entry that hasn't been fixed up (x86 being the only fixed one
at this point).

The reason for this change is that by ignoring lockdep_off() we can:

  - get rid of 'current->lockdep_recursion' in lockdep_assert_irqs*()
    which was going to give header-recursion issues with the
    seqlock rework.

  - allow these lockdep_assert_*() macros to function in NMI context.

Restore the previous state of things and allow an architecture to
opt-in to the NMI IRQ tracking support, however instead of relying on
lockdep_off(), rely on in_nmi(), both are part of nmi_enter() and so
over-all entry ordering doesn't need to change.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/Kconfig.debug   |    3 +++
 kernel/locking/lockdep.c |    8 +++++++-
 lib/Kconfig.debug        |    6 ++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -3,6 +3,9 @@
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
+config TRACE_IRQFLAGS_NMI_SUPPORT
+	def_bool y
+
 config EARLY_PRINTK_USB
 	bool
 
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3712,6 +3712,9 @@ void noinstr lockdep_hardirqs_on(unsigne
 	 * and not rely on hardware state like normal interrupts.
 	 */
 	if (unlikely(in_nmi())) {
+		if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
+			return;
+
 		/*
 		 * Skip:
 		 *  - recursion check, because NMI can hit lockdep;
@@ -3773,7 +3776,10 @@ void noinstr lockdep_hardirqs_off(unsign
 	 * they will restore the software state. This ensures the software
 	 * state is consistent inside NMIs as well.
 	 */
-	if (unlikely(!in_nmi() && (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)))
+	if (in_nmi()) {
+		if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
+			return;
+	} else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
 		return;
 
 	/*
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1325,11 +1325,17 @@ config WW_MUTEX_SELFTEST
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
+	depends on TRACE_IRQFLAGS_SUPPORT
 	bool
 	help
 	  Enables hooks to interrupt enabling and disabling for
 	  either tracing or lock debugging.
 
+config TRACE_IRQFLAGS_NMI
+	def_bool y
+	depends on TRACE_IRQFLAGS
+	depends on TRACE_IRQFLAGS_NMI_SUPPORT
+
 config STACKTRACE
 	bool "Stack backtrace support"
 	depends on STACKTRACE_SUPPORT

^ permalink raw reply

* Re: [PATCH] powerpc/64s/hash: Fix hash_preload running with interrupts enabled
From: Michael Ellerman @ 2020-07-27 12:35 UTC (permalink / raw)
  To: Athira Rajeev, Nicholas Piggin; +Cc: Aneesh Kumar K . V, linuxppc-dev
In-Reply-To: <4925309C-A338-4C0F-90E3-4522643021CB@linux.vnet.ibm.com>

Athira Rajeev <atrajeev@linux.vnet.ibm.com> writes:
>> On 27-Jul-2020, at 11:39 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>> 
>> Commit 2f92447f9f96 ("powerpc/book3s64/hash: Use the pte_t address from the
>> caller") removed the local_irq_disable from hash_preload, but it was
>> required for more than just the page table walk: the hash pte busy bit is
>> effectively a lock which may be taken in interrupt context, and the local
>> update flag test must not be preempted before it's used.
>> 
>> This solves apparent lockups with perf interrupting __hash_page_64K. If
>> get_perf_callchain then also takes a hash fault on the same page while it
>> is already locked, it will loop forever taking hash faults, which looks like
>> this:
>> 
>> cpu 0x49e: Vector: 100 (System Reset) at [c00000001a4f7d70]
>>    pc: c000000000072dc8: hash_page_mm+0x8/0x800
>>    lr: c00000000000c5a4: do_hash_page+0x24/0x38
>>    sp: c0002ac1cc69ac70
>>   msr: 8000000000081033
>>  current = 0xc0002ac1cc602e00
>>  paca    = 0xc00000001de1f280   irqmask: 0x03   irq_happened: 0x01
>>    pid   = 20118, comm = pread2_processe
>> Linux version 5.8.0-rc6-00345-g1fad14f18bc6
>> 49e:mon> t
>> [c0002ac1cc69ac70] c00000000000c5a4 do_hash_page+0x24/0x38 (unreliable)
>> --- Exception: 300 (Data Access) at c00000000008fa60 __copy_tofrom_user_power7+0x20c/0x7ac
>> [link register   ] c000000000335d10 copy_from_user_nofault+0xf0/0x150
>> [c0002ac1cc69af70] c00032bf9fa3c880 (unreliable)
>> [c0002ac1cc69afa0] c000000000109df0 read_user_stack_64+0x70/0xf0
>> [c0002ac1cc69afd0] c000000000109fcc perf_callchain_user_64+0x15c/0x410
>> [c0002ac1cc69b060] c000000000109c00 perf_callchain_user+0x20/0x40
>> [c0002ac1cc69b080] c00000000031c6cc get_perf_callchain+0x25c/0x360
>> [c0002ac1cc69b120] c000000000316b50 perf_callchain+0x70/0xa0
>> [c0002ac1cc69b140] c000000000316ddc perf_prepare_sample+0x25c/0x790
>> [c0002ac1cc69b1a0] c000000000317350 perf_event_output_forward+0x40/0xb0
>> [c0002ac1cc69b220] c000000000306138 __perf_event_overflow+0x88/0x1a0
>> [c0002ac1cc69b270] c00000000010cf70 record_and_restart+0x230/0x750
>> [c0002ac1cc69b620] c00000000010d69c perf_event_interrupt+0x20c/0x510
>> [c0002ac1cc69b730] c000000000027d9c performance_monitor_exception+0x4c/0x60
>> [c0002ac1cc69b750] c00000000000b2f8 performance_monitor_common_virt+0x1b8/0x1c0
>> --- Exception: f00 (Performance Monitor) at c0000000000cb5b0 pSeries_lpar_hpte_insert+0x0/0x160
>> [link register   ] c0000000000846f0 __hash_page_64K+0x210/0x540
>> [c0002ac1cc69ba50] 0000000000000000 (unreliable)
>> [c0002ac1cc69bb00] c000000000073ae0 update_mmu_cache+0x390/0x3a0
>> [c0002ac1cc69bb70] c00000000037f024 wp_page_copy+0x364/0xce0
>> [c0002ac1cc69bc20] c00000000038272c do_wp_page+0xdc/0xa60
>> [c0002ac1cc69bc70] c0000000003857bc handle_mm_fault+0xb9c/0x1b60
>> [c0002ac1cc69bd50] c00000000006c434 __do_page_fault+0x314/0xc90
>> [c0002ac1cc69be20] c00000000000c5c8 handle_page_fault+0x10/0x2c
>> --- Exception: 300 (Data Access) at 00007fff8c861fe8
>> SP (7ffff6b19660) is in userspace
>> 
>> Reported-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> Reported-by: Anton Blanchard <anton@ozlabs.org>
>> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> Fixes: 2f92447f9f96 ("powerpc/book3s64/hash: Use the pte_t address from the
>> caller")
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
>
> Hi,
>
> Tested with the patch and it fixes the lockups I was seeing with my test run.
> Thanks for the fix.
>
> Tested-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>

Thanks for testing.

What test are you running?

cheers

^ permalink raw reply

* Re: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame
From: Michael Ellerman @ 2020-07-27 12:28 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc-dev, linux-kernel, dja
In-Reply-To: <20200727082331.GA2110@lt-gp.iram.es>

Gabriel Paubert <paubert@iram.es> writes:
> On Fri, Jul 24, 2020 at 07:25:25PM +1000, Michael Ellerman wrote:
>> We have powerpc specific logic in our page fault handling to decide if
>> an access to an unmapped address below the stack pointer should expand
>> the stack VMA.
>> 
>> The code was originally added in 2004 "ported from 2.4". The rough
>> logic is that the stack is allowed to grow to 1MB with no extra
>> checking. Over 1MB the access must be within 2048 bytes of the stack
>> pointer, or be from a user instruction that updates the stack pointer.
>> 
>> The 2048 byte allowance below the stack pointer is there to cover the
>> 288 byte "red zone" as well as the "about 1.5kB" needed by the signal
>> delivery code.
>> 
>> Unfortunately since then the signal frame has expanded, and is now
>> 4224 bytes on 64-bit kernels with transactional memory enabled.
>
> Are there really users of transactional memory in the wild? 

Not many that I've heard of, but some.

Though anything that does use it needs to be written to fallback to
regular locking if TM is not available anyway.

> Just asking because Power10 removes TM, and Power9 has had some issues
> with it AFAICT.

It varies on different Power9 chip levels. For guests it should work.

> Getting rid of it (if possible) would result in smaller signal frames,
> with simpler signal delivery code (probably slightly faster also).

All the kernel code should be behind CONFIG_PPC_TRANSACTIONAL_MEM.

Deciding to disable that is really a distro decision.

In upstream we tend not to drop support for existing hardware while
people are still using it. But we could make a special case for TM,
because it's quite intrusive. I think we'd wait for a major distro to
ship without TM enabled before we did that though.

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/test_emulate_sstep: Fix build error
From: Michael Ellerman @ 2020-07-27 12:07 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
In-Reply-To: <20200724004109.1461709-1-mpe@ellerman.id.au>

On Fri, 24 Jul 2020 10:41:09 +1000, Michael Ellerman wrote:
> ppc64_book3e_allmodconfig fails with:
> 
>   arch/powerpc/lib/test_emulate_step.c: In function 'test_pld':
>   arch/powerpc/lib/test_emulate_step.c:113:7: error: implicit declaration of function 'cpu_has_feature'
>     113 |  if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
>         |       ^~~~~~~~~~~~~~~
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/test_emulate_sstep: Fix build error
      https://git.kernel.org/powerpc/c/70cc062c47e7851335ff4c44ba9b362174baf7d4

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/sstep: Fix incorrect CONFIG symbol in scv handling
From: Michael Ellerman @ 2020-07-27 12:07 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: christophe.leroy
In-Reply-To: <20200724131609.1640533-1-mpe@ellerman.id.au>

On Fri, 24 Jul 2020 23:16:09 +1000, Michael Ellerman wrote:
> When I "fixed" the ppc64e build in Nick's recent patch, I typoed the
> CONFIG symbol, resulting in one that doesn't exist. Fix it to use the
> correct symbol.

Applied to powerpc/next.

[1/1] powerpc/sstep: Fix incorrect CONFIG symbol in scv handling
      https://git.kernel.org/powerpc/c/826b07b190c8ca69ce674f13b4dc9be2bc536fcd

cheers

^ permalink raw reply

* [powerpc:next] BUILD SUCCESS 86052e407e8e1964c81965de25832258875a0e6d
From: kernel test robot @ 2020-07-27 11:48 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next
branch HEAD: 86052e407e8e1964c81965de25832258875a0e6d  powerpc/powernv/pci.h: delete duplicated word

elapsed time: 1289m

configs tested: 70
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
i386                             allyesconfig
i386                                defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
m68k                           sun3_defconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nios2                            allyesconfig
openrisc                            defconfig
nds32                               defconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
xtensa                              defconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
sparc                            allyesconfig
sparc                               defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                             defconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
i386                 randconfig-a003-20200727
i386                 randconfig-a005-20200727
i386                 randconfig-a004-20200727
i386                 randconfig-a006-20200727
i386                 randconfig-a002-20200727
i386                 randconfig-a001-20200727
x86_64               randconfig-a005-20200727
x86_64               randconfig-a004-20200727
x86_64               randconfig-a003-20200727
x86_64               randconfig-a006-20200727
x86_64               randconfig-a002-20200727
x86_64               randconfig-a001-20200727
i386                 randconfig-a016-20200727
i386                 randconfig-a013-20200727
i386                 randconfig-a012-20200727
i386                 randconfig-a015-20200727
i386                 randconfig-a011-20200727
i386                 randconfig-a014-20200727
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
sparc64                             defconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                              defconfig

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v2] powerpc/book3s64/radix: Add kernel command line option to disable radix GTSE
From: Bharata B Rao @ 2020-07-27 11:36 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linuxppc-dev
In-Reply-To: <20200727085908.420806-1-aneesh.kumar@linux.ibm.com>

On Mon, Jul 27, 2020 at 02:29:08PM +0530, Aneesh Kumar K.V wrote:
> This adds a kernel command line option that can be used to disable GTSE support.
> Disabling GTSE implies kernel will make hcalls to invalidate TLB entries.
> 
> This was done so that we can do VM migration between configs that enable/disable
> GTSE support via hypervisor. To migrate a VM from a system that supports
> GTSE to a system that doesn't, we can boot the guest with
> radix_hcall_invalidate=on, thereby forcing the guest to use hcalls for TLB
> invalidates.
> 
> The check for hcall availability is done in pSeries_setup_arch so that
> the panic message appears on the console. This should only happen on
> a hypervisor that doesn't force the guest to hash translation even
> though it can't handle the radix GTSE=0 request via CAS. With
> radix_hcall_invalidate=on if the hypervisor doesn't support hcall_rpt_invalidate
> hcall it should force the LPAR to hash translation.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Tested

1. radix_hcall_invalidate=on with KVM implementation of H_RPT_INVALIDATE hcall,
   the tlb flush calls get off-loaded to the hcall.
2. radix_hcall_invalidate=on w/o H_RPT_INVALIDATE hcall, the guest kernel
   panics as per design.

Tested-by: Bharata B Rao <bharata@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 09/10] powerpc/smp: Create coregroup domain
From: Srikar Dronamraju @ 2020-07-27 11:18 UTC (permalink / raw)
  To: Gautham R Shenoy
  Cc: Nathan Lynch, Michael Neuling, Peter Zijlstra, LKML,
	Nicholas Piggin, Valentin Schneider, Oliver O'Halloran,
	Jordan Niethe, linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727043941.GA18303@in.ibm.com>

* Gautham R Shenoy <ego@linux.vnet.ibm.com> [2020-07-27 10:09:41]:

> > 
> >  static void fixup_topology(void)
> >  {
> > +	if (!has_coregroup_support())
> > +		powerpc_topology[mc_idx].mask = cpu_bigcore_mask;
> > +
> >  	if (shared_caches) {
> >  		pr_info("Using shared cache scheduler topology\n");
> >  		powerpc_topology[bigcore_idx].mask = shared_cache_mask;
> 
> 
> Suppose we consider a topology which does not have coregroup_support,
> but has shared_caches. In that case, we would want our coregroup
> domain to degenerate.
> 
> From the above code, after the fixup, our topology will look as
> follows:
> 
> static struct sched_domain_topology_level powerpc_topology[] = {
>   	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
>  	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> 	{ cpu_bigcore_mask, SD_INIT_NAME(MC) },
>   	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
>   	{ NULL, },
> 
> So, in this case, the core-group domain (identified by MC) will
> degenerate only if cpu_bigcore_mask() and shared_cache_mask() return
> the same value. This may work for existing platforms, because either
> shared_caches don't exist, or when they do, cpu_bigcore_mask and
> shared_cache_mask return the same set of CPUs. But this may or may not
> continue to hold good in the future.
> 
> Furthermore, if that is always going to be the case that in the
> presence of shared_caches the cpu_bigcore_mask() and
> shared_cache_mask() will always be the same, then why even define two
> separate masks and not just have only the cpu_bigcore_mask() ?
> 

Your two statements are contradictory. In the former you are saying we
should be future proof, and in the latter you are asking why add both if
they are going to be the same.

> The correct way would be to set the powerpc_topology[mc_idx].mask to
> powerpc_topology[bigcore_idx].mask *after* we have fixedup the
> big_core level.

The reason I modified it in v4 is not for degeneration or for future case
but for the current PowerNV/SMT 4 case. I could have as well detected the
same and modified bigcore but thought fixup at one place would be
better.

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH v2 01/14] powerpc/eeh: Remove eeh_dev_phb_init_dynamic()
From: Michael Ellerman @ 2020-07-27 10:53 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev, Oliver O'Halloran
In-Reply-To: <159583477725.602200.17371356742597086381.b4-ty@ellerman.id.au>

Michael Ellerman <patch-notifications@ellerman.id.au> writes:
> On Wed, 22 Jul 2020 14:26:15 +1000, Oliver O'Halloran wrote:
>> This function is a one line wrapper around eeh_phb_pe_create() and despite
>> the name it doesn't create any eeh_dev structures. Replace it with direct
>> calls to eeh_phb_pe_create() since that does what it says on the tin
>> and removes a layer of indirection.
>
> Applied to powerpc/next.
>
> [01/14] powerpc/eeh: Remove eeh_dev_phb_init_dynamic()
>         https://git.kernel.org/powerpc/c/475028efc708880e16e61cc4cbbc00af784cb39b

Something weird happened with the "thanks" script. Pretty sure I applied v3.

I think I applied this version previously and the script just matched
the subjects?

Anyway, ignore this mail.

cheers

^ permalink raw reply

* Re: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame
From: Daniel Axtens @ 2020-07-27 10:50 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: linux-kernel
In-Reply-To: <20200724092528.1578671-2-mpe@ellerman.id.au>

Hi Michael,

I have tested this with the test from the bug and it now seems to pass
fine. On that basis:

Tested-by: Daniel Axtens <dja@axtens.net>

Thank you for coming up with a better solution than my gross hack!

Kind regards,
Daniel

> We have powerpc specific logic in our page fault handling to decide if
> an access to an unmapped address below the stack pointer should expand
> the stack VMA.
>
> The code was originally added in 2004 "ported from 2.4". The rough
> logic is that the stack is allowed to grow to 1MB with no extra
> checking. Over 1MB the access must be within 2048 bytes of the stack
> pointer, or be from a user instruction that updates the stack pointer.
>
> The 2048 byte allowance below the stack pointer is there to cover the
> 288 byte "red zone" as well as the "about 1.5kB" needed by the signal
> delivery code.
>
> Unfortunately since then the signal frame has expanded, and is now
> 4224 bytes on 64-bit kernels with transactional memory enabled. This
> means if a process has consumed more than 1MB of stack, and its stack
> pointer lies less than 4224 bytes from the next page boundary, signal
> delivery will fault when trying to expand the stack and the process
> will see a SEGV.
>
> The total size of the signal frame is the size of struct rt_sigframe
> (which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
> 64-bit).
>
> The 2048 byte allowance was correct until 2008 as the signal frame
> was:
>
> struct rt_sigframe {
>         struct ucontext    uc;                           /*     0  1440 */
>         /* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
>         long unsigned int          _unused[2];           /*  1440    16 */
>         unsigned int               tramp[6];             /*  1456    24 */
>         struct siginfo *           pinfo;                /*  1480     8 */
>         void *                     puc;                  /*  1488     8 */
>         struct siginfo     info;                         /*  1496   128 */
>         /* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
>         char                       abigap[288];          /*  1624   288 */
>
>         /* size: 1920, cachelines: 15, members: 7 */
>         /* padding: 8 */
> };
>
> 1920 + 128 = 2048
>
> Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
> ptrace and signal support") (Jul 2008) the signal frame expanded to
> 2304 bytes:
>
> struct rt_sigframe {
>         struct ucontext    uc;                           /*     0  1696 */	<--
>         /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
>         long unsigned int          _unused[2];           /*  1696    16 */
>         unsigned int               tramp[6];             /*  1712    24 */
>         struct siginfo *           pinfo;                /*  1736     8 */
>         void *                     puc;                  /*  1744     8 */
>         struct siginfo     info;                         /*  1752   128 */
>         /* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
>         char                       abigap[288];          /*  1880   288 */
>
>         /* size: 2176, cachelines: 17, members: 7 */
>         /* padding: 8 */
> };
>
> 2176 + 128 = 2304
>
> At this point we should have been exposed to the bug, though as far as
> I know it was never reported. I no longer have a system old enough to
> easily test on.
>
> Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
> grow-down stack segment") caused our stack expansion code to never
> trigger, as there was always a VMA found for a write up to PAGE_SIZE
> below r1.
>
> That meant the bug was hidden as we continued to expand the signal
> frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
> state to the signal context") (Feb 2013):
>
> struct rt_sigframe {
>         struct ucontext    uc;                           /*     0  1696 */
>         /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
>         struct ucontext    uc_transact;                  /*  1696  1696 */	<--
>         /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
>         long unsigned int          _unused[2];           /*  3392    16 */
>         unsigned int               tramp[6];             /*  3408    24 */
>         struct siginfo *           pinfo;                /*  3432     8 */
>         void *                     puc;                  /*  3440     8 */
>         struct siginfo     info;                         /*  3448   128 */
>         /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
>         char                       abigap[288];          /*  3576   288 */
>
>         /* size: 3872, cachelines: 31, members: 8 */
>         /* padding: 8 */
>         /* last cacheline: 32 bytes */
> };
>
> 3872 + 128 = 4000
>
> And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
> userspace to 512 bytes") (Feb 2014):
>
> struct rt_sigframe {
>         struct ucontext    uc;                           /*     0  1696 */
>         /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
>         struct ucontext    uc_transact;                  /*  1696  1696 */
>         /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
>         long unsigned int          _unused[2];           /*  3392    16 */
>         unsigned int               tramp[6];             /*  3408    24 */
>         struct siginfo *           pinfo;                /*  3432     8 */
>         void *                     puc;                  /*  3440     8 */
>         struct siginfo     info;                         /*  3448   128 */
>         /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
>         char                       abigap[512];          /*  3576   512 */	<--
>
>         /* size: 4096, cachelines: 32, members: 8 */
>         /* padding: 8 */
> };
>
> 4096 + 128 = 4224
>
> Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
> gap, between vmas") exposed us to the existing bug, because it changed
> the stack VMA to be the correct/real size, meaning our stack expansion
> code is now triggered.
>
> Fix it by increasing the allowance to 4224 bytes.
>
> Hard-coding 4224 is obviously unsafe against future expansions of the
> signal frame in the same way as the existing code. We can't easily use
> sizeof() because the signal frame structure is not in a header. We
> will either fix that, or rip out all the custom stack expansion
> checking logic entirely.
>
> Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
> Cc: stable@vger.kernel.org # v2.6.27+
> Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>
> v2: Account for the extra 128 bytes of __SIGNAL_FRAMESIZE, making the
>     total size 4224, as noticed by dja.
>
> See also https://bugzilla.kernel.org/show_bug.cgi?id=205183
> ---
>  arch/powerpc/mm/fault.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 641fc5f3d7dd..3ebb1792e636 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -267,6 +267,9 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>  	return false;
>  }
>  
> +// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
> +#define SIGFRAME_MAX_SIZE	(4096 + 128)
> +
>  static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
>  				struct vm_area_struct *vma, unsigned int flags,
>  				bool *must_retry)
> @@ -274,7 +277,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
>  	/*
>  	 * N.B. The POWER/Open ABI allows programs to access up to
>  	 * 288 bytes below the stack pointer.
> -	 * The kernel signal delivery code writes up to about 1.5kB
> +	 * The kernel signal delivery code writes a bit over 4KB
>  	 * below the stack pointer (r1) before decrementing it.
>  	 * The exec code can write slightly over 640kB to the stack
>  	 * before setting the user r1.  Thus we allow the stack to
> @@ -299,7 +302,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
>  		 * between the last mapped region and the stack will
>  		 * expand the stack rather than segfaulting.
>  		 */
> -		if (address + 2048 >= uregs->gpr[1])
> +		if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
>  			return false;
>  
>  		if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
> -- 
> 2.25.1

^ permalink raw reply

* [PATCH] powerpc/mm: Limit resize_hpt_for_hotplug() call to hash guests only
From: Bharata B Rao @ 2020-07-27  9:57 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Bharata B Rao, aneesh.kumar, david, Nathan Lynch

During memory hotplug and unplug, resize_hpt_for_hotplug() gets called
for both hash and radix guests but it should be called only for hash
guests. Though the call does nothing in the radix guest case, it is
cleaner to push this call into hash specific memory hotplug routines.

Reported-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
Tested with memory hotplug and unplug for hash and radix KVM guests.

 arch/powerpc/include/asm/sparsemem.h  | 6 ------
 arch/powerpc/mm/book3s64/hash_utils.c | 8 +++++++-
 arch/powerpc/mm/mem.c                 | 5 -----
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index c89b32443cff..1e6fa371cc38 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,12 +17,6 @@ extern int create_section_mapping(unsigned long start, unsigned long end,
 				  int nid, pgprot_t prot);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
-#else
-static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
-#endif
-
 #ifdef CONFIG_NUMA
 extern int hot_add_scn_to_nid(unsigned long scn_addr);
 #else
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9fdabea04990..30a4a91d9987 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -785,7 +785,7 @@ static unsigned long __init htab_get_table_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int resize_hpt_for_hotplug(unsigned long new_mem_size)
+static int resize_hpt_for_hotplug(unsigned long new_mem_size)
 {
 	unsigned target_hpt_shift;
 
@@ -819,6 +819,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end,
 		return -1;
 	}
 
+	resize_hpt_for_hotplug(memblock_phys_mem_size());
+
 	rc = htab_bolt_mapping(start, end, __pa(start),
 			       pgprot_val(prot), mmu_linear_psize,
 			       mmu_kernel_ssize);
@@ -836,6 +838,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
 	int rc = htab_remove_mapping(start, end, mmu_linear_psize,
 				     mmu_kernel_ssize);
 	WARN_ON(rc < 0);
+
+	if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+		pr_warn("Hash collision while resizing HPT\n");
+
 	return rc;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb8dcfc..9dafc636588f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,8 +127,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int rc;
 
-	resize_hpt_for_hotplug(memblock_phys_mem_size());
-
 	start = (unsigned long)__va(start);
 	rc = create_section_mapping(start, start + size, nid,
 				    params->pgprot);
@@ -161,9 +159,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 	 * hit that section of memory
 	 */
 	vm_unmap_aliases();
-
-	if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
-		pr_warn("Hash collision while resizing HPT\n");
 }
 #endif
 
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] KVM: PPC: Book3S HV: increase KVMPPC_NR_LPIDS on POWER8 and POWER9
From: Cédric Le Goater @ 2020-07-27  9:38 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm-ppc, linuxppc-dev, Nicholas Piggin, kvm
In-Reply-To: <20200723062016.GE213782@thinks.paulus.ozlabs.org>

On 7/23/20 8:20 AM, Paul Mackerras wrote:
> On Mon, Jun 08, 2020 at 01:57:14PM +0200, Cédric Le Goater wrote:
>> POWER8 and POWER9 have 12-bit LPIDs. Change LPID_RSVD to support up to
>> (4096 - 2) guests on these processors. POWER7 is kept the same with a
>> limitation of (1024 - 2), but it might be time to drop KVM support for
>> POWER7.
>>
>> Tested with 2048 guests * 4 vCPUs on a witherspoon system with 512G
>> RAM and a bit of swap.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> Thanks, patch applied to my kvm-ppc-next branch.

We have pushed the limits further on a 1TB system and reached the limit
of 4094 guests with 16 vCPUs. 

With more vCPUs, the system starts to check-stop. We believe that the 
pages used by the interrupt controller for the backing store of the 
XIVE internal tables (END and NVT) allocated with GFP_KERNEL are 
reclaimable.

I am thinking of changing the allocation flags with :  
 
	__GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC

because XIVE should be able to fail gracefully if the system is 
low on mem. Is that correct ? 

Thanks,  

C.

^ permalink raw reply

* Re: [PATCH] powerpc/64s/hash: Fix hash_preload running with interrupts enabled
From: Athira Rajeev @ 2020-07-27  9:32 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Aneesh Kumar K . V, linuxppc-dev
In-Reply-To: <20200727060947.10060-1-npiggin@gmail.com>



> On 27-Jul-2020, at 11:39 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> Commit 2f92447f9f96 ("powerpc/book3s64/hash: Use the pte_t address from the
> caller") removed the local_irq_disable from hash_preload, but it was
> required for more than just the page table walk: the hash pte busy bit is
> effectively a lock which may be taken in interrupt context, and the local
> update flag test must not be preempted before it's used.
> 
> This solves apparent lockups with perf interrupting __hash_page_64K. If
> get_perf_callchain then also takes a hash fault on the same page while it
> is already locked, it will loop forever taking hash faults, which looks like
> this:
> 
> cpu 0x49e: Vector: 100 (System Reset) at [c00000001a4f7d70]
>    pc: c000000000072dc8: hash_page_mm+0x8/0x800
>    lr: c00000000000c5a4: do_hash_page+0x24/0x38
>    sp: c0002ac1cc69ac70
>   msr: 8000000000081033
>  current = 0xc0002ac1cc602e00
>  paca    = 0xc00000001de1f280   irqmask: 0x03   irq_happened: 0x01
>    pid   = 20118, comm = pread2_processe
> Linux version 5.8.0-rc6-00345-g1fad14f18bc6
> 49e:mon> t
> [c0002ac1cc69ac70] c00000000000c5a4 do_hash_page+0x24/0x38 (unreliable)
> --- Exception: 300 (Data Access) at c00000000008fa60 __copy_tofrom_user_power7+0x20c/0x7ac
> [link register   ] c000000000335d10 copy_from_user_nofault+0xf0/0x150
> [c0002ac1cc69af70] c00032bf9fa3c880 (unreliable)
> [c0002ac1cc69afa0] c000000000109df0 read_user_stack_64+0x70/0xf0
> [c0002ac1cc69afd0] c000000000109fcc perf_callchain_user_64+0x15c/0x410
> [c0002ac1cc69b060] c000000000109c00 perf_callchain_user+0x20/0x40
> [c0002ac1cc69b080] c00000000031c6cc get_perf_callchain+0x25c/0x360
> [c0002ac1cc69b120] c000000000316b50 perf_callchain+0x70/0xa0
> [c0002ac1cc69b140] c000000000316ddc perf_prepare_sample+0x25c/0x790
> [c0002ac1cc69b1a0] c000000000317350 perf_event_output_forward+0x40/0xb0
> [c0002ac1cc69b220] c000000000306138 __perf_event_overflow+0x88/0x1a0
> [c0002ac1cc69b270] c00000000010cf70 record_and_restart+0x230/0x750
> [c0002ac1cc69b620] c00000000010d69c perf_event_interrupt+0x20c/0x510
> [c0002ac1cc69b730] c000000000027d9c performance_monitor_exception+0x4c/0x60
> [c0002ac1cc69b750] c00000000000b2f8 performance_monitor_common_virt+0x1b8/0x1c0
> --- Exception: f00 (Performance Monitor) at c0000000000cb5b0 pSeries_lpar_hpte_insert+0x0/0x160
> [link register   ] c0000000000846f0 __hash_page_64K+0x210/0x540
> [c0002ac1cc69ba50] 0000000000000000 (unreliable)
> [c0002ac1cc69bb00] c000000000073ae0 update_mmu_cache+0x390/0x3a0
> [c0002ac1cc69bb70] c00000000037f024 wp_page_copy+0x364/0xce0
> [c0002ac1cc69bc20] c00000000038272c do_wp_page+0xdc/0xa60
> [c0002ac1cc69bc70] c0000000003857bc handle_mm_fault+0xb9c/0x1b60
> [c0002ac1cc69bd50] c00000000006c434 __do_page_fault+0x314/0xc90
> [c0002ac1cc69be20] c00000000000c5c8 handle_page_fault+0x10/0x2c
> --- Exception: 300 (Data Access) at 00007fff8c861fe8
> SP (7ffff6b19660) is in userspace
> 
> Reported-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Anton Blanchard <anton@ozlabs.org>
> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Fixes: 2f92447f9f96 ("powerpc/book3s64/hash: Use the pte_t address from the
> caller")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>


Hi,

Tested with the patch and it fixes the lockups I was seeing with my test run.
Thanks for the fix.

Tested-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>

> ---
> arch/powerpc/kernel/exceptions-64s.S  | 14 +++++++++++---
> arch/powerpc/mm/book3s64/hash_utils.c | 25 +++++++++++++++++++++++++
> arch/powerpc/perf/core-book3s.c       |  6 ++++++
> 3 files changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 0fc8bad878b2..446e54c3f71e 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -3072,10 +3072,18 @@ do_hash_page:
> 	ori	r0,r0,DSISR_BAD_FAULT_64S@l
> 	and.	r0,r5,r0		/* weird error? */
> 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
> +
> +	/*
> +	 * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
> +	 * don't call hash_page, just fail the fault. This is required to
> +	 * prevent re-entrancy problems in the hash code, namely perf
> +	 * interrupts hitting while something holds H_PAGE_BUSY, and taking a
> +	 * hash fault. See the comment in hash_preload().
> +	 */
> 	ld	r11, PACA_THREAD_INFO(r13)
> -	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
> -	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
> -	bne	77f			/* then don't call hash_page now */
> +	lwz	r0,TI_PREEMPT(r11)
> +	andis.	r0,r0,NMI_MASK@h
> +	bne	77f
> 
> 	/*
> 	 * r3 contains the trap number
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index 468169e33c86..9b9f92ad0e7a 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -1559,6 +1559,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
> 	pgd_t *pgdir;
> 	int rc, ssize, update_flags = 0;
> 	unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
> +	unsigned long flags;
> 
> 	BUG_ON(get_region_id(ea) != USER_REGION_ID);
> 
> @@ -1592,6 +1593,28 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
> 		return;
> #endif /* CONFIG_PPC_64K_PAGES */
> 
> +	/*
> +	 * __hash_page_* must run with interrupts off, as it sets the
> +	 * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any
> +	 * time and may take a hash fault reading the user stack, see
> +	 * read_user_stack_slow() in the powerpc/perf code.
> +	 *
> +	 * If that takes a hash fault on the same page as we lock here, it
> +	 * will bail out when seeing H_PAGE_BUSY set, and retry the access
> +	 * leading to an infinite loop.
> +	 *
> +	 * Disabling interrupts here does not prevent perf interrupts, but it
> +	 * will prevent them taking hash faults (see the NMI test in
> +	 * do_hash_page), then read_user_stack's copy_from_user_nofault will
> +	 * fail and perf will fall back to read_user_stack_slow(), which
> +	 * walks the Linux page tables.
> +	 *
> +	 * Interrupts must also be off for the duration of the
> +	 * mm_is_thread_local test and update, to prevent preempt running the
> +	 * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
> +	 */
> +	local_irq_save(flags);
> +
> 	/* Is that local to this CPU ? */
> 	if (mm_is_thread_local(mm))
> 		update_flags |= HPTE_LOCAL_UPDATE;
> @@ -1614,6 +1637,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
> 				   mm_ctx_user_psize(&mm->context),
> 				   mm_ctx_user_psize(&mm->context),
> 				   pte_val(*ptep));
> +
> +	local_irq_restore(flags);
> }
> 
> /*
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index cd6a742ac6ef..01d70280d287 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2179,6 +2179,12 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> 
> 	perf_read_regs(regs);
> 
> +	/*
> +	 * If perf interrupts hit in a local_irq_disable (soft-masked) region,
> +	 * we consider them as NMIs. This is required to prevent hash faults on
> +	 * user addresses when reading callchains. See the NMI test in
> +	 * do_hash_page.
> +	 */
> 	nmi = perf_intr_is_nmi(regs);
> 	if (nmi)
> 		nmi_enter();
> -- 
> 2.23.0
> 


^ permalink raw reply

* [PATCH v2] powerpc/book3s64/radix: Add kernel command line option to disable radix GTSE
From: Aneesh Kumar K.V @ 2020-07-27  8:59 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Aneesh Kumar K.V, Bharata B Rao

This adds a kernel command line option that can be used to disable GTSE support.
Disabling GTSE implies that the kernel will make hcalls to invalidate TLB entries.

This was done so that we can do VM migration between configs that enable/disable
GTSE support via hypervisor. To migrate a VM from a system that supports
GTSE to a system that doesn't, we can boot the guest with
radix_hcall_invalidate=on, thereby forcing the guest to use hcalls for TLB
invalidates.

The check for hcall availability is done in pSeries_setup_arch so that
the panic message appears on the console. This should only happen on
a hypervisor that doesn't force the guest to hash translation even
though it can't handle the radix GTSE=0 request via CAS. With
radix_hcall_invalidate=on, if the hypervisor doesn't support the
hcall_rpt_invalidate hcall, it should force the LPAR to hash translation.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes from v1:
* rename kernel parameter
* Drop a kernel warn

 Documentation/admin-guide/kernel-parameters.txt |  4 ++++
 arch/powerpc/include/asm/firmware.h             |  4 +++-
 arch/powerpc/kernel/prom_init.c                 | 13 +++++++++----
 arch/powerpc/mm/init_64.c                       |  1 -
 arch/powerpc/platforms/pseries/firmware.c       |  1 +
 arch/powerpc/platforms/pseries/setup.c          |  5 +++++
 6 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3ab61cd0f89c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -896,6 +896,10 @@
 	disable_radix	[PPC]
 			Disable RADIX MMU mode on POWER9
 
+	radix_hcall_invalidate=on  [PPC/PSERIES]
+			Disable RADIX GTSE feature and use hcall for TLB
+			invalidate.
+
 	disable_tlbie	[PPC]
 			Disable TLBIE instruction. Currently does not work
 			with KVM, with HASH MMU, or with coherent accelerators.
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 6003c2e533a0..aa6a5ef5d483 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,7 @@
 #define FW_FEATURE_PAPR_SCM 	ASM_CONST(0x0000002000000000)
 #define FW_FEATURE_ULTRAVISOR	ASM_CONST(0x0000004000000000)
 #define FW_FEATURE_STUFF_TCE	ASM_CONST(0x0000008000000000)
+#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -71,7 +72,8 @@ enum {
 		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
 		FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
 		FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
-		FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
+		FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
+		FW_FEATURE_RPT_INVALIDATE,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index cbc605cfdec0..f279a1f58fa7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -169,6 +169,7 @@ static unsigned long __prombss prom_tce_alloc_end;
 
 #ifdef CONFIG_PPC_PSERIES
 static bool __prombss prom_radix_disable;
+static bool __prombss prom_radix_gtse_disable;
 static bool __prombss prom_xive_disable;
 #endif
 
@@ -823,6 +824,12 @@ static void __init early_cmdline_parse(void)
 	if (prom_radix_disable)
 		prom_debug("Radix disabled from cmdline\n");
 
+	opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on");
+	if (opt) {
+		prom_radix_gtse_disable = true;
+		prom_debug("Radix GTSE disabled from cmdline\n");
+	}
+
 	opt = prom_strstr(prom_cmd_line, "xive=off");
 	if (opt) {
 		prom_xive_disable = true;
@@ -1285,10 +1292,8 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
 		prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
 		break;
 	case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
-		if (val & OV5_FEAT(OV5_RADIX_GTSE)) {
-			prom_debug("Radix - GTSE supported\n");
-			support->radix_gtse = true;
-		}
+		if (val & OV5_FEAT(OV5_RADIX_GTSE))
+			support->radix_gtse = !prom_radix_gtse_disable;
 		break;
 	case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
 		prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 152aa0200cef..4ae5fc0ceb30 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -406,7 +406,6 @@ static void __init early_check_vec5(void)
 		}
 		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
 						OV5_FEAT(OV5_RADIX_GTSE))) {
-			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
 			cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
 		} else
 			cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 3e49cc23a97a..4c7b7f5a2ebc 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -65,6 +65,7 @@ hypertas_fw_features_table[] = {
 	{FW_FEATURE_HPT_RESIZE,		"hcall-hpt-resize"},
 	{FW_FEATURE_BLOCK_REMOVE,	"hcall-block-remove"},
 	{FW_FEATURE_PAPR_SCM,		"hcall-scm"},
+	{FW_FEATURE_RPT_INVALIDATE,	"hcall-rpt-invalidate"},
 };
 
 /* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8c85466e0dd8..ae9bfc9fbb06 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -747,6 +747,11 @@ static void __init pSeries_setup_arch(void)
 	smp_init_pseries();
 
 
+	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
+		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+			panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");
+
+
 	/* openpic global configuration register (64-bit format). */
 	/* openpic Interrupt Source Unit pointer (64-bit format). */
 	/* python0 facility area (mmio) (64-bit format) REAL address. */
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame
From: Gabriel Paubert @ 2020-07-27  8:23 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, linux-kernel, dja
In-Reply-To: <20200724092528.1578671-2-mpe@ellerman.id.au>

On Fri, Jul 24, 2020 at 07:25:25PM +1000, Michael Ellerman wrote:
> We have powerpc specific logic in our page fault handling to decide if
> an access to an unmapped address below the stack pointer should expand
> the stack VMA.
> 
> The code was originally added in 2004 "ported from 2.4". The rough
> logic is that the stack is allowed to grow to 1MB with no extra
> checking. Over 1MB the access must be within 2048 bytes of the stack
> pointer, or be from a user instruction that updates the stack pointer.
> 
> The 2048 byte allowance below the stack pointer is there to cover the
> 288 byte "red zone" as well as the "about 1.5kB" needed by the signal
> delivery code.
> 
> Unfortunately since then the signal frame has expanded, and is now
> 4224 bytes on 64-bit kernels with transactional memory enabled.

Are there really users of transactional memory in the wild? 

Just asking because Power10 removes TM, and Power9 has had some issues
with it AFAICT.

Getting rid of it (if possible) would result in smaller signal frames,
with simpler signal delivery code (probably slightly faster also).

	Gabriel
 


^ permalink raw reply

* [powerpc:next-test] BUILD SUCCESS 78807804b0854ecb7dc6906e379fc688aca36456
From: kernel test robot @ 2020-07-27  8:05 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: 78807804b0854ecb7dc6906e379fc688aca36456  selftests/powerpc: Add test for pkey siginfo verification

elapsed time: 1065m

configs tested: 58
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

arm                                 defconfig
arm                              allyesconfig
arm                              allmodconfig
arm64                            allyesconfig
arm64                               defconfig
i386                             allyesconfig
i386                                defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                           sun3_defconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nios2                            allyesconfig
openrisc                            defconfig
nds32                               defconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
xtensa                              defconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
sparc                            allyesconfig
sparc                               defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                             defconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
i386                 randconfig-a016-20200727
i386                 randconfig-a013-20200727
i386                 randconfig-a012-20200727
i386                 randconfig-a015-20200727
i386                 randconfig-a011-20200727
i386                 randconfig-a014-20200727
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
sparc64                             defconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                              defconfig

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [PATCH 7/7] powerpc/smp: Depend on cpu_l1_cache_map when adding cpus
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

Currently on hotplug/hotunplug, the cpu iterates through all the cpus in
its core to find threads in its thread group. However this info is
already captured in cpu_l1_cache_map. Hence we could reduce the
iteration and clean up the add_cpu_to_smallcore_masks function.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/smp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index eceb7aa0f4b8..22f4b3856470 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1276,16 +1276,15 @@ static void remove_cpu_from_masks(int cpu)
 
 static inline void add_cpu_to_smallcore_masks(int cpu)
 {
-	struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
-	int i, first_thread = cpu_first_thread_sibling(cpu);
+	int i;
 
 	if (!has_big_cores)
 		return;
 
 	cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
 
-	for (i = first_thread; i < first_thread + threads_per_core; i++) {
-		if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+	for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) {
+		if (cpu_online(i))
 			set_cpus_related(i, cpu, cpu_smallcore_mask);
 	}
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH 5/7] powerpc/smp: Limit cpus traversed to within a node.
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

All the arch specific topology cpumasks are within a node/die.
However, when setting these per-cpu cpumasks, the system traverses
all the online cpus. This is redundant.

Reduce the traversal to only the cpus that are online in the node to
which the cpu belongs.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cde157483abf..9b03aad0beac 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1232,7 +1232,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
 		return false;
 
 	cpumask_set_cpu(cpu, mask_fn(cpu));
-	for_each_cpu(i, cpu_online_mask) {
+	for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) {
 		/*
 		 * when updating the marks the current CPU has not been marked
 		 * online, but we need to update the cache masks
-- 
2.17.1


^ permalink raw reply related

* [PATCH 6/7] powerpc/smp: Stop passing mask to update_mask_by_l2
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

update_mask_by_l2 is called only once, and always with cpu_l2_cache_mask
as its mask parameter. Instead of passing cpu_l2_cache_mask, use it
directly in update_mask_by_l2.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/smp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9b03aad0beac..eceb7aa0f4b8 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1222,7 +1222,7 @@ static struct device_node *cpu_to_l2cache(int cpu)
 	return cache;
 }
 
-static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
+static bool update_mask_by_l2(int cpu)
 {
 	struct device_node *l2_cache, *np;
 	int i;
@@ -1231,7 +1231,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
 	if (!l2_cache)
 		return false;
 
-	cpumask_set_cpu(cpu, mask_fn(cpu));
+	cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
 	for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) {
 		/*
 		 * when updating the marks the current CPU has not been marked
@@ -1242,7 +1242,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
 			continue;
 
 		if (np == l2_cache)
-			set_cpus_related(cpu, i, mask_fn);
+			set_cpus_related(cpu, i, cpu_l2_cache_mask);
 
 		of_node_put(np);
 	}
@@ -1306,7 +1306,7 @@ static void add_cpu_to_masks(int cpu)
 			set_cpus_related(i, cpu, cpu_sibling_mask);
 
 	add_cpu_to_smallcore_masks(cpu);
-	update_mask_by_l2(cpu, cpu_l2_cache_mask);
+	update_mask_by_l2(cpu);
 
 	if (has_coregroup_support()) {
 		int coregroup_id = cpu_to_coregroup_id(cpu);
-- 
2.17.1


^ permalink raw reply related

* [PATCH 4/7] powerpc/smp: Optimize remove_cpu_from_masks
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

Currently while offlining a cpu, we iterate through all the cpus in the
DIE to clear sibling, l2_cache and smallcore maps. However, if there are
a large number of cores in a DIE, we end up spending more time iterating
through cpus which are completely unrelated.

Optimize this by iterating only through a smaller but relevant cpumask.
If shared_caches is set, cpu_l2_cache_map is the relevant mask; otherwise
cpu_sibling_map is.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/smp.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d476098fc25c..cde157483abf 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1254,14 +1254,21 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
 #ifdef CONFIG_HOTPLUG_CPU
 static void remove_cpu_from_masks(int cpu)
 {
+	struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
 	int i;
 
-	for_each_cpu(i, cpu_cpu_mask(cpu)) {
+	if (shared_caches)
+		mask_fn = cpu_l2_cache_mask;
+
+	for_each_cpu(i, mask_fn(cpu)) {
 		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
 		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
 		if (has_big_cores)
 			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
-		if (has_coregroup_support())
+	}
+
+	if (has_coregroup_support()) {
+		for_each_cpu(i, cpu_coregroup_mask(cpu))
 			set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
 	}
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH 3/7] powerpc/smp: Remove get_physical_package_id
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

Now that cpu_core_mask has been removed and topology_core_cpumask has
been updated to use cpu_cpu_mask, we no longer need
get_physical_package_id.

Please note that get_physical_package_id is an exported symbol. However,
it was introduced recently and probably has no users outside the kernel.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/topology.h |  5 -----
 arch/powerpc/kernel/smp.c           | 20 --------------------
 2 files changed, 25 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index e0f232533c9d..e45219f74be0 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -114,12 +114,7 @@ static inline int cpu_to_coregroup_id(int cpu)
 #ifdef CONFIG_PPC64
 #include <asm/smp.h>
 
-#ifdef CONFIG_PPC_SPLPAR
-int get_physical_package_id(int cpu);
-#define topology_physical_package_id(cpu)	(get_physical_package_id(cpu))
-#else
 #define topology_physical_package_id(cpu)	(cpu_to_chip_id(cpu))
-#endif
 
 #define topology_sibling_cpumask(cpu)	(per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu)	(cpu_cpu_mask(cpu))
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8c28e1b4957b..d476098fc25c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1283,26 +1283,6 @@ static inline void add_cpu_to_smallcore_masks(int cpu)
 	}
 }
 
-int get_physical_package_id(int cpu)
-{
-	int pkg_id = cpu_to_chip_id(cpu);
-
-	/*
-	 * If the platform is PowerNV or Guest on KVM, ibm,chip-id is
-	 * defined. Hence we would return the chip-id as the result of
-	 * get_physical_package_id.
-	 */
-	if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) &&
-	    IS_ENABLED(CONFIG_PPC_SPLPAR)) {
-		struct device_node *np = of_get_cpu_node(cpu, NULL);
-		pkg_id = of_node_to_nid(np);
-		of_node_put(np);
-	}
-
-	return pkg_id;
-}
-EXPORT_SYMBOL_GPL(get_physical_package_id);
-
 static void add_cpu_to_masks(int cpu)
 {
 	int first_thread = cpu_first_thread_sibling(cpu);
-- 
2.17.1


^ permalink raw reply related

* [PATCH 2/7] powerpc/smp: Stop updating cpu_core_mask
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

Anton Blanchard reported that his 4096 vcpu KVM guest took around 30
minutes to boot. He also attributed it to the time taken to iterate while
setting the cpu_core_mask.

Further analysis shows that cpu_core_mask and cpu_cpu_mask for any CPU
would be equal on Power. However, updating cpu_core_mask took forever
as it's a per-cpu cpumask variable, whereas cpu_cpu_mask is a per
NODE / per DIE cpumask that is shared by all the respective CPUs.

Also, cpu_cpu_mask is needed from a scheduler perspective. However,
cpu_core_map is an exported symbol. Hence stop updating cpu_core_map
and make it a copy of cpu_cpu_mask.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/smp.h |  5 -----
 arch/powerpc/kernel/smp.c      | 33 +++++++--------------------------
 2 files changed, 7 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 5bdc17a7049f..cf6e7c7be62b 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -119,11 +119,6 @@ static inline struct cpumask *cpu_sibling_mask(int cpu)
 	return per_cpu(cpu_sibling_map, cpu);
 }
 
-static inline struct cpumask *cpu_core_mask(int cpu)
-{
-	return per_cpu(cpu_core_map, cpu);
-}
-
 static inline struct cpumask *cpu_l2_cache_mask(int cpu)
 {
 	return per_cpu(cpu_l2_cache_map, cpu);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 95f0bf72e283..8c28e1b4957b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -957,12 +957,17 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 				local_memory_node(numa_cpu_lookup_table[cpu]));
 		}
 #endif
+		/*
+		 * cpu_core_map is no more updated and exists only since
+		 * its been exported for long. It only will have a snapshot
+		 * of cpu_cpu_mask.
+		 */
+		cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
 	}
 
 	/* Init the cpumasks so the boot CPU is related to itself */
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
 	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
-	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
 	if (has_coregroup_support())
 		cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
@@ -1251,9 +1256,7 @@ static void remove_cpu_from_masks(int cpu)
 {
 	int i;
 
-	/* NB: cpu_core_mask is a superset of the others */
-	for_each_cpu(i, cpu_core_mask(cpu)) {
-		set_cpus_unrelated(cpu, i, cpu_core_mask);
+	for_each_cpu(i, cpu_cpu_mask(cpu)) {
 		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
 		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
 		if (has_big_cores)
@@ -1303,7 +1306,6 @@ EXPORT_SYMBOL_GPL(get_physical_package_id);
 static void add_cpu_to_masks(int cpu)
 {
 	int first_thread = cpu_first_thread_sibling(cpu);
-	int pkg_id = get_physical_package_id(cpu);
 	int i;
 
 	/*
@@ -1311,7 +1313,6 @@ static void add_cpu_to_masks(int cpu)
 	 * add it to it's own thread sibling mask.
 	 */
 	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
-	cpumask_set_cpu(cpu, cpu_core_mask(cpu));
 
 	for (i = first_thread; i < first_thread + threads_per_core; i++)
 		if (cpu_online(i))
@@ -1333,26 +1334,6 @@ static void add_cpu_to_masks(int cpu)
 				set_cpus_related(cpu, i, cpu_coregroup_mask);
 		}
 	}
-
-	if (pkg_id == -1) {
-		struct cpumask *(*mask)(int) = cpu_sibling_mask;
-
-		/*
-		 * Copy the sibling mask into core sibling mask and
-		 * mark any CPUs on the same chip as this CPU.
-		 */
-		if (shared_caches)
-			mask = cpu_l2_cache_mask;
-
-		for_each_cpu(i, mask(cpu))
-			set_cpus_related(cpu, i, cpu_core_mask);
-
-		return;
-	}
-
-	for_each_cpu(i, cpu_online_mask)
-		if (get_physical_package_id(i) == pkg_id)
-			set_cpus_related(cpu, i, cpu_core_mask);
 }
 
 /* Activate a secondary processor. */
-- 
2.17.1


^ permalink raw reply related

* [PATCH 1/7] powerpc/topology: Update topology_core_cpumask
From: Srikar Dronamraju @ 2020-07-27  7:55 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
	Srikar Dronamraju, Peter Zijlstra, LKML, Nicholas Piggin,
	Valentin Schneider, Oliver O'Halloran, Satheesh Rajendran,
	linuxppc-dev, Ingo Molnar
In-Reply-To: <20200727075532.30058-1-srikar@linux.vnet.ibm.com>

On Power, cpu_core_mask and cpu_cpu_mask refer to the same set of CPUs.
cpu_cpu_mask is needed by the scheduler, so look at deprecating
cpu_core_mask. Before deleting cpu_core_mask, ensure its only user
is moved to cpu_cpu_mask.

Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 6609174918ab..e0f232533c9d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -122,7 +122,7 @@ int get_physical_package_id(int cpu);
 #endif
 
 #define topology_sibling_cpumask(cpu)	(per_cpu(cpu_sibling_map, cpu))
-#define topology_core_cpumask(cpu)	(per_cpu(cpu_core_map, cpu))
+#define topology_core_cpumask(cpu)	(cpu_cpu_mask(cpu))
 #define topology_core_id(cpu)		(cpu_to_core_id(cpu))
 
 #endif
-- 
2.17.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox