LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 2/2] powerpc/watchdog: Avoid holding wd_smp_lock over printk and smp_send_nmi_ipi
From: Laurent Dufour @ 2021-10-29 12:09 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <20211029083908.87931-2-npiggin@gmail.com>

Le 29/10/2021 à 10:39, Nicholas Piggin a écrit :
> There is a deadlock with the console_owner lock and the wd_smp_lock:
> 
> CPU x takes the console_owner lock
> CPU y takes a watchdog timer interrupt and takes __wd_smp_lock
> CPU x takes a watchdog timer interrupt and spins on __wd_smp_lock
> CPU y detects a deadlock, tries to print something and spins on console_owner
> -> deadlock
> 
> Change the watchdog locking scheme so wd_smp_lock protects the watchdog
> internal data, but "reporting" (printing, issuing NMI IPIs, taking any
> action outside of watchdog) uses a non-waiting exclusion. If a CPU detects
> a problem but can not take the reporting lock, it just returns because
> something else is already reporting. It will try again at some point.
> 
> Typically, the usefulness of a hard lockup watchdog report is not impacted
> by a failure to spew a large enough amount of data in as short a time as
> possible, but by messages getting garbled.
> 
> Laurent debugged this and found the deadlock, and this patch is based on
> his general approach to avoid expensive operations while holding the lock.
> With the addition of the reporting exclusion.
> 
> Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
> [np: rework to add reporting exclusion update changelog]
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/kernel/watchdog.c | 76 ++++++++++++++++++++++++++--------
>   1 file changed, 59 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
> index 4bb7c8e371a2..69a475aa0f44 100644
> --- a/arch/powerpc/kernel/watchdog.c
> +++ b/arch/powerpc/kernel/watchdog.c
> @@ -85,10 +85,32 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);
>   
>   /* SMP checker bits */
>   static unsigned long __wd_smp_lock;
> +static unsigned long __wd_reporting;
>   static cpumask_t wd_smp_cpus_pending;
>   static cpumask_t wd_smp_cpus_stuck;
>   static u64 wd_smp_last_reset_tb;
>   
> +/*
> + * Try to take the exclusive watchdog action / NMI IPI / printing lock.
> + * wd_smp_lock must be held. If this fails, we should return and wait
> + * for the watchdog to kick in again (or another CPU to trigger it).
> + */
> +static bool wd_try_report(void)
> +{
> +	if (__wd_reporting)
> +		return false;
> +	__wd_reporting = 1;
> +	return true;
> +}
> +
> +/* End printing after successful wd_try_report. wd_smp_lock not required. */
> +static void wd_end_reporting(void)
> +{
> +	smp_mb(); /* End printing "critical section" */
> +	WARN_ON_ONCE(__wd_reporting == 0);
> +	WRITE_ONCE(__wd_reporting, 0);
> +}
> +
>   static inline void wd_smp_lock(unsigned long *flags)
>   {
>   	/*
> @@ -131,10 +153,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
>   	/* Do not panic from here because that can recurse into NMI IPI layer */
>   }
>   
> -static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
> +static void set_cpu_stuck(int cpu, u64 tb)
>   {
> -	cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
> -	cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
> +	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
> +	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
>   	if (cpumask_empty(&wd_smp_cpus_pending)) {
>   		wd_smp_last_reset_tb = tb;
>   		cpumask_andnot(&wd_smp_cpus_pending,
> @@ -142,10 +164,6 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
>   				&wd_smp_cpus_stuck);
>   	}
>   }
> -static void set_cpu_stuck(int cpu, u64 tb)
> -{
> -	set_cpumask_stuck(cpumask_of(cpu), tb);
> -}
>   
>   static void watchdog_smp_panic(int cpu, u64 tb)
>   {
> @@ -160,6 +178,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
>   		goto out;
>   	if (cpumask_weight(&wd_smp_cpus_pending) == 0)
>   		goto out;
> +	if (!wd_try_report())
> +		goto out;
> +	wd_smp_unlock(&flags);
>   
>   	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
>   		 cpu, cpumask_pr_args(&wd_smp_cpus_pending));
> @@ -172,24 +193,32 @@ static void watchdog_smp_panic(int cpu, u64 tb)
>   		 * Try to trigger the stuck CPUs, unless we are going to
>   		 * get a backtrace on all of them anyway.
>   		 */
> -		for_each_cpu(c, &wd_smp_cpus_pending) {
> +		for_each_online_cpu(c) {
>   			if (c == cpu)
>   				continue;
> +			if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
                                               ^ c
cpu is the reporting CPU, c is the target here.

> +				continue;
> +			wd_smp_lock(&flags);
> +			if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
                                               ^ again c
> +				wd_smp_unlock(&flags);
> +				continue;
> +			}
> +			/* Take the stuck CPU out of the watch group */
> +			set_cpu_stuck(cpu, tb);
                                       ^ c
> +			wd_smp_unlock(&flags);
> +
>   			smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
>   		}
>   	}
>   
> -	/* Take the stuck CPUs out of the watch group */
> -	set_cpumask_stuck(&wd_smp_cpus_pending, tb);
> -
> -	wd_smp_unlock(&flags);
> -
>   	if (sysctl_hardlockup_all_cpu_backtrace)
>   		trigger_allbutself_cpu_backtrace();
>   
>   	if (hardlockup_panic)
>   		nmi_panic(NULL, "Hard LOCKUP");
>   
> +	wd_end_reporting();
> +
>   	return;
>   
>   out:
> @@ -203,8 +232,6 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
>   			struct pt_regs *regs = get_irq_regs();
>   			unsigned long flags;
>   
> -			wd_smp_lock(&flags);
> -
>   			pr_emerg("CPU %d became unstuck TB:%lld\n",
>   				 cpu, tb);
>   			print_irqtrace_events(current);
> @@ -213,6 +240,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
>   			else
>   				dump_stack();
>   
> +			wd_smp_lock(&flags);
>   			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
>   			wd_smp_unlock(&flags);
>   		} else {
> @@ -291,8 +319,17 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
>   			wd_smp_unlock(&flags);
>   			return 0;
>   		}
> +		if (!wd_try_report()) {
> +			wd_smp_unlock(&flags);
> +			/* Couldn't report, try again in 100ms */
> +			mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
> +			return 0;
> +		}
> +
>   		set_cpu_stuck(cpu, tb);
>   
> +		wd_smp_unlock(&flags);
> +
>   		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
>   			 cpu, (void *)regs->nip);
>   		pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
> @@ -302,14 +339,19 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
>   		print_irqtrace_events(current);
>   		show_regs(regs);
>   
> -		wd_smp_unlock(&flags);
> -
>   		if (sysctl_hardlockup_all_cpu_backtrace)
>   			trigger_allbutself_cpu_backtrace();
>   
>   		if (hardlockup_panic)
>   			nmi_panic(regs, "Hard LOCKUP");
> +
> +		wd_end_reporting();
>   	}
> +	/*
> +	 * We are okay to change DEC in soft_nmi_interrupt because the masked
> +	 * handler has marked a DEC as pending, so the timer interrupt will be
> +	 * replayed as soon as local irqs are enabled again.
> +	 */
>   	if (wd_panic_timeout_tb < 0x7fffffff)
>   		mtspr(SPRN_DEC, wd_panic_timeout_tb);
>   
> 


^ permalink raw reply

* Re: Linux kernel: powerpc: KVM guest can trigger host crash on Power8
From: John Paul Adrian Glaubitz @ 2021-10-29 12:33 UTC (permalink / raw)
  To: Nicholas Piggin, Michael Ellerman
  Cc: debian-powerpc@lists.debian.org, linuxppc-dev
In-Reply-To: <1635467831.en5s268a3l.astroid@bobo.none>

Hi Nicholas!

On 10/29/21 02:41, Nicholas Piggin wrote:
> Soft lockup should mean it's taking timer interrupts still, just not 
> scheduling. Do you have the hard lockup detector enabled as well? Is
> there anything stuck spinning on another CPU?

I haven't enabled it. But looking at the documentation [1] it seems we could
use it to print a backtrace once the lockup occurs.

> Do you have the full dmesg / kernel log for this boot?

I do, uploaded the messages file here: https://people.debian.org/~glaubitz/messages-kvm-lockup.gz

Also, I noticed there is actually a backtrace:

Oct 25 17:02:31 watson kernel: [14104.902061]   (detected by 80, t=5252 jiffies, g=49897, q=37)
Oct 25 17:02:31 watson kernel: [14104.902072] Sending NMI from CPU 80 to CPUs 136:
Oct 25 17:02:31 watson kernel: [14108.253972] Modules linked in: dm_mod(E) vhost_net(E) vhost(E) vhost_iotlb(E) tap(E) tun(E) kvm_hv(E) kvm_pr(E) kvm(E) xt_CHECKSUM(E) xt_MASQUERADE(E) xt_conntrack(E) ipt_REJECT(E) nf_reject_ipv4(E) xt_tcpudp(E) nft_compat(E) nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) nft_counter(E) nf_tables(E) nfnetlink(E) bridge(E) stp(E) llc(E) xfs(E) ecb(E) xts(E) sg(E) ctr(E) vmx_crypto(E) gf128mul(E) ipmi_powernv(E) powernv_rng(E) ipmi_devintf(E) rng_core(E) ipmi_msghandler(E) powernv_op_panel(E) ib_iser(E) rdma_cm(E) iw_cm(E) ib_cm(E) ib_core(E) iscsi_tcp(E) libiscsi_tcp(E) sunrpc(E) libiscsi(E) drm(E) scsi_transport_iscsi(E) fuse(E) drm_panel_orientation_quirks(E) configfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) sr_mod(E) sd_mod(E) ses(E) cdrom(E) enclosure(E) t10_pi(E) crc_t10dif(E) scsi_transport_sas(E) crct10dif_generic(E) crct10dif_common(E) btrfs(E) blake2b_generic(E) zstd_compress(E) raid10(E) raid456(E)
Oct 25 17:02:31 watson kernel: [14108.254101]  async_raid6_recov(E) async_memcpy(E) async_pq(E) async_xor(E) async_tx(E) xor(E) raid6_pq(E) libcrc32c(E) crc32c_generic(E) raid1(E) raid0(E) multipath(E) linear(E) md_mod(E) xhci_pci(E) xhci_hcd(E) e1000e(E) usbcore(E) ptp(E) pps_core(E) ipr(E) usb_common(E)
Oct 25 17:02:31 watson kernel: [14108.254139] CPU: 104 PID: 175 Comm: migration/104 Tainted: G            E     5.14.0-0.bpo.2-powerpc64le #1  Debian 5.14.9-2~bpo11+2
Oct 25 17:02:31 watson kernel: [14108.254146] Stopper: multi_cpu_stop+0x0/0x240 <- migrate_swap+0xf8/0x240
Oct 25 17:02:31 watson kernel: [14108.254160] NIP:  c0000000001f6a58 LR: c00000000026b734 CTR: c00000000026b5c0
Oct 25 17:02:31 watson kernel: [14108.254163] REGS: c000001001237970 TRAP: 0900   Tainted: G            E      (5.14.0-0.bpo.2-powerpc64le Debian 5.14.9-2~bpo11+2)
Oct 25 17:02:31 watson kernel: [14108.254168] MSR:  9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 28002442  XER: 20000000
Oct 25 17:02:31 watson kernel: [14108.254183] CFAR: c00000000026b730 IRQMASK: 0 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR00: c00000000026b32c c000001001237c10 c00000000166ce00 c000000000d02c30 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR04: c000001806433198 c000001806433198 0000000000000000 000000005687ca06 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR08: c0000017fc8948a0 c0000017fc894780 0000000000000004 c00800000a80e378 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR12: 0000000000000000 c0000017ffff5a00 c000000000173ec8 c00000000194c080 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR20: 0000000000000000 c000001806433170 0000000000000000 0000000000000001 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR24: 0000000000000002 0000000000000003 0000000000000000 c000000000d02c30 
Oct 25 17:02:31 watson kernel: [14108.254183] GPR28: 0000000000000001 c000001806433170 c000001806433194 0000000000000001 
Oct 25 17:02:31 watson kernel: [14108.254240] NIP [c0000000001f6a58] rcu_momentary_dyntick_idle+0x48/0x60
Oct 25 17:02:31 watson kernel: [14108.254245] LR [c00000000026b734] multi_cpu_stop+0x174/0x240
Oct 25 17:02:31 watson kernel: [14108.254251] Call Trace:
Oct 25 17:02:31 watson kernel: [14108.254253] [c000001001237c10] [c000001001237c80] 0xc000001001237c80 (unreliable)
Oct 25 17:02:31 watson kernel: [14108.254260] [c000001001237c80] [c00000000026b32c] cpu_stopper_thread+0x16c/0x280
Oct 25 17:02:31 watson kernel: [14108.254267] [c000001001237d40] [c00000000017ad4c] smpboot_thread_fn+0x1ec/0x260
Oct 25 17:02:31 watson kernel: [14108.254273] [c000001001237da0] [c00000000017403c] kthread+0x17c/0x190
Oct 25 17:02:31 watson kernel: [14108.254280] [c000001001237e10] [c00000000000cf64] ret_from_kernel_thread+0x5c/0x64
Oct 25 17:02:31 watson kernel: [14108.254287] Instruction dump:
Oct 25 17:02:31 watson kernel: [14108.254289] 394a7aa4 39297980 7cc751ae e94d0030 7d295214 39090120 7c0004ac 39400004 
Oct 25 17:02:31 watson kernel: [14108.254301] 7ce04028 7cea3a14 7ce0412d 40c2fff4 <7c0004ac> 70e90002 4c820020 0fe00000 
Oct 25 17:02:31 watson kernel: [14110.585275] CPU 136 didn't respond to backtrace IPI, inspecting paca.
Oct 25 17:02:31 watson kernel: [14110.585279] irq_soft_mask: 0x03 in_mce: 0 in_nmi: 0 current: 1813 (CPU 12/KVM)
Oct 25 17:02:31 watson kernel: [14110.585284] Back trace of paca->saved_r1 (0xc00000180640f4c0) (possibly stale):
Oct 25 17:02:31 watson kernel: [14110.585286] Call Trace:
Oct 25 17:02:31 watson kernel: [14110.585378] task:rcu_sched       state:R  running task     stack:    0 pid:   13 ppid:     2 flags:0x00000800
Oct 25 17:02:31 watson kernel: [14110.585386] Call Trace:
Oct 25 17:02:31 watson kernel: [14110.585388] [c00000000e0978d0] [c0000000001f71c0] rcu_implicit_dynticks_qs+0x0/0x370 (unreliable)
Oct 25 17:02:31 watson kernel: [14110.585399] [c00000000e097ac0] [c00000000001b264] __switch_to+0x1d4/0x2e0
Oct 25 17:02:31 watson kernel: [14110.585407] [c00000000e097b30] [c000000000cb9838] __schedule+0x2f8/0xbb0
Oct 25 17:02:31 watson kernel: [14110.585416] [c00000000e097c00] [c000000000cba334] __cond_resched+0x64/0x90
Oct 25 17:02:31 watson kernel: [14110.585424] [c00000000e097c30] [c0000000001f8670] force_qs_rnp+0xe0/0x2e0
Oct 25 17:02:31 watson kernel: [14110.585433] [c00000000e097cd0] [c0000000001fc8a8] rcu_gp_kthread+0x9c8/0xc90
Oct 25 17:02:31 watson kernel: [14110.585442] [c00000000e097da0] [c00000000017403c] kthread+0x17c/0x190
Oct 25 17:02:31 watson kernel: [14110.585450] [c00000000e097e10] [c00000000000cf64] ret_from_kernel_thread+0x5c/0x64
Oct 25 17:02:31 watson kernel: [14110.585462] Sending NMI from CPU 80 to CPUs 32:
Oct 25 17:02:31 watson kernel: [14110.585469] NMI backtrace for cpu 32
Oct 25 17:02:31 watson kernel: [14110.585473] CPU: 32 PID: 1289 Comm: in:imklog Tainted: G            EL    5.14.0-0.bpo.2-powerpc64le #1  Debian 5.14.9-2~bpo11+2
Oct 25 17:02:31 watson kernel: [14110.585477] NIP:  00007fff92bc3bbc LR: 00007fff92bc5e90 CTR: 00007fff92bc5bf0
Oct 25 17:02:31 watson kernel: [14110.585480] REGS: c00000001c9bfe80 TRAP: 0500   Tainted: G            EL     (5.14.0-0.bpo.2-powerpc64le Debian 5.14.9-2~bpo11+2)
Oct 25 17:02:31 watson kernel: [14110.585483] MSR:  900000000280f033 <SF,HV,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 48004802  XER: 00000000
Oct 25 17:02:31 watson kernel: [14110.585496] CFAR: 00007fff92bc3c34 IRQMASK: 0 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR00: 0000000000000000 00007fff9220d940 00007fff92d37100 000000000000000c 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR04: 00007fff9222f928 00007fff84000060 00007fff84097800 00007fff84000900 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR08: 00007fff840008d0 00007fff84000050 00007fff8408f3a0 0000000000000007 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR12: 0000000028004802 00007fff92236810 00007fff84097af0 0000000000000000 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR16: 00007fff93040000 00007fff92f54478 0000000000000000 00007fff9222f160 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR20: 00007fff9222f810 00007fff9220e4f0 0000000000000008 00007fff927156b0 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR24: 00007fff92715638 00007fff927304f8 0000000000001fa0 0000000000000000 
Oct 25 17:02:31 watson kernel: [14110.585496] GPR28: 00007fff9220e529 000000000000006f 00007fff84000020 0000000000000030 
Oct 25 17:02:31 watson kernel: [14110.585530] NIP [00007fff92bc3bbc] 0x7fff92bc3bbc
Oct 25 17:02:31 watson kernel: [14110.585534] LR [00007fff92bc5e90] 0x7fff92bc5e90

> Could you try a sysrq+w to get a trace of blocked tasks?

Not sure how to send a magic sysrequest over the IPMI serial console. Any idea?

> Are you able to shut down the guests and exit qemu normally?

Not after the crash. I have to hard-reboot the whole machine.

Adrian

> [1] https://www.kernel.org/doc/html/latest/admin-guide/lockup-watchdogs.html

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz@debian.org
`. `'   Freie Universitaet Berlin - glaubitz@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


^ permalink raw reply

* Re: [V3] powerpc/perf: Enable PMU counters post partition migration if PMU is active
From: Michael Ellerman @ 2021-10-29 13:15 UTC (permalink / raw)
  To: Nicholas Piggin, Athira Rajeev
  Cc: nathanl, kjain, maddy, linuxppc-dev, rnsastry
In-Reply-To: <1635487923.hwdpof7s4v.astroid@bobo.none>

Nicholas Piggin <npiggin@gmail.com> writes:
> Excerpts from Athira Rajeev's message of October 29, 2021 1:05 pm:
>> During Live Partition Migration (LPM), it is observed that perf
>> counter values reports zero post migration completion. However
>> 'perf stat' with workload continues to show counts post migration
>> since PMU gets disabled/enabled during sched switches. But in case
>> of system/cpu wide monitoring, zero counts were reported with 'perf
>> stat' after migration completion.
>> 
>> Example:
>>  ./perf stat -e r1001e -I 1000
>>            time             counts unit events
>>      1.001010437         22,137,414      r1001e
>>      2.002495447         15,455,821      r1001e
>> <<>> As seen in next below logs, the counter values shows zero
>>         after migration is completed.
>> <<>>
>>     86.142535370    129,392,333,440      r1001e
>>     87.144714617                  0      r1001e
>>     88.146526636                  0      r1001e
>>     89.148085029                  0      r1001e
>
> This is the output without the patch? After the patch it keeps counting 
> I suppose? And does the very large count go away too?
>
>> 
>> Here PMU is enabled during start of perf session and counter
>> values are read at intervals. Counters are only disabled at the
>> end of session. The powerpc mobility code presently does not handle
>> disabling and enabling back of PMU counters during partition
>> migration. Also since the PMU register values are not saved/restored
>> during migration, PMU registers like Monitor Mode Control Register 0
>> (MMCR0), Monitor Mode Control Register 1 (MMCR1) will not contain
>> the value it was programmed with. Hence PMU counters will not be
>> enabled correctly post migration.
>> 
>> Fix this in mobility code by handling disabling and enabling of
>> PMU in all cpu's before and after migration. Patch introduces two
>> functions 'mobility_pmu_disable' and 'mobility_pmu_enable'.
>> mobility_pmu_disable() is called before the processor threads goes
>> to suspend state so as to disable the PMU counters. And disable is
>> done only if there are any active events running on that cpu.
>> mobility_pmu_enable() is called after the migrate is done to enable
>> back the PMU counters.
>> 
>> Since the performance Monitor counters ( PMCs) are not
>> saved/restored during LPM, results in PMC value being zero and the
>> 'event->hw.prev_count' being non-zero value. This causes problem
>> during updation of event->count since we always accumulate
>> (event->hw.prev_count - PMC value) in event->count.  If
>> event->hw.prev_count is greater PMC value, event->count becomes
>> negative. To fix this, 'prev_count' also needs to be re-initialised
>> for all events while enabling back the events. Hence read the
>> existing events and clear the PMC index (stored in event->hw.idx)
>> for all events in mobility_pmu_disable. This way, event count
>> settings will get re-initialised correctly in power_pmu_enable.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> [ Fixed compilation error reported by kernel test robot ]
>> Reported-by: kernel test robot <lkp@intel.com>
>> ---
>> Changelog:
>> Change from v2 -> v3:
>> Addressed review comments from Nicholas Piggin.
>>  - Removed the "migrate" field which was added in initial
>>    patch to address updation of event count settings correctly
>>    in power_pmu_enable. Instead read off existing events in
>>    mobility_pmu_disable before power_pmu_enable.
>>  - Moved the mobility_pmu_disable/enable declaration from
>>    rtas.h to perf event header file.
>> 
>> Addressed review comments from Nathan.
>>  - Moved the mobility function calls from stop_machine
>>    context out to pseries_migrate_partition. Also now this
>>    is a per cpu invocation.
>> 
>> Change from v1 -> v2:
>>  - Moved the mobility_pmu_enable and mobility_pmu_disable
>>    declarations under CONFIG_PPC_PERF_CTRS in rtas.h.
>>    Also included 'asm/rtas.h' in core-book3s to fix the
>>    compilation warning reported by kernel test robot.
>> 
>>  arch/powerpc/include/asm/perf_event.h     |  8 +++++
>>  arch/powerpc/perf/core-book3s.c           | 39 +++++++++++++++++++++++
>>  arch/powerpc/platforms/pseries/mobility.c |  7 ++++
>>  3 files changed, 54 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
>> index 164e910bf654..88aab6cf840c 100644
>> --- a/arch/powerpc/include/asm/perf_event.h
>> +++ b/arch/powerpc/include/asm/perf_event.h
>> @@ -17,6 +17,14 @@ static inline bool is_sier_available(void) { return false; }
>>  static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; }
>>  #endif
>>  
>> +#ifdef CONFIG_PPC_PERF_CTRS
>> +void mobility_pmu_disable(void *unused);
>> +void mobility_pmu_enable(void *unused);
>> +#else
>> +static inline void mobility_pmu_disable(void *unused) { }
>> +static inline void mobility_pmu_enable(void *unused) { }
>> +#endif
>> +
>>  #ifdef CONFIG_FSL_EMB_PERF_EVENT
>>  #include <asm/perf_event_fsl_emb.h>
>>  #endif
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 73e62e9b179b..2e8c4c668fa3 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -1343,6 +1343,33 @@ static void power_pmu_disable(struct pmu *pmu)
>>  	local_irq_restore(flags);
>>  }
>>  
>> +/*
>> + * Called from pseries_migrate_partition() function
>> + * before migration, from powerpc/mobility code.
>> + */

These are only needed if pseries is built, so should be inside a PSERIES
ifdef.

This function should handle iterating over CPUs, that shouldn't be left
up to the mobility.c code.

And the names should be something like pmu_start_migration(),
pmu_finish_migration().

>> +void mobility_pmu_disable(void *unused)
>> +{
>> +	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
>> +	struct perf_event *event;
>> +
>> +	if (cpuhw->n_events != 0) {
>> +		int i;
>> +
>> +		power_pmu_disable(NULL);
>> +		/*
>> +		 * Read off any pre-existing events because the register
>> +		 * values may not be migrated.
>> +		 */
>> +		for (i = 0; i < cpuhw->n_events; ++i) {
>> +			event = cpuhw->event[i];
>> +			if (event->hw.idx) {
>> +				power_pmu_read(event);
>> +				event->hw.idx = 0;
>> +			}
>> +		}
>> +	}
>> +}
>> +
>>  /*
>>   * Re-enable all events if disable == 0.
>>   * If we were previously disabled and events were added, then
>> @@ -1515,6 +1542,18 @@ static void power_pmu_enable(struct pmu *pmu)
>>  	local_irq_restore(flags);
>>  }
>>  
>> +/*
>> + * Called from pseries_migrate_partition() function
>> + * after migration, from powerpc/mobility code.
>> + */
>> +void mobility_pmu_enable(void *unused)
>> +{
>> +	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
>> +
>> +	cpuhw->n_added = cpuhw->n_events;
>> +	power_pmu_enable(NULL);
>> +}
>> +
>>  static int collect_events(struct perf_event *group, int max_count,
>>  			  struct perf_event *ctrs[], u64 *events,
>>  			  unsigned int *flags)
>> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>> index e83e0891272d..3e96485ccba4 100644
>> --- a/arch/powerpc/platforms/pseries/mobility.c
>> +++ b/arch/powerpc/platforms/pseries/mobility.c
>> @@ -22,6 +22,7 @@
>>  #include <linux/delay.h>
>>  #include <linux/slab.h>
>>  #include <linux/stringify.h>
>> +#include <linux/perf_event.h>
>>  
>>  #include <asm/machdep.h>
>>  #include <asm/rtas.h>
>> @@ -631,12 +632,18 @@ static int pseries_migrate_partition(u64 handle)
>>  	if (ret)
>>  		return ret;
>>  
>> +	/* Disable PMU before suspend */
>> +	on_each_cpu(&mobility_pmu_disable, NULL, 0);
>
> Why was this moved out of stop machine and to an IPI?
>
> My concern would be, what are the other CPUs doing at this time? Is it 
> possible they could take interrupts and schedule? Could that mess up the
> perf state here?

pseries_migrate_partition() is called directly from migration_store(),
which is the sysfs store function, which can be called concurrently by
different CPUs.

It's also potentially called from rtas_syscall_dispatch_ibm_suspend_me(),
from sys_rtas(), again with no locking.

So we could have two CPUs calling into here at the same time, which
might not crash, but is unlikely to work well.

I think the lack of locking might have been OK in the past because only
one CPU will successfully get the other CPUs to call do_join() in
pseries_suspend(). But I could be wrong.

Anyway, now that we're mutating the PMU state before suspending we need
to be more careful. So I think we need a lock around the whole sequence.

cheers

^ permalink raw reply

* [Bug 214867] New: UBSAN: shift-out-of-bounds in drivers/of/unittest.c:1933:36
From: bugzilla-daemon @ 2021-10-29 13:59 UTC (permalink / raw)
  To: linuxppc-dev

https://bugzilla.kernel.org/show_bug.cgi?id=214867

            Bug ID: 214867
           Summary: UBSAN: shift-out-of-bounds in
                    drivers/of/unittest.c:1933:36
           Product: Platform Specific/Hardware
           Version: 2.5
    Kernel Version: 5.15-rc7
          Hardware: PPC-64
                OS: Linux
              Tree: Mainline
            Status: NEW
          Severity: normal
          Priority: P1
         Component: PPC-64
          Assignee: platform_ppc-64@kernel-bugs.osdl.org
          Reporter: erhard_f@mailbox.org
                CC: bugzilla.kernel.org@frowand.com
        Regression: No

Created attachment 299361
  --> https://bugzilla.kernel.org/attachment.cgi?id=299361&action=edit
kernel dmesg (kernel 5.15-rc7, Talos II)

UBSAN catches this at boot on my Talos II.

[...]
### dt-test ### EXPECT / : GPIO line <<int>> (line-C-input) hogged as input
================================================================================
UBSAN: shift-out-of-bounds in drivers/of/unittest.c:1933:36
shift exponent -1 is negative
CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc7-TalosII #1
Call Trace:
[c000000004163700] [c0000000008ffaa8] .dump_stack_lvl+0xa4/0x100 (unreliable)
[c000000004163790] [c0000000008fb46c] .ubsan_epilogue+0x10/0x70
[c000000004163800] [c0000000008fb270]
.__ubsan_handle_shift_out_of_bounds+0x1f0/0x34c
[c000000004163910] [c000000000ad94a0] .of_unittest_untrack_overlay+0x6c/0xe0
[c0000000041639a0] [c000000002098ff8] .of_unittest+0x4c50/0x59f8
[c000000004163b60] [c000000000011b5c] .do_one_initcall+0x7c/0x4f0
[c000000004163c50] [c00000000200300c] .kernel_init_freeable+0x704/0x858
[c000000004163d90] [c000000000012730] .kernel_init+0x20/0x190
[c000000004163e10] [c00000000000ce78] .ret_from_kernel_thread+0x58/0x60
================================================================================
### dt-test ### EXPECT \ : OF: overlay: WARNING: memory leak will occur if
overlay removed, property: /testcase-data-2/substation@100/status
[...]

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 214867] UBSAN: shift-out-of-bounds in drivers/of/unittest.c:1933:36
From: bugzilla-daemon @ 2021-10-29 14:00 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-214867-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=214867

--- Comment #1 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 299363
  --> https://bugzilla.kernel.org/attachment.cgi?id=299363&action=edit
kernel .config (kernel 5.15-rc7, Talos II)

 # lspci 
0000:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0000:01:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
Turks XT [Radeon HD 6670/7670]
0000:01:00.1 Audio device: Advanced Micro Devices, Inc. [AMD/ATI] Turks HDMI
Audio [Radeon HD 6500/6600 / 6700M Series]
0001:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0001:01:00.0 Non-Volatile memory controller: Phison Electronics Corporation
Device 5008 (rev 01)
0002:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0003:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0003:01:00.0 USB controller: Texas Instruments TUSB73x0 SuperSpeed USB 3.0 xHCI
Host Controller (rev 02)
0004:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0004:01:00.0 Ethernet controller: Broadcom Inc. and subsidiaries NetXtreme
BCM5719 Gigabit Ethernet PCIe (rev 01)
0004:01:00.1 Ethernet controller: Broadcom Inc. and subsidiaries NetXtreme
BCM5719 Gigabit Ethernet PCIe (rev 01)
0005:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0005:01:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge (rev
04)
0005:02:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED Graphics
Family (rev 41)
0030:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0031:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0032:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)
0033:00:00.0 PCI bridge: IBM POWER9 Host Bridge (PHB4)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 214867] UBSAN: shift-out-of-bounds in drivers/of/unittest.c:1933:36
From: bugzilla-daemon @ 2021-10-29 14:06 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-214867-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=214867

Arnd Bergmann (arnd@arndb.de) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |arnd@arndb.de

--- Comment #2 from Arnd Bergmann (arnd@arndb.de) ---
This is the function that triggers it:

static void of_unittest_untrack_overlay(int id)
{
        if (overlay_first_id < 0)
                return;
        id -= overlay_first_id;
        if (WARN_ON(id >= MAX_UNITTEST_OVERLAYS))
                return;
        overlay_id_bits[BIT_WORD(id)] &= ~BIT_MASK(id);
}

My guess is that 'id' is negative here, which means it fails to trigger the
WARN_ON() but ends up still being out of range.

Can you try changing it to 'unsigned int id'?

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* Re: [PATCH 10/13] ps3disk: add error handling support for add_disk()
From: Geoff Levand @ 2021-10-29 15:05 UTC (permalink / raw)
  To: Luis Chamberlain, axboe, mpe, benh, paulus, jim, minchan, ngupta,
	senozhatsky, richard, miquel.raynal, vigneshr, dan.j.williams,
	vishal.l.verma, dave.jiang, ira.weiny, kbusch, hch, sagi
  Cc: nvdimm, linux-kernel, linux-nvme, linux-block, linux-mtd,
	linuxppc-dev
In-Reply-To: <20211015235219.2191207-11-mcgrof@kernel.org>

Hi Luis,

On 10/15/21 4:52 PM, Luis Chamberlain wrote:
> We never checked for errors on add_disk() as this function
> returned void. Now that this is fixed, use the shiny new
> error handling.
> 
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>

I tested your 20211011-for-axboe-add-disk-error-handling branch
on PS3 and the ps3disk changes seem to be working OK.

Tested-by: Geoff Levand <geoff@infradead.org>

^ permalink raw reply

* Re: [PATCH 11/13] ps3vram: add error handling support for add_disk()
From: Geoff Levand @ 2021-10-29 15:09 UTC (permalink / raw)
  To: Luis Chamberlain, axboe, mpe, benh, paulus, jim, minchan, ngupta,
	senozhatsky, richard, miquel.raynal, vigneshr, dan.j.williams,
	vishal.l.verma, dave.jiang, ira.weiny, kbusch, hch, sagi
  Cc: nvdimm, linux-kernel, linux-nvme, linux-block, linux-mtd,
	linuxppc-dev
In-Reply-To: <20211015235219.2191207-12-mcgrof@kernel.org>

Hi Luis,

On 10/15/21 4:52 PM, Luis Chamberlain wrote:
> We never checked for errors on add_disk() as this function
> returned void. Now that this is fixed, use the shiny new
> error handling.

I didn't yet test this ps3vram related change, but based
on the ps3disk testing I think this change will be OK.

Acked-by: Geoff Levand <geoff@infradead.org>

^ permalink raw reply

* [PATCH] powerpc/8xx: Fix Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST
From: Christophe Leroy @ 2021-10-29 15:10 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Until now, all tests involving CONFIG_STRICT_KERNEL_RWX were done with
DEBUG_RODATA_TEST to check the result. But now that
CONFIG_STRICT_KERNEL_RWX is selected by default, it came without
CONFIG_DEBUG_RODATA_TEST and led to the following Oops

[    6.830908] Freeing unused kernel image (initmem) memory: 352K
[    6.840077] BUG: Unable to handle kernel data access on write at 0xc1285200
[    6.846836] Faulting instruction address: 0xc0004b6c
[    6.851745] Oops: Kernel access of bad area, sig: 11 [#1]
[    6.857075] BE PAGE_SIZE=16K PREEMPT CMPC885
[    6.861348] SAF3000 DIE NOTIFICATION
[    6.864830] CPU: 0 PID: 1 Comm: swapper Not tainted 5.15.0-rc5-s3k-dev-02255-g2747d7b7916f #451
[    6.873429] NIP:  c0004b6c LR: c0004b60 CTR: 00000000
[    6.878419] REGS: c902be60 TRAP: 0300   Not tainted  (5.15.0-rc5-s3k-dev-02255-g2747d7b7916f)
[    6.886852] MSR:  00009032 <EE,ME,IR,DR,RI>  CR: 53000335  XER: 8000ff40
[    6.893564] DAR: c1285200 DSISR: 82000000
[    6.893564] GPR00: 0c000000 c902bf20 c20f4000 08000000 00000001 04001f00 c1800000 00000035
[    6.893564] GPR08: ff0001ff c1280000 00000002 c0004b60 00001000 00000000 c0004b1c 00000000
[    6.893564] GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    6.893564] GPR24: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c1060000
[    6.932034] NIP [c0004b6c] kernel_init+0x50/0x138
[    6.936682] LR [c0004b60] kernel_init+0x44/0x138
[    6.941245] Call Trace:
[    6.943653] [c902bf20] [c0004b60] kernel_init+0x44/0x138 (unreliable)
[    6.950022] [c902bf30] [c001122c] ret_from_kernel_thread+0x5c/0x64
[    6.956135] Instruction dump:
[    6.959060] 48ffc521 48045469 4800d8cd 3d20c086 89295fa0 2c090000 41820058 480796c9
[    6.966890] 4800e48d 3d20c128 39400002 3fe0c106 <91495200> 3bff8000 4806fa1d 481f7d75
[    6.974902] ---[ end trace 1e397bacba4aa610 ]---

0xc1285200 corresponds to 'system_state' global var that the kernel is trying to set to
SYSTEM_RUNNING. This var is above the RO/RW limit so it shouldn't Oops.

It oopses because the dirty bit is missing.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_8xx.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9bdb95f5694f..2d596881b70e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -755,7 +755,7 @@ _GLOBAL(mmu_pin_tlb)
 	cmplw	r6, r9
 	bdnzt	lt, 2b
 
-4:	LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+4:	LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
 2:	ori	r0, r6, MD_EVALID
 	mtspr	SPRN_MD_CTR, r5
 	mtspr	SPRN_MD_EPN, r0
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH v1 1/1] soc: fsl: Replace kernel.h with the necessary inclusions
From: Andy Shevchenko @ 2021-10-29 15:55 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev; +Cc: Paul Mackerras
In-Reply-To: <20211027153354.81129-1-andriy.shevchenko@linux.intel.com>

On Wed, Oct 27, 2021 at 06:33:54PM +0300, Andy Shevchenko wrote:
> When kernel.h is used in the headers it adds a lot into dependency hell,
> especially when circular dependencies are involved.
> 
> Replace kernel.h inclusion with the list of what is really being used.

Seems nobody from PPC took this patch.
Any idea who can take it?

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply

* Re: [PATCH v1 1/1] soc: fsl: Replace kernel.h with the necessary inclusions
From: LEROY Christophe @ 2021-10-29 19:01 UTC (permalink / raw)
  To: Andy Shevchenko, linux-kernel@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, Michael Ellerman
  Cc: Paul Mackerras
In-Reply-To: <YXwZihLk1njsBNT4@smile.fi.intel.com>



Le 29/10/2021 à 17:55, Andy Shevchenko a écrit :
> On Wed, Oct 27, 2021 at 06:33:54PM +0300, Andy Shevchenko wrote:
>> When kernel.h is used in the headers it adds a lot into dependency hell,
>> especially when circular dependencies are involved.
>>
>> Replace kernel.h inclusion with the list of what is really being used.
> 
> Seems nobody from PPC took this patch.
> Any idea who can take it?
> 

You have to check in MAINTAINERS file in the root directory of kernel 
sources: https://github.com/linuxppc/linux/blob/master/MAINTAINERS

That's Michael who takes them. But you have to allow him enough time for it.

Christophe

^ permalink raw reply

* Re: [PATCH v1 1/1] soc: fsl: Replace kernel.h with the necessary inclusions
From: Christophe Leroy @ 2021-10-29 19:02 UTC (permalink / raw)
  To: Andy Shevchenko, linux-kernel, linuxppc-dev, Michael Ellerman
  Cc: Paul Mackerras
In-Reply-To: <YXwZihLk1njsBNT4@smile.fi.intel.com>



Le 29/10/2021 à 17:55, Andy Shevchenko a écrit :
> On Wed, Oct 27, 2021 at 06:33:54PM +0300, Andy Shevchenko wrote:
>> When kernel.h is used in the headers it adds a lot into dependency hell,
>> especially when circular dependencies are involved.
>>
>> Replace kernel.h inclusion with the list of what is really being used.
>
> Seems nobody from PPC took this patch.
> Any idea who can take it?
>

You have to check in MAINTAINERS file in the root directory of kernel 
sources: https://github.com/linuxppc/linux/blob/master/MAINTAINERS

That's Michael who takes them. But you have to allow him enough time for it.

Christophe


^ permalink raw reply

* Re: [PATCH v1 1/1] soc: fsl: Replace kernel.h with the necessary inclusions
From: Andy Shevchenko @ 2021-10-29 20:31 UTC (permalink / raw)
  To: LEROY Christophe
  Cc: Andy Shevchenko, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org, Paul Mackerras
In-Reply-To: <c0524c86-fc7e-632a-8b2a-862695529115@csgroup.eu>

On Fri, Oct 29, 2021 at 10:04 PM LEROY Christophe
<christophe.leroy@csgroup.eu> wrote:
>
>
>
> Le 29/10/2021 à 17:55, Andy Shevchenko a écrit :
> > On Wed, Oct 27, 2021 at 06:33:54PM +0300, Andy Shevchenko wrote:
> >> When kernel.h is used in the headers it adds a lot into dependency hell,
> >> especially when circular dependencies are involved.
> >>
> >> Replace kernel.h inclusion with the list of what is really being used.
> >
> > Seems nobody from PPC took this patch.
> > Any idea who can take it?
> >
>
> You have to check in MAINTAINERS file in the root directory of kernel
> sources: https://github.com/linuxppc/linux/blob/master/MAINTAINERS

Actually for these files get_maintainer.pl showed nothing.
I have chosen PPC maintainers manually.

> That's Michael who takes them. But you have to allow him enough time for it.

Thanks!

I wrote that message because I have got a notification from checkpatch
that it should go somewhere else.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* [PATCH] powerpc/fsl: fix the schema check errors for fsl, tmu-calibration
From: David Heidelberg @ 2021-10-29 12:17 UTC (permalink / raw)
  To: Rob Herring, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras
  Cc: devicetree, linuxppc-dev, ~okias/devicetree, David Heidelberg,
	linux-kernel

fsl,tmu-calibration is in u32-matrix format. Use matching property syntax.
No functional changes. Fixes warnings as:
$ make dtbs_check
...
arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dt.yaml: tmu@30260000: fsl,tmu-calibration:0: Additional items are not allowed (1, 41, 2, 47, 3, 53, 4, 61, 5, 67, 6, 75, 7, 81, 8, 87, 9, 95, 10, 103, 11, 111
, 65536, 27, 65537, 35, 65538, 43, 65539, 51, 65540, 59, 65541, 67, 65542, 75, 65543, 85, 65544, 93, 65545, 103, 65546, 112, 131072, 23, 131073, 35, 131074, 45, 131075, 55, 131076, 65, 131077, 75, 131078, 87, 13
1079, 99, 131080, 111, 196608, 21, 196609, 33, 196610, 45, 196611, 57, 196612, 69, 196613, 83, 196614, 95, 196615, 113 were unexpected)
        From schema: Documentation/devicetree/bindings/thermal/qoriq-thermal.yaml
...

Signed-off-by: David Heidelberg <david@ixit.cz>
---
 arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 79 +++++++++++----------
 arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 71 +++++++++---------
 2 files changed, 76 insertions(+), 74 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
index d552044c5afc..aa5152ca8120 100644
--- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -367,45 +367,46 @@ tmu: tmu@f0000 {
 		reg = <0xf0000 0x1000>;
 		interrupts = <18 2 0 0>;
 		fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>;
-		fsl,tmu-calibration = <0x00000000 0x0000000f
-				       0x00000001 0x00000017
-				       0x00000002 0x0000001e
-				       0x00000003 0x00000026
-				       0x00000004 0x0000002e
-				       0x00000005 0x00000035
-				       0x00000006 0x0000003d
-				       0x00000007 0x00000044
-				       0x00000008 0x0000004c
-				       0x00000009 0x00000053
-				       0x0000000a 0x0000005b
-				       0x0000000b 0x00000064
-
-				       0x00010000 0x00000011
-				       0x00010001 0x0000001c
-				       0x00010002 0x00000024
-				       0x00010003 0x0000002b
-				       0x00010004 0x00000034
-				       0x00010005 0x00000039
-				       0x00010006 0x00000042
-				       0x00010007 0x0000004c
-				       0x00010008 0x00000051
-				       0x00010009 0x0000005a
-				       0x0001000a 0x00000063
-
-				       0x00020000 0x00000013
-				       0x00020001 0x00000019
-				       0x00020002 0x00000024
-				       0x00020003 0x0000002c
-				       0x00020004 0x00000035
-				       0x00020005 0x0000003d
-				       0x00020006 0x00000046
-				       0x00020007 0x00000050
-				       0x00020008 0x00000059
-
-				       0x00030000 0x00000002
-				       0x00030001 0x0000000d
-				       0x00030002 0x00000019
-				       0x00030003 0x00000024>;
+		fsl,tmu-calibration =
+				<0x00000000 0x0000000f>,
+				<0x00000001 0x00000017>,
+				<0x00000002 0x0000001e>,
+				<0x00000003 0x00000026>,
+				<0x00000004 0x0000002e>,
+				<0x00000005 0x00000035>,
+				<0x00000006 0x0000003d>,
+				<0x00000007 0x00000044>,
+				<0x00000008 0x0000004c>,
+				<0x00000009 0x00000053>,
+				<0x0000000a 0x0000005b>,
+				<0x0000000b 0x00000064>,
+
+				<0x00010000 0x00000011>,
+				<0x00010001 0x0000001c>,
+				<0x00010002 0x00000024>,
+				<0x00010003 0x0000002b>,
+				<0x00010004 0x00000034>,
+				<0x00010005 0x00000039>,
+				<0x00010006 0x00000042>,
+				<0x00010007 0x0000004c>,
+				<0x00010008 0x00000051>,
+				<0x00010009 0x0000005a>,
+				<0x0001000a 0x00000063>,
+
+				<0x00020000 0x00000013>,
+				<0x00020001 0x00000019>,
+				<0x00020002 0x00000024>,
+				<0x00020003 0x0000002c>,
+				<0x00020004 0x00000035>,
+				<0x00020005 0x0000003d>,
+				<0x00020006 0x00000046>,
+				<0x00020007 0x00000050>,
+				<0x00020008 0x00000059>,
+
+				<0x00030000 0x00000002>,
+				<0x00030001 0x0000000d>,
+				<0x00030002 0x00000019>,
+				<0x00030003 0x00000024>;
 		#thermal-sensor-cells = <1>;
 	};
 
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index f58eb820eb5e..27e6985d8bde 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -447,41 +447,42 @@ tmu: tmu@f0000 {
 		reg = <0xf0000 0x1000>;
 		interrupts = <18 2 0 0>;
 		fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>;
-		fsl,tmu-calibration = <0x00000000 0x00000025
-				       0x00000001 0x00000028
-				       0x00000002 0x0000002d
-				       0x00000003 0x00000031
-				       0x00000004 0x00000036
-				       0x00000005 0x0000003a
-				       0x00000006 0x00000040
-				       0x00000007 0x00000044
-				       0x00000008 0x0000004a
-				       0x00000009 0x0000004f
-				       0x0000000a 0x00000054
-
-				       0x00010000 0x0000000d
-				       0x00010001 0x00000013
-				       0x00010002 0x00000019
-				       0x00010003 0x0000001f
-				       0x00010004 0x00000025
-				       0x00010005 0x0000002d
-				       0x00010006 0x00000033
-				       0x00010007 0x00000043
-				       0x00010008 0x0000004b
-				       0x00010009 0x00000053
-
-				       0x00020000 0x00000010
-				       0x00020001 0x00000017
-				       0x00020002 0x0000001f
-				       0x00020003 0x00000029
-				       0x00020004 0x00000031
-				       0x00020005 0x0000003c
-				       0x00020006 0x00000042
-				       0x00020007 0x0000004d
-				       0x00020008 0x00000056
-
-				       0x00030000 0x00000012
-				       0x00030001 0x0000001d>;
+		fsl,tmu-calibration =
+				<0x00000000 0x00000025>,
+				<0x00000001 0x00000028>,
+				<0x00000002 0x0000002d>,
+				<0x00000003 0x00000031>,
+				<0x00000004 0x00000036>,
+				<0x00000005 0x0000003a>,
+				<0x00000006 0x00000040>,
+				<0x00000007 0x00000044>,
+				<0x00000008 0x0000004a>,
+				<0x00000009 0x0000004f>,
+				<0x0000000a 0x00000054>,
+
+				<0x00010000 0x0000000d>,
+				<0x00010001 0x00000013>,
+				<0x00010002 0x00000019>,
+				<0x00010003 0x0000001f>,
+				<0x00010004 0x00000025>,
+				<0x00010005 0x0000002d>,
+				<0x00010006 0x00000033>,
+				<0x00010007 0x00000043>,
+				<0x00010008 0x0000004b>,
+				<0x00010009 0x00000053>,
+
+				<0x00020000 0x00000010>,
+				<0x00020001 0x00000017>,
+				<0x00020002 0x0000001f>,
+				<0x00020003 0x00000029>,
+				<0x00020004 0x00000031>,
+				<0x00020005 0x0000003c>,
+				<0x00020006 0x00000042>,
+				<0x00020007 0x0000004d>,
+				<0x00020008 0x00000056>,
+
+				<0x00030000 0x00000012>,
+				<0x00030001 0x0000001d>;
 		#thermal-sensor-cells = <1>;
 	};
 
-- 
2.33.0


^ permalink raw reply related

* RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
From: Eugene Bordenkircher @ 2021-10-29 17:24 UTC (permalink / raw)
  To: linux-usb@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
  Cc: balbi@kernel.org, gregkh@linuxfoundation.org, leoyang.li@nxp.com
In-Reply-To: <MWHPR2201MB152074F47BF142189365627B91879@MWHPR2201MB1520.namprd22.prod.outlook.com>

Typing Greg's email correctly this time.  My apologies.

Eugene 

-----Original Message-----
From: Eugene Bordenkircher 
Sent: Friday, October 29, 2021 10:14 AM
To: linux-usb@vger.kernel.org; linuxppc-dev@lists.ozlabs.org
Cc: leoyang.li@nxp.com; balbi@kernel.org; gregkh@linuxfoundataion.org
Subject: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.

Hello all,

We've discovered a situation where the FSL udc driver (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the request queue, but the queue has been corrupted at some point so it loops infinitely.  I believe we have narrowed into the offending code, but we are in need of assistance trying to find an appropriate fix for the problem.  The identified code appears to be in all versions of the Linux kernel the driver exists in.

The problem appears to be when handling a USB_REQ_GET_STATUS request.  The driver gets this request and then calls the ch9getstatus() function.  In this function, it starts a request by "borrowing" the per device status_req, filling it in, and then queuing it with a call to list_add_tail() to add the request to the endpoint queue.  Right before it exits the function however, it's calling ep0_prime_status(), which is filling out that same status_req structure and then queuing it with another call to list_add_tail() to add the request to the endpoint queue.  This adds two instances of the exact same LIST_HEAD to the endpoint queue, which breaks the list since the prev and next pointers end up pointing to the wrong things.  This ends up causing a hard loop the next time nuke() gets called, which happens on the next setup IRQ.

I'm not sure what the appropriate fix to this problem is, mostly due to my lack of expertise in USB and this driver stack.  The code has been this way in the kernel for a very long time, which suggests that it has been working, unless USB_REQ_GET_STATUS requests are never made.  This further suggests that there is something else going on that I don't understand.  Deleting the call to ep0_prime_status() and the following ep0stall() call appears, on the surface, to get the device working again, but may have side effects that I'm not seeing.

I'm hopeful someone in the community can help provide some information on what I may be missing or help come up with a solution to the problem.  A big thank you to anyone who would like to help out.

Eugene

^ permalink raw reply

* bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
From: Eugene Bordenkircher @ 2021-10-29 17:14 UTC (permalink / raw)
  To: linux-usb@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
  Cc: balbi@kernel.org, gregkh@linuxfoundataion.org, leoyang.li@nxp.com

Hello all,

We've discovered a situation where the FSL udc driver (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the request queue, but the queue has been corrupted at some point so it loops infinitely.  I believe we have narrowed into the offending code, but we are in need of assistance trying to find an appropriate fix for the problem.  The identified code appears to be in all versions of the Linux kernel the driver exists in.

The problem appears to be when handling a USB_REQ_GET_STATUS request.  The driver gets this request and then calls the ch9getstatus() function.  In this function, it starts a request by "borrowing" the per device status_req, filling it in, and then queuing it with a call to list_add_tail() to add the request to the endpoint queue.  Right before it exits the function however, it's calling ep0_prime_status(), which is filling out that same status_req structure and then queuing it with another call to list_add_tail() to add the request to the endpoint queue.  This adds two instances of the exact same LIST_HEAD to the endpoint queue, which breaks the list since the prev and next pointers end up pointing to the wrong things.  This ends up causing a hard loop the next time nuke() gets called, which happens on the next setup IRQ.

I'm not sure what the appropriate fix to this problem is, mostly due to my lack of expertise in USB and this driver stack.  The code has been this way in the kernel for a very long time, which suggests that it has been working, unless USB_REQ_GET_STATUS requests are never made.  This further suggests that there is something else going on that I don't understand.  Deleting the call to ep0_prime_status() and the following ep0stall() call appears, on the surface, to get the device working again, but may have side effects that I'm not seeing.

I'm hopeful someone in the community can help provide some information on what I may be missing or help come up with a solution to the problem.  A big thank you to anyone who would like to help out.

Eugene

^ permalink raw reply

* [GIT PULL] Please pull powerpc/linux.git powerpc-5.15-6 tag
From: Michael Ellerman @ 2021-10-29 23:05 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: aik, linuxppc-dev, linux-kernel

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

Hi Linus,

Please pull the final set of powerpc fixes for 5.15:

The following changes since commit 787252a10d9422f3058df9a4821f389e5326c440:

  powerpc/smp: do not decrement idle task preempt count in CPU offline (2021-10-20 21:38:01 +1100)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.15-6

for you to fetch changes up to d853adc7adf601d7d6823afe3ed396065a3e08d1:

  powerpc/pseries/iommu: Create huge DMA window if no MMIO32 is present (2021-10-25 11:41:15 +1100)

- ------------------------------------------------------------------
powerpc fixes for 5.15 #6

Three commits fixing some issues introduced with the recent IOMMU changes we merged.

Thanks to: Alexey Kardashevskiy

- ------------------------------------------------------------------
Alexey Kardashevskiy (3):
      powerpc/pseries/iommu: Use correct vfree for it_map
      powerpc/pseries/iommu: Check if the default window in use before removing it
      powerpc/pseries/iommu: Create huge DMA window if no MMIO32 is present


 arch/powerpc/platforms/pseries/iommu.c | 27 ++++++++++----------
 1 file changed, 14 insertions(+), 13 deletions(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmF8fhoACgkQUevqPMjh
pYAoaRAAps3wmmCXKdVbFvqKTFzcRFiWoFa0r2c6SykG7hvo6y1r3avF5PhXU5ry
OshoMcw+ZPFeH/Jc7VB/i7a9nQZSlf1k3Z9SaM+WVOgqFUhbE6OjC1r2VfRgo2lY
8QFmlLesNNx5dg+NXcunFD7Z7ydQopCR9QprlpWq2ZAxcIf9z7PP/SNlfxzCMo0d
0zYBfchkHAsg4C3/c6CjIr6lmbuPvlX3YoSyiOb9MBuAZB+fA6jNxqsW8GWbLYOA
XNCFQ+1vqv5cwrjlo1nKCLQjYi/9MnF7/SLPeIHA/MYQBF7iuAeOCDo2ldgzKtAO
uwSDrNiuGBya2QMU6ulnbHlropmg4NdtCp9i0jcztbDWRZl+dmJ88LqI5jE43JOF
pgaf25jTw80yCrwxBFxfGwAesQPAxWMAV5SmqilArNu8ctCThRVeyYxIeFXpoZBA
Gl54/3VX6lXGF0Myf1gHdu5Qqkj6W/PlOwmr/WcQLRthHhIaVnW/Y0VlWqQ1FA3e
SsPf5XfP5VsqTXSos+t8FR9kpFaxOOC8C3Qo6bTbGYdd/dNx37AqXAK9B7vlgm3I
ufLg5t6bx9DWLx8i+tNOqG7owY4PfwnBDgxLl9dsP41srWPdgP81/IsHnevSYis8
QrSBgPE3+elkr2V8tRR9Eco3bYwPQDBSdMqTksfnkMJ+t1jinz0=
=l0Ac
-----END PGP SIGNATURE-----

^ permalink raw reply

* Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
From: Li Yang @ 2021-10-29 23:14 UTC (permalink / raw)
  To: Eugene Bordenkircher
  Cc: balbi@kernel.org, linux-usb@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, gregkh@linuxfoundation.org
In-Reply-To: <MWHPR2201MB1520D45396628364E91A1FA691879@MWHPR2201MB1520.namprd22.prod.outlook.com>

On Fri, Oct 29, 2021 at 4:27 PM Eugene Bordenkircher
<Eugene_Bordenkircher@selinc.com> wrote:
>
> Typing Greg's email correct this time.  My apologies.
>
> Eugene
>
> -----Original Message-----
> From: Eugene Bordenkircher
> Sent: Friday, October 29, 2021 10:14 AM
> To: linux-usb@vger.kernel.org; linuxppc-dev@lists.ozlabs.org
> Cc: leoyang.li@nxp.com; balbi@kernel.org; gregkh@linuxfoundataion.org
> Subject: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
>
> Hello all,
>
> We've discovered a situation where the FSL udc driver (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the request queue, but the queue has been corrupted at some point so it loops infinitely.  I believe we have narrowed into the offending code, but we are in need of assistance trying to find an appropriate fix for the problem.  The identified code appears to be in all versions of the Linux kernel the driver exists in.
>
> The problem appears to be when handling a USB_REQ_GET_STATUS request.  The driver gets this request and then calls the ch9getstatus() function.  In this function, it starts a request by "borrowing" the per device status_req, filling it in, and then queuing it with a call to list_add_tail() to add the request to the endpoint queue.  Right before it exits the function however, it's calling ep0_prime_status(), which is filling out that same status_req structure and then queuing it with another call to list_add_tail() to add the request to the endpoint queue.  This adds two instances of the exact same LIST_HEAD to the endpoint queue, which breaks the list since the prev and next pointers end up pointing to the wrong things.  This ends up causing a hard loop the next time nuke() gets called, which happens on the next setup IRQ.
>

I agree with you that this looks problematic.  This is probably
introduced by f79a60b8785 "usb: fsl_udc_core: prime status stage once
data stage has primed" that it didn't consider that the status_req has
been re-used for the DATA phase.

I think the proper fix should be having a separate request allocated
for the data phase after the above change.

> I'm not sure what the appropriate fix to this problem is, mostly due to my lack of expertise in USB and this driver stack.  The code has been this way in the kernel for a very long time, which suggests that it has been working, unless USB_REQ_GET_STATUS requests are never made.  This further suggests that there is something else going on that I don't understand.  Deleting the call to ep0_prime_status() and the following ep0stall() call appears, on the surface, to get the device working again, but may have side effects that I'm not seeing.
>
> I'm hopeful someone in the community can help provide some information on what I may be missing or help come up with a solution to the problem.  A big thank you to anyone who would like to help out.
>
> Eugene

^ permalink raw reply

* [Bug 214867] UBSAN: shift-out-of-bounds in drivers/of/unittest.c:1933:36
From: bugzilla-daemon @ 2021-10-30  0:01 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-214867-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=214867

--- Comment #3 from Frank Rowand (bugzilla.kernel.org@frowand.com) ---
I forwarded my email notification of this bug to the mail lists.  I prefer
discussion to occur there:

  https://lore.kernel.org/all/c474a371-b524-1da8-4a67-e72cf8f2b0f7@gmail.com/

Thank you for the report.

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [PATCH] powerpc: fadump: correct two typos in a comment
From: Randy Dunlap @ 2021-10-30  0:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: Randy Dunlap, linuxppc-dev, Matthew Wilcox

Fix typos of 'remaining' and 'those'.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Suggested-by: Matthew Wilcox <willy@infradead.org> # 'remaining'
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/kernel/fadump.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- linux-next-20211029.orig/arch/powerpc/kernel/fadump.c
+++ linux-next-20211029/arch/powerpc/kernel/fadump.c
@@ -73,8 +73,8 @@ static struct cma *fadump_cma;
  * The total size of fadump reserved memory covers for boot memory size
  * + cpu data size + hpte size and metadata.
  * Initialize only the area equivalent to boot memory size for CMA use.
- * The reamining portion of fadump reserved memory will be not given
- * to CMA and pages for thoes will stay reserved. boot memory size is
+ * The remaining portion of fadump reserved memory will be not given
+ * to CMA and pages for those will stay reserved. boot memory size is
  * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
  * But for some reason even if it fails we still have the memory reservation
  * with us and we can still continue doing fadump.

^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.15-6 tag
From: pr-tracker-bot @ 2021-10-30  0:37 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: aik, linuxppc-dev, Linus Torvalds, linux-kernel
In-Reply-To: <87pmrn8m9h.fsf@mpe.ellerman.id.au>

The pull request you sent on Sat, 30 Oct 2021 10:05:46 +1100:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.15-6

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/119c85055d867b9588263bca59794c872ef2a30e

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply

* Re: [PATCH v1 1/1] soc: fsl: Replace kernel.h with the necessary inclusions
From: Christophe Leroy @ 2021-10-30  4:46 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Andy Shevchenko, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org, Paul Mackerras
In-Reply-To: <CAHp75VeB8Dmr6Hrupb1Hj=D=oG1zPyJrhCXu37QtXHNNKdNvZg@mail.gmail.com>


Le 29/10/2021 à 22:31, Andy Shevchenko a écrit :
> On Fri, Oct 29, 2021 at 10:04 PM LEROY Christophe
> <christophe.leroy@csgroup.eu> wrote:
>>
>>
>>
>> Le 29/10/2021 à 17:55, Andy Shevchenko a écrit :
>>> On Wed, Oct 27, 2021 at 06:33:54PM +0300, Andy Shevchenko wrote:
>>>> When kernel.h is used in the headers it adds a lot into dependency hell,
>>>> especially when there are circular dependencies are involved.
>>>>
>>>> Replace kernel.h inclusion with the list of what is really being used.
>>>
>>> Seems nobody from PPC took this patch.
>>> Any idea who can take it?
>>>
>>
>> You have to check in MAINTAINERS file in the root directory of kernel
>> sources: https://github.com/linuxppc/linux/blob/master/MAINTAINERS
> 
> Actually for these files get_maintainer.pl showed nothing.
> I have chosen PPC maintainers manually.
> 
>> That's Michael who takes them. But you have to allow him enough time for it.
> 
> Thanks!
> 
> I wrote that message because I have got a notification from checkpatch
> that it should go somewhere else.
> 

That means that Michael considered it is not for him.

And I think the reason is that in MAINTAINERS you have:

FREESCALE QUICC ENGINE LIBRARY
M:	Qiang Zhao <qiang.zhao@nxp.com>
L:	linuxppc-dev@lists.ozlabs.org
S:	Maintained
F:	drivers/soc/fsl/qe/
F:	include/soc/fsl/*qe*.h
F:	include/soc/fsl/*ucc*.h


FREESCALE SOC DRIVERS
M:	Li Yang <leoyang.li@nxp.com>
L:	linuxppc-dev@lists.ozlabs.org
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S:	Maintained
F:	Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F:	Documentation/devicetree/bindings/soc/fsl/
F:	drivers/soc/fsl/
F:	include/linux/fsl/

Sorry I overlooked your patch.

Christophe

^ permalink raw reply

* Re: Linux kernel: powerpc: KVM guest can trigger host crash on Power8
From: John Paul Adrian Glaubitz @ 2021-10-30  7:19 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: debian-powerpc@lists.debian.org, linuxppc-dev
In-Reply-To: <878rydac0d.fsf@mpe.ellerman.id.au>

Hi Michael!

On 10/28/21 08:39, Michael Ellerman wrote:
> That completed fine on my BE VM here.
> 
> I ran these in two tmux windows:
>   $ sbuild -d sid --arch=powerpc --no-arch-all gcc-11_11.2.0-10.dsc
>   $ sbuild -d sid --arch=ppc64 --no-arch-all gcc-11_11.2.0-10.dsc

Could you try gcc-10 instead? Its testsuite has crashed the host for me
with a patched kernel twice now.

$ dget -u https://deb.debian.org/debian/pool/main/g/gcc-10/gcc-10_10.3.0-12.dsc
$ sbuild -d sid --arch=powerpc --no-arch-all gcc-10_10.3.0-12.dsc
$ sbuild -d sid --arch=ppc64 --no-arch-all gcc-10_10.3.0-12.dsc

Thanks,
Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz@debian.org
`. `'   Freie Universitaet Berlin - glaubitz@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


^ permalink raw reply

* Re: [PATCH v1 1/1] soc: fsl: Replace kernel.h with the necessary inclusions
From: Andy Shevchenko @ 2021-10-30  7:28 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Andy Shevchenko, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org, Paul Mackerras
In-Reply-To: <064efc28-eb86-b729-0bb0-b1f5ed5bbf60@csgroup.eu>

[-- Attachment #1: Type: text/plain, Size: 2214 bytes --]

On Saturday, October 30, 2021, Christophe Leroy <christophe.leroy@csgroup.eu>
wrote:

>
> Le 29/10/2021 à 22:31, Andy Shevchenko a écrit :
>
>> On Fri, Oct 29, 2021 at 10:04 PM LEROY Christophe
>> <christophe.leroy@csgroup.eu> wrote:
>>
>>>
>>>
>>>
>>> Le 29/10/2021 à 17:55, Andy Shevchenko a écrit :
>>>
>>>> On Wed, Oct 27, 2021 at 06:33:54PM +0300, Andy Shevchenko wrote:
>>>>
>>>>> When kernel.h is used in the headers it adds a lot into dependency
>>>>> hell,
>>>>> especially when there are circular dependencies are involved.
>>>>>
>>>>> Replace kernel.h inclusion with the list of what is really being used.
>>>>>
>>>>
>>>> Seems nobody from PPC took this patch.
>>>> Any idea who can take it?
>>>>
>>>>
>>> You have to check in MAINTAINERS file in the root directory of kernel
>>> sources: https://github.com/linuxppc/linux/blob/master/MAINTAINERS
>>>
>>
>> Actually for these files get_maintainer.pl showed nothing.
>> I have chosen PPC maintainers manually.
>>
>> That's Michael who takes them. But you have to allow him enough time for
>>> it.
>>>
>>
>> Thanks!
>>
>> I wrote that message because I have got a notification from checkpatch
>> that it should go somewhere else.
>>
>>
> That means that Michael considered it is not for him.
>
> And I think the reason is that in MAINTAINERS you have:
>
> FREESCALE QUICC ENGINE LIBRARY
> M:      Qiang Zhao <qiang.zhao@nxp.com>
> L:      linuxppc-dev@lists.ozlabs.org
> S:      Maintained
> F:      drivers/soc/fsl/qe/
> F:      include/soc/fsl/*qe*.h
> F:      include/soc/fsl/*ucc*.h
>
>
> FREESCALE SOC DRIVERS
> M:      Li Yang <leoyang.li@nxp.com>
> L:      linuxppc-dev@lists.ozlabs.org
> L:      linux-arm-kernel@lists.infradead.org (moderated for
> non-subscribers)
> S:      Maintained
> F:      Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
> F:      Documentation/devicetree/bindings/soc/fsl/
> F:      drivers/soc/fsl/
> F:      include/linux/fsl/
>
>
Thanks! Now I understand why get_maintainer hadn’t shown the above for
me.



> Sorry I overlooked your patch.


NP


>
> Christophe
>


-- 
With Best Regards,
Andy Shevchenko

[-- Attachment #2: Type: text/html, Size: 3946 bytes --]

^ permalink raw reply

* Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
From: Joakim Tjernlund @ 2021-10-30 14:20 UTC (permalink / raw)
  To: linuxppc-dev@lists.ozlabs.org, Eugene_Bordenkircher@selinc.com,
	linux-usb@vger.kernel.org
  Cc: gregkh@linuxfoundataion.org, balbi@kernel.org, leoyang.li@nxp.com
In-Reply-To: <MWHPR2201MB152074F47BF142189365627B91879@MWHPR2201MB1520.namprd22.prod.outlook.com>

[-- Attachment #1: Type: text/plain, Size: 2314 bytes --]

On Fri, 2021-10-29 at 17:14 +0000, Eugene Bordenkircher wrote:
> Hello all,
> 
> We've discovered a situation where the FSL udc driver (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the request queue, but the queue has been corrupted at some point so it loops infinitely.  I believe we have narrowed into the offending code, but we are in need of assistance trying to find an appropriate fix for the problem.  The identified code appears to be in all versions of the Linux kernel the driver exists in.
> 
> The problem appears to be when handling a USB_REQ_GET_STATUS request.  The driver gets this request and then calls the ch9getstatus() function.  In this function, it starts a request by "borrowing" the per device status_req, filling it in, and then queuing it with a call to list_add_tail() to add the request to the endpoint queue.  Right before it exits the function however, it's calling ep0_prime_status(), which is filling out that same status_req structure and then queuing it with another call to list_add_tail() to add the request to the endpoint queue.  This adds two instances of the exact same LIST_HEAD to the endpoint queue, which breaks the list since the prev and next pointers end up pointing to the wrong things.  This ends up causing a hard loop the next time nuke() gets called, which happens on the next setup IRQ.
> 
> I'm not sure what the appropriate fix to this problem is, mostly due to my lack of expertise in USB and this driver stack.  The code has been this way in the kernel for a very long time, which suggests that it has been working, unless USB_REQ_GET_STATUS requests are never made.  This further suggests that there is something else going on that I don't understand.  Deleting the call to ep0_prime_status() and the following ep0stall() call appears, on the surface, to get the device working again, but may have side effects that I'm not seeing.
> 
> I'm hopeful someone in the community can help provide some information on what I may be missing or help come up with a solution to the problem.  A big thank you to anyone who would like to help out.
> 
> Eugene

Ran into this too a while ago. Found the bug and a few more fixes.
This is against 4.19 so you may have to tweak them a bit.
Feel free to upstream them.

 Jocke 

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0005-fsl_udc_core-Init-max_pipes-for-reset_queues.patch --]
[-- Type: text/x-patch; name="0005-fsl_udc_core-Init-max_pipes-for-reset_queues.patch", Size: 989 bytes --]

From a7ed9cffbfc90371b570ebef698d96c39adbaf77 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Mon, 11 May 2020 11:18:14 +0200
Subject: [PATCH 5/5] fsl_udc_core: Init max_pipes for reset_queues()

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index bd3825d9f1d2..92136dff8373 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -2441,6 +2441,7 @@ static int fsl_udc_probe(struct platform_device *pdev)
 	/* Get max device endpoints */
 	/* DEN is bidirectional ep number, max_ep doubles the number */
 	udc_controller->max_ep = (dccparams & DCCPARAMS_DEN_MASK) * 2;
+	udc_controller->max_pipes = udc_controller->max_ep;
 
 	udc_controller->irq = platform_get_irq(pdev, 0);
 	if (!udc_controller->irq) {
-- 
2.32.0


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0004-fsl_udc_stop-Use-list_for_each_entry_safe-when-delet.patch --]
[-- Type: text/x-patch; name="0004-fsl_udc_stop-Use-list_for_each_entry_safe-when-delet.patch", Size: 1422 bytes --]

From b98fa0dd384f17fee0c1283b91f855b97d1976f4 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Mon, 11 May 2020 10:38:07 +0200
Subject: [PATCH 4/5] fsl_udc_stop: Use list_for_each_entry_safe() when
 deleting

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 4f835332af45..bd3825d9f1d2 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1984,7 +1984,7 @@ static int fsl_udc_start(struct usb_gadget *g,
 /* Disconnect from gadget driver */
 static int fsl_udc_stop(struct usb_gadget *g)
 {
-	struct fsl_ep *loop_ep;
+	struct fsl_ep *loop_ep, *tmp_loop;
 	unsigned long flags;
 
 	if (!IS_ERR_OR_NULL(udc_controller->transceiver))
@@ -2002,8 +2002,8 @@ static int fsl_udc_stop(struct usb_gadget *g)
 	spin_lock_irqsave(&udc_controller->lock, flags);
 	udc_controller->gadget.speed = USB_SPEED_UNKNOWN;
 	nuke(&udc_controller->eps[0], -ESHUTDOWN);
-	list_for_each_entry(loop_ep, &udc_controller->gadget.ep_list,
-			ep.ep_list)
+	list_for_each_entry_safe(loop_ep, tmp_loop, &udc_controller->gadget.ep_list,
+				 ep.ep_list)
 		nuke(loop_ep, -ESHUTDOWN);
 	spin_unlock_irqrestore(&udc_controller->lock, flags);
 
-- 
2.32.0


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: 0003-fsl_ep_dequeue.patch --]
[-- Type: text/x-patch; name="0003-fsl_ep_dequeue.patch", Size: 1007 bytes --]

From a90a89d06bd008f606404ec613b4f2343b9dda1a Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Thu, 7 May 2020 22:35:14 +0200
Subject: [PATCH 3/5] fsl_ep_dequeue

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 4b1591fa2e1c..4f835332af45 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -977,7 +977,13 @@ static int fsl_ep_dequeue(struct usb_ep *_ep, struct usb_request *_req)
 
 			/* prime with dTD of next request */
 			fsl_prime_ep(ep, next_req->head);
-		}
+		} else {
+			struct ep_queue_head *qh;
+
+			qh = ep->qh;
+			qh->next_dtd_ptr = 1;
+			qh->size_ioc_int_sts = 0;
+ 		}
 	/* The request hasn't been processed, patch up the TD chain */
 	} else {
 		struct fsl_req *prev_req;
-- 
2.32.0


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #5: 0002-fsl_udc-import-build_dtd-fixes.patch --]
[-- Type: text/x-patch; name="0002-fsl_udc-import-build_dtd-fixes.patch", Size: 2239 bytes --]

From b3f09747be2007be3a372fe80635b51df6ba71bd Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Thu, 7 May 2020 22:32:26 +0200
Subject: [PATCH 2/5] fsl_udc: import build_dtd fixes

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 2546bc28f42a..4b1591fa2e1c 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -774,12 +774,20 @@ static void fsl_queue_td(struct fsl_ep *ep, struct fsl_req *req)
 static struct ep_td_struct *fsl_build_dtd(struct fsl_req *req, unsigned *length,
 		dma_addr_t *dma, int *is_last, gfp_t gfp_flags)
 {
-	u32 swap_temp;
+	u32 swap_temp, mult = 0;
 	struct ep_td_struct *dtd;
+	struct ep_queue_head *dqh;
 
 	/* how big will this transfer be? */
-	*length = min(req->req.length - req->req.actual,
-			(unsigned)EP_MAX_LENGTH_TRANSFER);
+	if (usb_endpoint_xfer_isoc(req->ep->ep.desc)) {
+		dqh = req->ep->qh;
+		mult = (dqh->max_pkt_length >> EP_QUEUE_HEAD_MULT_POS)
+			& 0x3;
+		*length = min(req->req.length - req->req.actual,
+			      (unsigned)(mult * req->ep->ep.maxpacket));
+	} else
+		*length = min(req->req.length - req->req.actual,
+			      (unsigned)EP_MAX_LENGTH_TRANSFER);
 
 	dtd = dma_pool_alloc(udc_controller->td_pool, gfp_flags, dma);
 	if (dtd == NULL)
@@ -794,6 +802,7 @@ static struct ep_td_struct *fsl_build_dtd(struct fsl_req *req, unsigned *length,
 	/* Init all of buffer page pointers */
 	swap_temp = (u32) (req->req.dma + req->req.actual);
 	dtd->buff_ptr0 = cpu_to_hc32(swap_temp);
+	swap_temp &= ~0xFFF;
 	dtd->buff_ptr1 = cpu_to_hc32(swap_temp + 0x1000);
 	dtd->buff_ptr2 = cpu_to_hc32(swap_temp + 0x2000);
 	dtd->buff_ptr3 = cpu_to_hc32(swap_temp + 0x3000);
@@ -820,6 +829,7 @@ static struct ep_td_struct *fsl_build_dtd(struct fsl_req *req, unsigned *length,
 	/* Enable interrupt for the last dtd of a request */
 	if (*is_last && !req->req.no_interrupt)
 		swap_temp |= DTD_IOC;
+	swap_temp |= mult << 10;
 
 	dtd->size_ioc_sts = cpu_to_hc32(swap_temp);
 
-- 
2.32.0


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #6: 0001-ch9getstatus-ep0_prime_status-fixes-RND-28770.patch --]
[-- Type: text/x-patch; name="0001-ch9getstatus-ep0_prime_status-fixes-RND-28770.patch", Size: 4367 bytes --]

From 17c684fdcd6152b7e504656b1711e24508c32f6e Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Fri, 8 May 2020 17:12:53 +0200
Subject: [PATCH 1/5] ch9getstatus/ep0_prime_status, fixes RND-28770

USB driver added the same req twice to the same list.
This causes an endless loop while in IRQ context.
Fix by importing code from mv_udc_core.c, its sister driver.

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 56 ++++++++++-----------------
 1 file changed, 21 insertions(+), 35 deletions(-)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 367697144cda..2546bc28f42a 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1266,7 +1266,7 @@ static void ep0stall(struct fsl_udc *udc)
 }
 
 /* Prime a status phase for ep0 */
-static int ep0_prime_status(struct fsl_udc *udc, int direction)
+static int ep0_prime_status(struct fsl_udc *udc, int direction, u16 status, bool empty)
 {
 	struct fsl_req *req = udc->status_req;
 	struct fsl_ep *ep;
@@ -1281,8 +1281,14 @@ static int ep0_prime_status(struct fsl_udc *udc, int direction)
 	if (udc->ep0_state != DATA_STATE_XMIT)
 		udc->ep0_state = WAIT_FOR_OUT_STATUS;
 
+	/* fill in the reqest structure */
+	if (empty == false) {
+		*((u16 *) req->req.buf) = cpu_to_le16(status);
+		req->req.length = 2;
+	} else
+		req->req.length = 0;
+
 	req->ep = ep;
-	req->req.length = 0;
 	req->req.status = -EINPROGRESS;
 	req->req.actual = 0;
 	req->req.complete = fsl_noop_complete;
@@ -1292,14 +1298,19 @@ static int ep0_prime_status(struct fsl_udc *udc, int direction)
 	if (ret)
 		return ret;
 
+	ret = -ENOMEM;
 	if (fsl_req_to_dtd(req, GFP_ATOMIC) == 0)
 		fsl_queue_td(ep, req);
 	else
-		return -ENOMEM;
+		goto out;
 
 	list_add_tail(&req->queue, &ep->queue);
 
 	return 0;
+out:
+	usb_gadget_unmap_request(&udc->gadget, &req->req, ep_is_in(ep));
+
+	return ret;
 }
 
 static void udc_reset_ep_queue(struct fsl_udc *udc, u8 pipe)
@@ -1320,7 +1331,7 @@ static void ch9setaddress(struct fsl_udc *udc, u16 value, u16 index, u16 length)
 	/* Update usb state */
 	udc->usb_state = USB_STATE_ADDRESS;
 	/* Status phase */
-	if (ep0_prime_status(udc, EP_DIR_IN))
+	if (ep0_prime_status(udc, EP_DIR_IN, 0, true))
 		ep0stall(udc);
 }
 
@@ -1331,9 +1342,7 @@ static void ch9getstatus(struct fsl_udc *udc, u8 request_type, u16 value,
 		u16 index, u16 length)
 {
 	u16 tmp = 0;		/* Status, cpu endian */
-	struct fsl_req *req;
 	struct fsl_ep *ep;
-	int ret;
 
 	ep = &udc->eps[0];
 
@@ -1358,33 +1367,10 @@ static void ch9getstatus(struct fsl_udc *udc, u8 request_type, u16 value,
 				<< USB_ENDPOINT_HALT;
 	}
 
-	udc->ep0_dir = USB_DIR_IN;
-	/* Borrow the per device status_req */
-	req = udc->status_req;
-	/* Fill in the reqest structure */
-	*((u16 *) req->req.buf) = cpu_to_le16(tmp);
-
-	req->ep = ep;
-	req->req.length = 2;
-	req->req.status = -EINPROGRESS;
-	req->req.actual = 0;
-	req->req.complete = fsl_noop_complete;
-	req->dtd_count = 0;
-
-	ret = usb_gadget_map_request(&ep->udc->gadget, &req->req, ep_is_in(ep));
-	if (ret)
-		goto stall;
-
-	/* prime the data phase */
-	if ((fsl_req_to_dtd(req, GFP_ATOMIC) == 0))
-		fsl_queue_td(ep, req);
-	else			/* no mem */
-		goto stall;
-
-	list_add_tail(&req->queue, &ep->queue);
-	udc->ep0_state = DATA_STATE_XMIT;
-	if (ep0_prime_status(udc, EP_DIR_OUT))
+	if (ep0_prime_status(udc, EP_DIR_OUT, tmp, false))
 		ep0stall(udc);
+	else
+		udc->ep0_state = DATA_STATE_XMIT;
 
 	return;
 stall:
@@ -1465,7 +1451,7 @@ __acquires(udc->lock)
 			break;
 
 		if (rc == 0) {
-			if (ep0_prime_status(udc, EP_DIR_IN))
+			if (ep0_prime_status(udc, EP_DIR_IN, 0, true))
 				ep0stall(udc);
 		}
 		if (ptc) {
@@ -1501,7 +1487,7 @@ __acquires(udc->lock)
 		 * See 2.0 Spec chapter 8.5.3.3 for detail.
 		 */
 		if (udc->ep0_state == DATA_STATE_XMIT)
-			if (ep0_prime_status(udc, EP_DIR_OUT))
+			if (ep0_prime_status(udc, EP_DIR_OUT, 0, true))
 				ep0stall(udc);
 
 	} else {
@@ -1537,7 +1523,7 @@ static void ep0_req_complete(struct fsl_udc *udc, struct fsl_ep *ep0,
 		break;
 	case DATA_STATE_RECV:
 		/* send status phase */
-		if (ep0_prime_status(udc, EP_DIR_IN))
+		if (ep0_prime_status(udc, EP_DIR_IN, 0, true))
 			ep0stall(udc);
 		break;
 	case WAIT_FOR_OUT_STATUS:
-- 
2.32.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox