LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 04/12] macintosh/via-pmu: Enhance state machine with new 'uninitialized' state
From: Finn Thain @ 2018-06-13 10:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael Schmitz, linuxppc-dev, linux-m68k, linux-kernel
In-Reply-To: <cover.1528885172.git.fthain@telegraphics.com.au>

On 68k Macs, the via/vias pointer can't be used to determine whether
the PMU driver has been initialized. For portability, add a new state
to indicate that find_via_pmu() succeeded.

After find_via_pmu() executes, testing vias == NULL is equivalent to
testing via == NULL. Replace these tests with pmu_state == uninitialized
which is simpler and more consistent. No functional change.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/macintosh/via-pmu.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 4c1bae5380c2..c4c324fb5fa6 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -114,6 +114,7 @@ static volatile unsigned char __iomem *via;
 #define CB1_INT		0x10		/* transition on CB1 input */
 
 static volatile enum pmu_state {
+	uninitialized = 0,
 	idle,
 	sending,
 	intack,
@@ -274,7 +275,7 @@ int __init find_via_pmu(void)
 	u64 taddr;
 	const u32 *reg;
 
-	if (via != 0)
+	if (pmu_state != uninitialized)
 		return 1;
 	vias = of_find_node_by_name(NULL, "via-pmu");
 	if (vias == NULL)
@@ -369,20 +370,19 @@ int __init find_via_pmu(void)
  fail:
 	of_node_put(vias);
 	vias = NULL;
+	pmu_state = uninitialized;
 	return 0;
 }
 
 #ifdef CONFIG_ADB
 static int pmu_probe(void)
 {
-	return vias == NULL? -ENODEV: 0;
+	return pmu_state == uninitialized ? -ENODEV : 0;
 }
 
 static int pmu_init(void)
 {
-	if (vias == NULL)
-		return -ENODEV;
-	return 0;
+	return pmu_state == uninitialized ? -ENODEV : 0;
 }
 #endif /* CONFIG_ADB */
 
@@ -397,7 +397,7 @@ static int __init via_pmu_start(void)
 {
 	unsigned int irq;
 
-	if (vias == NULL)
+	if (pmu_state == uninitialized)
 		return -ENODEV;
 
 	batt_req.complete = 1;
@@ -463,7 +463,7 @@ arch_initcall(via_pmu_start);
  */
 static int __init via_pmu_dev_init(void)
 {
-	if (vias == NULL)
+	if (pmu_state == uninitialized)
 		return -ENODEV;
 
 #ifdef CONFIG_PMAC_BACKLIGHT
@@ -966,7 +966,7 @@ static int pmu_send_request(struct adb_request *req, int sync)
 {
 	int i, ret;
 
-	if ((vias == NULL) || (!pmu_fully_inited)) {
+	if (pmu_state == uninitialized || !pmu_fully_inited) {
 		req->complete = 1;
 		return -ENXIO;
 	}
@@ -1060,7 +1060,7 @@ static int __pmu_adb_autopoll(int devs)
 
 static int pmu_adb_autopoll(int devs)
 {
-	if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb)
+	if (pmu_state == uninitialized || !pmu_fully_inited || !pmu_has_adb)
 		return -ENXIO;
 
 	adb_dev_map = devs;
@@ -1073,7 +1073,7 @@ static int pmu_adb_reset_bus(void)
 	struct adb_request req;
 	int save_autopoll = adb_dev_map;
 
-	if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb)
+	if (pmu_state == uninitialized || !pmu_fully_inited || !pmu_has_adb)
 		return -ENXIO;
 
 	/* anyone got a better idea?? */
@@ -1109,7 +1109,7 @@ pmu_request(struct adb_request *req, void (*done)(struct adb_request *),
 	va_list list;
 	int i;
 
-	if (vias == NULL)
+	if (pmu_state == uninitialized)
 		return -ENXIO;
 
 	if (nbytes < 0 || nbytes > 32) {
@@ -1134,7 +1134,7 @@ pmu_queue_request(struct adb_request *req)
 	unsigned long flags;
 	int nsend;
 
-	if (via == NULL) {
+	if (pmu_state == uninitialized) {
 		req->complete = 1;
 		return -ENXIO;
 	}
@@ -1247,7 +1247,7 @@ pmu_start(void)
 void
 pmu_poll(void)
 {
-	if (!via)
+	if (pmu_state == uninitialized)
 		return;
 	if (disable_poll)
 		return;
@@ -1257,7 +1257,7 @@ pmu_poll(void)
 void
 pmu_poll_adb(void)
 {
-	if (!via)
+	if (pmu_state == uninitialized)
 		return;
 	if (disable_poll)
 		return;
@@ -1272,7 +1272,7 @@ pmu_poll_adb(void)
 void
 pmu_wait_complete(struct adb_request *req)
 {
-	if (!via)
+	if (pmu_state == uninitialized)
 		return;
 	while((pmu_state != idle && pmu_state != locked) || !req->complete)
 		via_pmu_interrupt(0, NULL);
@@ -1288,7 +1288,7 @@ pmu_suspend(void)
 {
 	unsigned long flags;
 
-	if (!via)
+	if (pmu_state == uninitialized)
 		return;
 	
 	spin_lock_irqsave(&pmu_lock, flags);
@@ -1319,7 +1319,7 @@ pmu_resume(void)
 {
 	unsigned long flags;
 
-	if (!via || (pmu_suspended < 1))
+	if (pmu_state == uninitialized || pmu_suspended < 1)
 		return;
 
 	spin_lock_irqsave(&pmu_lock, flags);
@@ -1681,7 +1681,7 @@ pmu_enable_irled(int on)
 {
 	struct adb_request req;
 
-	if (vias == NULL)
+	if (pmu_state == uninitialized)
 		return ;
 	if (pmu_kind == PMU_KEYLARGO_BASED)
 		return ;
@@ -1696,7 +1696,7 @@ pmu_restart(void)
 {
 	struct adb_request req;
 
-	if (via == NULL)
+	if (pmu_state == uninitialized)
 		return;
 
 	local_irq_disable();
@@ -1721,7 +1721,7 @@ pmu_shutdown(void)
 {
 	struct adb_request req;
 
-	if (via == NULL)
+	if (pmu_state == uninitialized)
 		return;
 
 	local_irq_disable();
@@ -1749,7 +1749,7 @@ pmu_shutdown(void)
 int
 pmu_present(void)
 {
-	return via != 0;
+	return pmu_state != uninitialized;
 }
 
 #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
@@ -2415,7 +2415,7 @@ static struct miscdevice pmu_device = {
 
 static int pmu_device_init(void)
 {
-	if (!via)
+	if (pmu_state == uninitialized)
 		return 0;
 	if (misc_register(&pmu_device) < 0)
 		printk(KERN_ERR "via-pmu: cannot register misc device.\n");
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 01/12] macintosh/via-pmu: Fix section mismatch warning
From: Finn Thain @ 2018-06-13 10:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael Schmitz, linuxppc-dev, linux-m68k, linux-kernel
In-Reply-To: <cover.1528885172.git.fthain@telegraphics.com.au>

The pmu_init() function has the __init qualifier, but the ops struct
that holds a pointer to it does not. This causes a build warning.
The driver works fine because the pointer is only dereferenced early.

The function is so small that there's negligible benefit from using
the __init qualifier. Remove it to fix the warning, consistent with
the other ADB drivers.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/macintosh/via-pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 433dbeddfcf9..fd3c5640d586 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -378,7 +378,7 @@ static int pmu_probe(void)
 	return vias == NULL? -ENODEV: 0;
 }
 
-static int __init pmu_init(void)
+static int pmu_init(void)
 {
 	if (vias == NULL)
 		return -ENODEV;
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 03/12] macintosh/via-pmu: Don't clear shift register interrupt flag twice
From: Finn Thain @ 2018-06-13 10:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael Schmitz, linuxppc-dev, linux-m68k, linux-kernel
In-Reply-To: <cover.1528885172.git.fthain@telegraphics.com.au>

The shift register interrupt flag gets cleared in via_pmu_interrupt()
and once again in pmu_sr_intr(). Fix this theoretical race condition.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/macintosh/via-pmu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 74065ea410bd..4c1bae5380c2 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -1458,7 +1458,6 @@ pmu_sr_intr(void)
 
 	if (in_8(&via[B]) & TREQ) {
 		printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B]));
-		out_8(&via[IFR], SR_INT);
 		return NULL;
 	}
 	/* The ack may not yet be low when we get the interrupt */
-- 
2.16.4

^ permalink raw reply related

* [PATCH v3 00/12] macintosh: Resolve various PMU driver problems
From: Finn Thain @ 2018-06-13 10:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael Schmitz, linuxppc-dev, linux-m68k, linux-kernel

This series of patches has the following aims.

1) Eliminate duplicated code. Linux presently has two drivers for
   the 68HC05-based PMU devices found in Macs: via-pmu and via-pmu68k.
   There's no value in having separate PMU drivers for each architecture.

2) Avoid further work on via-pmu68k that's not needed for via-pmu.

3) Fix some bugs in the via-pmu driver.

4) Enable the /dev/pmu and /proc/pmu/* userspace APIs on m68k Macs
   by adopting via-pmu.

5) Improve stability on early 100-series PowerBooks by loading no PMU
   driver at all. Neither via-pmu nor via-pmu68k supports the early
   M50753-based PMU device found in these models.

6) Eliminate duplicated RTC accessors for PMU and Cuda. Presently these
   can be found under both arch/m68k and arch/powerpc.

7) Assist the out-of-tree NuBus PowerMac port to support PMU designs
   shared with the m68k Mac port (e.g. PowerBooks 190 and 5300).

This patch series has been regression tested on various PowerBooks
(190, 520, 3400, Pismo G3) and PowerMacs (Beige G3, G5). These patches
did not affect userland utilities. (Note that there is a userland-
visible change to the contents of /proc/pmu/interrupts.)

Changed since v1:
1) Added blank lines after 'break' statements in patch 10.
2) Improved patch description for patch 3.
3) Added reviewed-by tags.
4) Split patch 8 to make code review easier.

Changed since v2:
1) Added reviewed-by tag.
2) Retained PMU_68K_V1 and PMU_68K_V2 symbols.

Finn Thain (12):
  macintosh/via-pmu: Fix section mismatch warning
  macintosh/via-pmu: Add missing mmio accessors
  macintosh/via-pmu: Don't clear shift register interrupt flag twice
  macintosh/via-pmu: Enhance state machine with new 'uninitialized'
    state
  macintosh/via-pmu: Replace via pointer with via1 and via2 pointers
  macintosh/via-pmu: Add support for m68k PowerBooks
  macintosh/via-pmu: Make CONFIG_PPC_PMAC Kconfig deps explicit
  macintosh/via-pmu68k: Don't load driver on unsupported hardware
  macintosh/via-pmu: Replace via-pmu68k driver with via-pmu driver
  macintosh: Use common code to access RTC
  macintosh/via-pmu: Clean up interrupt statistics
  macintosh/via-pmu: Disambiguate interrupt statistics

 arch/m68k/configs/mac_defconfig        |   2 +-
 arch/m68k/configs/multi_defconfig      |   2 +-
 arch/m68k/mac/config.c                 |   2 +-
 arch/m68k/mac/misc.c                   | 118 +----
 arch/powerpc/platforms/powermac/time.c |  74 +--
 drivers/macintosh/Kconfig              |  19 +-
 drivers/macintosh/Makefile             |   1 -
 drivers/macintosh/adb.c                |   2 +-
 drivers/macintosh/via-cuda.c           |  34 ++
 drivers/macintosh/via-pmu.c            | 378 ++++++++++-----
 drivers/macintosh/via-pmu68k.c         | 850 ---------------------------------
 include/linux/cuda.h                   |   3 +
 include/linux/pmu.h                    |   3 +
 include/uapi/linux/pmu.h               |   4 +-
 14 files changed, 313 insertions(+), 1179 deletions(-)
 delete mode 100644 drivers/macintosh/via-pmu68k.c

-- 
2.16.4

^ permalink raw reply

* [PATCH v3 02/12] macintosh/via-pmu: Add missing mmio accessors
From: Finn Thain @ 2018-06-13 10:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael Schmitz, linuxppc-dev, linux-m68k, linux-kernel
In-Reply-To: <cover.1528885172.git.fthain@telegraphics.com.au>

Add missing in_8() accessors to init_pmu() and pmu_sr_intr().

This fixes several sparse warnings:
drivers/macintosh/via-pmu.c:536:29: warning: dereference of noderef expression
drivers/macintosh/via-pmu.c:537:33: warning: dereference of noderef expression
drivers/macintosh/via-pmu.c:1455:17: warning: dereference of noderef expression
drivers/macintosh/via-pmu.c:1456:69: warning: dereference of noderef expression

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/macintosh/via-pmu.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index fd3c5640d586..74065ea410bd 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -532,8 +532,9 @@ init_pmu(void)
 	int timeout;
 	struct adb_request req;
 
-	out_8(&via[B], via[B] | TREQ);			/* negate TREQ */
-	out_8(&via[DIRB], (via[DIRB] | TREQ) & ~TACK);	/* TACK in, TREQ out */
+	/* Negate TREQ. Set TACK to input and TREQ to output. */
+	out_8(&via[B], in_8(&via[B]) | TREQ);
+	out_8(&via[DIRB], (in_8(&via[DIRB]) | TREQ) & ~TACK);
 
 	pmu_request(&req, NULL, 2, PMU_SET_INTR_MASK, pmu_intr_mask);
 	timeout =  100000;
@@ -1455,8 +1456,8 @@ pmu_sr_intr(void)
 	struct adb_request *req;
 	int bite = 0;
 
-	if (via[B] & TREQ) {
-		printk(KERN_ERR "PMU: spurious SR intr (%x)\n", via[B]);
+	if (in_8(&via[B]) & TREQ) {
+		printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B]));
 		out_8(&via[IFR], SR_INT);
 		return NULL;
 	}
-- 
2.16.4

^ permalink raw reply related

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Julien Thierry @ 2018-06-13 10:25 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Daniel Lezcano, Andrew Morton,
	Levin, Alexander (Sasha Levin), Randy Dunlap, Masami Hiramatsu,
	Marc Zyngier, Bartosz Golaszewski, Doug Berger, Palmer Dabbelt,
	iommu
In-Reply-To: <alpine.DEB.2.21.1806131149410.2280@nanos.tec.linutronix.de>



On 13/06/18 10:57, Thomas Gleixner wrote:
> On Wed, 13 Jun 2018, Julien Thierry wrote:
>> On 13/06/18 10:20, Thomas Gleixner wrote:
>>> Adding NMI delivery support at low level architecture irq chip level is
>>> perfectly fine, but the exposure of that needs to be restricted very
>>> much. Adding it to the generic interrupt control interfaces is not going to
>>> happen. That's doomed to begin with and a complete abuse of the interface
>>> as the handler can not ever be used for that.
>>>
>>
>> Understood, however the need would be to provide a way for a driver to request
>> an interrupt to be delivered as an NMI (if irqchip supports it).
> 
> s/driver/specialized code written by people who know what they are doing/
> 
>> But from your response this would be out of the question (in the
>> interrupt/irq/irqchip definitions).
> 
> Adding some magic to the irq chip is fine, because that's where the low
> level integration needs to be done, but exposing it through the generic
> interrupt subsystem is a NONO for obvious reasons.
> 
>> Or somehow the concerned irqchip informs the arch it supports NMI delivery and
>> it is up to the interested drivers to query the arch whether NMI delivery is
>> supported by the system?
> 
> Yes, we need some infrastructure for that, but that needs to be separate
> and with very limited exposure.
> 

Right, makes sense. I'll check with Marc how such an infrastructure 
should be introduced.

Thanks,

-- 
Julien Thierry

^ permalink raw reply

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Marc Zyngier @ 2018-06-13 10:06 UTC (permalink / raw)
  To: Thomas Gleixner, Julien Thierry
  Cc: Peter Zijlstra, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Daniel Lezcano, Andrew Morton,
	Levin, Alexander (Sasha Levin), Randy Dunlap, Masami Hiramatsu,
	Bartosz Golaszewski, Doug Berger, Palmer Dabbelt, iommu
In-Reply-To: <alpine.DEB.2.21.1806131104570.2280@nanos.tec.linutronix.de>

On 13/06/18 10:20, Thomas Gleixner wrote:
> On Wed, 13 Jun 2018, Julien Thierry wrote:
>> On 13/06/18 09:34, Peter Zijlstra wrote:
>>> On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
>>>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>>>> index 5426627..dbc5e02 100644
>>>> --- a/include/linux/interrupt.h
>>>> +++ b/include/linux/interrupt.h
>>>> @@ -61,6 +61,8 @@
>>>>    *                interrupt handler after suspending interrupts. For
>>>> system
>>>>    *                wakeup devices users need to implement wakeup
>>>> detection in
>>>>    *                their interrupt handlers.
>>>> + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as
>>>> non-maskable, if
>>>> + *                supported by the chip.
>>>>    */
>>>
>>> NAK on the first 6 patches. You really _REALLY_ don't want to expose
>>> NMIs to this level.
>>>
>>
>> I've been working on something similar on arm64 side, and effectively the one
>> thing that might be common to arm64 and intel is the interface to set an
>> interrupt as NMI. So I guess it would be nice to agree on the right approach
>> for this.
>>
>> The way I did it was by introducing a new irq_state and let the irqchip driver
>> handle most of the work (if it supports that state):
>>
>> https://lkml.org/lkml/2018/5/25/181
>>
>> This has not been ACKed nor NAKed. So I am just asking whether this is a more
>> suitable approach, and if not, is there any suggestions on how to do this?
> 
> I really didn't pay attention to that as it's buried in the GIC/ARM series
> which is usually Marc's playground.

I'm working my way through it ATM now that I have some brain cycles back.

> Adding NMI delivery support at low level architecture irq chip level is
> perfectly fine, but the exposure of that needs to be restricted very
> much. Adding it to the generic interrupt control interfaces is not going to
> happen. That's doomed to begin with and a complete abuse of the interface
> as the handler can not ever be used for that.

I can only agree with that. Allowing random drivers to use request_irq()
to make anything an NMI ultimately turns it into a complete mess ("hey,
NMI is *faster*, let's use that"), and a potential source of horrible
deadlocks.

What I'd find more palatable is a way for an irqchip to be able to
prioritize some interrupts based on a set of architecturally-defined
requirements, and a separate NMI requesting/handling framework that is
separate from the IRQ API, as the overall requirements are likely to be
completely different.

It shouldn't have to be nearly as complex as the IRQ API, and require
much stricter requirements in terms of what you can do there (flow
handling should definitely be different).

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply

* [PATCH] powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss problem with THP
From: Nicholas Piggin @ 2018-06-13  9:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Aneesh Kumar K . V

The patch 99baac21e4 ("mm: fix MADV_[FREE|DONTNEED] TLB flush miss
problem") added a force flush mode to the mmu_gather flush, which
unconditionally flushes the entire address range being invalidated
(even if actual ptes only covered a smaller range), to solve a problem
with concurrent threads invalidating the same PTEs causing them to
miss TLBs that need flushing.

This does not work with powerpc, which invalidates mmu_gather batches
according to page size. Have powerpc flush all possible page sizes in
the range if it encounters the concurrency condition.

Hash does not have a problem because it invalidates TLBs inside the
page table locks.

Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Since RFC:
- Account for hugetlb pages that can be mixed with the tlb_flush
  range.

 arch/powerpc/mm/tlb-radix.c | 85 ++++++++++++++++++++++++++++---------
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 67a6e86d3e7e..9dbccda651d7 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-		     unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+					unsigned long start, unsigned long end,
+					bool flush_all_sizes)
 
 {
-	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pid;
 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
 	unsigned long page_size = 1UL << page_shift;
 	unsigned long nr_pages = (end - start) >> page_shift;
 	bool local, full;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	if (is_vm_hugetlb_page(vma))
-		return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		return;
@@ -738,18 +733,27 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 				_tlbie_pid(pid, RIC_FLUSH_TLB);
 		}
 	} else {
-		bool hflush = false;
+		bool hflush = flush_all_sizes;
+		bool gflush = flush_all_sizes;
 		unsigned long hstart, hend;
+		unsigned long gstart, gend;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
-		hend = end >> HPAGE_PMD_SHIFT;
-		if (hstart < hend) {
-			hstart <<= HPAGE_PMD_SHIFT;
-			hend <<= HPAGE_PMD_SHIFT;
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 			hflush = true;
+
+		if (hflush) {
+			hstart = (start + HPAGE_PMD_SIZE - 1) & HPAGE_PMD_MASK;
+			hend = end & HPAGE_PMD_MASK;
+			if (hstart == hend)
+				hflush = false;
+		}
+
+		if (gflush) {
+			gstart = (start + HPAGE_PUD_SIZE - 1) & HPAGE_PUD_MASK;
+			gend = end & HPAGE_PUD_MASK;
+			if (gstart == gend)
+				gflush = false;
 		}
-#endif
 
 		asm volatile("ptesync": : :"memory");
 		if (local) {
@@ -757,18 +761,36 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			if (hflush)
 				__tlbiel_va_range(hstart, hend, pid,
 						HPAGE_PMD_SIZE, MMU_PAGE_2M);
+			if (gflush)
+				__tlbiel_va_range(gstart, gend, pid,
+						HPAGE_PUD_SIZE, MMU_PAGE_1G);
 			asm volatile("ptesync": : :"memory");
 		} else {
 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbie_va_range(hstart, hend, pid,
 						HPAGE_PMD_SIZE, MMU_PAGE_2M);
+			if (gflush)
+				__tlbie_va_range(gstart, gend, pid,
+						HPAGE_PUD_SIZE, MMU_PAGE_1G);
 			fixup_tlbie();
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
 		}
 	}
 	preempt_enable();
 }
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	if (is_vm_hugetlb_page(vma))
+		return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
 static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	int psize = 0;
 	struct mm_struct *mm = tlb->mm;
 	int page_size = tlb->page_size;
+	unsigned long start = tlb->start;
+	unsigned long end = tlb->end;
 
 	/*
 	 * if page size is not something we understand, do a full mm flush
@@ -847,15 +871,38 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 */
 	if (tlb->fullmm) {
 		__flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+	} else if (mm_tlb_flush_pending(mm)) {
+		/*
+		 * If there is a concurrent invalidation that is clearing ptes,
+		 * then it's possible this invalidation will miss one of those
+		 * cleared ptes and miss flushing the TLB. If this invalidate
+		 * returns before the other one flushes TLBs, that can result
+		 * in it returning while there are still valid TLBs inside the
+		 * range to be invalidated.
+		 *
+		 * See mm/memory.c:tlb_finish_mmu() for more details.
+		 *
+		 * The solution to this is ensure the entire range is always
+		 * flushed here. The problem for powerpc is that the flushes
+		 * are page size specific, so this "forced flush" would not
+		 * do the right thing if there are a mix of page sizes in
+		 * the range to be invalidated. So use __flush_tlb_range
+		 * which invalidates all possible page sizes in the range.
+		 *
+		 * PWC flush probably is not be required because the core code
+		 * shouldn't free page tables in this path, but accounting
+		 * for the possibility makes us a bit more robust.
+		 */
+		WARN_ON_ONCE(tlb->need_flush_all);
+		__radix__flush_tlb_range(mm, start, end, true);
+#endif
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_mm(mm);
 		else
 			radix__flush_all_mm(mm);
 	} else {
-		unsigned long start = tlb->start;
-		unsigned long end = tlb->end;
-
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_range_psize(mm, start, end, psize);
 		else
-- 
2.17.0

^ permalink raw reply related

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Thomas Gleixner @ 2018-06-13  9:57 UTC (permalink / raw)
  To: Julien Thierry
  Cc: Peter Zijlstra, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Daniel Lezcano, Andrew Morton,
	Levin, Alexander (Sasha Levin), Randy Dunlap, Masami Hiramatsu,
	Marc Zyngier, Bartosz Golaszewski, Doug Berger, Palmer Dabbelt,
	iommu
In-Reply-To: <344b838e-81e3-97d8-f90d-315fed7879c1@arm.com>

On Wed, 13 Jun 2018, Julien Thierry wrote:
> On 13/06/18 10:20, Thomas Gleixner wrote:
> > Adding NMI delivery support at low level architecture irq chip level is
> > perfectly fine, but the exposure of that needs to be restricted very
> > much. Adding it to the generic interrupt control interfaces is not going to
> > happen. That's doomed to begin with and a complete abuse of the interface
> > as the handler can not ever be used for that.
> > 
> 
> Understood, however the need would be to provide a way for a driver to request
> an interrupt to be delivered as an NMI (if irqchip supports it).

s/driver/specialized code written by people who know what they are doing/

> But from your response this would be out of the question (in the
> interrupt/irq/irqchip definitions).

Adding some magic to the irq chip is fine, because that's where the low
level integration needs to be done, but exposing it through the generic
interrupt subsystem is a NONO for obvious reasons.

> Or somehow the concerned irqchip informs the arch it supports NMI delivery and
> it is up to the interested drivers to query the arch whether NMI delivery is
> supported by the system?

Yes, we need some infrastructure for that, but that needs to be separate
and with very limited exposure.

Thanks,

	tglx

^ permalink raw reply

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Julien Thierry @ 2018-06-13  9:49 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Daniel Lezcano, Andrew Morton,
	Levin, Alexander (Sasha Levin), Randy Dunlap, Masami Hiramatsu,
	Marc Zyngier, Bartosz Golaszewski, Doug Berger, Palmer Dabbelt,
	iommu
In-Reply-To: <344b838e-81e3-97d8-f90d-315fed7879c1@arm.com>



On 13/06/18 10:36, Julien Thierry wrote:
> 
> 
> On 13/06/18 10:20, Thomas Gleixner wrote:
>> On Wed, 13 Jun 2018, Julien Thierry wrote:
>>> On 13/06/18 09:34, Peter Zijlstra wrote:
>>>> On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
>>>>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>>>>> index 5426627..dbc5e02 100644
>>>>> --- a/include/linux/interrupt.h
>>>>> +++ b/include/linux/interrupt.h
>>>>> @@ -61,6 +61,8 @@
>>>>>     *                interrupt handler after suspending interrupts. 
>>>>> For
>>>>> system
>>>>>     *                wakeup devices users need to implement wakeup
>>>>> detection in
>>>>>     *                their interrupt handlers.
>>>>> + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as
>>>>> non-maskable, if
>>>>> + *                supported by the chip.
>>>>>     */
>>>>
>>>> NAK on the first 6 patches. You really _REALLY_ don't want to expose
>>>> NMIs to this level.
>>>>
>>>
>>> I've been working on something similar on arm64 side, and effectively 
>>> the one
>>> thing that might be common to arm64 and intel is the interface to set an
>>> interrupt as NMI. So I guess it would be nice to agree on the right 
>>> approach
>>> for this.
>>>
>>> The way I did it was by introducing a new irq_state and let the 
>>> irqchip driver
>>> handle most of the work (if it supports that state):
>>>
>>> https://lkml.org/lkml/2018/5/25/181
>>>
>>> This has not been ACKed nor NAKed. So I am just asking whether this 
>>> is a more
>>> suitable approach, and if not, is there any suggestions on how to do 
>>> this?
>>
>> I really didn't pay attention to that as it's buried in the GIC/ARM 
>> series
>> which is usually Marc's playground.
>>
>> Adding NMI delivery support at low level architecture irq chip level is
>> perfectly fine, but the exposure of that needs to be restricted very
>> much. Adding it to the generic interrupt control interfaces is not 
>> going to
>> happen. That's doomed to begin with and a complete abuse of the interface
>> as the handler can not ever be used for that.
>>
> 
> Understood, however the need would be to provide a way for a driver to 
> request an interrupt to be delivered as an NMI (if irqchip supports it).
> 
> But from your response this would be out of the question (in the 
> interrupt/irq/irqchip definitions).
> 
> Or somehow the concerned irqchip informs the arch it supports NMI 
> delivery and it is up to the interested drivers to query the arch 
> whether NMI delivery is supported by the system?

Actually scratch that last part, it is also missing a way for the driver 
to actually communicate to the irqchip that its interrupt should be 
treated as an NMI, so it wouldn't work...

-- 
Julien Thierry

^ permalink raw reply

* Re: [RFC PATCH 20/23] watchdog/hardlockup/hpet: Rotate interrupt among all monitored CPUs
From: Thomas Gleixner @ 2018-06-13  9:48 UTC (permalink / raw)
  To: Ricardo Neri
  Cc: Ingo Molnar, H. Peter Anvin, Andi Kleen, Ashok Raj,
	Borislav Petkov, Tony Luck, Ravi V. Shankar, x86, sparclinux,
	linuxppc-dev, linux-kernel, Jacob Pan, Rafael J. Wysocki,
	Don Zickus, Nicholas Piggin, Michael Ellerman,
	Frederic Weisbecker, Alexei Starovoitov, Babu Moger,
	Mathieu Desnoyers, Masami Hiramatsu, Peter Zijlstra,
	Andrew Morton, Philippe Ombredanne, Colin Ian King,
	Byungchul Park, Paul E. McKenney, Luis R. Rodriguez, Waiman Long,
	Josh Poimboeuf, Randy Dunlap, Davidlohr Bueso, Christoffer Dall,
	Marc Zyngier, Kai-Heng Feng, Konrad Rzeszutek Wilk,
	David Rientjes, iommu
In-Reply-To: <1528851463-21140-21-git-send-email-ricardo.neri-calderon@linux.intel.com>

On Tue, 12 Jun 2018, Ricardo Neri wrote:
> +	/* There are no CPUs to monitor. */
> +	if (!cpumask_weight(&hdata->monitored_mask))
> +		return NMI_HANDLED;
> +
>  	inspect_for_hardlockups(regs);
>  
> +	/*
> +	 * Target a new CPU. Keep trying until we find a monitored CPU. CPUs
> +	 * are addded and removed to this mask at cpu_up() and cpu_down(),
> +	 * respectively. Thus, the interrupt should be able to be moved to
> +	 * the next monitored CPU.
> +	 */
> +	spin_lock(&hld_data->lock);

Yuck. Taking a spinlock from NMI ... 

> +	for_each_cpu_wrap(cpu, &hdata->monitored_mask, smp_processor_id() + 1) {
> +		if (!irq_set_affinity(hld_data->irq, cpumask_of(cpu)))
> +			break;

... and then calling into generic interrupt code which will take even more
locks is completely broken.

Guess what happens when the NMI hits a section where one of those locks is
held? Then you need another watchdog to decode the lockup you just ran into.

Thanks,

	tglx

^ permalink raw reply

* Re: [RFC PATCH 17/23] watchdog/hardlockup/hpet: Convert the timer's interrupt to NMI
From: Thomas Gleixner @ 2018-06-13  9:40 UTC (permalink / raw)
  To: Ricardo Neri
  Cc: Ingo Molnar, H. Peter Anvin, Andi Kleen, Ashok Raj,
	Borislav Petkov, Tony Luck, Ravi V. Shankar, x86, sparclinux,
	linuxppc-dev, linux-kernel, Jacob Pan, Rafael J. Wysocki,
	Don Zickus, Nicholas Piggin, Michael Ellerman,
	Frederic Weisbecker, Alexei Starovoitov, Babu Moger,
	Mathieu Desnoyers, Masami Hiramatsu, Peter Zijlstra,
	Andrew Morton, Philippe Ombredanne, Colin Ian King,
	Byungchul Park, Paul E. McKenney, Luis R. Rodriguez, Waiman Long,
	Josh Poimboeuf, Randy Dunlap, Davidlohr Bueso, Christoffer Dall,
	Marc Zyngier, Kai-Heng Feng, Konrad Rzeszutek Wilk,
	David Rientjes, iommu
In-Reply-To: <1528851463-21140-18-git-send-email-ricardo.neri-calderon@linux.intel.com>

On Tue, 12 Jun 2018, Ricardo Neri wrote:
> @@ -183,6 +184,8 @@ static irqreturn_t hardlockup_detector_irq_handler(int irq, void *data)
>  	if (!(hdata->flags & HPET_DEV_PERI_CAP))
>  		kick_timer(hdata);
>  
> +	pr_err("This interrupt should not have happened. Ensure delivery mode is NMI.\n");

Eeew.

>  /**
> + * hardlockup_detector_nmi_handler() - NMI Interrupt handler
> + * @val:	Attribute associated with the NMI. Not used.
> + * @regs:	Register values as seen when the NMI was asserted
> + *
> + * When an NMI is issued, look for hardlockups. If the timer is not periodic,
> + * kick it. The interrupt is always handled when delivered via the
> + * Front-Side Bus.
> + *
> + * Returns:
> + *
> + * NMI_DONE if the HPET timer did not cause the interrupt. NMI_HANDLED
> + * otherwise.
> + */
> +static int hardlockup_detector_nmi_handler(unsigned int val,
> +					   struct pt_regs *regs)
> +{
> +	struct hpet_hld_data *hdata = hld_data;
> +	unsigned int use_fsb;
> +
> +	/*
> +	 * If FSB delivery mode is used, the timer interrupt is programmed as
> +	 * edge-triggered and there is no need to check the ISR register.
> +	 */
> +	use_fsb = hdata->flags & HPET_DEV_FSB_CAP;
> +
> +	if (!use_fsb && !is_hpet_wdt_interrupt(hdata))
> +		return NMI_DONE;

So for 'use_fsb == True' every single NMI will fall through into the
watchdog code below.

> +	inspect_for_hardlockups(regs);
> +
> +	if (!(hdata->flags & HPET_DEV_PERI_CAP))
> +		kick_timer(hdata);

And in case that the HPET does not support periodic mode this reprograms
the timer on every NMI which means that while perf is running the watchdog
will never ever detect anything.

Aside of that, reading TWO HPET registers for every NMI is insane. HPET
access is horribly slow, so any high frequency perf monitoring will take a
massive performance hit.

Thanks,

	tglx

^ permalink raw reply

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Julien Thierry @ 2018-06-13  9:36 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Daniel Lezcano, Andrew Morton,
	Levin, Alexander (Sasha Levin), Randy Dunlap, Masami Hiramatsu,
	Marc Zyngier, Bartosz Golaszewski, Doug Berger, Palmer Dabbelt,
	iommu
In-Reply-To: <alpine.DEB.2.21.1806131104570.2280@nanos.tec.linutronix.de>



On 13/06/18 10:20, Thomas Gleixner wrote:
> On Wed, 13 Jun 2018, Julien Thierry wrote:
>> On 13/06/18 09:34, Peter Zijlstra wrote:
>>> On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
>>>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>>>> index 5426627..dbc5e02 100644
>>>> --- a/include/linux/interrupt.h
>>>> +++ b/include/linux/interrupt.h
>>>> @@ -61,6 +61,8 @@
>>>>     *                interrupt handler after suspending interrupts. For
>>>> system
>>>>     *                wakeup devices users need to implement wakeup
>>>> detection in
>>>>     *                their interrupt handlers.
>>>> + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as
>>>> non-maskable, if
>>>> + *                supported by the chip.
>>>>     */
>>>
>>> NAK on the first 6 patches. You really _REALLY_ don't want to expose
>>> NMIs to this level.
>>>
>>
>> I've been working on something similar on arm64 side, and effectively the one
>> thing that might be common to arm64 and intel is the interface to set an
>> interrupt as NMI. So I guess it would be nice to agree on the right approach
>> for this.
>>
>> The way I did it was by introducing a new irq_state and let the irqchip driver
>> handle most of the work (if it supports that state):
>>
>> https://lkml.org/lkml/2018/5/25/181
>>
>> This has not been ACKed nor NAKed. So I am just asking whether this is a more
>> suitable approach, and if not, is there any suggestions on how to do this?
> 
> I really didn't pay attention to that as it's buried in the GIC/ARM series
> which is usually Marc's playground.
> 
> Adding NMI delivery support at low level architecture irq chip level is
> perfectly fine, but the exposure of that needs to be restricted very
> much. Adding it to the generic interrupt control interfaces is not going to
> happen. That's doomed to begin with and a complete abuse of the interface
> as the handler can not ever be used for that.
> 

Understood, however the need would be to provide a way for a driver to 
request an interrupt to be delivered as an NMI (if irqchip supports it).

But from your response this would be out of the question (in the 
interrupt/irq/irqchip definitions).

Or somehow the concerned irqchip informs the arch it supports NMI 
delivery and it is up to the interested drivers to query the arch 
whether NMI delivery is supported by the system?

Thanks,

-- 
Julien Thierry

^ permalink raw reply

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Thomas Gleixner @ 2018-06-13  9:20 UTC (permalink / raw)
  To: Julien Thierry
  Cc: Peter Zijlstra, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Daniel Lezcano, Andrew Morton,
	Levin, Alexander (Sasha Levin), Randy Dunlap, Masami Hiramatsu,
	Marc Zyngier, Bartosz Golaszewski, Doug Berger, Palmer Dabbelt,
	iommu
In-Reply-To: <26687332-ab8f-7f6d-909a-f0918dbfea86@arm.com>

On Wed, 13 Jun 2018, Julien Thierry wrote:
> On 13/06/18 09:34, Peter Zijlstra wrote:
> > On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
> > > diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
> > > index 5426627..dbc5e02 100644
> > > --- a/include/linux/interrupt.h
> > > +++ b/include/linux/interrupt.h
> > > @@ -61,6 +61,8 @@
> > >    *                interrupt handler after suspending interrupts. For
> > > system
> > >    *                wakeup devices users need to implement wakeup
> > > detection in
> > >    *                their interrupt handlers.
> > > + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as
> > > non-maskable, if
> > > + *                supported by the chip.
> > >    */
> > 
> > NAK on the first 6 patches. You really _REALLY_ don't want to expose
> > NMIs to this level.
> > 
> 
> I've been working on something similar on arm64 side, and effectively the one
> thing that might be common to arm64 and intel is the interface to set an
> interrupt as NMI. So I guess it would be nice to agree on the right approach
> for this.
> 
> The way I did it was by introducing a new irq_state and let the irqchip driver
> handle most of the work (if it supports that state):
> 
> https://lkml.org/lkml/2018/5/25/181
>
> This has not been ACKed nor NAKed. So I am just asking whether this is a more
> suitable approach, and if not, is there any suggestions on how to do this?

I really didn't pay attention to that as it's buried in the GIC/ARM series
which is usually Marc's playground.

Adding NMI delivery support at low level architecture irq chip level is
perfectly fine, but the exposure of that needs to be restricted very
much. Adding it to the generic interrupt control interfaces is not going to
happen. That's doomed to begin with and a complete abuse of the interface
as the handler can not ever be used for that.

Thanks,

	tglx

^ permalink raw reply

* Re: [RFC PATCH 12/23] kernel/watchdog: Introduce a struct for NMI watchdog operations
From: Thomas Gleixner @ 2018-06-13  9:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nicholas Piggin, Ricardo Neri, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Don Zickus, Michael Ellerman, Frederic Weisbecker,
	Babu Moger, David S. Miller, Benjamin Herrenschmidt,
	Paul Mackerras, Mathieu Desnoyers, Masami Hiramatsu,
	Andrew Morton, Philippe Ombredanne, Colin Ian King,
	Luis R. Rodriguez, iommu
In-Reply-To: <20180613084219.GT12258@hirez.programming.kicks-ass.net>

On Wed, 13 Jun 2018, Peter Zijlstra wrote:
> On Wed, Jun 13, 2018 at 05:41:41PM +1000, Nicholas Piggin wrote:
> > On Tue, 12 Jun 2018 17:57:32 -0700
> > Ricardo Neri <ricardo.neri-calderon@linux.intel.com> wrote:
> > 
> > > Instead of exposing individual functions for the operations of the NMI
> > > watchdog, define a common interface that can be used across multiple
> > > implementations.
> > > 
> > > The struct nmi_watchdog_ops is defined for such operations. These initial
> > > definitions include the enable, disable, start, stop, and cleanup
> > > operations.
> > > 
> > > Only a single NMI watchdog can be used in the system. The operations of
> > > this NMI watchdog are accessed via the new variable nmi_wd_ops. This
> > > variable is set to point the operations of the first NMI watchdog that
> > > initializes successfully. Even though at this moment, the only available
> > > NMI watchdog is the perf-based hardlockup detector. More implementations
> > > can be added in the future.
> > 
> > Cool, this looks pretty nice at a quick glance. sparc and powerpc at
> > least have their own NMI watchdogs, it would be good to have those
> > converted as well.
> 
> Yeah, agreed, this looks like half a patch.

Though I'm not seeing the advantage of it. That kind of NMI watchdogs are
low level architecture details so having yet another 'ops' data structure
with a gazillion of callbacks, checks and indirections does not provide
value over the currently available weak stubs.

> > Is hpet a cross platform thing, or just x86? We should avoid
> > proliferation of files under kernel/ I think, so with these watchdog
> > driver structs then maybe implementations could go in drivers/ or
> > arch/
> 
> HPET is mostly an x86 thing (although it can be found elsewhere), but the

On ia64 and I doubt that anyone wants to take on the task of underwater
welding it to Itanic.

> whole thing relies on the x86 NMI mechanism and is thus firmly arch/
> material (like the sparc and ppc thing).

Right. Trying to make this 'generic' is not really solving anything.

Thanks,

	tglx

^ permalink raw reply

* Re: [RFC PATCH 17/23] watchdog/hardlockup/hpet: Convert the timer's interrupt to NMI
From: Peter Zijlstra @ 2018-06-13  9:07 UTC (permalink / raw)
  To: Ricardo Neri
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen,
	Ashok Raj, Borislav Petkov, Tony Luck, Ravi V. Shankar, x86,
	sparclinux, linuxppc-dev, linux-kernel, Jacob Pan,
	Rafael J. Wysocki, Don Zickus, Nicholas Piggin, Michael Ellerman,
	Frederic Weisbecker, Alexei Starovoitov, Babu Moger,
	Mathieu Desnoyers, Masami Hiramatsu, Andrew Morton,
	Philippe Ombredanne, Colin Ian King, Byungchul Park,
	Paul E. McKenney, Luis R. Rodriguez, Waiman Long, Josh Poimboeuf,
	Randy Dunlap, Davidlohr Bueso, Christoffer Dall, Marc Zyngier,
	Kai-Heng Feng, Konrad Rzeszutek Wilk, David Rientjes, iommu
In-Reply-To: <1528851463-21140-18-git-send-email-ricardo.neri-calderon@linux.intel.com>

On Tue, Jun 12, 2018 at 05:57:37PM -0700, Ricardo Neri wrote:

+static bool is_hpet_wdt_interrupt(struct hpet_hld_data *hdata)
+{
+       unsigned long this_isr;
+       unsigned int lvl_trig;
+
+       this_isr = hpet_readl(HPET_STATUS) & BIT(hdata->num);
+
+       lvl_trig = hpet_readl(HPET_Tn_CFG(hdata->num)) & HPET_TN_LEVEL;
+
+       if (lvl_trig && this_isr)
+               return true;
+
+       return false;
+}

> +static int hardlockup_detector_nmi_handler(unsigned int val,
> +					   struct pt_regs *regs)
> +{
> +	struct hpet_hld_data *hdata = hld_data;
> +	unsigned int use_fsb;
> +
> +	/*
> +	 * If FSB delivery mode is used, the timer interrupt is programmed as
> +	 * edge-triggered and there is no need to check the ISR register.
> +	 */
> +	use_fsb = hdata->flags & HPET_DEV_FSB_CAP;

Please do explain.. That FSB thing basically means MSI. But there's only
a single NMI vector. How do we know this NMI came from the HPET?

> +
> +	if (!use_fsb && !is_hpet_wdt_interrupt(hdata))

So you add _2_ HPET reads for every single NMI that gets triggered...
and IIRC HPET reads are _sllooooowwwwww_.

> +		return NMI_DONE;
> +
> +	inspect_for_hardlockups(regs);
> +
> +	if (!(hdata->flags & HPET_DEV_PERI_CAP))
> +		kick_timer(hdata);
> +
> +	/* Acknowledge interrupt if in level-triggered mode */
> +	if (!use_fsb)
> +		hpet_writel(BIT(hdata->num), HPET_STATUS);
> +
> +	return NMI_HANDLED;

So if I read this right, when in FSB/MSI mode, we'll basically _always_
claim every single NMI as handled?

That's broken.

> +}

^ permalink raw reply

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Julien Thierry @ 2018-06-13  8:59 UTC (permalink / raw)
  To: Peter Zijlstra, Ricardo Neri
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen,
	Ashok Raj, Borislav Petkov, Tony Luck, Ravi V. Shankar, x86,
	sparclinux, linuxppc-dev, linux-kernel, Jacob Pan, Daniel Lezcano,
	Andrew Morton, Levin, Alexander (Sasha Levin), Randy Dunlap,
	Masami Hiramatsu, Marc Zyngier, Bartosz Golaszewski, Doug Berger,
	Palmer Dabbelt, iommu
In-Reply-To: <20180613083419.GS12258@hirez.programming.kicks-ass.net>

Hi Peter, Ricardo,

On 13/06/18 09:34, Peter Zijlstra wrote:
> On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>> index 5426627..dbc5e02 100644
>> --- a/include/linux/interrupt.h
>> +++ b/include/linux/interrupt.h
>> @@ -61,6 +61,8 @@
>>    *                interrupt handler after suspending interrupts. For system
>>    *                wakeup devices users need to implement wakeup detection in
>>    *                their interrupt handlers.
>> + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as non-maskable, if
>> + *                supported by the chip.
>>    */
> 
> NAK on the first 6 patches. You really _REALLY_ don't want to expose
> NMIs to this level.
> 

I've been working on something similar on arm64 side, and effectively 
the one thing that might be common to arm64 and intel is the interface 
to set an interrupt as NMI. So I guess it would be nice to agree on the 
right approach for this.

The way I did it was by introducing a new irq_state and let the irqchip 
driver handle most of the work (if it supports that state):

https://lkml.org/lkml/2018/5/25/181

This has not been ACKed nor NAKed. So I am just asking whether this is a 
more suitable approach, and if not, is there any suggestions on how to 
do this?

Thanks,

-- 
Julien Thierry

^ permalink raw reply

* Re: [RFC PATCH 14/23] watchdog/hardlockup: Decouple the hardlockup detector from perf
From: Peter Zijlstra @ 2018-06-13  8:43 UTC (permalink / raw)
  To: Ricardo Neri
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen,
	Ashok Raj, Borislav Petkov, Tony Luck, Ravi V. Shankar, x86,
	sparclinux, linuxppc-dev, linux-kernel, Jacob Pan, Don Zickus,
	Nicholas Piggin, Michael Ellerman, Frederic Weisbecker,
	Babu Moger, David S. Miller, Benjamin Herrenschmidt,
	Paul Mackerras, Mathieu Desnoyers, Masami Hiramatsu,
	Andrew Morton, Philippe Ombredanne, Colin Ian King,
	Luis R. Rodriguez, iommu
In-Reply-To: <1528851463-21140-15-git-send-email-ricardo.neri-calderon@linux.intel.com>

On Tue, Jun 12, 2018 at 05:57:34PM -0700, Ricardo Neri wrote:
> The current default implementation of the hardlockup detector assumes that
> it is implemented using perf events.

The sparc and powerpc things are very much not using perf.

^ permalink raw reply

* Re: UBSAN: Undefined behaviour in ../include/linux/percpu_counter.h:137:13
From: Mathieu Malaterre @ 2018-06-13  8:43 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <87in6ntqar.fsf@concordia.ellerman.id.au>

On Wed, Jun 13, 2018 at 3:43 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Mathieu Malaterre <malat@debian.org> writes:
>
> > Hi there,
> >
> > I have a reproducible UBSAN appearing in dmesg after a while on my G4
> > (*). Could anyone suggest a way to diagnose the actual root issue here
> > (or is it just a false positive) ?
>
> It looks like a real overflow, I guess the question is why are we seeing it.
>
> The first thing to work out would be what exactly is overflowing.
>
> Is it in here?
>
>         cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
>                                      rq->io_start_time_ns, rq->cmd_flags);
>
>
> If so that would suggest something is taking multiple hours to complete,
> which seems unlikely. Is time going backward?

There is also something suspicious in the kern.log file:

Jun 12 20:09:04 debian kernel: [    5.504182]
================================================================================
Jun 12 20:09:04 debian kernel: [    5.508945] UBSAN: Undefined
behaviour in ../drivers/rtc/rtc-lib.c:87:22
Jun 12 20:09:04 debian kernel: [    5.513658] signed integer overflow:
Jun 12 20:09:04 debian kernel: [    5.518211] 1193024 * 3600 cannot be
represented in type 'int'
Jun 12 20:09:04 debian kernel: [    5.522866] CPU: 0 PID: 1 Comm:
swapper Not tainted 4.17.0+ #1
Jun 12 20:09:04 debian kernel: [    5.527567] Call Trace:
Jun 12 20:09:04 debian kernel: [    5.532200] [df4e7b00] [c0481074]
ubsan_epilogue+0x18/0x4c (unreliable)
Jun 12 20:09:04 debian kernel: [    5.537019] [df4e7b10] [c0481a14]
handle_overflow+0xbc/0xdc
Jun 12 20:09:04 debian kernel: [    5.541832] [df4e7b90] [c060d698]
rtc_time64_to_tm+0x344/0x388
Jun 12 20:09:04 debian kernel: [    5.546655] [df4e7bd0] [c001076c]
rtc_generic_get_time+0x2c/0x40
Jun 12 20:09:04 debian kernel: [    5.551477] [df4e7be0] [c06113d4]
__rtc_read_time+0x70/0x13c
Jun 12 20:09:04 debian kernel: [    5.556288] [df4e7c00] [c061150c]
rtc_read_time+0x6c/0x130
Jun 12 20:09:04 debian kernel: [    5.561088] [df4e7c30] [c061271c]
__rtc_read_alarm+0x34/0x684
Jun 12 20:09:04 debian kernel: [    5.565884] [df4e7ce0] [c060f234]
rtc_device_register+0x88/0x218
Jun 12 20:09:04 debian kernel: [    5.570695] [df4e7d40] [c060f428]
devm_rtc_device_register+0x64/0xc4
Jun 12 20:09:04 debian kernel: [    5.575528] [df4e7d60] [c09d15d4]
generic_rtc_probe+0x50/0x78
Jun 12 20:09:04 debian kernel: [    5.580359] [df4e7d70] [c055e4a4]
platform_drv_probe+0xa8/0x128
Jun 12 20:09:04 debian kernel: [    5.585210] [df4e7d90] [c0559d28]
driver_probe_device+0x354/0x6fc
Jun 12 20:09:04 debian kernel: [    5.590064] [df4e7dd0] [c055a270]
__driver_attach+0x1a0/0x22c
Jun 12 20:09:04 debian kernel: [    5.594917] [df4e7df0] [c0555b70]
bus_for_each_dev+0x84/0xdc
Jun 12 20:09:04 debian kernel: [    5.599750] [df4e7e20] [c0558420]
bus_add_driver+0x188/0x348
Jun 12 20:09:04 debian kernel: [    5.604584] [df4e7e40] [c055b7b4]
driver_register+0xa0/0x18c
Jun 12 20:09:04 debian kernel: [    5.609433] [df4e7e50] [c055e950]
__platform_driver_probe+0x8c/0x198
Jun 12 20:09:04 debian kernel: [    5.614330] [df4e7e70] [c0005800]
do_one_initcall+0x64/0x280
Jun 12 20:09:04 debian kernel: [    5.619237] [df4e7ee0] [c0997c04]
kernel_init_freeable+0x3a4/0x444
Jun 12 20:09:04 debian kernel: [    5.624145] [df4e7f30] [c00049f8]
kernel_init+0x24/0x118
Jun 12 20:09:04 debian kernel: [    5.629029] [df4e7f40] [c001b1c4]
ret_from_kernel_thread+0x14/0x1c
Jun 12 20:09:04 debian kernel: [    5.633878]
================================================================================


Grep-ing all leads to:

$ grep  "cannot be represented" kern.log | colrm 1 45|sort -u
 1193022 * 3600 cannot be represented in type 'int'
 1193024 * 3600 cannot be represented in type 'int'
 1193032 * 3600 cannot be represented in type 'int'
 1193033 * 3600 cannot be represented in type 'int'
 1193034 * 3600 cannot be represented in type 'int'
 1193035 * 3600 cannot be represented in type 'int'

How come tm_hour can store a value of 1193035 ?

> cheers
>
> > (*)
> > [41877.514338] ================================================================================
> > [41877.514364] UBSAN: Undefined behaviour in
> > ../include/linux/percpu_counter.h:137:13
> > [41877.514373] signed integer overflow:
> > [41877.514378] 9223352809007201260 + 41997676517838 cannot be
> > represented in type 'long long int'
> > [41877.514389] CPU: 0 PID: 0 Comm: swapper Not tainted 4.17.0+ #54
> > [41877.514394] Call Trace:
> > [41877.514411] [dffedd30] [c047a5f8] ubsan_epilogue+0x18/0x4c (unreliable)
> > [41877.514422] [dffedd40] [c047af98] handle_overflow+0xbc/0xdc
> > [41877.514437] [dffeddc0] [c043aaa8] cfq_completed_request+0x560/0x1234
> > [41877.514446] [dffede40] [c03f595c] __blk_put_request+0xb0/0x2dc
> > [41877.514460] [dffede80] [c05aa41c] scsi_end_request+0x19c/0x344
> > [41877.514469] [dffedeb0] [c05abba0] scsi_io_completion+0x4b4/0x854
> > [41877.514482] [dffedf10] [c040604c] blk_done_softirq+0xe4/0x1e0
> > [41877.514496] [dffedf60] [c07eef84] __do_softirq+0x16c/0x5f0
> > [41877.514508] [dffedfd0] [c0065160] irq_exit+0x110/0x1a8
> > [41877.514520] [dffedff0] [c001646c] call_do_irq+0x24/0x3c
> > [41877.514533] [c0ce5e80] [c0009a2c] do_IRQ+0x98/0x1a0
> > [41877.514541] [c0ce5eb0] [c001b93c] ret_from_except+0x0/0x14
> > [41877.514549] --- interrupt: 501 at arch_cpu_idle+0x30/0x78
> >                    LR = arch_cpu_idle+0x30/0x78
> > [41877.514558] [c0ce5f70] [c0ce4000] 0xc0ce4000 (unreliable)
> > [41877.514570] [c0ce5f80] [c00a3928] do_idle+0xc4/0x158
> > [41877.514577] [c0ce5fb0] [c00a3b74] cpu_startup_entry+0x24/0x28
> > [41877.514585] [c0ce5fc0] [c0988820] start_kernel+0x47c/0x490
> > [41877.514592] [c0ce5ff0] [00003444] 0x3444
> > [41877.514597] ================================================================================
> > [41886.390210] ================================================================================
> > [41886.390236] UBSAN: Undefined behaviour in
> > ../include/linux/percpu_counter.h:137:13
> > [41886.390245] signed integer overflow:
> > [41886.390250] 9223366156262940402 + 42006563339289 cannot be
> > represented in type 'long long int'
> > [41886.390260] CPU: 0 PID: 0 Comm: swapper Not tainted 4.17.0+ #54
> > [41886.390265] Call Trace:
> > [41886.390282] [dffedd30] [c047a5f8] ubsan_epilogue+0x18/0x4c (unreliable)
> > [41886.390293] [dffedd40] [c047af98] handle_overflow+0xbc/0xdc
> > [41886.390309] [dffeddc0] [c043a8c4] cfq_completed_request+0x37c/0x1234
> > [41886.390317] [dffede40] [c03f595c] __blk_put_request+0xb0/0x2dc
> > [41886.390331] [dffede80] [c05aa41c] scsi_end_request+0x19c/0x344
> > [41886.390340] [dffedeb0] [c05abba0] scsi_io_completion+0x4b4/0x854
> > [41886.390353] [dffedf10] [c040604c] blk_done_softirq+0xe4/0x1e0
> > [41886.390367] [dffedf60] [c07eef84] __do_softirq+0x16c/0x5f0
> > [41886.390379] [dffedfd0] [c0065160] irq_exit+0x110/0x1a8
> > [41886.390391] [dffedff0] [c001646c] call_do_irq+0x24/0x3c
> > [41886.390404] [c0ce5e80] [c0009a2c] do_IRQ+0x98/0x1a0
> > [41886.390411] [c0ce5eb0] [c001b93c] ret_from_except+0x0/0x14
> > [41886.390420] --- interrupt: 501 at arch_cpu_idle+0x30/0x78
> >                    LR = arch_cpu_idle+0x30/0x78
> > [41886.390429] [c0ce5f70] [c0ce4000] 0xc0ce4000 (unreliable)
> > [41886.390441] [c0ce5f80] [c00a3928] do_idle+0xc4/0x158
> > [41886.390449] [c0ce5fb0] [c00a3b74] cpu_startup_entry+0x24/0x28
> > [41886.390457] [c0ce5fc0] [c0988820] start_kernel+0x47c/0x490
> > [41886.390463] [c0ce5ff0] [00003444] 0x3444
> > [41886.390468] ================================================================================

^ permalink raw reply

* Re: [RFC PATCH 12/23] kernel/watchdog: Introduce a struct for NMI watchdog operations
From: Peter Zijlstra @ 2018-06-13  8:42 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Ricardo Neri, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Andi Kleen, Ashok Raj, Borislav Petkov, Tony Luck,
	Ravi V. Shankar, x86, sparclinux, linuxppc-dev, linux-kernel,
	Jacob Pan, Don Zickus, Michael Ellerman, Frederic Weisbecker,
	Babu Moger, David S. Miller, Benjamin Herrenschmidt,
	Paul Mackerras, Mathieu Desnoyers, Masami Hiramatsu,
	Andrew Morton, Philippe Ombredanne, Colin Ian King,
	Luis R. Rodriguez, iommu
In-Reply-To: <20180613174141.539fc6c1@roar.ozlabs.ibm.com>

On Wed, Jun 13, 2018 at 05:41:41PM +1000, Nicholas Piggin wrote:
> On Tue, 12 Jun 2018 17:57:32 -0700
> Ricardo Neri <ricardo.neri-calderon@linux.intel.com> wrote:
> 
> > Instead of exposing individual functions for the operations of the NMI
> > watchdog, define a common interface that can be used across multiple
> > implementations.
> > 
> > The struct nmi_watchdog_ops is defined for such operations. These initial
> > definitions include the enable, disable, start, stop, and cleanup
> > operations.
> > 
> > Only a single NMI watchdog can be used in the system. The operations of
> > this NMI watchdog are accessed via the new variable nmi_wd_ops. This
> > variable is set to point the operations of the first NMI watchdog that
> > initializes successfully. Even though at this moment, the only available
> > NMI watchdog is the perf-based hardlockup detector. More implementations
> > can be added in the future.
> 
> Cool, this looks pretty nice at a quick glance. sparc and powerpc at
> least have their own NMI watchdogs, it would be good to have those
> converted as well.

Yeah, agreed, this looks like half a patch.

> Is hpet a cross platform thing, or just x86? We should avoid
> proliferation of files under kernel/ I think, so with these watchdog
> driver structs then maybe implementations could go in drivers/ or
> arch/

HPET is mostly an x86 thing (although it can be found elsewhere), but the
whole thing relies on the x86 NMI mechanism and is thus firmly arch/
material (like the sparc and ppc thing).

^ permalink raw reply

* Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI
From: Peter Zijlstra @ 2018-06-13  8:34 UTC (permalink / raw)
  To: Ricardo Neri
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen,
	Ashok Raj, Borislav Petkov, Tony Luck, Ravi V. Shankar, x86,
	sparclinux, linuxppc-dev, linux-kernel, Jacob Pan, Daniel Lezcano,
	Andrew Morton, Levin, Alexander (Sasha Levin), Randy Dunlap,
	Masami Hiramatsu, Marc Zyngier, Bartosz Golaszewski, Doug Berger,
	Palmer Dabbelt, iommu
In-Reply-To: <1528851463-21140-4-git-send-email-ricardo.neri-calderon@linux.intel.com>

On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
> index 5426627..dbc5e02 100644
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -61,6 +61,8 @@
>   *                interrupt handler after suspending interrupts. For system
>   *                wakeup devices users need to implement wakeup detection in
>   *                their interrupt handlers.
> + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as non-maskable, if
> + *                supported by the chip.
>   */

NAK on the first 6 patches. You really _REALLY_ don't want to expose
NMIs to this level.

^ permalink raw reply

* Re: [RFC PATCH 3/4] powerpc/64s: Wire up arch_trigger_cpumask_backtrace()
From: Christophe LEROY @ 2018-06-13  7:32 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: npiggin
In-Reply-To: <20180502130729.24077-3-mpe@ellerman.id.au>

Hi Michael,

It looks like this commit generates the following error:

stacktrace.c:(.text+0x1b0): undefined reference to `.smp_send_safe_nmi_ipi'
make[1]: *** [vmlinux] Error 1
make: *** [sub-make] Error 2

See http://kisskb.ellerman.id.au/kisskb/buildresult/13395345/ for details

Seems like that function only exists when CONFIG_NMI_IPI is defined.

Christophe

Le 02/05/2018 à 15:07, Michael Ellerman a écrit :
> This allows eg. the RCU stall detector, or the soft/hardlockup
> detectors to trigger a backtrace on all CPUs.
> 
> We implement this by sending a "safe" NMI, which will actually only
> send an IPI. Unfortunately the generic code prints "NMI", so that's a
> little confusing but we can probably live with it.
> 
> If one of the CPUs doesn't respond to the IPI, we then print some info
> from its paca and do a backtrace based on its saved_r1.
> 
> Example output:
> 
>    INFO: rcu_sched detected stalls on CPUs/tasks:
>    	2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735
>    	(detected by 4, t=58847 jiffies, g=58, c=57, q=1258)
>    Sending NMI from CPU 4 to CPUs 2:
>    CPU 2 didn't respond to backtrace IPI, inspecting paca.
>    irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash)
>    Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale):
>    Call Trace:
>    [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable)
>    [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80
>    [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0
>    [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0
>    [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0
>    [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240
>    [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110
>    [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>   arch/powerpc/include/asm/nmi.h   |  4 ++++
>   arch/powerpc/kernel/stacktrace.c | 51 ++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 55 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
> index 9c80939b4d14..e97f58689ca7 100644
> --- a/arch/powerpc/include/asm/nmi.h
> +++ b/arch/powerpc/include/asm/nmi.h
> @@ -4,6 +4,10 @@
>   
>   #ifdef CONFIG_PPC_WATCHDOG
>   extern void arch_touch_nmi_watchdog(void);
> +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> +					   bool exclude_self);
> +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> +
>   #else
>   static inline void arch_touch_nmi_watchdog(void) {}
>   #endif
> diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
> index d534ed901538..cf4652d5df80 100644
> --- a/arch/powerpc/kernel/stacktrace.c
> +++ b/arch/powerpc/kernel/stacktrace.c
> @@ -11,12 +11,15 @@
>    */
>   
>   #include <linux/export.h>
> +#include <linux/nmi.h>
>   #include <linux/sched.h>
>   #include <linux/sched/debug.h>
>   #include <linux/stacktrace.h>
>   #include <asm/ptrace.h>
>   #include <asm/processor.h>
>   
> +#include <asm/paca.h>
> +
>   /*
>    * Save stack-backtrace addresses into a stack_trace buffer.
>    */
> @@ -76,3 +79,51 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
>   	save_context_stack(trace, regs->gpr[1], current, 0);
>   }
>   EXPORT_SYMBOL_GPL(save_stack_trace_regs);
> +
> +#ifdef CONFIG_PPC_BOOK3S_64
> +static void handle_backtrace_ipi(struct pt_regs *regs)
> +{
> +	nmi_cpu_backtrace(regs);
> +}
> +
> +static void raise_backtrace_ipi(cpumask_t *mask)
> +{
> +	unsigned int cpu;
> +
> +	for_each_cpu(cpu, mask) {
> +		if (cpu == smp_processor_id())
> +			handle_backtrace_ipi(NULL);
> +		else
> +			smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
> +	}
> +
> +	for_each_cpu(cpu, mask) {
> +		struct paca_struct *p = paca_ptrs[cpu];
> +
> +		cpumask_clear_cpu(cpu, mask);
> +
> +		pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
> +		if (!virt_addr_valid(p)) {
> +			pr_warn("paca pointer appears corrupt? (%px)\n", p);
> +			continue;
> +		}
> +
> +		pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
> +			p->irq_soft_mask, p->in_mce, p->in_nmi);
> +
> +		if (virt_addr_valid(p->__current))
> +			pr_cont(" current: %d (%s)\n", p->__current->pid,
> +				p->__current->comm);
> +		else
> +			pr_cont(" current pointer corrupt? (%px)\n", p->__current);
> +
> +		pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
> +		show_stack(p->__current, (unsigned long *)p->saved_r1);
> +	}
> +}
> +
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +{
> +	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
> +}
> +#endif /* CONFIG_PPC_BOOK3S_64 */
> 

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virtio devices
From: Christoph Hellwig @ 2018-06-13  7:41 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Ram Pai, Michael S. Tsirkin, Christoph Hellwig, robh, pawel.moll,
	Tom Lendacky, aik, jasowang, cohuck, linux-kernel, virtualization,
	joe, Rustad, Mark D, david, linuxppc-dev, elfring,
	Anshuman Khandual
In-Reply-To: <07b804fccd7373c650be79ac9fa77ae7f2375ced.camel@kernel.crashing.org>

On Mon, Jun 11, 2018 at 01:29:18PM +1000, Benjamin Herrenschmidt wrote:
> At the risk of repeating myself, let's just do the first pass which is
> to switch virtio over to always using the DMA API in the actual data
> flow code, with a hook at initialization time that replaces the DMA ops
> with some home cooked "direct" ops in the case where the IOMMU flag
> isn't set.
> 
> This will be equivalent to what we have today but avoids having 2
> separate code path all over the driver.
> 
> Then a second stage, I think, is to replace this "hook" so that the
> architecture gets a say in the matter.

I don't think we can actually use dma_direct_ops.  It still allows
architectures to override parts of the dma setup, whereas virtio seems
to blindly assume phys == dma and no cache flushing.

I think the right way forward is to either add a new
VIRTIO_F_IS_PCI_DEVICE flag or, if deemed possible, redefine the
existing iommu flag, and then make sure recent qemu always sets it.

^ permalink raw reply

* Re: [RFC PATCH 12/23] kernel/watchdog: Introduce a struct for NMI watchdog operations
From: Nicholas Piggin @ 2018-06-13  7:41 UTC (permalink / raw)
  To: Ricardo Neri
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen,
	Ashok Raj, Borislav Petkov, Tony Luck, Ravi V. Shankar, x86,
	sparclinux, linuxppc-dev, linux-kernel, Jacob Pan, Don Zickus,
	Michael Ellerman, Frederic Weisbecker, Babu Moger,
	David S. Miller, Benjamin Herrenschmidt, Paul Mackerras,
	Mathieu Desnoyers, Masami Hiramatsu, Peter Zijlstra,
	Andrew Morton, Philippe Ombredanne, Colin Ian King,
	Luis R. Rodriguez, iommu
In-Reply-To: <1528851463-21140-13-git-send-email-ricardo.neri-calderon@linux.intel.com>

On Tue, 12 Jun 2018 17:57:32 -0700
Ricardo Neri <ricardo.neri-calderon@linux.intel.com> wrote:

> Instead of exposing individual functions for the operations of the NMI
> watchdog, define a common interface that can be used across multiple
> implementations.
> 
> The struct nmi_watchdog_ops is defined for such operations. These initial
> definitions include the enable, disable, start, stop, and cleanup
> operations.
> 
> Only a single NMI watchdog can be used in the system. The operations of
> this NMI watchdog are accessed via the new variable nmi_wd_ops. This
> variable is set to point to the operations of the first NMI watchdog that
> initializes successfully. Even though, at this moment, the only available
> NMI watchdog is the perf-based hardlockup detector, more implementations
> can be added in the future.

Cool, this looks pretty nice at a quick glance. sparc and powerpc at
least have their own NMI watchdogs, it would be good to have those
converted as well.

Is hpet a cross-platform thing, or just x86? We should avoid
proliferation of files under kernel/, I think, so with these watchdog
driver structs maybe the implementations could go in drivers/ or
arch/.

Thanks,
Nick

^ permalink raw reply

* Re: 4.17.0-10146-gf0dc7f9c6dd9: hw csum failure on powerpc+sungem
From: Mathieu Malaterre @ 2018-06-13  7:38 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, Meelis Roos, Christophe LEROY
In-Reply-To: <CA+7wUszJ0QCMMdU06z7CN3Xg0JnV2e+xmgbZ7Wb2=-doSDrSwQ@mail.gmail.com>

Hi all,

On Tue, Jun 12, 2018 at 10:15 AM Mathieu Malaterre <malat@debian.org> wrote:
>
> Hi Balbir,
>
> On Tue, Jun 12, 2018 at 9:39 AM Balbir Singh <bsingharora@gmail.com> wrote:
> >
> >
> > On 12/06/18 06:20, Mathieu Malaterre wrote:
> >
> > > Hi Meelis,
> > >
> > > On Mon, Jun 11, 2018 at 1:21 PM Meelis Roos <mroos@linux.ee> wrote:
> > >> I am seeing this on PowerMac G4 with sungem ethernet driver. 4.17 was
> > >> OK, 4.17.0-10146-gf0dc7f9c6dd9 is problematic.
> > > Same here.
> > >
> > >> [  140.518664] eth0: hw csum failure
> > >> [  140.518699] CPU: 0 PID: 1237 Comm: postconf Not tainted 4.17.0-10146-gf0dc7f9c6dd9 #83
> > >> [  140.518707] Call Trace:
> > >> [  140.518734] [effefd90] [c03d6db8] __skb_checksum_complete+0xd8/0xdc (unreliable)
> > >> [  140.518759] [effefdb0] [c04c1284] icmpv6_rcv+0x248/0x4ec
> > >> [  140.518775] [effefdd0] [c049a448] ip6_input_finish.constprop.0+0x11c/0x5f4
> > >> [  140.518786] [effefe10] [c049b1c0] ip6_mc_input+0xcc/0x100
> > >> [  140.518807] [effefe20] [c03e110c] __netif_receive_skb_core+0x310/0x944
> > >> [  140.518820] [effefe70] [c03e76ec] napi_gro_receive+0xd0/0xe8
> > >> [  140.518845] [effefe80] [f3e1f66c] gem_poll+0x618/0x1274 [sungem]
> > >> [  140.518856] [effeff30] [c03e6f0c] net_rx_action+0x198/0x374
> > >> [  140.518872] [effeff90] [c0501a88] __do_softirq+0x120/0x278
> > >> [  140.518890] [effeffe0] [c0036188] irq_exit+0xd8/0xdc
> > >> [  140.518908] [effefff0] [c000f478] call_do_irq+0x24/0x3c
> > >> [  140.518925] [d05a5d30] [c0007120] do_IRQ+0x74/0xf0
> > >> [  140.518941] [d05a5d50] [c0012474] ret_from_except+0x0/0x14
> > >> [  140.518960] --- interrupt: 501 at copy_page+0x40/0x90
> > >>                    LR = copy_user_page+0x18/0x30
> > >> [  140.518973] [d05a5e10] [d058cd80] 0xd058cd80 (unreliable)
> > >> [  140.518989] [d05a5e20] [c00fa2bc] wp_page_copy+0xec/0x654
> > >> [  140.519002] [d05a5e60] [c00fd3a4] do_wp_page+0xa8/0x5b4
> > >> [  140.519013] [d05a5e90] [c00fe934] handle_mm_fault+0x564/0xa84
> > >> [  140.519025] [d05a5f00] [c0016230] do_page_fault+0x1bc/0x7e8
> > >> [  140.519037] [d05a5f40] [c0012300] handle_page_fault+0x14/0x40
> > >> [  140.519048] --- interrupt: 301 at 0xb78b6864
> > >>                    LR = 0xb78b6c54
> > >>
> > > For some reason if I do a git bisect it returns that:
> > >
> > > $ git bisect good
> > > 3036bc45364f98515a2c446d7fac2c34dcfbeff4 is the first bad commit
> > >
> > > Could you also check on your side please.
> >
> > Don't have a G4, but the related commits look like
> >
> > 373e098e1e788d7b89ec0f31765a6c08e2ea0f7c and e9c4943a107b56696e4872cdffdba6b0c7277c77
> >
> > Balbir
> >
>
> Indeed that makes more sense. I must have messed up during my
> git-bisect operation. Will try a simple git-revert on those and report
> back.

Here is what I get.

Current git HEAD is 0c14e43a42e4e44f70963f8ccf89461290c4e4da. If I do:

$ git revert e9c4943a107b56696e4872cdffdba6b0c7277c77
$ git revert 373e098e1e788d7b89ec0f31765a6c08e2ea0f7c
$ rm -rf g4
$ make O=g4 ARCH=powerpc g4_defconfig
$ make -j8 O=g4 ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu- bindeb-pkg

-> I can still see the error in dmesg.

> Thanks

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox