linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 07/20] pci/xgene-msi: Convert to hotplug state machine
       [not found] <20161117183541.8588-1-bigeasy@linutronix.de>
@ 2016-11-17 18:35 ` Sebastian Andrzej Siewior
  2016-11-17 18:35 ` [PATCH 14/20] arm/bL_switcher: " Sebastian Andrzej Siewior
  2016-11-17 18:35 ` [PATCH 15/20] ARM/hw_breakpoint: " Sebastian Andrzej Siewior
  2 siblings, 0 replies; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2016-11-17 18:35 UTC (permalink / raw)
  To: linux-arm-kernel

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

Cc: Duc Dang <dhdang@apm.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: linux-pci at vger.kernel.org
Cc: linux-arm-kernel at lists.infradead.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/pci/host/pci-xgene-msi.c | 69 +++++++++++-----------------------------
 include/linux/cpuhotplug.h       |  1 +
 2 files changed, 20 insertions(+), 50 deletions(-)

diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
index a6456b578269..1f38d0836751 100644
--- a/drivers/pci/host/pci-xgene-msi.c
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -360,16 +360,16 @@ static void xgene_msi_isr(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
+static enum cpuhp_state pci_xgene_online;
+
 static int xgene_msi_remove(struct platform_device *pdev)
 {
-	int virq, i;
 	struct xgene_msi *msi = platform_get_drvdata(pdev);
 
-	for (i = 0; i < NR_HW_IRQS; i++) {
-		virq = msi->msi_groups[i].gic_irq;
-		if (virq != 0)
-			irq_set_chained_handler_and_data(virq, NULL, NULL);
-	}
+	if (pci_xgene_online)
+		cpuhp_remove_state(pci_xgene_online);
+	cpuhp_remove_state(CPUHP_PCI_XGENE_DEAD);
+
 	kfree(msi->msi_groups);
 
 	kfree(msi->bitmap);
@@ -427,7 +427,7 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu)
 	return 0;
 }
 
-static void xgene_msi_hwirq_free(unsigned int cpu)
+static int xgene_msi_hwirq_free(unsigned int cpu)
 {
 	struct xgene_msi *msi = &xgene_msi_ctrl;
 	struct xgene_msi_group *msi_group;
@@ -441,33 +441,9 @@ static void xgene_msi_hwirq_free(unsigned int cpu)
 		irq_set_chained_handler_and_data(msi_group->gic_irq, NULL,
 						 NULL);
 	}
+	return 0;
 }
 
-static int xgene_msi_cpu_callback(struct notifier_block *nfb,
-				  unsigned long action, void *hcpu)
-{
-	unsigned cpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		xgene_msi_hwirq_alloc(cpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		xgene_msi_hwirq_free(cpu);
-		break;
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block xgene_msi_cpu_notifier = {
-	.notifier_call = xgene_msi_cpu_callback,
-};
-
 static const struct of_device_id xgene_msi_match_table[] = {
 	{.compatible = "apm,xgene1-msi"},
 	{},
@@ -478,7 +454,6 @@ static int xgene_msi_probe(struct platform_device *pdev)
 	struct resource *res;
 	int rc, irq_index;
 	struct xgene_msi *xgene_msi;
-	unsigned int cpu;
 	int virt_msir;
 	u32 msi_val, msi_idx;
 
@@ -540,28 +515,22 @@ static int xgene_msi_probe(struct platform_device *pdev)
 		}
 	}
 
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		if (xgene_msi_hwirq_alloc(cpu)) {
-			dev_err(&pdev->dev, "failed to register MSI handlers\n");
-			cpu_notifier_register_done();
-			goto error;
-		}
-
-	rc = __register_hotcpu_notifier(&xgene_msi_cpu_notifier);
-	if (rc) {
-		dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
-		cpu_notifier_register_done();
-		goto error;
-	}
-
-	cpu_notifier_register_done();
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pci/xgene:online",
+			       xgene_msi_hwirq_alloc, NULL);
+	if (rc)
+		goto err_cpuhp;
+	pci_xgene_online = rc;
+	rc = cpuhp_setup_state(CPUHP_PCI_XGENE_DEAD, "pci/xgene:dead", NULL,
+			       xgene_msi_hwirq_free);
+	if (rc)
+		goto err_cpuhp;
 
 	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
 
 	return 0;
 
+err_cpuhp:
+	dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
 error:
 	xgene_msi_remove(pdev);
 	return rc;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 6506dce8343a..fd5598b8353a 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -38,6 +38,7 @@ enum cpuhp_state {
 	CPUHP_RADIX_DEAD,
 	CPUHP_PAGE_ALLOC_DEAD,
 	CPUHP_NET_DEV_DEAD,
+	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 14/20] arm/bL_switcher: Convert to hotplug state machine
       [not found] <20161117183541.8588-1-bigeasy@linutronix.de>
  2016-11-17 18:35 ` [PATCH 07/20] pci/xgene-msi: Convert to hotplug state machine Sebastian Andrzej Siewior
@ 2016-11-17 18:35 ` Sebastian Andrzej Siewior
  2016-11-17 18:35 ` [PATCH 15/20] ARM/hw_breakpoint: " Sebastian Andrzej Siewior
  2 siblings, 0 replies; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2016-11-17 18:35 UTC (permalink / raw)
  To: linux-arm-kernel

Install the callbacks via the state machine.

Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arm-kernel at lists.infradead.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm/common/bL_switcher.c | 34 ++++++++++++++++++++--------------
 include/linux/cpuhotplug.h    |  1 +
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 37dc0fe1093f..46730017b3c5 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -757,19 +757,18 @@ EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
  * while the switcher is active.
  * We're just not ready to deal with that given the trickery involved.
  */
-static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
+static int bL_switcher_cpu_pre(unsigned int cpu)
 {
-	if (bL_switcher_active) {
-		int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu];
-		switch (action & 0xf) {
-		case CPU_UP_PREPARE:
-		case CPU_DOWN_PREPARE:
-			if (pairing == -1)
-				return NOTIFY_BAD;
-		}
-	}
-	return NOTIFY_DONE;
+	int pairing;
+
+	if (!bL_switcher_active)
+		return 0;
+
+	pairing = bL_switcher_cpu_pairing[cpu];
+
+	if (pairing == -1)
+		return -EINVAL;
+	return 0;
 }
 
 static bool no_bL_switcher;
@@ -782,8 +781,15 @@ static int __init bL_switcher_init(void)
 	if (!mcpm_is_available())
 		return -ENODEV;
 
-	cpu_notifier(bL_switcher_hotplug_callback, 0);
-
+	cpuhp_setup_state_nocalls(CPUHP_ARM_BL_PREPARE, "arm/bl:prepare",
+				  bL_switcher_cpu_pre, NULL);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "arm/bl:predown",
+					NULL, bL_switcher_cpu_pre);
+	if (ret < 0) {
+		cpuhp_remove_state_nocalls(CPUHP_ARM_BL_PREPARE);
+		pr_err("bL_switcher: Failed to allocate a hotplug state\n");
+		return ret;
+	}
 	if (!no_bL_switcher) {
 		ret = bL_switcher_enable();
 		if (ret)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 69abf2c09f6c..4b99b79c16e5 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -64,6 +64,7 @@ enum cpuhp_state {
 	CPUHP_X86_CPUID_PREPARE,
 	CPUHP_X86_MSR_PREPARE,
 	CPUHP_NET_IUCV_PREPARE,
+	CPUHP_ARM_BL_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_NOTF_ERR_INJ_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
       [not found] <20161117183541.8588-1-bigeasy@linutronix.de>
  2016-11-17 18:35 ` [PATCH 07/20] pci/xgene-msi: Convert to hotplug state machine Sebastian Andrzej Siewior
  2016-11-17 18:35 ` [PATCH 14/20] arm/bL_switcher: " Sebastian Andrzej Siewior
@ 2016-11-17 18:35 ` Sebastian Andrzej Siewior
  2016-11-18 12:04   ` Will Deacon
  2017-01-02 14:15   ` Linus Walleij
  2 siblings, 2 replies; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2016-11-17 18:35 UTC (permalink / raw)
  To: linux-arm-kernel

Install the callbacks via the state machine and let the core invoke
the callbacks on the already online CPUs.

smp_call_function_single() has been removed because the function is already
invoked on the target CPU.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arm-kernel at lists.infradead.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/kernel/hw_breakpoint.c | 44 ++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index b8df45883cf7..51cff5a8feff 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -925,9 +925,9 @@ static bool core_has_os_save_restore(void)
 	}
 }
 
-static void reset_ctrl_regs(void *unused)
+static void reset_ctrl_regs(unsigned int cpu)
 {
-	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
+	int i, raw_num_brps, err = 0;
 	u32 val;
 
 	/*
@@ -1020,25 +1020,20 @@ static void reset_ctrl_regs(void *unused)
 		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
 }
 
-static int dbg_reset_notify(struct notifier_block *self,
-				      unsigned long action, void *cpu)
+static int dbg_reset_online(unsigned int cpu)
 {
-	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
-		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
-
-	return NOTIFY_OK;
+	local_irq_disable();
+	reset_ctrl_regs(cpu);
+	local_irq_enable();
+	return 0;
 }
 
-static struct notifier_block dbg_reset_nb = {
-	.notifier_call = dbg_reset_notify,
-};
-
 #ifdef CONFIG_CPU_PM
 static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action,
 			     void *v)
 {
 	if (action == CPU_PM_EXIT)
-		reset_ctrl_regs(NULL);
+		reset_ctrl_regs(smp_processor_id());
 
 	return NOTIFY_OK;
 }
@@ -1059,6 +1054,8 @@ static inline void pm_init(void)
 
 static int __init arch_hw_breakpoint_init(void)
 {
+	int ret;
+
 	debug_arch = get_debug_arch();
 
 	if (!debug_arch_supported()) {
@@ -1072,8 +1069,6 @@ static int __init arch_hw_breakpoint_init(void)
 	core_num_brps = get_num_brps();
 	core_num_wrps = get_num_wrps();
 
-	cpu_notifier_register_begin();
-
 	/*
 	 * We need to tread carefully here because DBGSWENABLE may be
 	 * driven low on this core and there isn't an architected way to
@@ -1082,15 +1077,18 @@ static int __init arch_hw_breakpoint_init(void)
 	register_undef_hook(&debug_reg_hook);
 
 	/*
-	 * Reset the breakpoint resources. We assume that a halting
-	 * debugger will leave the world in a nice state for us.
+	 * Register CPU notifier which resets the breakpoint resources. We
+	 * assume that a halting debugger will leave the world in a nice state
+	 * for us.
 	 */
-	on_each_cpu(reset_ctrl_regs, NULL, 1);
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
+				dbg_reset_online, NULL);
 	unregister_undef_hook(&debug_reg_hook);
-	if (!cpumask_empty(&debug_err_mask)) {
+	if (WARN_ON(ret < 0) || !cpumask_empty(&debug_err_mask)) {
 		core_num_brps = 0;
 		core_num_wrps = 0;
-		cpu_notifier_register_done();
+		if (ret > 0)
+			cpuhp_remove_state_nocalls(ret);
 		return 0;
 	}
 
@@ -1109,11 +1107,7 @@ static int __init arch_hw_breakpoint_init(void)
 	hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
 			TRAP_HWBKPT, "breakpoint debug exception");
 
-	/* Register hotplug and PM notifiers. */
-	__register_cpu_notifier(&dbg_reset_nb);
-
-	cpu_notifier_register_done();
-
+	/* Register PM notifiers. */
 	pm_init();
 	return 0;
 }
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-17 18:35 ` [PATCH 15/20] ARM/hw_breakpoint: " Sebastian Andrzej Siewior
@ 2016-11-18 12:04   ` Will Deacon
  2016-11-18 13:11     ` Thomas Gleixner
  2017-01-02 14:15   ` Linus Walleij
  1 sibling, 1 reply; 21+ messages in thread
From: Will Deacon @ 2016-11-18 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Nov 17, 2016 at 07:35:36PM +0100, Sebastian Andrzej Siewior wrote:
> Install the callbacks via the state machine and let the core invoke
> the callbacks on the already online CPUs.
> 
> smp_call_function_single() has been removed because the function is already
> invoked on the target CPU.
> 
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: linux-arm-kernel at lists.infradead.org
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/arm/kernel/hw_breakpoint.c | 44 ++++++++++++++++++-----------------------
>  1 file changed, 19 insertions(+), 25 deletions(-)

[...]

>  static int __init arch_hw_breakpoint_init(void)
>  {
> +	int ret;
> +
>  	debug_arch = get_debug_arch();
>  
>  	if (!debug_arch_supported()) {
> @@ -1072,8 +1069,6 @@ static int __init arch_hw_breakpoint_init(void)
>  	core_num_brps = get_num_brps();
>  	core_num_wrps = get_num_wrps();
>  
> -	cpu_notifier_register_begin();
> -
>  	/*
>  	 * We need to tread carefully here because DBGSWENABLE may be
>  	 * driven low on this core and there isn't an architected way to
> @@ -1082,15 +1077,18 @@ static int __init arch_hw_breakpoint_init(void)
>  	register_undef_hook(&debug_reg_hook);
>  
>  	/*
> -	 * Reset the breakpoint resources. We assume that a halting
> -	 * debugger will leave the world in a nice state for us.
> +	 * Register CPU notifier which resets the breakpoint resources. We
> +	 * assume that a halting debugger will leave the world in a nice state
> +	 * for us.
>  	 */
> -	on_each_cpu(reset_ctrl_regs, NULL, 1);
> +	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
> +				dbg_reset_online, NULL);

I'm slightly unsure about this. The dbg_reset_online callback can execute
undefined instructions (unfortunately, there's no way to probe the presence
of some of the debug registers), so it absolutely has to run within the 
register_undef_hook/unregister_undef_hook calls that are in this function.

With this patch, I worry that the callback can be postponed to ONLINE time
for other CPUs, and then the kernel will panic.

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-18 12:04   ` Will Deacon
@ 2016-11-18 13:11     ` Thomas Gleixner
  2016-11-18 13:29       ` Will Deacon
  0 siblings, 1 reply; 21+ messages in thread
From: Thomas Gleixner @ 2016-11-18 13:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 18 Nov 2016, Will Deacon wrote:
> On Thu, Nov 17, 2016 at 07:35:36PM +0100, Sebastian Andrzej Siewior wrote:
> > Install the callbacks via the state machine and let the core invoke
> > the callbacks on the already online CPUs.
> > 
> > smp_call_function_single() has been removed because the function is already
> > invoked on the target CPU.
> > 
> > Cc: Will Deacon <will.deacon@arm.com>
> > Cc: Mark Rutland <mark.rutland@arm.com>
> > Cc: Russell King <linux@armlinux.org.uk>
> > Cc: linux-arm-kernel at lists.infradead.org
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > ---
> >  arch/arm/kernel/hw_breakpoint.c | 44 ++++++++++++++++++-----------------------
> >  1 file changed, 19 insertions(+), 25 deletions(-)
> 
> [...]
> 
> >  static int __init arch_hw_breakpoint_init(void)
> >  {
> > +	int ret;
> > +
> >  	debug_arch = get_debug_arch();
> >  
> >  	if (!debug_arch_supported()) {
> > @@ -1072,8 +1069,6 @@ static int __init arch_hw_breakpoint_init(void)
> >  	core_num_brps = get_num_brps();
> >  	core_num_wrps = get_num_wrps();
> >  
> > -	cpu_notifier_register_begin();
> > -
> >  	/*
> >  	 * We need to tread carefully here because DBGSWENABLE may be
> >  	 * driven low on this core and there isn't an architected way to
> > @@ -1082,15 +1077,18 @@ static int __init arch_hw_breakpoint_init(void)
> >  	register_undef_hook(&debug_reg_hook);
> >  
> >  	/*
> > -	 * Reset the breakpoint resources. We assume that a halting
> > -	 * debugger will leave the world in a nice state for us.
> > +	 * Register CPU notifier which resets the breakpoint resources. We
> > +	 * assume that a halting debugger will leave the world in a nice state
> > +	 * for us.
> >  	 */
> > -	on_each_cpu(reset_ctrl_regs, NULL, 1);
> > +	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
> > +				dbg_reset_online, NULL);
> 
> I'm slightly unsure about this. The dbg_reset_online callback can execute
> undefined instructions (unfortunately, there's no way to probe the presence
> of some of the debug registers), so it absolutely has to run within the 
> register_undef_hook/unregister_undef_hook calls that are in this function.
> 
> With this patch, I worry that the callback can be postponed to ONLINE time
> for other CPUs, and then the kernel will panic.

No. The flow is the following:

  	register_undef_hook(&debug_reg_hook);

	ret = cpuhp_setup_state(.., dbg_reset_online, NULL);
	      {
		for_each_online_cpu(cpu) {
			ret = call_on_cpu(cpu, dbg_reset_online);
			if (ret)
			      return ret:
		}
	      }

  	unregister_undef_hook(&debug_reg_hook);
	
The only difference to the current code is that the call is not invoked via
a smp function call (on_each_cpu), it's pushed to the hotplug thread
context of each cpu and executed there.

But it's guaranteed that cpuhp_setup_state() will not return before the
callback has been invoked on each online cpu.

If cpus are not yet online when that code is invoked, then it's the same
behaviour as before. It will be invoked when the cpu comes online.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-18 13:11     ` Thomas Gleixner
@ 2016-11-18 13:29       ` Will Deacon
  2016-11-18 13:42         ` Thomas Gleixner
  0 siblings, 1 reply; 21+ messages in thread
From: Will Deacon @ 2016-11-18 13:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Nov 18, 2016 at 02:11:58PM +0100, Thomas Gleixner wrote:
> On Fri, 18 Nov 2016, Will Deacon wrote:
> > On Thu, Nov 17, 2016 at 07:35:36PM +0100, Sebastian Andrzej Siewior wrote:
> > > @@ -1082,15 +1077,18 @@ static int __init arch_hw_breakpoint_init(void)
> > >  	register_undef_hook(&debug_reg_hook);
> > >  
> > >  	/*
> > > -	 * Reset the breakpoint resources. We assume that a halting
> > > -	 * debugger will leave the world in a nice state for us.
> > > +	 * Register CPU notifier which resets the breakpoint resources. We
> > > +	 * assume that a halting debugger will leave the world in a nice state
> > > +	 * for us.
> > >  	 */
> > > -	on_each_cpu(reset_ctrl_regs, NULL, 1);
> > > +	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
> > > +				dbg_reset_online, NULL);
> > 
> > I'm slightly unsure about this. The dbg_reset_online callback can execute
> > undefined instructions (unfortunately, there's no way to probe the presence
> > of some of the debug registers), so it absolutely has to run within the 
> > register_undef_hook/unregister_undef_hook calls that are in this function.
> > 
> > With this patch, I worry that the callback can be postponed to ONLINE time
> > for other CPUs, and then the kernel will panic.
> 
> No. The flow is the following:
> 
>   	register_undef_hook(&debug_reg_hook);
> 
> 	ret = cpuhp_setup_state(.., dbg_reset_online, NULL);
> 	      {
> 		for_each_online_cpu(cpu) {
> 			ret = call_on_cpu(cpu, dbg_reset_online);
> 			if (ret)
> 			      return ret:
> 		}
> 	      }
> 
>   	unregister_undef_hook(&debug_reg_hook);
> 	
> The only difference to the current code is that the call is not invoked via
> a smp function call (on_each_cpu), it's pushed to the hotplug thread
> context of each cpu and executed there.
> 
> But it's guaranteed that cpuhp_setup_state() will not return before the
> callback has been invoked on each online cpu.

Ok, that's good.

> If cpus are not yet online when that code is invoked, then it's the same
> behaviour as before. It will be invoked when the cpu comes online.

Just to check, but what stops a CPU from coming online between the call
to cpuhp_setup_state and the call to cpuhp_remove_state_nocalls in the
case of failure (debug_err_mask isn't empty)?

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-18 13:29       ` Will Deacon
@ 2016-11-18 13:42         ` Thomas Gleixner
  2016-11-18 13:48           ` Will Deacon
  0 siblings, 1 reply; 21+ messages in thread
From: Thomas Gleixner @ 2016-11-18 13:42 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 18 Nov 2016, Will Deacon wrote:
> On Fri, Nov 18, 2016 at 02:11:58PM +0100, Thomas Gleixner wrote:
> > But it's guaranteed that cpuhp_setup_state() will not return before the
> > callback has been invoked on each online cpu.
> 
> Ok, that's good.
> 
> > If cpus are not yet online when that code is invoked, then it's the same
> > behaviour as before. It will be invoked when the cpu comes online.
> 
> Just to check, but what stops a CPU from coming online between the call
> to cpuhp_setup_state and the call to cpuhp_remove_state_nocalls in the
> case of failure (debug_err_mask isn't empty)?

Indeed! I missed that part. So we still need a get/put_online_cpus()
protection around all of this.

Just for curiosity sake. Wouldn't it be simpler and less error prone to
make the ARM_DBG_READ/WRITE macros use the exception table and handle that
in the undefined instruction handler to avoid this hook dance?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-18 13:42         ` Thomas Gleixner
@ 2016-11-18 13:48           ` Will Deacon
  2016-11-18 13:59             ` Thomas Gleixner
  0 siblings, 1 reply; 21+ messages in thread
From: Will Deacon @ 2016-11-18 13:48 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Nov 18, 2016 at 02:42:15PM +0100, Thomas Gleixner wrote:
> On Fri, 18 Nov 2016, Will Deacon wrote:
> > On Fri, Nov 18, 2016 at 02:11:58PM +0100, Thomas Gleixner wrote:
> > > But it's guaranteed that cpuhp_setup_state() will not return before the
> > > callback has been invoked on each online cpu.
> > 
> > Ok, that's good.
> > 
> > > If cpus are not yet online when that code is invoked, then it's the same
> > > behaviour as before. It will be invoked when the cpu comes online.
> > 
> > Just to check, but what stops a CPU from coming online between the call
> > to cpuhp_setup_state and the call to cpuhp_remove_state_nocalls in the
> > case of failure (debug_err_mask isn't empty)?
> 
> Indeed! I missed that part. So we still need a get/put_online_cpus()
> protection around all of this.

Yes, that should do it.

> Just for curiosity sake. Wouldn't it be simpler and less error prone to
> make the ARM_DBG_READ/WRITE macros use the exception table and handle that
> in the undefined instruction handler to avoid this hook dance?

That would be an option, but it's only the reset sequence that could
generate this fault so it's simpler to isolate it there. We'd also have
to take into account SMP if we toggle the handler in the READ/WRITE
accessors, since the fault handler framework is system-wide as opposed
to per-cpu. The whole thing is grotty as hell.

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-18 13:48           ` Will Deacon
@ 2016-11-18 13:59             ` Thomas Gleixner
  2016-11-18 14:11               ` Will Deacon
  0 siblings, 1 reply; 21+ messages in thread
From: Thomas Gleixner @ 2016-11-18 13:59 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 18 Nov 2016, Will Deacon wrote:
> On Fri, Nov 18, 2016 at 02:42:15PM +0100, Thomas Gleixner wrote:
> > On Fri, 18 Nov 2016, Will Deacon wrote:
> > > On Fri, Nov 18, 2016 at 02:11:58PM +0100, Thomas Gleixner wrote:
> > > > But it's guaranteed that cpuhp_setup_state() will not return before the
> > > > callback has been invoked on each online cpu.
> > > 
> > > Ok, that's good.
> > > 
> > > > If cpus are not yet online when that code is invoked, then it's the same
> > > > behaviour as before. It will be invoked when the cpu comes online.
> > > 
> > > Just to check, but what stops a CPU from coming online between the call
> > > to cpuhp_setup_state and the call to cpuhp_remove_state_nocalls in the
> > > case of failure (debug_err_mask isn't empty)?
> > 
> > Indeed! I missed that part. So we still need a get/put_online_cpus()
> > protection around all of this.
> 
> Yes, that should do it.
> 
> > Just for curiosity sake. Wouldn't it be simpler and less error prone to
> > make the ARM_DBG_READ/WRITE macros use the exception table and handle that
> > in the undefined instruction handler to avoid this hook dance?
> 
> That would be an option, but it's only the reset sequence that could
> generate this fault so it's simpler to isolate it there. 

ARM_DBG_READ/WRITE_SAFE() then for reset_ctrl_regs()

> We'd also have to take into account SMP if we toggle the handler in the
> READ/WRITE accessors, since the fault handler framework is system-wide as
> opposed to per-cpu. The whole thing is grotty as hell.

The exception table is not toggling anything. It's just providing an entry
in the exception tables, which is scanned by fixup_exception(), which then
moves PC to the exception code. See __get_user_asm().

So the whole thing becomes:

static int reset_ctrl_regs(unsigned cpu)
{
	....
	if (ARM_DBG_READ_SAFE(c1, c5, 4, val))
		return -ENODEV;
	....
	return 0;
}

All you need is the extra

    	if (fixup_exception(regs))
		return;

in do_undefinstr() like it is there in do_kernel_fault(). No hooks, no
scope issues, just works.

I just mention this because that's how x86 implements rdmsr/wrmsr_safe() so
it can probe msr access. The difference though it that this results in a
#GP and not in #UD, but that's not a show stopper :)

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-18 13:59             ` Thomas Gleixner
@ 2016-11-18 14:11               ` Will Deacon
  0 siblings, 0 replies; 21+ messages in thread
From: Will Deacon @ 2016-11-18 14:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Nov 18, 2016 at 02:59:04PM +0100, Thomas Gleixner wrote:
> On Fri, 18 Nov 2016, Will Deacon wrote:
> > On Fri, Nov 18, 2016 at 02:42:15PM +0100, Thomas Gleixner wrote:
> > > On Fri, 18 Nov 2016, Will Deacon wrote:
> > > > On Fri, Nov 18, 2016 at 02:11:58PM +0100, Thomas Gleixner wrote:
> > > > > But it's guaranteed that cpuhp_setup_state() will not return before the
> > > > > callback has been invoked on each online cpu.
> > > > 
> > > > Ok, that's good.
> > > > 
> > > > > If cpus are not yet online when that code is invoked, then it's the same
> > > > > behaviour as before. It will be invoked when the cpu comes online.
> > > > 
> > > > Just to check, but what stops a CPU from coming online between the call
> > > > to cpuhp_setup_state and the call to cpuhp_remove_state_nocalls in the
> > > > case of failure (debug_err_mask isn't empty)?
> > > 
> > > Indeed! I missed that part. So we still need a get/put_online_cpus()
> > > protection around all of this.
> > 
> > Yes, that should do it.
> > 
> > > Just for curiosity sake. Wouldn't it be simpler and less error prone to
> > > make the ARM_DBG_READ/WRITE macros use the exception table and handle that
> > > in the undefined instruction handler to avoid this hook dance?
> > 
> > That would be an option, but it's only the reset sequence that could
> > generate this fault so it's simpler to isolate it there. 
> 
> ARM_DBG_READ/WRITE_SAFE() then for reset_ctrl_regs()
> 
> > We'd also have to take into account SMP if we toggle the handler in the
> > READ/WRITE accessors, since the fault handler framework is system-wide as
> > opposed to per-cpu. The whole thing is grotty as hell.
> 
> The exception table is not toggling anything. It's just providing an entry
> in the exception tables, which is scanned by fixup_exception(), which then
> moves PC to the exception code. See __get_user_asm().

Oooh, now I see what you mean. I thought you were on about toggling
using register_undef_hook, but you're actually on about the extable stuff
that we already use for handling faults on user addresses in the kernel.

That's not a bad idea at all.

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2016-11-17 18:35 ` [PATCH 15/20] ARM/hw_breakpoint: " Sebastian Andrzej Siewior
  2016-11-18 12:04   ` Will Deacon
@ 2017-01-02 14:15   ` Linus Walleij
  2017-01-02 14:34     ` Linus Walleij
  1 sibling, 1 reply; 21+ messages in thread
From: Linus Walleij @ 2017-01-02 14:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Nov 17, 2016 at 7:35 PM, Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:

> Install the callbacks via the state machine and let the core invoke
> the callbacks on the already online CPUs.
>
> smp_call_function_single() has been removed because the function is already
> invoked on the target CPU.
>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: linux-arm-kernel at lists.infradead.org
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

This patch causes a regression on my Qualcomm APQ8060 DragonBoard.

[    0.000000] CPU: ARMv7 Processor [510f02d2] revision 2 (ARMv7), cr=10c5787d
[    0.000000] CPU: PIPT / VIPT nonaliasing data cache, VIVT ASID
tagged instruction cache
[    0.000000] OF: fdt:Machine model: Qualcomm APQ8060 Dragonboard

The board hangs in very early boot. Reverting the commit does not work
because of dependencies, but I bisected down to this commit.

With earlypints the crash looks like this:

 NET: Registered protocol family 16
 DMA: preallocated 256 KiB pool for atomic coherent allocations
 cpuidle: using governor menu
 hw-breakpoint: found 3 (+1 reserved) breakpoint and 1 watchpoint registers.
 Internal error: Oops - undefined instruction: 0 [#1] PREEMPT SMP ARM
 Modules linked in: c
 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-rc2-00001-g100b2fb6bf9c #19
 Hardware name: Generic DT based system
 task: c02a0000 task.stack: c029a000
PC is at write_wb_reg+0x20c/0x330
LR is at arch_hw_breakpoint_init+0x1dc/0x27c
pc : [<c03101dc>]    lr : [<c0c0551c>]    psr: 80000013
sp : c029bef0  ip : 00000000  fp : dfffcd80
r10: c0c4f83c  r9 : 00000004  r8 : 000000af
r7 : c0c4f828  r6 : c10a631c  r5 : c10a631c  r4 : 00001fe0
r3 : 00000020  r2 : 00001fe0  r1 : 00000000  r0 : 00000060
Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
Control: 10c5787d  Table: 4020406a  DAC: 00000051
 Process swapper/0 (pid: 1, stack limit = 0xc029a210)
 Stack: (0xc029bef0 to 0xc029c000)
 bee0:                                     00000000 00000000 dfffcd80 07f80000
 bf00: ffffe000 c0c05340 00000000 c030185c c0b7b7d4 dfffcded c0931100 c033b75c
 bf20: 60000013 00000003 00000001 c0ab8424 c0b7aa28 00000000 c1099a20 00000003
 bf40: 00000003 c0ac1440 c100c588 c10a6000 c10a6000 00000003 c10a6000 c10a6000
 bf60: c0c5a30c 000000af 00000004 c0c00e04 00000003 00000003 00000000 c0c005c4
 bf80: c08d66c0 00000000 c08d66c0 00000000 00000000 00000000 00000000 00000000
 bfa0: 00000000 c08d66c8 00000000 c0308518 00000000 00000000 00000000 00000000
 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
[<c03101dc>] (write_wb_reg) from [<00000000>] (  (null))
 Code: ee001ed2 eaffffc3 ee001ed1 eaffffc1 (ee001ed0)
 ---[ end trace da08286cccfd900e ]---
 Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

 CPU1: stopping
 CPU: 1 PID: 0 Comm: swapper/1 Tainted: G      D
4.10.0-rc2-00001-g100b2fb6bf9c #19
 Hardware name: Generic DT based system
[<c030f914>] (unwind_backtrace) from [<c030c7b0>] (show_stack+0x10/0x14)
[<c030c7b0>] (show_stack) from [<c05eb2a0>] (dump_stack+0x78/0x8c)
[<c05eb2a0>] (dump_stack) from [<c030ea14>] (handle_IPI+0x358/0x368)
[<c030ea14>] (handle_IPI) from [<c03014a4>] (gic_handle_irq+0x88/0x8c)
[<c03014a4>] (gic_handle_irq) from [<c08dce8c>] (__irq_svc+0x6c/0xa8)
Exception stack(0xc02c5f70 to 0xc02c5fb8)
5f60:                                     00000001 00000000 00000000 c0318580
5f80: c02c4000 c1003c78 c1003c2c c0fa1f20 c0afea4c c02c5fc8 00000000 00000000
5fa0: 00000008 c02c5fc0 c0308f98 c0308f9c 60000013 ffffffff
[<c08dce8c>] (__irq_svc) from [<c0308f9c>] (arch_cpu_idle+0x38/0x3c)
[<c0308f9c>] (arch_cpu_idle) from [<c035d24c>] (do_idle+0x170/0x204)
[<c035d24c>] (do_idle) from [<c035d598>] (cpu_startup_entry+0x18/0x1c)
[<c035d598>] (cpu_startup_entry) from [<4030154c>] (0x4030154c)
---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Something hits a brick wall when initializing the hardware breakpoints.

This is pretty tricksy for me to debug, hints welcome...

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-02 14:15   ` Linus Walleij
@ 2017-01-02 14:34     ` Linus Walleij
  2017-01-02 15:00       ` Russell King - ARM Linux
  0 siblings, 1 reply; 21+ messages in thread
From: Linus Walleij @ 2017-01-02 14:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jan 2, 2017 at 3:15 PM, Linus Walleij <linus.walleij@linaro.org> wrote:

> Something hits a brick wall when initializing the hardware breakpoints.

Should be noted that since I'm not using the breakpoints, the dirtyfix is to put

return 0;

in the first line of arch_hw_breakpoint_init() in
arch/arm/kernel/hw_breakpoint.c

I suspect that is not an accepable solution ...

It hangs at PC is at write_wb_reg+0x20c/0x330
Which is c03101dc, and looks like this in objdump -d:

c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>
c0310214:       ee001eb9        mcr     14, 0, r1, cr0, cr9, {5}
c0310218:       eaffffb1        b       c03100e4 <write_wb_reg+0x114>
c031021c:       ee001eb8        mcr     14, 0, r1, cr0, cr8, {5}
c0310220:       eaffffaf        b       c03100e4 <write_wb_reg+0x114>
c0310224:       ee001eb7        mcr     14, 0, r1, cr0, cr7, {5}
c0310228:       eaffffad        b       c03100e4 <write_wb_reg+0x114>
c031022c:       ee001eb6        mcr     14, 0, r1, cr0, cr6, {5}
c0310230:       eaffffab        b       c03100e4 <write_wb_reg+0x114>
c0310234:       ee001eb5        mcr     14, 0, r1, cr0, cr5, {5}
c0310238:       eaffffa9        b       c03100e4 <write_wb_reg+0x114>

No idea if this helps.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-02 14:34     ` Linus Walleij
@ 2017-01-02 15:00       ` Russell King - ARM Linux
  2017-01-02 20:15         ` Linus Walleij
  0 siblings, 1 reply; 21+ messages in thread
From: Russell King - ARM Linux @ 2017-01-02 15:00 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jan 02, 2017 at 03:34:32PM +0100, Linus Walleij wrote:
> in the first line of arch_hw_breakpoint_init() in
> arch/arm/kernel/hw_breakpoint.c
> 
> I suspect that is not an accepable solution ...
> 
> It hangs at PC is at write_wb_reg+0x20c/0x330
> Which is c03101dc, and looks like this in objdump -d:
> 
> c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
> c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>

... and this is several instructions after the address you mention above.
Presumably c03101dc is accessing a higher numbered register?

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-02 15:00       ` Russell King - ARM Linux
@ 2017-01-02 20:15         ` Linus Walleij
  2017-01-03  9:33           ` Mark Rutland
  0 siblings, 1 reply; 21+ messages in thread
From: Linus Walleij @ 2017-01-02 20:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jan 2, 2017 at 4:00 PM, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Mon, Jan 02, 2017 at 03:34:32PM +0100, Linus Walleij wrote:
>> in the first line of arch_hw_breakpoint_init() in
>> arch/arm/kernel/hw_breakpoint.c
>>
>> I suspect that is not an accepable solution ...
>>
>> It hangs at PC is at write_wb_reg+0x20c/0x330
>> Which is c03101dc, and looks like this in objdump -d:
>>
>> c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
>> c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>
>
> ... and this is several instructions after the address you mention above.
> Presumably c03101dc is accessing a higher numbered register?

Ah sorry. It looks like this:

c03101dc:       ee001ed0        mcr     14, 0, r1, cr0, cr0, {6}
c03101e0:       eaffffbf        b       c03100e4 <write_wb_reg+0x114>
c03101e4:       ee001ebf        mcr     14, 0, r1, cr0, cr15, {5}
c03101e8:       eaffffbd        b       c03100e4 <write_wb_reg+0x114>
c03101ec:       ee001ebe        mcr     14, 0, r1, cr0, cr14, {5}
c03101f0:       eaffffbb        b       c03100e4 <write_wb_reg+0x114>
c03101f4:       ee001ebd        mcr     14, 0, r1, cr0, cr13, {5}
c03101f8:       eaffffb9        b       c03100e4 <write_wb_reg+0x114>

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-02 20:15         ` Linus Walleij
@ 2017-01-03  9:33           ` Mark Rutland
  2017-01-04 11:27             ` Sebastian Andrzej Siewior
  2017-01-04 13:56             ` Mark Rutland
  0 siblings, 2 replies; 21+ messages in thread
From: Mark Rutland @ 2017-01-03  9:33 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Mon, Jan 02, 2017 at 09:15:29PM +0100, Linus Walleij wrote:
> On Mon, Jan 2, 2017 at 4:00 PM, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> > On Mon, Jan 02, 2017 at 03:34:32PM +0100, Linus Walleij wrote:
> >> in the first line of arch_hw_breakpoint_init() in
> >> arch/arm/kernel/hw_breakpoint.c
> >>
> >> I suspect that is not an accepable solution ...
> >>
> >> It hangs at PC is at write_wb_reg+0x20c/0x330
> >> Which is c03101dc, and looks like this in objdump -d:
> >>
> >> c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
> >> c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>
> >
> > ... and this is several instructions after the address you mention above.
> > Presumably c03101dc is accessing a higher numbered register?
> 
> Ah sorry. It looks like this:
> 
> c03101dc:       ee001ed0        mcr     14, 0, r1, cr0, cr0, {6}
> c03101e0:       eaffffbf        b       c03100e4 <write_wb_reg+0x114>
> c03101e4:       ee001ebf        mcr     14, 0, r1, cr0, cr15, {5}
> c03101e8:       eaffffbd        b       c03100e4 <write_wb_reg+0x114>
> c03101ec:       ee001ebe        mcr     14, 0, r1, cr0, cr14, {5}
> c03101f0:       eaffffbb        b       c03100e4 <write_wb_reg+0x114>
> c03101f4:       ee001ebd        mcr     14, 0, r1, cr0, cr13, {5}
> c03101f8:       eaffffb9        b       c03100e4 <write_wb_reg+0x114>

FWIW, I was tracking an issue in this area before the holiday.

It looked like DBGPRSR.SPD is set unexpectedly over the default idle
path (i.e. WFI), causing the (otherwise valid) register accesses above
to be handled as undefined.

I haven't looked at the patch in detail, but I guess that it allows idle
to occur between reset_ctrl_regs() and arch_hw_breakpoint_init().

Reading DBGPRSR should clear SPD; but I'm not sure if other debug state
is affected.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-03  9:33           ` Mark Rutland
@ 2017-01-04 11:27             ` Sebastian Andrzej Siewior
  2017-01-04 13:56             ` Mark Rutland
  1 sibling, 0 replies; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-01-04 11:27 UTC (permalink / raw)
  To: linux-arm-kernel

On 2017-01-03 09:33:36 [+0000], Mark Rutland wrote:
> Hi,
Hi,

> It looked like DBGPRSR.SPD is set unexpectedly over the default idle
> path (i.e. WFI), causing the (otherwise valid) register accesses above
> to be handled as undefined.
> 
> I haven't looked at the patch in detail, but I guess that it allows idle
> to occur between reset_ctrl_regs() and arch_hw_breakpoint_init().

This could be possible. The callback is installed and the per-CPU thread
is woken up to handle it. It starts with the lowest CPU and loops for
all present CPUs. While waiting the thread to complete the callback, it
could go idle. See the for_each_present_cpu() loop in
__cpuhp_setup_state() and cpuhp_invoke_ap_callback() kicks the thread
(cpuhp_thread_fun()) and waits for its completion.
So the difference to on_each_cpu() is that the latter performs a busy
loop while waiting for function to complete on other CPUs.

> Reading DBGPRSR should clear SPD; but I'm not sure if other debug state
> is affected.
> 
> Thanks,
> Mark.

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-03  9:33           ` Mark Rutland
  2017-01-04 11:27             ` Sebastian Andrzej Siewior
@ 2017-01-04 13:56             ` Mark Rutland
  2017-01-04 14:32               ` Will Deacon
  2017-01-05 15:26               ` Linus Walleij
  1 sibling, 2 replies; 21+ messages in thread
From: Mark Rutland @ 2017-01-04 13:56 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 03, 2017 at 09:33:36AM +0000, Mark Rutland wrote:
> Hi,
> 
> On Mon, Jan 02, 2017 at 09:15:29PM +0100, Linus Walleij wrote:
> > On Mon, Jan 2, 2017 at 4:00 PM, Russell King - ARM Linux
> > <linux@armlinux.org.uk> wrote:
> > > On Mon, Jan 02, 2017 at 03:34:32PM +0100, Linus Walleij wrote:
> > >> in the first line of arch_hw_breakpoint_init() in
> > >> arch/arm/kernel/hw_breakpoint.c
> > >>
> > >> I suspect that is not an accepable solution ...
> > >>
> > >> It hangs at PC is at write_wb_reg+0x20c/0x330
> > >> Which is c03101dc, and looks like this in objdump -d:
> > >>
> > >> c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
> > >> c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>
> > >
> > > ... and this is several instructions after the address you mention above.
> > > Presumably c03101dc is accessing a higher numbered register?
> > 
> > Ah sorry. It looks like this:
> > 
> > c03101dc:       ee001ed0        mcr     14, 0, r1, cr0, cr0, {6}
> > c03101e0:       eaffffbf        b       c03100e4 <write_wb_reg+0x114>
> > c03101e4:       ee001ebf        mcr     14, 0, r1, cr0, cr15, {5}
> > c03101e8:       eaffffbd        b       c03100e4 <write_wb_reg+0x114>
> > c03101ec:       ee001ebe        mcr     14, 0, r1, cr0, cr14, {5}
> > c03101f0:       eaffffbb        b       c03100e4 <write_wb_reg+0x114>
> > c03101f4:       ee001ebd        mcr     14, 0, r1, cr0, cr13, {5}
> > c03101f8:       eaffffb9        b       c03100e4 <write_wb_reg+0x114>
> 
> FWIW, I was tracking an issue in this area before the holiday.
> 
> It looked like DBGPRSR.SPD is set unexpectedly over the default idle
> path (i.e. WFI), causing the (otherwise valid) register accesses above
> to be handled as undefined.
> 
> I haven't looked at the patch in detail, but I guess that it allows idle
> to occur between reset_ctrl_regs() and arch_hw_breakpoint_init().

I've just reproduced this locally on my dragonboard APQ8060.

It looks like the write_wb_reg() call that's exploding is from
get_max_wp_len(), which we call immediately after registering the
dbg_reset_online callback. Clearing DBGPRSR.SPD before the write_wb_reg() hides
the problem, so I suspect we're seeing the issue I mentioned above -- it just
so happens that we go idle in a new place.

The below hack allows boot to continue, but is not a real fix. I'm not
immediately sure what to do.

Linus, I wasn't able to get ethernet working. Do I need anything on top
of v4.10-rc2 && multi_v7_defconfig?

Thanks,
Mark.

---->8----
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 188180b..a0982ab 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -302,7 +302,7 @@ int hw_breakpoint_slots(int type)
  */
 static u8 get_max_wp_len(void)
 {
-       u32 ctrl_reg;
+       u32 ctrl_reg, val;
        struct arch_hw_breakpoint_ctrl ctrl;
        u8 size = 4;
 
@@ -313,6 +313,9 @@ static u8 get_max_wp_len(void)
        ctrl.len = ARM_BREAKPOINT_LEN_8;
        ctrl_reg = encode_ctrl_reg(ctrl);
 
+       /* HACK: CLEAR SPD */
+       ARM_DBG_READ(c1, c5, 4, val);
+
        write_wb_reg(ARM_BASE_WVR, 0);
        write_wb_reg(ARM_BASE_WCR, ctrl_reg);
        if ((read_wb_reg(ARM_BASE_WCR) & ctrl_reg) == ctrl_reg)

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-04 13:56             ` Mark Rutland
@ 2017-01-04 14:32               ` Will Deacon
  2017-01-05 15:57                 ` Mark Rutland
  2017-01-05 15:26               ` Linus Walleij
  1 sibling, 1 reply; 21+ messages in thread
From: Will Deacon @ 2017-01-04 14:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 04, 2017 at 01:56:44PM +0000, Mark Rutland wrote:
> On Tue, Jan 03, 2017 at 09:33:36AM +0000, Mark Rutland wrote:
> > On Mon, Jan 02, 2017 at 09:15:29PM +0100, Linus Walleij wrote:
> > > On Mon, Jan 2, 2017 at 4:00 PM, Russell King - ARM Linux
> > > <linux@armlinux.org.uk> wrote:
> > > > On Mon, Jan 02, 2017 at 03:34:32PM +0100, Linus Walleij wrote:
> > > >> in the first line of arch_hw_breakpoint_init() in
> > > >> arch/arm/kernel/hw_breakpoint.c
> > > >>
> > > >> I suspect that is not an accepable solution ...
> > > >>
> > > >> It hangs at PC is at write_wb_reg+0x20c/0x330
> > > >> Which is c03101dc, and looks like this in objdump -d:
> > > >>
> > > >> c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
> > > >> c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>
> > > >
> > > > ... and this is several instructions after the address you mention above.
> > > > Presumably c03101dc is accessing a higher numbered register?
> > > 
> > > Ah sorry. It looks like this:
> > > 
> > > c03101dc:       ee001ed0        mcr     14, 0, r1, cr0, cr0, {6}
> > > c03101e0:       eaffffbf        b       c03100e4 <write_wb_reg+0x114>
> > > c03101e4:       ee001ebf        mcr     14, 0, r1, cr0, cr15, {5}
> > > c03101e8:       eaffffbd        b       c03100e4 <write_wb_reg+0x114>
> > > c03101ec:       ee001ebe        mcr     14, 0, r1, cr0, cr14, {5}
> > > c03101f0:       eaffffbb        b       c03100e4 <write_wb_reg+0x114>
> > > c03101f4:       ee001ebd        mcr     14, 0, r1, cr0, cr13, {5}
> > > c03101f8:       eaffffb9        b       c03100e4 <write_wb_reg+0x114>
> > 
> > FWIW, I was tracking an issue in this area before the holiday.
> > 
> > It looked like DBGPRSR.SPD is set unexpectedly over the default idle
> > path (i.e. WFI), causing the (otherwise valid) register accesses above
> > to be handled as undefined.
> > 
> > I haven't looked at the patch in detail, but I guess that it allows idle
> > to occur between reset_ctrl_regs() and arch_hw_breakpoint_init().
> 
> I've just reproduced this locally on my dragonboard APQ8060.
> 
> It looks like the write_wb_reg() call that's exploding is from
> get_max_wp_len(), which we call immediately after registering the
> dbg_reset_online callback. Clearing DBGPRSR.SPD before the write_wb_reg() hides
> the problem, so I suspect we're seeing the issue I mentioned above -- it just
> so happens that we go idle in a new place.

When you say "go idle", are we just executing a WFI, or is the power
controller coming into play and we're actually powering down the non-debug
logic? In the case of the latter, the PM notifier should clear SPD in
reset_ctrl_regs, so this sounds like a hardware bug where the SPD bit is
set unconditionally on WFI.

In that case, this code has always been dodgy -- what happens if you try
to use hardware breakpoints in GDB in the face of WFI-based idle?

> The below hack allows boot to continue, but is not a real fix. I'm not
> immediately sure what to do.

If it's never worked, I suggest we blacklist the MIDR until somebody from
Qualcomm can help us further.

Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-04 13:56             ` Mark Rutland
  2017-01-04 14:32               ` Will Deacon
@ 2017-01-05 15:26               ` Linus Walleij
  2017-01-05 17:14                 ` Mark Rutland
  1 sibling, 1 reply; 21+ messages in thread
From: Linus Walleij @ 2017-01-05 15:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 4, 2017 at 2:56 PM, Mark Rutland <mark.rutland@arm.com> wrote:

> I've just reproduced this locally on my dragonboard APQ8060.

Sweet!

> It looks like the write_wb_reg() call that's exploding is from
> get_max_wp_len(), which we call immediately after registering the
> dbg_reset_online callback. Clearing DBGPRSR.SPD before the write_wb_reg() hides
> the problem, so I suspect we're seeing the issue I mentioned above -- it just
> so happens that we go idle in a new place.
>
> The below hack allows boot to continue, but is not a real fix. I'm not
> immediately sure what to do.

Me neither. But Will's suggestion to simply blacklist this chip might be
best.

> Linus, I wasn't able to get ethernet working. Do I need anything on top
> of v4.10-rc2 && multi_v7_defconfig?

I haven't tried it with multi_v7 but I should probably try that and patch
up the defconfigs, those are probably the root of the problem.

I do this on top of qcom_defconfig:

scripts/config --file .config \
        --enable QCOM_EBI2 \
        --enable ETHERNET \
        --enable NET_VENDOR_SMSC \
        --enable SMSC911X

Maybe you are missing the EBI2 config?

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-04 14:32               ` Will Deacon
@ 2017-01-05 15:57                 ` Mark Rutland
  0 siblings, 0 replies; 21+ messages in thread
From: Mark Rutland @ 2017-01-05 15:57 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jan 04, 2017 at 02:32:06PM +0000, Will Deacon wrote:
> On Wed, Jan 04, 2017 at 01:56:44PM +0000, Mark Rutland wrote:
> > On Tue, Jan 03, 2017 at 09:33:36AM +0000, Mark Rutland wrote:
> > > On Mon, Jan 02, 2017 at 09:15:29PM +0100, Linus Walleij wrote:
> > > > On Mon, Jan 2, 2017 at 4:00 PM, Russell King - ARM Linux
> > > > <linux@armlinux.org.uk> wrote:
> > > > > On Mon, Jan 02, 2017 at 03:34:32PM +0100, Linus Walleij wrote:
> > > > >> in the first line of arch_hw_breakpoint_init() in
> > > > >> arch/arm/kernel/hw_breakpoint.c
> > > > >>
> > > > >> I suspect that is not an accepable solution ...
> > > > >>
> > > > >> It hangs at PC is at write_wb_reg+0x20c/0x330
> > > > >> Which is c03101dc, and looks like this in objdump -d:
> > > > >>
> > > > >> c031020c:       ee001eba        mcr     14, 0, r1, cr0, cr10, {5}
> > > > >> c0310210:       eaffffb3        b       c03100e4 <write_wb_reg+0x114>
> > > > >
> > > > > ... and this is several instructions after the address you mention above.
> > > > > Presumably c03101dc is accessing a higher numbered register?
> > > > 
> > > > Ah sorry. It looks like this:
> > > > 
> > > > c03101dc:       ee001ed0        mcr     14, 0, r1, cr0, cr0, {6}
> > > > c03101e0:       eaffffbf        b       c03100e4 <write_wb_reg+0x114>
> > > > c03101e4:       ee001ebf        mcr     14, 0, r1, cr0, cr15, {5}
> > > > c03101e8:       eaffffbd        b       c03100e4 <write_wb_reg+0x114>
> > > > c03101ec:       ee001ebe        mcr     14, 0, r1, cr0, cr14, {5}
> > > > c03101f0:       eaffffbb        b       c03100e4 <write_wb_reg+0x114>
> > > > c03101f4:       ee001ebd        mcr     14, 0, r1, cr0, cr13, {5}
> > > > c03101f8:       eaffffb9        b       c03100e4 <write_wb_reg+0x114>
> > > 
> > > FWIW, I was tracking an issue in this area before the holiday.
> > > 
> > > It looked like DBGPRSR.SPD is set unexpectedly over the default idle
> > > path (i.e. WFI), causing the (otherwise valid) register accesses above
> > > to be handled as undefined.
> > > 
> > > I haven't looked at the patch in detail, but I guess that it allows idle
> > > to occur between reset_ctrl_regs() and arch_hw_breakpoint_init().
> > 
> > I've just reproduced this locally on my dragonboard APQ8060.
> > 
> > It looks like the write_wb_reg() call that's exploding is from
> > get_max_wp_len(), which we call immediately after registering the
> > dbg_reset_online callback. Clearing DBGPRSR.SPD before the write_wb_reg() hides
> > the problem, so I suspect we're seeing the issue I mentioned above -- it just
> > so happens that we go idle in a new place.
> 
> When you say "go idle", are we just executing a WFI, 

>From my prior experiments, just executing a WFI as we happen to do in
the default cpu_v7_do_idle. I tried passing cpuidle.off=1, but that
didn't help. NOPing the WFI in cpu_v7_do_idle did mask the issue, from
what I recall.

> or is the power controller coming into play and we're actually
> powering down the non-debug logic?

As far as I can see, that isn't happening. We don't save/restore any
other CPU state in the default idle path, and the kernel is otherwise
happy.

> In the case of the latter, the PM notifier should clear SPD in
> reset_ctrl_regs, so this sounds like a hardware bug where the SPD bit is
> set unconditionally on WFI.
> 
> In that case, this code has always been dodgy -- what happens if you try
> to use hardware breakpoints in GDB in the face of WFI-based idle?

The kernel blows up similiarly to Linus's original report when the
kernel tries to program the breakpoint registers.

I also believe this has always been dodgy.

> > The below hack allows boot to continue, but is not a real fix. I'm not
> > immediately sure what to do.
> 
> If it's never worked, I suggest we blacklist the MIDR until somebody from
> Qualcomm can help us further.

I'll see about putting a patch together.

Thanks,
Mark.

> 
> Will

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 15/20] ARM/hw_breakpoint: Convert to hotplug state machine
  2017-01-05 15:26               ` Linus Walleij
@ 2017-01-05 17:14                 ` Mark Rutland
  0 siblings, 0 replies; 21+ messages in thread
From: Mark Rutland @ 2017-01-05 17:14 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jan 05, 2017 at 04:26:45PM +0100, Linus Walleij wrote:
> On Wed, Jan 4, 2017 at 2:56 PM, Mark Rutland <mark.rutland@arm.com> wrote:
 
> > Linus, I wasn't able to get ethernet working. Do I need anything on top
> > of v4.10-rc2 && multi_v7_defconfig?
> 
> I haven't tried it with multi_v7 but I should probably try that and patch
> up the defconfigs, those are probably the root of the problem.
> 
> I do this on top of qcom_defconfig:
> 
> scripts/config --file .config \
>         --enable QCOM_EBI2 \
>         --enable ETHERNET \
>         --enable NET_VENDOR_SMSC \
>         --enable SMSC911X
> 
> Maybe you are missing the EBI2 config?

That was it, yes! With QCOM_EBI2 atop of multi_v7_defconfig (along with
a hack to the hw_breakpoint code), I can boot to an NFS filesystem.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-01-05 17:14 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20161117183541.8588-1-bigeasy@linutronix.de>
2016-11-17 18:35 ` [PATCH 07/20] pci/xgene-msi: Convert to hotplug state machine Sebastian Andrzej Siewior
2016-11-17 18:35 ` [PATCH 14/20] arm/bL_switcher: " Sebastian Andrzej Siewior
2016-11-17 18:35 ` [PATCH 15/20] ARM/hw_breakpoint: " Sebastian Andrzej Siewior
2016-11-18 12:04   ` Will Deacon
2016-11-18 13:11     ` Thomas Gleixner
2016-11-18 13:29       ` Will Deacon
2016-11-18 13:42         ` Thomas Gleixner
2016-11-18 13:48           ` Will Deacon
2016-11-18 13:59             ` Thomas Gleixner
2016-11-18 14:11               ` Will Deacon
2017-01-02 14:15   ` Linus Walleij
2017-01-02 14:34     ` Linus Walleij
2017-01-02 15:00       ` Russell King - ARM Linux
2017-01-02 20:15         ` Linus Walleij
2017-01-03  9:33           ` Mark Rutland
2017-01-04 11:27             ` Sebastian Andrzej Siewior
2017-01-04 13:56             ` Mark Rutland
2017-01-04 14:32               ` Will Deacon
2017-01-05 15:57                 ` Mark Rutland
2017-01-05 15:26               ` Linus Walleij
2017-01-05 17:14                 ` Mark Rutland

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).