LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 04/18] powerpc/512x: Move PHB discovery
From: Oliver O'Halloran @ 2020-11-03  4:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20201103043523.916109-1-oohall@gmail.com>

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
only compile tested
---
 arch/powerpc/platforms/512x/mpc5121_ads.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 6303fbfc4e4f..9d030c2e0004 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -24,21 +24,23 @@
 
 static void __init mpc5121_ads_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
 	printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n");
 	/*
 	 * cpld regs are needed early
 	 */
 	mpc5121_ads_cpld_map();
 
+	mpc512x_setup_arch();
+}
+
+static void __init mpc5121_ads_setup_pci(void)
+{
 #ifdef CONFIG_PCI
+	struct device_node *np;
+
 	for_each_compatible_node(np, "pci", "fsl,mpc5121-pci")
 		mpc83xx_add_bridge(np);
 #endif
-
-	mpc512x_setup_arch();
 }
 
 static void __init mpc5121_ads_init_IRQ(void)
@@ -64,6 +66,7 @@ define_machine(mpc5121_ads) {
 	.name			= "MPC5121 ADS",
 	.probe			= mpc5121_ads_probe,
 	.setup_arch		= mpc5121_ads_setup_arch,
+	.discover_phbs		= mpc5121_ads_setup_pci,
 	.init			= mpc512x_init,
 	.init_IRQ		= mpc5121_ads_init_IRQ,
 	.get_irq		= ipic_get_irq,
-- 
2.26.2


^ permalink raw reply related

* [PATCH 03/18] powerpc/maple: Move PHB discovery
From: Oliver O'Halloran @ 2020-11-03  4:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20201103043523.916109-1-oohall@gmail.com>

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/maple/setup.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index f7e66a2005b4..4e9ad5bf3efb 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -179,9 +179,6 @@ static void __init maple_setup_arch(void)
 #ifdef CONFIG_SMP
 	smp_ops = &maple_smp_ops;
 #endif
-	/* Lookup PCI hosts */
-       	maple_pci_init();
-
 	maple_use_rtas_reboot_and_halt_if_present();
 
 	printk(KERN_DEBUG "Using native/NAP idle loop\n");
@@ -351,6 +348,7 @@ define_machine(maple) {
 	.name			= "Maple",
 	.probe			= maple_probe,
 	.setup_arch		= maple_setup_arch,
+	.discover_phbs		= maple_pci_init,
 	.init_IRQ		= maple_init_IRQ,
 	.pci_irq_fixup		= maple_pci_irq_fixup,
 	.pci_get_legacy_ide_irq	= maple_pci_get_legacy_ide_irq,
-- 
2.26.2


^ permalink raw reply related

* [PATCH 02/18] powerpc/{powernv,pseries}: Move PHB discovery
From: Oliver O'Halloran @ 2020-11-03  4:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20201103043523.916109-1-oohall@gmail.com>

Make powernv and pseries use ppc_md.discover_phbs. These two platforms need
to be done together because they both depend on pci_dn's being created
from the DT. The pci_dn contains a pointer to the relevant pci_controller
so they need to be created after the pci_controller structures are
available, but before PCI devices are scanned. Currently this
ordering is provided by initcalls and the sequence is:

1. PHBs are discovered (setup_arch) (early boot, pre-initcalls)
2. pci_dn are created from the unflattened DT (core initcall)
3. PHBs are scanned pcibios_init() (subsys initcall)

The new ppc_md.discover_phbs() function is also a core_initcall so we can't
guarantee ordering between the creation of pci_controllers and the
creation of pci_dn's which require a pci_controller. We could use the
postcore, or core_sync initcall levels, but it's cleaner to just move the
pci_dn setup into the per-PHB inits which occur inside of .discover_phbs()
for these platforms. This brings the boot-time path in line with the PHB
hotplug path that is used for pseries DLPAR operations too.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/kernel/pci_dn.c              | 22 ----------------------
 arch/powerpc/platforms/powernv/pci-ioda.c |  3 +++
 arch/powerpc/platforms/powernv/setup.c    |  4 +---
 arch/powerpc/platforms/pseries/setup.c    |  7 +++++--
 4 files changed, 9 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 54e240597fd9..61571ae23953 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -481,28 +481,6 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
 	pci_traverse_device_nodes(dn, add_pdn, phb);
 }
 
-/** 
- * pci_devs_phb_init - Initialize phbs and pci devs under them.
- * 
- * This routine walks over all phb's (pci-host bridges) on the
- * system, and sets up assorted pci-related structures 
- * (including pci info in the device node structs) for each
- * pci device found underneath.  This routine runs once,
- * early in the boot sequence.
- */
-static int __init pci_devs_phb_init(void)
-{
-	struct pci_controller *phb, *tmp;
-
-	/* This must be done first so the device nodes have valid pci info! */
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
-		pci_devs_phb_init_dynamic(phb);
-
-	return 0;
-}
-
-core_initcall(pci_devs_phb_init);
-
 static void pci_dev_pdn_setup(struct pci_dev *pdev)
 {
 	struct pci_dn *pdn;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2b4ceb5e6ce4..d6815f03fee3 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3176,6 +3176,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	/* Remove M64 resource if we can't configure it successfully */
 	if (!phb->init_m64 || phb->init_m64(phb))
 		hose->mem_resources[1].flags = 0;
+
+	/* create pci_dn's for DT nodes under this PHB */
+	pci_devs_phb_init_dynamic(hose);
 }
 
 void __init pnv_pci_init_ioda2_phb(struct device_node *np)
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 9acaa0f131b9..92f5fa827909 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -162,9 +162,6 @@ static void __init pnv_setup_arch(void)
 	/* Initialize SMP */
 	pnv_smp_init();
 
-	/* Setup PCI */
-	pnv_pci_init();
-
 	/* Setup RTC and NVRAM callbacks */
 	if (firmware_has_feature(FW_FEATURE_OPAL))
 		opal_nvram_init();
@@ -524,6 +521,7 @@ define_machine(powernv) {
 	.init_IRQ		= pnv_init_IRQ,
 	.show_cpuinfo		= pnv_show_cpuinfo,
 	.get_proc_freq          = pnv_get_proc_freq,
+	.discover_phbs		= pnv_pci_init,
 	.progress		= pnv_progress,
 	.machine_shutdown	= pnv_shutdown,
 	.power_save             = NULL,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 633c45ec406d..e88b30d4b6cd 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -463,7 +463,7 @@ void pseries_little_endian_exceptions(void)
 }
 #endif
 
-static void __init find_and_init_phbs(void)
+static void __init pSeries_discover_phbs(void)
 {
 	struct device_node *node;
 	struct pci_controller *phb;
@@ -481,6 +481,9 @@ static void __init find_and_init_phbs(void)
 		pci_process_bridge_OF_ranges(phb, node, 0);
 		isa_bridge_find_early(phb);
 		phb->controller_ops = pseries_pci_controller_ops;
+
+		/* create pci_dn's for DT nodes under this PHB */
+		pci_devs_phb_init_dynamic(phb);
 	}
 
 	of_node_put(root);
@@ -777,7 +780,6 @@ static void __init pSeries_setup_arch(void)
 
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
-	find_and_init_phbs();
 	of_reconfig_notifier_register(&pci_dn_reconfig_nb);
 
 	pSeries_nvram_init();
@@ -1041,6 +1043,7 @@ define_machine(pseries) {
 	.init_IRQ		= pseries_init_irq,
 	.show_cpuinfo		= pSeries_show_cpuinfo,
 	.log_error		= pSeries_log_error,
+	.discover_phbs		= pSeries_discover_phbs,
 	.pcibios_fixup		= pSeries_final_fixup,
 	.restart		= rtas_restart,
 	.halt			= rtas_halt,
-- 
2.26.2


^ permalink raw reply related

* [PATCH 01/18] powerpc/pci: Add ppc_md.discover_phbs()
From: Oliver O'Halloran @ 2020-11-03  4:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras, Oliver O'Halloran

On many powerpc platforms the discovery and initialisation of
pci_controllers (PHBs) happens inside of setup_arch(). This is very early
in boot (pre-initcalls) and means that we're initialising the PHB long
before many basic kernel services (slab allocator, debugfs, a real ioremap)
are available.

On PowerNV this causes an additional problem since we map the PHB registers
with ioremap(). As of commit d538aadc2718 ("powerpc/ioremap: warn on early
use of ioremap()") a warning is printed because we're using the "incorrect"
API to set up an MMIO mapping in early boot. The kernel does provide
early_ioremap(), but that is not intended to create long-lived MMIO
mappings and a separate warning is printed by generic code if
early_ioremap() mappings are "leaked."

This is all fixable with dumb hacks like using early_ioremap() to setup
the initial mapping then replacing it with a real ioremap later on in
boot, but it does raise the question: Why the hell are we setting up the
PHB's this early in boot?

The old and wise claim it's due to "hysterical raisins." Aside from amused
grapes there doesn't appear to be any real reason to maintain the current
behaviour. Already most of the newer embedded platforms perform PHB
discovery in an arch_initcall and between the end of setup_arch() and the
start of initcalls none of the generic kernel code does anything PCI
related. On powerpc scanning PHBs occurs in a subsys_initcall so it should
be possible to move the PHB discovery to a core, postcore or arch initcall.

This patch adds the ppc_md.discover_phbs hook and a core_initcall stub that
calls it. The core_initcalls are the earliest to be called so this will
avoid any possible issues with dependencies between initcalls. This isn't just an
academic issue either since on pseries and PowerNV EEH init occurs in an
arch_initcall and depends on the pci_controllers being available, similarly
the creation of pci_dns occurs at core_initcall_sync (i.e. between core and
postcore initcalls). These problems need to be addressed separately.

Cc: Paul Mackerras <paulus@samba.org>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/machdep.h |  3 +++
 arch/powerpc/kernel/pci-common.c   | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 475687f24f4a..d319160d790c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -59,6 +59,9 @@ struct machdep_calls {
 	int		(*pcibios_root_bridge_prepare)(struct pci_host_bridge
 				*bridge);
 
+	/* finds all the pci_controllers present at boot */
+	void 		(*discover_phbs)(void);
+
 	/* To setup PHBs when using automatic OF platform driver for PCI */
 	int		(*pci_setup_phb)(struct pci_controller *host);
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index be108616a721..6265e7d1c697 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1625,3 +1625,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+
+
+int __init discover_phbs(void)
+{
+	if (ppc_md.discover_phbs)
+		ppc_md.discover_phbs();
+
+	return 0;
+}
+core_initcall(discover_phbs);
-- 
2.26.2


^ permalink raw reply related

* Re: Kernel panic from malloc() on SUSE 15.1?
From: Michael Ellerman @ 2020-11-03  2:26 UTC (permalink / raw)
  To: Carl Jacobsen, linuxppc-dev
In-Reply-To: <CAKkwB_S6Bs_+5At2aajbQbJg==WE_4NLdhSK=Bj+td67215Htg@mail.gmail.com>

Carl Jacobsen <cjacobsen@storix.com> writes:
> I've got a SUSE 15.1 install (on ppc64le) that kernel panics on a very
> simple
> test program, built in a slightly unusual way.
>
> I'm compiling on SUSE 12, using gcc 4.8.3. I'm linking to a static
> copy of libcrypto.a (from openssl-1.1.1g), built without threads.
> I have a 10 line C test program that compiles and runs fine on the
> SUSE 12 system. If I compile the same program on SUSE 15.1 (with
> gcc 7.4.1), it runs fine on SUSE 15.1.
>
> But, if I run the version that I compiled on SUSE 12, on the SUSE 15.1
> system, the call to RAND_status() gets to a malloc() and then panics.
> (And, of course, if I just compile a call to malloc(), that runs fine
> on both systems.) Here's the test program, it's really just a call to
> RAND_status():
>
>     #include <stdio.h>
>     #include <openssl/rand.h>
>
>     int main(int argc, char **argv)
>     {
>         int has_enough_data = RAND_status();
>         printf("The PRNG %s been seeded with enough data\n",
>                has_enough_data ? "HAS" : "has NOT");
>         return 0;
>     }
>
> openssl is configured/built with:
>     ./config no-shared no-dso no-threads -fPIC -ggdb3 -debug -static
>     make
>
> and the test program is compiled with:
>     gcc -ggdb3 -o rand_test rand_test.c libcrypto.a
>
> The kernel on SUSE 12 is: 3.12.28-4-default
> And glibc is: 2.19
>
> The kernel on SUSE 15.1 is: 4.12.14-197.18-default
> And glibc is: 2.26
>
> In a previous iteration it was panicking in pthread_once(), so
> I compiled openssl without pthreads support, and now it panics
> calling malloc().

What's the panic look like?

cheers

^ permalink raw reply

* [PATCH AUTOSEL 5.4 15/24] scsi: ibmvscsi: Fix potential race after loss of transport
From: Sasha Levin @ 2020-11-03  1:19 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Tyrel Datwyler, linuxppc-dev, linux-scsi,
	Martin K . Petersen
In-Reply-To: <20201103012007.183429-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.ibm.com>

[ Upstream commit 665e0224a3d76f36da40bd9012270fa629aa42ed ]

After a loss of transport due to an adapter migration or crash/disconnect
from the host partner there is a tiny window where we can race adjusting
the request_limit of the adapter. The request limit is atomically
increased/decreased to track the number of inflight requests against the
allowed limit of our VIOS partner.

After a transport loss we set the request_limit to zero to reflect this
state.  However, there is a window where the adapter may attempt to queue a
command because the transport loss event hasn't been fully processed yet
and request_limit is still greater than zero.  The hypercall to send the
event will fail and the error path will increment the request_limit as a
result.  If the adapter processes the transport event prior to this
increment the request_limit becomes out of sync with the adapter state and
can result in SCSI commands being submitted on the now reset connection
prior to an SRP Login resulting in a protocol violation.

Fix this race by protecting request_limit with the host lock when changing
the value via atomic_set() to indicate no transport.

Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 36 +++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index c5711c659b517..1ab0a61e3fb59 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 }
 
+/**
+ * ibmvscsi_set_request_limit - Set the adapter request_limit in response to
+ * an adapter failure, reset, or SRP Login. Done under host lock to prevent
+ * race with SCSI command submission.
+ * @hostdata:	adapter to adjust
+ * @limit:	new request limit
+ */
+static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	atomic_set(&hostdata->request_limit, limit);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+}
+
 /**
  * ibmvscsi_reset_host - Reset the connection to the server
  * @hostdata:	struct ibmvscsi_host_data to reset
@@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
 {
 	scsi_block_requests(hostdata->host);
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
 	purge_requests(hostdata, DID_ERROR);
 	hostdata->action = IBMVSCSI_HOST_ACTION_RESET;
@@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
 			 evt_struct->xfer_iu->srp.login_rej.reason);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	default:
 		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
 			evt_struct->xfer_iu->srp.login_rsp.opcode);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	}
 
@@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 	 * This value is set rather than added to request_limit because
 	 * request_limit could have been set to -1 by this client.
 	 */
-	atomic_set(&hostdata->request_limit,
+	ibmvscsi_set_request_limit(hostdata,
 		   be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
 
 	/* If we had any pending I/Os, kick them */
@@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					 SRP_BUF_FORMAT_INDIRECT);
 
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	/* Start out with a request limit of 0, since this is negotiated in
 	 * the login request we are just sending and login requests always
 	 * get sent by the driver regardless of request_limit.
 	 */
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
@@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		return;
 	case VIOSRP_CRQ_XPORT_EVENT:	/* Hypervisor telling us the connection is closed */
 		scsi_block_requests(hostdata->host);
-		atomic_set(&hostdata->request_limit, 0);
+		ibmvscsi_set_request_limit(hostdata, 0);
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
@@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
 	}
 
 	hostdata->action = IBMVSCSI_HOST_ACTION_NONE;
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	if (rc) {
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		dev_err(hostdata->dev, "error after %s\n", action);
 	}
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	scsi_unblock_requests(hostdata->host);
 }
@@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	init_waitqueue_head(&hostdata->work_wait_q);
 	hostdata->host = host;
 	hostdata->dev = dev;
-	atomic_set(&hostdata->request_limit, -1);
+	ibmvscsi_set_request_limit(hostdata, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	if (map_persist_bufs(hostdata)) {
-- 
2.27.0


^ permalink raw reply related

* [PATCH AUTOSEL 5.8 20/29] scsi: ibmvscsi: Fix potential race after loss of transport
From: Sasha Levin @ 2020-11-03  1:19 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Tyrel Datwyler, linuxppc-dev, linux-scsi,
	Martin K . Petersen
In-Reply-To: <20201103011928.183145-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.ibm.com>

[ Upstream commit 665e0224a3d76f36da40bd9012270fa629aa42ed ]

After a loss of transport due to an adapter migration or crash/disconnect
from the host partner there is a tiny window where we can race adjusting
the request_limit of the adapter. The request limit is atomically
increased/decreased to track the number of inflight requests against the
allowed limit of our VIOS partner.

After a transport loss we set the request_limit to zero to reflect this
state.  However, there is a window where the adapter may attempt to queue a
command because the transport loss event hasn't been fully processed yet
and request_limit is still greater than zero.  The hypercall to send the
event will fail and the error path will increment the request_limit as a
result.  If the adapter processes the transport event prior to this
increment the request_limit becomes out of sync with the adapter state and
can result in SCSI commands being submitted on the now reset connection
prior to an SRP Login resulting in a protocol violation.

Fix this race by protecting request_limit with the host lock when changing
the value via atomic_set() to indicate no transport.

Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 36 +++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 14f687e9b1f44..62faeab47d905 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 }
 
+/**
+ * ibmvscsi_set_request_limit - Set the adapter request_limit in response to
+ * an adapter failure, reset, or SRP Login. Done under host lock to prevent
+ * race with SCSI command submission.
+ * @hostdata:	adapter to adjust
+ * @limit:	new request limit
+ */
+static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	atomic_set(&hostdata->request_limit, limit);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+}
+
 /**
  * ibmvscsi_reset_host - Reset the connection to the server
  * @hostdata:	struct ibmvscsi_host_data to reset
@@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
 {
 	scsi_block_requests(hostdata->host);
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
 	purge_requests(hostdata, DID_ERROR);
 	hostdata->action = IBMVSCSI_HOST_ACTION_RESET;
@@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
 			 evt_struct->xfer_iu->srp.login_rej.reason);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	default:
 		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
 			evt_struct->xfer_iu->srp.login_rsp.opcode);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	}
 
@@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 	 * This value is set rather than added to request_limit because
 	 * request_limit could have been set to -1 by this client.
 	 */
-	atomic_set(&hostdata->request_limit,
+	ibmvscsi_set_request_limit(hostdata,
 		   be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
 
 	/* If we had any pending I/Os, kick them */
@@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					 SRP_BUF_FORMAT_INDIRECT);
 
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	/* Start out with a request limit of 0, since this is negotiated in
 	 * the login request we are just sending and login requests always
 	 * get sent by the driver regardless of request_limit.
 	 */
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
@@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		return;
 	case VIOSRP_CRQ_XPORT_EVENT:	/* Hypervisor telling us the connection is closed */
 		scsi_block_requests(hostdata->host);
-		atomic_set(&hostdata->request_limit, 0);
+		ibmvscsi_set_request_limit(hostdata, 0);
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
@@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
 	}
 
 	hostdata->action = IBMVSCSI_HOST_ACTION_NONE;
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	if (rc) {
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		dev_err(hostdata->dev, "error after %s\n", action);
 	}
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	scsi_unblock_requests(hostdata->host);
 }
@@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	init_waitqueue_head(&hostdata->work_wait_q);
 	hostdata->host = host;
 	hostdata->dev = dev;
-	atomic_set(&hostdata->request_limit, -1);
+	ibmvscsi_set_request_limit(hostdata, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	if (map_persist_bufs(hostdata)) {
-- 
2.27.0


^ permalink raw reply related

* [PATCH AUTOSEL 5.9 24/35] scsi: ibmvscsi: Fix potential race after loss of transport
From: Sasha Levin @ 2020-11-03  1:18 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Tyrel Datwyler, linuxppc-dev, linux-scsi,
	Martin K . Petersen
In-Reply-To: <20201103011840.182814-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.ibm.com>

[ Upstream commit 665e0224a3d76f36da40bd9012270fa629aa42ed ]

After a loss of transport due to an adapter migration or crash/disconnect
from the host partner there is a tiny window where we can race adjusting
the request_limit of the adapter. The request limit is atomically
increased/decreased to track the number of inflight requests against the
allowed limit of our VIOS partner.

After a transport loss we set the request_limit to zero to reflect this
state.  However, there is a window where the adapter may attempt to queue a
command because the transport loss event hasn't been fully processed yet
and request_limit is still greater than zero.  The hypercall to send the
event will fail and the error path will increment the request_limit as a
result.  If the adapter processes the transport event prior to this
increment the request_limit becomes out of sync with the adapter state and
can result in SCSI commands being submitted on the now reset connection
prior to an SRP Login resulting in a protocol violation.

Fix this race by protecting request_limit with the host lock when changing
the value via atomic_set() to indicate no transport.

Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 36 +++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index b1f3017b6547a..29fcc44be2d57 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 }
 
+/**
+ * ibmvscsi_set_request_limit - Set the adapter request_limit in response to
+ * an adapter failure, reset, or SRP Login. Done under host lock to prevent
+ * race with SCSI command submission.
+ * @hostdata:	adapter to adjust
+ * @limit:	new request limit
+ */
+static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	atomic_set(&hostdata->request_limit, limit);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+}
+
 /**
  * ibmvscsi_reset_host - Reset the connection to the server
  * @hostdata:	struct ibmvscsi_host_data to reset
@@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
 static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
 {
 	scsi_block_requests(hostdata->host);
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
 	purge_requests(hostdata, DID_ERROR);
 	hostdata->action = IBMVSCSI_HOST_ACTION_RESET;
@@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
 			 evt_struct->xfer_iu->srp.login_rej.reason);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	default:
 		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
 			evt_struct->xfer_iu->srp.login_rsp.opcode);
 		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		return;
 	}
 
@@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 	 * This value is set rather than added to request_limit because
 	 * request_limit could have been set to -1 by this client.
 	 */
-	atomic_set(&hostdata->request_limit,
+	ibmvscsi_set_request_limit(hostdata,
 		   be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
 
 	/* If we had any pending I/Os, kick them */
@@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					 SRP_BUF_FORMAT_INDIRECT);
 
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	/* Start out with a request limit of 0, since this is negotiated in
 	 * the login request we are just sending and login requests always
 	 * get sent by the driver regardless of request_limit.
 	 */
-	atomic_set(&hostdata->request_limit, 0);
+	ibmvscsi_set_request_limit(hostdata, 0);
 
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
@@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		return;
 	case VIOSRP_CRQ_XPORT_EVENT:	/* Hypervisor telling us the connection is closed */
 		scsi_block_requests(hostdata->host);
-		atomic_set(&hostdata->request_limit, 0);
+		ibmvscsi_set_request_limit(hostdata, 0);
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
@@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
 	}
 
 	hostdata->action = IBMVSCSI_HOST_ACTION_NONE;
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	if (rc) {
-		atomic_set(&hostdata->request_limit, -1);
+		ibmvscsi_set_request_limit(hostdata, -1);
 		dev_err(hostdata->dev, "error after %s\n", action);
 	}
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
 	scsi_unblock_requests(hostdata->host);
 }
@@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	init_waitqueue_head(&hostdata->work_wait_q);
 	hostdata->host = host;
 	hostdata->dev = dev;
-	atomic_set(&hostdata->request_limit, -1);
+	ibmvscsi_set_request_limit(hostdata, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	if (map_persist_bufs(hostdata)) {
-- 
2.27.0


^ permalink raw reply related

* [powerpc:merge] BUILD SUCCESS 09a0972ac14f67d600aa3c80035367a8074e90eb
From: kernel test robot @ 2020-11-03  0:21 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  merge
branch HEAD: 09a0972ac14f67d600aa3c80035367a8074e90eb  powerpc/mm: Reorder variables to match master

elapsed time: 1340m

configs tested: 206
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
c6x                        evmc6457_defconfig
arm                       imx_v6_v7_defconfig
powerpc                      cm5200_defconfig
arm                        magician_defconfig
powerpc                     skiroot_defconfig
sh                        sh7757lcr_defconfig
powerpc                    adder875_defconfig
mips                         bigsur_defconfig
mips                           rs90_defconfig
powerpc                       holly_defconfig
arm                        trizeps4_defconfig
arm                    vt8500_v6_v7_defconfig
powerpc                    amigaone_defconfig
arm                        spear6xx_defconfig
arm                            mmp2_defconfig
ia64                        generic_defconfig
mips                    maltaup_xpa_defconfig
sh                          rsk7269_defconfig
arm                      tct_hammer_defconfig
arm                            xcep_defconfig
sh                   rts7751r2dplus_defconfig
mips                      maltasmvp_defconfig
nios2                         10m50_defconfig
sh                           se7712_defconfig
mips                         mpc30x_defconfig
powerpc                   currituck_defconfig
sh                           se7750_defconfig
arm                         at91_dt_defconfig
arm                              zx_defconfig
sh                           se7343_defconfig
sh                            hp6xx_defconfig
powerpc                      makalu_defconfig
c6x                         dsk6455_defconfig
powerpc                     kilauea_defconfig
h8300                       h8s-sim_defconfig
arm                        realview_defconfig
arm                        multi_v7_defconfig
mips                      fuloong2e_defconfig
arm                        mvebu_v5_defconfig
m68k                        mvme16x_defconfig
mips                        bcm47xx_defconfig
mips                            gpr_defconfig
powerpc                 mpc837x_mds_defconfig
mips                            ar7_defconfig
m68k                            mac_defconfig
mips                      loongson3_defconfig
parisc                generic-64bit_defconfig
powerpc                mpc7448_hpc2_defconfig
sh                              ul2_defconfig
mips                      maltaaprp_defconfig
arc                            hsdk_defconfig
m68k                          sun3x_defconfig
powerpc                     tqm8548_defconfig
um                            kunit_defconfig
arc                     haps_hs_smp_defconfig
m68k                       bvme6000_defconfig
arm                         vf610m4_defconfig
arm                         mv78xx0_defconfig
powerpc                      ppc40x_defconfig
sh                          sdk7780_defconfig
powerpc                       eiger_defconfig
m68k                          multi_defconfig
arm                         socfpga_defconfig
riscv                            allyesconfig
arm                          badge4_defconfig
arm                           sunxi_defconfig
powerpc                      arches_defconfig
sh                            migor_defconfig
arm                          pxa168_defconfig
sh                  sh7785lcr_32bit_defconfig
powerpc                      ppc44x_defconfig
i386                             alldefconfig
powerpc                 mpc832x_rdb_defconfig
powerpc                 mpc8560_ads_defconfig
arm                          iop32x_defconfig
mips                malta_kvm_guest_defconfig
mips                        jmr3927_defconfig
powerpc                 mpc836x_rdk_defconfig
mips                malta_qemu_32r6_defconfig
powerpc                        cell_defconfig
sh                         microdev_defconfig
powerpc                     rainier_defconfig
arm                      footbridge_defconfig
powerpc                      katmai_defconfig
powerpc                 mpc8313_rdb_defconfig
powerpc                      ppc6xx_defconfig
arm                        keystone_defconfig
riscv                          rv32_defconfig
powerpc                       maple_defconfig
powerpc                     tqm8541_defconfig
sh                ecovec24-romimage_defconfig
sh                   sh7770_generic_defconfig
arm                        multi_v5_defconfig
powerpc                     tqm5200_defconfig
powerpc                   lite5200b_defconfig
m68k                        mvme147_defconfig
h8300                     edosk2674_defconfig
sh                           sh2007_defconfig
ia64                             alldefconfig
mips                         cobalt_defconfig
microblaze                    nommu_defconfig
arm                          gemini_defconfig
sparc                       sparc32_defconfig
arm                       aspeed_g4_defconfig
arm                       imx_v4_v5_defconfig
sh                          rsk7264_defconfig
arm                       versatile_defconfig
sh                            titan_defconfig
arm                             rpc_defconfig
c6x                              alldefconfig
powerpc                      pmac32_defconfig
powerpc                     ksi8560_defconfig
powerpc                        icon_defconfig
arm                          pxa3xx_defconfig
arm                       cns3420vb_defconfig
arm                  colibri_pxa270_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20201101
x86_64               randconfig-a003-20201101
x86_64               randconfig-a005-20201101
x86_64               randconfig-a002-20201101
x86_64               randconfig-a006-20201101
x86_64               randconfig-a001-20201101
i386                 randconfig-a004-20201102
i386                 randconfig-a006-20201102
i386                 randconfig-a005-20201102
i386                 randconfig-a001-20201102
i386                 randconfig-a002-20201102
i386                 randconfig-a003-20201102
i386                 randconfig-a004-20201101
i386                 randconfig-a006-20201101
i386                 randconfig-a005-20201101
i386                 randconfig-a001-20201101
i386                 randconfig-a002-20201101
i386                 randconfig-a003-20201101
x86_64               randconfig-a012-20201102
x86_64               randconfig-a015-20201102
x86_64               randconfig-a011-20201102
x86_64               randconfig-a013-20201102
x86_64               randconfig-a014-20201102
x86_64               randconfig-a016-20201102
i386                 randconfig-a013-20201102
i386                 randconfig-a015-20201102
i386                 randconfig-a014-20201102
i386                 randconfig-a016-20201102
i386                 randconfig-a011-20201102
i386                 randconfig-a012-20201102
riscv                    nommu_k210_defconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a004-20201102
x86_64               randconfig-a005-20201102
x86_64               randconfig-a003-20201102
x86_64               randconfig-a002-20201102
x86_64               randconfig-a006-20201102
x86_64               randconfig-a001-20201102
x86_64               randconfig-a012-20201101
x86_64               randconfig-a015-20201101
x86_64               randconfig-a013-20201101
x86_64               randconfig-a011-20201101
x86_64               randconfig-a014-20201101
x86_64               randconfig-a016-20201101

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:fixes-test] BUILD SUCCESS 99f070b62322a4b8c1252952735806d09eb44b68
From: kernel test robot @ 2020-11-03  0:21 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes-test
branch HEAD: 99f070b62322a4b8c1252952735806d09eb44b68  powerpc/smp: Call rcu_cpu_starting() earlier

elapsed time: 1341m

configs tested: 238
configs skipped: 95

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                               defconfig
arm                              allyesconfig
arm64                            allyesconfig
arm                              allmodconfig
arc                              allyesconfig
powerpc                    mvme5100_defconfig
powerpc                       eiger_defconfig
arm                  colibri_pxa270_defconfig
mips                 decstation_r4k_defconfig
mips                           ip22_defconfig
c6x                        evmc6457_defconfig
arm                       imx_v6_v7_defconfig
powerpc                      cm5200_defconfig
arm                        magician_defconfig
powerpc                     skiroot_defconfig
sh                        sh7757lcr_defconfig
powerpc                    adder875_defconfig
mips                         bigsur_defconfig
mips                           rs90_defconfig
powerpc                       holly_defconfig
arm                        trizeps4_defconfig
ia64                                defconfig
arm                    vt8500_v6_v7_defconfig
powerpc                    amigaone_defconfig
arm                        spear6xx_defconfig
arm                            mmp2_defconfig
ia64                        generic_defconfig
mips                    maltaup_xpa_defconfig
arm                       imx_v4_v5_defconfig
nios2                            allyesconfig
arm                      pxa255-idp_defconfig
powerpc                 mpc837x_mds_defconfig
sh                        sh7785lcr_defconfig
sh                          rsk7269_defconfig
arm                      tct_hammer_defconfig
arm                            xcep_defconfig
sh                   rts7751r2dplus_defconfig
mips                      maltasmvp_defconfig
nios2                         10m50_defconfig
sh                           se7712_defconfig
mips                         mpc30x_defconfig
powerpc                   currituck_defconfig
sh                           se7750_defconfig
arm                         at91_dt_defconfig
arm                              zx_defconfig
sh                           se7343_defconfig
sh                            hp6xx_defconfig
powerpc                      makalu_defconfig
sh                          polaris_defconfig
sh                           se7724_defconfig
m68k                       bvme6000_defconfig
arm                         lpc32xx_defconfig
ia64                            zx1_defconfig
powerpc                       maple_defconfig
mips                           ip28_defconfig
arm                         lpc18xx_defconfig
c6x                         dsk6455_defconfig
powerpc                     kilauea_defconfig
arc                 nsimosci_hs_smp_defconfig
powerpc                 mpc832x_mds_defconfig
sparc64                             defconfig
powerpc                        cell_defconfig
arc                          axs101_defconfig
arc                        vdk_hs38_defconfig
arm                           omap1_defconfig
mips                             allyesconfig
mips                      fuloong2e_defconfig
arm                        mvebu_v5_defconfig
powerpc                      ppc6xx_defconfig
m68k                        mvme16x_defconfig
mips                        bcm47xx_defconfig
mips                            gpr_defconfig
alpha                               defconfig
ia64                             allyesconfig
riscv                    nommu_k210_defconfig
powerpc                 mpc832x_rdb_defconfig
c6x                                 defconfig
arm                        oxnas_v6_defconfig
m68k                       m5208evb_defconfig
powerpc                      obs600_defconfig
powerpc                     asp8347_defconfig
mips                         tb0226_defconfig
mips                          ath25_defconfig
arm                     davinci_all_defconfig
mips                     decstation_defconfig
powerpc                 mpc8315_rdb_defconfig
parisc                generic-64bit_defconfig
powerpc                mpc7448_hpc2_defconfig
sh                              ul2_defconfig
mips                      maltaaprp_defconfig
nds32                               defconfig
powerpc                    klondike_defconfig
arm                          pcm027_defconfig
sh                   secureedge5410_defconfig
microblaze                      mmu_defconfig
arc                            hsdk_defconfig
m68k                          sun3x_defconfig
powerpc                     tqm8548_defconfig
um                            kunit_defconfig
arc                     haps_hs_smp_defconfig
arm                         vf610m4_defconfig
arm                         mv78xx0_defconfig
powerpc                      ppc40x_defconfig
sh                          sdk7780_defconfig
m68k                          multi_defconfig
arm                         socfpga_defconfig
riscv                            allyesconfig
arm                          badge4_defconfig
arm                           sunxi_defconfig
xtensa                generic_kc705_defconfig
sh                   sh7770_generic_defconfig
csky                                defconfig
nds32                            alldefconfig
sh                         ecovec24_defconfig
riscv                          rv32_defconfig
powerpc                      arches_defconfig
sh                            migor_defconfig
arm                          pxa168_defconfig
sh                  sh7785lcr_32bit_defconfig
powerpc                      ppc44x_defconfig
i386                             alldefconfig
powerpc                 mpc8560_ads_defconfig
arm                          iop32x_defconfig
mips                malta_kvm_guest_defconfig
mips                        jmr3927_defconfig
powerpc                 mpc836x_rdk_defconfig
mips                malta_qemu_32r6_defconfig
sh                         microdev_defconfig
powerpc                     rainier_defconfig
arm                      footbridge_defconfig
powerpc                      katmai_defconfig
powerpc                    ge_imp3a_defconfig
powerpc                 mpc8313_rdb_defconfig
powerpc                    gamecube_defconfig
powerpc                          allmodconfig
sh                           se7206_defconfig
powerpc                     tqm8541_defconfig
sh                ecovec24-romimage_defconfig
arm                        multi_v5_defconfig
powerpc                     tqm5200_defconfig
powerpc                   lite5200b_defconfig
m68k                        mvme147_defconfig
h8300                     edosk2674_defconfig
sh                           sh2007_defconfig
ia64                             alldefconfig
mips                         cobalt_defconfig
microblaze                    nommu_defconfig
arm                          gemini_defconfig
sparc                       sparc32_defconfig
powerpc                           allnoconfig
arm                       aspeed_g4_defconfig
sh                          rsk7264_defconfig
arm                       versatile_defconfig
sh                     sh7710voipgw_defconfig
sh                            titan_defconfig
arm                             rpc_defconfig
c6x                              alldefconfig
powerpc                      pmac32_defconfig
powerpc                     ksi8560_defconfig
powerpc                        icon_defconfig
mips                           ip27_defconfig
xtensa                    smp_lx200_defconfig
m68k                           sun3_defconfig
ia64                             allmodconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
nds32                             allnoconfig
c6x                              allyesconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allmodconfig
powerpc                          allyesconfig
x86_64               randconfig-a004-20201101
x86_64               randconfig-a003-20201101
x86_64               randconfig-a005-20201101
x86_64               randconfig-a002-20201101
x86_64               randconfig-a006-20201101
x86_64               randconfig-a001-20201101
i386                 randconfig-a004-20201102
i386                 randconfig-a006-20201102
i386                 randconfig-a005-20201102
i386                 randconfig-a001-20201102
i386                 randconfig-a002-20201102
i386                 randconfig-a003-20201102
i386                 randconfig-a004-20201101
i386                 randconfig-a006-20201101
i386                 randconfig-a005-20201101
i386                 randconfig-a001-20201101
i386                 randconfig-a002-20201101
i386                 randconfig-a003-20201101
x86_64               randconfig-a012-20201102
x86_64               randconfig-a015-20201102
x86_64               randconfig-a011-20201102
x86_64               randconfig-a013-20201102
x86_64               randconfig-a014-20201102
x86_64               randconfig-a016-20201102
i386                 randconfig-a013-20201102
i386                 randconfig-a015-20201102
i386                 randconfig-a014-20201102
i386                 randconfig-a016-20201102
i386                 randconfig-a011-20201102
i386                 randconfig-a012-20201102
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a004-20201102
x86_64               randconfig-a005-20201102
x86_64               randconfig-a003-20201102
x86_64               randconfig-a002-20201102
x86_64               randconfig-a006-20201102
x86_64               randconfig-a001-20201102
x86_64               randconfig-a012-20201101
x86_64               randconfig-a015-20201101
x86_64               randconfig-a013-20201101
x86_64               randconfig-a011-20201101
x86_64               randconfig-a014-20201101
x86_64               randconfig-a016-20201101

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:next-test] BUILD SUCCESS 2d83b0f30c1483a556c8aa1f7d891006fffcd5e0
From: kernel test robot @ 2020-11-03  0:21 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: 2d83b0f30c1483a556c8aa1f7d891006fffcd5e0  powerpc: Avoid broken GCC __attribute__((optimize))

elapsed time: 1338m

configs tested: 209
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                               defconfig
arm                              allyesconfig
arm64                            allyesconfig
arm                              allmodconfig
c6x                        evmc6457_defconfig
arm                       imx_v6_v7_defconfig
powerpc                      cm5200_defconfig
arm                        magician_defconfig
powerpc                     skiroot_defconfig
sh                        sh7757lcr_defconfig
powerpc                    adder875_defconfig
mips                         bigsur_defconfig
mips                           rs90_defconfig
powerpc                       holly_defconfig
arm                        trizeps4_defconfig
arm                    vt8500_v6_v7_defconfig
powerpc                    amigaone_defconfig
arm                        spear6xx_defconfig
arm                            mmp2_defconfig
ia64                        generic_defconfig
mips                    maltaup_xpa_defconfig
sh                          rsk7269_defconfig
arm                      tct_hammer_defconfig
arm                            xcep_defconfig
sh                   rts7751r2dplus_defconfig
mips                      maltasmvp_defconfig
nios2                         10m50_defconfig
sh                           se7712_defconfig
mips                         mpc30x_defconfig
powerpc                   currituck_defconfig
sh                           se7750_defconfig
arm                         at91_dt_defconfig
arm                              zx_defconfig
sh                           se7343_defconfig
sh                            hp6xx_defconfig
powerpc                      makalu_defconfig
powerpc                       maple_defconfig
mips                           ip28_defconfig
arm                         lpc18xx_defconfig
c6x                         dsk6455_defconfig
powerpc                     kilauea_defconfig
arc                          axs101_defconfig
arc                        vdk_hs38_defconfig
arm                           omap1_defconfig
arm                         lpc32xx_defconfig
mips                      fuloong2e_defconfig
arm                        mvebu_v5_defconfig
powerpc                      ppc6xx_defconfig
m68k                        mvme16x_defconfig
mips                        bcm47xx_defconfig
mips                            gpr_defconfig
powerpc                 mpc837x_mds_defconfig
alpha                               defconfig
ia64                             allyesconfig
riscv                    nommu_k210_defconfig
powerpc                 mpc832x_rdb_defconfig
c6x                                 defconfig
arm                        oxnas_v6_defconfig
arm                     davinci_all_defconfig
mips                     decstation_defconfig
powerpc                 mpc8315_rdb_defconfig
parisc                generic-64bit_defconfig
powerpc                mpc7448_hpc2_defconfig
sh                              ul2_defconfig
mips                      maltaaprp_defconfig
arc                            hsdk_defconfig
m68k                          sun3x_defconfig
powerpc                     tqm8548_defconfig
um                            kunit_defconfig
arc                     haps_hs_smp_defconfig
m68k                       bvme6000_defconfig
arm                         vf610m4_defconfig
arm                         mv78xx0_defconfig
powerpc                      ppc40x_defconfig
sh                          sdk7780_defconfig
powerpc                       eiger_defconfig
m68k                          multi_defconfig
arm                         socfpga_defconfig
riscv                            allyesconfig
arm                          badge4_defconfig
arm                           sunxi_defconfig
xtensa                generic_kc705_defconfig
powerpc                      obs600_defconfig
sh                   sh7770_generic_defconfig
powerpc                      arches_defconfig
sh                            migor_defconfig
arm                          pxa168_defconfig
sh                  sh7785lcr_32bit_defconfig
powerpc                      ppc44x_defconfig
i386                             alldefconfig
powerpc                 mpc8560_ads_defconfig
arm                          iop32x_defconfig
mips                malta_kvm_guest_defconfig
mips                        jmr3927_defconfig
powerpc                 mpc836x_rdk_defconfig
mips                malta_qemu_32r6_defconfig
powerpc                        cell_defconfig
powerpc                 mpc8313_rdb_defconfig
arm                        keystone_defconfig
riscv                          rv32_defconfig
powerpc                     tqm8541_defconfig
sh                ecovec24-romimage_defconfig
arm                        multi_v5_defconfig
powerpc                     tqm5200_defconfig
powerpc                   lite5200b_defconfig
m68k                        mvme147_defconfig
h8300                     edosk2674_defconfig
sh                           sh2007_defconfig
ia64                             alldefconfig
mips                         cobalt_defconfig
microblaze                    nommu_defconfig
arm                          gemini_defconfig
sparc                       sparc32_defconfig
powerpc                           allnoconfig
arm                       aspeed_g4_defconfig
arm                       imx_v4_v5_defconfig
sh                          rsk7264_defconfig
arm                       versatile_defconfig
sh                            titan_defconfig
arm                             rpc_defconfig
c6x                              alldefconfig
powerpc                      pmac32_defconfig
powerpc                     ksi8560_defconfig
powerpc                        icon_defconfig
arm                          pxa3xx_defconfig
arm                       cns3420vb_defconfig
arm                  colibri_pxa270_defconfig
ia64                             allmodconfig
ia64                                defconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
x86_64               randconfig-a004-20201101
x86_64               randconfig-a003-20201101
x86_64               randconfig-a005-20201101
x86_64               randconfig-a002-20201101
x86_64               randconfig-a006-20201101
x86_64               randconfig-a001-20201101
i386                 randconfig-a004-20201102
i386                 randconfig-a006-20201102
i386                 randconfig-a005-20201102
i386                 randconfig-a001-20201102
i386                 randconfig-a002-20201102
i386                 randconfig-a003-20201102
i386                 randconfig-a004-20201101
i386                 randconfig-a006-20201101
i386                 randconfig-a005-20201101
i386                 randconfig-a001-20201101
i386                 randconfig-a002-20201101
i386                 randconfig-a003-20201101
x86_64               randconfig-a012-20201102
x86_64               randconfig-a015-20201102
x86_64               randconfig-a011-20201102
x86_64               randconfig-a013-20201102
x86_64               randconfig-a014-20201102
x86_64               randconfig-a016-20201102
i386                 randconfig-a013-20201102
i386                 randconfig-a015-20201102
i386                 randconfig-a014-20201102
i386                 randconfig-a016-20201102
i386                 randconfig-a011-20201102
i386                 randconfig-a012-20201102
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a004-20201102
x86_64               randconfig-a005-20201102
x86_64               randconfig-a003-20201102
x86_64               randconfig-a002-20201102
x86_64               randconfig-a006-20201102
x86_64               randconfig-a001-20201102
x86_64               randconfig-a012-20201101
x86_64               randconfig-a015-20201101
x86_64               randconfig-a013-20201101
x86_64               randconfig-a011-20201101
x86_64               randconfig-a014-20201101
x86_64               randconfig-a016-20201101

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v2 2/2] misc: ocxl: config: Rename function attribute description
From: Andrew Donnellan @ 2020-11-03  0:01 UTC (permalink / raw)
  To: Lee Jones, gregkh, arnd; +Cc: Frederic Barrat, linuxppc-dev, linux-kernel
In-Reply-To: <20201102142001.560490-2-lee.jones@linaro.org>

On 3/11/20 1:20 am, Lee Jones wrote:
> Fixes the following W=1 kernel build warning(s):
> 
>   drivers/misc/ocxl/config.c:81: warning: Function parameter or member 'dev' not described in 'get_function_0'
>   drivers/misc/ocxl/config.c:81: warning: Excess function parameter 'device' description in 'get_function_0'
> 
> Cc: Frederic Barrat <fbarrat@linux.ibm.com>
> Cc: Andrew Donnellan <ajd@linux.ibm.com>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>

Thanks!

Acked-by: Andrew Donnellan <ajd@linux.ibm.com>

-- 
Andrew Donnellan              OzLabs, ADL Canberra
ajd@linux.ibm.com             IBM Australia Limited

^ permalink raw reply

* Re: [PATCH net-next 04/15] net: mlx5: Replace in_irq() usage.
From: Saeed Mahameed @ 2020-11-02 21:41 UTC (permalink / raw)
  To: Jakub Kicinski, Leon Romanovsky
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Thomas Gleixner,
	linux-arm-kernel, netdev, linux-wireless, Li Yang, linux-crypto,
	Jon Mason, linuxppc-dev, David S. Miller
In-Reply-To: <20201031095938.3878412e@kicinski-fedora-PC1C0HJN.hsd1.ca.comcast.net>

On Sat, 2020-10-31 at 09:59 -0700, Jakub Kicinski wrote:
> On Tue, 27 Oct 2020 23:54:43 +0100 Sebastian Andrzej Siewior wrote:
> > mlx5_eq_async_int() uses in_irq() to decide whether eq::lock needs
> > to be
> > acquired and released with spin_[un]lock() or the irq
> > saving/restoring
> > variants.
> > 
> > The usage of in_*() in drivers is phased out and Linus clearly
> > requested
> > that code which changes behaviour depending on context should
> > either be
> > separated or the context be conveyed in an argument passed by the
> > caller,
> > which usually knows the context.
> > 
> > mlx5_eq_async_int() knows the context via the action argument
> > already so
> > using it for the lock variant decision is a straight forward
> > replacement
> > for in_irq().
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Cc: Saeed Mahameed <saeedm@nvidia.com>
> > Cc: Leon Romanovsky <leon@kernel.org>
> > Cc: "David S. Miller" <davem@davemloft.net>
> > Cc: Jakub Kicinski <kuba@kernel.org>
> > Cc: linux-rdma@vger.kernel.org
> 
> Saeed, please pick this up into your tree.

Ack


^ permalink raw reply

* Re: [PATCH v2 net-next 3/3] crypto: caam: Replace in_irq() usage.
From: Horia Geantă @ 2020-11-02 21:13 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, netdev@vger.kernel.org
  Cc: Aymen Sghaier, Herbert Xu, Madalin Bucur, Leo Li,
	linux-crypto@vger.kernel.org, Jakub Kicinski, Thomas Gleixner,
	linuxppc-dev@lists.ozlabs.org, David S. Miller,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20201101232257.3028508-4-bigeasy@linutronix.de>

On 11/2/2020 1:23 AM, Sebastian Andrzej Siewior wrote:
> The driver uses in_irq() + in_serving_softirq() magic to decide if NAPI
> scheduling is required or packet processing.
> 
> The usage of in_*() in drivers is phased out and Linus clearly requested
> that code which changes behaviour depending on context should either be
> separated or the context be conveyed in an argument passed by the caller,
> which usually knows the context.
> 
> Use the `sched_napi' argument passed by the callback. It is set true if
> called from the interrupt handler and NAPI should be scheduled.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Cc: "Horia Geantă" <horia.geanta@nxp.com>
> Cc: Aymen Sghaier <aymen.sghaier@nxp.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Madalin Bucur <madalin.bucur@nxp.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Li Yang <leoyang.li@nxp.com>
> Cc: linux-crypto@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-arm-kernel@lists.infradead.org
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>

Thanks,
Horia

^ permalink raw reply

* Re: [PATCH v2 net-next 1/3] soc/fsl/qbman: Add an argument to signal if NAPI processing is required.
From: Horia Geantă @ 2020-11-02 21:10 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, netdev@vger.kernel.org, Priyanka Jain,
	Vakul Garg
  Cc: Aymen Sghaier, Herbert Xu, Madalin Bucur, Leo Li,
	linux-crypto@vger.kernel.org, Jakub Kicinski, Thomas Gleixner,
	linuxppc-dev@lists.ozlabs.org, David S. Miller,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20201101232257.3028508-2-bigeasy@linutronix.de>

On 11/2/2020 1:23 AM, Sebastian Andrzej Siewior wrote:
> dpaa_eth_napi_schedule() and caam_qi_napi_schedule() schedule NAPI if
> invoked from:
> 
>  - Hard interrupt context
>  - Any context which is not serving soft interrupts
> 
> Any context which is not serving soft interrupts includes hard interrupts
> so the in_irq() check is redundant. caam_qi_napi_schedule() has a comment
> about this:
> 
>         /*
>          * In case of threaded ISR, for RT kernels in_irq() does not return
>          * appropriate value, so use in_serving_softirq to distinguish between
>          * softirq and irq contexts.
>          */
>          if (in_irq() || !in_serving_softirq())
> 
> This has nothing to do with RT. Even on a non RT kernel force threaded
> interrupts run obviously in thread context and therefore in_irq() returns
> false when invoked from the handler.
> 
> The extension of the in_irq() check with !in_serving_softirq() was there
> when the drivers were added, but in the out of tree FSL BSP the original
> condition was in_irq() which got extended due to failures on RT.
> 
Looks like the initial FSL BSP commit adding this check is:
edca0b7a448a ("dpaa_eth: Fix Rx-stall issue in threaded ISR")
https://source.codeaurora.org/external/qoriq/qoriq-yocto-sdk/linux/commit/?h=fsl-sdk-v1.2&id=edca0b7a448ac18ef0a9b1238209b7595d511e19

This was done for dpaa_eth and the same logic was reused in caam.
In the process of upstreaming the development history got lost and
the comment in dpaa_eth was removed.

This was back in 2012 on a v3.0.34 kernel.
Not sure if/how things changed in the meantime, i.e. whether in_irq()
behaviour when called from softirq changed on -rt kernels (assuming this was
the problem Priyanka tried solving).

> The usage of in_xxx() in drivers is phased out and Linus clearly requested
> that code which changes behaviour depending on context should either be
> separated or the context be conveyed in an argument passed by the caller,
> which usually knows the context. Right he is, the above construct is
> clearly showing why.
> 
> The following callchains have been analyzed to end up in
> dpaa_eth_napi_schedule():
> 
> qman_p_poll_dqrr()
>   __poll_portal_fast()
>     fq->cb.dqrr()
>        dpaa_eth_napi_schedule()
> 
> portal_isr()
>   __poll_portal_fast()
>     fq->cb.dqrr()
>        dpaa_eth_napi_schedule()
> 
> Both need to schedule NAPI.
Only the call from interrupt context.

> The crypto part has another code path leading up to this:
>   kill_fq()
>      empty_retired_fq()
>        qman_p_poll_dqrr()
>          __poll_portal_fast()
>             fq->cb.dqrr()
>                dpaa_eth_napi_schedule()
> 
> kill_fq() is called from task context and ends up scheduling NAPI, but
> that's pointless and an unintended side effect of the !in_serving_softirq()
> check.
> 
Correct.

> The code path:
>   caam_qi_poll() -> qman_p_poll_dqrr()
> 
> is invoked from NAPI and I *assume* from crypto's NAPI device and not
> from qbman's NAPI device. I *guess* it is okay to skip scheduling NAPI
> (because this is what happens now) but could be changed if it is wrong
> due to `budget' handling.
> 
Looks good to me.

> Add an argument to __poll_portal_fast() which is true if NAPI needs to be
> scheduled. This requires propagating the value to the caller including
> `qman_cb_dqrr' typedef which is used by the dpaa and the crypto driver.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Cc: "Horia Geantă" <horia.geanta@nxp.com>
> Cc: Aymen Sghaier <aymen.sghaier@nxp.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Madalin Bucur <madalin.bucur@nxp.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Li Yang <leoyang.li@nxp.com>
> Cc: linux-crypto@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-arm-kernel@lists.infradead.org
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>

Thanks,
Horia

^ permalink raw reply

* Kernel panic from malloc() on SUSE 15.1?
From: Carl Jacobsen @ 2020-11-02 20:14 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 1915 bytes --]

I've got a SUSE 15.1 install (on ppc64le) that kernel panics on a very
simple
test program, built in a slightly unusual way.

I'm compiling on SUSE 12, using gcc 4.8.3. I'm linking to a static
copy of libcrypto.a (from openssl-1.1.1g), built without threads.
I have a 10 line C test program that compiles and runs fine on the
SUSE 12 system. If I compile the same program on SUSE 15.1 (with
gcc 7.4.1), it runs fine on SUSE 15.1.

But, if I run the version that I compiled on SUSE 12, on the SUSE 15.1
system, the call to RAND_status() gets to a malloc() and then panics.
(And, of course, if I just compile a call to malloc(), that runs fine
on both systems.) Here's the test program, it's really just a call to
RAND_status():

    #include <stdio.h>
    #include <openssl/rand.h>

    int main(int argc, char **argv)
    {
        int has_enough_data = RAND_status();
        printf("The PRNG %s been seeded with enough data\n",
               has_enough_data ? "HAS" : "has NOT");
        return 0;
    }

openssl is configured/built with:
    ./config no-shared no-dso no-threads -fPIC -ggdb3 -debug -static
    make

and the test program is compiled with:
    gcc -ggdb3 -o rand_test rand_test.c libcrypto.a

The kernel on SUSE 12 is: 3.12.28-4-default
And glibc is: 2.19

The kernel on SUSE 15.1 is: 4.12.14-197.18-default
And glibc is: 2.26

In a previous iteration it was panicking in pthread_once(), so
I compiled openssl without pthreads support, and now it panics
calling malloc().

If I link to the system-supplied libcrypto.so, it works fine, and
running the same tests on x86_64 works fine, it's only ppc64le
that panics, and only running code from the old system on the
new one.

I'm trying to dig further down into this to come up with a standalone
test case, but I'm wondering if anything here stands out as a known
problem, or if someone can point me in the right direction.

Thanks,
Carl Jacobsen

[-- Attachment #2: Type: text/html, Size: 2231 bytes --]

^ permalink raw reply

* Re: [PATCH 20/33] docs: ABI: testing: make the files compatible with ReST output
From: Gautham R Shenoy @ 2020-11-02 15:06 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: Gautham R. Shenoy, Jason A. Donenfeld, Heikki Krogerus,
	Peter Meerwald-Stadler, Petr Mladek, Linux Doc Mailing List,
	Alexander Shishkin, Nayna Jain, Alexandre Belloni, Mimi Zohar,
	Sebastian Reichel, Guenter Roeck, Bruno Meneguele, Vishal Verma,
	Pavel Machek, Hanjun Guo, Mauro Carvalho Chehab, netdev,
	Oleh Kravchenko, Dan Williams, Andrew Donnellan,
	Javier González, Fabrice Gasnier, Stefano Stabellini,
	linux-acpi, Jonathan Corbet, Chunyan Zhang, Mario Limonciello,
	linux-stm32, Lakshmi Ramasubramanian, Ludovic Desroches,
	Pawan Gupta, linux-arm-kernel, Frederic Barrat, Niklas Cassel,
	Len Brown, Juergen Gross, Mika Westerberg, Alexandre Torgue,
	linux-pm, linux-kernel, Richard Cochran, linuxppc-dev,
	Baolin Wang, Lars-Peter Clausen, Dan Murphy, Orson Zhai,
	Philippe Bergheaud, xen-devel, Boris Ostrovsky, Andy Shevchenko,
	Benson Leung, Konstantin Khlebnikov, Jens Axboe, Felipe Balbi,
	Kranthi Kuntala, Martin K. Petersen, linux-mm, Greg Kroah-Hartman,
	linux-usb, Rafael J. Wysocki, Nicolas Ferre, linux-iio,
	Thinh Nguyen, Sergey Senozhatsky, Thomas Gleixner,
	Leonid Maksymchuk, Maxime Coquelin, Johannes Thumshirn,
	Enric Balletbo i Serra, Vineela Tummalapalli, Peter Rosin,
	Jonathan Cameron, Mike Kravetz
In-Reply-To: <4ebaaa0320101479e392ce2db4b62e24fdf15ef1.1603893146.git.mchehab+huawei@kernel.org>

On Wed, Oct 28, 2020 at 03:23:18PM +0100, Mauro Carvalho Chehab wrote:
> From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
> 
> Some files over there won't parse well by Sphinx.
> 

[..snip..]



> diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
> index b555df825447..274c337ec6a9 100644
> --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
> +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
> @@ -151,23 +151,28 @@ Description:
>  		The processor idle states which are available for use have the
>  		following attributes:
> 
> -		name: (RO) Name of the idle state (string).
> +		======== ==== =================================================
> +		name:	 (RO) Name of the idle state (string).
> 
>  		latency: (RO) The latency to exit out of this idle state (in
> -		microseconds).
> +			      microseconds).
> 
> -		power: (RO) The power consumed while in this idle state (in
> -		milliwatts).
> +		power:   (RO) The power consumed while in this idle state (in
> +			      milliwatts).
> 
> -		time: (RO) The total time spent in this idle state (in microseconds).
> +		time:    (RO) The total time spent in this idle state
> +			      (in microseconds).
> 
> -		usage: (RO) Number of times this state was entered (a count).
> +		usage:	 (RO) Number of times this state was entered (a count).
> 
> -		above: (RO) Number of times this state was entered, but the
> -		       observed CPU idle duration was too short for it (a count).
> +		above:	 (RO) Number of times this state was entered, but the
> +			      observed CPU idle duration was too short for it
> +			      (a count).
> 
> -		below: (RO) Number of times this state was entered, but the
> -		       observed CPU idle duration was too long for it (a count).
> +		below: 	 (RO) Number of times this state was entered, but the
> +			      observed CPU idle duration was too long for it
> +			      (a count).
> +		======== ==== =================================================
> 
>  What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
>  Date:		February 2008
> @@ -290,6 +295,7 @@ Description:	Processor frequency boosting control
>  		This switch controls the boost setting for the whole system.
>  		Boosting allows the CPU and the firmware to run at a frequency
>  		beyound it's nominal limit.
> +
>  		More details can be found in
>  		Documentation/admin-guide/pm/cpufreq.rst
> 

The changes to cpuidle states look good to me.


[..snip..]

> @@ -414,30 +434,30 @@ Description:	POWERNV CPUFreq driver's frequency throttle stats directory and
>  		throttle attributes exported in the 'throttle_stats' directory:
> 
>  		- turbo_stat : This file gives the total number of times the max
> -		frequency is throttled to lower frequency in turbo (at and above
> -		nominal frequency) range of frequencies.
> +		  frequency is throttled to lower frequency in turbo (at and above
> +		  nominal frequency) range of frequencies.
> 
>  		- sub_turbo_stat : This file gives the total number of times the
> -		max frequency is throttled to lower frequency in sub-turbo(below
> -		nominal frequency) range of frequencies.
> +		  max frequency is throttled to lower frequency in sub-turbo(below
> +		  nominal frequency) range of frequencies.
> 
>  		- unthrottle : This file gives the total number of times the max
> -		frequency is unthrottled after being throttled.
> +		  frequency is unthrottled after being throttled.
> 
>  		- powercap : This file gives the total number of times the max
> -		frequency is throttled due to 'Power Capping'.
> +		  frequency is throttled due to 'Power Capping'.
> 
>  		- overtemp : This file gives the total number of times the max
> -		frequency is throttled due to 'CPU Over Temperature'.
> +		  frequency is throttled due to 'CPU Over Temperature'.
> 
>  		- supply_fault : This file gives the total number of times the
> -		max frequency is throttled due to 'Power Supply Failure'.
> +		  max frequency is throttled due to 'Power Supply Failure'.
> 
>  		- overcurrent : This file gives the total number of times the
> -		max frequency is throttled due to 'Overcurrent'.
> +		  max frequency is throttled due to 'Overcurrent'.
> 
>  		- occ_reset : This file gives the total number of times the max
> -		frequency is throttled due to 'OCC Reset'.
> +		  frequency is throttled due to 'OCC Reset'.
> 
>  		The sysfs attributes representing different throttle reasons like
>  		powercap, overtemp, supply_fault, overcurrent and occ_reset map to


This hunk for the powernv cpufreq driver looks good to me.
For these two hunks,

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>



^ permalink raw reply

* Re: [PATCH v2 20/39] docs: ABI: testing: make the files compatible with ReST output
From: Mauro Carvalho Chehab @ 2020-11-02 14:42 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Gautham R. Shenoy, Jason A. Donenfeld, Heikki Krogerus,
	Peter Meerwald-Stadler, Petr Mladek, Linux Doc Mailing List,
	Alexander Shishkin, Nayna Jain, Jonathan Cameron,
	Alexandre Belloni, Mimi Zohar, Sebastian Reichel, linux-mm,
	Bruno Meneguele, Vishal Verma, Pavel Machek, Hanjun Guo,
	Guenter Roeck, netdev, Oleh Kravchenko, Dan Williams,
	Andrew Donnellan, Javier González, Fabrice Gasnier,
	Mark Gross, linux-acpi, Jonathan Corbet, Chunyan Zhang,
	Mario Limonciello, linux-stm32, Lakshmi Ramasubramanian,
	Ludovic Desroches, Pawan Gupta, linux-arm-kernel, Tom Rix,
	Frederic Barrat, Niklas Cassel, Len Brown, Juergen Gross,
	linuxppc-dev, Mika Westerberg, Alexandre Torgue, linux-pm,
	linux-kernel, Richard Cochran, Oded Gabbay, Baolin Wang,
	Lars-Peter Clausen, Dan Murphy, Orson Zhai, Philippe Bergheaud,
	xen-devel, Boris Ostrovsky, Andy Shevchenko, Benson Leung,
	Konstantin Khlebnikov, Jens Axboe, Felipe Balbi, Kranthi Kuntala,
	Martin K. Petersen, Johannes Thumshirn, linux-usb,
	Rafael J. Wysocki, Nicolas Ferre, linux-iio, Thinh Nguyen,
	Sergey Senozhatsky, Stefano Stabellini, Thomas Gleixner,
	Leonid Maksymchuk, Maxime Coquelin, Enric Balletbo i Serra,
	Vaibhav Jain, Vineela Tummalapalli, Peter Rosin, Jonathan Cameron,
	Mike Kravetz
In-Reply-To: <20201102124641.GA881895@kroah.com>

Em Mon, 2 Nov 2020 13:46:41 +0100
Greg Kroah-Hartman <gregkh@linuxfoundation.org> escreveu:

> On Mon, Nov 02, 2020 at 12:04:36PM +0100, Fabrice Gasnier wrote:
> > On 10/30/20 11:09 AM, Mauro Carvalho Chehab wrote:  
> > > Em Fri, 30 Oct 2020 10:19:12 +0100
> > > Fabrice Gasnier <fabrice.gasnier@st.com> escreveu:
> > >   
> > >> Hi Mauro,
> > >>
> > >> [...]
> > >>  
> > >>>  
> > >>> +What:		/sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
> > >>> +KernelVersion:	4.12
> > >>> +Contact:	benjamin.gaignard@st.com
> > >>> +Description:
> > >>> +		Reading returns the list possible quadrature modes.
> > >>> +
> > >>> +What:		/sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode
> > >>> +KernelVersion:	4.12
> > >>> +Contact:	benjamin.gaignard@st.com
> > >>> +Description:
> > >>> +		Configure the device counter quadrature modes:
> > >>> +
> > >>> +		channel_A:
> > >>> +			Encoder A input serves as the count input and B as
> > >>> +			the UP/DOWN direction control input.
> > >>> +
> > >>> +		channel_B:
> > >>> +			Encoder B input serves as the count input and A as
> > >>> +			the UP/DOWN direction control input.
> > >>> +
> > >>> +		quadrature:
> > >>> +			Encoder A and B inputs are mixed to get direction
> > >>> +			and count with a scale of 0.25.
> > >>> +    
> > >>  
> > > 
> > > Hi Fabrice,
> > >   
> > >> I just noticed that since Jonathan question in v1.
> > >>
> > >> Above ABI has been moved in the past as discussed in [1]. You can take a
> > >> look at:
> > >> b299d00 IIO: stm32: Remove quadrature related functions from trigger driver
> > >>
> > >> Could you please remove the above chunk ?
> > >>
> > >> With that, for the stm32 part:
> > >> Acked-by: Fabrice Gasnier <fabrice.gasnier@st.com>  
> > > 
> > > 
> > > Hmm... probably those were re-introduced due to a rebase. This
> > > series were originally written about 1,5 years ago.
> > > 
> > > I'll drop those hunks.  
> > 
> > Hi Mauro, Greg,
> > 
> > I just figured out this patch has been applied with above hunk.
> > 
> > This should be dropped: is there a fix on its way already ?
> > (I may have missed it)  
> 
> Can you send a fix for just this hunk?

Hmm...

	$ git grep /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
	Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8:What:                /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
	Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32:What:             /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
	Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:What:               /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available

Even re-doing the changes from 
changeset b299d00420e2 ("IIO: stm32: Remove quadrature related functions from trigger driver")
at Documentation/ABI/testing/sysfs-bus-iio-timer-stm32, there's still
a third duplicate of some of those, as reported by the script:

	$ ./scripts/get_abi.pl validate 2>&1|grep quadra
	Warning: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:117  Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32:14
	Warning: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available is defined 3 times:  Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8:2  Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:111  Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32:8

As in_count_quadrature_mode_available is also defined at:
	Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8:2

The best approach here seems to be a patch that also drops the other
duplicates of this entry, moving in_count_quadrature_mode_available
to a generic node, probably inside
Documentation/ABI/testing/sysfs-bus-iio.

Comments?

Thanks,
Mauro

PS.: the IIO subsystem is the one that currently has the most duplicated
ABI entries:

$ ./scripts/get_abi.pl validate 2>&1|grep iio
Warning: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-icm42600:0  Documentation/ABI/testing/sysfs-bus-iio:394
Warning: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-icm42600:1  Documentation/ABI/testing/sysfs-bus-iio:395
Warning: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-icm42600:2  Documentation/ABI/testing/sysfs-bus-iio:396
Warning: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-icm42600:3  Documentation/ABI/testing/sysfs-bus-iio:397
Warning: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-icm42600:4  Documentation/ABI/testing/sysfs-bus-iio:398
Warning: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-icm42600:5  Documentation/ABI/testing/sysfs-bus-iio:399
Warning: /sys/bus/iio/devices/iio:deviceX/in_count0_preset is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:100  Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32:0
Warning: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:117  Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32:14
Warning: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available is defined 3 times:  Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8:2  Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:111  Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32:8
Warning: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371:0  Documentation/ABI/testing/sysfs-bus-iio:599
Warning: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_powerdown is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371:36  Documentation/ABI/testing/sysfs-bus-iio:588
Warning: /sys/bus/iio/devices/iio:deviceX/out_currentY_raw is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als:43  Documentation/ABI/testing/sysfs-bus-iio-health-afe440x:38
Warning: /sys/bus/iio/devices/iio:deviceX/out_current_heater_raw is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc2010:0  Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc100x:0
Warning: /sys/bus/iio/devices/iio:deviceX/out_current_heater_raw_available is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc2010:1  Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc100x:1
Warning: /sys/bus/iio/devices/iio:deviceX/sensor_sensitivity is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-distance-srf08:0  Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935:8
Warning: /sys/bus/iio/devices/triggerX/sampling_frequency is defined 2 times:  Documentation/ABI/testing/sysfs-bus-iio-timer-stm32:92  Documentation/ABI/testing/sysfs-bus-iio:45

^ permalink raw reply

* Re: [PATCH v2 20/39] docs: ABI: testing: make the files compatible with ReST output
From: Greg Kroah-Hartman @ 2020-11-02 12:46 UTC (permalink / raw)
  To: Fabrice Gasnier
  Cc: Gautham R. Shenoy, Jason A. Donenfeld, Heikki Krogerus,
	Peter Meerwald-Stadler, Petr Mladek, Linux Doc Mailing List,
	Alexander Shishkin, Nayna Jain, Jonathan Cameron,
	Alexandre Belloni, Mimi Zohar, Sebastian Reichel, linux-mm,
	Bruno Meneguele, Vishal Verma, Pavel Machek, Hanjun Guo,
	Guenter Roeck, netdev, Oleh Kravchenko, Dan Williams,
	Andrew Donnellan, Javier González, Lars-Peter Clausen,
	Mark Gross, linux-acpi, Jonathan Corbet, Mauro Carvalho Chehab,
	Chunyan Zhang, Mario Limonciello, linux-stm32,
	Lakshmi Ramasubramanian, Ludovic Desroches, Pawan Gupta,
	linux-arm-kernel, Tom Rix, Frederic Barrat, Niklas Cassel,
	Len Brown, Juergen Gross, linuxppc-dev, Mika Westerberg,
	Alexandre Torgue, linux-pm, linux-kernel, Richard Cochran,
	Oded Gabbay, Baolin Wang, Stefano Stabellini, Dan Murphy,
	Orson Zhai, Philippe Bergheaud, xen-devel, Boris Ostrovsky,
	Andy Shevchenko, Benson Leung, Konstantin Khlebnikov, Jens Axboe,
	Felipe Balbi, Kranthi Kuntala, Martin K. Petersen,
	Johannes Thumshirn, linux-usb, Rafael J. Wysocki, Nicolas Ferre,
	linux-iio, Thinh Nguyen, Sergey Senozhatsky, Thomas Gleixner,
	Leonid Maksymchuk, Maxime Coquelin, Enric Balletbo i Serra,
	Vaibhav Jain, Vineela Tummalapalli, Peter Rosin, Jonathan Cameron,
	Mike Kravetz
In-Reply-To: <cb586ea3-b6e6-4e48-2344-2bd641e5323f@st.com>

On Mon, Nov 02, 2020 at 12:04:36PM +0100, Fabrice Gasnier wrote:
> On 10/30/20 11:09 AM, Mauro Carvalho Chehab wrote:
> > Em Fri, 30 Oct 2020 10:19:12 +0100
> > Fabrice Gasnier <fabrice.gasnier@st.com> escreveu:
> > 
> >> Hi Mauro,
> >>
> >> [...]
> >>
> >>>  
> >>> +What:		/sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
> >>> +KernelVersion:	4.12
> >>> +Contact:	benjamin.gaignard@st.com
> >>> +Description:
> >>> +		Reading returns the list possible quadrature modes.
> >>> +
> >>> +What:		/sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode
> >>> +KernelVersion:	4.12
> >>> +Contact:	benjamin.gaignard@st.com
> >>> +Description:
> >>> +		Configure the device counter quadrature modes:
> >>> +
> >>> +		channel_A:
> >>> +			Encoder A input serves as the count input and B as
> >>> +			the UP/DOWN direction control input.
> >>> +
> >>> +		channel_B:
> >>> +			Encoder B input serves as the count input and A as
> >>> +			the UP/DOWN direction control input.
> >>> +
> >>> +		quadrature:
> >>> +			Encoder A and B inputs are mixed to get direction
> >>> +			and count with a scale of 0.25.
> >>> +  
> >>
> > 
> > Hi Fabrice,
> > 
> >> I just noticed that since Jonathan question in v1.
> >>
> >> Above ABI has been moved in the past as discussed in [1]. You can take a
> >> look at:
> >> b299d00 IIO: stm32: Remove quadrature related functions from trigger driver
> >>
> >> Could you please remove the above chunk ?
> >>
> >> With that, for the stm32 part:
> >> Acked-by: Fabrice Gasnier <fabrice.gasnier@st.com>
> > 
> > 
> > Hmm... probably those were re-introduced due to a rebase. This
> > series were originally written about 1,5 years ago.
> > 
> > I'll drop those hunks.
> 
> Hi Mauro, Greg,
> 
> I just figured out this patch has been applied with above hunk.
> 
> This should be dropped: is there a fix on its way already ?
> (I may have missed it)

Can you send a fix for just this hunk?

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH v2 20/39] docs: ABI: testing: make the files compatible with ReST output
From: Fabrice Gasnier @ 2020-11-02 11:04 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Greg Kroah-Hartman
  Cc: Gautham R. Shenoy, Jason A. Donenfeld, Heikki Krogerus,
	Peter Meerwald-Stadler, Petr Mladek, Linux Doc Mailing List,
	Alexander Shishkin, Nayna Jain, Jonathan Cameron,
	Alexandre Belloni, Mimi Zohar, Sebastian Reichel, linux-mm,
	Bruno Meneguele, Vishal Verma, Pavel Machek, Hanjun Guo,
	Guenter Roeck, netdev, Oleh Kravchenko, Dan Williams,
	Andrew Donnellan, Javier González, Lars-Peter Clausen,
	Mark Gross, linux-acpi, Jonathan Corbet, Chunyan Zhang,
	Mario Limonciello, linux-stm32, Lakshmi Ramasubramanian,
	Ludovic Desroches, Pawan Gupta, linux-arm-kernel, Tom Rix,
	Frederic Barrat, Niklas Cassel, Len Brown, Juergen Gross,
	linuxppc-dev, Mika Westerberg, Alexandre Torgue, linux-pm,
	linux-kernel, Richard Cochran, Oded Gabbay, Baolin Wang,
	Stefano Stabellini, Dan Murphy, Orson Zhai, Philippe Bergheaud,
	xen-devel, Boris Ostrovsky, Andy Shevchenko, Benson Leung,
	Konstantin Khlebnikov, Jens Axboe, Felipe Balbi, Kranthi Kuntala,
	Martin K. Petersen, Johannes Thumshirn, linux-usb,
	Rafael J. Wysocki, Nicolas Ferre, linux-iio, Thinh Nguyen,
	Sergey Senozhatsky, Thomas Gleixner, Leonid Maksymchuk,
	Maxime Coquelin, Enric Balletbo i Serra, Vaibhav Jain,
	Vineela Tummalapalli, Peter Rosin, Jonathan Cameron, Mike Kravetz
In-Reply-To: <20201030110925.3e09d59e@coco.lan>

On 10/30/20 11:09 AM, Mauro Carvalho Chehab wrote:
> Em Fri, 30 Oct 2020 10:19:12 +0100
> Fabrice Gasnier <fabrice.gasnier@st.com> escreveu:
> 
>> Hi Mauro,
>>
>> [...]
>>
>>>  
>>> +What:		/sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
>>> +KernelVersion:	4.12
>>> +Contact:	benjamin.gaignard@st.com
>>> +Description:
>>> +		Reading returns the list possible quadrature modes.
>>> +
>>> +What:		/sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode
>>> +KernelVersion:	4.12
>>> +Contact:	benjamin.gaignard@st.com
>>> +Description:
>>> +		Configure the device counter quadrature modes:
>>> +
>>> +		channel_A:
>>> +			Encoder A input servers as the count input and B as
>>> +			the UP/DOWN direction control input.
>>> +
>>> +		channel_B:
>>> +			Encoder B input serves as the count input and A as
>>> +			the UP/DOWN direction control input.
>>> +
>>> +		quadrature:
>>> +			Encoder A and B inputs are mixed to get direction
>>> +			and count with a scale of 0.25.
>>> +  
>>
> 
> Hi Fabrice,
> 
>> I just noticed that since Jonathan question in v1.
>>
>> Above ABI has been moved in the past as discussed in [1]. You can take a
>> look at:
>> b299d00 IIO: stm32: Remove quadrature related functions from trigger driver
>>
>> Could you please remove the above chunk ?
>>
>> With that, for the stm32 part:
>> Acked-by: Fabrice Gasnier <fabrice.gasnier@st.com>
> 
> 
> Hmm... probably those were re-introduced due to a rebase. This
> series were originally written about 1,5 years ago.
> 
> I'll drop those hunks.

Hi Mauro, Greg,

I just figured out this patch has been applied with above hunk.

This should be dropped: is there a fix on its way already ?
(I may have missed it)

Please advise,
Fabrice
> 
> Thanks!
> Mauro
> 

^ permalink raw reply

* [PATCH 11/11 v2.2] ftrace: Add recording of functions that caused recursion
From: Steven Rostedt @ 2020-11-02 19:23 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Anton Vorontsov, linux-doc, Peter Zijlstra,
	Sebastian Andrzej Siewior, Kamalesh Babulal, James E.J. Bottomley,
	Guo Ren, H. Peter Anvin, live-patching, Miroslav Benes,
	Ingo Molnar, linux-s390, Joe Lawrence, Jonathan Corbet,
	Mauro Carvalho Chehab, Helge Deller, x86, linux-csky,
	Christian Borntraeger, Kees Cook, Vasily Gorbik, Heiko Carstens,
	Jiri Kosina, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
	Tony Luck, linux-parisc, linux-kernel, Masami Hiramatsu,
	Colin Cross, Paul Mackerras, Andrew Morton, linuxppc-dev
In-Reply-To: <20201102124606.72bd89c5@gandalf.local.home>

From c532ff6b048dd4a12943b05c7b8ce30666c587c8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 29 Oct 2020 15:27:06 -0400
Subject: [PATCH] ftrace: Add recording of functions that caused recursion

This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file
"recursed_functions" all the functions that caused recursion while a
callback to the function tracer was running.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Guo Ren <guoren@kernel.org>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-csky@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: live-patching@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---

Changes since v2.1:
  Added EXPORT_SYMBOL_GPL() to ftrace_record_recursion() function

 Documentation/trace/ftrace-uses.rst   |   6 +-
 arch/csky/kernel/probes/ftrace.c      |   2 +-
 arch/parisc/kernel/ftrace.c           |   2 +-
 arch/powerpc/kernel/kprobes-ftrace.c  |   2 +-
 arch/s390/kernel/ftrace.c             |   2 +-
 arch/x86/kernel/kprobes/ftrace.c      |   2 +-
 fs/pstore/ftrace.c                    |   2 +-
 include/linux/trace_recursion.h       |  32 +++-
 kernel/livepatch/patch.c              |   2 +-
 kernel/trace/Kconfig                  |  25 +++
 kernel/trace/Makefile                 |   1 +
 kernel/trace/ftrace.c                 |   4 +-
 kernel/trace/trace_event_perf.c       |   2 +-
 kernel/trace/trace_functions.c        |   2 +-
 kernel/trace/trace_output.c           |   6 +-
 kernel/trace/trace_output.h           |   1 +
 kernel/trace/trace_recursion_record.c | 236 ++++++++++++++++++++++++++
 17 files changed, 309 insertions(+), 20 deletions(-)
 create mode 100644 kernel/trace/trace_recursion_record.c

diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 86cd14b8e126..5981d5691745 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -118,7 +118,7 @@ can help in this regard. If you start your code with:
 
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
@@ -130,7 +130,9 @@ The code in between will be safe to use, even if it ends up calling a
 function that the callback is tracing. Note, on success,
 ftrace_test_recursion_trylock() will disable preemption, and the
 ftrace_test_recursion_unlock() will enable it again (if it was previously
-enabled).
+enabled). The instruction pointer (ip) and its parent (parent_ip) are passed to
+ftrace_test_recursion_trylock() to record where the recursion happened
+(if CONFIG_FTRACE_RECORD_RECURSION is set).
 
 Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops
 (as explained below), then a helper trampoline will be used to test
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index 5eb2604fdf71..f30b179924ef 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -18,7 +18,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 4b1fdf15662c..8b0ed7c5a4ab 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -210,7 +210,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 5df8d50c65ae..fdfee39938ea 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 	struct kprobe_ctlblk *kcb;
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(nip, parent_nip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 88466d7fb6b2..a1556333d481 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -204,7 +204,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index a40a6cdfcca3..954d930a7127 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe_ctlblk *kcb;
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 816210fc5d3a..adb0935eb062 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -41,7 +41,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
 	if (unlikely(oops_in_progress))
 		return;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index ac3d73484cb2..1cba5fe8777a 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -142,7 +142,28 @@ static __always_inline int trace_get_context_bit(void)
 			pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
 }
 
-static __always_inline int trace_test_and_set_recursion(int start, int max)
+#ifdef CONFIG_FTRACE_RECORD_RECURSION
+extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
+/*
+* The paranoid_test check can cause dropped reports (unlikely), but
+* if the recursion is common, it will likely still be recorded later.
+* But the paranoid_test is needed to make sure we don't crash.
+*/
+# define do_ftrace_record_recursion(ip, pip)				\
+	do {								\
+		static atomic_t paranoid_test;				\
+		if (!atomic_read(&paranoid_test)) {			\
+			atomic_inc(&paranoid_test);			\
+			ftrace_record_recursion(ip, pip);		\
+			atomic_dec(&paranoid_test);			\
+		}							\
+	} while (0)
+#else
+# define do_ftrace_record_recursion(ip, pip)	do { } while (0)
+#endif
+
+static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
+							int start, int max)
 {
 	unsigned int val = current->trace_recursion;
 	int bit;
@@ -158,8 +179,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
 		 * a switch between contexts. Allow for a single recursion.
 		 */
 		bit = TRACE_TRANSITION_BIT;
-		if (trace_recursion_test(bit))
+		if (trace_recursion_test(bit)) {
+			do_ftrace_record_recursion(ip, pip);
 			return -1;
+		}
 		trace_recursion_set(bit);
 		barrier();
 		return bit + 1;
@@ -199,9 +222,10 @@ static __always_inline void trace_clear_recursion(int bit)
  * Returns: -1 if a recursion happened.
  *           >= 0 if no recursion
  */
-static __always_inline int ftrace_test_recursion_trylock(void)
+static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
+							 unsigned long parent_ip)
 {
-	return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
 }
 
 /**
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 15480bf3ce88..875c5dbbdd33 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -49,7 +49,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	ops = container_of(fops, struct klp_ops, fops);
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (WARN_ON_ONCE(bit < 0))
 		return;
 	/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a4020c0b4508..9b11c096d139 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -727,6 +727,31 @@ config TRACE_EVAL_MAP_FILE
 
 	If unsure, say N.
 
+config FTRACE_RECORD_RECURSION
+	bool "Record functions that recurse in function tracing"
+	depends on FUNCTION_TRACER
+	help
+	  All callbacks that attach to the function tracing have some sort
+	  of protection against recursion. Even though the protection exists,
+	  it adds overhead. This option will create a file in the tracefs
+	  file system called "recursed_functions" that will list the functions
+	  that triggered a recursion.
+
+	  This will add more overhead to cases that have recursion.
+
+	  If unsure, say N
+
+config FTRACE_RECORD_RECURSION_SIZE
+	int "Max number of recursed functions to record"
+	default	128
+	depends on FTRACE_RECORD_RECURSION
+	help
+	  This defines the limit of number of functions that can be
+	  listed in the "recursed_functions" file, that lists all
+	  the functions that caused a recursion to happen.
+	  This file can be reset, but the limit can not change in
+	  size at runtime.
+
 config GCOV_PROFILE_FTRACE
 	bool "Enable GCOV profiling on ftrace subsystem"
 	depends on GCOV_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e153be351548..7e44cea89fdc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
 obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
+obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39f2bba89b76..03aad2b5cd5e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6918,7 +6918,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	struct ftrace_ops *op;
 	int bit;
 
-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
 	if (bit < 0)
 		return;
 
@@ -6993,7 +6993,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 {
 	int bit;
 
-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
 	if (bit < 0)
 		return;
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a2b9fddb8148..1b202e28dfaa 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -447,7 +447,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 	if ((unsigned long)ops->private != smp_processor_id())
 		return;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 89c414ce1388..646eda6c44a5 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -141,7 +141,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (unlikely(!tr->function_enabled))
 		return;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 000e9dc224c6..92b1575ae0ca 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name)
 }
 #endif /* CONFIG_KRETPROBES */
 
-static void
-seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
+void
+trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
 {
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
@@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 		goto out;
 	}
 
-	seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
+	trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
 
 	if (sym_flags & TRACE_ITER_SYM_ADDR)
 		trace_seq_printf(s, " <" IP_FMT ">", ip);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 2f742b74e7e6..4c954636caf0 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -16,6 +16,7 @@ extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
 
+extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
 extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c
new file mode 100644
index 000000000000..b2edac1fe156
--- /dev/null
+++ b/kernel/trace/trace_recursion_record.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+
+struct recursed_functions {
+	unsigned long		ip;
+	unsigned long		parent_ip;
+};
+
+static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE];
+static atomic_t nr_records;
+
+/*
+ * Cache the last found function. Yes, updates to this are racy, but
+ * so is memory cache ;-)
+ */
+static unsigned long cached_function;
+
+void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
+{
+	int index = 0;
+	int i;
+	unsigned long old;
+
+ again:
+	/* First check the last one recorded */
+	if (ip == cached_function)
+		return;
+
+	i = atomic_read(&nr_records);
+	/* nr_records is -1 when clearing records */
+	smp_mb__after_atomic();
+	if (i < 0)
+		return;
+
+	/*
+	 * If there are two writers and this writer comes in second,
+	 * the cmpxchg() below to update the ip will fail. Then this
+	 * writer will try again. It is possible that index will now
+	 * be greater than nr_records. This is because the writer
+	 * that succeeded has not updated the nr_records yet.
+	 * This writer could keep trying again until the other writer
+	 * updates nr_records. But if the other writer takes an
+	 * interrupt, and that interrupt locks up that CPU, we do
+	 * not want this CPU to lock up due to the recursion protection,
+	 * and have a bug report showing this CPU as the cause of
+	 * locking up the computer. To not lose this record, this
+	 * writer will simply use the next position to update the
+	 * recursed_functions, and it will update the nr_records
+	 * accordingly.
+	 */
+	if (index < i)
+		index = i;
+	if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
+		return;
+
+	for (i = index - 1; i >= 0; i--) {
+		if (recursed_functions[i].ip == ip) {
+			cached_function = ip;
+			return;
+		}
+	}
+
+	cached_function = ip;
+
+	/*
+	 * We only want to add a function if it hasn't been added before.
+	 * Add to the current location before incrementing the count.
+	 * If it fails to add, then move on to the next index
+	 * and try again.
+	 */
+	old = cmpxchg(&recursed_functions[index].ip, 0, ip);
+	if (old != 0) {
+		/* Did something else already add this for us? */
+		if (old == ip)
+			return;
+		/* Try the next location (bump index and retry) */
+		index++;
+		goto again;
+	}
+
+	recursed_functions[index].parent_ip = parent_ip;
+
+	/*
+	 * It's still possible that we could race with the clearing
+	 *    CPU0                                    CPU1
+	 *    ----                                    ----
+	 *                                       ip = func
+	 *  nr_records = -1;
+	 *  recursed_functions[0] = 0;
+	 *                                       i = -1
+	 *                                       if (i < 0)
+	 *  nr_records = 0;
+	 *  (new recursion detected)
+	 *      recursed_functions[0] = func
+	 *                                            cmpxchg(recursed_functions[0],
+	 *                                                    func, 0)
+	 *
+	 * But the worst that could happen is that we get a zero in
+	 * the recursed_functions array, and it's likely that "func" will
+	 * be recorded again.
+	 */
+	i = atomic_read(&nr_records);
+	smp_mb__after_atomic();
+	if (i < 0)
+		cmpxchg(&recursed_functions[index].ip, ip, 0);
+	else if (i <= index)
+		atomic_cmpxchg(&nr_records, i, index + 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_record_recursion);
+
+static DEFINE_MUTEX(recursed_function_lock);
+static struct trace_seq *tseq;
+
+static void *recursed_function_seq_start(struct seq_file *m, loff_t *pos)
+{
+	void *ret = NULL;
+	int index;
+
+	mutex_lock(&recursed_function_lock);
+	index = atomic_read(&nr_records);
+	if (*pos < index) {
+		ret = &recursed_functions[*pos];
+	}
+
+	tseq = kzalloc(sizeof(*tseq), GFP_KERNEL);
+	if (!tseq)
+		return ERR_PTR(-ENOMEM);
+
+	trace_seq_init(tseq);
+
+	return ret;
+}
+
+static void *recursed_function_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	int index;
+	int p;
+
+	index = atomic_read(&nr_records);
+	p = ++(*pos);
+
+	return p < index ? &recursed_functions[p] : NULL;
+}
+
+static void recursed_function_seq_stop(struct seq_file *m, void *v)
+{
+	kfree(tseq);
+	mutex_unlock(&recursed_function_lock);
+}
+
+static int recursed_function_seq_show(struct seq_file *m, void *v)
+{
+	struct recursed_functions *record = v;
+	int ret = 0;
+
+	if (record) {
+		trace_seq_print_sym(tseq, record->parent_ip, true);
+		trace_seq_puts(tseq, ":\t");
+		trace_seq_print_sym(tseq, record->ip, true);
+		trace_seq_putc(tseq, '\n');
+		ret = trace_print_seq(m, tseq);
+	}
+
+	return ret;
+}
+
+static const struct seq_operations recursed_function_seq_ops = {
+	.start  = recursed_function_seq_start,
+	.next   = recursed_function_seq_next,
+	.stop   = recursed_function_seq_stop,
+	.show   = recursed_function_seq_show
+};
+
+static int recursed_function_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	mutex_lock(&recursed_function_lock);
+	/* If this file was opened for write, then erase contents */
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		/* disable updating records */
+		atomic_set(&nr_records, -1);
+		smp_mb__after_atomic();
+		memset(recursed_functions, 0, sizeof(recursed_functions));
+		smp_wmb();
+		/* enable them again */
+		atomic_set(&nr_records, 0);
+	}
+	if (file->f_mode & FMODE_READ)
+		ret = seq_open(file, &recursed_function_seq_ops);
+	mutex_unlock(&recursed_function_lock);
+
+	return ret;
+}
+
+static ssize_t recursed_function_write(struct file *file,
+				       const char __user *buffer,
+				       size_t count, loff_t *ppos)
+{
+	return count;
+}
+
+static int recursed_function_release(struct inode *inode, struct file *file)
+{
+	if (file->f_mode & FMODE_READ)
+		seq_release(inode, file);
+	return 0;
+}
+
+static const struct file_operations recursed_functions_fops = {
+	.open           = recursed_function_open,
+	.write		= recursed_function_write,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = recursed_function_release,
+};
+
+__init static int create_recursed_functions(void)
+{
+	struct dentry *dentry;
+
+	dentry = trace_create_file("recursed_functions", 0644, NULL, NULL,
+				   &recursed_functions_fops);
+	if (!dentry)
+		pr_warn("WARNING: Failed to create recursed_functions\n");
+	return 0;
+}
+
+fs_initcall(create_recursed_functions);
-- 
2.25.4


^ permalink raw reply related

* [PATCH 11/11 v2.1] ftrace: Add recording of functions that caused recursion
From: Steven Rostedt @ 2020-11-02 19:14 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Anton Vorontsov, linux-doc, Peter Zijlstra,
	Sebastian Andrzej Siewior, Kamalesh Babulal, James E.J. Bottomley,
	Guo Ren, H. Peter Anvin, live-patching, Miroslav Benes,
	Ingo Molnar, linux-s390, Joe Lawrence, Jonathan Corbet,
	Mauro Carvalho Chehab, Helge Deller, x86, linux-csky,
	Christian Borntraeger, Kees Cook, Vasily Gorbik, Heiko Carstens,
	Jiri Kosina, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
	Tony Luck, linux-parisc, linux-kernel, Masami Hiramatsu,
	Colin Cross, Paul Mackerras, Andrew Morton, linuxppc-dev
In-Reply-To: <20201102123721.4fcce2cb@gandalf.local.home>

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file
"recursed_functions" all the functions that caused recursion while a
callback to the function tracer was running.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Guo Ren <guoren@kernel.org>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-csky@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: live-patching@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/ftrace-uses.rst   |   6 +-
 arch/csky/kernel/probes/ftrace.c      |   2 +-
 arch/parisc/kernel/ftrace.c           |   2 +-
 arch/powerpc/kernel/kprobes-ftrace.c  |   2 +-
 arch/s390/kernel/ftrace.c             |   2 +-
 arch/x86/kernel/kprobes/ftrace.c      |   2 +-
 fs/pstore/ftrace.c                    |   2 +-
 include/linux/trace_recursion.h       |  32 +++-
 kernel/livepatch/patch.c              |   2 +-
 kernel/trace/Kconfig                  |  25 +++
 kernel/trace/Makefile                 |   1 +
 kernel/trace/ftrace.c                 |   4 +-
 kernel/trace/trace_event_perf.c       |   2 +-
 kernel/trace/trace_functions.c        |   2 +-
 kernel/trace/trace_output.c           |   6 +-
 kernel/trace/trace_output.h           |   1 +
 kernel/trace/trace_recursion_record.c | 235 ++++++++++++++++++++++++++
 17 files changed, 308 insertions(+), 20 deletions(-)
 create mode 100644 kernel/trace/trace_recursion_record.c

diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 86cd14b8e126..5981d5691745 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -118,7 +118,7 @@ can help in this regard. If you start your code with:
 
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
@@ -130,7 +130,9 @@ The code in between will be safe to use, even if it ends up calling a
 function that the callback is tracing. Note, on success,
 ftrace_test_recursion_trylock() will disable preemption, and the
 ftrace_test_recursion_unlock() will enable it again (if it was previously
-enabled).
+enabled). The instruction pointer (ip) and its parent (parent_ip) are passed to
+ftrace_test_recursion_trylock() to record where the recursion happened
+(if CONFIG_FTRACE_RECORD_RECURSION is set).
 
 Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops
 (as explained below), then a helper trampoline will be used to test
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index 5eb2604fdf71..f30b179924ef 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -18,7 +18,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 4b1fdf15662c..8b0ed7c5a4ab 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -210,7 +210,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 5df8d50c65ae..fdfee39938ea 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 	struct kprobe_ctlblk *kcb;
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(nip, parent_nip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 88466d7fb6b2..a1556333d481 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -204,7 +204,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index a40a6cdfcca3..954d930a7127 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe_ctlblk *kcb;
 	int bit;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 816210fc5d3a..adb0935eb062 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -41,7 +41,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
 	if (unlikely(oops_in_progress))
 		return;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index ac3d73484cb2..1cba5fe8777a 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -142,7 +142,28 @@ static __always_inline int trace_get_context_bit(void)
 			pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
 }
 
-static __always_inline int trace_test_and_set_recursion(int start, int max)
+#ifdef CONFIG_FTRACE_RECORD_RECURSION
+extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
+/*
+* The paranoid_test check can cause dropped reports (unlikely), but
+* if the recursion is common, it will likely still be recorded later.
+* But the paranoid_test is needed to make sure we don't crash.
+*/
+# define do_ftrace_record_recursion(ip, pip)				\
+	do {								\
+		static atomic_t paranoid_test;				\
+		if (!atomic_read(&paranoid_test)) {			\
+			atomic_inc(&paranoid_test);			\
+			ftrace_record_recursion(ip, pip);		\
+			atomic_dec(&paranoid_test);			\
+		}							\
+	} while (0)
+#else
+# define do_ftrace_record_recursion(ip, pip)	do { } while (0)
+#endif
+
+static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
+							int start, int max)
 {
 	unsigned int val = current->trace_recursion;
 	int bit;
@@ -158,8 +179,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
 		 * a switch between contexts. Allow for a single recursion.
 		 */
 		bit = TRACE_TRANSITION_BIT;
-		if (trace_recursion_test(bit))
+		if (trace_recursion_test(bit)) {
+			do_ftrace_record_recursion(ip, pip);
 			return -1;
+		}
 		trace_recursion_set(bit);
 		barrier();
 		return bit + 1;
@@ -199,9 +222,10 @@ static __always_inline void trace_clear_recursion(int bit)
  * Returns: -1 if a recursion happened.
  *           >= 0 if no recursion
  */
-static __always_inline int ftrace_test_recursion_trylock(void)
+static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
+							 unsigned long parent_ip)
 {
-	return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
 }
 
 /**
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 15480bf3ce88..875c5dbbdd33 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -49,7 +49,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	ops = container_of(fops, struct klp_ops, fops);
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (WARN_ON_ONCE(bit < 0))
 		return;
 	/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a4020c0b4508..9b11c096d139 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -727,6 +727,31 @@ config TRACE_EVAL_MAP_FILE
 
 	If unsure, say N.
 
+config FTRACE_RECORD_RECURSION
+	bool "Record functions that recurse in function tracing"
+	depends on FUNCTION_TRACER
+	help
+	  All callbacks that attach to the function tracing have some sort
+	  of protection against recursion. Even though the protection exists,
+	  it adds overhead. This option will create a file in the tracefs
+	  file system called "recursed_functions" that will list the functions
+	  that triggered a recursion.
+
+	  This will add more overhead to cases that have recursion.
+
+	  If unsure, say N
+
+config FTRACE_RECORD_RECURSION_SIZE
+	int "Max number of recursed functions to record"
+	default	128
+	depends on FTRACE_RECORD_RECURSION
+	help
+	  This defines the limit of number of functions that can be
+	  listed in the "recursed_functions" file, that lists all
+	  the functions that caused a recursion to happen.
+	  This file can be reset, but the limit can not change in
+	  size at runtime.
+
 config GCOV_PROFILE_FTRACE
 	bool "Enable GCOV profiling on ftrace subsystem"
 	depends on GCOV_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e153be351548..7e44cea89fdc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
 obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
+obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39f2bba89b76..03aad2b5cd5e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6918,7 +6918,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	struct ftrace_ops *op;
 	int bit;
 
-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
 	if (bit < 0)
 		return;
 
@@ -6993,7 +6993,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 {
 	int bit;
 
-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
 	if (bit < 0)
 		return;
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a2b9fddb8148..1b202e28dfaa 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -447,7 +447,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 	if ((unsigned long)ops->private != smp_processor_id())
 		return;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 89c414ce1388..646eda6c44a5 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -141,7 +141,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (unlikely(!tr->function_enabled))
 		return;
 
-	bit = ftrace_test_recursion_trylock();
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 000e9dc224c6..92b1575ae0ca 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name)
 }
 #endif /* CONFIG_KRETPROBES */
 
-static void
-seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
+void
+trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
 {
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
@@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 		goto out;
 	}
 
-	seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
+	trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
 
 	if (sym_flags & TRACE_ITER_SYM_ADDR)
 		trace_seq_printf(s, " <" IP_FMT ">", ip);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 2f742b74e7e6..4c954636caf0 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -16,6 +16,7 @@ extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
 
+extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
 extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c
new file mode 100644
index 000000000000..a1859843781b
--- /dev/null
+++ b/kernel/trace/trace_recursion_record.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+
+struct recursed_functions {
+	unsigned long		ip;
+	unsigned long		parent_ip;
+};
+
+static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE];
+static atomic_t nr_records;
+
+/*
+ * Cache the last found function. Yes, updates to this is racey, but
+ * so is memory cache ;-)
+ */
+static unsigned long cached_function;
+
+void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
+{
+	int index = 0;
+	int i;
+	unsigned long old;
+
+ again:
+	/* First check the last one recorded */
+	if (ip == cached_function)
+		return;
+
+	i = atomic_read(&nr_records);
+	/* nr_records is -1 when clearing records */
+	smp_mb__after_atomic();
+	if (i < 0)
+		return;
+
+	/*
+	 * If there's two writers and this writer comes in second,
+	 * the cmpxchg() below to update the ip will fail. Then this
+	 * writer will try again. It is possible that index will now
+	 * be greater than nr_records. This is because the writer
+	 * that succeeded has not updated the nr_records yet.
+	 * This writer could keep trying again until the other writer
+	 * updates nr_records. But if the other writer takes an
+	 * interrupt, and that interrupt locks up that CPU, we do
+	 * not want this CPU to lock up due to the recursion protection,
+	 * and have a bug report showing this CPU as the cause of
+	 * locking up the computer. To not lose this record, this
+	 * writer will simply use the next position to update the
+	 * recursed_functions, and it will update the nr_records
+	 * accordingly.
+	 */
+	if (index < i)
+		index = i;
+	if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
+		return;
+
+	for (i = index - 1; i >= 0; i--) {
+		if (recursed_functions[i].ip == ip) {
+			cached_function = ip;
+			return;
+		}
+	}
+
+	cached_function = ip;
+
+	/*
+	 * We only want to add a function if it hasn't been added before.
+	 * Add to the current location before incrementing the count.
+	 * If it fails to add, then increment the index (save in i)
+	 * and try again.
+	 */
+	old = cmpxchg(&recursed_functions[index].ip, 0, ip);
+	if (old != 0) {
+		/* Did something else already added this for us? */
+		if (old == ip)
+			return;
+		/* Try the next location (use i for the next index) */
+		index++;
+		goto again;
+	}
+
+	recursed_functions[index].parent_ip = parent_ip;
+
+	/*
+	 * It's still possible that we could race with the clearing
+	 *    CPU0                                    CPU1
+	 *    ----                                    ----
+	 *                                       ip = func
+	 *  nr_records = -1;
+	 *  recursed_functions[0] = 0;
+	 *                                       i = -1
+	 *                                       if (i < 0)
+	 *  nr_records = 0;
+	 *  (new recursion detected)
+	 *      recursed_functions[0] = func
+	 *                                            cmpxchg(recursed_functions[0],
+	 *                                                    func, 0)
+	 *
+	 * But the worse that could happen is that we get a zero in
+	 * the recursed_functions array, and it's likely that "func" will
+	 * be recorded again.
+	 */
+	i = atomic_read(&nr_records);
+	smp_mb__after_atomic();
+	if (i < 0)
+		cmpxchg(&recursed_functions[index].ip, ip, 0);
+	else if (i <= index)
+		atomic_cmpxchg(&nr_records, i, index + 1);
+}
+
+static DEFINE_MUTEX(recursed_function_lock);
+static struct trace_seq *tseq;
+
+static void *recursed_function_seq_start(struct seq_file *m, loff_t *pos)
+{
+	void *ret = NULL;
+	int index;
+
+	mutex_lock(&recursed_function_lock);
+	index = atomic_read(&nr_records);
+	if (*pos < index) {
+		ret = &recursed_functions[*pos];
+	}
+
+	tseq = kzalloc(sizeof(*tseq), GFP_KERNEL);
+	if (!tseq)
+		return ERR_PTR(-ENOMEM);
+
+	trace_seq_init(tseq);
+
+	return ret;
+}
+
+static void *recursed_function_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	int index;
+	int p;
+
+	index = atomic_read(&nr_records);
+	p = ++(*pos);
+
+	return p < index ? &recursed_functions[p] : NULL;
+}
+
+static void recursed_function_seq_stop(struct seq_file *m, void *v)
+{
+	kfree(tseq);
+	mutex_unlock(&recursed_function_lock);
+}
+
+static int recursed_function_seq_show(struct seq_file *m, void *v)
+{
+	struct recursed_functions *record = v;
+	int ret = 0;
+
+	if (record) {
+		trace_seq_print_sym(tseq, record->parent_ip, true);
+		trace_seq_puts(tseq, ":\t");
+		trace_seq_print_sym(tseq, record->ip, true);
+		trace_seq_putc(tseq, '\n');
+		ret = trace_print_seq(m, tseq);
+	}
+
+	return ret;
+}
+
+static const struct seq_operations recursed_function_seq_ops = {
+	.start  = recursed_function_seq_start,
+	.next   = recursed_function_seq_next,
+	.stop   = recursed_function_seq_stop,
+	.show   = recursed_function_seq_show
+};
+
+static int recursed_function_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	mutex_lock(&recursed_function_lock);
+	/* If this file was opened for write, then erase contents */
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		/* disable updating records */
+		atomic_set(&nr_records, -1);
+		smp_mb__after_atomic();
+		memset(recursed_functions, 0, sizeof(recursed_functions));
+		smp_wmb();
+		/* enable them again */
+		atomic_set(&nr_records, 0);
+	}
+	if (file->f_mode & FMODE_READ)
+		ret = seq_open(file, &recursed_function_seq_ops);
+	mutex_unlock(&recursed_function_lock);
+
+	return ret;
+}
+
+static ssize_t recursed_function_write(struct file *file,
+				       const char __user *buffer,
+				       size_t count, loff_t *ppos)
+{
+	return count;
+}
+
+static int recursed_function_release(struct inode *inode, struct file *file)
+{
+	if (file->f_mode & FMODE_READ)
+		seq_release(inode, file);
+	return 0;
+}
+
+static const struct file_operations recursed_functions_fops = {
+	.open           = recursed_function_open,
+	.write		= recursed_function_write,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = recursed_function_release,
+};
+
+__init static int create_recursed_functions(void)
+{
+	struct dentry *dentry;
+
+	dentry = trace_create_file("recursed_functions", 0644, NULL, NULL,
+				   &recursed_functions_fops);
+	if (!dentry)
+		pr_warn("WARNING: Failed to create recursed_functions\n");
+	return 0;
+}
+
+fs_initcall(create_recursed_functions);
-- 
2.25.4


^ permalink raw reply related

* RE: Buggy commit tracked to: "Re: [PATCH 2/9] iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c"
From: David Laight @ 2020-11-02 18:23 UTC (permalink / raw)
  To: 'Greg KH'
  Cc: linux-aio@kvack.org, 'David Hildenbrand',
	linux-mips@vger.kernel.org, David Howells, linux-mm@kvack.org,
	keyrings@vger.kernel.org, sparclinux@vger.kernel.org,
	Christoph Hellwig, linux-arch@vger.kernel.org,
	linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org,
	kernel-team@android.com, Arnd Bergmann,
	linux-block@vger.kernel.org, Al Viro, io-uring@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, Jens Axboe,
	linux-parisc@vger.kernel.org, netdev@vger.kernel.org,
	Nick Desaulniers, linux-kernel@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, Andrew Morton,
	linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20201102135202.GA1016272@kroah.com>

From: 'Greg KH'
> Sent: 02 November 2020 13:52
> 
> On Mon, Nov 02, 2020 at 09:06:38AM +0000, David Laight wrote:
> > From: 'Greg KH'
> > > Sent: 23 October 2020 15:47
> > >
> > > On Fri, Oct 23, 2020 at 02:39:24PM +0000, David Laight wrote:
> > > > From: David Hildenbrand
> > > > > Sent: 23 October 2020 15:33
> > > > ...
> > > > > I just checked against upstream code generated by clang 10 and it
> > > > > properly discards the upper 32bit via a mov w23 w2.
> > > > >
> > > > > So at least clang 10 indeed properly assumes we could have garbage and
> > > > > masks it off.
> > > > >
> > > > > Maybe the issue is somewhere else, unrelated to nr_pages ... or clang 11
> > > > > behaves differently.
> > > >
> > > > We'll need the disassembly from a failing kernel image.
> > > > It isn't that big to hand annotate.
> > >
> > > I've worked around the merge at the moment in the android tree, but it
> > > is still quite reproducible, and will try to get a .o file to
> > > disassemble on Monday or so...
> >
> > Did this get properly resolved?
> 
> For some reason, 5.10-rc2 fixed all of this up.  I backed out all of the
> patches I had to revert to get 5.10-rc1 to work properly, and then did
> the merge and all is well.
> 
> It must have been something to do with the compat changes in this same
> area that went in after 5.10-rc1, and something got reorganized in the
> files somehow.  I really do not know, and at the moment, don't have the
> time to track it down anymore.  So for now, I'd say it's all good, sorry
> for the noise.

Hopefully it won't appear again.

Saved me spending a day off reading arm64 assembler.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply

* Re: [PATCH 11/11 v2] ftrace: Add recording of functions that caused recursion
From: Steven Rostedt @ 2020-11-02 17:46 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Anton Vorontsov, linux-doc, Peter Zijlstra,
	Sebastian Andrzej Siewior, Kamalesh Babulal, James E.J. Bottomley,
	Guo Ren, H. Peter Anvin, live-patching, Miroslav Benes,
	Ingo Molnar, linux-s390, Joe Lawrence, Jonathan Corbet,
	Mauro Carvalho Chehab, Helge Deller, x86, linux-csky,
	Christian Borntraeger, Kees Cook, Vasily Gorbik, Heiko Carstens,
	Jiri Kosina, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
	Tony Luck, linux-parisc, linux-kernel, Masami Hiramatsu,
	Colin Cross, Paul Mackerras, Andrew Morton, linuxppc-dev
In-Reply-To: <20201102123721.4fcce2cb@gandalf.local.home>

On Mon, 2 Nov 2020 12:37:21 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:


> The only race that I see that can happen, is the one in the comment I
> showed. And that is after enabling the recursed functions again after
> clearing, one CPU could add a function while another CPU that just added
> that same function could be just exiting this routine, notice that a
> clearing of the array happened, and remove its function (which was the same
> as the one just added). So we get a "zero" in the array. If this
> happens, it is likely that that function will recurse again and will be
> added later.
> 

Updated version of this function:

-- Steve


/*
 * ftrace_record_recursion - remember a function that triggered recursion
 * @ip:        address of the function where the recursion was detected
 * @parent_ip: address of the caller of that function
 *
 * Stores @ip (and then @parent_ip) in the next free slot of
 * recursed_functions[] unless @ip is already recorded. Nothing is
 * recorded while the records are being cleared (nr_records < 0) or once
 * CONFIG_FTRACE_RECORD_RECURSION_SIZE slots are in use. No lock is taken;
 * slots are claimed with cmpxchg() and nr_records is advanced with
 * atomic_cmpxchg().
 */
void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
{
	int index = 0;
	int i;
	unsigned long old;

	/* First check the last one recorded */
	if (ip == cached_function)
		return;

 again:
	/*
	 * The cached_function test above is deliberately not part of the
	 * retry loop: this writer stores ip in cached_function before the
	 * cmpxchg() below, so repeating the test on a retry would return
	 * immediately and the record would be lost.
	 */
	i = atomic_read(&nr_records);
	/* nr_records is -1 when clearing records */
	/*
	 * NOTE(review): atomic_read() is not a read-modify-write operation;
	 * confirm that smp_mb__after_atomic() provides the ordering that is
	 * intended here.
	 */
	smp_mb__after_atomic();
	if (i < 0)
		return;

	/*
	 * If there's two writers and this writer comes in second,
	 * the cmpxchg() below to update the ip will fail. Then this
	 * writer will try again. It is possible that index will now
	 * be greater than nr_records. This is because the writer
	 * that succeeded has not updated the nr_records yet.
	 * This writer could keep trying again until the other writer
	 * updates nr_records. But if the other writer takes an
	 * interrupt, and that interrupt locks up that CPU, we do
	 * not want this CPU to lock up due to the recursion protection,
	 * and have a bug report showing this CPU as the cause of
	 * locking up the computer. To not lose this record, this
	 * writer will simply use the next position to update the
	 * recursed_functions, and it will update the nr_records
	 * accordingly.
	 */
	if (index < i)
		index = i;
	if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
		return;

	/* Already recorded in one of the slots below index? */
	for (i = index - 1; i >= 0; i--) {
		if (recursed_functions[i].ip == ip) {
			cached_function = ip;
			return;
		}
	}

	cached_function = ip;

	/*
	 * We only want to add a function if it hasn't been added before.
	 * Add to the current location before incrementing the count.
	 * If it fails to add, then increment index and try again.
	 */
	old = cmpxchg(&recursed_functions[index].ip, 0, ip);
	if (old != 0) {
		/* Did something else already add this for us? */
		if (old == ip)
			return;
		/* The slot holds a different function, try the next one */
		index++;
		goto again;
	}

	recursed_functions[index].parent_ip = parent_ip;

	/*
	 * It's still possible that we could race with the clearing
	 *    CPU0                                    CPU1
	 *    ----                                    ----
	 *                                       ip = func
	 *  nr_records = -1;
	 *  recursed_functions[0] = 0;
	 *                                       i = -1
	 *                                       if (i < 0)
	 *  nr_records = 0;
	 *  (new recursion detected)
	 *      recursed_functions[0] = func
	 *                                            cmpxchg(recursed_functions[0],
	 *                                                    func, 0)
	 *
	 * But the worst that could happen is that we get a zero in
	 * the recursed_functions array, and it's likely that "func" will
	 * be recorded again.
	 */
	i = atomic_read(&nr_records);
	smp_mb__after_atomic();
	if (i < 0)
		/* A clear is in progress: take our entry back out */
		cmpxchg(&recursed_functions[index].ip, ip, 0);
	else if (i <= index)
		/* Publish the slot: nr_records is the next position to write */
		atomic_cmpxchg(&nr_records, i, index + 1);
}

^ permalink raw reply

* Re: [PATCH 11/11 v2] ftrace: Add recording of functions that caused recursion
From: Steven Rostedt @ 2020-11-02 17:37 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Anton Vorontsov, linux-doc, Peter Zijlstra,
	Sebastian Andrzej Siewior, Kamalesh Babulal, James E.J. Bottomley,
	Guo Ren, H. Peter Anvin, live-patching, Miroslav Benes,
	Ingo Molnar, linux-s390, Joe Lawrence, Jonathan Corbet,
	Mauro Carvalho Chehab, Helge Deller, x86, linux-csky,
	Christian Borntraeger, Kees Cook, Vasily Gorbik, Heiko Carstens,
	Jiri Kosina, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
	Tony Luck, linux-parisc, linux-kernel, Masami Hiramatsu,
	Colin Cross, Paul Mackerras, Andrew Morton, linuxppc-dev
In-Reply-To: <20201102164147.GJ20201@alley>

On Mon, 2 Nov 2020 17:41:47 +0100
Petr Mladek <pmladek@suse.com> wrote:

> > +	i = atomic_read(&nr_records);
> > +	smp_mb__after_atomic();
> > +	if (i < 0)
> > +		cmpxchg(&recursed_functions[index].ip, ip, 0);
> > +	else if (i <= index)
> > +		atomic_cmpxchg(&nr_records, i, index + 1);  
> 
> This looks weird. It would shift nr_records past the record added
> in this call. It might skip many slots that were zeroed when clearing.
> Also we do not know if our entry was not zeroed as well.

nr_records always holds the next position to write to.

	index = nr_records;
	recursed_functions[index].ip = ip;
	nr_records++;

Before clearing, we have:

	nr_records = -1;
	smp_mb();
	memset(recursed_functions, 0);
	smp_wmb();
	nr_records = 0;

When we enter this function:

	i = nr_records;
	smp_mb();
	if (i < 0)
		return;


Thus, we just stopped all new updates while clearing the records.

But what about if something is currently updating?

	i = nr_records;
	smp_mb();
	if (i < 0)
		cmpxchg(recursed_functions, ip, 0);

The above shows that if the current updating process notices that the
clearing happens, it will clear the function it added.

	else if (i <= index)
		cmpxchg(nr_records, i, index + 1);

This makes sure that nr_records only grows if it is greater than or equal to
zero.

The only race that I see that can happen, is the one in the comment I
showed. And that is after enabling the recursed functions again after
clearing, one CPU could add a function while another CPU that just added
that same function could be just exiting this routine, notice that a
clearing of the array happened, and remove its function (which was the same
as the one just added). So we get a "zero" in the array. If this
happens, it is likely that that function will recurse again and will be
added later.

-- Steve

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox