* [PATCH v4 01/11] irqchip/riscv-imsic: Set irq_set_affinity for IMSIC base
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 02/11] irqchip/irq-msi-lib: Optionally set default irq_eoi/irq_ack Anup Patel
` (9 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
From: Andrew Jones <ajones@ventanamicro.com>
Instead of using imsic_irq_set_affinity() for leaf MSI domains,
use imsic_irq_set_affinity() for the non-leaf IMSIC base domain
and use irq_chip_set_affinity_parent() for leaf MSI domains.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/irq-riscv-imsic-platform.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index c708780e8760..5d7c30ad8855 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -96,9 +96,8 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
bool force)
{
struct imsic_vector *old_vec, *new_vec;
- struct irq_data *pd = d->parent_data;
- old_vec = irq_data_get_irq_chip_data(pd);
+ old_vec = irq_data_get_irq_chip_data(d);
if (WARN_ON(!old_vec))
return -ENOENT;
@@ -116,13 +115,13 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
return -ENOSPC;
/* Point device to the new vector */
- imsic_msi_update_msg(d, new_vec);
+ imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec);
/* Update irq descriptors with the new vector */
- pd->chip_data = new_vec;
+ d->chip_data = new_vec;
- /* Update effective affinity of parent irq data */
- irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu));
+ /* Update effective affinity */
+ irq_data_update_effective_affinity(d, cpumask_of(new_vec->cpu));
/* Move state of the old vector to the new vector */
imsic_vector_move(old_vec, new_vec);
@@ -135,6 +134,9 @@ static struct irq_chip imsic_irq_base_chip = {
.name = "IMSIC",
.irq_mask = imsic_irq_mask,
.irq_unmask = imsic_irq_unmask,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = imsic_irq_set_affinity,
+#endif
.irq_retrigger = imsic_irq_retrigger,
.irq_compose_msi_msg = imsic_irq_compose_msg,
.flags = IRQCHIP_SKIP_SET_WAKE |
@@ -245,7 +247,7 @@ static bool imsic_init_dev_msi_info(struct device *dev,
if (WARN_ON_ONCE(domain != real_parent))
return false;
#ifdef CONFIG_SMP
- info->chip->irq_set_affinity = imsic_irq_set_affinity;
+ info->chip->irq_set_affinity = irq_chip_set_affinity_parent;
#endif
break;
default:
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 02/11] irqchip/irq-msi-lib: Optionally set default irq_eoi/irq_ack
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
2025-02-05 15:59 ` [PATCH v4 01/11] irqchip/riscv-imsic: Set irq_set_affinity for IMSIC base Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 03/11] irqchip/riscv-imsic: Move to common MSI lib Anup Patel
` (8 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
From: Thomas Gleixner <tglx@linutronix.de>
Introduce chip_flags in struct msi_parent_ops. This allows
msi_lib_init_dev_msi_info() set default irq_eoi/irq_ack
callbacks only when the corresponding flags are set in
the chip_flags.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/irq-gic-v2m.c | 1 +
drivers/irqchip/irq-imx-mu-msi.c | 1 +
drivers/irqchip/irq-msi-lib.c | 11 ++++++-----
drivers/irqchip/irq-mvebu-gicp.c | 1 +
drivers/irqchip/irq-mvebu-odmi.c | 1 +
drivers/irqchip/irq-mvebu-sei.c | 1 +
include/linux/msi.h | 11 +++++++++++
7 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index be35c5349986..1e3476c335ca 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -255,6 +255,7 @@ static void __init gicv2m_teardown(void)
static struct msi_parent_ops gicv2m_msi_parent_ops = {
.supported_flags = GICV2M_MSI_FLAGS_SUPPORTED,
.required_flags = GICV2M_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
.prefix = "GICv2m-",
diff --git a/drivers/irqchip/irq-imx-mu-msi.c b/drivers/irqchip/irq-imx-mu-msi.c
index 4342a21de1eb..69aacdfc8bef 100644
--- a/drivers/irqchip/irq-imx-mu-msi.c
+++ b/drivers/irqchip/irq-imx-mu-msi.c
@@ -214,6 +214,7 @@ static void imx_mu_msi_irq_handler(struct irq_desc *desc)
static const struct msi_parent_ops imx_mu_msi_parent_ops = {
.supported_flags = IMX_MU_MSI_FLAGS_SUPPORTED,
.required_flags = IMX_MU_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "MU-MSI-",
diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c
index d8e29fc0d406..51464c6257f3 100644
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -28,6 +28,7 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
struct msi_domain_info *info)
{
const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
+ struct irq_chip *chip = info->chip;
u32 required_flags;
/* Parent ops available? */
@@ -92,10 +93,10 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
info->flags |= required_flags;
/* Chip updates for all child bus types */
- if (!info->chip->irq_eoi)
- info->chip->irq_eoi = irq_chip_eoi_parent;
- if (!info->chip->irq_ack)
- info->chip->irq_ack = irq_chip_ack_parent;
+ if (!chip->irq_eoi && (pops->chip_flags & MSI_CHIP_FLAG_SET_EOI))
+ chip->irq_eoi = irq_chip_eoi_parent;
+ if (!chip->irq_ack && (pops->chip_flags & MSI_CHIP_FLAG_SET_ACK))
+ chip->irq_ack = irq_chip_ack_parent;
/*
* The device MSI domain can never have a set affinity callback. It
@@ -105,7 +106,7 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
* device MSI domain aside of mask/unmask which is provided e.g. by
* PCI/MSI device domains.
*/
- info->chip->irq_set_affinity = msi_domain_set_affinity;
+ chip->irq_set_affinity = msi_domain_set_affinity;
return true;
}
EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info);
diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c
index 2b6183919ea4..d67f93f6d750 100644
--- a/drivers/irqchip/irq-mvebu-gicp.c
+++ b/drivers/irqchip/irq-mvebu-gicp.c
@@ -161,6 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = {
static const struct msi_parent_ops gicp_msi_parent_ops = {
.supported_flags = GICP_MSI_FLAGS_SUPPORTED,
.required_flags = GICP_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "GICP-",
diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c
index ff19bfd258dc..28f7e81df94f 100644
--- a/drivers/irqchip/irq-mvebu-odmi.c
+++ b/drivers/irqchip/irq-mvebu-odmi.c
@@ -157,6 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = {
static const struct msi_parent_ops odmi_msi_parent_ops = {
.supported_flags = ODMI_MSI_FLAGS_SUPPORTED,
.required_flags = ODMI_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "ODMI-",
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 065166ab5dbc..ebd4a9014e8d 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -356,6 +356,7 @@ static void mvebu_sei_reset(struct mvebu_sei *sei)
static const struct msi_parent_ops sei_msi_parent_ops = {
.supported_flags = SEI_MSI_FLAGS_SUPPORTED,
.required_flags = SEI_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_mask = MATCH_PLATFORM_MSI,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.prefix = "SEI-",
diff --git a/include/linux/msi.h b/include/linux/msi.h
index b10093c4d00e..9abef442c146 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -558,11 +558,21 @@ enum {
MSI_FLAG_NO_AFFINITY = (1 << 21),
};
+/*
+ * Flags for msi_parent_ops::chip_flags
+ */
+enum {
+ MSI_CHIP_FLAG_SET_EOI = (1 << 0),
+ MSI_CHIP_FLAG_SET_ACK = (1 << 1),
+};
+
/**
* struct msi_parent_ops - MSI parent domain callbacks and configuration info
*
* @supported_flags: Required: The supported MSI flags of the parent domain
* @required_flags: Optional: The required MSI flags of the parent MSI domain
+ * @chip_flags: Optional: Select MSI chip callbacks to update with defaults
+ * in msi_lib_init_dev_msi_info().
* @bus_select_token: Optional: The bus token of the real parent domain for
* irq_domain::select()
* @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
@@ -575,6 +585,7 @@ enum {
struct msi_parent_ops {
u32 supported_flags;
u32 required_flags;
+ u32 chip_flags;
u32 bus_select_token;
u32 bus_select_mask;
const char *prefix;
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 03/11] irqchip/riscv-imsic: Move to common MSI lib
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
2025-02-05 15:59 ` [PATCH v4 01/11] irqchip/riscv-imsic: Set irq_set_affinity for IMSIC base Anup Patel
2025-02-05 15:59 ` [PATCH v4 02/11] irqchip/irq-msi-lib: Optionally set default irq_eoi/irq_ack Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 04/11] genirq: Introduce common irq_force_complete_move() implementation Anup Patel
` (7 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
From: Thomas Gleixner <tglx@linutronix.de>
Simplify the leaf MSI domain handling in the RISC-V IMSIC driver by
using msi_lib_init_dev_msi_info() and msi_lib_irq_domain_select()
provided by common MSI lib.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/Kconfig | 8 +-
drivers/irqchip/irq-riscv-imsic-platform.c | 114 +--------------------
2 files changed, 6 insertions(+), 116 deletions(-)
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index be063bfb50c4..bc3f12af2dc7 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -589,13 +589,7 @@ config RISCV_IMSIC
select IRQ_DOMAIN_HIERARCHY
select GENERIC_IRQ_MATRIX_ALLOCATOR
select GENERIC_MSI_IRQ
-
-config RISCV_IMSIC_PCI
- bool
- depends on RISCV_IMSIC
- depends on PCI
- depends on PCI_MSI
- default RISCV_IMSIC
+ select IRQ_MSI_LIB
config SIFIVE_PLIC
bool
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index 5d7c30ad8855..9a5e7b4541f6 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -20,6 +20,7 @@
#include <linux/spinlock.h>
#include <linux/smp.h>
+#include "irq-msi-lib.h"
#include "irq-riscv-imsic-state.h"
static bool imsic_cpu_page_phys(unsigned int cpu, unsigned int guest_index,
@@ -174,22 +175,6 @@ static void imsic_irq_domain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
-static int imsic_irq_domain_select(struct irq_domain *domain, struct irq_fwspec *fwspec,
- enum irq_domain_bus_token bus_token)
-{
- const struct msi_parent_ops *ops = domain->msi_parent_ops;
- u32 busmask = BIT(bus_token);
-
- if (fwspec->fwnode != domain->fwnode || fwspec->param_count != 0)
- return 0;
-
- /* Handle pure domain searches */
- if (bus_token == ops->bus_select_token)
- return 1;
-
- return !!(ops->bus_select_mask & busmask);
-}
-
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d,
struct irq_data *irqd, int ind)
@@ -206,110 +191,21 @@ static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d,
static const struct irq_domain_ops imsic_base_domain_ops = {
.alloc = imsic_irq_domain_alloc,
.free = imsic_irq_domain_free,
- .select = imsic_irq_domain_select,
+ .select = msi_lib_irq_domain_select,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
.debug_show = imsic_irq_debug_show,
#endif
};
-#ifdef CONFIG_RISCV_IMSIC_PCI
-
-static void imsic_pci_mask_irq(struct irq_data *d)
-{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
-
-static void imsic_pci_unmask_irq(struct irq_data *d)
-{
- irq_chip_unmask_parent(d);
- pci_msi_unmask_irq(d);
-}
-
-#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI)
-
-#else
-
-#define MATCH_PCI_MSI 0
-
-#endif
-
-static bool imsic_init_dev_msi_info(struct device *dev,
- struct irq_domain *domain,
- struct irq_domain *real_parent,
- struct msi_domain_info *info)
-{
- const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
-
- /* MSI parent domain specific settings */
- switch (real_parent->bus_token) {
- case DOMAIN_BUS_NEXUS:
- if (WARN_ON_ONCE(domain != real_parent))
- return false;
-#ifdef CONFIG_SMP
- info->chip->irq_set_affinity = irq_chip_set_affinity_parent;
-#endif
- break;
- default:
- WARN_ON_ONCE(1);
- return false;
- }
-
- /* Is the target supported? */
- switch (info->bus_token) {
-#ifdef CONFIG_RISCV_IMSIC_PCI
- case DOMAIN_BUS_PCI_DEVICE_MSI:
- case DOMAIN_BUS_PCI_DEVICE_MSIX:
- info->chip->irq_mask = imsic_pci_mask_irq;
- info->chip->irq_unmask = imsic_pci_unmask_irq;
- break;
-#endif
- case DOMAIN_BUS_DEVICE_MSI:
- /*
- * Per-device MSI should never have any MSI feature bits
- * set. It's sole purpose is to create a dumb interrupt
- * chip which has a device specific irq_write_msi_msg()
- * callback.
- */
- if (WARN_ON_ONCE(info->flags))
- return false;
-
- /* Core managed MSI descriptors */
- info->flags |= MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS |
- MSI_FLAG_FREE_MSI_DESCS;
- break;
- case DOMAIN_BUS_WIRED_TO_MSI:
- break;
- default:
- WARN_ON_ONCE(1);
- return false;
- }
-
- /* Use hierarchial chip operations re-trigger */
- info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
-
- /*
- * Mask out the domain specific MSI feature flags which are not
- * supported by the real parent.
- */
- info->flags &= pops->supported_flags;
-
- /* Enforce the required flags */
- info->flags |= pops->required_flags;
-
- return true;
-}
-
-#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI)
-
static const struct msi_parent_ops imsic_msi_parent_ops = {
.supported_flags = MSI_GENERIC_FLAGS_MASK |
MSI_FLAG_PCI_MSIX,
.required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
- MSI_FLAG_USE_DEF_CHIP_OPS,
+ MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSI_MASK_PARENT,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
- .init_dev_msi_info = imsic_init_dev_msi_info,
+ .init_dev_msi_info = msi_lib_init_dev_msi_info,
};
int imsic_irqdomain_init(void)
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 04/11] genirq: Introduce common irq_force_complete_move() implementation
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (2 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 03/11] irqchip/riscv-imsic: Move to common MSI lib Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-06 19:52 ` Thomas Gleixner
2025-02-05 15:59 ` [PATCH v4 05/11] genirq: Introduce irq_can_move_in_process_context() Anup Patel
` (6 subsequent siblings)
10 siblings, 1 reply; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
The GENERIC_PENDING_IRQ requires an arch specific implementation
of irq_force_complete_move(). At the moment, only x86 implements
this but for RISC-V the irq_force_complete_move() is only needed
when RISC-V IMSIC driver is in use and not needed otherwise.
To address the above, introduce common weak implementation of
the irq_force_complete_move() which lets irqchip do the actual
irq_force_complete_move().
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
include/linux/irq.h | 5 +++++
kernel/irq/migration.c | 9 +++++++++
2 files changed, 14 insertions(+)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8daa17f0107a..1884fa4ec9b5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* @ipi_send_mask: send an IPI to destination cpus in cpumask
* @irq_nmi_setup: function called from core code before enabling an NMI
* @irq_nmi_teardown: function called from core code after disabling an NMI
+ * @irq_force_complete_move: optional function to force complete pending irq move
* @flags: chip specific flags
*/
struct irq_chip {
@@ -537,6 +538,10 @@ struct irq_chip {
int (*irq_nmi_setup)(struct irq_data *data);
void (*irq_nmi_teardown)(struct irq_data *data);
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
+ void (*irq_force_complete_move)(struct irq_data *data);
+#endif
+
unsigned long flags;
};
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index eb150afd671f..2920024475a3 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -5,6 +5,15 @@
#include "internals.h"
+void __weak irq_force_complete_move(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ struct irq_chip *chip = irq_data_get_irq_chip(d);
+
+ if (chip && chip->irq_force_complete_move)
+ chip->irq_force_complete_move(d);
+}
+
/**
* irq_fixup_move_pending - Cleanup irq move pending from a dying CPU
* @desc: Interrupt descriptor to clean up
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 04/11] genirq: Introduce common irq_force_complete_move() implementation
2025-02-05 15:59 ` [PATCH v4 04/11] genirq: Introduce common irq_force_complete_move() implementation Anup Patel
@ 2025-02-06 19:52 ` Thomas Gleixner
2025-02-08 16:39 ` Anup Patel
0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2025-02-06 19:52 UTC (permalink / raw)
To: Anup Patel
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
On Wed, Feb 05 2025 at 21:29, Anup Patel wrote:
> The GENERIC_PENDING_IRQ requires an arch specific implementation
> of irq_force_complete_move(). At the moment, only x86 implements
> this but for RISC-V the irq_force_complete_move() is only needed
> when RISC-V IMSIC driver is in use and not needed otherwise.
>
> To address the above, introduce common weak implementation of
> the irq_force_complete_move() which lets irqchip do the actual
> irq_force_complete_move().
>
> +void __weak irq_force_complete_move(struct irq_desc *desc)
> +{
> + struct irq_data *d = irq_desc_get_irq_data(desc);
> + struct irq_chip *chip = irq_data_get_irq_chip(d);
> +
> + if (chip && chip->irq_force_complete_move)
> + chip->irq_force_complete_move(d);
> +}
I seriously doubt that works as intended.
In patch 9 you implement this as callback for the imsic_irq_base_chip,
but that's not the top level chip in the hierarchy. The top level chip
is the PCI/MSI device domain chip, which obviously does not have that
callback set.
Something like uncompiled below should work. That avoids the weak
function and handles x86 in exactly the same way.
Thanks,
tglx
---
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -888,8 +888,85 @@ static int apic_set_affinity(struct irq_
return err ? err : IRQ_SET_MASK_OK;
}
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+ unsigned int cpu = smp_processor_id();
+ struct apic_chip_data *apicd;
+ unsigned int vector;
+
+ guard(raw_spinlock)(&vector_lock);
+ apicd = apic_chip_data(irqd);
+ if (!apicd)
+ return;
+
+ /*
+ * If prev_vector is empty or the descriptor is neither currently
+ * nor previously on the outgoing CPU no action required.
+ */
+ vector = apicd->prev_vector;
+ if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+ return;
+
+ /*
+ * This is tricky. If the cleanup of the old vector has not been
+ * done yet, then the following setaffinity call will fail with
+ * -EBUSY. This can leave the interrupt in a stale state.
+ *
+ * All CPUs are stuck in stop machine with interrupts disabled so
+ * calling __irq_complete_move() would be completely pointless.
+ *
+ * 1) The interrupt is in move_in_progress state. That means that we
+ * have not seen an interrupt since the io_apic was reprogrammed to
+ * the new vector.
+ *
+ * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+ * have not been processed yet.
+ */
+ if (apicd->move_in_progress) {
+ /*
+ * In theory there is a race:
+ *
+ * set_ioapic(new_vector) <-- Interrupt is raised before update
+ * is effective, i.e. it's raised on
+ * the old vector.
+ *
+ * So if the target cpu cannot handle that interrupt before
+ * the old vector is cleaned up, we get a spurious interrupt
+ * and in the worst case the ioapic irq line becomes stale.
+ *
+ * But in case of cpu hotplug this should be a non issue
+ * because if the affinity update happens right before all
+ * cpus rendezvous in stop machine, there is no way that the
+ * interrupt can be blocked on the target cpu because all cpus
+ * loops first with interrupts enabled in stop machine, so the
+ * old vector is not yet cleaned up when the interrupt fires.
+ *
+ * So the only way to run into this issue is if the delivery
+ * of the interrupt on the apic/system bus would be delayed
+ * beyond the point where the target cpu disables interrupts
+ * in stop machine. I doubt that it can happen, but at least
+ * there is a theoretical chance. Virtualization might be
+ * able to expose this, but AFAICT the IOAPIC emulation is not
+ * as stupid as the real hardware.
+ *
+ * Anyway, there is nothing we can do about that at this point
+ * w/o refactoring the whole fixup_irq() business completely.
+ * We print at least the irq number and the old vector number,
+ * so we have the necessary information when a problem in that
+ * area arises.
+ */
+ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+ irqd->irq, vector);
+ }
+ free_moved_vector(apicd);
+}
+
#else
-# define apic_set_affinity NULL
+# define apic_set_affinity NULL
+# define apic_force_complete_move NULL
#endif
static int apic_retrigger_irq(struct irq_data *irqd)
@@ -923,11 +1000,12 @@ static void x86_vector_msi_compose_msg(s
}
static struct irq_chip lapic_controller = {
- .name = "APIC",
- .irq_ack = apic_ack_edge,
- .irq_set_affinity = apic_set_affinity,
- .irq_compose_msi_msg = x86_vector_msi_compose_msg,
- .irq_retrigger = apic_retrigger_irq,
+ .name = "APIC",
+ .irq_ack = apic_ack_edge,
+ .irq_set_affinity = apic_set_affinity,
+ .irq_compose_msi_msg = x86_vector_msi_compose_msg,
+ .irq_force_complete_move = apic_force_complete_move,
+ .irq_retrigger = apic_retrigger_irq,
};
#ifdef CONFIG_SMP
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *c
__vector_schedule_cleanup(apicd);
}
-/*
- * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
- */
-void irq_force_complete_move(struct irq_desc *desc)
-{
- unsigned int cpu = smp_processor_id();
- struct apic_chip_data *apicd;
- struct irq_data *irqd;
- unsigned int vector;
-
- /*
- * The function is called for all descriptors regardless of which
- * irqdomain they belong to. For example if an IRQ is provided by
- * an irq_chip as part of a GPIO driver, the chip data for that
- * descriptor is specific to the irq_chip in question.
- *
- * Check first that the chip_data is what we expect
- * (apic_chip_data) before touching it any further.
- */
- irqd = irq_domain_get_irq_data(x86_vector_domain,
- irq_desc_get_irq(desc));
- if (!irqd)
- return;
-
- raw_spin_lock(&vector_lock);
- apicd = apic_chip_data(irqd);
- if (!apicd)
- goto unlock;
-
- /*
- * If prev_vector is empty or the descriptor is neither currently
- * nor previously on the outgoing CPU no action required.
- */
- vector = apicd->prev_vector;
- if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
- goto unlock;
-
- /*
- * This is tricky. If the cleanup of the old vector has not been
- * done yet, then the following setaffinity call will fail with
- * -EBUSY. This can leave the interrupt in a stale state.
- *
- * All CPUs are stuck in stop machine with interrupts disabled so
- * calling __irq_complete_move() would be completely pointless.
- *
- * 1) The interrupt is in move_in_progress state. That means that we
- * have not seen an interrupt since the io_apic was reprogrammed to
- * the new vector.
- *
- * 2) The interrupt has fired on the new vector, but the cleanup IPIs
- * have not been processed yet.
- */
- if (apicd->move_in_progress) {
- /*
- * In theory there is a race:
- *
- * set_ioapic(new_vector) <-- Interrupt is raised before update
- * is effective, i.e. it's raised on
- * the old vector.
- *
- * So if the target cpu cannot handle that interrupt before
- * the old vector is cleaned up, we get a spurious interrupt
- * and in the worst case the ioapic irq line becomes stale.
- *
- * But in case of cpu hotplug this should be a non issue
- * because if the affinity update happens right before all
- * cpus rendezvous in stop machine, there is no way that the
- * interrupt can be blocked on the target cpu because all cpus
- * loops first with interrupts enabled in stop machine, so the
- * old vector is not yet cleaned up when the interrupt fires.
- *
- * So the only way to run into this issue is if the delivery
- * of the interrupt on the apic/system bus would be delayed
- * beyond the point where the target cpu disables interrupts
- * in stop machine. I doubt that it can happen, but at least
- * there is a theoretical chance. Virtualization might be
- * able to expose this, but AFAICT the IOAPIC emulation is not
- * as stupid as the real hardware.
- *
- * Anyway, there is nothing we can do about that at this point
- * w/o refactoring the whole fixup_irq() business completely.
- * We print at least the irq number and the old vector number,
- * so we have the necessary information when a problem in that
- * area arises.
- */
- pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
- irqd->irq, vector);
- }
- free_moved_vector(apicd);
-unlock:
- raw_spin_unlock(&vector_lock);
-}
-
#ifdef CONFIG_HOTPLUG_CPU
/*
* Note, this is not accurate accounting, but at least good enough to
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hw
* @ipi_send_mask: send an IPI to destination cpus in cpumask
* @irq_nmi_setup: function called from core code before enabling an NMI
* @irq_nmi_teardown: function called from core code after disabling an NMI
+ * @irq_force_complete_move: optional function to force complete pending irq move
* @flags: chip specific flags
*/
struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
int (*irq_nmi_setup)(struct irq_data *data);
void (*irq_nmi_teardown)(struct irq_data *data);
+ void (*irq_force_complete_move)(struct irq_data *data);
+
unsigned long flags;
};
@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct i
__irq_move_irq(data);
}
void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
#else
static inline void irq_move_irq(struct irq_data *data) { }
static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif
extern int no_irq_affinity;
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_g
return desc->pending_mask;
}
bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+void irq_force_complete_move(struct irq_desc *desc);
#else /* CONFIG_GENERIC_PENDING_IRQ */
static inline bool irq_can_move_pcntxt(struct irq_data *data)
{
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pendin
{
return false;
}
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif /* !CONFIG_GENERIC_PENDING_IRQ */
#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_d
return true;
}
+void irq_force_complete_move(struct irq_desc *desc)
+{
+ for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+ if (d->chip && d->chip->irq_force_complete_move) {
+ d->chip->irq_force_complete_move(d);
+ return;
+ }
+ }
+}
+
void irq_move_masked_irq(struct irq_data *idata)
{
struct irq_desc *desc = irq_data_to_desc(idata);
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 04/11] genirq: Introduce common irq_force_complete_move() implementation
2025-02-06 19:52 ` Thomas Gleixner
@ 2025-02-08 16:39 ` Anup Patel
0 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-08 16:39 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx
On Fri, Feb 7, 2025 at 1:22 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Wed, Feb 05 2025 at 21:29, Anup Patel wrote:
> > The GENERIC_PENDING_IRQ requires an arch specific implementation
> > of irq_force_complete_move(). At the moment, only x86 implements
> > this but for RISC-V the irq_force_complete_move() is only needed
> > when RISC-V IMSIC driver is in use and not needed otherwise.
> >
> > To address the above, introduce common weak implementation of
> > the irq_force_complete_move() which lets irqchip do the actual
> > irq_force_complete_move().
> >
> > +void __weak irq_force_complete_move(struct irq_desc *desc)
> > +{
> > + struct irq_data *d = irq_desc_get_irq_data(desc);
> > + struct irq_chip *chip = irq_data_get_irq_chip(d);
> > +
> > + if (chip && chip->irq_force_complete_move)
> > + chip->irq_force_complete_move(d);
> > +}
>
> I seriously doubt that works as intended.
Ahh, yes. This function needs to consider hierarchical domains.
>
> In patch 9 you implement this as callback for the imsic_irq_base_chip,
> but that's not the top level chip in the hierarchy. The top level chip
> is the PCI/MSI device domain chip, which obviously does not have that
> callback set.
>
> Something like uncompiled below should work. That avoids the weak
> function and handles x86 in exactly the same way.
Okay, I will try the below code and include it in the next revision.
Thanks,
Anup
>
> Thanks,
>
> tglx
> ---
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -888,8 +888,85 @@ static int apic_set_affinity(struct irq_
> return err ? err : IRQ_SET_MASK_OK;
> }
>
> +/*
> + * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
> + */
> +static void apic_force_complete_move(struct irq_data *irqd)
> +{
> + unsigned int cpu = smp_processor_id();
> + struct apic_chip_data *apicd;
> + unsigned int vector;
> +
> + guard(raw_spinlock)(&vector_lock);
> + apicd = apic_chip_data(irqd);
> + if (!apicd)
> + return;
> +
> + /*
> + * If prev_vector is empty or the descriptor is neither currently
> + * nor previously on the outgoing CPU no action required.
> + */
> + vector = apicd->prev_vector;
> + if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
> + return;
> +
> + /*
> + * This is tricky. If the cleanup of the old vector has not been
> + * done yet, then the following setaffinity call will fail with
> + * -EBUSY. This can leave the interrupt in a stale state.
> + *
> + * All CPUs are stuck in stop machine with interrupts disabled so
> + * calling __irq_complete_move() would be completely pointless.
> + *
> + * 1) The interrupt is in move_in_progress state. That means that we
> + * have not seen an interrupt since the io_apic was reprogrammed to
> + * the new vector.
> + *
> + * 2) The interrupt has fired on the new vector, but the cleanup IPIs
> + * have not been processed yet.
> + */
> + if (apicd->move_in_progress) {
> + /*
> + * In theory there is a race:
> + *
> + * set_ioapic(new_vector) <-- Interrupt is raised before update
> + * is effective, i.e. it's raised on
> + * the old vector.
> + *
> + * So if the target cpu cannot handle that interrupt before
> + * the old vector is cleaned up, we get a spurious interrupt
> + * and in the worst case the ioapic irq line becomes stale.
> + *
> + * But in case of cpu hotplug this should be a non issue
> + * because if the affinity update happens right before all
> + * cpus rendezvous in stop machine, there is no way that the
> + * interrupt can be blocked on the target cpu because all cpus
> + * loops first with interrupts enabled in stop machine, so the
> + * old vector is not yet cleaned up when the interrupt fires.
> + *
> + * So the only way to run into this issue is if the delivery
> + * of the interrupt on the apic/system bus would be delayed
> + * beyond the point where the target cpu disables interrupts
> + * in stop machine. I doubt that it can happen, but at least
> + * there is a theoretical chance. Virtualization might be
> + * able to expose this, but AFAICT the IOAPIC emulation is not
> + * as stupid as the real hardware.
> + *
> + * Anyway, there is nothing we can do about that at this point
> + * w/o refactoring the whole fixup_irq() business completely.
> + * We print at least the irq number and the old vector number,
> + * so we have the necessary information when a problem in that
> + * area arises.
> + */
> + pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
> + irqd->irq, vector);
> + }
> + free_moved_vector(apicd);
> +}
> +
> #else
> -# define apic_set_affinity NULL
> +# define apic_set_affinity NULL
> +# define apic_force_complete_move NULL
> #endif
>
> static int apic_retrigger_irq(struct irq_data *irqd)
> @@ -923,11 +1000,12 @@ static void x86_vector_msi_compose_msg(s
> }
>
> static struct irq_chip lapic_controller = {
> - .name = "APIC",
> - .irq_ack = apic_ack_edge,
> - .irq_set_affinity = apic_set_affinity,
> - .irq_compose_msi_msg = x86_vector_msi_compose_msg,
> - .irq_retrigger = apic_retrigger_irq,
> + .name = "APIC",
> + .irq_ack = apic_ack_edge,
> + .irq_set_affinity = apic_set_affinity,
> + .irq_compose_msi_msg = x86_vector_msi_compose_msg,
> + .irq_force_complete_move = apic_force_complete_move,
> + .irq_retrigger = apic_retrigger_irq,
> };
>
> #ifdef CONFIG_SMP
> @@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *c
> __vector_schedule_cleanup(apicd);
> }
>
> -/*
> - * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
> - */
> -void irq_force_complete_move(struct irq_desc *desc)
> -{
> - unsigned int cpu = smp_processor_id();
> - struct apic_chip_data *apicd;
> - struct irq_data *irqd;
> - unsigned int vector;
> -
> - /*
> - * The function is called for all descriptors regardless of which
> - * irqdomain they belong to. For example if an IRQ is provided by
> - * an irq_chip as part of a GPIO driver, the chip data for that
> - * descriptor is specific to the irq_chip in question.
> - *
> - * Check first that the chip_data is what we expect
> - * (apic_chip_data) before touching it any further.
> - */
> - irqd = irq_domain_get_irq_data(x86_vector_domain,
> - irq_desc_get_irq(desc));
> - if (!irqd)
> - return;
> -
> - raw_spin_lock(&vector_lock);
> - apicd = apic_chip_data(irqd);
> - if (!apicd)
> - goto unlock;
> -
> - /*
> - * If prev_vector is empty or the descriptor is neither currently
> - * nor previously on the outgoing CPU no action required.
> - */
> - vector = apicd->prev_vector;
> - if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
> - goto unlock;
> -
> - /*
> - * This is tricky. If the cleanup of the old vector has not been
> - * done yet, then the following setaffinity call will fail with
> - * -EBUSY. This can leave the interrupt in a stale state.
> - *
> - * All CPUs are stuck in stop machine with interrupts disabled so
> - * calling __irq_complete_move() would be completely pointless.
> - *
> - * 1) The interrupt is in move_in_progress state. That means that we
> - * have not seen an interrupt since the io_apic was reprogrammed to
> - * the new vector.
> - *
> - * 2) The interrupt has fired on the new vector, but the cleanup IPIs
> - * have not been processed yet.
> - */
> - if (apicd->move_in_progress) {
> - /*
> - * In theory there is a race:
> - *
> - * set_ioapic(new_vector) <-- Interrupt is raised before update
> - * is effective, i.e. it's raised on
> - * the old vector.
> - *
> - * So if the target cpu cannot handle that interrupt before
> - * the old vector is cleaned up, we get a spurious interrupt
> - * and in the worst case the ioapic irq line becomes stale.
> - *
> - * But in case of cpu hotplug this should be a non issue
> - * because if the affinity update happens right before all
> - * cpus rendezvous in stop machine, there is no way that the
> - * interrupt can be blocked on the target cpu because all cpus
> - * loops first with interrupts enabled in stop machine, so the
> - * old vector is not yet cleaned up when the interrupt fires.
> - *
> - * So the only way to run into this issue is if the delivery
> - * of the interrupt on the apic/system bus would be delayed
> - * beyond the point where the target cpu disables interrupts
> - * in stop machine. I doubt that it can happen, but at least
> - * there is a theoretical chance. Virtualization might be
> - * able to expose this, but AFAICT the IOAPIC emulation is not
> - * as stupid as the real hardware.
> - *
> - * Anyway, there is nothing we can do about that at this point
> - * w/o refactoring the whole fixup_irq() business completely.
> - * We print at least the irq number and the old vector number,
> - * so we have the necessary information when a problem in that
> - * area arises.
> - */
> - pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
> - irqd->irq, vector);
> - }
> - free_moved_vector(apicd);
> -unlock:
> - raw_spin_unlock(&vector_lock);
> -}
> -
> #ifdef CONFIG_HOTPLUG_CPU
> /*
> * Note, this is not accurate accounting, but at least good enough to
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hw
> * @ipi_send_mask: send an IPI to destination cpus in cpumask
> * @irq_nmi_setup: function called from core code before enabling an NMI
> * @irq_nmi_teardown: function called from core code after disabling an NMI
> + * @irq_force_complete_move: optional function to force complete pending irq move
> * @flags: chip specific flags
> */
> struct irq_chip {
> @@ -537,6 +538,8 @@ struct irq_chip {
> int (*irq_nmi_setup)(struct irq_data *data);
> void (*irq_nmi_teardown)(struct irq_data *data);
>
> + void (*irq_force_complete_move)(struct irq_data *data);
> +
> unsigned long flags;
> };
>
> @@ -619,11 +622,9 @@ static inline void irq_move_irq(struct i
> __irq_move_irq(data);
> }
> void irq_move_masked_irq(struct irq_data *data);
> -void irq_force_complete_move(struct irq_desc *desc);
> #else
> static inline void irq_move_irq(struct irq_data *data) { }
> static inline void irq_move_masked_irq(struct irq_data *data) { }
> -static inline void irq_force_complete_move(struct irq_desc *desc) { }
> #endif
>
> extern int no_irq_affinity;
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_g
> return desc->pending_mask;
> }
> bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
> +void irq_force_complete_move(struct irq_desc *desc);
> #else /* CONFIG_GENERIC_PENDING_IRQ */
> static inline bool irq_can_move_pcntxt(struct irq_data *data)
> {
> @@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pendin
> {
> return false;
> }
> +static inline void irq_force_complete_move(struct irq_desc *desc) { }
> #endif /* !CONFIG_GENERIC_PENDING_IRQ */
>
> #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
> --- a/kernel/irq/migration.c
> +++ b/kernel/irq/migration.c
> @@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_d
> return true;
> }
>
> +void irq_force_complete_move(struct irq_desc *desc)
> +{
> + for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
> + if (d->chip && d->chip->irq_force_complete_move) {
> + d->chip->irq_force_complete_move(d);
> + return;
> + }
> + }
> +}
> +
> void irq_move_masked_irq(struct irq_data *idata)
> {
> struct irq_desc *desc = irq_data_to_desc(idata);
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 05/11] genirq: Introduce irq_can_move_in_process_context()
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (3 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 04/11] genirq: Introduce common irq_force_complete_move() implementation Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 06/11] genirq: Remove unused GENERIC_PENDING_IRQ_CHIPFLAGS kconfig option Anup Patel
` (5 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
The interrupt controller drivers which use GENERIC_PENDING_IRQ
can move interrupts in process context for downstrean devices
which support atomic MSI configuration.
Introduce irq_can_move_in_process_context() which allows interrupt
controller drivers to test whether a particular interrupt can be
moved process context.
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
include/linux/irq.h | 2 ++
kernel/irq/migration.c | 11 +++++++++++
2 files changed, 13 insertions(+)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1884fa4ec9b5..5b67a47da144 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -617,6 +617,7 @@ extern int irq_affinity_online_cpu(unsigned int cpu);
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
+bool irq_can_move_in_process_context(struct irq_data *data);
void __irq_move_irq(struct irq_data *data);
static inline void irq_move_irq(struct irq_data *data)
{
@@ -626,6 +627,7 @@ static inline void irq_move_irq(struct irq_data *data)
void irq_move_masked_irq(struct irq_data *data);
void irq_force_complete_move(struct irq_desc *desc);
#else
+static inline bool irq_can_move_in_process_context(struct irq_data *data) { return true; }
static inline void irq_move_irq(struct irq_data *data) { }
static inline void irq_move_masked_irq(struct irq_data *data) { }
static inline void irq_force_complete_move(struct irq_desc *desc) { }
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 2920024475a3..428e42d3bc91 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -126,3 +126,14 @@ void __irq_move_irq(struct irq_data *idata)
if (!masked)
idata->chip->irq_unmask(idata);
}
+
+bool irq_can_move_in_process_context(struct irq_data *data)
+{
+ /*
+ * Get top level irq_data when CONFIG_IRQ_DOMAIN_HIERARCHY is enabled,
+ * and it should be optimized away when CONFIG_IRQ_DOMAIN_HIERARCHY is
+ * disabled. So we avoid an "#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY" here.
+ */
+ data = irq_desc_get_irq_data(irq_data_to_desc(data));
+ return irq_can_move_pcntxt(data);
+}
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 06/11] genirq: Remove unused GENERIC_PENDING_IRQ_CHIPFLAGS kconfig option
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (4 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 05/11] genirq: Introduce irq_can_move_in_process_context() Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 07/11] RISC-V: Select GENERIC_PENDING_IRQ Anup Patel
` (4 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
The GENERIC_PENDING_IRQ_CHIPFLAGS kconfig option is no used anywhere
hence remove it.
Fixes: f94a18249b7f ("genirq: Remove IRQ_MOVE_PCNTXT and related code")
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
kernel/irq/Kconfig | 4 ----
1 file changed, 4 deletions(-)
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 5432418c0fea..875f25ed6f71 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -31,10 +31,6 @@ config GENERIC_IRQ_EFFECTIVE_AFF_MASK
config GENERIC_PENDING_IRQ
bool
-# Deduce delayed migration from top-level interrupt chip flags
-config GENERIC_PENDING_IRQ_CHIPFLAGS
- bool
-
# Support for generic irq migrating off cpu before the cpu is offline.
config GENERIC_IRQ_MIGRATION
bool
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 07/11] RISC-V: Select GENERIC_PENDING_IRQ
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (5 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 06/11] genirq: Remove unused GENERIC_PENDING_IRQ_CHIPFLAGS kconfig option Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 08/11] irqchip/riscv-imsic: Separate next and previous pointers in IMSIC vector Anup Patel
` (3 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
Enable GENERIC_PENDING_IRQ for RISC-V so that RISC-V irqchips can
support delayed irq mirgration in the interrupt context.
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
arch/riscv/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7612c52e9b1e..a32f39748775 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -111,6 +111,7 @@ config RISCV
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_LIB_DEVMEM_IS_ALLOWED
+ select GENERIC_PENDING_IRQ if SMP
select GENERIC_PCI_IOMAP
select GENERIC_PTDUMP if MMU
select GENERIC_SCHED_CLOCK
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 08/11] irqchip/riscv-imsic: Separate next and previous pointers in IMSIC vector
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (6 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 07/11] RISC-V: Select GENERIC_PENDING_IRQ Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 09/11] irqchip/riscv-imsic: Implement irq_force_complete_move() for IMSIC Anup Patel
` (2 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
Currently, there is only one "move" pointer in the struct imsic_vector
so during vector movement the old vector points to the new vector and
new vector points to itself.
To support force cleanup of old vector, add separate "move_next" and
"move_prev" pointers in the struct imsic_vector where during vector
movement the "move_next" pointer of the old vector points to the new
vector and the "move_prev" pointer of the new vector points to the
old vector.
Both "move_next" and "move_prev" pointers are cleared separately by
__imsic_local_sync() with a restriction that "move_prev" on the new
CPU is cleared only after old CPU has cleared "move_next".
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/irq-riscv-imsic-early.c | 8 ++-
drivers/irqchip/irq-riscv-imsic-state.c | 96 +++++++++++++++++--------
drivers/irqchip/irq-riscv-imsic-state.h | 7 +-
3 files changed, 78 insertions(+), 33 deletions(-)
diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c
index c5c2e6929a2f..b5def6268936 100644
--- a/drivers/irqchip/irq-riscv-imsic-early.c
+++ b/drivers/irqchip/irq-riscv-imsic-early.c
@@ -77,6 +77,12 @@ static void imsic_handle_irq(struct irq_desc *desc)
struct imsic_vector *vec;
unsigned long local_id;
+ /*
+ * Process pending local synchronization instead of waiting
+ * for per-CPU local timer to expire.
+ */
+ imsic_local_sync_all(false);
+
chained_irq_enter(chip, desc);
while ((local_id = csr_swap(CSR_TOPEI, 0))) {
@@ -120,7 +126,7 @@ static int imsic_starting_cpu(unsigned int cpu)
* Interrupts identities might have been enabled/disabled while
* this CPU was not running so sync-up local enable/disable state.
*/
- imsic_local_sync_all();
+ imsic_local_sync_all(true);
/* Enable local interrupt delivery */
imsic_local_delivery(true);
diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
index b97e6cd89ed7..96e994443fc7 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.c
+++ b/drivers/irqchip/irq-riscv-imsic-state.c
@@ -124,10 +124,11 @@ void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend,
}
}
-static void __imsic_local_sync(struct imsic_local_priv *lpriv)
+static bool __imsic_local_sync(struct imsic_local_priv *lpriv)
{
struct imsic_local_config *mlocal;
struct imsic_vector *vec, *mvec;
+ bool ret = true;
int i;
lockdep_assert_held(&lpriv->lock);
@@ -143,35 +144,75 @@ static void __imsic_local_sync(struct imsic_local_priv *lpriv)
__imsic_id_clear_enable(i);
/*
- * If the ID was being moved to a new ID on some other CPU
- * then we can get a MSI during the movement so check the
- * ID pending bit and re-trigger the new ID on other CPU
- * using MMIO write.
+ * Clear the pervious vector pointer of the new vector only
+ * after the movement is complete on the old CPU.
*/
- mvec = READ_ONCE(vec->move);
- WRITE_ONCE(vec->move, NULL);
- if (mvec && mvec != vec) {
+ mvec = READ_ONCE(vec->move_prev);
+ if (mvec) {
+ /*
+ * If the old vector has not been updated then
+ * try again in the next sync-up call.
+ */
+ if (READ_ONCE(mvec->move_next)) {
+ ret = false;
+ continue;
+ }
+
+ WRITE_ONCE(vec->move_prev, NULL);
+ }
+
+ /*
+ * If a vector was being moved to a new vector on some other
+ * CPU then we can get a MSI during the movement so check the
+ * ID pending bit and re-trigger the new ID on other CPU using
+ * MMIO write.
+ */
+ mvec = READ_ONCE(vec->move_next);
+ if (mvec) {
if (__imsic_id_read_clear_pending(i)) {
mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu);
writel_relaxed(mvec->local_id, mlocal->msi_va);
}
+ WRITE_ONCE(vec->move_next, NULL);
imsic_vector_free(&lpriv->vectors[i]);
}
skip:
bitmap_clear(lpriv->dirty_bitmap, i, 1);
}
+
+ return ret;
}
-void imsic_local_sync_all(void)
+#ifdef CONFIG_SMP
+static void __imsic_local_timer_start(struct imsic_local_priv *lpriv)
+{
+ lockdep_assert_held(&lpriv->lock);
+
+ if (!timer_pending(&lpriv->timer)) {
+ lpriv->timer.expires = jiffies + 1;
+ add_timer_on(&lpriv->timer, smp_processor_id());
+ }
+}
+#else
+static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv)
+{
+}
+#endif
+
+void imsic_local_sync_all(bool force_all)
{
struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
unsigned long flags;
raw_spin_lock_irqsave(&lpriv->lock, flags);
- bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1);
- __imsic_local_sync(lpriv);
+
+ if (force_all)
+ bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1);
+ if (!__imsic_local_sync(lpriv))
+ __imsic_local_timer_start(lpriv);
+
raw_spin_unlock_irqrestore(&lpriv->lock, flags);
}
@@ -190,12 +231,7 @@ void imsic_local_delivery(bool enable)
#ifdef CONFIG_SMP
static void imsic_local_timer_callback(struct timer_list *timer)
{
- struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
- unsigned long flags;
-
- raw_spin_lock_irqsave(&lpriv->lock, flags);
- __imsic_local_sync(lpriv);
- raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+ imsic_local_sync_all(false);
}
static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
@@ -216,14 +252,11 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu
*/
if (cpu_online(cpu)) {
if (cpu == smp_processor_id()) {
- __imsic_local_sync(lpriv);
- return;
+ if (__imsic_local_sync(lpriv))
+ return;
}
- if (!timer_pending(&lpriv->timer)) {
- lpriv->timer.expires = jiffies + 1;
- add_timer_on(&lpriv->timer, cpu);
- }
+ __imsic_local_timer_start(lpriv);
}
}
#else
@@ -278,8 +311,9 @@ void imsic_vector_unmask(struct imsic_vector *vec)
raw_spin_unlock(&lpriv->lock);
}
-static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsic_vector *vec,
- bool new_enable, struct imsic_vector *new_move)
+static bool imsic_vector_move_update(struct imsic_local_priv *lpriv,
+ struct imsic_vector *vec, bool is_old_vec,
+ bool new_enable, struct imsic_vector *move_vec)
{
unsigned long flags;
bool enabled;
@@ -289,7 +323,10 @@ static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsi
/* Update enable and move details */
enabled = READ_ONCE(vec->enable);
WRITE_ONCE(vec->enable, new_enable);
- WRITE_ONCE(vec->move, new_move);
+ if (is_old_vec)
+ WRITE_ONCE(vec->move_next, move_vec);
+ else
+ WRITE_ONCE(vec->move_prev, move_vec);
/* Mark the vector as dirty and synchronize */
bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
@@ -322,8 +359,8 @@ void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_ve
* interrupt on the old vector while device was being moved
* to the new vector.
*/
- enabled = imsic_vector_move_update(old_lpriv, old_vec, false, new_vec);
- imsic_vector_move_update(new_lpriv, new_vec, enabled, new_vec);
+ enabled = imsic_vector_move_update(old_lpriv, old_vec, true, false, new_vec);
+ imsic_vector_move_update(new_lpriv, new_vec, false, enabled, old_vec);
}
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
@@ -386,7 +423,8 @@ struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask
vec = &lpriv->vectors[local_id];
vec->hwirq = hwirq;
vec->enable = false;
- vec->move = NULL;
+ vec->move_next = NULL;
+ vec->move_prev = NULL;
return vec;
}
diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h
index 391e44280827..f02842b84ed5 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.h
+++ b/drivers/irqchip/irq-riscv-imsic-state.h
@@ -23,7 +23,8 @@ struct imsic_vector {
unsigned int hwirq;
/* Details accessed using local lock held */
bool enable;
- struct imsic_vector *move;
+ struct imsic_vector *move_next;
+ struct imsic_vector *move_prev;
};
struct imsic_local_priv {
@@ -74,7 +75,7 @@ static inline void __imsic_id_clear_enable(unsigned long id)
__imsic_eix_update(id, 1, false, false);
}
-void imsic_local_sync_all(void);
+void imsic_local_sync_all(bool force_all);
void imsic_local_delivery(bool enable);
void imsic_vector_mask(struct imsic_vector *vec);
@@ -87,7 +88,7 @@ static inline bool imsic_vector_isenabled(struct imsic_vector *vec)
static inline struct imsic_vector *imsic_vector_get_move(struct imsic_vector *vec)
{
- return READ_ONCE(vec->move);
+ return READ_ONCE(vec->move_prev);
}
void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec);
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 09/11] irqchip/riscv-imsic: Implement irq_force_complete_move() for IMSIC
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (7 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 08/11] irqchip/riscv-imsic: Separate next and previous pointers in IMSIC vector Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 10/11] irqchip/riscv-imsic: Replace hwirq with irq in the IMSIC vector Anup Patel
2025-02-05 15:59 ` [PATCH v4 11/11] irqchip/riscv-imsic: Special handling for non-atomic device MSI update Anup Patel
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
Implement irq_force_complete_move() for IMSIC driver so that in-flight
vector movements on a CPU can be cleaned-up when the CPU goes down.
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/irq-riscv-imsic-platform.c | 32 ++++++++++++++++++++++
drivers/irqchip/irq-riscv-imsic-state.c | 17 ++++++++++++
drivers/irqchip/irq-riscv-imsic-state.h | 1 +
3 files changed, 50 insertions(+)
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index 9a5e7b4541f6..b9e3f9030bdf 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -129,6 +129,37 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
return IRQ_SET_MASK_OK_DONE;
}
+
+static void imsic_irq_force_complete_move(struct irq_data *d)
+{
+ struct imsic_vector *mvec, *vec = irq_data_get_irq_chip_data(d);
+ unsigned int cpu = smp_processor_id();
+
+ if (WARN_ON(!vec))
+ return;
+
+ /* Do nothing if there is no in-flight move */
+ mvec = imsic_vector_get_move(vec);
+ if (!mvec)
+ return;
+
+ /* Do nothing if the old IMSIC vector does not belong to current CPU */
+ if (mvec->cpu != cpu)
+ return;
+
+ /*
+ * The best we can do is force cleanup the old IMSIC vector.
+ *
+ * The challenges over here are same as x86 vector domain so
+ * refer to the comments in irq_force_complete_move() function
+ * implemented at arch/x86/kernel/apic/vector.c.
+ */
+
+ /* Force cleanup in-flight move */
+ pr_info("IRQ fixup: irq %d move in progress, old vector cpu %d local_id %d\n",
+ d->irq, mvec->cpu, mvec->local_id);
+ imsic_vector_force_move_cleanup(vec);
+}
#endif
static struct irq_chip imsic_irq_base_chip = {
@@ -137,6 +168,7 @@ static struct irq_chip imsic_irq_base_chip = {
.irq_unmask = imsic_irq_unmask,
#ifdef CONFIG_SMP
.irq_set_affinity = imsic_irq_set_affinity,
+ .irq_force_complete_move = imsic_irq_force_complete_move,
#endif
.irq_retrigger = imsic_irq_retrigger,
.irq_compose_msi_msg = imsic_irq_compose_msg,
diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
index 96e994443fc7..5ec2b6bdffb2 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.c
+++ b/drivers/irqchip/irq-riscv-imsic-state.c
@@ -311,6 +311,23 @@ void imsic_vector_unmask(struct imsic_vector *vec)
raw_spin_unlock(&lpriv->lock);
}
+void imsic_vector_force_move_cleanup(struct imsic_vector *vec)
+{
+ struct imsic_local_priv *lpriv;
+ struct imsic_vector *mvec;
+ unsigned long flags;
+
+ lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
+ raw_spin_lock_irqsave(&lpriv->lock, flags);
+
+ mvec = READ_ONCE(vec->move_prev);
+ WRITE_ONCE(vec->move_prev, NULL);
+ if (mvec)
+ imsic_vector_free(mvec);
+
+ raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+}
+
static bool imsic_vector_move_update(struct imsic_local_priv *lpriv,
struct imsic_vector *vec, bool is_old_vec,
bool new_enable, struct imsic_vector *move_vec)
diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h
index f02842b84ed5..19dea0c77738 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.h
+++ b/drivers/irqchip/irq-riscv-imsic-state.h
@@ -91,6 +91,7 @@ static inline struct imsic_vector *imsic_vector_get_move(struct imsic_vector *ve
return READ_ONCE(vec->move_prev);
}
+void imsic_vector_force_move_cleanup(struct imsic_vector *vec);
void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec);
struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id);
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 10/11] irqchip/riscv-imsic: Replace hwirq with irq in the IMSIC vector
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (8 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 09/11] irqchip/riscv-imsic: Implement irq_force_complete_move() for IMSIC Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
2025-02-05 15:59 ` [PATCH v4 11/11] irqchip/riscv-imsic: Special handling for non-atomic device MSI update Anup Patel
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
Currently, the imsic_handle_irq() uses generic_handle_domain_irq() to
handle the irq which internally has an extra step of resolving hwirq
using domain. This extra step can be avoided by replacing hwirq with
irq in the IMSIC vector and directly calling generic_handle_irq().
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/irq-riscv-imsic-early.c | 6 ++----
drivers/irqchip/irq-riscv-imsic-platform.c | 2 +-
drivers/irqchip/irq-riscv-imsic-state.c | 8 ++++----
drivers/irqchip/irq-riscv-imsic-state.h | 4 ++--
4 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c
index b5def6268936..d2e8ed70d396 100644
--- a/drivers/irqchip/irq-riscv-imsic-early.c
+++ b/drivers/irqchip/irq-riscv-imsic-early.c
@@ -73,7 +73,7 @@ static int __init imsic_ipi_domain_init(void) { return 0; }
static void imsic_handle_irq(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- int err, cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct imsic_vector *vec;
unsigned long local_id;
@@ -103,9 +103,7 @@ static void imsic_handle_irq(struct irq_desc *desc)
continue;
}
- err = generic_handle_domain_irq(imsic->base_domain, vec->hwirq);
- if (unlikely(err))
- pr_warn_ratelimited("hwirq 0x%x mapping not found\n", vec->hwirq);
+ generic_handle_irq(vec->irq);
}
chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index b9e3f9030bdf..6bf5d63f614e 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -111,7 +111,7 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
return -EBUSY;
/* Get a new vector on the desired set of CPUs */
- new_vec = imsic_vector_alloc(old_vec->hwirq, mask_val);
+ new_vec = imsic_vector_alloc(old_vec->irq, mask_val);
if (!new_vec)
return -ENOSPC;
diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
index 5ec2b6bdffb2..d0148e48ab05 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.c
+++ b/drivers/irqchip/irq-riscv-imsic-state.c
@@ -422,7 +422,7 @@ struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int l
return &lpriv->vectors[local_id];
}
-struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask)
+struct imsic_vector *imsic_vector_alloc(unsigned int irq, const struct cpumask *mask)
{
struct imsic_vector *vec = NULL;
struct imsic_local_priv *lpriv;
@@ -438,7 +438,7 @@ struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask
lpriv = per_cpu_ptr(imsic->lpriv, cpu);
vec = &lpriv->vectors[local_id];
- vec->hwirq = hwirq;
+ vec->irq = irq;
vec->enable = false;
vec->move_next = NULL;
vec->move_prev = NULL;
@@ -451,7 +451,7 @@ void imsic_vector_free(struct imsic_vector *vec)
unsigned long flags;
raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
- vec->hwirq = UINT_MAX;
+ vec->irq = 0;
irq_matrix_free(imsic->matrix, vec->cpu, vec->local_id, false);
raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
}
@@ -510,7 +510,7 @@ static int __init imsic_local_init(void)
vec = &lpriv->vectors[i];
vec->cpu = cpu;
vec->local_id = i;
- vec->hwirq = UINT_MAX;
+ vec->irq = 0;
}
}
diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h
index 19dea0c77738..3202ffa4e849 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.h
+++ b/drivers/irqchip/irq-riscv-imsic-state.h
@@ -20,7 +20,7 @@ struct imsic_vector {
unsigned int cpu;
unsigned int local_id;
/* Details saved by driver in the vector */
- unsigned int hwirq;
+ unsigned int irq;
/* Details accessed using local lock held */
bool enable;
struct imsic_vector *move_next;
@@ -96,7 +96,7 @@ void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_ve
struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id);
-struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask);
+struct imsic_vector *imsic_vector_alloc(unsigned int irq, const struct cpumask *mask);
void imsic_vector_free(struct imsic_vector *vector);
void imsic_vector_debug_show(struct seq_file *m, struct imsic_vector *vec, int ind);
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 11/11] irqchip/riscv-imsic: Special handling for non-atomic device MSI update
2025-02-05 15:59 [PATCH v4 00/11] RISC-V IMSIC driver improvements Anup Patel
` (9 preceding siblings ...)
2025-02-05 15:59 ` [PATCH v4 10/11] irqchip/riscv-imsic: Replace hwirq with irq in the IMSIC vector Anup Patel
@ 2025-02-05 15:59 ` Anup Patel
10 siblings, 0 replies; 14+ messages in thread
From: Anup Patel @ 2025-02-05 15:59 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Marc Zyngier, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Andrew Lunn, Gregory Clement, Sebastian Hesselbarth,
Palmer Dabbelt, Paul Walmsley, Atish Patra, Andrew Jones,
Sunil V L, Anup Patel, linux-riscv, linux-arm-kernel,
linux-kernel, imx, Anup Patel
Device having non-atomic MSI update might see an intermediate
state when changing target IMSIC vector from one CPU to another.
To avoid losing interrupt to such intermediate state, do the
following (just like x86 APIC):
1) First write a temporary IMSIC vector to the device which
has MSI address same as the old IMSIC vector but with MSI
data matches the new IMSIC vector.
2) Next write the new IMSIC vector to the device.
Based on the above, the __imsic_local_sync() must check pending
status of both old MSI data and new MSI data on the old CPU. In
addition, the movement of IMSIC vector for non-atomic device MSI
update must be done in interrupt context using IRQCHIP_MOVE_DEFERRED.
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
drivers/irqchip/irq-riscv-imsic-platform.c | 73 +++++++++++++++++++++-
drivers/irqchip/irq-riscv-imsic-state.c | 31 +++++++--
2 files changed, 98 insertions(+), 6 deletions(-)
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index 6bf5d63f614e..828102c46f51 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -64,6 +64,11 @@ static int imsic_irq_retrigger(struct irq_data *d)
return 0;
}
+static void imsic_irq_ack(struct irq_data *d)
+{
+ irq_move_irq(d);
+}
+
static void imsic_irq_compose_vector_msg(struct imsic_vector *vec, struct msi_msg *msg)
{
phys_addr_t msi_addr;
@@ -97,6 +102,21 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
bool force)
{
struct imsic_vector *old_vec, *new_vec;
+ struct imsic_vector tmp_vec;
+
+ /*
+ * Requirements for the downstream irqdomains (or devices):
+ *
+ * 1) Downstream irqdomains (or devices) with atomic MSI update can
+ * happily do imsic_irq_set_affinity() in the process-context on
+ * any CPU so the irqchip of such irqdomains must not set the
+ * IRQCHIP_MOVE_DEFERRED flag.
+ *
+ * 2) Downstream irqdomains (or devices) with non-atomic MSI update
+ * must do imsic_irq_set_affinity() in the interrupt-context upon
+ * next interrupt so the irqchip of such irqdomains must set the
+ * IRQCHIP_MOVE_DEFERRED flag.
+ */
old_vec = irq_data_get_irq_chip_data(d);
if (WARN_ON(!old_vec))
@@ -115,6 +135,33 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
if (!new_vec)
return -ENOSPC;
+ /*
+ * Device having non-atomic MSI update might see an intermediate
+ * state when changing target IMSIC vector from one CPU to another.
+ *
+ * To avoid losing interrupt to such intermediate state, do the
+ * following (just like x86 APIC):
+ *
+ * 1) First write a temporary IMSIC vector to the device which
+ * has MSI address same as the old IMSIC vector but MSI data
+ * matches the new IMSIC vector.
+ *
+ * 2) Next write the new IMSIC vector to the device.
+ *
+ * Based on the above, the __imsic_local_sync() must check pending
+ * status of both old MSI data and new MSI data on the old CPU.
+ */
+
+ if (!irq_can_move_in_process_context(d) &&
+ new_vec->local_id != old_vec->local_id) {
+ /* Setup temporary vector */
+ tmp_vec.cpu = old_vec->cpu;
+ tmp_vec.local_id = new_vec->local_id;
+
+ /* Point device to the temporary vector */
+ imsic_msi_update_msg(irq_get_irq_data(d->irq), &tmp_vec);
+ }
+
/* Point device to the new vector */
imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec);
@@ -171,6 +218,7 @@ static struct irq_chip imsic_irq_base_chip = {
.irq_force_complete_move = imsic_irq_force_complete_move,
#endif
.irq_retrigger = imsic_irq_retrigger,
+ .irq_ack = imsic_irq_ack,
.irq_compose_msi_msg = imsic_irq_compose_msg,
.flags = IRQCHIP_SKIP_SET_WAKE |
IRQCHIP_MASK_ON_SUSPEND,
@@ -190,7 +238,7 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
return -ENOSPC;
irq_domain_set_info(domain, virq, virq, &imsic_irq_base_chip, vec,
- handle_simple_irq, NULL, NULL);
+ handle_edge_irq, NULL, NULL);
irq_set_noprobe(virq);
irq_set_affinity(virq, cpu_online_mask);
irq_data_update_effective_affinity(irq_get_irq_data(virq), cpumask_of(vec->cpu));
@@ -229,15 +277,36 @@ static const struct irq_domain_ops imsic_base_domain_ops = {
#endif
};
+static bool imsic_init_dev_msi_info(struct device *dev,
+ struct irq_domain *domain,
+ struct irq_domain *real_parent,
+ struct msi_domain_info *info)
+{
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
+
+ switch (info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ info->chip->flags |= IRQCHIP_MOVE_DEFERRED;
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
static const struct msi_parent_ops imsic_msi_parent_ops = {
.supported_flags = MSI_GENERIC_FLAGS_MASK |
MSI_FLAG_PCI_MSIX,
.required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
MSI_FLAG_USE_DEF_CHIP_OPS |
MSI_FLAG_PCI_MSI_MASK_PARENT,
+ .chip_flags = MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
- .init_dev_msi_info = msi_lib_init_dev_msi_info,
+ .init_dev_msi_info = imsic_init_dev_msi_info,
};
int imsic_irqdomain_init(void)
diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
index d0148e48ab05..3a2a381e4fa1 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.c
+++ b/drivers/irqchip/irq-riscv-imsic-state.c
@@ -126,8 +126,8 @@ void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend,
static bool __imsic_local_sync(struct imsic_local_priv *lpriv)
{
- struct imsic_local_config *mlocal;
- struct imsic_vector *vec, *mvec;
+ struct imsic_local_config *tlocal, *mlocal;
+ struct imsic_vector *vec, *tvec, *mvec;
bool ret = true;
int i;
@@ -169,13 +169,36 @@ static bool __imsic_local_sync(struct imsic_local_priv *lpriv)
*/
mvec = READ_ONCE(vec->move_next);
if (mvec) {
- if (__imsic_id_read_clear_pending(i)) {
+ /*
+ * Device having non-atomic MSI update might see an
+ * intermediate state so check both old ID and new ID
+ * for pending interrupts.
+ *
+ * For details, refer imsic_irq_set_affinity().
+ */
+
+ tvec = vec->local_id == mvec->local_id ?
+ NULL : &lpriv->vectors[mvec->local_id];
+ if (tvec &&
+ !irq_can_move_in_process_context(irq_get_irq_data(vec->irq)) &&
+ __imsic_id_read_clear_pending(tvec->local_id)) {
+ /* Retrigger temporary vector if it was already in-use */
+ if (READ_ONCE(tvec->enable)) {
+ tlocal = per_cpu_ptr(imsic->global.local, tvec->cpu);
+ writel_relaxed(tvec->local_id, tlocal->msi_va);
+ }
+
+ mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu);
+ writel_relaxed(mvec->local_id, mlocal->msi_va);
+ }
+
+ if (__imsic_id_read_clear_pending(vec->local_id)) {
mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu);
writel_relaxed(mvec->local_id, mlocal->msi_va);
}
WRITE_ONCE(vec->move_next, NULL);
- imsic_vector_free(&lpriv->vectors[i]);
+ imsic_vector_free(vec);
}
skip:
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread