public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@kernel.org>
To: Florian Eckert <fe@dev.tdt.de>, Rob Herring <robh@kernel.org>,
	Krzysztof Kozlowski <krzk+dt@kernel.org>,
	Conor Dooley <conor+dt@kernel.org>
Cc: linux-kernel@vger.kernel.org, devicetree@vger.kernel.org,
	Florian Eckert <fe@dev.tdt.de>,
	Eckert.Florian@googlemail.com, ms@dev.tdt.de,
	Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Subject: Re: [PATCH 2/2] irqchip: Add Lightning Mountain irqchip support
Date: Fri, 20 Mar 2026 13:04:44 +0100	[thread overview]
Message-ID: <87v7eqk8pv.ffs@tglx> (raw)
In-Reply-To: <20260318-irq-intel-soc-msi-v1-2-0e8cdf844fa8@dev.tdt.de>

Florian!

On Wed, Mar 18 2026 at 14:10, Florian Eckert wrote:
> The Lightning Mountain (LGM) has a MSI irqchip connected to the x86 vector
> domain. This commit adds the driver for this IP core, which is available on

git grep 'This patch' Documentation/process.

It doesn't make it better when you replace patch with commit.

> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Based on arch/x86/kernel/apic/msi.c and kernel/irq/msi.c
> + * Copyright (c) 2019 Intel Corporation.
> + * Copyright (c) 2020-2022, MaxLinear, Inc.
> + * Copyright (c) 2026 TDT AG.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/irq.h>
> +#include <linux/irqchip.h>
> +#include <linux/irqdomain.h>
> +#include <linux/msi.h>
> +#include <linux/platform_device.h>
> +
> +#include <asm/irqdomain.h>
> +#include <asm/apic.h>
> +
> +#define MSI_MSGA(x)	((x) << 2)
> +#define MSI_MSGD(x)	(0x200 + ((x) << 2))
> +#define MSI_CTRL	0x400
> +#define MSI_CTRL_EN	BIT(0)
> +#define MSI_MSK_L	0x404
> +#define MSI_MSK_H	0x408
> +
> +#define NMI_MSI_47	47
> +#define NMI_MSI_49	49
> +#define NMI_MSI_62	62
> +#define NMI_MSI_63	63
> +
> +#define MAX_SOC_MSI_IRQ_PINS	64
> +
> +struct soc_msi_dev {
> +	struct device		*dev;
> +	void __iomem		*base;
> +	raw_spinlock_t		lock; /* protect register handling */

No tail comments please

> +};
> +
> +struct soc_nmi_msi {
> +	irq_hw_number_t irq;
> +	int		cpuid;

unsigned int. We won't have negative CPU IDs in the forseeable future.

> +};
> +
> +static const struct soc_nmi_msi nmi_msi[] = {
> +	{NMI_MSI_47, 3},
> +	{NMI_MSI_49, 2},
> +	{NMI_MSI_62, 1},
> +	{NMI_MSI_63, 0},
> +	{ },

No C89 initializers and get rid of the pointless trailing {} entry.

> +};
> +
> +static bool soc_nmi_msi(irq_hw_number_t hwirq)
> +{
> +	if (hwirq == NMI_MSI_47 || hwirq == NMI_MSI_49 ||
> +	    hwirq == NMI_MSI_62 || hwirq == NMI_MSI_63)
> +		return true;
> +
> +	return false;
> +}
> +
> +static u32 nmi_irq_to_cpuid(irq_hw_number_t hwirq)
> +{
> +	int i;
> +	unsigned int nr_pcpus = num_possible_cpus();

Variable declaration ordering. See

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

> +	for (i = 0; i < ARRAY_SIZE(nmi_msi); i++) {

Now you use ARRAY_SIZE() which makes the trailing entry even more wrong.

> +		if (nmi_msi[i].irq == hwirq) {
> +			if (nmi_msi[i].cpuid >= nr_pcpus) {
> +				WARN(1, "NMI on invalid CPU: cpu: %d\n",
> +				     nmi_msi[i].cpuid);

No line break required. You have 100 characters. But aside of that
this WARN() is bogus as the code path is well known already, no?
pr_warn() is sufficient.

> +				return -EINVAL;
> +			}
> +			return nmi_msi[i].cpuid;
> +		}
> +	}
> +
> +	WARN((i >= ARRAY_SIZE(nmi_msi)), "Should never come");

Ditto

> +	return -EINVAL;
> +}
> +
> +static inline void
> +soc_msi_update_bits(struct soc_msi_dev *mdev, u32 clr, u32 set, u32 ofs)

s/ofs/offs/

> +{
> +	writel((readl(mdev->base +  ofs) & ~clr) | set, mdev->base + ofs);
> +}
> +
> +static void soc_msi_enable(struct soc_msi_dev *mdev)
> +{
> +	soc_msi_update_bits(mdev, MSI_CTRL_EN, MSI_CTRL_EN, MSI_CTRL);
> +}
> +
> +static void soc_msi_write_msg(struct irq_data *d, struct msi_msg *msg)
> +{
> +	struct soc_msi_dev *mdev = irq_data_get_irq_chip_data(d);
> +	unsigned long flag;
> +
> +	raw_spin_lock_irqsave(&mdev->lock, flag);

  guard(raw_spinlock)(&mdev->lock);

There is no reason for irqsave as irq_write_msi_msg() is invoked with
the interrupt descriptor lock held, which implies interrupts are disabled.

> +	writel(msg->address_lo, mdev->base + MSI_MSGA(d->hwirq));
> +	writel(msg->data, mdev->base + MSI_MSGD(d->hwirq));

As the comment on top claims this was modelled after apic/msi.c I have
to ask the obvious question how this MSI incarnation is magically not
affected by the problem described in great length in msi_set_affinity().

> +static void nmi_msi_compose_msg(struct soc_msi_dev *mdev, irq_hw_number_t hwirq)

x86 lacks NMI support for irq domain based allocations....

> +{
> +	struct msi_msg msg = {0};
> +	unsigned long flag;
> +	u32 cpuid, destid;
> +	u32 off;
> +
> +	cpuid = nmi_irq_to_cpuid(hwirq);
> +	if (cpuid < 0)
> +		return;
> +
> +	destid = apic->calc_dest_apicid(cpuid);
> +	off = hwirq < 32 ? MSI_MSK_L : MSI_MSK_H;
> +
> +	msg.arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
> +	msg.arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
> +	msg.arch_addr_lo.redirect_hint = 0;
> +	msg.arch_addr_lo.destid_0_7 = destid & 0xFF;
> +
> +	msg.address_hi = X86_MSI_BASE_ADDRESS_HIGH;
> +
> +	/*
> +	 * On edge trigger, we don't care about assert level. Also,
> +	 * since delivery mode is NMI, no irq vector is needed.
> +	 */
> +	msg.arch_data.is_level = 0;
> +	msg.arch_data.delivery_mode = APIC_DELIVERY_MODE_NMI;
> +
> +	raw_spin_lock_irqsave(&mdev->lock, flag);
> +	writel(msg.address_lo, mdev->base + MSI_MSGA(hwirq));
> +	writel(msg.data, mdev->base + MSI_MSGD(hwirq));
> +	soc_msi_update_bits(mdev, BIT(hwirq % 32), 0, off);
> +	raw_spin_unlock_irqrestore(&mdev->lock, flag);

Writing the MSI message in compose_msg() smells more than fishy.

> +}
> +
> +
> +static int soc_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
> +				unsigned int nr_irqs, void *arg)
> +{
> +	struct msi_domain_info *msi_info = domain->host_data;
> +	struct irq_fwspec *fwspec = arg;
> +	struct irq_alloc_info info;
> +	irq_hw_number_t hwirq;
> +	unsigned int type;
> +	void *chip_data;
> +	int i, ret;
> +
> +	if (!msi_info)
> +		return -EINVAL;
> +
> +	chip_data = msi_info->chip_data;
> +
> +	ret = soc_msi_domain_xlate(domain, fwspec, &hwirq, &type);
> +	if (ret)
> +		return ret;
> +
> +	if (irq_find_mapping(domain, hwirq) > 0)
> +		return -EEXIST;
> +
> +	/*
> +	 * All NMI interrupts go to vector 2, no irq mapping needed.
> +	 * What we want is to configure hardware once, don't do anything else.
> +	 * 0 means it will continue to initialize other stuff in the irqdomain.
> +	 * We can just return other value after hw initialized. In this case,
> +	 * irqdomain will release all resources.

This comment is incomprehensible word salad. Aside of that ...

> +	 */
> +	if (soc_nmi_msi(hwirq)) {
> +		nmi_msi_compose_msg((struct soc_msi_dev *)chip_data, hwirq);
> +		return -EINVAL;

... this is a blatant violation of all layering rules known to mankind in
one go. Admittedly you get creativity points, but that's not making it
technically more correct.

> +static int soc_msi_domain_activate(struct irq_domain *domain,
> +				   struct irq_data *irq_data, bool early)
> +{
> +	struct msi_msg msg[2] = { [1] = { }, };
> +
> +	WARN_ON(irq_chip_compose_msi_msg(irq_data, msg));
> +	soc_msi_write_msg(irq_data, msg);
> +
> +	return 0;

That's just a copy of the corresponding kernel/irq/msi.c function.

> +}
> +
> +static void soc_msi_domain_deactivate(struct irq_domain *domain,
> +				      struct irq_data *irq_data)
> +{
> +	struct msi_msg msg[2];
> +
> +	memset(msg, 0, sizeof(msg));
> +	soc_msi_write_msg(irq_data, msg);

Ditto.

> +}
> +
> +static const struct irq_domain_ops soc_msi_domain_ops = {

Now it's obvious why you need them. You are using the wrong interrupt
domain type.

> +	.translate	= soc_msi_domain_xlate,
> +	.alloc		= soc_msi_domain_alloc,
> +	.free		= irq_domain_free_irqs_common,
> +	.activate	= soc_msi_domain_activate,
> +	.deactivate	= soc_msi_domain_deactivate,
> +};
> +
> +static int intel_soc_msi_probe(struct platform_device *pdev)
> +{
> +	struct device_node *node = pdev->dev.of_node;
> +	struct msi_domain_info *msi_info;
> +	struct irq_domain *domain;
> +	struct soc_msi_dev *mdev;
> +	struct resource *res;
> +
> +	mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_KERNEL);
> +	if (!mdev)
> +		return -ENOMEM;
> +
> +	msi_info = devm_kzalloc(&pdev->dev, sizeof(*msi_info), GFP_KERNEL);
> +	if (!msi_info)
> +		return -ENOMEM;
> +
> +	mdev->dev = &pdev->dev;
> +
> +	msi_info->flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS;
> +	msi_info->chip_data = mdev;

Q: Does this magically create a MSI domain?
A: No

Q: Why?
A: Because irq_domain_create_hierarchy() will never reach the MSI code
   that handles this.

> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!res)
> +		return -EINVAL;
> +
> +	mdev->base = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(mdev->base)) {
> +		dev_err(&pdev->dev, "failed to ioremap %pR\n", res);
> +		return PTR_ERR(mdev->base);
> +	}
> +
> +	domain = irq_domain_create_hierarchy(x86_vector_domain, 0,

So this is hardwired to the vector domain and does not allow the
interrupts to be remapped? Those SoCs have VT-x which implies interrupt
remapping support. But what do I know about the infinite wisdom of
hardware designers.

TBH, if they decided to hardwire it to the vector domain, then they are
begging for a cluebat treatment.


Let me summarize what I can crystal-ball out of your comprehensive
change log and the insane amount of comments in the code:

    1) The IP block converts 'wired' interrupts to MSI messages

    2) It needs to route four interrupts as NMI

Right?

#1 The implementation gets the MSI interrupt domain concept completely
   wrong

   X86 uses the MSI parent domain concept.

   [vector domain] -- [remap domain] -- [ device domain]

   The remap domain is optional, but both the vector domain and the
   remap domain act as MSI parent domains.

   So what you want to create for that chip is a MSI device domain and
   that domain needs to set the bus token to DOMAIN_BUS_WIRED_TO_MSI.

   See drivers/irqchip/irq-mbigen.c mbigen_create_device_domain() and
   related code as an example for a proper wired to MSI implementation.


#2 NMI routing

   There has been attempts to implement that before in a clean way. The
   patch set dried out, but the underlying changes for NMI support are
   still valid and Ricardo (CC'ed) is working on them again, IIRC. See:

      https://lore.kernel.org/lkml/20230301234753.28582-1-ricardo.neri-calderon@linux.intel.com/

Thanks,

        tglx

  parent reply	other threads:[~2026-03-20 12:04 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-18 13:10 [PATCH 0/2] Add MSI driver support for the Lightning Mountain SoC Florian Eckert
2026-03-18 13:10 ` [PATCH 1/2] dt-bindings: Add Lightning Mountain MSI interrupt controller bindings Florian Eckert
2026-03-18 14:46   ` Rob Herring (Arm)
2026-03-18 22:52   ` Rob Herring
2026-03-19 11:01   ` Krzysztof Kozlowski
2026-03-18 13:10 ` [PATCH 2/2] irqchip: Add Lightning Mountain irqchip support Florian Eckert
2026-03-19  9:03   ` kernel test robot
2026-03-19 10:41   ` kernel test robot
2026-03-19 11:44   ` kernel test robot
2026-03-20 12:04   ` Thomas Gleixner [this message]
2026-03-23 12:14     ` Florian Eckert
2026-03-23 12:28       ` Ricardo Neri
2026-03-23 21:15       ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87v7eqk8pv.ffs@tglx \
    --to=tglx@kernel.org \
    --cc=Eckert.Florian@googlemail.com \
    --cc=conor+dt@kernel.org \
    --cc=devicetree@vger.kernel.org \
    --cc=fe@dev.tdt.de \
    --cc=krzk+dt@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ms@dev.tdt.de \
    --cc=ricardo.neri-calderon@linux.intel.com \
    --cc=robh@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox