[PATCH 2/2] iommu/vt-d: Share DMAR fault IRQ to prevent vector exhaustion

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Jacob Pan <jacob.jun.pan@linux.intel.com>
To: sivanich@hpe.com, Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>,
	iommu@lists.linux.dev, "Lu Baolu" <baolu.lu@linux.intel.com>,
	Joerg Roedel <joro@8bytes.org>
Cc: "Tian, Kevin" <kevin.tian@intel.com>, Yi Liu <yi.l.liu@intel.com>,
	steve.wahl@hpe.com, russ.anderson@hpe.com,
	Peter Zijlstra <peterz@infradead.org>,
	"Will Deacon" <will@kernel.org>,
	"Robin Murphy" <robin.murphy@arm.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>
Subject: [PATCH 2/2] iommu/vt-d: Share DMAR fault IRQ to prevent vector exhaustion
Date: Wed,  3 Apr 2024 16:45:48 -0700	[thread overview]
Message-ID: <20240403234548.989061-2-jacob.jun.pan@linux.intel.com> (raw)
In-Reply-To: <20240403234548.989061-1-jacob.jun.pan@linux.intel.com>

The DMAR fault interrupt is used for per-IOMMU unrecoverable fault reporting.
It occurs only if there is a kernel programming error or serious hardware
failure. In other words, it should never occur under normal circumstances.

However, we are permanently occupying IRQ vectors per DMAR unit. On a
dual-socket Sapphire Rapids system, DMAR fault interrupts can consume 16
vectors on BSP, which can lead to vector exhaustion. The effort to spread
vectors to each socket only partially alleviates the problem.

This patch leverages the shared IRQ mechanism such that only a single IRQ
vector is consumed for all the DMAR units on a system. When any DMAR faults
occur, all DMAR handlers are called to check their own fault records.

After this patch, /proc/interrupts will show the list of DMAR units that
share the fault interrupt, e.g.

24 DMAR-MSI   14-edge      dmar14, dmar13, dmar12, dmar11, dmar10, dmar9,
dmar8, dmar7, dmar6, dmar5, dmar4, dmar3, dmar2, dmar1, dmar0, dmar15

Link: https://lore.kernel.org/lkml/20240325115638.342716e5@jacob-builder/t/#mc08892e405456428773bcc3b0bbe8971886c5ab9

Reported-by: Dimitri Sivanich <sivanich@hpe.com>
Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
 drivers/iommu/intel/dmar.c  | 71 +++++++++++++++++++++++++++++--------
 drivers/iommu/intel/iommu.h |  1 +
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index ab325af93f71..cf68464b3404 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1182,7 +1182,6 @@ static void free_iommu(struct intel_iommu *iommu)
 			iommu->pr_irq = 0;
 		}
 		free_irq(iommu->fault_irq, iommu);
-		dmar_free_hwirq(iommu->fault_irq);
 		iommu->fault_irq = 0;
 	}
 
@@ -1956,9 +1955,8 @@ void dmar_msi_mask(struct irq_data *data)
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
-void dmar_msi_write(int irq, struct msi_msg *msg)
+static void dmar_msi_write_msg(struct intel_iommu *iommu, int irq, struct msi_msg *msg)
 {
-	struct intel_iommu *iommu = irq_get_handler_data(irq);
 	int reg = dmar_msi_reg(iommu, irq);
 	unsigned long flag;
 
@@ -1969,6 +1967,13 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
+void dmar_msi_write(int irq, struct msi_msg *msg)
+{
+	struct intel_iommu *iommu = irq_get_handler_data(irq);
+
+	dmar_msi_write_msg(iommu, irq, msg);
+}
+
 void dmar_msi_read(int irq, struct msi_msg *msg)
 {
 	struct intel_iommu *iommu = irq_get_handler_data(irq);
@@ -2098,27 +2103,63 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static inline void dmar_fault_irq_unmask(struct intel_iommu *iommu)
+{
+	unsigned long flag;
+
+	raw_spin_lock_irqsave(&iommu->register_lock, flag);
+	writel(0, iommu->reg + DMAR_FECTL_REG);
+	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
 int dmar_set_interrupt(struct intel_iommu *iommu)
 {
-	int irq, ret;
+	static int dmar_irq;
+	int ret;
 
-	/*
-	 * Check if the fault interrupt is already initialized.
-	 */
+	/* Don't initialize it twice for a given iommu */
 	if (iommu->fault_irq)
 		return 0;
+	/*
+	 * There is one shared interrupt for all IOMMUs to prevent vector
+	 * exhaustion.
+	 */
+	if (!dmar_irq) {
+		int irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
 
-	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
-	if (irq > 0) {
-		iommu->fault_irq = irq;
+		if (irq <= 0) {
+			pr_err("No free IRQ vectors\n");
+			return -EINVAL;
+		}
+		dmar_irq = irq;
+		iommu->fault_irq = dmar_irq;
+		iommu->flags |= VTD_FLAG_FAULT_IRQ_OWNER;
 	} else {
-		pr_err("No free IRQ vectors\n");
-		return -EINVAL;
-	}
+		struct msi_msg msg;
 
-	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
-	if (ret)
+		/*
+		 * Get the MSI message from the shared interrupt and write
+		 * it to the iommu MSI registers. Must assign fault_irq to get
+		 * the MSI register offset.
+		 */
+		iommu->fault_irq = dmar_irq;
+		dmar_msi_read(dmar_irq, &msg);
+		dmar_msi_write_msg(iommu, dmar_irq, &msg);
+	}
+	ret = request_irq(dmar_irq, dmar_fault, IRQF_NO_THREAD | IRQF_SHARED | IRQF_NOBALANCING, iommu->name, iommu);
+	if (ret) {
 		pr_err("Can't request irq\n");
+		return ret;
+	}
+
+	/*
+	 * Only the owner IOMMU of the shared IRQ has its fault event
+	 * interrupt unmasked after request_irq(), the rest are explicitly
+	 * unmasked.
+	 */
+	if (!(iommu->flags & VTD_FLAG_FAULT_IRQ_OWNER))
+		dmar_fault_irq_unmask(iommu);
+
 	return ret;
 }
 
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index deebd4817d27..128f6cdaebac 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -536,6 +536,7 @@ enum {
 #define VTD_FLAG_TRANS_PRE_ENABLED	(1 << 0)
 #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED	(1 << 1)
 #define VTD_FLAG_SVM_CAPABLE		(1 << 2)
+#define VTD_FLAG_FAULT_IRQ_OWNER	(1 << 3)
 
 #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
 #define pasid_supported(iommu)	(sm_supported(iommu) &&			\
-- 
2.25.1

next prev parent reply	other threads:[~2024-04-03 23:41 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-03 23:45 [PATCH 1/2] iommu/vt-d: Rename fault IRQ variable Jacob Pan
2024-04-03 23:45 ` Jacob Pan [this message]
2024-04-08  8:48   ` [PATCH 2/2] iommu/vt-d: Share DMAR fault IRQ to prevent vector exhaustion Tian, Kevin
2024-04-08 16:05     ` Jacob Pan
2024-04-08 17:38       ` Jacob Pan
2024-04-09  7:07         ` Tian, Kevin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:ab325af93f7 dfblob:cf68464b340 dfblob:deebd4817d2
dfblob:128f6cdaeba )
 OR (
bs:"[PATCH 2/2] iommu/vt-d: Share DMAR fault IRQ to prevent vector exhaustion" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240403234548.989061-2-jacob.jun.pan@linux.intel.com \
    --to=jacob.jun.pan@linux.intel.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=robin.murphy@arm.com \
    --cc=russ.anderson@hpe.com \
    --cc=sivanich@hpe.com \
    --cc=steve.wahl@hpe.com \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox