From: Joel Fernandes <joelagnelf@nvidia.com>
To: linux-kernel@vger.kernel.org
Cc: "Danilo Krummrich" <dakr@kernel.org>,
"Alexandre Courbot" <acourbot@nvidia.com>,
"John Hubbard" <jhubbard@nvidia.com>,
"Alice Ryhl" <aliceryhl@google.com>,
"David Airlie" <airlied@gmail.com>,
"Simona Vetter" <simona@ffwll.ch>,
"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
"Maxime Ripard" <mripard@kernel.org>,
"Thomas Zimmermann" <tzimmermann@suse.de>,
"Miguel Ojeda" <ojeda@kernel.org>,
"Boqun Feng" <boqun@kernel.org>, "Gary Guo" <gary@garyguo.net>,
"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
"Benno Lossin" <lossin@kernel.org>,
"Andreas Hindborg" <a.hindborg@kernel.org>,
"Trevor Gross" <tmgross@umich.edu>,
"Jonathan Corbet" <corbet@lwn.net>,
"Shuah Khan" <skhan@linuxfoundation.org>,
nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org,
rust-for-linux@vger.kernel.org, linux-doc@vger.kernel.org,
"Joel Fernandes" <joelagnelf@nvidia.com>
Subject: [PATCH v1 6/7] gpu: nova-core: add CPU doorbell IRQ self-test
Date: Fri, 1 May 2026 16:58:24 -0400 [thread overview]
Message-ID: <20260501205825.73614-7-joelagnelf@nvidia.com> (raw)
In-Reply-To: <20260501205825.73614-1-joelagnelf@nvidia.com>
Add a CPU doorbell interrupt self-test that runs during probe, after GSP
boot. The test validates the full MSI interrupt path from GPU through
PCIe to the CPU interrupt handler.
Tested with qemu + GPU passthrough on GA102, with dmesg as follows:
NovaCore 0000:00:06.0: CPU doorbell self-test: PASS (irq_count=1)
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/Kconfig | 13 ++
drivers/gpu/nova-core/gpu.rs | 8 +
drivers/gpu/nova-core/irq.rs | 2 +
drivers/gpu/nova-core/irq/doorbell_test.rs | 203 +++++++++++++++++++++
drivers/gpu/nova-core/nova_core.rs | 2 +-
5 files changed, 227 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/nova-core/irq/doorbell_test.rs
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig
index d8456f8eaa05..e2c8a090c7ff 100644
--- a/drivers/gpu/nova-core/Kconfig
+++ b/drivers/gpu/nova-core/Kconfig
@@ -15,3 +15,16 @@ config NOVA_CORE
This driver is work in progress and may not be functional.
If M is selected, the module will be called nova_core.
+
+config NOVA_CORE_IRQ_SELFTEST
+ bool "Nova IRQ self-test during probe"
+ depends on NOVA_CORE
+ help
+ Enable the CPU doorbell IRQ self-test that runs during nova-core
+ probe. The test triggers vector 129 (CPU doorbell) and verifies
+ the interrupt is received through the INTR_CTRL interrupt tree.
+
+ This validates the full MSI interrupt path from GPU through PCIe
+ to the CPU interrupt handler.
+
+ If unsure, say N.
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 3b45bce6738b..f6e02007ef8f 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -305,6 +305,14 @@ pub(crate) fn new<'a>(
// Allocate a PCI interrupt vector.
_: {
let _irq_vector = irq::alloc_vector(pdev)?;
+
+ #[cfg(CONFIG_NOVA_CORE_IRQ_SELFTEST)]
+ irq::doorbell_test::run_selftest(
+ pdev,
+ &devres_bar,
+ spec.chipset,
+ _irq_vector,
+ )?;
},
bar: devres_bar,
diff --git a/drivers/gpu/nova-core/irq.rs b/drivers/gpu/nova-core/irq.rs
index 01ae638bf494..f4ed4593e795 100644
--- a/drivers/gpu/nova-core/irq.rs
+++ b/drivers/gpu/nova-core/irq.rs
@@ -10,6 +10,8 @@
prelude::*,
};
+#[cfg(CONFIG_NOVA_CORE_IRQ_SELFTEST)]
+pub(crate) mod doorbell_test;
mod intr_ctrl;
pub(crate) fn alloc_vector(pdev: &pci::Device<Bound>) -> Result<pci::IrqVector<'_>> {
diff --git a/drivers/gpu/nova-core/irq/doorbell_test.rs b/drivers/gpu/nova-core/irq/doorbell_test.rs
new file mode 100644
index 000000000000..fb4e039ac032
--- /dev/null
+++ b/drivers/gpu/nova-core/irq/doorbell_test.rs
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ device::{Bound, Device},
+ devres::Devres,
+ irq, pci,
+ prelude::*,
+ sync::{
+ atomic::{
+ Atomic,
+ Relaxed, //
+ },
+ Arc, Completion,
+ },
+ time,
+};
+
+use super::intr_ctrl::{
+ IntrCtrl,
+ Leaf,
+ LeafIndex, //
+};
+use crate::{
+ driver::Bar0,
+ gpu::Chipset, //
+};
+
+// The following are constant across all architectures.
+
+/// CPU doorbell vector.
+const DOORBELL_VECTOR: u32 = 129;
+
+/// Leaf index for the doorbell vector, derived so it cannot drift: 129 / 32 = 4.
+const DOORBELL_LEAF: usize = (DOORBELL_VECTOR / u32::BITS) as usize;
+
+/// Bit within the leaf, derived from the vector as well: 1 << (129 % 32) = 0b10.
+const DOORBELL_BIT: u32 = 1 << (DOORBELL_VECTOR % u32::BITS);
+
+/// IRQ handler for the CPU doorbell self-test.
+///
+/// `handle()` drains the tree (unarm -> read TOP -> iterate leaves -> ack -> rearm), then
+/// bumps `irq_count` and signals `completion`. Only the doorbell leaf's mask is recorded.
+#[pin_data]
+struct DoorbellTestHandler {
+ /// BAR0 handle; `handle()` calls `access()` on it for every IRQ before touching registers.
+ bar: Arc<Devres<Bar0>>,
+ intr_ctrl: IntrCtrl,
+ /// Signalled with `complete_all()` after an IRQ has been drained.
+ #[pin]
+ completion: Completion,
+ /// Number of IRQs handled, i.e. those for which TOP reported something pending.
+ irq_count: Atomic<u32>,
+ /// Last pending mask seen on the doorbell leaf (`DOORBELL_LEAF`), as stored by `handle()`.
+ doorbell_leaf_mask: Atomic<u32>,
+}
+
+impl irq::Handler for DoorbellTestHandler {
+    /// Services one IRQ: unarm TOP, ack every pending leaf, rearm, then notify the waiter.
+    fn handle(&self, dev: &Device<Bound>) -> irq::IrqReturn {
+        // If the BAR cannot be accessed, report the IRQ as not handled.
+        let Ok(bar) = self.bar.access(dev) else {
+            return irq::IrqReturn::None;
+        };
+        let intr_ctrl = &self.intr_ctrl;
+
+        // Unarm the top level before reading what is pending on it.
+        let pending_top = intr_ctrl.top().unarm(bar).read_pending(bar);
+        if pending_top.mask() == 0 {
+            // Nothing pending at the top level: rearm and leave the counters untouched.
+            pending_top.rearm(bar);
+            return irq::IrqReturn::None;
+        }
+
+        // Every pending leaf is acked; only the doorbell leaf's mask is recorded.
+        let watched_leaf = Leaf::from_index(LeafIndex::new::<DOORBELL_LEAF>());
+        for subtree in pending_top.iter_subtrees() {
+            for pending_leaf in subtree.iter_pending_leaves(intr_ctrl, bar) {
+                if pending_leaf == watched_leaf {
+                    self.doorbell_leaf_mask.store(pending_leaf.mask(), Relaxed);
+                }
+                pending_leaf.ack(bar);
+            }
+        }
+        pending_top.rearm(bar);
+
+        self.irq_count.fetch_add(1, Relaxed);
+        self.completion.complete_all();
+        irq::IrqReturn::Handled
+    }
+}
+
+/// Run the CPU doorbell IRQ self-test.
+///
+/// Registers a `DoorbellTestHandler` on `irq_vector`, allows the doorbell bit, arms the top
+/// level, triggers `DOORBELL_VECTOR` and waits up to one second for the handler to signal.
+/// The run passes when exactly one IRQ was handled and the doorbell bit was seen pending.
+///
+/// Returns `EIO` if the doorbell bit is already pending before the trigger; errors from BAR
+/// access and IRQ registration are propagated.
+///
+/// NOTE(review): a failed or timed-out run is only logged and still returns `Ok(())`, while
+/// the stale-bit pre-check returns `EIO` - confirm that this asymmetry is intended.
+pub(crate) fn run_selftest(
+    pdev: &pci::Device<Bound>,
+    bar_devres: &Arc<Devres<Bar0>>,
+    chipset: Chipset,
+    irq_vector: pci::IrqVector<'_>,
+) -> Result {
+    let bar = bar_devres.access(pdev.as_ref())?;
+    let intr_ctrl = IntrCtrl::new(chipset);
+
+    // Drain first so that stale pending bits cannot be mistaken for the doorbell.
+    intr_ctrl.drain(bar);
+
+    // The handler takes ownership of `intr_ctrl`; it is reached via `handler` from here on.
+    let handler_state = try_pin_init!(DoorbellTestHandler {
+        bar: bar_devres.clone(),
+        intr_ctrl,
+        completion <- Completion::new(),
+        irq_count: Atomic::new(0),
+        doorbell_leaf_mask: Atomic::new(0),
+    }? Error);
+    let registration = Arc::pin_init(
+        pdev.request_irq(
+            irq_vector,
+            irq::Flags::TRIGGER_NONE,
+            c"nova-core",
+            handler_state,
+        ),
+        GFP_KERNEL,
+    )?;
+    let handler = registration.handler();
+    let doorbell_leaf_idx = LeafIndex::new::<DOORBELL_LEAF>();
+
+    // Shared teardown: block the doorbell bit again and leave the top level unarmed.
+    let quiesce = || {
+        handler
+            .intr_ctrl
+            .leaf(doorbell_leaf_idx)
+            .block(bar, DOORBELL_BIT);
+        let _ = handler.intr_ctrl.top().unarm(bar);
+    };
+
+    // Allow the doorbell bit in its leaf.
+    handler
+        .intr_ctrl
+        .leaf(doorbell_leaf_idx)
+        .allow(bar, DOORBELL_BIT);
+
+    // The doorbell bit must be clear before triggering, otherwise the test cannot prove
+    // that the IRQ came from the trigger below.
+    let stale_mask = handler
+        .intr_ctrl
+        .leaf(doorbell_leaf_idx)
+        .read_pending(bar)
+        .mask();
+    if (stale_mask & DOORBELL_BIT) != 0 {
+        quiesce();
+        dev_warn!(
+            pdev.as_ref(),
+            "CPU doorbell self-test: FAIL (doorbell bit already pending, leaf[{}] mask={:#x})\n",
+            DOORBELL_LEAF,
+            stale_mask,
+        );
+        return Err(EIO);
+    }
+
+    // Arm the top level so that an MSI can be generated, then ring the doorbell.
+    handler.intr_ctrl.top().arm(bar);
+    handler.intr_ctrl.trigger(bar, DOORBELL_VECTOR);
+
+    // Give the handler up to one second to run, then snapshot what it recorded.
+    let completed = handler
+        .completion
+        .wait_for_completion_timeout(time::msecs_to_jiffies(1000));
+    let irq_count = handler.irq_count.load(Relaxed);
+    let leaf_mask = handler.doorbell_leaf_mask.load(Relaxed);
+    quiesce();
+
+    // Pass requires all three: a signalled completion, exactly one handled IRQ, and the
+    // doorbell bit present in the mask recorded by the handler.
+    let doorbell_bit_seen = (leaf_mask & DOORBELL_BIT) != 0;
+    if completed && irq_count == 1 && doorbell_bit_seen {
+        dev_info!(
+            pdev.as_ref(),
+            "CPU doorbell self-test: PASS (irq_count={}, leaf[{}] mask={:#x})\n",
+            irq_count,
+            DOORBELL_LEAF,
+            leaf_mask,
+        );
+    } else {
+        dev_warn!(
+            pdev.as_ref(),
+            "CPU doorbell self-test: FAIL (completed={}, irq_count={}, leaf[{}] mask={:#x})\n",
+            completed,
+            irq_count,
+            DOORBELL_LEAF,
+            leaf_mask,
+        );
+    }
+
+    Ok(())
+}
diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
index 6d0e4b2f53c7..5fce7068db03 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -19,7 +19,7 @@
mod firmware;
mod gpu;
mod gsp;
-#[expect(dead_code)]
+#[cfg_attr(not(CONFIG_NOVA_CORE_IRQ_SELFTEST), expect(dead_code))]
mod irq;
#[macro_use]
mod num;
--
2.34.1
next prev parent reply other threads:[~2026-05-01 20:58 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-01 20:58 [PATCH v1 0/7] gpu: nova-core: add INTR_CTRL interrupt controller and CPU doorbell self-test Joel Fernandes
2026-05-01 20:58 ` [PATCH v1 1/7] rust: sync: completion: add wait_for_completion_timeout() Joel Fernandes
2026-05-05 12:17 ` Miguel Ojeda
2026-05-05 20:19 ` Joel Fernandes
2026-05-01 20:58 ` [PATCH v1 2/7] gpu: nova-core: allocate PCI MSI vector during probe Joel Fernandes
2026-05-01 20:58 ` [PATCH v1 3/7] gpu: nova-core: add interrupt controller register definitions Joel Fernandes
2026-05-01 20:58 ` [PATCH v1 4/7] gpu: nova-core: add Architecture::is_pre_hopper() helper Joel Fernandes
2026-05-01 20:58 ` [PATCH v1 5/7] gpu: nova-core: add INTR_CTRL interrupt controller API Joel Fernandes
2026-05-01 20:58 ` Joel Fernandes [this message]
2026-05-01 20:58 ` [PATCH v1 7/7] gpu: nova-core: document INTR_CTRL interrupt tree Joel Fernandes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260501205825.73614-7-joelagnelf@nvidia.com \
--to=joelagnelf@nvidia.com \
--cc=a.hindborg@kernel.org \
--cc=acourbot@nvidia.com \
--cc=airlied@gmail.com \
--cc=aliceryhl@google.com \
--cc=bjorn3_gh@protonmail.com \
--cc=boqun@kernel.org \
--cc=corbet@lwn.net \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=gary@garyguo.net \
--cc=jhubbard@nvidia.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lossin@kernel.org \
--cc=maarten.lankhorst@linux.intel.com \
--cc=mripard@kernel.org \
--cc=nova-gpu@lists.linux.dev \
--cc=ojeda@kernel.org \
--cc=rust-for-linux@vger.kernel.org \
--cc=simona@ffwll.ch \
--cc=skhan@linuxfoundation.org \
--cc=tmgross@umich.edu \
--cc=tzimmermann@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox