From: Alexandre Courbot <acourbot@nvidia.com>
To: Danilo Krummrich <dakr@kernel.org>,
Alice Ryhl <aliceryhl@google.com>,
David Airlie <airlied@gmail.com>,
Simona Vetter <simona@ffwll.ch>, Gary Guo <gary@garyguo.net>,
John Hubbard <jhubbard@nvidia.com>,
Alistair Popple <apopple@nvidia.com>,
Timur Tabi <ttabi@nvidia.com>,
Eliot Courtney <ecourtney@nvidia.com>,
Zhi Wang <zhiw@nvidia.com>
Cc: nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org,
linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
Alexandre Courbot <acourbot@nvidia.com>
Subject: [PATCH v3 03/12] gpu: nova-core: gsp: replace BootUnloadGuard with local handlers
Date: Mon, 29 Jun 2026 21:31:46 +0900 [thread overview]
Message-ID: <20260629-nova-bootcontext-v3-3-26cb29ee8dee@nvidia.com> (raw)
In-Reply-To: <20260629-nova-bootcontext-v3-0-26cb29ee8dee@nvidia.com>
When adding the GSP unload capability, we introduced `BootUnloadGuard`
to automatically call `Gsp::unload` whenever an error occurred during
the boot process, in order to try to reset the GSP to a valid state.
This approach is not well-suited to the errors that may occur in HALs:
by definition, an error occurring in the HAL means that the GSP is not
booted; yet the first thing that `Gsp::unload` does is queue a shutdown
message to the GSP, which will inevitably result in a timeout when done
from a HAL.
Furthermore, `BootUnloadGuard` is problematic because it holds
additional references to the boot context, notably the `Falcon`s. These
extra references stand in the way of making some of the `Falcon`'s
methods mutable, since those methods would require exclusive access. As
this behavior is only needed in one place, introducing dedicated types
for it is distracting and unnecessary.
Thus, remove `BootUnloadGuard` and adopt a two-level error handling
strategy:
- HALs are free to handle their errors as they see fit (most likely, by
running their unload bundle if it is ready by the time of the error),
- `Gsp::boot` uses a `ScopeGuard` that runs `Gsp::unload` if a later step
fails, since the GSP should be up and running by the time `GspHal::boot`
has returned.
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 67 +++-------------------------------
drivers/gpu/nova-core/gsp/hal.rs | 13 +++----
drivers/gpu/nova-core/gsp/hal/gh100.rs | 20 ++++------
drivers/gpu/nova-core/gsp/hal/tu102.rs | 23 +++++++-----
4 files changed, 33 insertions(+), 90 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index bb2000b7a78b..0d2213fb1569 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -30,66 +30,6 @@
},
};
-/// Arguments required to call [`Gsp::unload`](super::Gsp::unload).
-///
-/// Stored as their own type to avoid repeating a long and tedious list in [`BootUnloadGuard`].
-pub(super) struct BootUnloadArgs<'a> {
- gsp: &'a super::Gsp,
- dev: &'a device::Device<device::Bound>,
- bar: Bar0<'a>,
- gsp_falcon: &'a Falcon<Gsp>,
- sec2_falcon: &'a Falcon<Sec2>,
- unload_bundle: Option<super::UnloadBundle>,
-}
-
-/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a
-/// [`UnloadBundle`](super::UnloadBundle) when dropped.
-///
-/// Used to ensure the `UnloadBundle` is run during failure paths.
-pub(super) struct BootUnloadGuard<'a> {
- guard: ScopeGuard<BootUnloadArgs<'a>, fn(BootUnloadArgs<'a>)>,
-}
-
-impl<'a> BootUnloadGuard<'a> {
- /// Wraps `unload_bundle` into a guard that executes it when dropped.
- pub(super) fn new(
- gsp: &'a super::Gsp,
- dev: &'a device::Device<device::Bound>,
- bar: Bar0<'a>,
- gsp_falcon: &'a Falcon<Gsp>,
- sec2_falcon: &'a Falcon<Sec2>,
- unload_bundle: Option<super::UnloadBundle>,
- ) -> Self {
- Self {
- guard: ScopeGuard::new_with_data(
- BootUnloadArgs {
- gsp,
- dev,
- bar,
- gsp_falcon,
- sec2_falcon,
- unload_bundle,
- },
- |args| {
- let _ = super::Gsp::unload(
- args.gsp,
- args.dev,
- args.bar,
- args.gsp_falcon,
- args.sec2_falcon,
- args.unload_bundle,
- );
- },
- ),
- }
- }
-
- /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBundle) it contains.
- pub(super) fn dismiss(self) -> Option<super::UnloadBundle> {
- self.guard.dismiss().unload_bundle
- }
-}
-
impl super::Gsp {
/// Attempt to boot the GSP.
///
@@ -107,6 +47,7 @@ pub(crate) fn boot(
let bar = ctx.bar;
let chipset = ctx.chipset;
let gsp_falcon = ctx.gsp_falcon;
+ let sec2_falcon = ctx.sec2_falcon;
let dev = pdev.as_ref();
let hal = super::hal::gsp_hal(chipset);
@@ -118,7 +59,11 @@ pub(crate) fn boot(
let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
// Perform the chipset-specific boot sequence, and retrieve the unload bundle.
- let unload_guard = hal.boot(&self, &ctx, &fb_layout, &wpr_meta)?;
+ let unload_bundle = hal.boot(&self, &ctx, &fb_layout, &wpr_meta)?;
+
+ let unload_guard = ScopeGuard::new_with_data(unload_bundle, |unload_bundle| {
+ let _ = self.unload(dev, bar, gsp_falcon, sec2_falcon, unload_bundle);
+ });
gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);
diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs
index 51a277fe97bb..0d65a32f9949 100644
--- a/drivers/gpu/nova-core/gsp/hal.rs
+++ b/drivers/gpu/nova-core/gsp/hal.rs
@@ -24,7 +24,6 @@
Chipset, //
},
gsp::{
- boot::BootUnloadGuard,
Gsp,
GspBootContext,
GspFwWprMeta, //
@@ -51,15 +50,15 @@ fn run(
pub(super) trait GspHal: Send {
/// Performs the GSP boot process, loading and running the required firmwares as needed.
///
- /// Upon success, returns a guard that runs the GSP unload sequence if GSP boot does not
- /// complete.
- fn boot<'a>(
+ /// Upon success, returns the [`crate::gsp::UnloadBundle`] to use with [`Gsp::unload`], if one
+ /// could be created.
+ fn boot(
&self,
- gsp: &'a Gsp,
- ctx: &GspBootContext<'a>,
+ gsp: &Gsp,
+ ctx: &GspBootContext<'_>,
fb_layout: &FbLayout,
wpr_meta: &Coherent<GspFwWprMeta>,
- ) -> Result<BootUnloadGuard<'a>>;
+ ) -> Result<Option<crate::gsp::UnloadBundle>>;
/// Performs HAL-specific post-GSP boot tasks.
///
diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs
index 2187e11168b2..bd15a3067ffe 100644
--- a/drivers/gpu/nova-core/gsp/hal/gh100.rs
+++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs
@@ -23,7 +23,6 @@
Fsp, //
},
gsp::{
- boot::BootUnloadGuard,
hal::{
GspHal,
UnloadBundle, //
@@ -145,27 +144,22 @@ impl GspHal for Gh100 {
///
/// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP handles
/// the GSP boot internally - no manual GSP reset/boot is needed.
- fn boot<'a>(
+ fn boot(
&self,
- gsp: &'a Gsp,
- ctx: &GspBootContext<'a>,
+ gsp: &Gsp,
+ ctx: &GspBootContext<'_>,
fb_layout: &FbLayout,
wpr_meta: &Coherent<GspFwWprMeta>,
- ) -> Result<BootUnloadGuard<'a>> {
+ ) -> Result<Option<crate::gsp::UnloadBundle>> {
let dev = ctx.dev();
let bar = ctx.bar;
let chipset = ctx.chipset;
let gsp_falcon = ctx.gsp_falcon;
- let sec2_falcon = ctx.sec2_falcon;
let unload_bundle = crate::gsp::UnloadBundle(
KBox::new(FspUnloadBundle, GFP_KERNEL)? as KBox<dyn UnloadBundle>
);
- // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
- let unload_guard =
- BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, Some(unload_bundle));
-
let mut fsp = Fsp::wait_secure_boot(dev, bar, chipset)?;
let args = FmcBootArgs::new(
@@ -176,11 +170,13 @@ fn boot<'a>(
false,
)?;
- fsp.boot_fmc(dev, bar, fb_layout, &args)?;
+ // Keep the result as we want to wait for lockdown release even in case of error, to make
+ // sure `args` is not accessed by the GSP anymore.
+ let res = fsp.boot_fmc(dev, bar, fb_layout, &args);
wait_for_gsp_lockdown_release(dev, bar, gsp_falcon, args.boot_params_dma_handle())?;
- Ok(unload_guard)
+ res.map(|()| Some(unload_bundle))
}
}
diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs
index 6ed4ee268086..8511cc647596 100644
--- a/drivers/gpu/nova-core/gsp/hal/tu102.rs
+++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs
@@ -6,7 +6,8 @@
use kernel::{
device,
dma::Coherent,
- io::Io, //
+ io::Io,
+ types::ScopeGuard, //
};
use crate::{
@@ -32,7 +33,6 @@
},
gpu::Chipset,
gsp::{
- boot::BootUnloadGuard,
hal::{
GspHal,
UnloadBundle, //
@@ -264,13 +264,13 @@ fn run_fwsec_frts(
struct Tu102;
impl GspHal for Tu102 {
- fn boot<'a>(
+ fn boot(
&self,
- gsp: &'a Gsp,
- ctx: &GspBootContext<'a>,
+ gsp: &Gsp,
+ ctx: &GspBootContext<'_>,
fb_layout: &FbLayout,
wpr_meta: &Coherent<GspFwWprMeta>,
- ) -> Result<BootUnloadGuard<'a>> {
+ ) -> Result<Option<crate::gsp::UnloadBundle>> {
let dev = ctx.dev();
let bar = ctx.bar;
let chipset = ctx.chipset;
@@ -296,9 +296,12 @@ fn boot<'a>(
.ok()
.map(crate::gsp::UnloadBundle);
- // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
- let unload_guard =
- BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, unload_bundle);
+ // Run the unload bundle to try and recover the GSP if an error occurs.
+ let unload_guard = ScopeGuard::new_with_data(unload_bundle, |unload_bundle| {
+ if let Some(unload_bundle) = unload_bundle {
+ let _ = unload_bundle.0.run(dev, bar, gsp_falcon, sec2_falcon);
+ }
+ });
// FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100).
if !fb_layout.frts.is_empty() {
@@ -329,7 +332,7 @@ fn boot<'a>(
)?
.run(dev, bar, sec2_falcon, wpr_meta)?;
- Ok(unload_guard)
+ Ok(unload_guard.dismiss())
}
fn post_boot(&self, gsp: &Gsp, ctx: &GspBootContext<'_>, gsp_fw: &GspFirmware) -> Result {
--
2.54.0
next prev parent reply other threads:[~2026-06-29 12:32 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-29 12:31 [PATCH v3 00/12] gpu: nova-core: consolidate and streamline GSP boot process Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 01/12] gpu: nova-core: gsp: sequencer: use GspBootContext Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 02/12] gpu: nova-core: gsp: sequencer: do not store sequence into GspSequencer Alexandre Courbot
2026-06-29 12:31 ` Alexandre Courbot [this message]
2026-06-29 12:31 ` [PATCH v3 04/12] gpu: nova-core: gsp: pass GspBootContext to unload methods Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 05/12] gpu: nova-core: gsp: centralize missing unload bundle warnings Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 06/12] gpu: nova-core: gsp: fold TU102 unload bundle construction into HAL method Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 07/12] gpu: nova-core: gsp: turn FWSEC execution " Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 08/12] gpu: nova-core: gsp: make use of FWSEC bootloader a property of the TU102 HAL Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 09/12] gpu: nova-core: introduce GspBootMethod Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 10/12] gpu: nova-core: avoid repeated calls to pci::Device::as_ref Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 11/12] gpu: nova-core: gsp: pass GspBootContext mutably Alexandre Courbot
2026-06-29 12:31 ` [PATCH v3 12/12] gpu: nova-core: store Fsp instance in Gpu Alexandre Courbot
2026-06-29 13:05 ` [PATCH v3 00/12] gpu: nova-core: consolidate and streamline GSP boot process Alexandre Courbot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260629-nova-bootcontext-v3-3-26cb29ee8dee@nvidia.com \
--to=acourbot@nvidia.com \
--cc=airlied@gmail.com \
--cc=aliceryhl@google.com \
--cc=apopple@nvidia.com \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=ecourtney@nvidia.com \
--cc=gary@garyguo.net \
--cc=jhubbard@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=nova-gpu@lists.linux.dev \
--cc=rust-for-linux@vger.kernel.org \
--cc=simona@ffwll.ch \
--cc=ttabi@nvidia.com \
--cc=zhiw@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox