NVIDIA GPU driver infrastructure
 help / color / mirror / Atom feed
From: "Eliot Courtney" <ecourtney@nvidia.com>
To: "Alexandre Courbot" <acourbot@nvidia.com>,
	"Danilo Krummrich" <dakr@kernel.org>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Benno Lossin" <lossin@kernel.org>, "Gary Guo" <gary@garyguo.net>
Cc: <nova-gpu@lists.linux.dev>, <dri-devel@lists.freedesktop.org>,
	<linux-kernel@vger.kernel.org>, <rust-for-linux@vger.kernel.org>,
	"dri-devel" <dri-devel-bounces@lists.freedesktop.org>
Subject: Re: [PATCH 1/4] gpu: nova-core: move GSP unload state to a pinned Gpu subobject
Date: Wed, 10 Jun 2026 12:52:06 +0900	[thread overview]
Message-ID: <DJ52BSFTRYBX.2ZR8NX8A5H4ML@nvidia.com> (raw)
In-Reply-To: <20260609-boot-vram-v1-1-d9382610507a@nvidia.com>

On Tue Jun 9, 2026 at 5:03 PM JST, Alexandre Courbot wrote:
> `Gpu` currently owns the state needed to unload the GSP directly. This
> means that `unload_bundle` has to be the last initialized field: once GSP
> boot succeeds, any later initialization failure would leave `Gpu`
> partially initialized, and its `PinnedDrop` implementation would not run.
>
> This prevents adding fallible `Gpu` fields that need to query the GSP
> after it has booted.
>
> Move the GSP state and unload bundle into a dedicated pinned
> `GspResources` object. Once that subobject has been initialized, its
> `PinnedDrop` implementation will run even if initialization of a later
> `Gpu` field fails, ensuring that the GSP unload sequence is executed.
>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
>  drivers/gpu/nova-core/gpu.rs | 86 +++++++++++++++++++++++++-------------------
>  1 file changed, 49 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index b3c91731db45..6b3e02c71dee 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -262,35 +262,59 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>      }
>  }
>  
> -/// Structure holding the resources required to operate the GPU.
> +/// Self-contained resources to operate and drop the GSP.
>  #[pin_data(PinnedDrop)]
> -pub(crate) struct Gpu<'gpu> {
> +struct GspResources<'gpu> {
>      /// Device owning the GPU.
>      device: &'gpu device::Device<device::Bound>,
> -    spec: Spec,
>      /// MMIO mapping of PCI BAR 0.
>      bar: Bar0<'gpu>,
> -    /// System memory page required for flushing all pending GPU-side memory writes done through
> -    /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
> -    sysmem_flush: SysmemFlush<'gpu>,
>      /// GSP falcon instance, used for GSP boot up and cleanup.
>      gsp_falcon: Falcon<GspFalcon>,
>      /// SEC2 falcon instance, used for GSP boot up and cleanup.
>      sec2_falcon: Falcon<Sec2Falcon>,
> -    /// GSP runtime data. Temporarily an empty placeholder.
> +    /// GSP runtime data.
>      #[pin]
>      gsp: Gsp,
>      /// GSP unload firmware bundle, if any.
>      unload_bundle: Option<gsp::UnloadBundle>,
>  }
>  
> +/// Structure holding the resources required to operate the GPU.
> +#[pin_data]
> +pub(crate) struct Gpu<'gpu> {
> +    spec: Spec,
> +    /// System memory page required for flushing all pending GPU-side memory writes done through
> +    /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
> +    sysmem_flush: SysmemFlush<'gpu>,

This means sysmem_flush is dropped before the unload is run. Before this
patch, PinnedDrop ran the unload bundle before sysmem_flush's drop
actually ran. But with this code sysmem_flush is dropped first (fields
are dropped in declaration order, and it is declared before
gsp_resources), and that isn't allowed according to the comment in fb.rs
saying that it's needed for falcon reset. What about moving sysmem_flush
into GspResources as well?

> +    /// GSP and its resources.
> +    #[pin]
> +    gsp_resources: GspResources<'gpu>,
> +}
> +
> +#[pinned_drop]
> +impl PinnedDrop for GspResources<'_> {
> +    fn drop(self: Pin<&mut Self>) {
> +        let this = self.project();
> +        let device = *this.device;
> +        let bar = *this.bar;
> +        let bundle = this.unload_bundle.take();
> +
> +        let _ = this
> +            .gsp
> +            .as_ref()
> +            .get_ref()
> +            .unload(device, bar, &*this.gsp_falcon, &*this.sec2_falcon, bundle)
> +            .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\n", e));
> +    }
> +}
> +
>  impl<'gpu> Gpu<'gpu> {
>      pub(crate) fn new(
>          pdev: &'gpu pci::Device<device::Core<'_>>,
>          bar: Bar0<'gpu>,
>      ) -> impl PinInit<Self, Error> + 'gpu {
>          try_pin_init!(Self {
> -            device: pdev.as_ref(),
>              spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
>                  dev_info!(pdev,"NVIDIA ({})\n", spec);
>              })?,
> @@ -310,38 +334,26 @@ pub(crate) fn new(
>  
>              sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
>  
> -            gsp_falcon: Falcon::new(
> -                pdev.as_ref(),
> -                spec.chipset,
> -            )
> -            .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
> +            gsp_resources <- try_pin_init!(GspResources {
> +                device: pdev.as_ref(),
>  
> -            sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
> +                bar,
>  
> -            gsp <- Gsp::new(pdev),
> +                gsp_falcon: Falcon::new(
> +                    pdev.as_ref(),
> +                    spec.chipset,
> +                )
> +                .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
>  
> -            // This member must be initialized last, so the `UnloadBundle` can never be dropped from
> -            // outside of the constructed `Gpu`, ensuring that the unload sequence is properly run
> -            // in case of failure.
> -            unload_bundle: gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?,
> -            bar,
> +                sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
> +
> +                gsp <- Gsp::new(pdev),
> +
> +                // This member must be initialized last, so the `UnloadBundle` can never be dropped
> +                // from outside of the constructed `GspResources`, ensuring that the unload sequence
> +                // is properly run in case of failure.
> +                unload_bundle: gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?,
> +            }),
>          })
>      }
>  }
> -
> -#[pinned_drop]
> -impl PinnedDrop for Gpu<'_> {
> -    fn drop(self: Pin<&mut Self>) {
> -        let this = self.project();
> -        let device = *this.device;
> -        let bar = *this.bar;
> -        let bundle = this.unload_bundle.take();
> -
> -        let _ = this
> -            .gsp
> -            .as_ref()
> -            .get_ref()
> -            .unload(device, bar, &*this.gsp_falcon, &*this.sec2_falcon, bundle)
> -            .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\n", e));
> -    }
> -}


  reply	other threads:[~2026-06-10  3:52 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-09  8:03 [PATCH 0/4] gpu: nova-core: obtain and display VRAM amount Alexandre Courbot
2026-06-09  8:03 ` [PATCH 1/4] gpu: nova-core: move GSP unload state to a pinned Gpu subobject Alexandre Courbot
2026-06-10  3:52   ` Eliot Courtney [this message]
2026-06-10 11:18     ` Alexandre Courbot
2026-06-10 12:30       ` Eliot Courtney
2026-06-10 10:14   ` Gary Guo
2026-06-10 11:21     ` Alexandre Courbot
2026-06-09  8:04 ` [PATCH 2/4] gpu: nova-core: move GPU static information acquisition to a GSP method Alexandre Courbot
2026-06-10  3:39   ` Eliot Courtney
2026-06-10 10:16   ` Gary Guo
2026-06-09  8:04 ` [PATCH 3/4] gpu: nova-core: gsp: Extract usable FB region from GSP Alexandre Courbot
2026-06-10  3:35   ` Eliot Courtney
2026-06-10 10:23   ` Gary Guo
2026-06-10 10:27     ` Gary Guo
2026-06-10 15:38       ` Timur Tabi
2026-06-09  8:04 ` [PATCH 4/4] gpu: nova-core: gsp: Expose total physical VRAM end from FB region info Alexandre Courbot
2026-06-10  3:37   ` Eliot Courtney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DJ52BSFTRYBX.2ZR8NX8A5H4ML@nvidia.com \
    --to=ecourtney@nvidia.com \
    --cc=acourbot@nvidia.com \
    --cc=airlied@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=dakr@kernel.org \
    --cc=dri-devel-bounces@lists.freedesktop.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=gary@garyguo.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lossin@kernel.org \
    --cc=nova-gpu@lists.linux.dev \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=simona@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox