From: "Alexandre Courbot" <acourbot@nvidia.com>
To: "Joel Fernandes" <joelagnelf@nvidia.com>,
"Alexandre Courbot" <acourbot@nvidia.com>
Cc: "Danilo Krummrich" <dakr@kernel.org>,
"Alice Ryhl" <aliceryhl@google.com>,
"Simona Vetter" <simona@ffwll.ch>,
"Bjorn Helgaas" <bhelgaas@google.com>,
"Krzysztof Wilczyński" <kwilczynski@kernel.org>,
"Miguel Ojeda" <ojeda@kernel.org>,
"Boqun Feng" <boqun.feng@gmail.com>,
"Gary Guo" <gary@garyguo.net>,
"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
"Benno Lossin" <lossin@kernel.org>,
"Andreas Hindborg" <a.hindborg@kernel.org>,
"Trevor Gross" <tmgross@umich.edu>,
"Alistair Popple" <apopple@nvidia.com>,
"Eliot Courtney" <ecourtney@nvidia.com>,
"nouveau@lists.freedesktop.org" <nouveau@lists.freedesktop.org>,
"dri-devel@lists.freedesktop.org"
<dri-devel@lists.freedesktop.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"linux-pci@vger.kernel.org" <linux-pci@vger.kernel.org>,
"rust-for-linux@vger.kernel.org" <rust-for-linux@vger.kernel.org>
Subject: Re: [PATCH 6/7] gpu: nova-core: send UNLOADING_GUEST_DRIVER GSP command GSP upon unloading
Date: Thu, 18 Dec 2025 22:27:57 +0900 [thread overview]
Message-ID: <DF1DLWE9OOR6.2P43ATQYNAU3A@nvidia.com> (raw)
In-Reply-To: <C890CCBB-76C0-4E70-A7B8-846E34DABECE@nvidia.com>
On Wed Dec 17, 2025 at 12:39 AM JST, Joel Fernandes wrote:
> Hi Alex,
>
> Just did a quick pass, will do a proper review later:
>
>> On Dec 16, 2025, at 12:13 AM, Alexandre Courbot <acourbot@nvidia.com> wrote:
>>
>> Currently, the GSP is left running after the driver is unbound. This is
>> not great for several reasons, notably that it can still access shared
>> memory areas that the kernel will now reclaim (especially problematic on
>> setups without an IOMMU).
>>
>> Fix this by sending the `UNLOADING_GUEST_DRIVER` GSP command when
>> unbinding. This stops the GSP and let us proceed with the rest of the
>> unbind sequence in the next patch.
>>
>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>> ---
>> drivers/gpu/nova-core/gpu.rs | 5 +++
>> drivers/gpu/nova-core/gsp/boot.rs | 42 +++++++++++++++++++++++
>> drivers/gpu/nova-core/gsp/commands.rs | 42 +++++++++++++++++++++++
>> drivers/gpu/nova-core/gsp/fw.rs | 4 +++
>> drivers/gpu/nova-core/gsp/fw/commands.rs | 27 +++++++++++++++
>> drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 8 +++++
>> 6 files changed, 128 insertions(+)
>>
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index ef6ceced350e..b94784f57b36 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -291,6 +291,9 @@ pub(crate) fn new<'a>(
>>
>> /// Called when the corresponding [`Device`](device::Device) is unbound.
>> ///
>> + /// Prepares the GPU for unbinding by shutting down the GSP and unregistering the sysmem flush
>> + /// memory page.
>> + ///
>> /// Note: This method must only be called from `Driver::unbind`.
>> pub(crate) fn unbind(self: Pin<&mut Self>, dev: &device::Device<device::Core>) {
>> let this = self.project();
>> @@ -299,6 +302,8 @@ pub(crate) fn unbind(self: Pin<&mut Self>, dev: &device::Device<device::Core>) {
>> return;
>> };
>>
>> + let _ = kernel::warn_on_err!(this.gsp.unload(dev, bar, this.gsp_falcon,));
>> +
>> this.sysmem_flush.unregister(bar);
>> }
>> }
>> diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
>> index ca7efdaa753b..e12e1d3fd53f 100644
>> --- a/drivers/gpu/nova-core/gsp/boot.rs
>> +++ b/drivers/gpu/nova-core/gsp/boot.rs
>> @@ -32,6 +32,7 @@
>> },
>> gpu::Chipset,
>> gsp::{
>> + cmdq::Cmdq,
>> commands,
>> sequencer::{
>> GspSequencer,
>> @@ -231,4 +232,45 @@ pub(crate) fn boot(
>>
>> Ok(())
>> }
>> +
>> + /// Shutdowns the GSP and wait until it is offline.
>> + fn shutdown_gsp(
>> + cmdq: &mut Cmdq,
>> + bar: &Bar0,
>> + gsp_falcon: &Falcon<Gsp>,
>> + suspend: bool,
>> + ) -> Result<()> {
>> + // Send command to shutdown GSP and wait for response.
>> + commands::unload_guest_driver(cmdq, bar, suspend)?;
>> +
>> + // Wait until GSP signals it is suspended.
>> + const LIBOS_INTERRUPT_PROCESSOR_SUSPENDED: u32 = 0x8000_0000;
>> + read_poll_timeout(
>> + || Ok(gsp_falcon.read_mailbox0(bar)),
>> + |&mb0| mb0 == LIBOS_INTERRUPT_PROCESSOR_SUSPENDED,
>> + Delta::ZERO,
>> + Delta::from_secs(5),
>> + )
>> + .map(|_| ())
>> + }
>> +
>> + /// Attempts to unload the GSP firmware.
>> + ///
>> + /// This stops all activity on the GSP.
>> + pub(crate) fn unload(
>> + self: Pin<&mut Self>,
>> + dev: &device::Device<device::Bound>,
>> + bar: &Bar0,
>> + gsp_falcon: &Falcon<Gsp>,
>> + ) -> Result {
>> + let this = self.project();
>> +
>> + /* Shutdown the GSP */
>> +
>> + Self::shutdown_gsp(this.cmdq, bar, gsp_falcon, false)
>> + .inspect_err(|e| dev_err!(dev, "unload guest driver failed: {:?}", e))?;
>> + dev_dbg!(dev, "GSP shut down\n");
>> +
>> + Ok(())
>> + }
>> }
>> diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
>> index 2050771f9b53..0bcfca8c7e42 100644
>> --- a/drivers/gpu/nova-core/gsp/commands.rs
>> +++ b/drivers/gpu/nova-core/gsp/commands.rs
>> @@ -225,3 +225,45 @@ pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result<GetGspStaticIn
>> }
>> }
>> }
>> +
>> +impl CommandToGsp for UnloadingGuestDriver {
>> + const FUNCTION: MsgFunction = MsgFunction::UnloadingGuestDriver;
>> + type Command = Self;
>> + type InitError = Infallible;
>> +
>> + fn init(&self) -> impl Init<Self::Command, Self::InitError> {
>> + *self
>> + }
>> +}
>> +
>> +pub(crate) struct UnloadingGuestDriverReply;
>> +
>> +impl MessageFromGsp for UnloadingGuestDriverReply {
>> + const FUNCTION: MsgFunction = MsgFunction::UnloadingGuestDriver;
>> + type InitError = Infallible;
>> + type Message = ();
>> +
>> + fn read(
>> + _msg: &Self::Message,
>> + _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>,
>> + ) -> Result<Self, Self::InitError> {
>> + Ok(UnloadingGuestDriverReply)
>> + }
>> +}
>> +
>> +/// Send the [`UnloadingGuestDriver`] command to the GSP and await the reply.
>> +pub(crate) fn unload_guest_driver(
>> + cmdq: &mut Cmdq,
>> + bar: &Bar0,
>> + suspend: bool,
>> +) -> Result<UnloadingGuestDriverReply> {
>> + cmdq.send_command(bar, UnloadingGuestDriver::new(suspend))?;
>> +
>> + loop {
>> + match cmdq.receive_msg::<UnloadingGuestDriverReply>(Delta::from_secs(5)) {
>> + Ok(resp) => return Ok(resp),
>> + Err(ERANGE) => continue,
>> + Err(e) => return Err(e),
>> + }
>
> This outer loop can go on infinitely, lets bound it?
>
> Either outer timeout or bounded number of tries. Bounded tries better since it has inner timeout.
Yes. And what we really want is a more generic way to do this, because
this pattern occurs with several commands so far (and more to come).
>
>> + }
>> +}
>> diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
>> index 09549f7db52d..228464fd47a0 100644
>> --- a/drivers/gpu/nova-core/gsp/fw.rs
>> +++ b/drivers/gpu/nova-core/gsp/fw.rs
>> @@ -209,6 +209,7 @@ pub(crate) enum MsgFunction {
>> GspInitPostObjGpu = bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU,
>> GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL,
>> GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO,
>> + UnloadingGuestDriver = bindings::NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER,
>>
>> // Event codes
>> GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE,
>> @@ -249,6 +250,9 @@ fn try_from(value: u32) -> Result<MsgFunction> {
>> }
>> bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl),
>> bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo),
>> + bindings::NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER => {
>> + Ok(MsgFunction::UnloadingGuestDriver)
>> + }
>> bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone),
>> bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => {
>> Ok(MsgFunction::GspRunCpuSequencer)
>> diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs
>> index 85465521de32..c7df4783ad21 100644
>> --- a/drivers/gpu/nova-core/gsp/fw/commands.rs
>> +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
>> @@ -125,3 +125,30 @@ unsafe impl AsBytes for GspStaticConfigInfo {}
>> // SAFETY: This struct only contains integer types for which all bit patterns
>> // are valid.
>> unsafe impl FromBytes for GspStaticConfigInfo {}
>> +
>> +/// Payload of the `UnloadingGuestDriver` command and message.
>> +#[repr(transparent)]
>> +#[derive(Clone, Copy, Debug, Zeroable)]
>> +pub(crate) struct UnloadingGuestDriver {
>> + inner: bindings::rpc_unloading_guest_driver_v1F_07,
>> +}
>> +
>> +impl UnloadingGuestDriver {
>> + pub(crate) fn new(suspend: bool) -> Self {
>> + Self {
>> + inner: bindings::rpc_unloading_guest_driver_v1F_07 {
>
> We should go through intermediate firmware representation than direct bindings access?
Only if the size of the bindings justifies it - here having an opaque
wrapper just just well, and spares us some code down the line as we
would have to initialize the bindings anyway.
>
>
>> + bInPMTransition: if suspend { 1 } else { 0 },
>
> Then this can just be passed as a bool.
>
>> + bGc6Entering: 0,
>> + newLevel: if suspend { 3 } else { 0 },
Note to self to figure out these magic numbers. :)
next prev parent reply other threads:[~2025-12-18 13:28 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-16 5:13 [PATCH 0/7] gpu: nova-core: run unload sequence upon unbinding Alexandre Courbot
2025-12-16 5:13 ` [PATCH 1/7] rust: pci: pass driver data by value to `unbind` Alexandre Courbot
2025-12-16 12:14 ` Danilo Krummrich
2025-12-16 14:38 ` Alexandre Courbot
2025-12-16 5:13 ` [PATCH 2/7] rust: add warn_on_err macro Alexandre Courbot
2025-12-16 5:13 ` [PATCH 3/7] gpu: nova-core: use " Alexandre Courbot
2025-12-16 5:13 ` [PATCH RFC 4/7] rust: pin-init: allow `dead_code` on projection structure Alexandre Courbot
2025-12-16 6:12 ` Benno Lossin
2025-12-16 5:13 ` [PATCH 5/7] gpu: nova-nova: use pin-init projections Alexandre Courbot
2025-12-16 5:13 ` [PATCH 6/7] gpu: nova-core: send UNLOADING_GUEST_DRIVER GSP command GSP upon unloading Alexandre Courbot
2025-12-16 15:39 ` Joel Fernandes
2025-12-18 13:27 ` Alexandre Courbot [this message]
2025-12-18 20:52 ` Joel Fernandes
2025-12-19 3:26 ` Alexandre Courbot
2025-12-19 6:42 ` Joel Fernandes
2025-12-18 22:33 ` Timur Tabi
2025-12-18 22:44 ` Joel Fernandes
2025-12-18 23:34 ` Timur Tabi
2025-12-19 1:46 ` Joel Fernandes
2025-12-19 1:48 ` Joel Fernandes
2025-12-19 3:39 ` Alexandre Courbot
2025-12-20 21:30 ` Timur Tabi
2026-01-14 14:02 ` Alexandre Courbot
2025-12-16 5:13 ` [PATCH 7/7] gpu: nova-core: run Booter Unloader and FWSEC-SB upon unbinding Alexandre Courbot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DF1DLWE9OOR6.2P43ATQYNAU3A@nvidia.com \
--to=acourbot@nvidia.com \
--cc=a.hindborg@kernel.org \
--cc=aliceryhl@google.com \
--cc=apopple@nvidia.com \
--cc=bhelgaas@google.com \
--cc=bjorn3_gh@protonmail.com \
--cc=boqun.feng@gmail.com \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=ecourtney@nvidia.com \
--cc=gary@garyguo.net \
--cc=joelagnelf@nvidia.com \
--cc=kwilczynski@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=lossin@kernel.org \
--cc=nouveau@lists.freedesktop.org \
--cc=ojeda@kernel.org \
--cc=rust-for-linux@vger.kernel.org \
--cc=simona@ffwll.ch \
--cc=tmgross@umich.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox