From: Timur Tabi <ttabi@nvidia.com>
To: Gary Guo <gary@garyguo.net>, Danilo Krummrich <dakr@kernel.org>,
"Alexandre Courbot" <acourbot@nvidia.com>,
John Hubbard <jhubbard@nvidia.com>,
"Joel Fernandes" <joelagnelf@nvidia.com>,
<rust-for-linux@vger.kernel.org>, <nouveau@lists.freedesktop.org>
Subject: [PATCH v7 12/12] gpu: nova-core: add PIO support for loading firmware images
Date: Wed, 21 Jan 2026 17:53:02 -0600 [thread overview]
Message-ID: <20260121235302.1962185-13-ttabi@nvidia.com> (raw)
In-Reply-To: <20260121235302.1962185-1-ttabi@nvidia.com>
Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.
A new firmware called the Generic Bootloader (as opposed to the
GSP Bootloader) is used to upload FWSEC.
Signed-off-by: Timur Tabi <ttabi@nvidia.com>
---
drivers/gpu/nova-core/falcon.rs | 183 ++++++++++++++++++++++++
drivers/gpu/nova-core/firmware/fwsec.rs | 129 ++++++++++++++++-
drivers/gpu/nova-core/gsp/boot.rs | 6 +-
drivers/gpu/nova-core/regs.rs | 30 ++++
4 files changed, 344 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index d779fcda0e2a..ccb5390ae9c2 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -18,11 +18,17 @@
time::{
Delta, //
},
+ transmute::AsBytes, //
};
use crate::{
dma::DmaObject,
driver::Bar0,
+ falcon::hal::LoadMethod,
+ firmware::fwsec::{
+ BootloaderDmemDescV2,
+ GenericBootloader, //
+ },
gpu::Chipset,
num::{
FromSafeCast,
@@ -409,6 +415,170 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
Ok(())
}
+    /// Copy `img` into Falcon memory with programmed I/O (PIO).
+    ///
+    /// The bytes are written to `target_mem` (IMEM or DMEM) beginning at offset `mem_base`.
+    /// They are pushed through the data register as little-endian 32-bit words, with
+    /// auto-increment-on-write enabled in the control register.
+    ///
+    /// When the target is IMEM, every 256-byte block of `img` is tagged before its words are
+    /// written: block `n` receives tag `start_tag + n`. `start_tag` has no effect on DMEM
+    /// writes.
+    ///
+    /// # Errors
+    ///
+    /// * `EINVAL` if `img.len()` is not a multiple of 4.
+    /// * `ERANGE` if an IMEM block tag would overflow `u16`.
+    /// * A conversion error if the IMEM block index itself does not fit in a `u16`.
+    fn pio_wr_slice(
+        &self,
+        bar: &Bar0,
+        img: &[u8],
+        mem_base: u16,
+        target_mem: FalconMem,
+        start_tag: u16,
+    ) -> Result {
+        // NV_PFALCON_FALCON_IMEMC supports up to four ports, but only one is ever used here,
+        // so it is fixed to port 0.
+        const PORT: usize = 0;
+
+        // Refuse images with a trailing partial word up front; the loops below can then rely
+        // on `chunks_exact(4)` never leaving a remainder.
+        if img.len() % 4 != 0 {
+            return Err(EINVAL);
+        }
+
+        // Assemble one little-endian 32-bit word out of a 4-byte chunk.
+        let le_word = |bytes: &[u8]| u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
+
+        match target_mem {
+            FalconMem::Dmem => {
+                regs::NV_PFALCON_FALCON_DMEMC::default()
+                    .set_aincw(true)
+                    .set_offs(mem_base)
+                    .write(bar, &E::ID, PORT);
+
+                img.chunks_exact(4).map(le_word).for_each(|data| {
+                    regs::NV_PFALCON_FALCON_DMEMD::default()
+                        .set_data(data)
+                        .write(bar, &E::ID, PORT);
+                });
+            }
+            FalconMem::ImemSecure | FalconMem::ImemNonSecure => {
+                let secure = target_mem == FalconMem::ImemSecure;
+
+                regs::NV_PFALCON_FALCON_IMEMC::default()
+                    .set_secure(secure)
+                    .set_aincw(true)
+                    .set_offs(mem_base)
+                    .write(bar, &E::ID, PORT);
+
+                for (index, block) in img.chunks(256).enumerate() {
+                    // The tag of a block is the starting tag plus the block's index.
+                    let index = u16::try_from(index)?;
+                    let tag = start_tag.checked_add(index).ok_or(ERANGE)?;
+
+                    // The tag is written first, then the words of the block.
+                    regs::NV_PFALCON_FALCON_IMEMT::default()
+                        .set_tag(tag)
+                        .write(bar, &E::ID, PORT);
+
+                    block.chunks_exact(4).map(le_word).for_each(|data| {
+                        regs::NV_PFALCON_FALCON_IMEMD::default()
+                            .set_data(data)
+                            .write(bar, &E::ID, PORT);
+                    });
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Write one section of a firmware image into falcon memory using PIO.
+    ///
+    /// `load_offsets` selects the section: `len` bytes are taken from `fw` starting at
+    /// `src_start` and written to `target_mem` at offset `dst_start`. `start_tag` is passed
+    /// through to [`Self::pio_wr_slice`] unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `dst_start` does not fit in a `u16`, returns `EINVAL` if the requested range
+    /// cannot be obtained from `fw`, and propagates any error from the slice write itself.
+    fn pio_wr<F: FalconFirmware<Target = E>>(
+        &self,
+        bar: &Bar0,
+        fw: &F,
+        target_mem: FalconMem,
+        load_offsets: &FalconLoadTarget,
+        start_tag: u16,
+    ) -> Result {
+        // Destination offset inside the falcon memory; it must fit the 16-bit offset field.
+        let dst_offset = u16::try_from(load_offsets.dst_start)?;
+
+        // Source window inside the firmware image.
+        let src_offset = usize::from_safe_cast(load_offsets.src_start);
+        let src_len = usize::from_safe_cast(load_offsets.len);
+
+        // SAFETY: we are the only user of the firmware image at this stage
+        let section = unsafe { fw.as_slice(src_offset, src_len) }.map_err(|_| EINVAL)?;
+
+        self.pio_wr_slice(bar, section, dst_offset, target_mem, start_tag)
+    }
+
+    /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
+    ///
+    /// Two paths exist, selected by `gbl`:
+    ///
+    /// * `Some(gbl)`: only the Generic Bootloader code is written to non-secure IMEM, placed so
+    ///   that it ends at IMEM offset `0x10000`. A [`BootloaderDmemDescV2`] describing the DMA
+    ///   location of the sections of `fw` is then written to DMEM offset 0.
+    /// * `None`: the non-secure IMEM, secure IMEM and DMEM sections of `fw` are written
+    ///   directly with PIO.
+    ///
+    /// In both cases the boot ROM parameters and the boot vector are programmed afterwards.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` if `fw` has no non-secure IMEM section, or if the bootloader
+    /// descriptor's `code_size` exceeds `0x10000` or the size of the bootloader image.
+    /// Integer-conversion failures and errors from the PIO writes or from the HAL's
+    /// `program_brom()` are propagated.
+    pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(
+        &self,
+        bar: &Bar0,
+        fw: &F,
+        gbl: Option<&GenericBootloader>,
+    ) -> Result {
+        let imem_sec = fw.imem_sec_load_params();
+        let imem_ns = fw.imem_ns_load_params().ok_or(EINVAL)?;
+        let dmem = fw.dmem_load_params();
+
+        regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
+            .set_allow_phys_no_ctx(true)
+            .write(bar, &E::ID);
+
+        regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID);
+
+        // If the Generic Bootloader was passed, then use it to boot FRTS
+        if let Some(gbl) = gbl {
+            // `code_size` is read from the firmware file, so validate it rather than letting
+            // the subtraction underflow or the slice index panic on a malformed image.
+            let code_size = gbl.desc.code_size;
+            let dst_start = u16::try_from(0x10000_u32.checked_sub(code_size).ok_or(EINVAL)?)?;
+            // NOTE(review): the code is taken from the start of `ucode`; `gbl.desc.code_off`
+            // is not consulted - confirm that it is always 0 for this image.
+            let data = gbl
+                .ucode
+                .get(..usize::from_safe_cast(code_size))
+                .ok_or(EINVAL)?;
+            let tag = u16::try_from(gbl.desc.start_tag)?;
+
+            self.pio_wr_slice(bar, data, dst_start, FalconMem::ImemNonSecure, tag)?;
+
+            // This structure tells the generic bootloader where to find the FWSEC
+            // image.
+            let dmem_desc = BootloaderDmemDescV2 {
+                reserved: [0; 4],
+                signature: [0; 4],
+                ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH
+                code_dma_base: fw.dma_handle(),
+                non_sec_code_off: imem_ns.dst_start,
+                non_sec_code_size: imem_ns.len,
+                sec_code_off: imem_sec.dst_start,
+                sec_code_size: imem_sec.len,
+                code_entry_point: 0,
+                data_dma_base: fw.dma_handle() + u64::from(dmem.src_start),
+                data_size: dmem.len,
+                argc: 0,
+                argv: 0,
+            };
+
+            // Configure the FBIF aperture with the same index (4) as `ctx_dma` above.
+            // NOTE(review): the index is named "NCOH" above but is configured here as
+            // `CoherentSysmem` - confirm this is intended.
+            regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| {
+                v.set_target(FalconFbifTarget::CoherentSysmem)
+                    .set_mem_type(FalconFbifMemType::Physical)
+            });
+
+            // NOTE(review): the descriptor is written at DMEM offset 0;
+            // `gbl.desc.dmem_load_off` is not consulted - confirm this is intended.
+            self.pio_wr_slice(bar, dmem_desc.as_bytes(), 0, FalconMem::Dmem, 0)?;
+        } else {
+            // No bootloader: copy every section of `fw` directly. The IMEM tag of a section
+            // is its destination offset expressed in 256-byte blocks.
+            self.pio_wr(
+                bar,
+                fw,
+                FalconMem::ImemNonSecure,
+                &imem_ns,
+                u16::try_from(imem_ns.dst_start >> 8)?,
+            )?;
+            self.pio_wr(
+                bar,
+                fw,
+                FalconMem::ImemSecure,
+                &imem_sec,
+                u16::try_from(imem_sec.dst_start >> 8)?,
+            )?;
+            self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0)?;
+        }
+
+        self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+        // Set `BootVec` to start of non-secure code.
+        regs::NV_PFALCON_FALCON_BOOTVEC::default()
+            .set_value(fw.boot_addr())
+            .write(bar, &E::ID);
+
+        Ok(())
+    }
+
/// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's
/// `target_mem`.
///
@@ -637,6 +807,19 @@ pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool {
self.hal.is_riscv_active(bar)
}
+    /// Load the firmware image `fw` into Falcon memory.
+    ///
+    /// The transfer mechanism is whichever one the HAL's `load_method()` reports. The optional
+    /// Generic Bootloader `gbl` is handed to the PIO path only; the DMA path does not use it.
+    pub(crate) fn load<F: FalconFirmware<Target = E>>(
+        &self,
+        bar: &Bar0,
+        fw: &F,
+        gbl: Option<&GenericBootloader>,
+    ) -> Result {
+        let method = self.hal.load_method();
+
+        match method {
+            LoadMethod::Dma => self.dma_load(bar, fw),
+            LoadMethod::Pio => self.pio_load(bar, fw, gbl),
+        }
+    }
+
/// Write the application version to the OS register.
pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) {
regs::NV_PFALCON_FALCON_OS::default()
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs
index 89dc4526041b..762674ca5087 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -40,12 +40,15 @@
FalconLoadTarget, //
},
firmware::{
+ BinHdr,
FalconUCodeDesc,
FirmwareDmaObject,
FirmwareSignature,
Signed,
Unsigned, //
+ FIRMWARE_VERSION,
},
+ gpu::Chipset,
num::{
FromSafeCast,
IntoSafeCast, //
@@ -213,6 +216,68 @@ unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>(
T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL)
}
+/// Descriptor used by RM to figure out the requirements of the boot loader.
+///
+/// It is read as raw bytes from the generic bootloader firmware file, so the field order and
+/// sizes (`#[repr(C)]`, all `u32`) must match the on-disk layout.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDesc {
+ /// Starting IMEM tag of the bootloader; the boot address is derived from it as `start_tag << 8`.
+ pub start_tag: u32,
+ /// DMEM offset where [`BootloaderDmemDescV2`] is to be loaded.
+ pub dmem_load_off: u32,
+ /// Offset of code section in the image.
+ pub code_off: u32,
+ /// Size of code section in the image, in bytes.
+ pub code_size: u32,
+ /// Offset of data section in the image.
+ pub data_off: u32,
+ /// Size of data section in the image.
+ pub data_size: u32,
+}
+// SAFETY: every field is a `u32`, so any byte sequence is valid for this struct.
+unsafe impl FromBytes for BootloaderDesc {}
+
+/// Structure used by the boot-loader to load the rest of the code.
+///
+/// This has to be filled by the GPU driver and copied into DMEM at offset
+/// [`BootloaderDesc::dmem_load_off`].
+///
+/// The layout is `#[repr(C, packed)]`: fields follow each other without padding.
+#[repr(C, packed)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDmemDescV2 {
+ /// Reserved, should always be first element.
+ pub reserved: [u32; 4],
+ /// 16B signature for secure code, 0s if no secure code.
+ pub signature: [u32; 4],
+ /// DMA context used by the bootloader while loading code/data.
+ pub ctx_dma: u32,
+ /// 256B-aligned physical FB address where code is located.
+ pub code_dma_base: u64,
+ /// Offset from `code_dma_base` where the non-secure code is located (must be multiple of 256).
+ pub non_sec_code_off: u32,
+ /// Size of the non-secure code part.
+ pub non_sec_code_size: u32,
+ /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256).
+ pub sec_code_off: u32,
+ /// Size of the secure code part.
+ pub sec_code_size: u32,
+ /// Code entry point invoked by the bootloader after code is loaded.
+ pub code_entry_point: u32,
+ /// 256B-aligned physical FB address where data is located.
+ pub data_dma_base: u64,
+ /// Size of data block (should be multiple of 256B).
+ pub data_size: u32,
+ /// Number of arguments to be passed to the target firmware being loaded.
+ pub argc: u32,
+ /// Arguments to be passed to the target firmware being loaded.
+ pub argv: u32,
+}
+// SAFETY: This struct is packed and made only of integers, so it doesn't contain uninitialized
+// (padding) bytes, and it doesn't have interior mutability.
+unsafe impl AsBytes for BootloaderDmemDescV2 {}
+
+/// The Generic Bootloader firmware: its descriptor together with its microcode image.
+pub(crate) struct GenericBootloader {
+ /// Descriptor parsed from the bootloader firmware file.
+ pub desc: BootloaderDesc,
+ /// Owned copy of the bootloader's data section from the firmware file.
+ pub ucode: KVec<u8>,
+}
+
/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon.
///
/// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow.
@@ -221,6 +286,8 @@ pub(crate) struct FwsecFirmware {
desc: FalconUCodeDesc,
/// GPU-accessible DMA object containing the firmware.
ucode: FirmwareDmaObject<Self, Signed>,
+ /// Generic bootloader
+ gen_bootloader: Option<GenericBootloader>,
}
impl FalconLoadParams for FwsecFirmware {
@@ -245,7 +312,19 @@ fn brom_params(&self) -> FalconBromParams {
}
fn boot_addr(&self) -> u32 {
- 0
+ match &self.desc {
+ FalconUCodeDesc::V2(_v2) => {
+ // On V2 platforms, the boot address is extracted from the
+ // generic bootloader, because the gbl is what actually copies
+ // FWSEC into memory, so that is what needs to be booted.
+ //
+ // IMEM tags appear to index 256-byte blocks, so shifting the
+ // starting tag left by 8 yields the bootloader's byte address.
+ // Without a generic bootloader the boot address stays 0.
+ if let Some(ref gbl) = self.gen_bootloader {
+ gbl.desc.start_tag << 8
+ } else {
+ 0
+ }
+ }
+ // V3 keeps the boot address of 0 that was previously returned unconditionally.
+ FalconUCodeDesc::V3(_v3) => 0,
+ }
}
}
@@ -346,6 +425,7 @@ impl FwsecFirmware {
/// command.
pub(crate) fn new(
dev: &Device<device::Bound>,
+ chipset: Chipset,
falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
@@ -402,9 +482,54 @@ pub(crate) fn new(
ucode_dma.no_patch_signature()
};
+ // The Generic Bootloader exists only on Turing and GA100. To avoid a bogus
+ // console error message on other platforms, only try to load it if it's
+ // supposed to be there.
+ let gbl_fw = if chipset < Chipset::GA102 {
+ Some(super::request_firmware(
+ dev,
+ chipset,
+ "gen_bootloader",
+ FIRMWARE_VERSION,
+ )?)
+ } else {
+ None
+ };
+
+ let gbl = match gbl_fw {
+ Some(fw) => {
+ let hdr = fw
+ .data()
+ .get(0..size_of::<BinHdr>())
+ .and_then(BinHdr::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let desc_offset = usize::from_safe_cast(hdr.header_offset);
+ let desc = fw
+ .data()
+ .get(desc_offset..desc_offset + size_of::<BootloaderDesc>())
+ .and_then(BootloaderDesc::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let ucode_start = usize::from_safe_cast(hdr.data_offset);
+ let ucode_size = usize::from_safe_cast(hdr.data_size);
+ let ucode_data = fw
+ .data()
+ .get(ucode_start..ucode_start + ucode_size)
+ .ok_or(EINVAL)?;
+
+ let mut ucode = KVec::new();
+ ucode.extend_from_slice(ucode_data, GFP_KERNEL)?;
+
+ Some(GenericBootloader { desc, ucode })
+ }
+ None => None,
+ };
+
Ok(FwsecFirmware {
desc,
ucode: ucode_signed,
+ gen_bootloader: gbl,
})
}
@@ -420,7 +545,7 @@ pub(crate) fn run(
.reset(bar)
.inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?;
falcon
- .dma_load(bar, self)
+ .load(bar, self, self.gen_bootloader.as_ref())
.inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?;
let (mbox0, _) = falcon
.boot(bar, Some(0), None)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index 581b412554dc..f253d5f12252 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -48,6 +48,7 @@ impl super::Gsp {
/// created the WPR2 region.
fn run_fwsec_frts(
dev: &device::Device<device::Bound>,
+ chipset: Chipset,
falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
@@ -65,6 +66,7 @@ fn run_fwsec_frts(
let fwsec_frts = FwsecFirmware::new(
dev,
+ chipset,
falcon,
bar,
bios,
@@ -144,7 +146,7 @@ pub(crate) fn boot(
let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
- Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+ Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?;
let booter_loader = BooterFirmware::new(
dev,
@@ -183,7 +185,7 @@ pub(crate) fn boot(
);
sec2_falcon.reset(bar)?;
- sec2_falcon.dma_load(bar, &booter_loader)?;
+ sec2_falcon.load(bar, &booter_loader, None)?;
let wpr_handle = wpr_meta.dma_handle();
let (mbox0, mbox1) = sec2_falcon.boot(
bar,
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index ea0d32f5396c..53f412f0ca32 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -364,6 +364,36 @@ pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self {
1:1 startcpu as bool;
});
+// IMEM access control register. Up to 4 ports are available for IMEM access.
+// Selects the IMEM offset (and secure/non-secure access) used by subsequent IMEMD accesses.
+register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] {
+ 15:0 offs as u16, "IMEM block and word offset";
+ 24:24 aincw as bool, "Auto-increment on write";
+ 28:28 secure as bool, "Access secure IMEM";
+});
+
+// IMEM data register. Reading/writing this register accesses IMEM at the address
+// specified by the corresponding IMEMC register.
+register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] {
+ 31:0 data as u32;
+});
+
+// IMEM tag register. Used to set the tag for the current IMEM block; the PIO loader
+// writes it once per 256-byte block, before that block's data words.
+register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] {
+ 15:0 tag as u16;
+});
+
+// DMEM access control register. Up to 8 ports are available for DMEM access.
+// Selects the DMEM offset used by subsequent DMEMD accesses.
+register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] {
+ 15:0 offs as u16, "DMEM block and word offset";
+ 24:24 aincw as bool, "Auto-increment on write";
+});
+
+// DMEM data register. Reading/writing this register accesses DMEM at the address
+// specified by the corresponding DMEMC register.
+register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] {
+ 31:0 data as u32;
+});
+
// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon
// instance.
register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] {
--
2.52.0
next prev parent reply other threads:[~2026-01-21 23:53 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-21 23:52 [PATCH v7 00/12] gpu: nova-core: add Turing support Timur Tabi
2026-01-21 23:52 ` [PATCH v7 01/12] gpu: nova-core: rename Imem to ImemSecure Timur Tabi
2026-01-21 23:52 ` [PATCH v7 02/12] gpu: nova-core: add ImemNonSecure section infrastructure Timur Tabi
2026-01-21 23:52 ` [PATCH v7 03/12] gpu: nova-core: support header parsing on Turing/GA100 Timur Tabi
2026-01-21 23:52 ` [PATCH v7 04/12] gpu: nova-core: add support for Turing/GA100 fwsignature Timur Tabi
2026-01-21 23:52 ` [PATCH v7 05/12] gpu: nova-core: add NV_PFALCON_FALCON_DMATRFCMD::with_falcon_mem() Timur Tabi
2026-01-21 23:52 ` [PATCH v7 06/12] gpu: nova-core: move some functions into the HAL Timur Tabi
2026-01-21 23:52 ` [PATCH v7 07/12] gpu: nova-core: Add basic Turing HAL Timur Tabi
2026-01-21 23:52 ` [PATCH v7 08/12] gpu: nova-core: add NV_PFALCON_FALCON_ENGINE::reset_engine() Timur Tabi
2026-01-21 23:52 ` [PATCH v7 09/12] gpu: nova-core: add Falcon HAL method load_method() Timur Tabi
2026-01-21 23:53 ` [PATCH v7 10/12] gpu: nova-core: add FalconUCodeDescV2 support Timur Tabi
2026-01-22 1:45 ` Joel Fernandes
2026-01-22 1:49 ` Joel Fernandes
2026-01-21 23:53 ` [PATCH v7 11/12] gpu: nova-core: align LibosMemoryRegionInitArgument size to page size Timur Tabi
2026-01-21 23:53 ` Timur Tabi [this message]
2026-01-22 0:14 ` [PATCH v7 00/12] gpu: nova-core: add Turing support Joel Fernandes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260121235302.1962185-13-ttabi@nvidia.com \
--to=ttabi@nvidia.com \
--cc=acourbot@nvidia.com \
--cc=dakr@kernel.org \
--cc=gary@garyguo.net \
--cc=jhubbard@nvidia.com \
--cc=joelagnelf@nvidia.com \
--cc=nouveau@lists.freedesktop.org \
--cc=rust-for-linux@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox