Linux Documentation
 help / color / mirror / Atom feed
* Re: [PATCH v10 03/21] gpu: nova-core: gsp: Expose total physical VRAM end from FB region info
From: Eliot Courtney @ 2026-04-02  5:37 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-4-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> Add `total_fb_end()` to `GspStaticConfigInfo` that computes the
> exclusive end address of the highest valid FB region covering both
> usable and GSP-reserved areas.
>
> This allows callers to know the full physical VRAM extent, not just
> the allocatable portion.
>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  drivers/gpu/nova-core/gsp/commands.rs    | 6 ++++++
>  drivers/gpu/nova-core/gsp/fw/commands.rs | 7 +++++++
>  2 files changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
> index 41742c1633c8..5e0649024637 100644
> --- a/drivers/gpu/nova-core/gsp/commands.rs
> +++ b/drivers/gpu/nova-core/gsp/commands.rs
> @@ -196,6 +196,9 @@ pub(crate) struct GetGspStaticInfoReply {
>      /// Usable FB (VRAM) region for driver memory allocation.
>      #[expect(dead_code)]
>      pub(crate) usable_fb_region: Range<u64>,
> +    /// End of VRAM.
> +    #[expect(dead_code)]
> +    pub(crate) total_fb_end: u64,
>  }
>  
>  impl MessageFromGsp for GetGspStaticInfoReply {
> @@ -209,9 +212,12 @@ fn read(
>      ) -> Result<Self, Self::InitError> {
>          let (base, size) = msg.first_usable_fb_region().ok_or(ENODEV)?;
>  
> +        let total_fb_end = msg.total_fb_end().ok_or(ENODEV)?;
> +
>          Ok(GetGspStaticInfoReply {
>              gpu_name: msg.gpu_name_str(),
>              usable_fb_region: base..base.saturating_add(size),
> +            total_fb_end,
>          })
>      }
>  }
> diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs
> index 9fffa74d03f9..46932d5c8c1d 100644
> --- a/drivers/gpu/nova-core/gsp/fw/commands.rs
> +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
> @@ -163,6 +163,13 @@ pub(crate) fn first_usable_fb_region(&self) -> Option<(u64, u64)> {
>              }
>          })
>      }
> +
> +    /// Compute the end of physical VRAM from all FB regions.
> +    pub(crate) fn total_fb_end(&self) -> Option<u64> {
> +        self.fb_regions()
> +            .map(|reg| reg.limit.saturating_add(1))

I think it would be better to used checked_add here.

> +            .max()
> +    }
>  }
>  
>  // SAFETY: Padding is explicit and will not contain uninitialized data.


^ permalink raw reply

* Re: [PATCH 5/5] types: Add standard __ob_trap and __ob_wrap scalar types
From: Peter Zijlstra @ 2026-04-02  5:38 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linus Torvalds, Justin Stitt, Miguel Ojeda, Nathan Chancellor,
	Andrew Morton, Andy Shevchenko, Arnd Bergmann, Mark Rutland,
	Matthew Wilcox (Oracle), Suren Baghdasaryan, Thomas Gleixner,
	Finn Thain, Geert Uytterhoeven, Thomas Weißschuh, llvm,
	Marco Elver, Jonathan Corbet, Nicolas Schier, Greg Kroah-Hartman,
	linux-kernel, kasan-dev, linux-hardening, linux-doc, linux-kbuild
In-Reply-To: <202604011328.D3821379@keescook>

On Wed, Apr 01, 2026 at 01:52:26PM -0700, Kees Cook wrote:

> (Though I would note that GCC does _not_ refuse the jump when there is a
> cleanup; it only see the other two uninitialized values.)

Yeah.. I know, but since we also build with clang, any such issue will
get discovered.

> So that makes it not totally broken, but it does make it a bit fragile.

Right.

> Another concern I have is dealing with older compilers and how to
> "hide" the label and its code. e.g. if I remove the "goto" from above:
> 
> ../drivers/misc/lkdtm/bugs.c:1060:1: warning: label 'weird' defined but not used [-Wunused-label]
>  1060 | weird:
>       | ^~~~~
> 
> Oddly, the unreachable code isn't a problem, so we could just wrap the
> label is some macro like:
> 
> #define force_label(x) if (0) goto x; x
> 
> force_label(weird):
>         pr_info("value: %lu\n", value);
>         pr_info("outcome: %zu\n", outcome);
> 

__maybe_unused also works on labels. Like:

__overflow: __maybe_unused
	dead-code-here;



^ permalink raw reply

* Re: [PATCH v10 12/21] gpu: nova-core: mm: Add unified page table entry wrapper enums
From: Eliot Courtney @ 2026-04-02  5:40 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-13-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> Add unified Pte, Pde, and DualPde wrapper enums that abstract over
> MMU v2 and v3 page table entry formats. These enums allow the page
> table walker and VMM to work with both MMU versions.
>
> Each unified type:
> - Takes MmuVersion parameter in constructors
> - Wraps both ver2 and ver3 variants
> - Delegates method calls to the appropriate variant
>
> This enables version-agnostic page table operations while keeping
> version-specific implementation details encapsulated in the ver2
> and ver3 modules.
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  drivers/gpu/nova-core/mm/pagetable.rs | 330 ++++++++++++++++++++++++++
>  1 file changed, 330 insertions(+)
>
> diff --git a/drivers/gpu/nova-core/mm/pagetable.rs b/drivers/gpu/nova-core/mm/pagetable.rs
> index 6e01a1af5222..909df37c3ee8 100644
> --- a/drivers/gpu/nova-core/mm/pagetable.rs
> +++ b/drivers/gpu/nova-core/mm/pagetable.rs
> @@ -12,6 +12,13 @@
>  pub(crate) mod ver3;
>  
>  use crate::gpu::Architecture;
> +use crate::mm::{
> +    pramin,
> +    Pfn,
> +    VirtualAddress,
> +    VramAddress, //
> +};
> +use kernel::prelude::*;
>  
>  /// Extracts the page table index at a given level from a virtual address.
>  pub(crate) trait VaLevelIndex {
> @@ -84,6 +91,96 @@ pub(crate) const fn as_index(&self) -> u64 {
>      }
>  }
>  
> +impl MmuVersion {
> +    /// Get the `PDE` levels (excluding PTE level) for page table walking.
> +    pub(crate) fn pde_levels(&self) -> &'static [PageTableLevel] {
> +        match self {
> +            Self::V2 => ver2::PDE_LEVELS,
> +            Self::V3 => ver3::PDE_LEVELS,
> +        }
> +    }
> +
> +    /// Get the PTE level for this MMU version.
> +    pub(crate) fn pte_level(&self) -> PageTableLevel {
> +        match self {
> +            Self::V2 => ver2::PTE_LEVEL,
> +            Self::V3 => ver3::PTE_LEVEL,
> +        }
> +    }
> +
> +    /// Get the dual PDE level (128-bit entries) for this MMU version.
> +    pub(crate) fn dual_pde_level(&self) -> PageTableLevel {
> +        match self {
> +            Self::V2 => ver2::DUAL_PDE_LEVEL,
> +            Self::V3 => ver3::DUAL_PDE_LEVEL,
> +        }
> +    }
> +
> +    /// Get the number of PDE levels for this MMU version.
> +    pub(crate) fn pde_level_count(&self) -> usize {
> +        self.pde_levels().len()
> +    }
> +
> +    /// Get the entry size in bytes for a given level.
> +    pub(crate) fn entry_size(&self, level: PageTableLevel) -> usize {
> +        if level == self.dual_pde_level() {
> +            16 // 128-bit dual PDE
> +        } else {
> +            8 // 64-bit PDE/PTE
> +        }
> +    }
> +
> +    /// Get the number of entries per page table page for a given level.
> +    pub(crate) fn entries_per_page(&self, level: PageTableLevel) -> usize {
> +        match self {
> +            Self::V2 => match level {
> +                // TODO: Calculate these values from the bitfield dynamically
> +                // instead of hardcoding them.
> +                PageTableLevel::Pdb => 4, // PD3 root: bits [48:47] = 2 bits
> +                PageTableLevel::L3 => 256, // PD0 dual: bits [28:21] = 8 bits
> +                _ => 512,                 // PD2, PD1, PT: 9 bits each
> +            },
> +            Self::V3 => match level {
> +                PageTableLevel::Pdb => 2,  // PDE4 root: bit [56] = 1 bit, 2 entries
> +                PageTableLevel::L4 => 256, // PDE0 dual: bits [28:21] = 8 bits
> +                _ => 512,                  // PDE3, PDE2, PDE1, PT: 9 bits each
> +            },
> +        }
> +    }
> +
> +    /// Extract the page table index at `level` from `va` for this MMU version.
> +    pub(crate) fn level_index(&self, va: VirtualAddress, level: u64) -> u64 {
> +        match self {
> +            Self::V2 => ver2::VirtualAddressV2::new(va).level_index(level),
> +            Self::V3 => ver3::VirtualAddressV3::new(va).level_index(level),
> +        }
> +    }
> +
> +    /// Compute upper bound on page table pages needed for `num_virt_pages`.
> +    ///
> +    /// Walks from PTE level up through PDE levels, accumulating the tree.
> +    pub(crate) fn pt_pages_upper_bound(&self, num_virt_pages: usize) -> usize {
> +        let mut total = 0;
> +
> +        // PTE pages at the leaf level.
> +        let pte_epp = self.entries_per_page(self.pte_level());
> +        let mut pages_at_level = num_virt_pages.div_ceil(pte_epp);
> +        total += pages_at_level;
> +
> +        // Walk PDE levels bottom-up (reverse of pde_levels()).
> +        for &level in self.pde_levels().iter().rev() {
> +            let epp = self.entries_per_page(level);
> +
> +            // How many pages at this level do we need to point to
> +            // the previous pages_at_level?
> +            pages_at_level = pages_at_level.div_ceil(epp);
> +            total += pages_at_level;
> +        }
> +
> +        total
> +    }
> +}
> +

We have a lot of matches on the MMU version here (and below in Pte, Pde,
DualPde). What about making MmuVersion into a trait (e.g. Mmu) with
associated types for Pte, Pde, DualPde which can implement traits
defining their common operations too? Then you can parameterise
Vmm/PtWalk on this type.

^ permalink raw reply

* RE: [PATCH v3 17/27] mtd: spi-nor: debugfs: Add locking support
From: Takahiro.Kuwano @ 2026-04-02  5:40 UTC (permalink / raw)
  To: miquel.raynal, pratyush, mwalle, richard, vigneshr, corbet
  Cc: tudor.ambarus, sean.anderson, thomas.petazzoni, STLin2, linux-mtd,
	linux-kernel, linux-doc
In-Reply-To: <20260317-winbond-v6-18-rc1-spi-nor-swp-v3-17-2ca9ea4e7b9b@bootlin.com>

Hi Miquel,

> The ioctl output may be counter intuitive in some cases. Asking for a
> "locked status" over a region that is only partially locked will return
> "unlocked" whereas in practice maybe the biggest part is actually
> locked.
> 
> Knowing what is the real software locking state through debugfs would be
> very convenient for development/debugging purposes, hence this proposal
> for adding an extra block at the end of the file: a "locked sectors"
> array which lists every section, if it is locked or not, showing both
> the address ranges and the sizes in numbers of blocks.
> 
> Here is an example of output, what is after the "sector map" is new.
> 
> $ cat /sys/kernel/debug/spi-nor/spi0.0/params
> name            (null)
> id              ef a0 20 00 00 00
> size            64.0 MiB
> write size      1
> page size       256
> address nbytes  4
> flags           HAS_SR_TB | 4B_OPCODES | HAS_4BAIT | HAS_LOCK | HAS_16BIT_SR | HAS_SR_TB_BIT6 | HAS_4BIT_BP |
> SOFT_RESET | NO_WP
> 
> opcodes
>  read           0xec
>   dummy cycles  6
>  erase          0xdc
>  program        0x34
>  8D extension   none
> 
> protocols
>  read           1S-4S-4S
>  write          1S-1S-4S
>  register       1S-1S-1S
> 
> erase commands
>  21 (4.00 KiB) [1]
>  dc (64.0 KiB) [3]
>  c7 (64.0 MiB)
> 
> sector map
>  region (in hex)   | erase mask | overlaid
>  ------------------+------------+---------
>  00000000-03ffffff |     [   3] | no
> 
> locked sectors
>  region (in hex)   | status   | #blocks
>  ------------------+----------+--------
>  00000000-03ffffff | unlocked | 1024
> 
> Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
> ---
> Here are below more examples of output with various situations. The full
> output of the "params" content has been manually removed to only show
> what has been added and how it behaves.
> 
> $ flash_lock -l /dev/mtd0 0x3f00000 16
> $ cat /sys/kernel/debug/spi-nor/spi0.0/params
> locked sectors
>  region (in hex)   | status   | #blocks
>  ------------------+----------+--------
>  00000000-03efffff | unlocked | 1008
>  03f00000-03ffffff |   locked | 16
> $
> $ flash_lock -u /dev/mtd0 0x3f00000 8
> $ cat /sys/kernel/debug/spi-nor/spi0.0/params
> locked sectors
>  region (in hex)   | status   | #blocks
>  ------------------+----------+--------
>  00000000-03f7ffff | unlocked | 1016
>  03f80000-03ffffff |   locked | 8
> $
> $ flash_lock -u /dev/mtd0
> $ cat /sys/kernel/debug/spi-nor/spi0.0/params
> locked sectors
>  region (in hex)   | status   | #blocks
>  ------------------+----------+--------
>  00000000-03ffffff | unlocked | 1024
> $
> $ flash_lock -l /dev/mtd0
> $ cat /sys/kernel/debug/spi-nor/spi0.0/params
> locked sectors
>  region (in hex)   | status   | #blocks
>  ------------------+----------+--------
>  00000000-03ffffff |   locked | 1024
> $
> $ flash_lock -u /dev/mtd0 0x20000 1022
> $ cat /sys/kernel/debug/spi-nor/spi0.0/params
> locked sectors
>  region (in hex)   | status   | #blocks
>  ------------------+----------+--------
>  00000000-0001ffff |   locked | 2
>  00020000-03ffffff | unlocked | 1022
> ---
>  drivers/mtd/spi-nor/core.h    |  4 ++++
>  drivers/mtd/spi-nor/debugfs.c | 22 ++++++++++++++++++++++
>  drivers/mtd/spi-nor/swp.c     | 11 +++++++----
>  3 files changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
> index 091eb934abe4..99ed6c54b90f 100644
> --- a/drivers/mtd/spi-nor/core.h
> +++ b/drivers/mtd/spi-nor/core.h
> @@ -707,6 +707,10 @@ static inline bool spi_nor_needs_sfdp(const struct spi_nor *nor)
>         return !nor->info->size;
>  }
> 
> +u64 spi_nor_get_min_prot_length_sr(struct spi_nor *nor);
> +void spi_nor_get_locked_range_sr(struct spi_nor *nor, const u8 *sr, loff_t *ofs, u64 *len);
> +bool spi_nor_is_locked_sr(struct spi_nor *nor, loff_t ofs, u64 len, const u8 *sr);
> +

It would be better to have generic helper functions rather than using SR-based
functions directly. The locking_ops is vendor/chip specific and can provide
other locking mechanism than SR-based block protection. For instance, Infineon,
Micron, Macronix (and maybe other vendors) offer protection mechanisms that can
protect sectors individually with volatile or non-volatile manner.

>  #ifdef CONFIG_DEBUG_FS
>  void spi_nor_debugfs_register(struct spi_nor *nor);
>  void spi_nor_debugfs_shutdown(void);
> diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c
> index d0191eb9f879..821fbc9587dc 100644
> --- a/drivers/mtd/spi-nor/debugfs.c
> +++ b/drivers/mtd/spi-nor/debugfs.c
> @@ -77,10 +77,12 @@ static void spi_nor_print_flags(struct seq_file *s, unsigned long flags,
>  static int spi_nor_params_show(struct seq_file *s, void *data)
>  {
>         struct spi_nor *nor = s->private;
> +       unsigned int min_prot_len = spi_nor_get_min_prot_length_sr(nor);
>         struct spi_nor_flash_parameter *params = nor->params;
>         struct spi_nor_erase_map *erase_map = &params->erase_map;
>         struct spi_nor_erase_region *region = erase_map->regions;
>         const struct flash_info *info = nor->info;
> +       loff_t lock_start, lock_length;
>         char buf[16], *str;
>         unsigned int i;
> 
> @@ -159,6 +161,26 @@ static int spi_nor_params_show(struct seq_file *s, void *data)
>                            region[i].overlaid ? "yes" : "no");
>         }
> 

Don't we need to check 'SNOR_F_HAS_LOCK' flag here?

> +       seq_puts(s, "\nlocked sectors\n");
> +       seq_puts(s, " region (in hex)   | status   | #blocks\n");
> +       seq_puts(s, " ------------------+----------+--------\n");
> +
> +       spi_nor_get_locked_range_sr(nor, nor->dfs_sr_cache, &lock_start, &lock_length);
> +       if (!lock_length || lock_length == params->size) {
> +               seq_printf(s, " %08llx-%08llx | %s | %llu\n", 0ULL, params->size - 1,
> +                          lock_length ? "  locked" : "unlocked", params->size / min_prot_len);

div_u64() is needed. 
I got undefined reference to `__aeabi_uldivmod' for my 32-bit ARM platform.
Same for following four seq_printf().

> +       } else if (!lock_start) {
> +               seq_printf(s, " %08llx-%08llx | %s | %llu\n", 0ULL, lock_length - 1,
> +                          "  locked", lock_length / min_prot_len);
> +               seq_printf(s, " %08llx-%08llx | %s | %llu\n", lock_length, params->size - 1,
> +                          "unlocked", (params->size - lock_length) / min_prot_len);
> +       } else {
> +               seq_printf(s, " %08llx-%08llx | %s | %llu\n", 0ULL, lock_start - 1,
> +                          "unlocked", lock_start / min_prot_len);
> +               seq_printf(s, " %08llx-%08llx | %s | %llu\n", lock_start, params->size - 1,
> +                          "  locked", lock_length / min_prot_len);
> +       }
> +
>         return 0;
>  }
>  DEFINE_SHOW_ATTRIBUTE(spi_nor_params);
> diff --git a/drivers/mtd/spi-nor/swp.c b/drivers/mtd/spi-nor/swp.c
> index 7a6c2b8ef921..8de8459e8e90 100644
> --- a/drivers/mtd/spi-nor/swp.c
> +++ b/drivers/mtd/spi-nor/swp.c
> @@ -32,7 +32,7 @@ static u8 spi_nor_get_sr_tb_mask(struct spi_nor *nor)
>                 return SR_TB_BIT5;
>  }
> 
> -static u64 spi_nor_get_min_prot_length_sr(struct spi_nor *nor)
> +u64 spi_nor_get_min_prot_length_sr(struct spi_nor *nor)
>  {
>         unsigned int bp_slots, bp_slots_needed;
>         /*
> @@ -53,8 +53,8 @@ static u64 spi_nor_get_min_prot_length_sr(struct spi_nor *nor)
>                 return sector_size;
>  }
> 
> -static void spi_nor_get_locked_range_sr(struct spi_nor *nor, const u8 *sr, loff_t *ofs,
> -                                       u64 *len)
> +void spi_nor_get_locked_range_sr(struct spi_nor *nor, const u8 *sr, loff_t *ofs,
> +                                u64 *len)
>  {
>         u64 min_prot_len;
>         u8 bp_mask = spi_nor_get_sr_bp_mask(nor);
> @@ -112,7 +112,7 @@ static bool spi_nor_check_lock_status_sr(struct spi_nor *nor, loff_t ofs,
>                 return (ofs >= lock_offs_max) || (offs_max <= lock_offs);
>  }
> 
> -static bool spi_nor_is_locked_sr(struct spi_nor *nor, loff_t ofs, u64 len, const u8 *sr)
> +bool spi_nor_is_locked_sr(struct spi_nor *nor, loff_t ofs, u64 len, const u8 *sr)
>  {
>         return spi_nor_check_lock_status_sr(nor, ofs, len, sr, true);
>  }
> @@ -410,6 +410,9 @@ static int spi_nor_sr_is_locked(struct spi_nor *nor, loff_t ofs, u64 len)
>   * -is_locked(): Checks if the region is *fully* locked, returns false otherwise.
>   *               This feeback may be misleading because users may get an "unlocked"
>   *               status even though a subpart of the region is effectively locked.
> + *
> + * If in doubt during development, check-out the debugfs output which tries to
> + * be more user friendly.
>   */
>  static const struct spi_nor_locking_ops spi_nor_sr_locking_ops = {
>         .lock = spi_nor_sr_lock,
> 
> --
> 2.51.1

Thanks!
Takahiro


^ permalink raw reply

* Re: [PATCH v10 10/21] gpu: nova-core: mm: Add MMU v2 page table types
From: Eliot Courtney @ 2026-04-02  5:41 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-11-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> Add page table entry and directory structures for MMU version 2
> used by Turing/Ampere/Ada GPUs.
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  drivers/gpu/nova-core/mm/pagetable.rs      |   2 +
>  drivers/gpu/nova-core/mm/pagetable/ver2.rs | 232 +++++++++++++++++++++
>  2 files changed, 234 insertions(+)
>  create mode 100644 drivers/gpu/nova-core/mm/pagetable/ver2.rs
>
> diff --git a/drivers/gpu/nova-core/mm/pagetable.rs b/drivers/gpu/nova-core/mm/pagetable.rs
> index 50b76d5e5aaf..38d88f8f09a9 100644
> --- a/drivers/gpu/nova-core/mm/pagetable.rs
> +++ b/drivers/gpu/nova-core/mm/pagetable.rs
> @@ -8,6 +8,8 @@
>  
>  #![expect(dead_code)]
>  
> +pub(crate) mod ver2;
> +

This looks like it has more visibility than necessary. And it seems
incorrect for anyone in the crate to care about MMU version details.
This can probably be just 'mod ver2'. There are a lot of other types /
functions in this series that could have tighter visibility. Could you
go through and see if you can reduce a bunch to private or pub(super)?

thanks!

^ permalink raw reply

* Re: [PATCH v10 07/21] gpu: nova-core: mm: Add TLB flush support
From: Eliot Courtney @ 2026-04-02  5:49 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-8-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> Add TLB (Translation Lookaside Buffer) flush support for GPU MMU.
>
> After modifying page table entries, the GPU's TLB must be invalidated
> to ensure the new mappings take effect. The Tlb struct provides flush
> functionality through BAR0 registers.
>
> The flush operation writes the page directory base address and triggers
> an invalidation, polling for completion with a 2 second timeout matching
> the Nouveau driver.
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  drivers/gpu/nova-core/mm.rs     |  1 +
>  drivers/gpu/nova-core/mm/tlb.rs | 95 +++++++++++++++++++++++++++++++++
>  drivers/gpu/nova-core/regs.rs   | 42 +++++++++++++++
>  3 files changed, 138 insertions(+)
>  create mode 100644 drivers/gpu/nova-core/mm/tlb.rs
>
> diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
> index 8f3089a5fa88..cfe9cbe11d57 100644
> --- a/drivers/gpu/nova-core/mm.rs
> +++ b/drivers/gpu/nova-core/mm.rs
> @@ -5,6 +5,7 @@
>  #![expect(dead_code)]
>  
>  pub(crate) mod pramin;
> +pub(crate) mod tlb;
>  
>  use kernel::sizes::SZ_4K;
>  
> diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
> new file mode 100644
> index 000000000000..cd3cbcf4c739
> --- /dev/null
> +++ b/drivers/gpu/nova-core/mm/tlb.rs
> @@ -0,0 +1,95 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! TLB (Translation Lookaside Buffer) flush support for GPU MMU.
> +//!
> +//! After modifying page table entries, the GPU's TLB must be flushed to
> +//! ensure the new mappings take effect. This module provides TLB flush
> +//! functionality for virtual memory managers.
> +//!
> +//! # Example
> +//!
> +//! ```ignore
> +//! use crate::mm::tlb::Tlb;
> +//!
> +//! fn page_table_update(tlb: &Tlb, pdb_addr: VramAddress) -> Result<()> {
> +//!     // ... modify page tables ...
> +//!
> +//!     // Flush TLB to make changes visible (polls for completion).
> +//!     tlb.flush(pdb_addr)?;
> +//!
> +//!     Ok(())
> +//! }
> +//! ```
> +
> +use kernel::{
> +    devres::Devres,
> +    io::poll::read_poll_timeout,
> +    io::Io,
> +    new_mutex,
> +    prelude::*,
> +    sync::{
> +        Arc,
> +        Mutex, //
> +    },
> +    time::Delta, //
> +};
> +
> +use crate::{
> +    driver::Bar0,
> +    mm::VramAddress,
> +    regs, //
> +};
> +
> +/// TLB manager for GPU translation buffer operations.
> +#[pin_data]
> +pub(crate) struct Tlb {
> +    bar: Arc<Devres<Bar0>>,
> +    /// TLB flush serialization lock: This lock is acquired during the
> +    /// DMA fence signalling critical path. It must NEVER be held across any
> +    /// reclaimable CPU memory allocations because the memory reclaim path can
> +    /// call `dma_fence_wait()`, which would deadlock with this lock held.
> +    #[pin]

This comment says that the lock is acquired during the DMA fence
signalling critical path, but IIUC we don't have anything like that
right now. Is this based on future yet to be done work? Can we reword
this in a way so it makes sense in the current state?

> +    lock: Mutex<()>,
> +}
> +
> +impl Tlb {
> +    /// Create a new TLB manager.
> +    pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
> +        pin_init!(Self {
> +            bar,
> +            lock <- new_mutex!((), "tlb_flush"),
> +        })
> +    }
> +
> +    /// Flush the GPU TLB for a specific page directory base.
> +    ///
> +    /// This invalidates all TLB entries associated with the given PDB address.
> +    /// Must be called after modifying page table entries to ensure the GPU sees
> +    /// the updated mappings.

If this must be called after every operation like that, I wonder if we
can change the design to require a guard like pattern something to
ensure flush is called. WDYT?

> +    pub(crate) fn flush(&self, pdb_addr: VramAddress) -> Result {

Hopefully we don't need to be calling flush() from anywhere in the
entire crate. Can you tighten the visibility here and in other places?
Many things seem to be pub(crate) that don't need to be.

> +        let _guard = self.lock.lock();
> +
> +        let bar = self.bar.try_access().ok_or(ENODEV)?;
> +
> +        // Write PDB address.
> +        bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
> +        bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
> +
> +        // Trigger flush: invalidate all pages and enable.
> +        bar.write_reg(
> +            regs::NV_TLB_FLUSH_CTRL::zeroed()
> +                .with_page_all(true)
> +                .with_enable(true),
> +        );
> +
> +        // Poll for completion - enable bit clears when flush is done.
> +        read_poll_timeout(
> +            || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
> +            |ctrl: &regs::NV_TLB_FLUSH_CTRL| !ctrl.enable(),
> +            Delta::ZERO,
> +            Delta::from_secs(2),
> +        )?;

This has zero delay on the read_poll_timeout - what about adding a small
delay of a microsecond or so?


^ permalink raw reply

* Re: [PATCH v10 02/21] gpu: nova-core: gsp: Extract usable FB region from GSP
From: Eliot Courtney @ 2026-04-02  5:49 UTC (permalink / raw)
  To: Joel Fernandes, Eliot Courtney, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, joel, linux-doc, amd-gfx, intel-gfx,
	intel-xe, linux-fbdev
In-Reply-To: <d6fe3e40-0310-4b90-ac8c-8beeec886f90@nvidia.com>

On Thu Apr 2, 2026 at 8:24 AM JST, Joel Fernandes wrote:
>
>
> On 4/1/2026 4:27 AM, Eliot Courtney wrote:
>> On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
>>> Add first_usable_fb_region() to GspStaticConfigInfo to extract the first
>>> usable FB region from GSP's fbRegionInfoParams. Usable regions are those
>>> that are not reserved or protected.
>>>
>>> The extracted region is stored in GetGspStaticInfoReply and exposed as
>>> usable_fb_region field for use by the memory subsystem.
>>>
>>> Cc: Nikola Djukic <ndjukic@nvidia.com>
>>> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
>>> ---
>> 
>> Please see my feedback from v9[1] which still applies.
>> 
>> [1]: https://lore.kernel.org/all/DH1GK30TUB4V.2GR6ANXIZDFFQ@nvidia.com/
>
> Yeah, I am seeing it now. Amidst making the earlier 7.1 merge window for
> the DRM buddy and earlier patches in the series, I missed this. They seem
> to be simple nits and I will address them in the next revision. thanks,
>
> --
> Joel Fernandes

No worries. Sorry I have not gotten to more of the patches yet. Trying
to get through some more now. Thanks!

^ permalink raw reply

* Re: [PATCH v10 21/21] gpu: nova-core: Use runtime BAR1 size instead of hardcoded 256MB
From: Eliot Courtney @ 2026-04-02  5:54 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-22-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> From: Zhi Wang <zhiw@nvidia.com>
>
> Remove the hardcoded BAR1_SIZE = SZ_256M constant. On GPUs like L40 the
> BAR1 aperture is larger than 256MB; using a hardcoded size prevents large
> BAR1 from working and mapping it would fail.
>
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  drivers/gpu/nova-core/driver.rs | 8 ++------
>  drivers/gpu/nova-core/gpu.rs    | 7 +------
>  2 files changed, 3 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
> index b1aafaff0cee..6f95f8672158 100644
> --- a/drivers/gpu/nova-core/driver.rs
> +++ b/drivers/gpu/nova-core/driver.rs
> @@ -13,10 +13,7 @@
>          Vendor, //
>      },
>      prelude::*,
> -    sizes::{
> -        SZ_16M,
> -        SZ_256M, //
> -    },
> +    sizes::SZ_16M,
>      sync::{
>          atomic::{
>              Atomic,
> @@ -40,7 +37,6 @@ pub(crate) struct NovaCore {
>  }
>  
>  const BAR0_SIZE: usize = SZ_16M;
> -pub(crate) const BAR1_SIZE: usize = SZ_256M;
>  
>  // For now we only support Ampere which can use up to 47-bit DMA addresses.
>  //
> @@ -51,7 +47,7 @@ pub(crate) struct NovaCore {
>  const GPU_DMA_BITS: u32 = 47;
>  
>  pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
> -pub(crate) type Bar1 = pci::Bar<BAR1_SIZE>;
> +pub(crate) type Bar1 = pci::Bar;
>  
>  kernel::pci_device_table!(
>      PCI_TABLE,
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 8206ec015b26..ba6f1f6f0485 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -353,16 +353,11 @@ pub(crate) fn run_selftests(
>  
>      #[cfg(CONFIG_NOVA_MM_SELFTESTS)]
>      fn run_mm_selftests(self: Pin<&mut Self>, pdev: &pci::Device<device::Bound>) -> Result {
> -        use crate::driver::BAR1_SIZE;
> -
>          // PRAMIN aperture self-tests.
>          crate::mm::pramin::run_self_test(pdev.as_ref(), self.mm.pramin(), self.spec.chipset)?;
>  
>          // BAR1 self-tests.
> -        let bar1 = Arc::pin_init(
> -            pdev.iomap_region_sized::<BAR1_SIZE>(1, c"nova-core/bar1"),
> -            GFP_KERNEL,
> -        )?;
> +        let bar1 = Arc::pin_init(pdev.iomap_region(1, c"nova-core/bar1"), GFP_KERNEL)?;
>          let bar1_access = bar1.access(pdev.as_ref())?;
>  
>          crate::mm::bar_user::run_self_test(

Can we move this directly after patch 17 which adds the fixed bar1? Or
alternatively fold it in while preserving Zhi's attribution (I am not
sure what the conventional method for this is).

^ permalink raw reply

* Re: [PATCH v10 07/21] gpu: nova-core: mm: Add TLB flush support
From: Matthew Brost @ 2026-04-02  5:59 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Dave Airlie, Daniel Almeida, Koen Koning,
	dri-devel, rust-for-linux, Nikola Djukic, Maarten Lankhorst,
	Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
	Jonathan Corbet, Alex Deucher, Christian Koenig, Jani Nikula,
	Joonas Lahtinen, Rodrigo Vivi, Tvrtko Ursulin, Huang Rui,
	Matthew Auld, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-8-joelagnelf@nvidia.com>

On Tue, Mar 31, 2026 at 05:20:34PM -0400, Joel Fernandes wrote:
> Add TLB (Translation Lookaside Buffer) flush support for GPU MMU.
> 
> After modifying page table entries, the GPU's TLB must be invalidated
> to ensure the new mappings take effect. The Tlb struct provides flush
> functionality through BAR0 registers.
> 
> The flush operation writes the page directory base address and triggers
> an invalidation, polling for completion with a 2 second timeout matching
> the Nouveau driver.
> 
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  drivers/gpu/nova-core/mm.rs     |  1 +
>  drivers/gpu/nova-core/mm/tlb.rs | 95 +++++++++++++++++++++++++++++++++
>  drivers/gpu/nova-core/regs.rs   | 42 +++++++++++++++
>  3 files changed, 138 insertions(+)
>  create mode 100644 drivers/gpu/nova-core/mm/tlb.rs
> 
> diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
> index 8f3089a5fa88..cfe9cbe11d57 100644
> --- a/drivers/gpu/nova-core/mm.rs
> +++ b/drivers/gpu/nova-core/mm.rs
> @@ -5,6 +5,7 @@
>  #![expect(dead_code)]
>  
>  pub(crate) mod pramin;
> +pub(crate) mod tlb;
>  
>  use kernel::sizes::SZ_4K;
>  
> diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
> new file mode 100644
> index 000000000000..cd3cbcf4c739
> --- /dev/null
> +++ b/drivers/gpu/nova-core/mm/tlb.rs
> @@ -0,0 +1,95 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! TLB (Translation Lookaside Buffer) flush support for GPU MMU.
> +//!
> +//! After modifying page table entries, the GPU's TLB must be flushed to
> +//! ensure the new mappings take effect. This module provides TLB flush
> +//! functionality for virtual memory managers.
> +//!
> +//! # Example
> +//!
> +//! ```ignore
> +//! use crate::mm::tlb::Tlb;
> +//!
> +//! fn page_table_update(tlb: &Tlb, pdb_addr: VramAddress) -> Result<()> {
> +//!     // ... modify page tables ...
> +//!
> +//!     // Flush TLB to make changes visible (polls for completion).
> +//!     tlb.flush(pdb_addr)?;
> +//!
> +//!     Ok(())
> +//! }
> +//! ```
> +
> +use kernel::{
> +    devres::Devres,
> +    io::poll::read_poll_timeout,
> +    io::Io,
> +    new_mutex,
> +    prelude::*,
> +    sync::{
> +        Arc,
> +        Mutex, //
> +    },
> +    time::Delta, //
> +};
> +
> +use crate::{
> +    driver::Bar0,
> +    mm::VramAddress,
> +    regs, //
> +};
> +
> +/// TLB manager for GPU translation buffer operations.
> +#[pin_data]
> +pub(crate) struct Tlb {
> +    bar: Arc<Devres<Bar0>>,
> +    /// TLB flush serialization lock: This lock is acquired during the
> +    /// DMA fence signalling critical path. It must NEVER be held across any
> +    /// reclaimable CPU memory allocations because the memory reclaim path can
> +    /// call `dma_fence_wait()`, which would deadlock with this lock held.
> +    #[pin]
> +    lock: Mutex<()>,
> +}
> +
> +impl Tlb {
> +    /// Create a new TLB manager.
> +    pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
> +        pin_init!(Self {
> +            bar,
> +            lock <- new_mutex!((), "tlb_flush"),
> +        })
> +    }
> +
> +    /// Flush the GPU TLB for a specific page directory base.
> +    ///
> +    /// This invalidates all TLB entries associated with the given PDB address.
> +    /// Must be called after modifying page table entries to ensure the GPU sees
> +    /// the updated mappings.
> +    pub(crate) fn flush(&self, pdb_addr: VramAddress) -> Result {

This landed on my list randomly, so I took a look.

Wouldn’t you want to virtualize the invalidation based on your device?

For example, what if you need to register interface changes on future hardware?

Or, if you’re a VF, can you even do MMIO?

I’d relayer this.

Matt

> +        let _guard = self.lock.lock();
> +
> +        let bar = self.bar.try_access().ok_or(ENODEV)?;
> +
> +        // Write PDB address.
> +        bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
> +        bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
> +
> +        // Trigger flush: invalidate all pages and enable.
> +        bar.write_reg(
> +            regs::NV_TLB_FLUSH_CTRL::zeroed()
> +                .with_page_all(true)
> +                .with_enable(true),
> +        );
> +
> +        // Poll for completion - enable bit clears when flush is done.
> +        read_poll_timeout(
> +            || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
> +            |ctrl: &regs::NV_TLB_FLUSH_CTRL| !ctrl.enable(),
> +            Delta::ZERO,
> +            Delta::from_secs(2),
> +        )?;
> +
> +        Ok(())
> +    }
> +}
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index a3ca02345e20..5e3f5933a55c 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -548,3 +548,45 @@ pub(crate) mod ga100 {
>          }
>      }
>  }
> +
> +// MMU TLB
> +
> +register! {
> +    /// TLB flush register: PDB address bits [39:8].
> +    pub(crate) NV_TLB_FLUSH_PDB_LO(u32) @ 0x00b830a0 {
> +        /// PDB address bits [39:8].
> +        31:0    pdb_lo => u32;
> +    }
> +
> +    /// TLB flush register: PDB address bits [47:40].
> +    pub(crate) NV_TLB_FLUSH_PDB_HI(u32) @ 0x00b830a4 {
> +        /// PDB address bits [47:40].
> +        7:0     pdb_hi => u8;
> +    }
> +
> +    /// TLB flush control register.
> +    pub(crate) NV_TLB_FLUSH_CTRL(u32) @ 0x00b830b0 {
> +        /// Invalidate all pages.
> +        0:0     page_all => bool;
> +        /// Enable/trigger flush (clears when flush completes).
> +        31:31   enable => bool;
> +    }
> +}
> +
> +impl NV_TLB_FLUSH_PDB_LO {
> +    /// Create a register value from a PDB address.
> +    ///
> +    /// Extracts bits [39:8] of the address and shifts it right by 8 bits.
> +    pub(crate) fn from_pdb_addr(addr: u64) -> Self {
> +        Self::zeroed().with_pdb_lo(((addr >> 8) & 0xFFFF_FFFF) as u32)
> +    }
> +}
> +
> +impl NV_TLB_FLUSH_PDB_HI {
> +    /// Create a register value from a PDB address.
> +    ///
> +    /// Extracts bits [47:40] of the address and shifts it right by 40 bits.
> +    pub(crate) fn from_pdb_addr(addr: u64) -> Self {
> +        Self::zeroed().with_pdb_hi(((addr >> 40) & 0xFF) as u8)
> +    }
> +}
> -- 
> 2.34.1
> 

^ permalink raw reply

* Re: [PATCH 15/33] rust: rust_is_available: remove warning for 0.66.[01] buggy versions
From: Miguel Ojeda @ 2026-04-02  7:12 UTC (permalink / raw)
  To: Gary Guo
  Cc: Miguel Ojeda, Nathan Chancellor, Nicolas Schier, Danilo Krummrich,
	Andreas Hindborg, Catalin Marinas, Will Deacon, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Courbot, David Airlie,
	Simona Vetter, Brendan Higgins, David Gow, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Alice Ryhl, Jonathan Corbet, Boqun Feng,
	Björn Roy Baron, Benno Lossin, Trevor Gross, rust-for-linux,
	linux-kbuild, Lorenzo Stoakes, Vlastimil Babka, Liam R . Howlett,
	Uladzislau Rezki, linux-block, moderated for non-subscribers,
	Alexandre Ghiti, linux-riscv, nouveau, dri-devel, Rae Moar,
	linux-kselftest, kunit-dev, Nick Desaulniers, Bill Wendling,
	Justin Stitt, llvm, linux-kernel, Shuah Khan, linux-doc
In-Reply-To: <DHHWO582XLXH.1DU3CO41F1PV7@garyguo.net>

On Wed, Apr 1, 2026 at 4:58 PM Gary Guo <gary@garyguo.net> wrote:
>
> The scripts/rust_is_available.sh change looks correct to me, although I couldn't
> get scripts/rust_is_available_test.py to run on NixOS.
>
> Looks like it filtered out PATH but uses /usr/bin/env to find python binary? For
> obvious reasons that will only work if python is located /usr/bin/python.

Yeah, the script has some assumptions on it (e.g. it also assumes
`dash` behavior vs. `bash` in a couple tests which I should relax
too). Happy to change that.

Thanks for testing it :)

Cheers,
Miguel

^ permalink raw reply

* Re: Re: [PATCH net-next v03 4/6] hinic3: Add ethtool rss ops
From: Fan Gong @ 2026-04-02  7:17 UTC (permalink / raw)
  To: mohsin.bashr
  Cc: andrew+netdev, davem, edumazet, gongfan1, guoxin09, horms,
	ioana.ciornei, kuba, linux-doc, linux-kernel, luosifu,
	maxime.chevallier, netdev, pabeni, shijing34, wulike1,
	zhengjiezhen, zhoushuai28, zhuyikai1
In-Reply-To: <3b252b2b-6be7-4a8d-9782-39695e948635@gmail.com>

On 4/1/2026 3:53 PM, Mohsin Bashir wrote:

> > +    err = hinic3_update_rss_hash_opts(netdev, cmd, rss_type);
> > +    if (err)
> > +        return err;
> > +
> > +    err = hinic3_set_rss_type(nic_dev->hwdev, *rss_type);
> 
> So if we fail here, we have already modified the rss_type in-place. From this on-wards, the HW state would diverge from in-memory state. How about use a local copy and only update if no error?
> 
> > +    if (err) {
> > +        netdev_err(netdev, "Failed to set rss type\n");
> > +        return err;
> > +    }
> > +
> > +    return 0;
> > +}
> > + 

Thanks for your four patch reviews.

For patch 01, we'll add nic_info to inform the user that depth is timmed and restore q_params.

For patch 02, we'll remove const.

For patch 03, we'll change errcode and modify rx_max_coalesced_frames_high  judgement condition.
As tx and rx share interrupt, we only use ETHTOOL_COALESCE_RX_USECS to avoid user misunderstanding.
So we do not add ETHTOOL_COALESCE_TX_USECS.

For patch 04, we overlooked the recovery of rss_type in error handling and we'll fix it.

We will fix them in next version.

Fan gong

^ permalink raw reply

* [PATCH v12 00/15] arm64/riscv: Add support for crashkernel CMA reservation
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie

The crash memory allocation, and the exclude of crashk_res, crashk_low_res
and crashk_cma memory are almost identical across different architectures,
This patch set handle them in crash core in a general way, which eliminate
a lot of duplication code.

And add support for crashkernel CMA reservation for arm64 and riscv.

Rebased on v7.0-rc1.

Basic second kernel boot test were performed on QEMU platforms for x86,
ARM64, and RISC-V architectures with the following parameters:

	"cma=256M crashkernel=256M crashkernel=64M,cma"

Changes in v12:
- Remove the unused "nr_mem_ranges" for x86.
- Add "Fix crashk_low_res not exclude bug" test log.
- Provide a separate patch for each architecture for using
  crash_prepare_headers(), which will make the review more convenient.
- Add Reviewed-by and Tested-by.
- Link to v11: https://lore.kernel.org/all/20260328074013.3589544-1-ruanjinjie@huawei.com/

Changes in v11:
- Avoid silently drop crash memory if the crash kernel is built without
  CONFIG_CMA.
- Remove unnecessary "cmem->nr_ranges = 0" for arch_crash_populate_cmem()
  as we use kvzalloc().
- Provide a separate patch for each architecture to fix the existing
  buffer overflow issue.
- Add Acked-bys for arm64.

Changes in v10:
- Fix crashk_low_res not excluded bug in the existing
  RISC-V code.
- Fix an existing memory leak issue in the existing PowerPC code.
- Fix the ordering issue of adding CMA ranges to
  "linux,usable-memory-range".
- Fix an existing concurrency issue. A Concurrent memory hotplug may occur
  between reading memblock and attempting to fill cmem during kexec_load()
  for almost all existing architectures.
- Link to v9: https://lore.kernel.org/all/20260323072745.2481719-1-ruanjinjie@huawei.com/

Changes in v9:
- Collect Reviewed-by and Acked-by, and prepare for Sashiko AI review.
- Link to v8: https://lore.kernel.org/all/20260302035315.3892241-1-ruanjinjie@huawei.com/

Changes in v8:
- Fix the build issues reported by kernel test robot and Sourabh.
- Link to v7: https://lore.kernel.org/all/20260226130437.1867658-1-ruanjinjie@huawei.com/

Changes in v7:
- Correct the inclusion of CMA-reserved ranges for kdump kernel in of/kexec
  for arm64 and riscv.
- Add Acked-by.
- Link to v6: https://lore.kernel.org/all/20260224085342.387996-1-ruanjinjie@huawei.com/

Changes in v6:
- Update the crash core exclude code as Mike suggested.
- Rebased on v7.0-rc1.
- Add acked-by.
- Link to v5: https://lore.kernel.org/all/20260212101001.343158-1-ruanjinjie@huawei.com/

Jinjie Ruan (14):
  riscv: kexec_file: Fix crashk_low_res not exclude bug
  powerpc/crash: Fix possible memory leak in update_crash_elfcorehdr()
  x86/kexec: Fix potential buffer overflow in prepare_elf_headers()
  arm64: kexec_file: Fix potential buffer overflow in
    prepare_elf_headers()
  riscv: kexec_file: Fix potential buffer overflow in
    prepare_elf_headers()
  LoongArch: kexec: Fix potential buffer overflow in
    prepare_elf_headers()
  crash: Add crash_prepare_headers() to exclude crash kernel memory
  arm64: kexec_file: Use crash_prepare_headers() helper to simplify code
  x86/kexec: Use crash_prepare_headers() helper to simplify code
  riscv: kexec_file: Use crash_prepare_headers() helper to simplify code
  LoongArch: kexec: Use crash_prepare_headers() helper to simplify code
  crash: Use crash_exclude_core_ranges() on powerpc
  arm64: kexec: Add support for crashkernel CMA reservation
  riscv: kexec: Add support for crashkernel CMA reservation

Sourabh Jain (1):
  powerpc/crash: sort crash memory ranges before preparing elfcorehdr

 .../admin-guide/kernel-parameters.txt         |  16 +--
 arch/arm64/kernel/machine_kexec_file.c        |  43 +++-----
 arch/arm64/mm/init.c                          |   5 +-
 arch/loongarch/kernel/machine_kexec_file.c    |  43 +++-----
 arch/powerpc/include/asm/kexec_ranges.h       |   1 -
 arch/powerpc/kexec/crash.c                    |   7 +-
 arch/powerpc/kexec/ranges.c                   | 101 +-----------------
 arch/riscv/kernel/machine_kexec_file.c        |  42 +++-----
 arch/riscv/mm/init.c                          |   5 +-
 arch/x86/kernel/crash.c                       |  92 +++-------------
 drivers/of/fdt.c                              |   9 +-
 drivers/of/kexec.c                            |   9 ++
 include/linux/crash_core.h                    |   9 ++
 include/linux/crash_reserve.h                 |   4 +-
 kernel/crash_core.c                           |  89 ++++++++++++++-
 15 files changed, 193 insertions(+), 282 deletions(-)

-- 
2.34.1


^ permalink raw reply

* [PATCH v12 01/15] riscv: kexec_file: Fix crashk_low_res not exclude bug
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

As done in commit 944a45abfabc ("arm64: kdump: Reimplement crashkernel=X")
and commit 4831be702b95 ("arm64/kexec: Fix missing extra range for
crashkres_low.") for arm64, while implementing crashkernel=X,[high,low],
riscv should have excluded the "crashk_low_res" reserved ranges from
the crash kernel memory to prevent them from being exported through
/proc/vmcore, and the exclusion would need an extra crash_mem range.

Just simply tested on qemu with crashkernel=4G with kexec in [1] mentioned
in [2]. And the second kernel can be started normally.

	# dmesg | grep crash
	[    0.000000] crashkernel low memory reserved: 0xf8000000 - 0x100000000 (128 MB)
	[    0.000000] crashkernel reserved: 0x000000017fe00000 - 0x000000027fe00000 (4096 MB)

Cc: Guo Ren <guoren@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
[1]: https://github.com/chenjh005/kexec-tools/tree/build-test-riscv-v2
[2]: https://lore.kernel.org/all/20230726175000.2536220-1-chenjiahao16@huawei.com/
Fixes: 5882e5acf18d ("riscv: kdump: Implement crashkernel=X,[high,low]")
Reviewed-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/riscv/kernel/machine_kexec_file.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
index 54e2d9552e93..3f7766057cac 100644
--- a/arch/riscv/kernel/machine_kexec_file.c
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -61,7 +61,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 	unsigned int nr_ranges;
 	int ret;
 
-	nr_ranges = 1; /* For exclusion of crashkernel region */
+	nr_ranges = 2; /* For exclusion of crashkernel region */
 	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
 
 	cmem = kmalloc_flex(*cmem, ranges, nr_ranges);
@@ -76,8 +76,16 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 
 	/* Exclude crashkernel region */
 	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
-	if (!ret)
-		ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+	if (ret)
+		goto out;
+
+	if (crashk_low_res.end) {
+		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+		if (ret)
+			goto out;
+	}
+
+	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
 
 out:
 	kfree(cmem);
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 02/15] powerpc/crash: Fix possible memory leak in update_crash_elfcorehdr()
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

In get_crash_memory_ranges(), if crash_exclude_mem_range() failed
after realloc_mem_ranges() has successfully allocated the cmem
memory, it just returns an error but leaves cmem pointing to
the allocated memory, nor is it freed in the caller
update_crash_elfcorehdr(), which cause a memory leak, goto out
to free the cmem.

Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Fixes: 849599b702ef ("powerpc/crash: add crash memory hotplug support")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/powerpc/kexec/crash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index a325c1c02f96..1d12cef8e1e0 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -440,7 +440,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *
 	ret = get_crash_memory_ranges(&cmem);
 	if (ret) {
 		pr_err("Failed to get crash mem range\n");
-		return;
+		goto out;
 	}
 
 	/*
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 03/15] x86/kexec: Fix potential buffer overflow in prepare_elf_headers()
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

There is a race condition between the kexec_load() system call
(crash kernel loading path) and memory hotplug operations that can lead
to buffer overflow and potential kernel crash.

During prepare_elf_headers(), the following steps occur:
1. get_nr_ram_ranges_callback() queries current System RAM memory ranges
2. Allocates buffer based on queried count
3. prepare_elf64_ram_headers_callback() populates ranges from memblock

If memory hotplug occurs between step 1 and step 3, the number of ranges
can increase, causing out-of-bounds write when populating cmem->ranges[].

This happens because kexec_load() uses kexec_trylock (atomic_t) while
memory hotplug uses device_hotplug_lock (mutex), so they don't serialize
with each other.

Just add bounds checking in prepare_elf64_ram_headers_callback() to
prevent out-of-bounds (OOB) access,

Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Fixes: 8d5f894a3108 ("x86: kexec_file: lift CRASH_MAX_RANGES limit on crash_mem buffer")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/x86/kernel/crash.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 335fd2ee9766..7fa6d45ebe3f 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -225,6 +225,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 {
 	struct crash_mem *cmem = arg;
 
+	if (cmem->nr_ranges >= cmem->max_nr_ranges)
+		return -ENOMEM;
+
 	cmem->ranges[cmem->nr_ranges].start = res->start;
 	cmem->ranges[cmem->nr_ranges].end = res->end;
 	cmem->nr_ranges++;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 04/15] arm64: kexec_file: Fix potential buffer overflow in prepare_elf_headers()
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

There is a race condition between the kexec_load() system call
(crash kernel loading path) and memory hotplug operations that can lead
to buffer overflow and potential kernel crash.

During prepare_elf_headers(), the following steps occur:
1. The first for_each_mem_range()  queries current System RAM memory ranges
2. Allocates buffer based on queried count
3. The 2st for_each_mem_range() populates ranges from memblock

If memory hotplug occurs between step 1 and step 3, the number of ranges
can increase, causing out-of-bounds write when populating cmem->ranges[].

This happens because kexec_load() uses kexec_trylock (atomic_t) while
memory hotplug uses device_hotplug_lock (mutex), so they don't serialize
with each other.

Just add bounds checking to prevent out-of-bounds access.

Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: 3751e728cef2 ("arm64: kexec_file: add crash dump support")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/kernel/machine_kexec_file.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index fba260ad87a9..df52ac4474c9 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -59,6 +59,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 	cmem->max_nr_ranges = nr_ranges;
 	cmem->nr_ranges = 0;
 	for_each_mem_range(i, &start, &end) {
+		if (cmem->nr_ranges >= cmem->max_nr_ranges) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
 		cmem->ranges[cmem->nr_ranges].start = start;
 		cmem->ranges[cmem->nr_ranges].end = end - 1;
 		cmem->nr_ranges++;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 05/15] riscv: kexec_file: Fix potential buffer overflow in prepare_elf_headers()
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

There is a race condition between the kexec_load() system call
(crash kernel loading path) and memory hotplug operations that can lead
to buffer overflow and potential kernel crash.

During prepare_elf_headers(), the following steps occur:
1. get_nr_ram_ranges_callback() queries current System RAM memory ranges
2. Allocates buffer based on queried count
3. prepare_elf64_ram_headers_callback() populates ranges from memblock

If memory hotplug occurs between step 1 and step 3, the number of ranges
can increase, causing out-of-bounds write when populating cmem->ranges[].

This happens because kexec_load() uses kexec_trylock (atomic_t) while
memory hotplug uses device_hotplug_lock (mutex), so they don't serialize
with each other.

Just add bounds checking in prepare_elf64_ram_headers_callback() to prevent
out-of-bounds (OOB) access.

Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic")
Reviewed-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/riscv/kernel/machine_kexec_file.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
index 3f7766057cac..773a1cba8ba0 100644
--- a/arch/riscv/kernel/machine_kexec_file.c
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -48,6 +48,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 {
 	struct crash_mem *cmem = arg;
 
+	if (cmem->nr_ranges >= cmem->max_nr_ranges)
+		return -ENOMEM;
+
 	cmem->ranges[cmem->nr_ranges].start = res->start;
 	cmem->ranges[cmem->nr_ranges].end = res->end;
 	cmem->nr_ranges++;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 06/15] LoongArch: kexec: Fix potential buffer overflow in prepare_elf_headers()
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

There is a race condition between the kexec_load() system call
(crash kernel loading path) and memory hotplug operations that can lead
to buffer overflow and potential kernel crash.

During prepare_elf_headers(), the following steps occur:
1. The first for_each_mem_range()  queries current System RAM memory ranges
2. Allocates buffer based on queried count
3. The 2st for_each_mem_range() populates ranges from memblock

If memory hotplug occurs between step 1 and step 3, the number of ranges
can increase, causing out-of-bounds write when populating cmem->ranges[].

This happens because kexec_load() uses kexec_trylock (atomic_t) while
memory hotplug uses device_hotplug_lock (mutex), so they don't serialize
with each other.

Just add bounds checking to prevent out-of-bounds access.

Cc: Youling Tang <tangyouling@kylinos.cn>
Cc: Huacai Chen <chenhuacai@loongson.cn>
Fixes: 1bcca8620a91 ("LoongArch: Add crash dump support for kexec_file")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/loongarch/kernel/machine_kexec_file.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c
index 5584b798ba46..167392c1da33 100644
--- a/arch/loongarch/kernel/machine_kexec_file.c
+++ b/arch/loongarch/kernel/machine_kexec_file.c
@@ -75,6 +75,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 	cmem->max_nr_ranges = nr_ranges;
 	cmem->nr_ranges = 0;
 	for_each_mem_range(i, &start, &end) {
+		if (cmem->nr_ranges >= cmem->max_nr_ranges) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
 		cmem->ranges[cmem->nr_ranges].start = start;
 		cmem->ranges[cmem->nr_ranges].end = end - 1;
 		cmem->nr_ranges++;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 07/15] powerpc/crash: sort crash memory ranges before preparing elfcorehdr
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

From: Sourabh Jain <sourabhjain@linux.ibm.com>

During a memory hot-remove event, the elfcorehdr is rebuilt to exclude
the removed memory. While updating the crash memory ranges for this
operation, the crash memory ranges array can become unsorted. This
happens because remove_mem_range() may split a memory range into two
parts and append the higher-address part as a separate range at the end
of the array.

So far, no issues have been observed due to the unsorted crash memory
ranges. However, this could lead to problems once crash memory range
removal is handled by generic code, as introduced in the upcoming
patches in this series.

Currently, powerpc uses a platform-specific function,
remove_mem_range(), to exclude hot-removed memory from the crash memory
ranges. This function performs the same task as the generic
crash_exclude_mem_range() in crash_core.c. The generic helper also
ensures that the crash memory ranges remain sorted. So remove the
redundant powerpc-specific implementation and instead call
crash_exclude_mem_range_guarded() (which internally calls
crash_exclude_mem_range()) to exclude the hot-removed memory ranges.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baoquan he <bhe@redhat.com>
Cc: Jinjie Ruan <ruanjinjie@huawei.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Cc: Shivang Upadhyay <shivangu@linux.ibm.com>
Cc: linux-kernel@vger.kernel.org
Acked-by: Baoquan He <bhe@redhat.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/powerpc/include/asm/kexec_ranges.h |  4 +-
 arch/powerpc/kexec/crash.c              |  5 +-
 arch/powerpc/kexec/ranges.c             | 87 +------------------------
 3 files changed, 7 insertions(+), 89 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
index 14055896cbcb..ad95e3792d10 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,7 +7,9 @@
 void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
-int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges,
+				    unsigned long long mstart,
+				    unsigned long long mend);
 int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
 int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
 int get_crash_memory_ranges(struct crash_mem **mem_ranges);
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 1d12cef8e1e0..1426d2099bad 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -431,7 +431,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *
 	struct crash_mem *cmem = NULL;
 	struct kexec_segment *ksegment;
 	void *ptr, *mem, *elfbuf = NULL;
-	unsigned long elfsz, memsz, base_addr, size;
+	unsigned long elfsz, memsz, base_addr, size, end;
 
 	ksegment = &image->segment[image->elfcorehdr_index];
 	mem = (void *) ksegment->mem;
@@ -450,7 +450,8 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *
 	if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
 		base_addr = PFN_PHYS(mn->start_pfn);
 		size = mn->nr_pages * PAGE_SIZE;
-		ret = remove_mem_range(&cmem, base_addr, size);
+		end = base_addr + size - 1;
+		ret = crash_exclude_mem_range_guarded(&cmem, base_addr, end);
 		if (ret) {
 			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
 			goto out;
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 867135560e5c..6c58bcc3e130 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -553,7 +553,7 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges)
 #endif /* CONFIG_KEXEC_FILE */
 
 #ifdef CONFIG_CRASH_DUMP
-static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges,
+int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges,
 					   unsigned long long mstart,
 					   unsigned long long mend)
 {
@@ -641,89 +641,4 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges)
 		pr_err("Failed to setup crash memory ranges\n");
 	return ret;
 }
-
-/**
- * remove_mem_range - Removes the given memory range from the range list.
- * @mem_ranges:    Range list to remove the memory range to.
- * @base:          Base address of the range to remove.
- * @size:          Size of the memory range to remove.
- *
- * (Re)allocates memory, if needed.
- *
- * Returns 0 on success, negative errno on error.
- */
-int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
-{
-	u64 end;
-	int ret = 0;
-	unsigned int i;
-	u64 mstart, mend;
-	struct crash_mem *mem_rngs = *mem_ranges;
-
-	if (!size)
-		return 0;
-
-	/*
-	 * Memory range are stored as start and end address, use
-	 * the same format to do remove operation.
-	 */
-	end = base + size - 1;
-
-	for (i = 0; i < mem_rngs->nr_ranges; i++) {
-		mstart = mem_rngs->ranges[i].start;
-		mend = mem_rngs->ranges[i].end;
-
-		/*
-		 * Memory range to remove is not part of this range entry
-		 * in the memory range list
-		 */
-		if (!(base >= mstart && end <= mend))
-			continue;
-
-		/*
-		 * Memory range to remove is equivalent to this entry in the
-		 * memory range list. Remove the range entry from the list.
-		 */
-		if (base == mstart && end == mend) {
-			for (; i < mem_rngs->nr_ranges - 1; i++) {
-				mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
-				mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
-			}
-			mem_rngs->nr_ranges--;
-			goto out;
-		}
-		/*
-		 * Start address of the memory range to remove and the
-		 * current memory range entry in the list is same. Just
-		 * move the start address of the current memory range
-		 * entry in the list to end + 1.
-		 */
-		else if (base == mstart) {
-			mem_rngs->ranges[i].start = end + 1;
-			goto out;
-		}
-		/*
-		 * End address of the memory range to remove and the
-		 * current memory range entry in the list is same.
-		 * Just move the end address of the current memory
-		 * range entry in the list to base - 1.
-		 */
-		else if (end == mend)  {
-			mem_rngs->ranges[i].end = base - 1;
-			goto out;
-		}
-		/*
-		 * Memory range to remove is not at the edge of current
-		 * memory range entry. Split the current memory entry into
-		 * two half.
-		 */
-		else {
-			size = mem_rngs->ranges[i].end - end + 1;
-			mem_rngs->ranges[i].end = base - 1;
-			ret = add_mem_range(mem_ranges, end + 1, size);
-		}
-	}
-out:
-	return ret;
-}
 #endif /* CONFIG_CRASH_DUMP */
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 08/15] crash: Add crash_prepare_headers() to exclude crash kernel memory
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

The crash memory alloc, and the exclude of crashk_res, crashk_low_res
and crashk_cma memory are almost identical across different architectures,
handling them in the crash core would eliminate a lot of duplication, so
add crash_prepare_headers() helper to handle them in the common code.

To achieve the above goal, three architecture-specific functions are
introduced:

- arch_get_system_nr_ranges(). Pre-counts the max number of memory ranges.

- arch_crash_populate_cmem(). Collects the memory ranges and fills them
  into cmem.

- arch_crash_exclude_ranges(). Architecture's additional crash memory
  ranges exclusion, defaulting to empty.

Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 include/linux/crash_core.h |  5 +++
 kernel/crash_core.c        | 82 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index d35726d6a415..033b20204aca 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -66,6 +66,8 @@ extern int crash_exclude_mem_range(struct crash_mem *mem,
 				   unsigned long long mend);
 extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 				       void **addr, unsigned long *sz);
+extern int crash_prepare_headers(int need_kernel_map, void **addr,
+				 unsigned long *sz, unsigned long *nr_mem_ranges);
 
 struct kimage;
 struct kexec_segment;
@@ -83,6 +85,9 @@ int kexec_should_crash(struct task_struct *p);
 int kexec_crash_loaded(void);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
 extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
+extern unsigned int arch_get_system_nr_ranges(void);
+extern int arch_crash_populate_cmem(struct crash_mem *cmem);
+extern int arch_crash_exclude_ranges(struct crash_mem *cmem);
 
 #else /* !CONFIG_CRASH_DUMP*/
 struct pt_regs;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 2c1a3791e410..96a96e511f5a 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -170,9 +170,6 @@ static inline resource_size_t crash_resource_size(const struct resource *res)
 	return !res->end ? 0 : resource_size(res);
 }
 
-
-
-
 int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 			  void **addr, unsigned long *sz)
 {
@@ -274,6 +271,85 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 	return 0;
 }
 
+static struct crash_mem *alloc_cmem(unsigned int nr_ranges)
+{
+	struct crash_mem *cmem;
+
+	cmem = kvzalloc_flex(*cmem, ranges, nr_ranges);
+	if (!cmem)
+		return NULL;
+
+	cmem->max_nr_ranges = nr_ranges;
+	return cmem;
+}
+
+unsigned int __weak arch_get_system_nr_ranges(void) { return 0; }
+int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; }
+int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; }
+
+static int crash_exclude_core_ranges(struct crash_mem *cmem)
+{
+	int ret, i;
+
+	/* Exclude crashkernel region */
+	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		return ret;
+
+	if (crashk_low_res.end) {
+		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < crashk_cma_cnt; ++i) {
+		ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start,
+					      crashk_cma_ranges[i].end);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz,
+			  unsigned long *nr_mem_ranges)
+{
+	unsigned int max_nr_ranges;
+	struct crash_mem *cmem;
+	int ret;
+
+	max_nr_ranges = arch_get_system_nr_ranges();
+	if (!max_nr_ranges)
+		return -ENOMEM;
+
+	cmem = alloc_cmem(max_nr_ranges);
+	if (!cmem)
+		return -ENOMEM;
+
+	ret = arch_crash_populate_cmem(cmem);
+	if (ret)
+		goto out;
+
+	ret = crash_exclude_core_ranges(cmem);
+	if (ret)
+		goto out;
+
+	ret = arch_crash_exclude_ranges(cmem);
+	if (ret)
+		goto out;
+
+	/* Return the computed number of memory ranges, for hotplug usage */
+	if (nr_mem_ranges)
+		*nr_mem_ranges = cmem->nr_ranges;
+
+	ret = crash_prepare_elf64_headers(cmem, need_kernel_map, addr, sz);
+
+out:
+	kvfree(cmem);
+	return ret;
+}
+
 /**
  * crash_exclude_mem_range - exclude a mem range for existing ranges
  * @mem: mem->range contains an array of ranges sorted in ascending order
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 09/15] arm64: kexec_file: Use crash_prepare_headers() helper to simplify code
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

Use the newly introduced crash_prepare_headers() function to replace
the existing prepare_elf_headers(), allocate cmem and exclude crash
kernel memory in the crash core, which reduce code duplication.

Only the following two architecture functions need to be implemented:
- arch_get_system_nr_ranges(). Use for_each_mem_range() to traverse
  and pre-count the max number of memory ranges.

- arch_crash_populate_cmem(). Use for_each_mem_range to traverse
  and collect the memory ranges and fills them into cmem.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/kernel/machine_kexec_file.c | 46 ++++++++------------------
 1 file changed, 14 insertions(+), 32 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index df52ac4474c9..558408f403b5 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -40,51 +40,33 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 }
 
 #ifdef CONFIG_CRASH_DUMP
-static int prepare_elf_headers(void **addr, unsigned long *sz)
+unsigned int arch_get_system_nr_ranges(void)
 {
-	struct crash_mem *cmem;
-	unsigned int nr_ranges;
-	int ret;
-	u64 i;
+	unsigned int nr_ranges = 2; /* for exclusion of crashkernel region */
 	phys_addr_t start, end;
+	u64 i;
 
-	nr_ranges = 2; /* for exclusion of crashkernel region */
 	for_each_mem_range(i, &start, &end)
 		nr_ranges++;
 
-	cmem = kmalloc_flex(*cmem, ranges, nr_ranges);
-	if (!cmem)
-		return -ENOMEM;
+	return nr_ranges;
+}
+
+int arch_crash_populate_cmem(struct crash_mem *cmem)
+{
+	phys_addr_t start, end;
+	u64 i;
 
-	cmem->max_nr_ranges = nr_ranges;
-	cmem->nr_ranges = 0;
 	for_each_mem_range(i, &start, &end) {
-		if (cmem->nr_ranges >= cmem->max_nr_ranges) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (cmem->nr_ranges >= cmem->max_nr_ranges)
+			return -ENOMEM;
 
 		cmem->ranges[cmem->nr_ranges].start = start;
 		cmem->ranges[cmem->nr_ranges].end = end - 1;
 		cmem->nr_ranges++;
 	}
 
-	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
-	if (ret)
-		goto out;
-
-	if (crashk_low_res.end) {
-		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
-		if (ret)
-			goto out;
-	}
-
-	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
-
-out:
-	kfree(cmem);
-	return ret;
+	return 0;
 }
 #endif
 
@@ -114,7 +96,7 @@ int load_other_segments(struct kimage *image,
 	void *headers;
 	unsigned long headers_sz;
 	if (image->type == KEXEC_TYPE_CRASH) {
-		ret = prepare_elf_headers(&headers, &headers_sz);
+		ret = crash_prepare_headers(true, &headers, &headers_sz, NULL);
 		if (ret) {
 			pr_err("Preparing elf core header failed\n");
 			goto out_err;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 10/15] x86/kexec: Use crash_prepare_headers() helper to simplify code
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

Use the newly introduced crash_prepare_headers() function to replace
the existing prepare_elf_headers(), allocate cmem and exclude crash kernel
memory in the crash core, which reduce code duplication.

Only the following three architecture functions need to be implemented:
- arch_get_system_nr_ranges(). Call get_nr_ram_ranges_callback()
  to pre-count the max number of memory ranges.

- arch_crash_populate_cmem(). Use prepare_elf64_ram_headers_callback()
  to collect the memory ranges and fills them into cmem.

- arch_crash_exclude_ranges(). Exclude the low 1M for x86.

By the way, remove the unused "nr_mem_ranges" in
arch_crash_handle_hotplug_event().

Cc: Thomas Gleixner <tglx@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/x86/kernel/crash.c | 89 +++++------------------------------------
 1 file changed, 11 insertions(+), 78 deletions(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 7fa6d45ebe3f..10ef24611f2a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -152,16 +152,8 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 	return 0;
 }
 
-/* Gather all the required information to prepare elf headers for ram regions */
-static struct crash_mem *fill_up_crash_elf_data(void)
+unsigned int arch_get_system_nr_ranges(void)
 {
-	unsigned int nr_ranges = 0;
-	struct crash_mem *cmem;
-
-	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
-	if (!nr_ranges)
-		return NULL;
-
 	/*
 	 * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges
 	 * may cause range splits. So add extra slots here.
@@ -176,49 +168,16 @@ static struct crash_mem *fill_up_crash_elf_data(void)
 	 * But in order to lest the low 1M could be changed in the future,
 	 * (e.g. [start, 1M]), add a extra slot.
 	 */
-	nr_ranges += 3 + crashk_cma_cnt;
-	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
-	if (!cmem)
-		return NULL;
-
-	cmem->max_nr_ranges = nr_ranges;
+	unsigned int nr_ranges = 3 + crashk_cma_cnt;
 
-	return cmem;
+	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
+	return nr_ranges;
 }
 
-/*
- * Look for any unwanted ranges between mstart, mend and remove them. This
- * might lead to split and split ranges are put in cmem->ranges[] array
- */
-static int elf_header_exclude_ranges(struct crash_mem *cmem)
+int arch_crash_exclude_ranges(struct crash_mem *cmem)
 {
-	int ret = 0;
-	int i;
-
 	/* Exclude the low 1M because it is always reserved */
-	ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1);
-	if (ret)
-		return ret;
-
-	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
-	if (ret)
-		return ret;
-
-	if (crashk_low_res.end)
-		ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
-					      crashk_low_res.end);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < crashk_cma_cnt; ++i) {
-		ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start,
-					      crashk_cma_ranges[i].end);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+	return crash_exclude_mem_range(cmem, 0, SZ_1M - 1);
 }
 
 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
@@ -235,35 +194,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 	return 0;
 }
 
-/* Prepare elf headers. Return addr and size */
-static int prepare_elf_headers(void **addr, unsigned long *sz,
-			       unsigned long *nr_mem_ranges)
+int arch_crash_populate_cmem(struct crash_mem *cmem)
 {
-	struct crash_mem *cmem;
-	int ret;
-
-	cmem = fill_up_crash_elf_data();
-	if (!cmem)
-		return -ENOMEM;
-
-	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
-	if (ret)
-		goto out;
-
-	/* Exclude unwanted mem ranges */
-	ret = elf_header_exclude_ranges(cmem);
-	if (ret)
-		goto out;
-
-	/* Return the computed number of memory ranges, for hotplug usage */
-	*nr_mem_ranges = cmem->nr_ranges;
-
-	/* By default prepare 64bit headers */
-	ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
-
-out:
-	vfree(cmem);
-	return ret;
+	return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
 }
 #endif
 
@@ -421,7 +354,8 @@ int crash_load_segments(struct kimage *image)
 				  .buf_max = ULONG_MAX, .top_down = false };
 
 	/* Prepare elf headers and add a segment */
-	ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum);
+	ret = crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &kbuf.buffer,
+				    &kbuf.bufsz, &pnum);
 	if (ret)
 		return ret;
 
@@ -514,7 +448,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void)
 void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
 {
 	void *elfbuf = NULL, *old_elfcorehdr;
-	unsigned long nr_mem_ranges;
 	unsigned long mem, memsz;
 	unsigned long elfsz = 0;
 
@@ -532,7 +465,7 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
 	 * Create the new elfcorehdr reflecting the changes to CPU and/or
 	 * memory resources.
 	 */
-	if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) {
+	if (crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &elfbuf, &elfsz, NULL)) {
 		pr_err("unable to create new elfcorehdr");
 		goto out;
 	}
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 11/15] riscv: kexec_file: Use crash_prepare_headers() helper to simplify code
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

Use the newly introduced crash_prepare_headers() function to replace
the existing prepare_elf_headers(), allocate cmem and exclude crash kernel
memory in the crash core, which reduce code duplication.

Only the following two architecture functions need to be implemented:
- arch_get_system_nr_ranges(). Call get_nr_ram_ranges_callback()
  to pre-counts the max number of memory ranges.

- arch_crash_populate_cmem(). Use prepare_elf64_ram_headers_callback()
  to collects the memory ranges and fills them into cmem.

Cc: Paul Walmsley <pjw@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/riscv/kernel/machine_kexec_file.c | 47 +++++++-------------------
 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
index 773a1cba8ba0..bea818f75dd6 100644
--- a/arch/riscv/kernel/machine_kexec_file.c
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -44,6 +44,15 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 	return 0;
 }
 
+unsigned int arch_get_system_nr_ranges(void)
+{
+	unsigned int nr_ranges = 2; /* For exclusion of crashkernel region */
+
+	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
+
+	return nr_ranges;
+}
+
 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 {
 	struct crash_mem *cmem = arg;
@@ -58,41 +67,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 	return 0;
 }
 
-static int prepare_elf_headers(void **addr, unsigned long *sz)
+int arch_crash_populate_cmem(struct crash_mem *cmem)
 {
-	struct crash_mem *cmem;
-	unsigned int nr_ranges;
-	int ret;
-
-	nr_ranges = 2; /* For exclusion of crashkernel region */
-	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
-
-	cmem = kmalloc_flex(*cmem, ranges, nr_ranges);
-	if (!cmem)
-		return -ENOMEM;
-
-	cmem->max_nr_ranges = nr_ranges;
-	cmem->nr_ranges = 0;
-	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
-	if (ret)
-		goto out;
-
-	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
-	if (ret)
-		goto out;
-
-	if (crashk_low_res.end) {
-		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
-		if (ret)
-			goto out;
-	}
-
-	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
-
-out:
-	kfree(cmem);
-	return ret;
+	return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
 }
 
 static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
@@ -284,7 +261,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start,
 	if (image->type == KEXEC_TYPE_CRASH) {
 		void *headers;
 		unsigned long headers_sz;
-		ret = prepare_elf_headers(&headers, &headers_sz);
+		ret = crash_prepare_headers(true, &headers, &headers_sz, NULL);
 		if (ret) {
 			pr_err("Preparing elf core header failed\n");
 			goto out;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 13/15] crash: Use crash_exclude_core_ranges() on powerpc
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

The crash memory exclude of crashk_res and crashk_cma memory on powerpc
are almost identical to the generic crash_exclude_core_ranges().

By introducing the architecture-specific arch_crash_exclude_mem_range()
function with a default implementation of crash_exclude_mem_range(),
and using crash_exclude_mem_range_guarded as powerpc's separate
implementation, the generic crash_exclude_core_ranges() helper function
can be reused.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Cc: Shivang Upadhyay <shivangu@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/powerpc/include/asm/kexec_ranges.h |  3 ---
 arch/powerpc/kexec/crash.c              |  2 +-
 arch/powerpc/kexec/ranges.c             | 16 ++++------------
 include/linux/crash_core.h              |  4 ++++
 kernel/crash_core.c                     | 19 +++++++++++++------
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
index ad95e3792d10..8489e844b447 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,9 +7,6 @@
 void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
-int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges,
-				    unsigned long long mstart,
-				    unsigned long long mend);
 int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
 int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
 int get_crash_memory_ranges(struct crash_mem **mem_ranges);
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 1426d2099bad..52992309e28c 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -451,7 +451,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *
 		base_addr = PFN_PHYS(mn->start_pfn);
 		size = mn->nr_pages * PAGE_SIZE;
 		end = base_addr + size - 1;
-		ret = crash_exclude_mem_range_guarded(&cmem, base_addr, end);
+		ret = arch_crash_exclude_mem_range(&cmem, base_addr, end);
 		if (ret) {
 			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
 			goto out;
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 6c58bcc3e130..e5fea23b191b 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -553,9 +553,9 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges)
 #endif /* CONFIG_KEXEC_FILE */
 
 #ifdef CONFIG_CRASH_DUMP
-int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges,
-					   unsigned long long mstart,
-					   unsigned long long mend)
+int arch_crash_exclude_mem_range(struct crash_mem **mem_ranges,
+				 unsigned long long mstart,
+				 unsigned long long mend)
 {
 	struct crash_mem *tmem = *mem_ranges;
 
@@ -604,18 +604,10 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges)
 			sort_memory_ranges(*mem_ranges, true);
 	}
 
-	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end);
+	ret = crash_exclude_core_ranges(mem_ranges);
 	if (ret)
 		goto out;
 
-	for (i = 0; i < crashk_cma_cnt; ++i) {
-		ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start,
-					      crashk_cma_ranges[i].end);
-		if (ret)
-			goto out;
-	}
-
 	/*
 	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
 	 *        regions are exported to save their context at the time of
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 033b20204aca..dbec826dc53b 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -68,6 +68,7 @@ extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_ma
 				       void **addr, unsigned long *sz);
 extern int crash_prepare_headers(int need_kernel_map, void **addr,
 				 unsigned long *sz, unsigned long *nr_mem_ranges);
+extern int crash_exclude_core_ranges(struct crash_mem **cmem);
 
 struct kimage;
 struct kexec_segment;
@@ -88,6 +89,9 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 extern unsigned int arch_get_system_nr_ranges(void);
 extern int arch_crash_populate_cmem(struct crash_mem *cmem);
 extern int arch_crash_exclude_ranges(struct crash_mem *cmem);
+extern int arch_crash_exclude_mem_range(struct crash_mem **mem,
+					unsigned long long mstart,
+					unsigned long long mend);
 
 #else /* !CONFIG_CRASH_DUMP*/
 struct pt_regs;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 96a96e511f5a..300d44ad5471 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -287,24 +287,31 @@ unsigned int __weak arch_get_system_nr_ranges(void) { return 0; }
 int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; }
 int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; }
 
-static int crash_exclude_core_ranges(struct crash_mem *cmem)
+int __weak arch_crash_exclude_mem_range(struct crash_mem **mem,
+					unsigned long long mstart,
+					unsigned long long mend)
+{
+	return crash_exclude_mem_range(*mem, mstart, mend);
+}
+
+int crash_exclude_core_ranges(struct crash_mem **cmem)
 {
 	int ret, i;
 
 	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+	ret = arch_crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
 	if (ret)
 		return ret;
 
 	if (crashk_low_res.end) {
-		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+		ret = arch_crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
 		if (ret)
 			return ret;
 	}
 
 	for (i = 0; i < crashk_cma_cnt; ++i) {
-		ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start,
-					      crashk_cma_ranges[i].end);
+		ret = arch_crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start,
+						   crashk_cma_ranges[i].end);
 		if (ret)
 			return ret;
 	}
@@ -331,7 +338,7 @@ int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz,
 	if (ret)
 		goto out;
 
-	ret = crash_exclude_core_ranges(cmem);
+	ret = crash_exclude_core_ranges(&cmem);
 	if (ret)
 		goto out;
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH v12 12/15] LoongArch: kexec: Use crash_prepare_headers() helper to simplify code
From: Jinjie Ruan @ 2026-04-02  7:26 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
  Cc: ruanjinjie
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>

Use the newly introduced crash_prepare_headers() function to replace
the existing prepare_elf_headers(), allocate cmem and exclude crash kernel
memory in the crash core, which reduce code duplication.

Only the following two architecture functions need to be implemented:
- arch_get_system_nr_ranges(). Use for_each_mem_range to traverse
  and pre-count the max number of memory ranges.

- arch_crash_populate_cmem(). Use for_each_mem_range to traverse
  and collect the memory ranges and fills them into cmem.

Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/loongarch/kernel/machine_kexec_file.c | 46 +++++++---------------
 1 file changed, 14 insertions(+), 32 deletions(-)

diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c
index 167392c1da33..3d0386ee18ef 100644
--- a/arch/loongarch/kernel/machine_kexec_file.c
+++ b/arch/loongarch/kernel/machine_kexec_file.c
@@ -56,51 +56,33 @@ static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmpl
 }
 
 #ifdef CONFIG_CRASH_DUMP
-
-static int prepare_elf_headers(void **addr, unsigned long *sz)
+unsigned int arch_get_system_nr_ranges(void)
 {
-	int ret, nr_ranges;
-	uint64_t i;
+	int nr_ranges = 2; /* for exclusion of crashkernel region */
 	phys_addr_t start, end;
-	struct crash_mem *cmem;
+	uint64_t i;
 
-	nr_ranges = 2; /* for exclusion of crashkernel region */
 	for_each_mem_range(i, &start, &end)
 		nr_ranges++;
 
-	cmem = kmalloc_flex(*cmem, ranges, nr_ranges);
-	if (!cmem)
-		return -ENOMEM;
+	return nr_ranges;
+}
+
+int arch_crash_populate_cmem(struct crash_mem *cmem)
+{
+	phys_addr_t start, end;
+	uint64_t i;
 
-	cmem->max_nr_ranges = nr_ranges;
-	cmem->nr_ranges = 0;
 	for_each_mem_range(i, &start, &end) {
-		if (cmem->nr_ranges >= cmem->max_nr_ranges) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (cmem->nr_ranges >= cmem->max_nr_ranges)
+			return -ENOMEM;
 
 		cmem->ranges[cmem->nr_ranges].start = start;
 		cmem->ranges[cmem->nr_ranges].end = end - 1;
 		cmem->nr_ranges++;
 	}
 
-	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
-	if (ret < 0)
-		goto out;
-
-	if (crashk_low_res.end) {
-		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
-		if (ret < 0)
-			goto out;
-	}
-
-	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
-
-out:
-	kfree(cmem);
-	return ret;
+	return 0;
 }
 
 /*
@@ -168,7 +150,7 @@ int load_other_segments(struct kimage *image,
 		void *headers;
 		unsigned long headers_sz;
 
-		ret = prepare_elf_headers(&headers, &headers_sz);
+		ret = crash_prepare_headers(true, &headers, &headers_sz, NULL);
 		if (ret < 0) {
 			pr_err("Preparing elf core header failed\n");
 			goto out_err;
-- 
2.34.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox